Quuxplusone / LLVMBugzillaTest

0 stars 0 forks source link

loop pass manager issue prevents loops from being flattened #5857

Open Quuxplusone opened 15 years ago

Quuxplusone commented 15 years ago
Bugzilla Link PR5355
Status NEW
Importance P enhancement
Reported by Giangiacomo Mariotti (gg.mariotti@gmail.com)
Reported on 2009-10-30 14:05:01 -0700
Last modified on 2010-07-27 13:07:30 -0700
Version trunk
Hardware PC Linux
CC anton@korobeynikov.info, llvm-bugs@lists.llvm.org, llvm@sunfishcode.online
Fixed by commit(s)
Attachments
Blocks
Blocked by
See also
The following code, compiled with:
gcc-4.4.2 -O2
and
clang -O2

produces 2 completely different outputs because clang doesn't optimize out
useless code(or at least not as much useless code as gcc does):

1) simplest form:

#include <stdio.h>
#include <time.h>
#include <stdlib.h>

#define MAXN 100000
int x[MAXN];
int startn = 5000;
int n;

#define TRIALS 5

int main(void)
{
    int i, j, k;
    float fi;
    int t, ex, timesum, start;
    double nans;

    for (i = 0; i < n; i++)
        x[i] = rand();
    n = startn;
    printf("C Time Cost Model, n=%d\n", n);

    do {
        printf(" %-26s", "{}");
        k = 0;
        timesum = 0;
        for (ex = 0; ex < TRIALS; ex++) {
            start = clock();
            for (i = 1; i <= n; i++) {
                fi = (float) i;
                for (j = 1; j <= n; j++) {
                    {};
                }
            }
            t = clock()-start;
            printf("%8d", t);
            timesum += t;
        }
        nans = 1e9 * timesum / ((double)
            n*n * TRIALS * CLOCKS_PER_SEC);
        printf("%8.0f\n", nans);
    } while (0);

    return 0;
}

with gcc -O2, the output is:
C Time Cost Model, n=5000
 {}                               0       0       0       0       0       0

with clang -O2, the output is:
C Time Cost Model, n=5000
 {}                           20000   20000   10000   20000   20000       1

A more complex example is this:
/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */

/* timemod.c -- Produce table of C run time costs */

/*With some irrelevant modifications by me*/

#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <math.h>

#define MAXN 100000
int x[MAXN];
int startn = 5000;
int n;

/* FUNCTIONS TO BE TIMED */

int intcmp(int *i, int *j)
{
    return *i - *j;
}

#define swapmac(i, j) { t = x[i]; x[i] = x[j]; x[j] = t; }

void swapfunc(int i, int j)
{
    int t = x[i];
    x[i] = x[j];
    x[j] = t;
}

static inline void swap_func_inline(int arr[], int i, int j)
{
    int t = arr[i];
    arr[i] = arr[j];
    arr[j] = t;
}

#define maxmac(a, b) ((a) > (b) ? (a) : (b))

int maxfunc(int a, int b)
{
    return a > b ? a : b;
}

/* WORKHORSE */

#define T(s, n)\
do {\
    typeof(s) s__ = s;\
    typeof(n) n__ = n;\
    printf("%s (n=%d)\n", s__, n__);\
} while (0)

#define TRIALS 5

#define M(op)   do {                        \
    printf(" %-26s", #op);              \
    k = 0;                              \
    timesum = 0;                        \
    for (ex = 0; ex < TRIALS; ex++) {   \
        start = clock();                \
        for (i = 1; i <= n; i++) {      \
            fi = (float) i;             \
            for (j = 1; j <= n; j++) {  \
                op;                     \
            }                           \
        }                               \
        t = clock()-start;              \
        printf("%8d", t);               \
        timesum += t;                   \
    }                                   \
    nans = 1e9 * timesum / ((double)    \
        n*n * TRIALS * CLOCKS_PER_SEC); \
    printf("%8.0f\n", nans); } while (0)

int main()
{
    int i, j, k;
    float fi, fj, fk;
    int t, ex, timesum, start, globalstart;
    double nans;

    globalstart = clock();
    for (i = 0; i < n; i++)
        x[i] = rand();
    n = startn;
    printf("C Time Cost Model, n=%d\n", n);
    T("\nInteger Arithmetic", n);
    M({});
    M(k++);
    M(k = i);
    M(k = i + j);
    M(k = i - j);
    M(k = i * j);
    M(k = i / j);
    M(k = i % j);
    M(k = i & j);
    M(k = i | j);
    T("\nFloating Point Arithmetic", n);
    M(fj=j;);
    M(fj=j; fk = fi + fj;);
    M(fj=j; fk = fi - fj;);
    M(fj=j; fk = fi * fj;);
    M(fj=j; fk = fi / fj;);
    T("\nArray Operations", n);
    M(k = i + j);
    M(k = x[i] + j);
    M(k = i + x[j]);
    M(k = x[i] + x[j]);
    T("\nComparisons", n);
    M(if (i < j) k++);
    M(if (x[i] < x[j]) k++);
    T("\nArray Comparisons and Swaps", n);
    M(k = (x[i]<x[k]) ? -1 : 1);
    M(k = intcmp(x+i, x+j));
    M(swapmac(i, j));
    M(swapfunc(i, j));
    M(swap_func_inline(x, i, j));
    T("\nMax Function, Macro and Inline", n);
    M(k = (i > j) ? i : j);
    M(k = maxmac(i, j));
    M(k = maxfunc(i, j));
    n = startn / 5;
    T("\nMath Functions", n);
    M(fk = j+fi;);
    M(k = rand(););
    M(fk = sqrt(j+fi));
    M(fk = sin(j+fi));
    M(fk = sinh(j+fi));
    M(fk = asin(j+fi));
    M(fk = cos(j+fi));
    M(fk = tan(j+fi));
    n = startn / 10;
    T("\nMemory Allocation", n);
    M(free(malloc(16)););
    M(free(malloc(100)););
    M(free(malloc(2000)););

    /* Possible additions: input, output, malloc */
    printf("\n  Secs: %4.2f\n",
        ((double) clock()-globalstart)
        / CLOCKS_PER_SEC);
    return 0;
}

->with gcc -O2, the output is:
C Time Cost Model, n=5000

Integer Arithmetic (n=5000)
 {}                               0       0       0       0       0       0
 k++                              0       0       0       0       0       0
 k = i                            0       0       0       0       0       0
 k = i + j                        0       0       0       0       0       0
 k = i - j                        0       0       0       0       0       0
 k = i * j                        0       0       0       0       0       0
 k = i / j                        0       0       0       0       0       0
 k = i % j                        0       0       0       0       0       0
 k = i & j                        0       0       0       0       0       0
 k = i | j                        0       0       0       0       0       0

Floating Point Arithmetic (n=5000)
 fj=j;                            0       0       0       0       0       0
 fj=j; fk = fi + fj;              0       0       0       0       0       0
 fj=j; fk = fi - fj;              0       0       0       0       0       0
 fj=j; fk = fi * fj;              0       0       0       0       0       0
 fj=j; fk = fi / fj;              0       0       0       0       0       0

Array Operations (n=5000)
 k = i + j                        0       0       0       0       0       0
 k = x[i] + j                     0       0       0       0       0       0
 k = i + x[j]                     0       0       0       0       0       0
 k = x[i] + x[j]                  0       0       0       0       0       0

Comparisons (n=5000)
 if (i < j) k++                   0       0       0       0       0       0
 if (x[i] < x[j]) k++             0       0       0       0       0       0

Array Comparisons and Swaps (n=5000)
 k = (x[i]<x[k]) ? -1 : 1         0       0       0       0       0       0
 k = intcmp(x+i, x+j)             0       0       0       0       0       0
 swapmac(i, j)                30000   30000   30000   20000   30000       1
 swapfunc(i, j)               30000   30000   30000   30000   30000       1
 swap_func_inline(x, i, j)    40000   30000   30000   40000   30000       1

Max Function, Macro and Inline (n=5000)
 k = (i > j) ? i : j              0       0       0       0       0       0
 k = maxmac(i, j)                 0       0       0       0       0       0
 k = maxfunc(i, j)                0       0       0       0       0       0

Math Functions (n=1000)
 fk = j+fi;                       0       0       0       0       0       0
 k = rand();                  10000   10000   20000   10000   10000      12
 fk = sqrt(j+fi)                  0       0       0   10000       0       2
 fk = sin(j+fi)                   0       0       0       0       0       0
 fk = sinh(j+fi)              20000   20000   20000   20000   20000      20
 fk = asin(j+fi)              20000   20000   20000   20000   20000      20
 fk = cos(j+fi)                   0       0       0       0       0       0
 fk = tan(j+fi)                   0       0       0       0       0       0

Memory Allocation (n=500)
 free(malloc(16));            10000   10000       0   10000   10000      32
 free(malloc(100));           10000       0   10000   10000   10000      32
 free(malloc(2000));          10000   10000   20000   10000   20000      56

  Secs: 0.88

->with clang -O2, the output is:
C Time Cost Model, n=5000

Integer Arithmetic (n=5000)
 {}                           20000   10000   20000   20000   20000       1
 k++                          20000   20000   20000   20000   20000       1
 k = i                        10000   20000   20000   20000   20000       1
 k = i + j                    20000   20000   20000   20000   10000       1
 k = i - j                    20000   20000   20000   20000   20000       1
 k = i * j                    20000   20000   10000   20000   20000       1
 k = i / j                    20000   20000   20000   20000   20000       1
 k = i % j                    20000   10000   20000   20000   20000       1
 k = i & j                    20000   20000   20000   20000   20000       1
 k = i | j                    10000   20000   20000   20000   20000       1

Floating Point Arithmetic (n=5000)
 fj=j;                        20000   20000   20000   20000   10000       1
 fj=j; fk = fi + fj;          20000   20000   20000   20000   20000       1
 fj=j; fk = fi - fj;          20000   20000   10000   20000   20000       1
 fj=j; fk = fi * fj;          20000   20000   20000   20000   20000       1
 fj=j; fk = fi / fj;          10000   20000   20000   20000   20000       1

Array Operations (n=5000)
 k = i + j                    20000   20000   20000   10000   20000       1
 k = x[i] + j                 20000   20000   20000   20000   20000       1
 k = i + x[j]                 20000   10000   20000   20000   20000       1
 k = x[i] + x[j]              20000   20000   20000   10000   20000       1

Comparisons (n=5000)
 if (i < j) k++               20000   20000   20000   20000   20000       1
 if (x[i] < x[j]) k++         20000   10000   20000   20000   20000       1

Array Comparisons and Swaps (n=5000)
 k = (x[i]<x[k]) ? -1 : 1     20000   20000   20000   20000   10000       1
 k = intcmp(x+i, x+j)         20000   20000   20000   20000   20000       1
 swapmac(i, j)                30000   20000   30000   30000   30000       1
 swapfunc(i, j)               30000   20000   30000   30000   30000       1
 swap_func_inline(x, i, j)    30000   20000   30000   30000   30000       1

Max Function, Macro and Inline (n=5000)
 k = (i > j) ? i : j          20000   20000   10000   20000   20000       1
 k = maxmac(i, j)             20000   20000   20000   20000   20000       1
 k = maxfunc(i, j)            10000   20000   20000   20000   20000       1

Math Functions (n=1000)
 fk = j+fi;                       0       0       0       0       0       0
 k = rand();                  20000   10000   10000   10000   20000      14
 fk = sqrt(j+fi)              10000   20000   20000   20000   20000      18
 fk = sin(j+fi)               50000   60000   50000   60000   50000      54
 fk = sinh(j+fi)              30000   30000   30000   30000   30000      30
 fk = asin(j+fi)              30000   30000   30000   20000   30000      28
 fk = cos(j+fi)               60000   50000   60000   60000   50000      56
 fk = tan(j+fi)               80000   80000   90000   80000   80000      82

Memory Allocation (n=500)
 free(malloc(16));                0       0       0       0       0       0
 free(malloc(100));               0       0       0       0       0       0
 free(malloc(2000));              0       0       0       0       0       0

  Secs: 4.27
Quuxplusone commented 15 years ago
Here is a much smaller testcase:

int n = 50000;

int main(void) {
  int i, j;
  for (i = 1; i <= n; i++) {
    for (j = 1; j <= n; j++) {
    }
  }
  return 0;
}
Quuxplusone commented 14 years ago

Dan, was this helped by your recent patches?

Quuxplusone commented 14 years ago

The "much smaller" testcase and the original "simplest form" are now optimized.

In the "more complex" example, LLVM does not optimize the "Comparisons" tests or the sinh, asin, and tan tests.