hikettei / Caten

[wip] Deep Learning Compiler based on Polyhedral Compiler and Light-weight IRs based on Optimizing Pattern Matcher
https://hikettei.github.io/Caten/
Other
15 stars 1 forks source link

Implementing Polyhedral Compiler #6

Closed hikettei closed 1 month ago

hikettei commented 2 months ago

Reading

Workload

hikettei commented 2 months ago

Padding in a single kernel :v:

(jit (caten (!cos (!sin (!padding (make-tensor `(10 10) :initial-element 2.0) `((2 2) (2 2)) :value 0.0)))) :debug 4)

#include <math.h>
void main700672(float* val_0, float* val_3, float* val_10, float val_8);
void main700672(float* val_0, float* val_3, float* val_10, float val_8) {
  val_8 = 1.5707964;
  val_10[(0)] = val_8;
  for(int c0=0;(c0<=13);c0+=1) {
    for(int c1=0;(c1<=13);c1+=1) {
      val_0[14*(c0)+(c1)] = 0.0;
      if ((c0==2)&&(c1==2)) {
      }
      if ((((c0>=2)&&(c0<=11))&&(c1>=2))&&(c1<=11)) {
        val_3[10*((c0-2))+((c1-2))] = 2.0;
        val_0[14*((c0-2)+2)+((c1-2)+2)] = val_3[10*((c0-2))+((c1-2))];
      }
      val_0[14*(c0)+(c1)] = sin(val_0[14*(c0)+(c1)]);
      val_0[14*(c0)+(c1)] = val_0[14*(c0)+(c1)]+val_10[0+0];
      val_0[14*(c0)+(c1)] = sin(val_0[14*(c0)+(c1)]);
    }
  }
}

as well as the composed gemm:

CATEN-USER> (jit (caten (!matmul (make-tensor `(128 32)) (!matmul (make-tensor `(32 64)) (make-tensor `(64 128))))) :debug 1)
Compiled:

#include <math.h>
/*
 * Composed GEMM: val_29 = val_21 x (val_5 x val_9), every matrix row-major.
 *
 *   val_5  : 32 x 64     left operand of the inner product (read only)
 *   val_9  : 64 x 128    right operand of the inner product (read only)
 *   val_13 : 32 x 128    intermediate, fully overwritten
 *   val_21 : 128 x 32    left operand of the outer product (read only)
 *   val_29 : 128 x 128   result, fully overwritten
 *
 * The right-hand operand of each product is addressed as element [k][j] of a
 * row-major matrix (offset = columns * k + j), matching the layout in which
 * val_13 is written by the first stage.
 */
void main232208(float* val_29, float* val_21, float* val_13, float* val_5, float* val_9);
void main232208(float* val_29, float* val_21, float* val_13, float* val_5, float* val_9) {
  /* Stage 1: val_13[i][j] = sum_k val_5[i][k] * val_9[k][j]. */
  for (int i = 0; i < 32; i++) {
    for (int j = 0; j < 128; j++) {
      float acc = 0.0f;
      for (int k = 0; k < 64; k++) {
        acc += val_5[64 * i + k] * val_9[128 * k + j];
      }
      val_13[128 * i + j] = acc;
    }
  }
  /* Stage 2: val_29[i][j] = sum_k val_21[i][k] * val_13[k][j]. */
  for (int i = 0; i < 128; i++) {
    for (int j = 0; j < 128; j++) {
      float acc = 0.0f;
      for (int k = 0; k < 32; k++) {
        acc += val_21[32 * i + k] * val_13[128 * k + j];
      }
      val_29[128 * i + j] = acc;
    }
  }
}
hikettei commented 2 months ago

Axpy

CATEN-USER> (jit (caten (!add (!view (make-tensor `(n)) `(froma toa bya)) (!view (make-tensor `(n)) `(fromb tob byb)))) :debug 1)
Compiled:

#include <math.h>
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
/*
 * In-place element-wise add over two strided views:
 *   val_39[bya * (i + froma)] += val_28[byb * (i + fromb)]   for i in [0, tob).
 *
 * val_39 is both an input and the output; val_28 is only read.
 * `n` and `toa` are accepted but never read by this kernel.
 * NOTE(review): the trip count depends on `tob` alone -- confirm against the
 * intended view semantics that this is the right extent for both operands.
 */
void main7637355(int n, int fromb, int tob, int byb, int toa, int froma, int bya, float* val_39, float* val_28);
void main7637355(int n, int fromb, int tob, int byb, int toa, int froma, int bya, float* val_39, float* val_28) {
  int i = 0;
  while (i < tob) {
    const int dst = bya * (i + froma);
    const int src = byb * (i + fromb);
    val_39[dst] += val_28[src];
    i++;
  }
}
hikettei commented 2 months ago

Softmax

TEST> (caten (!softmax (make-tensor `(3 3) :initial-element 1.0)))
Compiled:

#include <math.h>
#include <stdint.h>
#define boolean _Bool
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
/*
Arrays:
  - val_6[float32]: (3 1)
  - val_0[float32]: (3 3)
*/
void main1599824(float* val_6, float* val_0);
void main1599824(float* val_6, float* val_0) {
  float val_2;
  for(int c0=0;(c0<=2);c0+=1) {
    val_6[(c0)+(0)] = 0.0;
    for(int c1=0;(c1<=2);c1+=1) {
      val_2 = 1.442695;
      val_0[3*(c0)+(c1)] = 1.0;
      val_0[3*(c0)+(c1)] = val_0[3*(c0)+(c1)]*val_2;
      val_0[3*(c0)+(c1)] = exp2(val_0[3*(c0)+(c1)]);
      val_6[(c0)+0] += val_0[3*(c0)+(c1)];
    }
    val_6[(c0)+0] = 1.0 / (val_6[(c0)+0]);
    for(int c1=2;(c1<=4);c1+=1) {
      val_0[3*(c0)+((c1-2))] = val_0[3*(c0)+((c1-2))]*val_6[(c0)+0];
    }
  }
}
hikettei commented 2 months ago

最適化に手をつける前に多分コンパイルしたいIRを取得しておくのと,予めUnittest書いた方が便利だと思うのでこっち優先

hikettei commented 1 month ago

getting sophisticated