minio / c2goasm

C to Go Assembly
Apache License 2.0
1.31k stars 110 forks source link

go BLAS #17

Open zmonoid opened 5 years ago

zmonoid commented 5 years ago

Hi,

I am trying to convert openblas to go with this project.

A simple C code:

// degmm.c
#include <cblas.h>

/*
 * degmm - issue one fixed-size double-precision GEMM through CBLAS.
 *
 * Builds three local arrays and hands them to cblas_dgemm with
 * column-major storage, A not transposed and B transposed:
 *   A, B : 6 doubles each, leading dimension 3
 *   C    : 9 doubles, leading dimension 3, every element initialised to 0.5
 * The dimension arguments are 3, 3, 2 and the two scalar arguments are
 * 1 and 2 (alpha and beta in the CBLAS dgemm signature).
 *
 * Takes no parameters and returns nothing; C lives on the stack, so the
 * result of the multiply is discarded when the function returns.
 */
void degmm()
{
  double A[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
  double B[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
  double C[9] = {.5,.5,.5,.5,.5,.5,.5,.5,.5};
  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,3,3,2,1,A, 3, B, 3,2,C,3);
}

/* Program entry point: performs the single degmm() call, then exits with status 0. */
int main ()
{
    degmm();
    return 0;
}

Use the following command:

clang-6.0 -O3 -mavx -mfma -masm=intel -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -S degmm.c

This gives the following assembly code, degmm.s:

    .text
    # Assembler preamble: Intel operand order (dst, src) with unprefixed
    # register names, matching the -masm=intel flag on the clang command line.
    .intel_syntax noprefix
    .file   "degmm.c"
    # Constant pool for degmm.
    # .LCPI0_0: four doubles {1, 2, 1, -3}, 32-byte aligned (.p2align 5) so
    # degmm can load them with a single aligned 256-bit vmovaps.
    .section    .rodata.cst32,"aM",@progbits,32
    .p2align    5               # -- Begin function degmm
.LCPI0_0:
    .quad   4607182418800017408     # double 1
    .quad   4611686018427387904     # double 2
    .quad   4607182418800017408     # double 1
    .quad   -4609434218613702656    # double -3
    # .LCPI0_1 / .LCPI0_2: the scalars 1.0 and 2.0 that degmm loads into
    # xmm0 and xmm1 right before calling cblas_dgemm.
    .section    .rodata.cst8,"aM",@progbits,8
    .p2align    3
.LCPI0_1:
    .quad   4607182418800017408     # double 1
.LCPI0_2:
    .quad   4611686018427387904     # double 2
    # Back to the code section: degmm is exported (.globl) and its entry is
    # aligned to 16 bytes (.p2align 4) using 0x90 as the fill byte.
    .text
    .globl  degmm
    .p2align    4, 0x90
    .type   degmm,@function
#-----------------------------------------------------------------------
# void degmm()          (clang 6.0 -O3 -mavx -mfma output for degmm.c)
# ABI:   presumably System V AMD64 -- the call below passes integers in
#        edi/esi/edx/ecx/r8d/r9d, doubles in xmm0/xmm1 and the rest on
#        the stack (TODO confirm target triple)
# In:    none
# Out:   none
# Writes: rax, rcx, rdx, rsi, rdi, r8, r9, r10, r11, ymm0, xmm1
#        (plus whatever cblas_dgemm itself clobbers)
# Stack: 168 bytes of locals, then six 8-byte pushed call arguments.
#        Local layout, relative to rsp before the pushes:
#          [rsp +  0 ..  47]  B  (6 doubles)  -> passed via r10
#          [rsp + 48 ..  95]  A  (6 doubles)  -> passed via r11
#          [rsp + 96 .. 167]  C  (9 doubles)  -> passed via rax
# NOTE(review): the c2goasm panic quoted later in this thread cites a
# `sub rsp, 168` line; both this function and main contain one.
#-----------------------------------------------------------------------
degmm:                                  # @degmm
# %bb.0:
    # Reserve the local frame: 48 (A) + 48 (B) + 72 (C) = 168 bytes.
    sub rsp, 168
    vmovaps ymm0, ymmword ptr [rip + .LCPI0_0] # ymm0 = [1.000000e+00,2.000000e+00,1.000000e+00,-3.000000e+00]
    # A[0..3] = {1, 2, 1, -3}
    vmovups ymmword ptr [rsp + 48], ymm0
    # 4616189618054758400 = 0x4010000000000000, the bit pattern of double 4.0 -> A[4]
    movabs  rax, 4616189618054758400
    mov qword ptr [rsp + 80], rax
    # -4616189618054758400 is 0xBFF0000000000000 read as signed: double -1.0 -> A[5]
    movabs  rcx, -4616189618054758400
    mov qword ptr [rsp + 88], rcx
    # B receives the same six values as A: ymm0, then 4.0 and -1.0.
    vmovups ymmword ptr [rsp], ymm0
    mov qword ptr [rsp + 32], rax
    mov qword ptr [rsp + 40], rcx
    # Copy the 72-byte initial image of C from .Ldegmm.C to rsp+96:
    # the last 8 bytes first, then two 32-byte vector moves.
    mov rax, qword ptr [rip + .Ldegmm.C+64]
    mov qword ptr [rsp + 160], rax
    vmovups ymm0, ymmword ptr [rip + .Ldegmm.C+32]
    vmovups ymmword ptr [rsp + 128], ymm0
    vmovups ymm0, ymmword ptr [rip + .Ldegmm.C]
    vmovups ymmword ptr [rsp + 96], ymm0
    # Capture the array addresses before the pushes below move rsp.
    lea rax, [rsp + 96]
    mov r10, rsp
    lea r11, [rsp + 48]
    # Floating-point arguments: xmm0 = 1.0 and xmm1 = 2.0
    # (the `1` and `2` scalars in the C call).
    vmovsd  xmm0, qword ptr [rip + .LCPI0_1] # xmm0 = mem[0],zero
    vmovsd  xmm1, qword ptr [rip + .LCPI0_2] # xmm1 = mem[0],zero
    # Register integer arguments, in C call order:
    # CblasColMajor, CblasNoTrans, CblasTrans, 3, 3, 2.
    mov edi, 102
    mov esi, 111
    mov edx, 112
    mov ecx, 3
    mov r8d, 3
    mov r9d, 2
    # Remaining arguments go on the stack, pushed right to left:
    # 3 (last arg), &C, 3, &B, 3, &A.
    push    3
    push    rax
    push    3
    push    r10
    push    3
    push    r11
    # Clear the upper halves of the ymm registers before calling out.
    vzeroupper
    call    cblas_dgemm
    # Release everything at once: 168 bytes of locals + 6 * 8 = 48 bytes of
    # pushed arguments = 216.
    add rsp, 216
    ret
.Lfunc_end0:
    .size   degmm, .Lfunc_end0-degmm
                                        # -- End function
    .section    .rodata.cst32,"aM",@progbits,32
    # Constant pool for main. The values are the same as in degmm's pool
    # (.LCPI0_0 / .LCPI0_1 / .LCPI0_2); main refers to them through its own
    # .LCPI1_* labels.
    # .LCPI1_0: four doubles {1, 2, 1, -3}, 32-byte aligned for vmovaps.
    .p2align    5               # -- Begin function main
.LCPI1_0:
    .quad   4607182418800017408     # double 1
    .quad   4611686018427387904     # double 2
    .quad   4607182418800017408     # double 1
    .quad   -4609434218613702656    # double -3
    # .LCPI1_1 / .LCPI1_2: the scalars 1.0 and 2.0 loaded into xmm0 / xmm1.
    .section    .rodata.cst8,"aM",@progbits,8
    .p2align    3
.LCPI1_1:
    .quad   4607182418800017408     # double 1
.LCPI1_2:
    .quad   4611686018427387904     # double 2
    # Back to the code section: main is exported and its entry is aligned to
    # 16 bytes using 0x90 as the fill byte.
    .text
    .globl  main
    .p2align    4, 0x90
    .type   main,@function
#-----------------------------------------------------------------------
# int main()            (clang 6.0 -O3 -mavx -mfma output for degmm.c)
# There is no `call degmm` here: the body repeats degmm's instruction
# sequence (the call appears to have been inlined), then returns 0.
# ABI:   presumably System V AMD64 (TODO confirm target triple)
# Out:   eax = 0
# Stack: 168 bytes of locals, then six 8-byte pushed call arguments.
#        Local layout, relative to rsp before the pushes:
#          [rsp +  0 ..  47]  B  (6 doubles)  -> passed via r10
#          [rsp + 48 ..  95]  A  (6 doubles)  -> passed via r11
#          [rsp + 96 .. 167]  C  (9 doubles)  -> passed via rax
#-----------------------------------------------------------------------
main:                                   # @main
# %bb.0:
    # Reserve the local frame: 48 (A) + 48 (B) + 72 (C) = 168 bytes.
    sub rsp, 168
    vmovaps ymm0, ymmword ptr [rip + .LCPI1_0] # ymm0 = [1.000000e+00,2.000000e+00,1.000000e+00,-3.000000e+00]
    # A[0..3] = {1, 2, 1, -3}
    vmovups ymmword ptr [rsp + 48], ymm0
    # 4616189618054758400 = 0x4010000000000000, the bit pattern of double 4.0 -> A[4]
    movabs  rax, 4616189618054758400
    mov qword ptr [rsp + 80], rax
    # -4616189618054758400 is 0xBFF0000000000000 read as signed: double -1.0 -> A[5]
    movabs  rcx, -4616189618054758400
    mov qword ptr [rsp + 88], rcx
    # B receives the same six values as A.
    vmovups ymmword ptr [rsp], ymm0
    mov qword ptr [rsp + 32], rax
    mov qword ptr [rsp + 40], rcx
    # Copy the 72-byte initial image of C from .Ldegmm.C to rsp+96.
    mov rax, qword ptr [rip + .Ldegmm.C+64]
    mov qword ptr [rsp + 160], rax
    vmovups ymm0, ymmword ptr [rip + .Ldegmm.C+32]
    vmovups ymmword ptr [rsp + 128], ymm0
    vmovups ymm0, ymmword ptr [rip + .Ldegmm.C]
    vmovups ymmword ptr [rsp + 96], ymm0
    # Capture the array addresses before the pushes below move rsp.
    lea rax, [rsp + 96]
    mov r10, rsp
    lea r11, [rsp + 48]
    # Floating-point arguments: xmm0 = 1.0 and xmm1 = 2.0.
    vmovsd  xmm0, qword ptr [rip + .LCPI1_1] # xmm0 = mem[0],zero
    vmovsd  xmm1, qword ptr [rip + .LCPI1_2] # xmm1 = mem[0],zero
    # Register integer arguments, in C call order:
    # CblasColMajor, CblasNoTrans, CblasTrans, 3, 3, 2.
    mov edi, 102
    mov esi, 111
    mov edx, 112
    mov ecx, 3
    mov r8d, 3
    mov r9d, 2
    # Stack arguments, pushed right to left: 3, &C, 3, &B, 3, &A.
    push    3
    push    rax
    push    3
    push    r10
    push    3
    push    r11
    # Clear the upper halves of the ymm registers before calling out.
    vzeroupper
    call    cblas_dgemm
    # Drop the six pushed arguments (6 * 8 = 48 bytes).
    add rsp, 48
    # Return value of main: 0.
    xor eax, eax
    # Release the 168-byte local frame.
    add rsp, 168
    ret
.Lfunc_end1:
    .size   main, .Lfunc_end1-main
                                        # -- End function
    .type   .Ldegmm.C,@object       # @degmm.C
    # Read-only initial image of the C array from degmm.c: nine doubles,
    # all 0.5 (9 * 8 = 72 bytes, matching the .size directive below).
    # Both degmm and main copy it onto their stack frames before the call.
    # The object is 16-byte aligned (.p2align 4); the code reads it with
    # vmovups, the unaligned-load form.
    .section    .rodata,"a",@progbits
    .p2align    4
.Ldegmm.C:
    .quad   4602678819172646912     # double 0.5
    .quad   4602678819172646912     # double 0.5
    .quad   4602678819172646912     # double 0.5
    .quad   4602678819172646912     # double 0.5
    .quad   4602678819172646912     # double 0.5
    .quad   4602678819172646912     # double 0.5
    .quad   4602678819172646912     # double 0.5
    .quad   4602678819172646912     # double 0.5
    .quad   4602678819172646912     # double 0.5
    .size   .Ldegmm.C, 72

    # Compiler identification string recorded in the object file.
    .ident  "clang version 6.0.0-1ubuntu2 (tags/RELEASE_600/final)"
    .section    ".note.GNU-stack","",@progbits

Then converting to go asm:

c2goasm -a degmm.s Degmm.s

It reports the following error:

Processing cpp/degmm.s
panic: 'sub rsp' found but in unexpected scenario:      sub     rsp, 168

goroutine 1 [running]:
main.(*Epilogue).isPrologueInstruction(0xc0000575b8, 0xc0000171a0, 0xd, 0xd)
        /home/bzhou/go/src/github.com/minio/c2goasm/epilogue.go:205 +0x3d4
main.getPrologueLines(0xc0000d0150, 0x26, 0xeb, 0xc0000575b8, 0x0)
        /home/bzhou/go/src/github.com/minio/c2goasm/subroutine.go:231 +0xb4
main.(*Subroutine).removePrologueLines(0xc000057590, 0xc0000d0000, 0x8d, 0x100, 0x15, 0x3b)
        /home/bzhou/go/src/github.com/minio/c2goasm/subroutine.go:134 +0x87
main.extractSubroutine(0x3a, 0xc0000d0000, 0x8d, 0x100, 0x11, 0xc000017188, 0x5, 0x14, 0x0, 0x0, ...)
        /home/bzhou/go/src/github.com/minio/c2goasm/subroutine.go:127 +0x2cd
main.segmentSource(0xc0000d0000, 0x8d, 0x100, 0x20, 0xf, 0xc)
        /home/bzhou/go/src/github.com/minio/c2goasm/subroutine.go:85 +0x20e
main.process(0xc0000d0000, 0x8d, 0x100, 0xc000017058, 0x8, 0x0, 0x0, 0x0, 0x0, 0x0)
        /home/bzhou/go/src/github.com/minio/c2goasm/c2goasm.go:78 +0x80
main.main()
        /home/bzhou/go/src/github.com/minio/c2goasm/c2goasm.go:264 +0x37e

Any suggestions?

fwessels commented 5 years ago

Maybe experiment with -mno-red-zone and/or -mstackrealign options?

zmonoid commented 5 years ago

@fwessels Still the same error. I guess it may be a compiler problem. The ASM code I generated is different from the one given in the example. May I check your compiler version?