malloc and strcpy in LLVM #235

Closed dequeb closed 6 months ago

dequeb commented 6 months ago

Hi everyone,

I'm still discovering LLIR/LLVM. I need to support global string variables in my target language. My goal is:

So I wrote the following prototype code to implement this:

// TestStrAllocGEN builds an LLVM IR module whose main function allocates a
// heap buffer, copies a constant string into it with strcpy, stores the
// buffer pointer in the global @str, prints it with puts and frees it.
//
// The *testing.T parameter is unnamed and unused; the function only
// constructs the module.
func TestStrAllocGEN(*testing.T) {
    // Create a new LLVM IR module.
    m := ir.NewModule()
    // Convenience types and values.
    i32 := types.I32
    i8 := types.I8
    i8ptr := types.NewPointer(i8)

    // Declare the external C functions used below; no bodies are added to
    // them in this module.
    // add string functions
    strcpy := m.NewFunc("strcpy", i8ptr, ir.NewParam("dst", i8ptr), ir.NewParam("src", i8ptr))
    puts := m.NewFunc("puts", i32, ir.NewParam("s", i8ptr))

    // memory management
    // NOTE(review): the size parameter is declared as i32; confirm this
    // matches the width of size_t on the target platform.
    malloc := m.NewFunc("malloc", i8ptr, ir.NewParam("size", types.I32))
    free := m.NewFunc("free", types.Void, ir.NewParam("ptr", i8ptr))

    // Create a global variable of type string (an i8* initialised to null).
    str := m.NewGlobal("str", i8ptr)
    str.Init = constant.NewNull(i8ptr)

    // Create a new function main which returns an i32.
    main := m.NewFunc("main", types.I32)
    entry := main.NewBlock("")

    // create temporary string; the literal carries its own "\x00"
    // terminator, so length below includes it.
    str0 := constant.NewCharArrayFromString("Hello, World!\n\x00")
    length := int64(len(str0.X))
    // allocate memory on heap
    str1 := entry.NewCall(malloc, constant.NewInt(types.I32, length))
    // copy string to allocated memory
    // gep := constant.NewGetElementPtr(str0.Typ, str0, zero, zero)
    // NOTE(review): str0 is the character-array constant itself, while
    // strcpy's src parameter is declared as i8ptr. The array is passed by
    // value here rather than as a pointer to its first byte; this is the
    // call reported as crashing.
    entry.NewCall(strcpy, str1, str0)
    // store pointer to global string
    entry.NewStore(str1, str)

    // capture the pointer to the string by reloading it from the global
    str2 := entry.NewLoad(i8ptr, str)
    // call puts
    entry.NewCall(puts, str2) // verify we have the proper content

    // free memory (using the pointer reloaded from the global)
    entry.NewCall(free, str2)

    // Return 0 from main.
    entry.NewRet(constant.NewInt(types.I32, 0))

This generated the following LLVM IR:

@str = global i8* null

declare i8* @strcpy(i8* %dst, i8* %src)

declare i32 @puts(i8* %s)

declare i8* @malloc(i32 %size)

declare void @free(i8* %ptr)

define i32 @main() {
    %1 = call i8* @malloc(i32 15)

    %2 = call i8* @strcpy(i8* %1, [15 x i8] c"Hello, World!\0A\00") ; <-- error is here
    store i8* %1, i8** @str ; <-- from here and below, not tested yet!
    %3 = load i8*, i8** @str
    %4 = call i32 @puts(i8* %3)
    call void @free(i8* %1)
    ret i32 0

I get a segmentation fault in the strcpy call. Could you help me figure out what is wrong? Thank you in advance for your precious time.

dequeb commented 6 months ago

Here is my own solution (thanks to C and clang).

For those interested, here is the revised code:

// TestStrAllocGEN builds an LLVM IR module in which the string literal
// lives in its own global (@.str0). main allocates a heap buffer, stores its
// address in the global @str, copies the literal into the buffer with
// strcpy, prints it with puts and frees it.
//
// The *testing.T parameter is unnamed and unused; the function only
// constructs the module.
func TestStrAllocGEN(*testing.T) {
    // Create a new LLVM IR module.
    m := ir.NewModule()
    // Convenience types and values.
    i32 := types.I32
    i8 := types.I8
    i8ptr := types.NewPointer(i8)

    // Declare the external C functions used below; no bodies are added to
    // them in this module.
    // add string functions
    strcpy := m.NewFunc("strcpy", i8ptr, ir.NewParam("dst", i8ptr), ir.NewParam("src", i8ptr))
    puts := m.NewFunc("puts", i32, ir.NewParam("s", i8ptr))

    // memory management
    // NOTE(review): the size parameter is declared as i32; confirm this
    // matches the width of size_t on the target platform.
    malloc := m.NewFunc("malloc", i8ptr, ir.NewParam("size", types.I32))
    free := m.NewFunc("free", types.Void, ir.NewParam("ptr", i8ptr))

    // Create a global variable of type string (an i8* initialised to null).
    str := m.NewGlobal("str", i8ptr)
    str.Init = constant.NewNull(i8ptr)

    // test constant: the literal (with its explicit "\x00" terminator) is
    // placed in a module-level global instead of being used inline.
    constantStr0 := constant.NewCharArrayFromString("Hello, World!\n\x00")
    constantStr1 := m.NewGlobalDef(".str0", constantStr0)

    // ---------------------------------------------------------
    // main ()
    // ---------------------------------------------------------
    // Create a new function main which returns an i32.
    main := m.NewFunc("main", types.I32)
    entry := main.NewBlock("")

    // length includes the explicit terminator byte in the literal.
    length := int64(len(constantStr0.X))
    // allocate heap memory for global strings
    str2 := entry.NewCall(malloc, constant.NewInt(types.I32, length))
    entry.NewStore(str2, str)

    // copy string to allocated memory
    str3 := entry.NewLoad(i8ptr, str)
    // NOTE(review): constantStr1 is the global holding the whole character
    // array, so the argument is a pointer to the array rather than the
    // i8ptr that strcpy's src parameter declares. Confirm whether a
    // getelementptr to the first element is required for the IR to verify.
    entry.NewCall(strcpy, str3, constantStr1)

    // capture the pointer to the string by reloading it from the global
    str10 := entry.NewLoad(i8ptr, str)
    // call puts
    entry.NewCall(puts, str10)

    // free memory (using the pointer reloaded from the global)
    entry.NewCall(free, str10)

    // Return 0 from main.
    entry.NewRet(constant.NewInt(types.I32, 0))

And the resulting LLVM IR:

@str = global i8* null
@.str0 = global [15 x i8] c"Hello, World!\0A\00"

declare i8* @strcpy(i8* %dst, i8* %src)

declare i32 @puts(i8* %s)

declare i8* @malloc(i32 %size)

declare void @free(i8* %ptr)

define i32 @main() {
        %1 = call i8* @malloc(i32 15)
        store i8* %1, i8** @str
        %2 = load i8*, i8** @str
        %3 = call i8* @strcpy(i8* %2, [15 x i8]* @.str0)
        %4 = load i8*, i8** @str
        %5 = call i32 @puts(i8* %4)
        call void @free(i8* %4)
        ret i32 0