ASDAlexander77 / TypeScriptCompiler

TypeScript Compiler (by LLVM)
MIT License
583 stars 28 forks source link

Library import issue #104

Open Sinfolke opened 1 month ago

Sinfolke commented 1 month ago

I'm sorry that there are already so many issues. Here is another one: the compiler adds one more '_' to the names of the functions it looks up in a static .lib library. I wrote some String class static methods in C and tested them from TS. When it links, it adds one more '_' (I worked this out from the error message):

// ...
// Bindings for the helpers exported by the C library.
// The first name is spelled `__cfromCharCode` to match the C definition and the
// symbol reported by the linker.
declare function __cfromCharCode(numN: Reference<char>, count: i64): cst_string;
declare function __cfromCodePoint(numN: Reference<char>, count: u64): cst_string;
declare function __cadd_pointer(pointer: Reference<char>, value: int): Reference<char>;
// ...
// Returns `ptr` advanced by `val` bytes.
EXPORT void* __cadd_pointer(void* ptr, int val) {
    // Arithmetic directly on void* is a compiler extension; going through
    // char* makes the byte offset well defined in standard C.
    return (char*)ptr + val;
}
EXPORT struct cst_string __cfromCharCode(const uint16_t* numN, const long long count) {
 // ...
}
EXPORT struct cst_string __cfromCodePoint(const uint32_t* numN, size_t count) {
// ...
}

As you can see, the names start with two underscores ('__'). Then I compile:

C:\programming\tsc>clang -c -O3 native.c -o native.o
native.c:42:12: warning: passing 'const char *' to parameter of type 'void *' discards qualifiers
      [-Wincompatible-pointer-types-discards-qualifiers]
   42 |     memcpy(r.str, numN, r.len - 1);
      |            ^~~~~
C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Tools\MSVC\14.39.33519\include\vcruntime_string.h:44:41: note:
      passing argument to parameter '_Dst' here
   44 |     _Out_writes_bytes_all_(_Size) void* _Dst,
      |                                         ^
native.c:70:11: warning: initializing 'char *' with an expression of type 'const char *' discards qualifiers
      [-Wincompatible-pointer-types-discards-qualifiers]
   70 |     char* ptr = r.str;
      |           ^     ~~~~~
native.c:99:18: warning: passing 'const char *' to parameter of type 'void *' discards qualifiers
      [-Wincompatible-pointer-types-discards-qualifiers]
   99 |             free(r.str);
      |                  ^~~~~
C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\ucrt\corecrt_malloc.h:90:42: note: passing argument to
      parameter '_Block' here
   90 |     _Pre_maybenull_ _Post_invalid_ void* _Block
      |                                          ^
native.c:108:32: warning: passing 'const char *' to parameter of type 'void *' discards qualifiers
      [-Wincompatible-pointer-types-discards-qualifiers]
  108 |         char* newstr = realloc(r.str, r.len);
      |                                ^~~~~
C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\ucrt\corecrt_malloc.h:127:43: note: passing argument to
      parameter '_Block' here
  127 |     _Pre_maybenull_ _Post_invalid_ void*  _Block,
      |                                           ^
4 warnings generated.

C:\programming\tsc>llvm-ar rcs native.lib native.o

C:\programming\tsc>tsc.exe --lib=native --emit=exe test.ts -o test.exe
 "C:\\Program Files\\Microsoft Visual Studio\\2022\\Enterprise\\VC\\Tools\\MSVC\\14.39.33519\\bin\\Hostx64\\x64\\link.exe" -out:test.exe -defaultlib:oldnames "-libpath:C:\\Program Files\\Microsoft Visual Studio\\2022\\Enterprise\\VC\\Tools\\MSVC\\14.39.33519\\lib\\x64" "-libpath:C:\\Program Files\\Microsoft Visual Studio\\2022\\Enterprise\\VC\\Tools\\MSVC\\14.39.33519\\atlmfc\\lib\\x64" "-libpath:C:\\Program Files (x86)\\Windows Kits\\10\\Lib\\10.0.22621.0\\ucrt\\x64" "-libpath:C:\\Program Files (x86)\\Windows Kits\\10\\Lib\\10.0.22621.0\\um\\x64" -nologo "C:\\Users\\Admin\\AppData\\Local\\Temp\\test-791fce.obj" native.lib user32.lib msvcrtd.lib gcmt-lib.lib TypeScriptAsyncRuntime.lib LLVMSupport.lib
msvcrtd.lib(initializers.obj) : warning LNK4098: defaultlib 'msvcrt.lib' conflicts with use of other libs; use /NODEFAULTLIB:library
test-791fce.obj : error LNK2019: unresolved external symbol ___cadd_pointer referenced in function cst_string_to_string
test-791fce.obj : error LNK2019: unresolved external symbol ___cfromCharCode referenced in function String.fromCharCode
test-791fce.obj : error LNK2019: unresolved external symbol ___cfromCodePoint referenced in function String.fromCodePoint
test.exe : fatal error LNK1120: 3 unresolved externals
tsc: error: linker command failed with exit code 1120 (use -v to see invocation)

The linker requires 3 underscores ('___'). But if I use 3 underscores ('___') in the C code and 2 underscores ('__') in the TS code, it works.
Sinfolke commented 1 month ago

I can send the entire code:

#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

#ifdef _WIN32
    #define EXPORT __declspec(dllexport)
#else
    #define EXPORT __attribute__((visibility("default")))
#endif

/**e
 * @brief If the size we allocated differs too much with the actual size of the string it calls realloc to decrease at the actual size
 * For now it is defined to be 32 (16 utf16 characters, or 32 ascii characters)
 */
#define MIN_REALLOC_REQUEST_SIZE 32
// Status codes stored in cst_string.err.
// NoError occupies 0 so that a zero-initialised `err` means success; the
// TypeScript caller in this thread tests err == 1 for an allocation failure
// and err == 2 for an invalid code point.
enum {
    NoError,     // 0: the call succeeded
    AllocError,  // 1: the buffer could not be allocated
    RangeError   // 2: a code point was above 0x10FFFF
};
// Compact string record exchanged between the C helpers and the TS code.
struct cst_string {
    size_t len;   // length of `str` in bytes (reused to carry the bad code point on a range error)
    // Not const: the helpers write into this buffer and pass it to
    // realloc()/free(), which discarded the qualifier and produced warnings.
    char* str;
    int err;      // status code from the enum above
    // `err` could be folded into the two fields above as an optimisation,
    // but a separate field makes it easier to understand what is going on.
};
// Returns `ptr` advanced by `val` bytes.
EXPORT void* ___cadd_pointer(void* ptr, int val) {
    // Arithmetic directly on void* is a compiler extension; going through
    // char* makes the byte offset well defined in standard C.
    return (char*)ptr + val;
}
// ***  String class methods    ***
EXPORT struct cst_string ___cfromCharCode(const uint16_t* numN, const long long count) {
    printf("cfromCodePoint is executed (beginning)\n");
    printf("numN = %p\ncount = %lld\n", numN, count);
    struct cst_string r = {0, NULL};
    if (count <= 0) return r;
    printf("Doing allocation\n");
    r.len = count * sizeof(uint16_t);
    r.str = malloc(r.len);
    if (!r.str) return r;
    printf("Doing memcpy\n");
    memcpy(r.str, numN, r.len - 1);
    return r;
}
// code produced by charGpt (bits management in the loop)
// however seems it processes well 
/**
 * @brief Encodes `count` Unicode code points as UTF-16 into a malloc'ed buffer.
 *
 * Each code unit is stored high byte first.
 * NOTE(review): confirm that this byte order is what the consumer expects.
 *
 * @param numN  pointer to the code points
 * @param count number of code points
 * @return On success `str`/`len` describe the encoded bytes. When `count` is 0
 *         the result is `{0, NULL, 0}`. On allocation failure `err` is
 *         AllocError and `str` is NULL. When a code point is above 0x10FFFF,
 *         `err` is RangeError, `len` carries the offending code point and the
 *         buffer has already been freed (`str` is NULL).
 */
EXPORT struct cst_string ___cfromCodePoint(const uint32_t* numN, size_t count) {
    struct cst_string r = {0, NULL, 0};
    // Nothing to encode: avoid malloc(0), which may legitimately return NULL
    // and would then be misreported as an allocation error.
    if (count == 0) return r;
    // Worst case is 4 bytes per code point; reject counts that would overflow.
    if (count > SIZE_MAX / 4) {
        r.err = AllocError;
        return r;
    }
    const size_t capacity = count * 4;
    char* buf = malloc(capacity);
    if (!buf) {
        r.err = AllocError;
        return r;
    }

    char* out = buf;
    for (size_t i = 0; i < count; ++i) {
        uint32_t codePoint = numN[i];
        if (codePoint <= 0xFFFF) {
            // BMP character: one code unit, 2 bytes.
            *out++ = (codePoint >> 8) & 0xFF; // High byte
            *out++ = codePoint & 0xFF;        // Low byte
        } else if (codePoint <= 0x10FFFF) {
            // Supplementary character: surrogate pair, 4 bytes.
            codePoint -= 0x10000;
            uint16_t highSurrogate = (codePoint >> 10) + 0xD800;
            uint16_t lowSurrogate = (codePoint & 0x3FF) + 0xDC00;
            *out++ = (highSurrogate >> 8) & 0xFF; // High surrogate high byte
            *out++ = highSurrogate & 0xFF;        // High surrogate low byte
            *out++ = (lowSurrogate >> 8) & 0xFF;  // Low surrogate high byte
            *out++ = lowSurrogate & 0xFF;         // Low surrogate low byte
        } else {
            // Report the failure to the TS side: `len` carries the code point
            // for the error message and `err` the error code. The buffer is
            // released here and `str` is cleared so no dangling pointer
            // escapes to the caller.
            free(buf);
            r.len = codePoint;
            r.str = NULL;
            r.err = RangeError;
            return r;
        }
    }

    // Every iteration writes at least 2 bytes, so `used` is non-zero here.
    const size_t used = (size_t)(out - buf);
    r.len = used;
    r.str = buf;
    // Shrink only when enough bytes are wasted to make the realloc worthwhile;
    // otherwise the slack is simply released with the buffer later on.
    if (capacity - used >= MIN_REALLOC_REQUEST_SIZE) {
        char* shrunk = realloc(buf, used);
        // On failure the original (larger) buffer is still valid, so keep it.
        if (shrunk) r.str = shrunk;
    }
    return r;
}
// declare type Reference<T> = T;
// declare function ReferenceOf(obj: any): any;
// declare function LoadReference<T>(obj: T) : T;
// a type used in C code to represent string in efficient way
// err variable often used to handle errors when allocating etc.
// TypeScript-side view of the C `struct cst_string` returned by the native helpers.
declare type cst_string = {
    // size field written by the C side; on a range error it carries the offending code point
    len: u64;
    // buffer pointer produced by the C side
    str: Reference<char>;
    // status code set by the C side
    err: int;
};

// memory management functions
// declare function malloc(size: long): Reference<char>;
// declare function free<T>(pointer: Reference<T>): void;
// declare function exit(code: int): void;

// Internal functions from native.c

// The names are written with two leading underscores here while the C definitions use
// three: the reporter found that the linker looks for one more '_' than is declared.
declare function __cfromCharCode(numN: Reference<u16>, count: i64): cst_string;
declare function __cfromCodePoint(numN: Reference<u32>, count: u64): cst_string;
declare function __cadd_pointer(pointer: Reference<char>, value: int): Reference<char>;

// should convert the cst_string (which is used by the internal modules) into a string class
// Converts a cst_string (used by the internal modules) into a regular string by
// appending its bytes one at a time.
// For now a very unoptimized conversion, but the only way found so far.
function cst_string_to_string(str: cst_string): string {
    let s: string = "";
    // Walk a local cursor so the caller's `str.str` keeps pointing at the start
    // of the buffer.
    let cursor: Reference<char> = str.str;
    for (let i: int = 0; i < str.len; ++i) {
        // Read first, then advance: advancing first skipped byte 0 and read one
        // byte past the end of the buffer.
        s += LoadReference(cursor);
        cursor = __cadd_pointer(cursor, 1);
    }
    return s;
}
// Partial re-implementation of the JavaScript String static helpers on top of
// the native functions declared above.
class String {
    // Builds a string from UTF-16 code units via the native __cfromCharCode helper.
    static fromCharCode(...numN: u16[]): string {
        log("fromCharsCode executed");
        log(numN.length);
        let res: cst_string = __cfromCharCode(ReferenceOf(numN), numN.length);
        log("__cfromCharsCode executed");
        // convert to a regular string
        return cst_string_to_string(res);
    }
    // Builds a string from Unicode code points via the native __cfromCodePoint helper.
    // Errors are only printed for now; an empty string is returned in that case.
    static fromCodePoint(...numN: u32[]): string {
        let res: cst_string = __cfromCodePoint(ReferenceOf(numN), numN.length);
        // NOTE(review): 1 and 2 must match the error enum values used in native.c.
        if (res.err == 1) {
            // we do not have error class for now
            // use just output
            // throw Error("Allocation error");
            print("Allocation Error");
            // No buffer was produced, so there is nothing to convert.
            return "";
        } else if (res.err == 2) {
            // res.len represents the character value
            // res.str has already been deallocated by the internal function when the error occurred
            // throw RangeError(`Invalid code point ${res.len}`); // error message from the original js function
            print(`Invalid code point ${res.len}`);
            // Do not fall through: res.str is freed and res.len is a code point,
            // not a length.
            return "";
        }
        // convert to a regular string
        return cst_string_to_string(res);
    }
    // fully written by the chatGpt
    // Concatenates the template chunks with the interpolated values in between.
    static raw(strings: TemplateStringsArray, ...values: any[]): string {
        let result = '';
        for (let i = 0; i < strings.length; i++) {
            result += strings[i];
            if (i < values.length) {
                result += values[i];
            }
        }
        return result;
    }
}

// Entry point: builds a string from four UTF-16 code units and logs it.
function main() {
    const text = String.fromCharCode(189, 293, 239, 901);
    log(text);
}
ASDAlexander77 commented 1 month ago

I'm sorry that there is already so much issues. There's another one that the compiler adds one more '_' to the functions it searches in static .lib library. I did write a bit some String class static properties on C and test them in ts. When it links it adds one more '_' (i understood it by the error message):

Yes, it is the TypeScript parser that adds one more '_'. I don't know why they do it; try to avoid using "__" at the beginning of a name. BTW, for examples of how to implement things like "Strings" etc., have a look at https://github.com/ASDAlexander77/TypeScriptCompilerDefaultLib — you may find some good examples there.

ASDAlexander77 commented 1 month ago

Probably something is not converted correctly when numN.length is cast to i64. Try using i32 or u32 for the size parameter of ___cfromCharCode.

I remember there were issue with i64 but I am not sure if it is fixed in your release version

On Fri, 26 Jul 2024 at 16:31, Yuriy @.***> wrote:

In my implementation, printf shows that the length has a large value (4294966655), but it should be 4. As a result the allocation takes several (around 3-4) seconds and then the program crashes in the memcpy call. I tried many integer types for the cfromCharCode parameter; the received value is often also 0.

— Reply to this email directly, view it on GitHub https://github.com/ASDAlexander77/TypeScriptCompiler/issues/104#issuecomment-2253006453, or unsubscribe https://github.com/notifications/unsubscribe-auth/ABRYRHFB3AU2VRIG6REITF3ZOJTTJAVCNFSM6AAAAABLOVIMD2VHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMZDENJTGAYDMNBVGM . You are receiving this because you commented.Message ID: @.***>

Sinfolke commented 1 month ago

I currently have the following implementation. The fromCharCode function works if I use wprintf (since the data is converted into UTF-16 characters), but it still prints UTF-16-specific characters incorrectly (only regular characters are printed normally). The conversion from a sequence of characters to a string itself is done correctly. fromCodePoint seems to have some issues in the bit manipulation in C (it seems to access memory it shouldn't). I tried to optimise it heavily and use either a stack allocation, depending on the size, or an efficient heap allocation.

Such large examples also show how the compiler actually works.

String.ts:

// Union of the value kinds treated as pointer-like here (not referenced by the code shown below).
type pointer_type<T> = string | Array<T> | Reference<T>;
// TypeScript-side view of the C `struct c_fromCodePoint_ret` returned by the native helpers.
type c_fromCodePoint_ret = {
    // buffer holding the encoded bytes
    memory: string;
    // byte-count bookkeeping from the C side; carries the offending code point when `e` is set
    offset: int;
    // non-zero when the C side rejected a code point
    e: int;
}
// Manually maintained list of the functions currently executing (pushed on entry, popped on exit).
let __stack__: string[] = [];

// Error object thrown when a native allocation fails.
class bad_alloc {
   public name: string = "bad_alloc";
   public message: string = "allocation error";
   public cause: string = "Usually it occurs when no heap memory enough to allocate bytes. You should ensure you're allocating valid memory size and/or close unnecessary apps to free up RAM. Sometimes just restart the program";
   public stack?: string;
   // An optional custom message replaces the default one.
   constructor(message?: string) {
     if (!message) {
       return;
     }
     this.message = message;
   }
}
// Error object thrown when a value is outside its permitted range.
class RangeError {
    public name: string = "RangeError";
    public message: string;
    public cause?: string;
    // `cause` is only stored when a non-empty value is supplied.
    constructor(message: string, cause?: string) {
        this.message = message;
        if (!cause) {
            return;
        }
        this.cause = cause;
    }
}
// consider redefinition if it is malloc
// Bindings for C runtime functions and for the helpers exported by native.c.
// NOTE(review): several size parameters are declared `int` here while the C side
// takes size_t - confirm the widths agree on the target platform.
declare function _malloc(size: u64): string;
declare function _realloc(src: Opaque, newsize: int): string;
declare function free<T>(mem: T): void;
declare function memcpy(dest: Opaque, src: Opaque, bytes: int): Opaque;
declare function _memcpy(dest: Opaque, src: Opaque, bytes: int): Opaque;
declare function memmove(dest: Opaque, src: Opaque, bytes: int): Opaque;
declare function _alloc_and_strncpy(src: Opaque, bytes: u64): Opaque;
declare function wprintf(str: string, ...args: string[]): int;
declare function printf(str: string, ...args: string[]): int;
declare function _cfromCodePoint(numN: u32[], count: int, str: string): c_fromCodePoint_ret;
// Takes the whole list of code points, like _cfromCodePoint; it was declared as a single u32.
declare function _cfromCodePoint_stack(numN: u32[], count: int): c_fromCodePoint_ret;
// Allocates `size` bytes through the native _malloc and throws bad_alloc when
// the allocation fails. The function name is recorded in __stack__ while it runs.
function mmalloc(size: u64): string {
   __stack__.push("mmalloc");
   const block: string = _malloc(size);
   if (block == null) {
      throw new bad_alloc();
   }
   __stack__.pop();
   return block;
}

// Static helpers mirroring String.fromCharCode / String.fromCodePoint, backed by
// the native functions declared above.
static class String {
   // Number of unused bytes from which shrinking the buffer with _realloc is requested.
   private MIN_REALLOC_REQUEST_SIZE: int = 32;
   // Inputs with at least this many code points are encoded straight into a heap
   // buffer; shorter ones go through the stack-based native helper.
   private MIN_HEAP_ALLOC_REQUEST_SIZE: int = 500 / 4;
   // Builds a string from UTF-16 code units; an empty argument list gives "".
   fromCharCode(...numN: u16[]): string {
    if (numN.length == 0) return "";
    __stack__.push("class String");
    __stack__.push("fromCharCode");
    let str: string = _alloc_and_strncpy(numN, numN.length);
    __stack__.pop();
    __stack__.pop();
    return str;
   }
   // Builds a string from Unicode code points.
   // Throws RangeError when the native helper rejects a code point.
   fromCodePoint(...numN: u32[]): string {
    __stack__.push("class String");
    __stack__.push("fromCodePoint");
    let len = numN.length;
    print("_cfromCodePoint");
    let res: c_fromCodePoint_ret;
    if (len >= this.MIN_HEAP_ALLOC_REQUEST_SIZE) {
        // worst case: 4 bytes per code point, plus one extra byte
        len = len * 4 + 1;
        res = _cfromCodePoint(numN, numN.length, mmalloc(len));
        if (res.e) {
            free(res.memory);
            // code point assigned to offset property on error
            // RangeError is a class, so it has to be instantiated with `new`.
            throw new RangeError(`Invalid code point ${res.offset}`);
        }
        len -= res.offset;
        // if the offset is too big we request realloc to reduce the size
        // if not we remain some extra bytes allocated.
        // They'll be automatically removed out when realloc or free is called
        if (res.offset >= this.MIN_REALLOC_REQUEST_SIZE) {
            print(`doing realloc because offset ${res.offset} >= ${this.MIN_REALLOC_REQUEST_SIZE}`);
            print(`realloc(str, ${len})`);
            let newstr: string = _realloc(res.memory, len);
            // keep the original buffer when the shrink fails
            if (newstr != null)
                res.memory = newstr;
        } else {
            print("realloc skipped due little offset: ", res.offset);
        }
    } else {
        // do stack optimization
        res = _cfromCodePoint_stack(numN, len);
        // code point assigned to offset property on error
        // RangeError is a class, so it has to be instantiated with `new`.
        if (res.e) throw new RangeError(`Invalid code point ${res.offset}`);
        len = res.offset;
    }
    __stack__.pop();
    __stack__.pop();
    return res.memory;
   }
}

// Entry point: encodes a few code points and prints the result.
function main() {
    __stack__.push("function main");
        // NOTE(review): the converted text is passed as the wprintf format string
        // itself, so a '%' in the data would be read as a conversion - confirm
        // this is acceptable for a test driver.
        wprintf(String.fromCodePoint(0x404, 65, 66, 67, 68,  69, 90));
        // The former "%p" conversion had no matching argument, which is
        // undefined behaviour in printf, so it was dropped from the format.
        printf("\nHello\n");
    __stack__.pop();
}

native.c:

#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <locale.h>
extern void* mmalloc(size_t size);
#ifdef _WIN32
    #define EXPORT __declspec(dllexport)
#else
    #define EXPORT __attribute__((visibility("default")))
#endif
// Result record returned by _cfromCodePoint and _cfromCodePoint_stack.
struct c_fromCodePoint_ret {
    // buffer holding the encoded bytes
    char* memory;
    // offset: byte-count bookkeeping, or the offending code point when `e` is set
    // e: set to 1 when a code point above 0x10FFFF was found, 0 otherwise
    int offset, e;
};
// Tracing wrapper around malloc(): logs the requested size and the returned pointer.
EXPORT void* _malloc(size_t size) {
   // size_t must be printed with %zu; %d reads it as an int.
   printf("_malloc internal: size = %zu\n", size);
   void* p = malloc(size);
   printf("_malloc internal result: %p\n", p);
   return p;
}
// Tracing wrapper around realloc(): logs the arguments and the returned pointer.
// The parameter was renamed from `_Block` because identifiers starting with an
// underscore followed by an upper-case letter are reserved in C.
EXPORT void* _realloc(void* block, size_t size) {
    // %zu is the portable conversion for size_t (it is not always unsigned long long).
    printf("_realloc internal: __Block = %p, size = %zu\n", block, size);
    void* p = realloc(block, size);
    printf("_realloc internal result: %p\n", p);
    return p;
}
// Tracing wrapper around memcpy() that also NUL-terminates the copy so it can
// be printed with %s.
// `dest` must therefore have room for `count + 1` bytes.
EXPORT void* _memcpy( void* dest, const void* src, size_t count ) {
    printf("_memcpy internal: dest = %p, src = %p, count = %zu\n", dest, src, count);
    char* p = memcpy(dest, src, count);
    // The terminator belongs right after the copied bytes; writing it at
    // count + 1 left p[count] uninitialised and reached one byte further.
    p[count] = '\0';
    printf("_memcpy internal result: dest = %s\n", p);
    return p;
}

// Copies `count` wide characters from `src` into a buffer obtained from
// mmalloc and appends a terminating L'\0'.
// `count` is a number of characters, not bytes (the TS caller passes the array length).
// NOTE(review): the TS caller passes 16-bit code units; wchar_t is only 16 bits
// wide on some platforms - confirm for the target.
EXPORT void* _alloc_and_strncpy(const wchar_t* src, size_t count) {
    const size_t payload = count * sizeof(wchar_t);
    const size_t total = payload + sizeof(wchar_t); // room for the terminator
    printf("mmalloc(%zu) ", total);
    wchar_t* dest = mmalloc(total);
    if (!dest) return NULL;
    // Copy exactly the characters supplied; the old `count + 2` bytes both
    // truncated the data and read past the end of `src`.
    memcpy(dest, src, payload);
    dest[count] = L'\0';
    wprintf(L"dest: %ls\n", dest);
    return dest;
}
/**
 * @brief Encodes code points as UTF-16 bytes (high byte first) into a
 *        caller-supplied buffer.
 *
 * Called directly when the buffer lives on the heap.
 *
 * @param numN  code points to encode
 * @param count number of code points; values <= 0 encode nothing
 * @param Str   destination buffer; needs up to 4 bytes per code point
 * @return `memory` is the start of `Str`. On success `e` is 0 and `offset` is
 *         the number of bytes left unused out of `count * 4` (2 per BMP
 *         character). When a code point is above 0x10FFFF, `e` is 1 and
 *         `offset` holds that code point.
 */
EXPORT struct c_fromCodePoint_ret _cfromCodePoint(const uint32_t* numN, int count, char* Str) {
    printf("Begin\n");
    struct c_fromCodePoint_ret result = {Str, 0, 0};
    // Clamp before converting: comparing a negative int with a size_t index
    // would turn it into a huge bound and overrun both buffers.
    const size_t total = count > 0 ? (size_t)count : 0;
    for (size_t i = 0; i < total; ++i) {
        uint32_t codePoint = numN[i];
        printf("codePoint = numN[%zu]: %u\nDoint byte manipulations\n", i, codePoint);
        if (codePoint <= 0xFFFF) {
            printf("BMP char\n");
            // BMP character
            // it consumes 2 bytes
            // so the other 2 bytes of its 4-byte slot are counted as unused
            result.offset += 2;
            printf("high byte\n");
            *Str++ = (codePoint >> 8) & 0xFF; // High byte
            printf("Low byte\n");
            *Str++ = codePoint & 0xFF;        // Low byte
        } else if (codePoint <= 0x10FFFF) {
            printf("Supplementary char\n");
            // Supplementary character
            // it consumes 4 bytes
            codePoint -= 0x10000;
            uint16_t highSurrogate = (codePoint >> 10) + 0xD800;
            uint16_t lowSurrogate = (codePoint & 0x3FF) + 0xDC00;
            *Str++ = (highSurrogate >> 8) & 0xFF; // High surrogate high byte
            *Str++ = highSurrogate & 0xFF;        // High surrogate low byte
            *Str++ = (lowSurrogate >> 8) & 0xFF;  // Low surrogate high byte
            *Str++ = lowSurrogate & 0xFF;         // Low surrogate low byte
        } else {
            printf("Error occurred\n");
            // hand the offending code point back through `offset`
            result.offset = codePoint;
            result.e = 1;
            return result;
        }
    }
    return result;
}
/**
 * @brief Encodes into a stack buffer first, then copies the result into a
 *        buffer from mmalloc with the exact size.
 *
 * @param numN  code points to encode
 * @param count number of code points; negative values are treated as 0
 * @return On success `memory` is a NUL-terminated buffer from mmalloc and
 *         `offset` is its size in bytes, terminator included. On error `e` is
 *         set, `offset` holds the offending code point and `memory` is NULL.
 */
EXPORT struct c_fromCodePoint_ret _cfromCodePoint_stack(const uint32_t* numN, int count) {
    // A negative count would give the array below a negative size.
    if (count < 0) count = 0;
    // NOTE(review): the TS caller shown only takes this path for small counts;
    // nothing here bounds the stack usage for other callers.
    int maxCount = count * 4 + 1;
    char stack[maxCount];
    struct c_fromCodePoint_ret res = _cfromCodePoint(numN, count, stack);
    if (res.e) {
        // `memory` still points at the local array, which dies on return.
        res.memory = NULL;
        return res;
    }
    // res.offset is the number of unused bytes out of count * 4.
    const int used = count * 4 - res.offset;
    // an attempt to call up the ts function
    char* heap = mmalloc((size_t)used + 1);
    memcpy(heap, stack, (size_t)used);
    // The terminator goes in the last allocated byte; the old index was one
    // past the end of the allocation.
    heap[used] = '\0';
    res.memory = heap;
    res.offset = used + 1; // report the new length in the offset field
    return res;
}
ASDAlexander77 commented 1 month ago

You need to be careful when you use "Union" types. The code

let src: string  | 0 = _malloc(size);
   if (src == 0)
      throw new bad_alloc();

should be written like that:

let src: string  | 0 = _malloc(size);
   if (typeof src == "i32") if (src == 0)
      throw new bad_alloc();

The first `if` ensures that the value has type "int" and not "string", because the union type contains both 0 (which is an i32) and string (which is a pointer). In your case it would be better to use "string | null", but a string can be "null" anyway.

so I would use

let src: string  = _malloc(size);
   if (src == null)
      throw new bad_alloc();

to remove usage of union type

ASDAlexander77 commented 1 month ago
type pointer_type = string | Array<any> | Reference<any>;

The Array type is not a "pointer"; it is struct { int size; any[] data }, so using "Opaque", which is void*, would be better. Also, Array<int> is not the same as Array<any>, which means it will not be part of "pointer_type". More correct would be type pointer_type<T> = string | Array<T> | Reference<T>; and free should be free<T> in that case.

Sinfolke commented 1 month ago

Thank you a lot, i will fix that