Open Sinfolke opened 1 month ago
i may send entire code
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#ifdef _WIN32
#define EXPORT __declspec(dllexport)
#else
#define EXPORT __attribute__((visibility("default")))
#endif
/**
* @brief If the size we allocated differs too much from the actual size of the string, realloc is called to shrink the buffer to the actual size.
* For now the threshold is defined to be 32 bytes (16 UTF-16 code units, or 32 ASCII characters)
*/
#define MIN_REALLOC_REQUEST_SIZE 32
/*
 * Status codes stored in cst_string.err.
 *
 * Zero is reserved for "no error": the native functions initialise err to 0
 * on the success path, so AllocError must not also be 0. The TypeScript
 * wrapper tests err against 1 (allocation error) and 2 (range error), so the
 * values are pinned explicitly.
 */
enum {
    NoError = 0,
    AllocError = 1,
    RangeError = 2
};
/*
 * Result record returned by value from the native string helpers.
 *
 * len - byte length of the buffer behind str (the code-point helper reuses it
 *       to carry the offending code point when it reports a range error).
 * str - heap buffer produced by the helper. It is deliberately NOT const:
 *       the helpers write into it with memcpy / byte stores and release it
 *       with free, which a pointer-to-const would forbid without casts.
 * err - status code; 0 means success.
 *
 * err could be folded into the other two fields as an optimisation, but a
 * dedicated field makes it easier to understand what is going on.
 */
struct cst_string {
    size_t len;
    char* str;
    int err;
};
/*
 * Returns ptr advanced by val bytes.
 *
 * Arithmetic on void* is a GNU extension and is rejected by other compilers
 * (this file also targets _WIN32), so the pointer is stepped as char*, which
 * has the same one-byte stride everywhere.
 */
EXPORT void* ___cadd_pointer(void* ptr, int val) {
    return (char*)ptr + val;
}
// *** String class methods ***
EXPORT struct cst_string ___cfromCharCode(const uint16_t* numN, const long long count) {
printf("cfromCodePoint is executed (beginning)\n");
printf("numN = %p\ncount = %lld\n", numN, count);
struct cst_string r = {0, NULL};
if (count <= 0) return r;
printf("Doing allocation\n");
r.len = count * sizeof(uint16_t);
r.str = malloc(r.len);
if (!r.str) return r;
printf("Doing memcpy\n");
memcpy(r.str, numN, r.len - 1);
return r;
}
// The bit manipulation in the loop was produced by ChatGPT; it appears to
// process code points correctly.
/*
 * Encodes `count` Unicode code points as UTF-16 into a malloc'ed buffer.
 *
 * Each code point <= 0xFFFF is written as one 2-byte unit; each code point
 * <= 0x10FFFF is written as a surrogate pair (4 bytes). Every unit is stored
 * high byte first.
 * NOTE(review): ___cfromCharCode copies code units in host byte order, so on
 * a little-endian host the two helpers disagree - confirm which order the
 * string consumer expects.
 *
 * On success: len = bytes written, str = buffer (caller frees), err = 0.
 * For NULL input or count == 0: {0, NULL, 0}.
 * On allocation failure / size overflow: str = NULL, len = 0, err = AllocError.
 * On a code point > 0x10FFFF: the buffer is freed, str = NULL, len carries
 * the offending code point (for the error message) and err = RangeError.
 */
EXPORT struct cst_string ___cfromCodePoint(const uint32_t* numN, size_t count) {
    struct cst_string r = {0, NULL, 0};
    /* Nothing to encode; also avoids malloc(0), which may legally return NULL
       and would be misreported as an allocation error. */
    if (numN == NULL || count == 0) return r;

    /* Worst case is 4 bytes per code point; reject sizes that would wrap. */
    if (count > SIZE_MAX / 4) {
        r.err = AllocError;
        return r;
    }
    const size_t capacity = count * 4;
    char* buf = malloc(capacity);
    if (!buf) {
        r.err = AllocError;
        return r;
    }

    char* out = buf;
    for (size_t i = 0; i < count; ++i) {
        uint32_t codePoint = numN[i];
        if (codePoint <= 0xFFFF) {
            /* BMP character: a single 2-byte unit. */
            *out++ = (char)((codePoint >> 8) & 0xFF); /* high byte */
            *out++ = (char)(codePoint & 0xFF);        /* low byte */
        } else if (codePoint <= 0x10FFFF) {
            /* Supplementary character: surrogate pair, 4 bytes. */
            codePoint -= 0x10000;
            uint16_t highSurrogate = (uint16_t)((codePoint >> 10) + 0xD800);
            uint16_t lowSurrogate = (uint16_t)((codePoint & 0x3FF) + 0xDC00);
            *out++ = (char)((highSurrogate >> 8) & 0xFF);
            *out++ = (char)(highSurrogate & 0xFF);
            *out++ = (char)((lowSurrogate >> 8) & 0xFF);
            *out++ = (char)(lowSurrogate & 0xFF);
        } else {
            /* Report a range error to the TS side. The buffer is released
               here and str is left NULL so the caller never sees a dangling
               pointer. */
            free(buf);
            r.len = codePoint;
            r.err = RangeError;
            return r;
        }
    }

    r.len = (size_t)(out - buf);
    r.str = buf;
    /* Shrink only when enough bytes are wasted to be worth a realloc call;
       smaller slack is simply left allocated. */
    if (capacity - r.len >= MIN_REALLOC_REQUEST_SIZE) {
        char* shrunk = realloc(buf, r.len);
        /* A failed shrink leaves the original block valid, so keep it. */
        if (shrunk) r.str = shrunk;
    }
    return r;
}
// Compiler-provided helpers, kept here commented out for reference:
// declare type Reference<T> = T;
// declare function ReferenceOf(obj: any): any;
// declare function LoadReference<T>(obj: T) : T;
// Mirror of `struct cst_string` from the C code: the record the native
// helpers return by value.
// The err field reports failures (allocation, invalid code point); the
// wrappers below test it against 1 and 2.
declare type cst_string = {
// byte length of the buffer behind str (the native code-point helper stores
// the offending code point here when it reports a range error)
len: u64;
// buffer produced by the native helper
str: Reference<char>;
// status code set by the native helper
err: int;
};
// memory management functions (currently disabled)
// declare function malloc(size: long): Reference<char>;
// declare function free<T>(pointer: Reference<T>): void;
// declare function exit(code: int): void;
// Internal functions from native.c.
// They are declared with two leading underscores while native.c defines them
// with three; per the discussion in this thread, one more '_' is added to the
// name when linking.
declare function __cfromCharCode(numN: Reference<u16>, count: i64): cst_string;
declare function __cfromCodePoint(numN: Reference<u32>, count: u64): cst_string;
declare function __cadd_pointer(pointer: Reference<char>, value: int): Reference<char>;
// should convert the cst_string (which is used by the internal modules) into a string class
// Converts a cst_string (the record used by the native helpers) into a
// regular string by loading str.len elements one at a time.
// This is a deliberately simple, unoptimized conversion.
function cst_string_to_string(str: cst_string): string {
    let s: string = "";
    let cursor: Reference<char> = str.str;
    for (let i: int = 0; i < str.len; ++i) {
        // Load first, then advance: stepping the pointer before the load
        // skipped element 0 and read one element past the end of the buffer.
        s += LoadReference(cursor);
        cursor = __cadd_pointer(cursor, 1);
    }
    return s;
}
// Static string factories backed by the native helpers in native.c.
class String {
    // Builds a string from UTF-16 code units via the native __cfromCharCode.
    static fromCharCode(...numN: u16[]): string {
        log("fromCharsCode executed");
        log(numN.length);
        let res: cst_string = __cfromCharCode(ReferenceOf(numN), numN.length);
        log("__cfromCharsCode executed");
        // convert the native record to a regular string
        return cst_string_to_string(res);
    }

    // Builds a string from Unicode code points via the native __cfromCodePoint.
    // On a native error the problem is printed (there is no error class to
    // throw yet) and an empty string is returned.
    static fromCodePoint(...numN: u32[]): string {
        let res: cst_string = __cfromCodePoint(ReferenceOf(numN), numN.length);
        if (res.err == 1) {
            // we do not have an error class for now, so just print
            // throw Error("Allocation error");
            print("Allocation Error");
            // No buffer was produced; converting it would read through an
            // invalid pointer.
            return "";
        } else if (res.err == 2) {
            // res.len carries the offending code point.
            // res.str was already freed by the native function, so it must
            // not be read here.
            // throw RangeError(`Invalid code point ${res.len}`); // message of the original JS function
            print(`Invalid code point ${res.len}`);
            return "";
        }
        // convert the native record to a regular string
        return cst_string_to_string(res);
    }

    // Written entirely by ChatGPT.
    // Interleaves the template's string pieces with the substitution values.
    static raw(strings: TemplateStringsArray, ...values: any[]): string {
        let result = '';
        for (let i = 0; i < strings.length; i++) {
            result += strings[i];
            if (i < values.length) {
                result += values[i];
            }
        }
        return result;
    }
}
// Entry point: builds a string from four code units and logs it.
function main() {
    let text: string = String.fromCharCode(189, 293, 239, 901);
    log(text);
}
I'm sorry that there are already so many issues. There's another one: the compiler adds one more '_' to the names of the functions it searches for in the static .lib library. I wrote some String class static properties in C and tested them in TS. When it links, it adds one more '_' (I understood that from the error message):
Yes, this is the TypeScript parser adding one more '_'. I don't know why it does that; try to avoid using "_" at the beginning of a name. BTW, for examples of how to implement things like "Strings" etc., have a look at https://github.com/ASDAlexander77/TypeScriptCompilerDefaultLib - you may find some good examples there.
probably when casting numN.length into i64 something is not converted right. Try to use i32 or u32 in for size ___cfromCharCode.
I remember there were issue with i64 but I am not sure if it is fixed in your release version
On Fri, 26 Jul 2024 at 16:31, Yuriy @.***> wrote:
In my implementation printf shows that the length has a large value (4294966655) when it should be 4. As a result the allocation takes several (around 3-4) seconds and then the program quits inside the memcpy call. I tried many integer types for the input to cfromCharCode; the input is also often '0'.
— Reply to this email directly, view it on GitHub https://github.com/ASDAlexander77/TypeScriptCompiler/issues/104#issuecomment-2253006453, or unsubscribe https://github.com/notifications/unsubscribe-auth/ABRYRHFB3AU2VRIG6REITF3ZOJTTJAVCNFSM6AAAAABLOVIMD2VHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMZDENJTGAYDMNBVGM . You are receiving this because you commented.Message ID: @.***>
I currently have the following implementation. The fromCharCode function works if wprintf is used (since it converts into UTF-16 characters), but it still outputs UTF-16-specific characters incorrectly (only regular characters are output normally). The conversion from a sequence of characters to a string is done correctly, though. fromCodePoint seems to have some issues in the bit manipulation in C (it seems to access memory it shouldn't). I tried to optimise it heavily: depending on the size it does either a stack allocation or an efficient heap allocation.
Such large examples also shows how actually compiler works.
String.ts:
// Union of the value kinds this file treats as "pointer-like".
type pointer_type<T> = string | Array<T> | Reference<T>;
// Mirror of `struct c_fromCodePoint_ret` from native.c (same three fields,
// same order).
type c_fromCodePoint_ret = {
// the produced buffer, seen from TS as a string
memory: string;
// byte counter filled in by the native code; on error it carries the
// offending code point instead
offset: int;
// non-zero when the native code rejected a code point
e: int;
}
// Hand-maintained call stack: functions push their name on entry and pop it
// before returning.
let __stack__: string[] = [];
// Error object thrown when a native allocation returns null.
// All fields carry defaults; only the message can be overridden.
class bad_alloc {
    public name: string = "bad_alloc";
    public message: string = "allocation error";
    public cause: string = "Usually it occurs when no heap memory enough to allocate bytes. You should ensure you're allocating valid memory size and/or close unnecessary apps to free up RAM. Sometimes just restart the program";
    public stack?: string;

    // message: optional replacement for the default text; a missing or empty
    // value leaves the default in place.
    constructor(message?: string) {
        if (!message) {
            return;
        }
        this.message = message;
    }
}
// Error object describing a value outside its permitted range.
class RangeError {
    public name: string = "RangeError";
    public message: string;
    public cause?: string;

    // message: required description of the failure.
    // cause:   optional extra explanation; stored only when non-empty.
    constructor(message: string, cause?: string) {
        this.message = message;
        if (!cause) {
            return;
        }
        this.cause = cause;
    }
}
// Bindings for the native helpers (native.c) and the C runtime.
// The allocator wrappers carry a leading underscore; consider the clash with
// the C runtime if one is ever renamed to plain `malloc`.
declare function _malloc(size: u64): string;
declare function _realloc(src: Opaque, newsize: int): string;
declare function free<T>(mem: T): void;
declare function memcpy(dest: Opaque, src: Opaque, bytes: int): Opaque;
declare function _memcpy(dest: Opaque, src: Opaque, bytes: int): Opaque;
declare function memmove(dest: Opaque, src: Opaque, bytes: int): Opaque;
declare function _alloc_and_strncpy(src: Opaque, bytes: u64): Opaque;
declare function wprintf(str: string, ...args: string[]): int;
declare function printf(str: string, ...args: string[]): int;
declare function _cfromCodePoint(numN: u32[], count: int, str: string): c_fromCodePoint_ret;
// numN is the whole code-point list, exactly as for _cfromCodePoint: the
// caller passes its u32[] rest parameter and native.c indexes numN[i]. It was
// previously declared as a single u32.
// NOTE(review): how a u32[] is marshalled to the C `const uint32_t*` is not
// visible here - confirm against the compiler's array representation.
declare function _cfromCodePoint_stack(numN: u32[], count: int): c_fromCodePoint_ret;
// Checked allocation: asks the native allocator for `size` bytes and throws
// bad_alloc when it hands back null. The function records itself on
// __stack__ while it runs; the entry is removed only on the success path.
function mmalloc(size: u64): string {
    __stack__.push("mmalloc");
    let block: string = _malloc(size);
    if (block == null) {
        throw new bad_alloc();
    }
    __stack__.pop();
    return block;
}
// String factories backed by native.c.
// NOTE(review): when an error is thrown the entries pushed on __stack__ are
// not popped - confirm whether that is intended as a crude stack trace.
static class String {
    // minimum number of wasted bytes that justifies a shrinking realloc
    private MIN_REALLOC_REQUEST_SIZE: int = 32;
    // inputs with at least this many code points are encoded straight into a
    // heap buffer; shorter ones go through the native stack-buffer variant
    private MIN_HEAP_ALLOC_REQUEST_SIZE: int = 500 / 4;

    // Builds a string from UTF-16 code units by copying them natively.
    fromCharCode(...numN: u16[]): string {
        if (numN.length == 0) return "";
        __stack__.push("class String");
        __stack__.push("fromCharCode");
        let str: string = _alloc_and_strncpy(numN, numN.length);
        __stack__.pop();
        __stack__.pop();
        return str;
    }

    // Builds a string from Unicode code points.
    // Throws RangeError when the native encoder rejects a code point.
    fromCodePoint(...numN: u32[]): string {
        __stack__.push("class String");
        __stack__.push("fromCodePoint");
        let len = numN.length;
        print("_cfromCodePoint");
        let res: c_fromCodePoint_ret;
        if (len >= this.MIN_HEAP_ALLOC_REQUEST_SIZE) {
            // worst case: 4 bytes per code point, plus one spare byte
            len = len * 4 + 1;
            res = _cfromCodePoint(numN, numN.length, mmalloc(len));
            if (res.e) {
                free(res.memory);
                // the code point is assigned to the offset property on error
                // RangeError is a class, so it must be constructed with `new`
                throw new RangeError(`Invalid code point ${res.offset}`);
            }
            len -= res.offset;
            // if the offset is too big we request realloc to reduce the size;
            // if not, the few extra bytes stay allocated until realloc or
            // free is called on the block
            if (res.offset >= this.MIN_REALLOC_REQUEST_SIZE) {
                print(`doing realloc because offset ${res.offset} >= ${this.MIN_REALLOC_REQUEST_SIZE}`);
                print(`realloc(str, ${len})`);
                let newstr: string = _realloc(res.memory, len);
                // a failed shrink leaves the original block usable
                if (newstr != null)
                    res.memory = newstr;
            } else {
                print("realloc skipped due little offset: ", res.offset);
            }
        } else {
            // stack optimization: the native side encodes into a stack buffer
            // and copies the result to the heap
            res = _cfromCodePoint_stack(numN, len);
            // the code point is assigned to the offset property on error
            if (res.e) throw new RangeError(`Invalid code point ${res.offset}`);
        }
        __stack__.pop();
        __stack__.pop();
        return res.memory;
    }
}
// Entry point: encodes a few code points and prints the result.
function main() {
    __stack__.push("function main");
    // NOTE(review): the encoded text is passed as the wprintf format string;
    // a '%' in the data would be interpreted as a conversion - confirm.
    wprintf(String.fromCodePoint(0x404, 65, 66, 67, 68, 69, 90));
    // The format previously contained a %p conversion with no matching
    // argument, which is undefined behaviour in printf; it has been dropped.
    printf("\nHello\n");
    __stack__.pop();
}
native.c:
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <locale.h>
extern void* mmalloc(size_t size);
#ifdef _WIN32
#define EXPORT __declspec(dllexport)
#else
#define EXPORT __attribute__((visibility("default")))
#endif
/*
 * Result record returned by value from the code-point encoders.
 * Field order matters: the TypeScript side declares a type with the same
 * three members in the same order.
 */
struct c_fromCodePoint_ret {
    char* memory; /* buffer the encoder wrote into */
    int offset;   /* byte counter; holds the offending code point on error */
    int e;        /* non-zero when a code point was rejected */
};
/*
 * Logging wrapper around malloc: prints the requested size and the resulting
 * pointer, then returns that pointer (NULL on failure, as malloc does).
 */
EXPORT void* _malloc(size_t size) {
    /* %zu is the conversion for size_t; %d expects an int and prints garbage
       (or worse) where size_t is wider than int. */
    printf("_malloc internal: size = %zu\n", size);
    void* p = malloc(size);
    printf("_malloc internal result: %p\n", p);
    return p;
}
/*
 * Logging wrapper around realloc: prints the arguments and the resulting
 * pointer, then returns that pointer (NULL on failure, in which case the
 * original block is still valid, as with realloc).
 */
EXPORT void* _realloc(void* block, size_t size) {
    /* The parameter was named _Block; identifiers starting with an underscore
       followed by an uppercase letter are reserved for the implementation.
       size_t is printed with %zu rather than %llu. */
    printf("_realloc internal: __Block = %p, size = %zu\n", block, size);
    void* p = realloc(block, size);
    printf("_realloc internal result: %p\n", p);
    return p;
}
/*
 * Logging wrapper around memcpy: copies `count` bytes from src to dest,
 * prints the arguments and the copied bytes, and returns dest.
 *
 * The wrapper no longer stores a terminator at dest[count + 1]: that byte is
 * two past the copied region and the caller only promised `count` bytes of
 * room. The diagnostic print is bounded with a precision instead, so it never
 * reads past the copied data either.
 */
EXPORT void* _memcpy( void* dest, const void* src, size_t count ) {
    printf("_memcpy internal: dest = %p, src = %p, count = %zu\n", dest, (const void*)src, count);
    char* p = memcpy(dest, src, count);
    /* The precision argument of %.*s is an int; clamp very large counts. */
    const size_t max_shown = 0x7FFFFFFF;
    const int shown = (int)(count > max_shown ? max_shown : count);
    printf("_memcpy internal result: dest = %.*s\n", shown, p);
    return p;
}
/*
 * Allocates a buffer with mmalloc and copies `count` wide characters from src
 * into it, followed by a terminating L'\0'. Returns the new buffer, or NULL
 * if the size would overflow or mmalloc returns NULL.
 *
 * Sizes are computed in wchar_t elements: the previous version allocated and
 * copied `count + 2` bytes, which truncates the data for longer inputs, reads
 * past src for very short ones, and does not guarantee a terminator before
 * the buffer is printed.
 * NOTE(review): the TS caller passes a u16[] and its element count; wchar_t
 * is only 16 bits wide on some platforms - confirm the element width.
 */
EXPORT void* _alloc_and_strncpy(const wchar_t* src, size_t count) {
    if (src == NULL) count = 0;
    /* Leave room for the terminator and refuse sizes that would wrap. */
    if (count > SIZE_MAX / sizeof(wchar_t) - 1) return NULL;
    const size_t bytes = (count + 1) * sizeof(wchar_t);

    printf("mmalloc(%zu) ", bytes);
    wchar_t* dest = mmalloc(bytes);
    if (dest == NULL) return NULL;

    if (count > 0) memcpy(dest, src, count * sizeof(wchar_t));
    dest[count] = L'\0';
    /* Stay on the byte-oriented printf: mixing printf and wprintf on the same
       stream is not portable. %ls converts the wide string for output. */
    printf("dest: %ls\n", dest);
    return dest;
}
/**
 * @brief Encodes code points as UTF-16 into a caller-supplied buffer.
 *        Called directly when the buffer is heap-allocated by the caller.
 *
 * Each code point <= 0xFFFF is written as 2 bytes and each code point
 * <= 0x10FFFF as a 4-byte surrogate pair, high byte first.
 *
 * @param numN  code points to encode
 * @param count number of entries in numN; values <= 0 encode nothing
 * @param Str   output buffer; the function writes at most 4 * count bytes
 *              and does not append a terminator
 * @return memory = Str as passed in; offset = 2 for every code point that
 *         needed only 2 bytes (i.e. the bytes left unused out of 4 * count);
 *         e = 0 on success. For a code point > 0x10FFFF encoding stops,
 *         e = 1 and offset holds the offending code point.
 */
EXPORT struct c_fromCodePoint_ret _cfromCodePoint(const uint32_t* numN, int count, char* Str) {
    printf("Begin\n");
    struct c_fromCodePoint_ret result = {Str, 0, 0};
    /* Nothing can be read or written without both buffers. */
    if (numN == NULL || Str == NULL) return result;

    /* Signed index: comparing a size_t index against a negative int count
       converted the count to a huge unsigned value and overran both buffers. */
    for (int i = 0; i < count; ++i) {
        uint32_t codePoint = numN[i];
        printf("codePoint = numN[%d]: %u\nDoint byte manipulations\n", i, (unsigned)codePoint);
        if (codePoint <= 0xFFFF) {
            printf("BMP char\n");
            /* BMP character: uses 2 bytes, so 2 of the 4 reserved are spare. */
            result.offset += 2;
            printf("high byte\n");
            *Str++ = (char)((codePoint >> 8) & 0xFF); /* high byte */
            printf("Low byte\n");
            *Str++ = (char)(codePoint & 0xFF);        /* low byte */
        } else if (codePoint <= 0x10FFFF) {
            printf("Supplementary char\n");
            /* Supplementary character: surrogate pair, 4 bytes. */
            codePoint -= 0x10000;
            uint16_t highSurrogate = (uint16_t)((codePoint >> 10) + 0xD800);
            uint16_t lowSurrogate = (uint16_t)((codePoint & 0x3FF) + 0xDC00);
            *Str++ = (char)((highSurrogate >> 8) & 0xFF); /* high surrogate, high byte */
            *Str++ = (char)(highSurrogate & 0xFF);        /* high surrogate, low byte */
            *Str++ = (char)((lowSurrogate >> 8) & 0xFF);  /* low surrogate, high byte */
            *Str++ = (char)(lowSurrogate & 0xFF);         /* low surrogate, low byte */
        } else {
            printf("Error occurred\n");
            /* Hand the offending code point back through offset. */
            result.offset = (int)codePoint;
            result.e = 1;
            return result;
        }
    }
    return result;
}
/**
 * @brief Encodes into a stack buffer first, then copies the result into a
 *        heap block of exactly the needed size.
 *
 * @param numN  code points to encode
 * @param count number of entries in numN; negative values are treated as 0.
 *              The scratch buffer is a variable-length array of
 *              4 * count + 1 bytes, so callers must keep count small.
 * @return On success: memory = block obtained from mmalloc holding the
 *         encoded bytes followed by one '\0'; offset = size of that block
 *         (encoded bytes + 1); e = 0.
 *         On an invalid code point: e = 1, offset = the offending code point
 *         and memory = NULL (the scratch buffer does not outlive this call).
 *         If mmalloc returns NULL: memory = NULL and offset = 0.
 */
EXPORT struct c_fromCodePoint_ret _cfromCodePoint_stack(const uint32_t* numN, int count) {
    /* A negative count would give the array below a negative size. */
    if (numN == NULL || count < 0) count = 0;

    int maxCount = count * 4 + 1;
    char stack[maxCount];
    struct c_fromCodePoint_ret res = _cfromCodePoint(numN, count, stack);
    if (res.e) {
        /* Never hand out a pointer into this function's stack frame. */
        res.memory = NULL;
        return res;
    }

    /* res.offset currently counts the unused bytes out of 4 * count. */
    int used = maxCount - 1 - res.offset;
    res.offset = used + 1; /* new length: encoded bytes plus the terminator */

    char* heap = mmalloc((size_t)res.offset);
    if (heap == NULL) {
        res.memory = NULL;
        res.offset = 0;
        return res;
    }
    memcpy(heap, stack, (size_t)used);
    /* The terminator goes in the last allocated byte; writing at
       heap[res.offset] was one byte past the end of the block. */
    heap[used] = '\0';
    res.memory = heap;
    return res;
}
You need to be careful when you use "union" types. The code
let src: string | 0 = _malloc(size);
if (src == 0)
throw new bad_alloc();
should be written like that:
let src: string | 0 = _malloc(size);
if (typeof src == "i32") if (src == 0)
throw new bad_alloc();
first if
will ensure that you have type "int" not "string"
as union type contains and 0 (which is i32) and string which is pointer
in your case it would be better to use "string | null" but string is "null" anyway
so I would use
let src: string = _malloc(size);
if (src == null)
throw new bad_alloc();
to remove usage of union type
type pointer_type = string | Array<any> | Reference<any>;
Array type is not "pointer", it is struct { int size; any[] data }
so using "Opaque" which is void*
would be better. and Array<int>
not the same as Array<any>
which means it will not be a part of "pointer_type"
more correct would be
type pointer_type<T> = string | Array<T> | Reference<T>;
and free
should be free<T>
in that case
Thank you a lot, i will fix that
I'm sorry that there are already so many issues. There's another one: the compiler adds one more '_' to the names of the functions it searches for in the static .lib library. I wrote some String class static properties in C and tested them in TS. When it links, it adds one more '_' (I understood that from the error message):
as you may see there're two '__'. then compile: