I enjoyed reading your code

kelas / ooj

the origins of J

The Unlicense

60 stars 5 forks source link

I enjoyed reading your code #2

Open dataf3l opened 10 months ago

dataf3l commented 10 months ago

I made some minor modifications, would you consider taking a look at them and providing feedback?

#include<limits.h> //LLONG_MAX
#include<stdlib.h> //malloc
#include<stdio.h>  //fgets printf
#include<string.h> //strlen

// Short type aliases kept from the terse original: C is char, S is char*.
// Neither alias is used by the code visible in this listing.
//typedef void _;
typedef char C,*S;

// Array object: a fixed header followed by the data words. Every field is a
// long long, so the header (t, r, d[0..2]) occupies five words and the data
// starts at word offset 5 -- createArray allocates "5 + element count" words.
typedef struct a {
  long long t, r;  // t: element kind (0 = integers; non-zero is printed as boxed pointers by printArray). r: rank.
  long long d[3];  // extent of each dimension; createArray copies the first r entries
  long long p[2];  // first data words; code indexes p[] up to the element count allocated by createArray
}* Array;
// Variable slots, one per lowercase letter; executeExpression indexes it with (letter - 'a').
static Array st[26];

// Long-form alias for the variable table st.
#define globalArrayStack st
// Forward declarations: the adverb table va[] refers to reduce and scanArray
// before their definitions appear.
Array reduce(Array x, Array y);
Array scanArray(Array x, Array y);
// NOTE(review): no function named ex is defined in the visible code; this
// presumably predates the rename to executeExpression -- confirm and remove.
Array ex(Array * e);

// Running total, in bytes, of everything requested through allocMem.
// Nothing in the visible code ever decreases it.
long long WS = 0;

// Allocate `size` words, each sizeof(long long) bytes wide, and add the byte
// count to the workspace counter WS.
//
// size:    number of long long words requested.
// returns: pointer to uninitialised storage of that many words.
//
// No caller checks for a NULL result, so an allocation failure is reported
// and the process exits here instead of crashing later on a NULL dereference.
long long * allocMem(long long size) {
  size_t bytes = (size_t)size * sizeof(long long);
  long long *block = malloc(bytes);
  if (!block && bytes) { // malloc(0) may legitimately return NULL
    fprintf(stderr, "allocMem: out of memory (%lld words requested)\n", size);
    exit(1);
  }
  WS += (long long)bytes; // count only allocations that succeeded
  return block;
}

// Copy `count` words from `src` to `dest`, one word at a time, lowest address
// first. A count of zero or less copies nothing.
//
// The ascending word-by-word order matters: reshapeArray calls this with
// overlapping regions (dest ahead of src) and relies on re-reading words that
// were just written, so memcpy/memmove are not drop-in replacements.
void copyMem(long long *dest, long long *src, long long count) {
  long long *out = dest;
  long long *in = src;
  long long remaining = count;
  while (remaining > 0) {
    *out++ = *in++;
    remaining--;
  }
}

// Number of elements described by a shape: the product of the first `rank`
// entries of `dimensions`. A rank of zero (or less) yields 1, and in that
// case `dimensions` is never read.
long long calcSize(long long rank, long long *dimensions) {
  long long product = 1;
  long long axis = 0;
  while (axis < rank) {
    product *= dimensions[axis];
    axis++;
  }
  return product;
}

// Locate `targetChar` in the NUL-terminated string `str`.
//
// returns: the 1-based position of the first match, carried inside an Array
//          pointer value (it is an integer, not a real array), or 0 when the
//          character does not occur. The terminator itself never matches.
Array indexOfChar(char targetChar, char* str) {
  long long position = 0;
  while (str[position] != '\0') {
    if (str[position] == targetChar) {
      return (Array)(position + 1);
    }
    position++;
  }
  return 0;
}

Array createArray(long long type, long long rank, long long *dimensions) {
  Array newArray = (Array)allocMem(5 + calcSize(rank, dimensions));
  newArray->t = type;
  newArray->r = rank;
  copyMem(newArray->d, dimensions, rank);
  return newArray;
}

// Identity verb: hands back the very same array object it was given (no copy).
Array id(Array x) { return x; }

// Build the integer vector 0, 1, ..., n-1 where n is the first data word of x.
//
// returns: a new rank-1, type-0 array of length n.
Array sequence(Array x) {
  long long count = x->p[0];
  Array result = createArray(0, 1, &count);
  long long *slot = result->p;
  for (long long value = 0; value < count; value++) {
    *slot++ = value;
  }
  return result;
}

// Element-wise addition with scalar extension.
//
// A rank-0 operand is treated as a scalar and paired with every element of
// the other operand. The result takes the shape of y, except when y is a
// scalar and x is not: then it takes the shape of x, so "array + scalar"
// yields a full array just as "scalar + array" does (previously that case
// produced a single element).
//
// returns: a new type-0 array. When both operands have non-zero rank their
//          element counts are assumed to match; no check is made.
Array plus(Array x, Array y) {
  Array shapeSource = (!y->r && x->r) ? x : y;
  long long rank = shapeSource->r;
  long long *dimensions = shapeSource->d;
  long long totalCount = calcSize(rank, dimensions);
  Array resultArray = createArray(0, rank, dimensions);
  for (long long i = 0; i < totalCount; i++) {
    long long left = x->r ? x->p[i] : *x->p;
    long long right = y->r ? y->p[i] : *y->p;
    resultArray->p[i] = left + right;
  }
  return resultArray;
}

// Element-wise multiplication with scalar extension.
//
// A rank-0 operand is treated as a scalar and paired with every element of
// the other operand. The result takes the shape of y, except when y is a
// scalar and x is not: then it takes the shape of x, so "array * scalar"
// yields a full array just as "scalar * array" does (previously that case
// produced a single element).
//
// returns: a new type-0 array. When both operands have non-zero rank their
//          element counts are assumed to match; no check is made.
Array mul(Array x, Array y) {
  Array shapeSource = (!y->r && x->r) ? x : y;
  long long rank = shapeSource->r;
  long long *dimensions = shapeSource->d;
  long long totalCount = calcSize(rank, dimensions);
  Array resultArray = createArray(0, rank, dimensions);
  for (long long i = 0; i < totalCount; i++) {
    long long left = x->r ? x->p[i] : *x->p;
    long long right = y->r ? y->p[i] : *y->p;
    resultArray->p[i] = left * right;
  }
  return resultArray;
}

// Select one item along the leading axis of sourceArray.
//
// indexArray:  its first data word is the zero-based item number.
// sourceArray: array to pick from; an item has the shape d[1..] and rank r-1.
// returns:     a new array of the source's type holding a copy of that item.
//
// The index is not range-checked.
Array extractFromArray(Array indexArray, Array sourceArray) {
    long long *itemShape = sourceArray->d + 1;
    long long itemRank = sourceArray->r - 1;
    long long itemLength = calcSize(itemRank, itemShape);
    long long which = indexArray->p[0];

    Array item = createArray(sourceArray->t, itemRank, itemShape);
    long long *from = sourceArray->p + (itemLength * which);
    copyMem(item->p, from, itemLength);
    return item;
}

// Wrap an array in a box: a new rank-0 array with type tag 1 whose single
// data word holds the pointer to inputArray (the contents are not copied).
Array boxArray(Array inputArray) {
    Array box = createArray(1, 0, 0);
    box->p[0] = (long long) inputArray;
    return box;
}

// Join two arrays end to end.
//
// All elements of firstArray are followed by all elements of secondArray,
// whatever their shapes were; the result is always rank 1 and carries the
// type tag of secondArray.
Array concatenateArrays(Array firstArray, Array secondArray) {
    long long headCount = calcSize(firstArray->r, firstArray->d);
    long long tailCount = calcSize(secondArray->r, secondArray->d);
    long long joinedCount = headCount + tailCount;

    Array joined = createArray(secondArray->t, 1, &joinedCount);
    long long *cursor = joined->p;
    copyMem(cursor, firstArray->p, headCount);
    cursor += headCount;
    copyMem(cursor, secondArray->p, tailCount);
    return joined;
}

// Placeholder for the dyadic "find" verb: prints "nyi" (not yet implemented)
// on its own line and returns a null Array. Both arguments are ignored.
Array findElement(Array x, Array y) {
  puts("nyi");
  return (Array) 0;
}

// Reshape: build an array whose shape is given by the data of shapeArray and
// whose elements are taken from inputArray.
//
// shapeArray: a scalar (rank 0) requests a rank-1 result of that length;
//             otherwise its d[0] data words are the new dimensions.
// inputArray: source of the elements and of the result's type tag.
// returns:    a new array. If more elements are wanted than the input has,
//             the input is repeated cyclically to fill the remainder.
Array reshapeArray(Array shapeArray, Array inputArray) {
    long long *wantedShape = shapeArray->p;
    long long wantedRank = 1;
    if (shapeArray->r) {
        wantedRank = shapeArray->d[0];
    }
    long long wantedCount = calcSize(wantedRank, wantedShape);
    long long availableCount = calcSize(inputArray->r, inputArray->d);

    Array reshaped = createArray(inputArray->t, wantedRank, wantedShape);

    // First pass: as many input elements as fit.
    long long direct = availableCount < wantedCount ? availableCount : wantedCount;
    copyMem(reshaped->p, inputArray->p, direct);

    // Second pass: source and destination overlap inside the result; copyMem
    // copies forward one word at a time, so the words just written are read
    // again and the input pattern repeats.
    long long shortfall = wantedCount - availableCount;
    if (shortfall > 0) {
        copyMem(reshaped->p + availableCount, reshaped->p, shortfall);
    }

    return reshaped;
}

// Shape of x: a new rank-1, type-0 array whose length is the rank of x and
// whose elements are x's dimensions.
Array sha(Array x) {
  long long rank = x->r;
  Array shape = createArray(0, 1, &rank);
  copyMem(shape->p, x->d, rank);
  return shape;
}
// Length of x along its leading axis, returned as a new rank-0, type-0 array.
// Arrays whose rank is not positive count as length 1.
Array size(Array x) {
    Array count = createArray(0, 0, 0);
    if (x->r > 0) {
        count->p[0] = x->d[0];
    } else {
        count->p[0] = 1;
    }
    return count;
}
// First data word of x, returned as a new rank-0 array.
// NOTE(review): the result is always tagged type 0, so the first item of a
// boxed array comes back as a plain integer holding the pointer value --
// confirm whether that is intended.
Array firstElement(Array x) {
  Array head = createArray(0, 0, 0);
  head->p[0] = x->p[0];
  return head;
}
// Reverse the order of all elements of inputArray.
//
// returns: a new array with the same rank, dimensions and type tag as the
//          input, holding its data words in reverse order.
//
// The type tag is carried over (as extractFromArray, concatenateArrays and
// reshapeArray do) so that reversing a boxed array still prints as boxes
// rather than as raw pointer values.
Array reverseArray(Array inputArray) {
    long long rank = inputArray->r;
    long long *dimensions = inputArray->d;
    long long totalElements = calcSize(rank, dimensions);
    Array reversedArray = createArray(inputArray->t, rank, dimensions);

    for (long long lo = 0, hi = totalElements - 1; lo < totalElements; lo++, hi--) {
        reversedArray->p[lo] = inputArray->p[hi];
    }

    return reversedArray;
}

// Symbol tables and dispatch arrays.
// A symbol's 1-based position in its string (as returned by indexOfChar) is
// the index into the matching array below. Slot 0 of every array is a
// placeholder, because 0 means "symbol not found". A 0 entry elsewhere means
// that form of the verb has no implementation; calling it is not guarded.
char verbTable[] = "+{!<#,*|", at[] = "\\/";
// Two-argument (dyadic) implementations, indexed by position in verbTable.
Array( * verbDispatcher[])(Array, Array) = {
    0,
    plus,               // 1: '+'
    extractFromArray,   // 2: '{'
    findElement,        // 3: '!'
    0,                  // 4: '<' (no dyadic form)
    reshapeArray,       // 5: '#'
    concatenateArrays,  // 6: ','
    mul,                // 7: '*'
    0                   // 8: '|' (no dyadic form)
  },
  // One-argument (monadic) implementations, indexed by position in verbTable.
  ( * vm[])(Array) = {
    0,
    id,            // 1: '+'
    size,          // 2: '{'
    sequence,      // 3: '!'
    boxArray,      // 4: '<'
    sha,           // 5: '#'
    0,             // 6: ',' (no monadic form)
    firstElement,  // 7: '*'
    reverseArray   // 8: '|'
  },
  // Adverb implementations, indexed by position in at[].
  ( * va[])(Array, Array) = {
    0,
    scanArray,  // 1: backslash
    reduce      // 2: '/'
  };

// "Over" adverb: fold a dyadic verb across the leading axis of inputArray.
//
// functionArray: not a real array -- the verb's index into verbDispatcher,
//                carried in a pointer-typed value.
// inputArray:    a rank-0 input is returned unchanged (same object).
// returns:       a new rank-0, type-0 array holding
//                verb(...verb(verb(p[0], p[1]), p[2])..., p[d[0]-1]).
//
// Each step takes only the first data word of the verb's result. Intermediate
// results are never released.
Array reduce(Array functionArray, Array inputArray) {
    if (inputArray->r == 0) {
        return inputArray;
    }

    Array accumulator = createArray(0, 0, 0);
    Array operand = createArray(0, 0, 0);
    accumulator->p[0] = inputArray->p[0];

    // Seed value for the operand; the loop below overwrites it before use.
    operand->p[0] = 0;
    if (inputArray->r > 1) {
        operand->p[0] = inputArray->p[1];
    }

    // Rank is non-zero here, so the leading dimension gives the item count.
    long long last = inputArray->d[0] - 1;
    for (long long next = 1; next <= last; next++) {
        operand->p[0] = inputArray->p[next];
        Array step = verbDispatcher[(long long)functionArray](accumulator, operand);
        accumulator->p[0] = step->p[0];
    }

    return accumulator;
}

// "Scan" adverb: like reduce, but keeps every intermediate result.
//
// functionArray: not a real array -- the verb's index into verbDispatcher,
//                carried in a pointer-typed value.
// inputArray:    a rank-0 input is returned unchanged (same object).
// returns:       a new type-0 array with the input's rank and dimensions;
//                element k is the fold of p[0..k]. Only the first d[0] words
//                are written.
//
// Each step takes only the first data word of the verb's result. Intermediate
// results are never released.
Array scanArray(Array functionArray, Array inputArray) {
  if (inputArray->r == 0) {
    return inputArray;
  }

  Array running = createArray(0, inputArray->r, inputArray->d);
  Array carry = createArray(0, 0, 0);
  Array operand = createArray(0, 0, 0);

  long long head = inputArray->p[0];
  running->p[0] = head;
  carry->p[0] = head;

  // Seed value for the operand; the loop below overwrites it before use.
  if (inputArray->r > 1) {
    operand->p[0] = inputArray->p[1];
  }

  // Rank is non-zero here, so the leading dimension gives the item count.
  long long last = inputArray->d[0] - 1;
  for (long long next = 1; next <= last; next++) {
    operand->p[0] = inputArray->p[next];
    long long folded = verbDispatcher[(long long)functionArray](carry, operand)->p[0];
    carry->p[0] = folded;
    running->p[next] = folded;
  }

  return running;
}

// Parse the run of decimal digits at the start of inputString.
//
// inputString:  NUL-terminated text; parsing stops at the first non-digit.
// numberLength: out-parameter, set to the number of digit characters consumed
//               (0 when the string does not start with a digit).
// returns:      the value of the digit run, or 0 when there are no digits.
//               A value too large for long long saturates at LLONG_MAX
//               instead of overflowing (signed overflow is undefined
//               behaviour); all digits are still consumed and counted.
long long convertStringToLongLong(char* inputString, long long *numberLength) {
  long long digitsSeen = 0;
  long long value = 0;

  for (; *inputString >= '0' && *inputString <= '9'; inputString++) {
    long long digit = *inputString - '0';
    if (value > (LLONG_MAX - digit) / 10) {
      value = LLONG_MAX; // value * 10 + digit would not fit
    } else {
      value = value * 10 + digit;
    }
    digitsSeen++;
  }

  *numberLength = digitsSeen;
  return value;
}

// Write one integer to stdout in decimal, followed by a single space.
void printInt(long long i) {
  fprintf(stdout, "%lld ", i);
}
// Write a line feed to stdout.
void newline() {
  putchar('\n');
}
// Write the interactive prompt: a single space character. Despite the name,
// no tab is emitted; main calls this before reading each input line.
void printTab() {
  fputs(" ", stdout);
}
// Print every element of an array to stdout, in storage order.
//
// Type 0: each data word is printed as an integer via printInt.
// Any other type: each data word is taken to be a pointer to another array;
// "< " is printed and that array is printed recursively.
// No shape information or line breaks are emitted.
void printArray(Array arrayToPrint) {
    long long remaining = calcSize(arrayToPrint->r, arrayToPrint->d);
    long long *cell = arrayToPrint->p;

    if (!arrayToPrint->t) {
        while (remaining-- > 0) {
            printInt(*cell++);
        }
        return;
    }

    while (remaining-- > 0) {
        printf("< ");
        printArray((Array)*cell++);
    }
}

// Non-zero when `a` is the character code of an ASCII lowercase letter
// ('a'..'z'); the parser uses these as variable names.
char isLowercase(long long a) {
  return !(a < 'a' || a > 'z');
}
// Non-zero when `a` is numerically below 'a' (97). Small values such as verb
// indices pass; lowercase letters and large values do not.
char qv(long long a) {
  return !(a >= 'a');
}
// Despite its name this does NOT test for uppercase letters: it is non-zero
// for every value numerically below 'a' (97) -- digits, punctuation, control
// codes, zero and negative values included -- exactly like qv above.
// executeExpression uses it to recognise small verb indices.
char isUppercase(long long a) {
  return !(a >= 'a');
}
// 1-based position of `c` in the adverb symbol string at[], or 0 when `c` is
// not an adverb. The position is carried in a pointer-typed value.
Array getAdverb(char c) {
  Array adverbIndex = indexOfChar(c, at);
  return adverbIndex;
}
// 1-based position of `c` in the verb symbol string verbTable[], or 0 when
// `c` is not a verb. The position is carried in a pointer-typed value.
Array verb(char c) {
  Array verbIndex = indexOfChar(c, verbTable);
  return verbIndex;
}

// Try to read an unsigned decimal literal at the current parse position.
//
// inputString: in/out cursor into the text being parsed.
// returns:     NULL, with the cursor untouched, when the current character is
//              not a digit; otherwise a new rank-0, type-0 array holding the
//              number.
//
// On success the cursor is left on the LAST digit of the literal rather than
// one past it, because parseInput advances by one character after every token.
Array noun(char** inputString) {
    char *cursor = *inputString;
    if (!(*cursor >= '0' && *cursor <= '9')) {
        return NULL;
    }

    long long digitCount;
    Array number = createArray(0, 0, 0);
    number->p[0] = convertStringToLongLong(cursor, &digitCount);

    *inputString = cursor + (digitCount - 1);
    return number;
}

// Evaluate a 0-terminated token list from right to left.
//
// Each token is a pointer-sized value that is one of: a real Array pointer
// (a number literal), a small verb index (1..8, from verb()), or a raw
// character code (variable letters, ':', adverb symbols).
//
// Cases, tried in this order on the first two tokens:
//   letter ':' rest   evaluate rest, store it in the letter's slot, return it
//   letter ...        replace the letter by the value stored in its slot
//   verb adverb rest  call the adverb with (verb index, value of rest)
//   verb rest         call the verb's monadic form on the value of rest
//   value verb rest   call the verb's dyadic form on (value, value of rest)
//   value             return it
//
// "verb" here means anything isUppercase accepts, i.e. any value below 'a'.
// No table index is range-checked, and empty table slots are not guarded.
Array executeExpression(Array *expressionElements) {
  long long head = (long long)expressionElements[0];
  long long next = (long long)expressionElements[1];

  if (isLowercase(head)) {
    long long slot = head - 'a';
    if (next == ':') {
      st[slot] = executeExpression(expressionElements + 2);
      return st[slot];
    }
    head = (long long)st[slot];
  }

  if (isUppercase(head)) {
    long long adverbIndex = (long long)getAdverb(next);
    if (adverbIndex) {
      Array (*applyAdverb)(Array, Array) = va[adverbIndex];
      return applyAdverb((Array)head, executeExpression(expressionElements + 2));
    }
    Array (*applyMonad)(Array) = vm[head];
    return applyMonad(executeExpression(expressionElements + 1));
  }

  if (!next) {
    return (Array)head;
  }

  Array (*applyDyad)(Array, Array) = verbDispatcher[next];
  return applyDyad((Array)head, executeExpression(expressionElements + 2));
}

Array * parseInput(char* inputStr) {
  Array parsedElement;
  Array *parsedElements;
  char currentChar;
  long long strLength = strlen(inputStr);

  if (strLength==1) {
    printf("Error: Input string is empty\n");
    exit(1);
  }
  if (!strLength) return (Array *)0;

  inputStr[--strLength] = 0;
  parsedElements = (Array *)allocMem(strLength + 1);

  strLength = 0;
  while ((currentChar = *inputStr)) {
    parsedElement = noun(&inputStr);
    if (!parsedElement) {
      parsedElement = verb(currentChar);
    }
    if (!parsedElement) {
      parsedElement = (Array)(long long)currentChar;
    }
    parsedElements[strLength++] = parsedElement;
    inputStr++;
  }

  parsedElements[strLength] = 0;
  return parsedElements;
}

// Read-eval-print loop: show the prompt, read one line from stdin, parse it,
// evaluate it, print the result and a newline. Ends with status 0 when fgets
// reports end of input or an error.
int main() {
    char inputString[99];

    printTab(); // prompt before the first read
    while (fgets(inputString, sizeof inputString, stdin) != NULL) {
        printArray(executeExpression(parseInput(inputString)));
        newline();
        printTab(); // prompt before the next read
    }

    return 0;
}

// https://codebeautify.org/cpp-formatter-beautifier

kelas commented 10 months ago

@dataf3l thanks for caring to take a look, i'm flattered.

as it follows, the code isn't exactly mine - it is due to a much taller Canadian bear, only he wrote it when his hair wasn't completely grey yet, and i was 8 years old.

i'm short on time to figure out what "codebeautify" exactly is, but the result looks like a job of a fairly well-trained AI model. as it happens, those things still miss out on subtleties, and as it must be evident from your snippet still have a major difficulty maintaining consistent and confident style - which is the whole idea and the central point of this little piece of history.

impressive, though.

does it compile, if you know?

kelas commented 10 months ago

sorry, i missed your point. you've said you've made minor modifications, which i interpret as modifications to the logic of this software, but i have no means of figuring out what you've done. please point me in the right direction.

as for AI hiccups, the simplest example is pt(), which is not really "printTab() // print a tab or space", it stands for prompt, which is a single 0x20.

would you care to try the same approach on a slightly more production-grade piece of software which is written in the same style? i'm really keen to learn what the results may be:

https://github.com/kparc/bcc/blob/master/ec/e.c

kelas commented 10 months ago

another example of where the model successfully got the wind of what was going on, but completely obliterated the context, terminology and consistency of the original code is this:

Array reduce(Array x, Array y);
Array scanArray(Array x, Array y);

as i am responsible for basic implementation of these two staple k adverbs in this toy code, i have the authority to say that their names are cast in stone as over and scan. although reduce() hits the mark to a degree (i'd also accept fold()), scanArray() lost it completely. but what's worse is that the machine-generated code failed to see and reflect the rhyme between scan and over.

kelas commented 10 months ago

on a closing note:

char isUppercase(long long a) { // qv
  return a < 'a';
}

on your local terminal, try:

$ ascii -d

(you'd hopefully agree that isUppercase() is a fatally misleading interpretation of what arthur actually tests for in qv())

kelas commented 10 months ago

also, AI tends to use Element everywhere where it sees a member of an array. In some contexts, e.g. in parse function, this is just plain wrong - if the model had any clue of what it is "beautifying", it would use "Token" instead, which I'd gladly accept.

iota is totally not Sequence. I'd accept til instead, but that's far beyond the agency of any code beautifier you can feed this code to.

kelas commented 10 months ago

i gave you honest and direct feedback, and i hope you'll find it useful.

in return, i'd like to hear your honest opinion as to what degree the original code benefitted from your modifications.

dataf3l commented 10 months ago


I first approached the code not knowing exactly what it did other than it
was some form of interpreter or compiler:

I used “CPP” which can expand all the macros and an online code  beautifier to
add line breaks.

Later, I removed every comma I could find.

After this, I asked the AI what new names could be given, since this is a SHORT
program, it gave a lot of new names, I haven’t gotten around to renaming
everything the way the AI likes it.

Then, I asked the AI to refactor every single function, and tested the
interpreter along the way, to make sure the functionality was still there, so
this is bug by bug compatible (it segfaults a lot).

But it can read the same expressions the original one could apparently.

I did this so I could kind of understand the code. I can’t say I totally
understand it now, but I think it’s a step in that direction.

After all the work I googled what is K and J and found these wonderful
languages, perhaps with more time I’ll learn how to use them.

“does it compile, if you know?” => yes, it does build.

Try: gcc -o ./temp filename.c

“you've said you've made minor modifications”

Well, I didn’t add any keywords or free() or things like that.
But I guess one could consider them to be not so minor, perhaps this is a
subjective matter

“pt(), which is not really "printTab()" ”

If the AI likes printTab, I call it printTab().  But then again I totally see
what you mean by the AI misinterpreting the original intent, I’m surprised it
even got whatever was right right, which was probably a lot.

“would you care to try the same approach on a slightly more production-grade piece of software which is written in the same style? i'm really keen to learn what the results may be:“

That’s a lot of code.  Challenge accepted.

“over and scan”

I guess one could rename it back the way it was before…

To me, the whole question was: can AI make sense of the code and make it
understandable by mere mortals who don’t know much? And can it make the code not
only readable but also enjoyable?

These are the questions on my mind. There is a lot of code out there, but not all
of it is readable, so one wonders: can AI be used to make code better? Can
we learn to understand other people’s code using the AI?

These are the questions I set out to answer. Can we refactor existing code? Can
the AI somehow make sense of single-letter variables and very terse code?

Sure, we may not agree with the AI on everything, but surely the work done by it
is perhaps more digestible by the average programmer than an endless array of
rather mysterious single-letter local variables.

“…isUppercase() is a fatally misleading interpretation...”

I noticed this as well. I didn’t have time to change it to a more meaningful
name, and I also don’t think it’s a good interpretation. I have no clear thoughts on
what to call it at this point.

“AI tends to use Element everywhere … this is just plain wrong … it would use "Token" instead, which I'd gladly accept.”

Ok then I change it I guess… We’ll have to wait and see, for the next version.

“in return, i'd like to hear your honest opinion as to what degree the original code benefitted from your modifications.“

I did in fact add a single if statement somewhere manually because I didn’t like
the fact that it segfaults on some edge cases, it still segfaults a lot, because
error checking is non existent, but I figured maybe one can add that “over
time”.

So, the code is basically not yet improved, it needs more work before one can
start improving on it, I noticed a stale PR AFTER I started the work, that had
more features!, too late for that now I guess.

I had fun trying to understand the code, but some aspects of it are still a
mystery to me, the intent isn’t too clear, unfortunately.

I also didn’t get around renaming the names of the fields of the struct like p
or r or d to something different and perhaps more informative, yet, perhaps in a
future session.

I’ve considered adding the original code as “comments at the beginning of each
line” so it’s easier to keep track of the changes.

But I thought that could be considered offensive, so maybe not.

I also thought perhaps one could rewrite this in Rust, but I fear that, with the current
design, Rust’s borrow checker won’t like the code very much, if at all; I
foresee a lot of weird errors if one were to attempt such a thing.

Thanks for the feedback, and for the prompt response, I’ll review the other
code.

kelas commented 10 months ago

After all the work I googled what is K and J and found these wonderful languages

oh. welcome to the club :)

it segfaults a lot

this is by design. this is a sketch of an interpreter written in one sitting. i dare to assume that back in 1989 Arthur Whitney couldn't afford his own AT&T 3B1, which wasn't exactly a cheap machine.

I’ve considered adding the original code as “comments at the beginning of each line”

so did i, but i opted to leave this piece of code undocumented, and there are good reasons for that. and it keeps on paying off.

at the beginning of each line

usually we annotate this kind of code this way: https://github.com/kparc/pf/blob/master/pf.h it is up to you whether you want to beautify this code any further or read it as it is.

If the AI likes printTab, I call it printTab()

if you're available to take friendly advice - just don't. think for yourself and question "authority".

dataf3l commented 10 months ago

this were the ideas by the AI:

This program appears to be a minimalistic implementation of some operations common in array programming languages like J. It defines operations on arrays (referred to as K-structures in the code) and includes functions for arithmetic, reshaping, boxing, etc.

Here's a proposed renaming of variables and functions to make the code more readable. The goal is to maintain brevity while improving clarity. The renaming is based on the context in which these entities are used and the typical naming conventions in array programming:

Original Name	New Name	Description/Notes
`_`	`VoidFunction`	Typedef for void function
`C`	`Char`	Typedef for char
`S`	`String`	Typedef for char pointer (string)
`J`	`Long`	Typedef for long long
`K`	`Array`	Typedef for main array structure
`st`	`globalArrayStack`	Global stack for arrays
`WS`	`WorkspaceSize`	Tracks the workspace memory usage
`ma`	`allocMem`	Memory allocation function
`mv`	`copyMem`	Memory copy function
`tr`	`calcSize`	Calculate size of an array
`i`	`indexOfChar`	Find index of a character in a string
`ga`	`createArray`	Function to create a new array
`id`	`identity`	Identity function
`iota`	`sequence`	Generate sequence array
`plus`	`addArrays`	Add two arrays
`mul`	`multiplyArrays`	Multiply two arrays
`from`	`extract`	Extract subarray
`box`	`boxValue`	Box a value into an array
`cat`	`concatenate`	Concatenate two arrays
`find`	`findElement`	Find element (not yet implemented)
`rsh`	`reshapeArray`	Reshape an array
`sha`	`shapeArray`	Get the shape of an array
`size`	`arraySize`	Get the size of an array
`fst`	`firstElement`	Get the first element of an array
`rev`	`reverseArray`	Reverse an array
`vt`	`verbTable`	Table for verbs
`at`	`adverbTable`	Table for adverbs
`vd`	`verbDispatcher`	Dispatcher for verb functions
`vm`	`monadDispatcher`	Dispatcher for monadic functions
`va`	`adverbDispatcher`	Dispatcher for adverb functions
`over`	`reduce`	Reduction operation
`scan`	`scanArray`	Scan operation
`sI`	`stringToInt`	Convert string to integer
`pi`	`printInt`	Print an integer
`nl`	`newline`	Print a newline
`pt`	`printTab`	Print a tab (or space)
`pr`	`printArray`	Print an array
`qp`	`isLowercase`	Check if char is lowercase
`qv`	`isUppercase`	Check if char is uppercase
`adv`	`getAdverb`	Get adverb from character
`verb`	`getVerb`	Get verb from character
`noun`	`getNoun`	Get noun (number) from string
`ex`	`execute`	Execute parsed expressions
`wd`	`parseInput`	Parse input string into expressions
`main`	`main`	Main function (unchanged)

Please note that some functions, like find, are marked as "not yet implemented (nyi)". Additionally, the use of single-letter variable names and terse style is typical in languages like J, which this program seems to be inspired by. My renaming aims to strike a balance between maintaining the spirit of the original code and improving readability.

The reason the code is inconsistent is that I didn't apply all of the AI's suggestions, just some of them.

kelas commented 10 months ago

this were the ideas by the AI [...]

hats off, this part of the footwork the machine got almost flawlessly. only i'm not sure why anyone would want to make use of the middle column. it is superfluous.

endless array of rather mysterious single-letter local variables.

yes, i know - at first, some such code looks like an "endless" soup of someone's exercise in obscurity and pretentiousness.

but local variables are just xyzrf, and they are neither endless nor mysterious. sometimes you'll see extra locals (s, c, t) but they are rare and easy to infer even without comments.

i assume you discovered this repo via HN post. there's plenty of links there which point to resources which describe rationale and conventions of this particular style of c.

this were the ideas by the AI

i learned something new today. thank you.

kelas commented 10 months ago

this were the ideas by the AI

which AI did you employ, and how did you state the prompt?

ktye commented 10 months ago

@dataf3l fwiw, these are the next evolutions of j you can try and run:

This comes with a paper from the APL90 conference. the only version i got, was a dos binary distributed at that time: https://ktye.github.io/zoo/#j90

The next version from roger where i could get the source is j4.1 or 4.2: this is the first fully working version that you can study the source code of. compiled to wasm: https://ktye.github.io/zoo/#j42