kevinlawler / kona

Open-source implementation of the K programming language
ISC License
1.36k stars 139 forks source link

Incorrect format when saving K data to a file in Windows #643

Open tavmem opened 1 year ago

tavmem commented 1 year ago

If you make this addition to the code in src/kx.c (to display the result in detail):

$ git diff
diff --git a/src/kx.c b/src/kx.c
index ffa44da..447d2dd 100644
--- a/src/kx.c
+++ b/src/kx.c
@@ -634,6 +634,7 @@ K ex(K a)   //Input is (usually, but not always) 7-0 type from wd()
   fwh=stk=stk1=prj=prj2=fsf=0;
   if(prnt)cd(prnt);
   prnt=0;
+  sd_(z,2);
   R z; }

 Z K ex0(V*v,K k,I r)   //r: {0,1,2}->{code, (code), [code]}.
$ 

Then, in Linux you get

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5); 1: "file";
     0x7fbd6217b040 0x7fbd6217b058            7-6 6 0   
  \\
$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5); 1: "file"
     0x7fea0a2d8b00 0x7fea0a2d8b18            1-6 0 2   
("a"
 4 5)
 0x7fea0a2d8b18     0x7fea0a2d8b40 0x7fea0a2d8b58            1-6 3 1   "a"
 0x7fea0a2d8b20     0x7fea0a2d4048 0x7fea0a2d4060            1-6 -1 2   4 5
("a"
 4 5)
  \\
$ xxd file.K
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$

In Windows, you get


$ rlwrap -n ./k
kona      \ for help. \\ to exit.

    "file" 1: ("a"; 4 5); 1: "file"
     0000000000890b00 0000000000890b18            1-6 0 2
("a"
 4 5)
 0000000000890b18     0000000000890b40 0000000000890b58            1-6 3 1   "a"
 0000000000890b20     0000000000dd0048 0000000000dd0060            0-1 -1 2   4 5
("a"
 4 5)
  \\
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0000 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$
Line 00000040 is incorrect in Windows.
tavmem commented 1 year ago

Just as a check, as to what occurs using k2.8

$ rlwrap -n ./k
K 2.8 2000-10-10 Copyright (C) 1993-2000 Kx Systems
Evaluation. Not for commercial use. 
\ for help. \\ to exit.

  "file" 1: ("a"; 4 5); 1: "file"
("a"
 4 5)
  \\

$ xxd file.l
00000000: fdff ffff 0100 0000 0000 0000 0200 0000  ................
00000010: fdff ffff 0100 0000 0300 0000 6100 0000  ............a...
00000020: fdff ffff 0100 0000 ffff ffff 0200 0000  ................
00000030: 0400 0000 0500 0000                      ........

There are 3 occurrences of fdff ffff 0100 0000. The Windows version of kona is incorrect.

tavmem commented 1 year ago

Interestingly, making this change to the function _1d_write (which creates the file):

$ git diff
diff --git a/src/0.c b/src/0.c
index 6ed900a..2f46d93 100644
--- a/src/0.c
+++ b/src/0.c
@@ -577,6 +577,7 @@ K _1d(K x,K y) {

 //TODO: for testing this, use 1:write and 2:read (or 1:read) to confim items are the same before write & after read
 Z K _1d_write(K x,K y,I dosync,S e) {
+  sd_(y,2);
   //Note: all file objects must be at least 4*sizeof(I) bytes...fixes bugs in K3.2, too
   //K3.2 Bug - "a"1:`a;2:"a" or 1:"a" - wsfull, tries to read sym but didn't write enough bytes?
   I n=disk(y);
$ 

gives this result in both Linux and Windows

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5)
     0x7f828c946680 0x7f828c946698            2-6 0 2   
("a"
 4 5)
 0x7f828c946698     0x7f828c9468c0 0x7f828c9468d8            3-6 3 1   "a"
 0x7f828c9466a0     0x7f828c946980 0x7f828c946998            3-6 -1 2   4 5
  \\
$

So, when the function _1d_write is called, the reference count for the vector 4 5 is 3. In Linux, the reference count gets reset to 1 when writing the file. In Windows, it gets reset to 0.

tavmem commented 1 year ago

This is weird ... I can't explain it. I downloaded a fresh copy of kona from github to Windows ... and the problem disappeared

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

    "file" 1: ("a"; 4 5)
  \\
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$

I'm closing this issue ... and will reopen it if the problem recurs.

tavmem commented 1 year ago

This problem appeared when working on issue #634. It is possible that some change that I made to track the cause of that issue resulted in this.

tavmem commented 1 year ago

Found the problem again ... in a fresh download of Kona from Github to Windows ... Creating the data structure and saving it to a file works fine.

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

    "file" 1: ("a"; 4 5)
  \\
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................

Reading the data structure from the file modifies the saved data structure, decrementing the reference count on the vector 4 5:

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

    1: "file"
("a"
 4 5)
  \\
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0000 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$

This problem does not occur in Linux.

tavmem commented 1 year ago

Looks like I found where the problem exists. In the function _1m_r in src/0.c we currently have

    //ref count should be reset to 1 after mapping
    #ifndef WIN32
    mrc((K)z,1);   //suppressed in Windows to fix issue #628
    #endif

If we allow this to execute in WIN32, then the reference count gets completely messed up in the file, the type gets reset to 0, and the 4,5 vector disappears from the file.

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

    "file" 1: ("a"; 4 5)
  \\
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$ rlwrap -n ./k
kona      \ for help. \\ to exit.

    1: "file"
("a"
 4 5)
  \\
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 4006 8900 0000 0000  ........@.......
00000050: 0000 0000 0000 0000 0000 0000 0000 0000  ................
00000060: 0000 0000 0000 0000 0000 0000 0000 0000  ................
$

However, skipping the reset in Windows leaves an incorrect reference count of 0. We need to fix the function mrc so that it properly resets the reference count in Windows.

$ grep " mrc(" src/km.c
K mrc(K x,I c){I k=sz(xt,xn);I r=lsz(k);x->_c=(c<<8)|r;R x;}
$ 
tavmem commented 1 year ago

This is interesting ... skipping the execution of function mrc in Win32 was done in commit b714da0 made on Jan 14, 2023 to fix an "Invalid Argument Error" in Windows, i.e., issue 628. Allowing mrc to execute in Win32 no longer causes the "Invalid Argument Error". So, with this change

--- a/src/0.c
+++ b/src/0.c
@@ -551,9 +551,7 @@ Z K _1m_r(I f,V fixed, V v,V aft,I*b) {   //File descriptor, moving * into mmap,
     z=(K)(((V)u+mod)-3*sizeof(I)); //3*sizeof(I) for c,t,n

     //ref count should be reset to 1 after mapping
-    #ifndef WIN32
     mrc((K)z,1);   //suppressed in Windows to fix issue #628
-    #endif
     //if(1<=t || 3<=t){dd(z->n)} // ???
   }

we get this in Windows

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5)
  1: "file"
("a"
 4 5)
  "file" 1: ("a"; 4 5)
  1: "file"
("a"
 4 5)
  \\

however, we get

$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0008 8900 0000 0000  ................
00000050: 0000 0000 0000 0000 0000 0000 0000 0000  ................
00000060: 0000 0000 0000 0000 0000 0000 0000 0000  ................

where not only is the reference count for the vector (4,5) wrong, but the type-of-array, the number-of-elements and the vector data are also obliterated, which was not the case after commit b714da0 for issue 628.

tavmem commented 1 year ago

Looks like the problem with the K data file is not caused by the function mrc. If we make these changes in Linux

$ git diff
diff --git a/src/0.c b/src/0.c
index 6ed900a..1666705 100644
--- a/src/0.c
+++ b/src/0.c
@@ -551,8 +551,9 @@ Z K _1m_r(I f,V fixed, V v,V aft,I*b) {   //File descriptor, moving * into mmap,
     z=(K)(((V)u+mod)-3*sizeof(I)); //3*sizeof(I) for c,t,n

     //ref count should be reset to 1 after mapping
+    O("z: "); sd_(z,2);
     #ifndef WIN32
-    mrc((K)z,1);   //suppressed in Windows to fix issue #628
+    //mrc((K)z,1);   //suppressed in Windows to fix issue #628
     #endif
     //if(1<=t || 3<=t){dd(z->n)} // ???
   }
$

then

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5)
  1: "file"
z:      0x7fe40a8d2048 0x7fe40a8d2060            0-1 -1 2   4 5
("a"
 4 5)
  \\
$ xxd file.K
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$ 

Making similar changes in Windows gets us

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5)
  1: "file"
z:      00000000008d0048 00000000008d0060            0-1 -1 2   4 5
("a"
 4 5)
  \\
$
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0000 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$
tavmem commented 1 year ago

In Windows

$ rm file.l
$ dir
Kona.png  LICENSE  Makefile  README.md  bench  k.exe  k_test.exe  misc  src  verb
$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5)
  \\
$ dir
Kona.png  LICENSE  Makefile  README.md  bench  file.l  k.exe  k_test.exe  misc  src  verb
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$

This shows that the file is created correctly. The file is corrupted in the read step

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  1: "file"
("a"
 4 5)
  \\
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0000 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$
tavmem commented 1 year ago

In Linux, it is relatively straightforward to check on the status of the saved file at any point in the command stream in the read step. Making these modifications checks the file immediately before executing the mrc command.

diff --git a/src/0.c b/src/0.c
index 6ed900a..52accf1 100644
--- a/src/0.c
+++ b/src/0.c
@@ -22,6 +22,10 @@
 #include "v.h"
 #include "vf.h"

+char *binaryPath = "/bin/xxd";
+char *arg1 = "-c16";
+char *arg2 = "/home/tom/kona/file.K";
+
 //Number verbs, monadic & dyadic

 //TODO: Do the 0:,1:,5:,6: writes need explicit file level locks (two K3.2 instances, second process can't write to same file first is (error))
@@ -551,6 +555,8 @@ Z K _1m_r(I f,V fixed, V v,V aft,I*b) {   //File descriptor, moving * into mmap,
     z=(K)(((V)u+mod)-3*sizeof(I)); //3*sizeof(I) for c,t,n

     //ref count should be reset to 1 after mapping
+    O("z: "); sd_(z,2);
+    execl(binaryPath, binaryPath, arg1, arg2, NULL);
     #ifndef WIN32
     mrc((K)z,1);   //suppressed in Windows to fix issue #628
     #endif
$

Then we get this, showing that the file is not corrupt.

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5)
  1: "file"
z:      0x7f5df3bff048 0x7f5df3bff060            0-1 -1 2   4 5
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$ 

However, these changes do not work in Windows, and I haven't yet found a method that allows checking the file at an arbitrary point in the command stream in Windows.

tavmem commented 1 year ago

If we use exit(0) before mrc would have been executed

diff --git a/src/0.c b/src/0.c
index 6ed900a..06829a2 100644
--- a/src/0.c
+++ b/src/0.c
@@ -549,6 +549,7 @@ Z K _1m_r(I f,V fixed, V v,V aft,I*b) {   //File descriptor, moving * into mmap,
     mUsed+=length;if(mUsed>mMax)mMax=mUsed;

     z=(K)(((V)u+mod)-3*sizeof(I)); //3*sizeof(I) for c,t,n
+    exit(0);

     //ref count should be reset to 1 after mapping
     #ifndef WIN32

then file.l is OK

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5)
  1: "file"
$
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$

If we allow mrc to execute and exit(0) right after mrc

diff --git a/src/0.c b/src/0.c
index 6ed900a..a82b7ec 100644
--- a/src/0.c
+++ b/src/0.c
@@ -551,9 +551,10 @@ Z K _1m_r(I f,V fixed, V v,V aft,I*b) {   //File descriptor, moving * into mmap,
     z=(K)(((V)u+mod)-3*sizeof(I)); //3*sizeof(I) for c,t,n

     //ref count should be reset to 1 after mapping
-    #ifndef WIN32
+    //#ifndef WIN32
     mrc((K)z,1);   //suppressed in Windows to fix issue #628
-    #endif
+    exit(0);
+    //#endif
     //if(1<=t || 3<=t){dd(z->n)} // ???
   }

then file.l gets corrupted on line 040

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5)
  1: "file"
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0601 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$

If we simply prevent mrc from executing and do not exit(0) at all

diff --git a/src/0.c b/src/0.c
index 6ed900a..453ae24 100644
--- a/src/0.c
+++ b/src/0.c
@@ -552,7 +552,7 @@ Z K _1m_r(I f,V fixed, V v,V aft,I*b) {   //File descriptor, moving * into mmap,

     //ref count should be reset to 1 after mapping
     #ifndef WIN32
-    mrc((K)z,1);   //suppressed in Windows to fix issue #628
+    //mrc((K)z,1);   //suppressed in Windows to fix issue #628
     #endif
     //if(1<=t || 3<=t){dd(z->n)} // ???
   }

then file.l still gets corrupted on line 040, but differently

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5)
  1: "file"
("a"
 4 5)
  \\
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0000 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
tavmem commented 1 year ago

This is interesting (done on a Windows computer with no mods to current Kona code):

$ date
Tue Sep 12 13:17:17 EDT 2023
$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5)
  \\
$ ls -lrt file.l
-rw-r--r-- 1 tavme tavme 112 Sep 12 13:17 file.l
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$ date
Tue Sep 12 13:18:05 EDT 2023
$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  1: "file"
("a"
 4 5)
  \\
$ ls -lrt file.l
-rw-r--r-- 1 tavme tavme 112 Sep 12 13:17 file.l
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0000 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$

The time is 13:17. Kona creates the file using the command "file" 1: ("a"; 4 5). The ls -lrt file.l command shows that the file was created at 13:17. The xxd file.l command shows that row 040 contains 0100. The time is 13:18. Kona reads the file using the command 1: "file". The ls -lrt file.l command still shows 13:17, implying that the file has not been modified. Yet the xxd file.l command shows that line 040 has been updated to 0000.

tavmem commented 1 year ago

Using Windows version and making this change

$ git diff src/km.c
-K mrc(K x,I c){I k=sz(xt,xn);I r=lsz(k);x->_c=(c<<8)|r;R x;}
+K mrc(K x,I c){I k=sz(xt,xn);I r=lsz(k);x->_c=(c<<8)|r; O("x->_c: %lld\n",x->_c); exit(0); R x;}

we see that x->_c gets set to 262, and we get the wrong result (0601) in line 0040

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5)
  1: "file"
x->_c: 262
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0601 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................

making this change

$ git diff src/km.c
-K mrc(K x,I c){I k=sz(xt,xn);I r=lsz(k);x->_c=(c<<8)|r;R x;}
+K mrc(K x,I c){I k=sz(xt,xn);I r=lsz(k);x->_c=(c<<8)|r; x->_c=1; O("x->_c: %lld\n",x->_c); exit(0); R x;}

We see that if x->_c is set to 1, then we get the correct result

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5)
  1: "file"
x->_c: 1
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................

So, on the face of it, the 262 seems to be the problem ... in Windows.

tavmem commented 1 year ago

However ... making the same changes in Linux

$ git diff src/km.c
diff --git a/src/km.c b/src/km.c
-K mrc(K x,I c){I k=sz(xt,xn);I r=lsz(k);x->_c=(c<<8)|r;R x;}
+K mrc(K x,I c){I k=sz(xt,xn);I r=lsz(k);x->_c=(c<<8)|r; O("x->_c: %lld\n",x->_c); exit(0); R x;}

we see that 262 works fine

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5);  1: "file"
x->_c: 262
$ xxd file.K
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................

Setting x->_c to 1 also works fine

$ git diff src/km.c
-K mrc(K x,I c){I k=sz(xt,xn);I r=lsz(k);x->_c=(c<<8)|r;R x;}
+K mrc(K x,I c){I k=sz(xt,xn);I r=lsz(k);x->_c=(c<<8)|r; x->_c=1; O("x->_c: %lld\n",x->_c); exit(0); R x;}

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5);  1: "file"
x->_c: 1
$ xxd file.K
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................

In fact, setting x->_c to any value at all (like 1111) works fine.

$ git diff src/km.c
-K mrc(K x,I c){I k=sz(xt,xn);I r=lsz(k);x->_c=(c<<8)|r;R x;}
+K mrc(K x,I c){I k=sz(xt,xn);I r=lsz(k);x->_c=(c<<8)|r; x->_c=1111; O("x->_c: %lld\n",x->_c); exit(0); R x;}

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5);  1: "file"
x->_c: 1111
$ xxd file.K
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
$ 

Furthermore, skipping mrc altogether works fine

$ git diff src/0.c
-    mrc((K)z,1);   //suppressed in Windows to fix issue #628
+    //mrc((K)z,1);   //suppressed in Windows to fix issue #628

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5);  1: "file"
("a"
 4 5)
  \\
$ xxd file.K
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................
tavmem commented 1 year ago

In fact, if we continue to suppress mrc, but exit(0) immediately after, there is no damage. Looks like the damage is done later on.

$ git diff src/0.c
diff --git a/src/0.c b/src/0.c
index 6ed900a..4ca4db8 100644
--- a/src/0.c
+++ b/src/0.c
@@ -554,6 +554,7 @@ Z K _1m_r(I f,V fixed, V v,V aft,I*b) {   //File descriptor, moving * into mmap,
     #ifndef WIN32
     mrc((K)z,1);   //suppressed in Windows to fix issue #628
     #endif
+    exit(0);
     //if(1<=t || 3<=t){dd(z->n)} // ???
   }

$ rlwrap -n ./k
kona      \ for help. \\ to exit.

  "file" 1: ("a"; 4 5);   1: "file"
$ xxd file.l
00000000: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000010: 0000 0000 0000 0000 0200 0000 0000 0000  ................
00000020: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000030: 0300 0000 0000 0000 6100 0000 0000 0000  ........a.......
00000040: fdff ffff ffff ffff 0100 0000 0000 0000  ................
00000050: ffff ffff ffff ffff 0200 0000 0000 0000  ................
00000060: 0400 0000 0000 0000 0500 0000 0000 0000  ................