Open lassik opened 3 years ago
Hi! Contributor to LSON here. It would be nice to work together on this, since I think it's a really neat thing to have! Perhaps it could even be used as a lightweight data format for C programs?
Awesome. We were hoping to find people just like you! By all means, let's work together. I gave you write access to the repo; in case you need something, just add it.
C programs are definitely in the cards. Issue #1 gives a C library we should adapt.
We're mostly schemers and lispers so far; I'm a fan of the ML family as well. Fun to have someone from F#
There's a channel #pose-format in case you use IRC and Freenode. Or we can talk via issues and email.
Does LSON have a formal spec somewhere?
LSON does not have a formal spec. I'm thinking that it would be sweet to use pose as the spec for LSON as well?
A quick and dirty port from SML to F# would be:
open System
// Raised by the reader functions below when the input text is malformed
// (unterminated string/list, unknown escape, stray ')', ...). Carries a
// human-readable message.
exception SyntaxError of string;
// Expression tree shared by the reader and the writer.
//   EList   - parenthesised list of nested expressions
//   ESymbol - bare token, or text read between '|' delimiters
//   EString - text read between '"' delimiters
//   EReal / EInt / EIntInf - numeric variants; the trailing comments name
//     the SML types they were ported from. NOTE(review): nothing in the
//     reader shown here constructs them yet (number parsing is a stub).
type Exp
= EList of Exp list
| ESymbol of string
| EString of string
| EReal of float //real
| EInt of int
| EIntInf of int64 //IntInf.int;
/// Small helper module providing `String.implode` for the reader code.
module internal String =
    /// Builds a string from a list of characters, keeping their order.
    let implode (c: char list) : string =
        System.String(Array.ofList c)
/// True when `goalChar` occurs anywhere in `s`.
let stringContainsChar (s: string) (goalChar: char) : bool =
    s.IndexOf(goalChar) >= 0
/// True when `char` is whitespace according to `Char.IsWhiteSpace`.
let charIsWhitespace (char: char) : bool =
    Char.IsWhiteSpace(char)
/// True when `char` is a letter according to `Char.IsLetter`.
let charIsAlphabetic (char: char) : bool =
    Char.IsLetter(char)
/// True when `char` is a number character according to `Char.IsNumber`.
let charIsNumeric (char: char) : bool =
    Char.IsNumber(char)
/// True for the characters every token position accepts: letters, number
/// characters, and any of `_$!?<=>+-*`.
let charIsTokenCommon char =
    charIsAlphabetic char
    || charIsNumeric char
    || stringContainsChar "_$!?<=>+-*" char
/// True when `char` may start a token: any common token character, or '/'.
let charIsTokenFirst char =
    charIsTokenCommon char || char = '/'
/// True when `char` may appear after the first character of a token:
/// anything allowed as a first character, plus any of `.@~^%&`.
let charIsTokenNext char =
    if charIsTokenFirst char then
        true
    else
        stringContainsChar ".@~^%&" char
/// Converts a token's text into an expression.
/// As written, every token becomes an `ESymbol`; no number parsing is done.
let parseNumberOrSymbol (string: string) : Exp =
    string |> ESymbol
/// Thin wrappers over `StreamReader` / `StreamWriter` used by the reader and
/// writer functions.
module internal TextIO =
    open System.IO

    /// Consumes and returns the next character, or `None` at end of stream.
    let input1 (s: StreamReader) : char option =
        if s.EndOfStream then None else Some(char (s.Read()))

    /// Returns the next character without consuming it, or `None` at end of
    /// stream.
    let lookahead (s: StreamReader) : char option =
        if s.EndOfStream then None else Some(char (s.Peek()))

    /// Writes the string `s1` to the writer `s`.
    let output (s: StreamWriter, s1: string) : unit = s.Write(s1)

    /// Writes the single character `s1` to the writer `s`.
    let output1 (s: StreamWriter, s1: char) : unit = s.Write(s1)
/// Consumes characters up to and including the next '\n', or until the
/// stream is exhausted.
let rec skipRestOfLine stream =
    match TextIO.input1 stream with
    | Some c when c <> '\n' -> skipRestOfLine stream
    | _ -> ()
/// Advances the stream past any run of whitespace and ';' line comments,
/// stopping at the first other character or at end of stream.
let rec skipWhitespaceAndComments stream =
    match TextIO.lookahead stream with
    | Some ';' ->
        skipRestOfLine stream
        skipWhitespaceAndComments stream
    | Some c when charIsWhitespace c ->
        // Only peeked so far; consume the whitespace character and continue.
        TextIO.input1 stream |> ignore
        skipWhitespaceAndComments stream
    | _ -> ()
/// Consumes characters from `stream` for as long as they satisfy
/// `charIsTokenNext` and returns them as a string.
/// The `char` argument is not used: only characters still in the stream end
/// up in the result.
let readRestOfTokenAsString char stream =
    let rec collect acc =
        match TextIO.lookahead stream with
        | Some next when charIsTokenNext next ->
            TextIO.input1 stream |> ignore
            collect (next :: acc)
        | _ -> acc
    // Characters were accumulated newest-first; restore reading order.
    collect [] |> List.rev |> String.implode
/// Reads one token from `stream` and returns its text.
/// Raises `SyntaxError` at end of stream, or when the next character cannot
/// start a token.
let readTokenAsString stream =
    match TextIO.input1 stream with
    | None -> raise (SyntaxError "End-of-file while expecting token")
    | Some first when charIsTokenFirst first ->
        // `first` has already been consumed from the stream, and
        // readRestOfTokenAsString only collects what is still unread, so the
        // leading character has to be prepended here or it would be lost.
        string first + readRestOfTokenAsString first stream
    | Some _ -> raise (SyntaxError "Not a token first char")
/// Reads the token following a radix prefix.
/// As written, `radix` is not used and the token text is returned wrapped in
/// `ESymbol` rather than converted to a number.
let readIntegerRadix radix stream =
    readTokenAsString stream |> ESymbol
/// Handles the character after a '#': 'b', 'o' and 'x' select radix 2, 8
/// and 16 respectively and hand off to `readIntegerRadix`.
/// Raises `SyntaxError` at end of stream or for any other character.
let readSharpsign stream =
    match TextIO.input1 stream with
    | None -> raise (SyntaxError "End-of-file while reading #")
    | Some marker ->
        let radix =
            match marker with
            | 'b' -> 2
            | 'o' -> 8
            | 'x' -> 16
            | _ -> raise (SyntaxError "Unknown # character")
        readIntegerRadix radix stream
/// Reads the character following a backslash inside a delimited string and
/// returns the character it stands for: 'n' -> newline, 't' -> tab, and a
/// backslash or `endChar` stand for themselves.
/// Raises `SyntaxError` at end of stream or for any other character.
let readStringEscape endChar stream =
    match TextIO.input1 stream with
    | None -> raise (SyntaxError "Unterminated string escape")
    | Some 'n' -> '\n'
    | Some 't' -> '\t'
    | Some '\\' -> '\\'
    | Some c when c = endChar -> c
    | Some _ -> raise (SyntaxError "Unknown string escape")
/// Reads characters up to (and consuming) the closing `endChar`, resolving
/// backslash escapes via `readStringEscape`, and returns the collected text.
/// The opening delimiter is expected to have been consumed by the caller.
/// Raises `SyntaxError "Unterminated string"` if the stream ends first.
let readDelimitedString endChar stream =
    let rec collect acc =
        match TextIO.input1 stream with
        | None -> raise (SyntaxError "Unterminated string")
        | Some c when c = endChar -> acc
        | Some '\\' -> collect (readStringEscape endChar stream :: acc)
        | Some c -> collect (c :: acc)
    // Characters were accumulated newest-first; restore reading order.
    collect [] |> List.rev |> String.implode
/// Reads one expression from `stream`, or returns `None` when only
/// whitespace/comments remain. `readList` is passed in so this function can
/// handle '(' without being mutually recursive with the list reader.
/// Raises `SyntaxError` on a stray ')' or any unrecognised leading character.
let private read1 readList stream =
    skipWhitespaceAndComments stream
    match TextIO.lookahead stream with
    | None -> None
    | Some c when charIsTokenFirst c ->
        // The first character is still in the stream, so the token reader
        // picks it up together with the rest.
        Some (parseNumberOrSymbol (readRestOfTokenAsString c stream))
    | Some c ->
        // Consume the dispatch character before handing off.
        TextIO.input1 stream |> ignore
        let form =
            match c with
            | '"' -> EString (readDelimitedString c stream)
            | '|' -> ESymbol (readDelimitedString c stream)
            | '#' -> readSharpsign stream
            | '(' -> readList stream
            | ')' -> raise (SyntaxError "Stray closing parenthesis")
            | _ -> raise (SyntaxError "Unknown character at top level")
        Some form
/// Reads expressions until the matching ')' (which is consumed) and returns
/// them as an `EList`. The opening '(' is expected to have been consumed by
/// the caller. Raises `SyntaxError "Unterminated list"` if the stream ends
/// first.
let rec readList stream =
    let rec collect acc =
        skipWhitespaceAndComments stream
        if TextIO.lookahead stream = Some ')' then
            TextIO.input1 stream |> ignore
            acc
        else
            match read1 readList stream with
            | Some form -> collect (form :: acc)
            | None -> raise (SyntaxError "Unterminated list")
    // Elements were accumulated newest-first; restore reading order.
    EList (List.rev (collect []))
/// Reads the next top-level expression from `s`, or `None` at end of input.
let read s =
    s |> read1 readList
/// Reads every remaining top-level expression from `stream` and returns
/// them in the order they appear.
let readAll stream =
    // Keep calling `read` until it reports end of input.
    List.unfold (fun () -> read stream |> Option.map (fun form -> form, ())) ()
/// Writes `form` to `stream` in s-expression syntax.
/// Lists are written as space-separated elements in parentheses, strings
/// are written between double quotes with escapes, and numbers are written
/// via `string`.
/// NOTE(review): symbols are still written verbatim, so a symbol that was
/// read from `|...|` and contains non-token characters will not read back
/// as the same symbol - confirm whether that should be bar-quoted too.
let rec write stream form =
    // Emits `text` between double quotes, escaping exactly the sequences
    // that readStringEscape accepts (\n, \t, \\ and \"), so that what is
    // written here can be read back as the same EString.
    let writeQuoted (text: string) =
        TextIO.output1 (stream, '"')
        for c in text do
            match c with
            | '\n' -> TextIO.output (stream, "\\n")
            | '\t' -> TextIO.output (stream, "\\t")
            | '\\'
            | '"' ->
                TextIO.output1 (stream, '\\')
                TextIO.output1 (stream, c)
            | _ -> TextIO.output1 (stream, c)
        TextIO.output1 (stream, '"')

    match form with
    | EList forms ->
        TextIO.output1 (stream, '(')
        forms
        |> List.iteri (fun index item ->
            // A single space separates elements; none before the first.
            if index > 0 then
                TextIO.output1 (stream, ' ')
            write stream item)
        TextIO.output1 (stream, ')')
    | ESymbol s -> TextIO.output (stream, s)
    | EString s -> writeQuoted s
    | EReal n -> TextIO.output (stream, string n)
    | EInt n -> TextIO.output (stream, string n)
    | EIntInf n -> TextIO.output (stream, string n)
/// Writes `form` to `stream` followed by a single '\n'.
let writeln stream form =
    write stream form
    TextIO.output1 (stream, '\n')
Though the thing that jumps out is that the above code probably only works with ASCII
Fantastic! Does that code actually compile?
I'm thinking that it would be sweet to use pose as the spec for LSON as well?
That would be ideal for us. I don't really care what name is used as long as everyone agrees on the format. (And ideally the format has only one name so it's easy to recognize libraries that implement it.) Our working title is POSE because there was an earlier format LOSE (line-oriented s-expressions) so it rhymes with that :p Ideally LOSE would be a subset of POSE.
It does compile. You can try it out on https://sharplab.io to see how it renders into IL or C#.
Very cool!
Are you well versed in OCaml? I mostly converted the SML code to OCaml (using the standard library that comes with the compiler) but there doesn't seem to be an obvious function to do character lookahead (i.e. peek-char, or read-char followed by unread-char) from a stream.
The SML code indeed isn't written to handle Unicode. As far as I can tell, there isn't a standard way to do that. Only ASCII characters are syntactically significant in POSE, so we could even use a byte stream to read it.
Since neither the SML nor the F# code uses anything funky, it should be simple enough to translate into OCaml and Haskell.
The current implementation is based on one character of lookahead. It can probably be rewritten to not rely on lookahead, and only use read-char, but the code will become quite messy.
For .NET there is `BinaryReader`, which looks like it supports single-character reads, so in that case you could write one-character lookahead as:
/// Variant of the TextIO helpers that reads through a `BinaryReader` and
/// implements one-character lookahead by reading and then restoring the
/// position of the underlying stream.
module internal TextIO =
    open System.IO

    /// True while the underlying stream's position is before its length.
    let private hasMore (br: BinaryReader) : bool =
        br.BaseStream.Position < br.BaseStream.Length

    /// Consumes and returns the next character, or `None` at end of stream.
    let input1 (br: BinaryReader) : char option =
        if hasMore br then Some(br.ReadChar()) else None

    /// Returns the next character without consuming it, or `None` at end of
    /// stream.
    let lookahead (br: BinaryReader) : char option =
        if not (hasMore br) then
            None
        else
            // Remember where we are, read one character, then rewind.
            let mark = br.BaseStream.Position
            let next = br.ReadChar()
            br.BaseStream.Position <- mark
            Some next

    /// Writes the string `s1` to the writer `s`.
    let output (s: StreamWriter, s1: string) : unit = s.Write(s1)

    /// Writes the single character `s1` to the writer `s`.
    let output1 (s: StreamWriter, s1: char) : unit = s.Write(s1)
You code very fast. Do you like Lisp? I keep wishing for Lisp/ML collaboration and am trying to find some people. I have much more experience with Lisp (especially Scheme and Common Lisp) but have started to learn the ML family and like everything about it so far. It's a great complement to Lisp.
Yes, I like Lisp, though I'm not that well versed.
JSON - https://www.json.org/json-en.html
LSON - https://github.com/fsprojects/LSON
EDN from Clojure - https://github.com/edn-format/edn
Rivest's S-expressions - https://people.csail.mit.edu/rivest/sexp.html