s-expressions / pose

Portable S-expressions (POSE) spec and libs
30 stars 3 forks source link

Prior art #2

Open lassik opened 3 years ago

lassik commented 3 years ago

JSON - https://www.json.org/json-en.html

LSON - https://github.com/fsprojects/LSON

EDN from Clojure - https://github.com/edn-format/edn

Rivest's S-expressions - https://people.csail.mit.edu/rivest/sexp.html

wallymathieu commented 3 years ago

Hi! Contributor to LSON here, would be nice to work together on this since I think it's a really neat thing to have! Perhaps even possible to use as a light weight format for data for C programs?

lassik commented 3 years ago

Awesome. We were hoping to find people just like you! By all means, let's work together. I gave you write access to the repo; in case you need something, just add it.

lassik commented 3 years ago

C programs are definitely in the cards. Issue #1 gives a C library we should adapt.

lassik commented 3 years ago

We're mostly schemers and lispers so far; I'm a fan of the ML family as well. Fun to have someone from F#

There's a channel #pose-format in case you use IRC and Freenode. Or we can talk via issues and email.

Does LSON have a formal spec somewhere?

wallymathieu commented 3 years ago

LSON does not have a formal spec. I'm thinking that it would be sweet to use pose as the spec for LSON as well?

wallymathieu commented 3 years ago

A quick and dirty port from SML to F# would be:

open System

/// Raised by the reader functions in this listing when the input is not a
/// well-formed S-expression; the payload is a human-readable message.
exception SyntaxError of string;

/// Syntax tree of one S-expression as produced by the reader and consumed
/// by the writer in this listing.
type Exp
  = EList   of Exp list // parenthesized sequence of sub-expressions
  | ESymbol of string   // bare token, or the contents of a |...| token
  | EString of string   // contents of a "..." string, escapes already decoded
  | EReal   of float //real (type name used in the SML original)
  | EInt    of int
  | EIntInf of int64 //IntInf.int in the SML original; NOTE(review): int64 is fixed-width, presumably narrower than the SML type -- confirm
module internal String=
  /// Builds a string from a list of characters, keeping their order
  /// (counterpart of SML's `implode`).
  let implode (c:char list) = new String (Array.ofList c)

/// True when `goalChar` occurs at least once in `s`.
let stringContainsChar (s:string) (goalChar:char) = s.IndexOf(goalChar) >= 0

/// True when `char` is classified as white space by `Char.IsWhiteSpace`.
let charIsWhitespace (char: char) : bool = Char.IsWhiteSpace(char)

/// True when `char` is classified as a letter by `Char.IsLetter`.
let charIsAlphabetic (char: char) : bool = Char.IsLetter(char)

/// True when `char` is classified as a number by `Char.IsNumber`.
let charIsNumeric (char: char) : bool = Char.IsNumber(char)

/// True for characters shared by every token position: letters, numbers
/// and the punctuation characters `_$!?<=>+-*`.
let charIsTokenCommon char =
    if charIsAlphabetic char then true
    elif charIsNumeric char then true
    else stringContainsChar "_$!?<=>+-*" char

/// True for characters that may begin a token: everything accepted by
/// `charIsTokenCommon`, plus `/`.
let charIsTokenFirst char =
    charIsTokenCommon char || char = '/'

/// True for characters that may continue a token: everything accepted by
/// `charIsTokenFirst`, plus any of `.@~^%&`.
let charIsTokenNext char =
    match charIsTokenFirst char with
    | true -> true
    | false -> stringContainsChar ".@~^%&" char

/// Converts the text of a bare token into an expression.
/// Every token is currently returned as an `ESymbol`.
/// NOTE(review): the name suggests numeric tokens should become
/// EInt/EIntInf/EReal; that is not implemented here -- confirm intent.
let parseNumberOrSymbol string =
    string |> ESymbol

/// Minimal stand-in for the SML `TextIO` functions used by the reader and
/// writer, built on `StreamReader` / `StreamWriter`.
module internal TextIO=
  open System.IO
  /// Consumes and returns the next character, or `None` at end of stream.
  let input1 (s:StreamReader) =
    if s.EndOfStream then None
    else Some (char (s.Read()))
  /// Returns the next character without consuming it, or `None` at end of
  /// stream.
  let lookahead (s:StreamReader) =
    if s.EndOfStream then None
    else Some (char (s.Peek()))
  /// Writes the string `s1` to `s`.
  let output (s:StreamWriter,s1:string) = s.Write(s1)
  /// Writes the single character `s1` to `s`.
  let output1 (s:StreamWriter,s1:char) = s.Write(s1)
/// Consumes characters from `stream` up to and including the next '\n',
/// or up to end of stream if no newline follows.
let rec skipRestOfLine stream =
    match TextIO.input1 stream with
    | Some c when c <> '\n' -> skipRestOfLine stream
    | _ -> ()

/// Advances `stream` past any run of white space and `;` line comments,
/// stopping in front of the first other character or at end of stream.
let rec skipWhitespaceAndComments stream =
    match TextIO.lookahead stream with
    | Some ';' ->
        // A comment runs to the end of the line; more may follow.
        skipRestOfLine stream
        skipWhitespaceAndComments stream
    | Some c when charIsWhitespace c ->
        TextIO.input1 stream |> ignore
        skipWhitespaceAndComments stream
    | _ -> ()

/// Consumes characters from `stream` for as long as they satisfy
/// `charIsTokenNext` and returns them, in order, as a string.
/// The `char` argument is accepted but not used by the body: only
/// characters still unread on `stream` end up in the result.
let readRestOfTokenAsString char stream =
    let rec collect acc =
        match TextIO.lookahead stream with
        | Some next when charIsTokenNext next ->
            TextIO.input1 stream |> ignore
            collect (next :: acc)
        | _ -> acc
    collect [] |> List.rev |> String.implode

/// Reads one token from `stream` and returns its text.
/// Raises `SyntaxError` at end of stream, or when the next character is not
/// a valid first character of a token.
let readTokenAsString stream =
    match TextIO.input1 stream with
    | None -> raise (SyntaxError "End-of-file while expecting token")
    | Some first when not (charIsTokenFirst first) ->
        // The original test was inverted: it rejected valid first characters
        // and accepted invalid ones.
        raise (SyntaxError "Not a token first char")
    | Some first ->
        // `input1` has already consumed `first`, and
        // `readRestOfTokenAsString` only collects characters that are still
        // unread, so the first character is prepended here.
        string first + readRestOfTokenAsString first stream

/// Reads the token that follows a `#b` / `#o` / `#x` prefix.
/// The token text is returned as an `ESymbol`; `radix` is not used.
/// NOTE(review): presumably meant to parse an integer in `radix` -- confirm.
let readIntegerRadix radix stream =
    stream |> readTokenAsString |> ESymbol

/// Reads the construct following a `#` that has already been consumed.
/// `b`, `o` and `x` select radix 2, 8 and 16 for `readIntegerRadix`;
/// anything else, or end of stream, raises `SyntaxError`.
let readSharpsign stream =
    let radixOf =
        function
        | 'b' -> Some 2
        | 'o' -> Some 8
        | 'x' -> Some 16
        | _ -> None
    match TextIO.input1 stream with
    | None -> raise (SyntaxError "End-of-file while reading #")
    | Some c ->
        match radixOf c with
        | Some radix -> readIntegerRadix radix stream
        | None -> raise (SyntaxError "Unknown # character")

/// Decodes the character after a backslash inside a delimited string.
/// `n` and `t` give newline and tab; a backslash or `endChar` stands for
/// itself. Any other character, or end of stream, raises `SyntaxError`.
let readStringEscape endChar stream =
    match TextIO.input1 stream with
    | Some 'n' -> '\n'
    | Some 't' -> '\t'
    | Some c when c = '\\' || c = endChar -> c
    | Some _ -> raise (SyntaxError "Unknown string escape")
    | None -> raise (SyntaxError "Unterminated string escape")

/// Reads characters up to (and consuming) the closing `endChar` and returns
/// them as a string, decoding backslash escapes with `readStringEscape`.
/// The opening delimiter must already have been consumed by the caller.
/// Raises `SyntaxError` if the stream ends before `endChar` is seen.
let readDelimitedString endChar stream =
    let rec collect acc =
        match TextIO.input1 stream with
        | None -> raise (SyntaxError "Unterminated string")
        | Some c when c = endChar -> acc
        | Some '\\' -> collect (readStringEscape endChar stream :: acc)
        | Some c -> collect (c :: acc)
    collect [] |> List.rev |> String.implode

/// Reads the next expression from `stream`, or returns `None` when only
/// white space and comments remain. `readList` is the function used to read
/// the remainder of a list once its opening parenthesis has been consumed;
/// it is passed in because `readList` itself is defined after this function.
let private read1 readList stream =
    skipWhitespaceAndComments stream
    match TextIO.lookahead stream with
    | None -> None
    | Some c when charIsTokenFirst c ->
        // The token's first character is still unread at this point, so the
        // token reader picks it up from the stream.
        Some (parseNumberOrSymbol (readRestOfTokenAsString c stream))
    | Some c ->
        TextIO.input1 stream |> ignore
        let form =
            match c with
            | '"' -> EString (readDelimitedString c stream)
            | '|' -> ESymbol (readDelimitedString c stream)
            | '#' -> readSharpsign stream
            | '(' -> readList stream
            | ')' -> raise (SyntaxError "Stray closing parenthesis")
            | _ -> raise (SyntaxError "Unknown character at top level")
        Some form
/// Reads list elements up to and including the closing parenthesis and
/// returns them as an `EList`. The opening parenthesis must already have
/// been consumed. Raises `SyntaxError` if the stream ends first.
let rec readList stream =
    let rec collect acc =
        skipWhitespaceAndComments stream
        match TextIO.lookahead stream with
        | Some ')' ->
            TextIO.input1 stream |> ignore
            acc
        | _ ->
            match read1 readList stream with
            | Some form -> collect (form :: acc)
            | None -> raise (SyntaxError "Unterminated list")
    EList (List.rev (collect []))

/// Reads the next expression from `s`, or `None` when no expression remains.
let read s = s |> read1 readList

/// Reads expressions from `stream` until `read` returns `None` and returns
/// them in the order they were read.
let readAll stream =
    List.unfold (fun () -> read stream |> Option.map (fun form -> (form, ()))) ()

/// Writes `text` to `stream` enclosed in `delimiter`, escaping newline, tab,
/// backslash and the delimiter itself -- the escapes `readStringEscape`
/// understands.
let private writeDelimited stream (delimiter: char) (text: string) =
    TextIO.output1 (stream, delimiter)
    for c in text do
        match c with
        | '\n' -> TextIO.output (stream, "\\n")
        | '\t' -> TextIO.output (stream, "\\t")
        | '\\' -> TextIO.output (stream, "\\\\")
        | c when c = delimiter ->
            TextIO.output1 (stream, '\\')
            TextIO.output1 (stream, c)
        | c -> TextIO.output1 (stream, c)
    TextIO.output1 (stream, delimiter)

/// Writes `form` to `stream` in a textual form the reader in this file can
/// read back.
///
/// Lists are written as `(a b c)`. Strings are written in double quotes with
/// escapes; previously they were written raw and read back as symbols or
/// broke the surrounding syntax. Symbols made only of token characters are
/// written bare; any other symbol, including the empty one, is written
/// between `|` delimiters.
let rec write stream form =
    match form with
    | EList forms ->
        TextIO.output1 (stream, '(')
        forms
        |> List.iteri (fun index item ->
            // Elements are separated by a single space.
            if index > 0 then TextIO.output1 (stream, ' ')
            write stream item)
        TextIO.output1 (stream, ')')
    | ESymbol s ->
        let isBareToken =
            s <> "" && charIsTokenFirst s.[0] && Seq.forall charIsTokenNext s
        if isBareToken then TextIO.output (stream, s)
        else writeDelimited stream '|' s
    | EString s   -> writeDelimited stream '"' s
    | EReal n     -> TextIO.output (stream, (string n))
    | EInt n      -> TextIO.output (stream, (string n))
    | EIntInf n   -> TextIO.output (stream, (string n))

/// Writes `form` to `stream` with `write`, followed by a newline character.
let writeln stream form =
    write stream form
    TextIO.output1 (stream, '\n')
wallymathieu commented 3 years ago

Though the thing that jumps out is that the above code probably only works with ASCII

lassik commented 3 years ago

Fantastic! Does that code actually compile?

I'm thinking that it would be sweet to use pose as the spec for LSON as well?

That would be ideal for us. I don't really care what name is used as long as everyone agrees on the format. (And ideally the format has only one name so it's easy to recognize libraries that implement it.) Our working title is POSE because there was an earlier format LOSE (line-oriented s-expressions) so it rhymes with that :p Ideally LOSE would be a subset of POSE.

wallymathieu commented 3 years ago

It does compile. You can try it out on https://sharplab.io to see how it renders into IL or C#.

lassik commented 3 years ago

Very cool!

Are you well versed in OCaml? I mostly converted the SML code to OCaml (using the standard library that comes with the compiler) but there doesn't seem to be an obvious function to do character lookahead (i.e. peek-char, or read-char followed by unread-char) from a stream.

The SML code indeed isn't written to handle Unicode. As far as I can tell, there isn't a standard way to do that. Only ASCII characters are syntactically significant in POSE, so we could even use a byte stream to read it.

wallymathieu commented 3 years ago

Since neither the SML nor the F# code uses anything funky, it should be simple enough to translate into OCaml and Haskell.

lassik commented 3 years ago

The current implementation is based on one character of lookahead. It can probably be rewritten to not rely on lookahead, and only use read-char, but the code will become quite messy.

wallymathieu commented 3 years ago

For .NET there is BinaryReader, which looks like it supports one-character reads, so in that case you could write one-character lookahead as:

/// Variant of the `TextIO` helpers that reads through a `BinaryReader`,
/// implementing one-character lookahead by reading a character and then
/// restoring the underlying stream position.
module internal TextIO=
  open System.IO
  /// True while the base stream position is before its length.
  let private hasMore (br:BinaryReader) =
    br.BaseStream.Position < br.BaseStream.Length
  /// Consumes and returns the next character, or `None` when the base
  /// stream position has reached its length.
  let input1 (br:BinaryReader) =
    if hasMore br then Some (br.ReadChar()) else None
  /// Returns the next character without consuming it, or `None` when the
  /// base stream position has reached its length.
  let lookahead (br:BinaryReader) =
    if hasMore br then
      let start = br.BaseStream.Position
      let next = br.ReadChar()
      // Rewind so the character is seen again by the next read.
      br.BaseStream.Position <- start
      Some next
    else
      None
  /// Writes the string `s1` to `s`.
  let output (s:StreamWriter,s1:string) = s.Write(s1)
  /// Writes the single character `s1` to `s`.
  let output1 (s:StreamWriter,s1:char) = s.Write(s1)
lassik commented 3 years ago

You code very fast. Do you like Lisp? I keep wishing for Lisp/ML collaboration and am trying to find some people. I have much more experience with Lisp (especially Scheme and Common Lisp) but have started to learn the ML family and like everything about it so far. It's a great complement to Lisp.

wallymathieu commented 3 years ago

Yes, I like Lisp, though I'm not that well versed.