dtolnay / request-for-implementation

Crates that don't exist, but should
610 stars 6 forks source link

Library to serialize and deserialize Syn syntax trees #25

Closed dtolnay closed 4 years ago

dtolnay commented 5 years ago

For a project, I would like to be able to manipulate Rust syntax trees from a different language. Rather than implementing a Rust parser in the other language, I would like to have Syn parse the code and share the parsed syntax tree as JSON.

This is similar to the use case for rustc's `-Z ast-json` flag, invoked as `rustc src/main.rs -Z ast-json`.

Consider an input file like the following:

struct S {
    x: i32,
}

fn main() {
    let s = S { x: 0 };
    println!("{}", s.x);
}
rustc currently produces this verbose JSON (click to show). ```json { "module": { "inner": { "lo": 0, "hi": 88 }, "items": [ { "ident": "", "attrs": [ { "id": { "_field0": 1 }, "style": "Outer", "path": { "span": { "lo": 0, "hi": 0 }, "segments": [ { "ident": "prelude_import", "id": 2, "args": null } ] }, "tokens": [], "is_sugared_doc": false, "span": { "lo": 0, "hi": 0 } } ], "id": 3, "node": { "variant": "Use", "fields": [ { "prefix": { "span": { "lo": 0, "hi": 0 }, "segments": [ { "ident": "{{root}}", "id": 4, "args": null }, { "ident": "std", "id": 5, "args": null }, { "ident": "prelude", "id": 6, "args": null }, { "ident": "v1", "id": 7, "args": null } ] }, "kind": "Glob", "span": { "lo": 0, "hi": 0 } } ] }, "vis": { "node": "Inherited", "span": { "lo": 0, "hi": 0 } }, "span": { "lo": 0, "hi": 0 }, "tokens": null }, { "ident": "std", "attrs": [ { "id": { "_field0": 0 }, "style": "Outer", "path": { "span": { "lo": 0, "hi": 0 }, "segments": [ { "ident": "macro_use", "id": 8, "args": null } ] }, "tokens": [], "is_sugared_doc": false, "span": { "lo": 0, "hi": 0 } } ], "id": 9, "node": { "variant": "ExternCrate", "fields": [ null ] }, "vis": { "node": "Inherited", "span": { "lo": 0, "hi": 0 } }, "span": { "lo": 0, "hi": 0 }, "tokens": null }, { "ident": "S", "attrs": [], "id": 10, "node": { "variant": "Struct", "fields": [ { "variant": "Struct", "fields": [ [ { "span": { "lo": 15, "hi": 21 }, "ident": "x", "vis": { "node": "Inherited", "span": { "lo": 15, "hi": 15 } }, "id": 11, "ty": { "id": 12, "node": { "variant": "Path", "fields": [ null, { "span": { "lo": 18, "hi": 21 }, "segments": [ { "ident": "i32", "id": 13, "args": null } ] } ] }, "span": { "lo": 18, "hi": 21 } }, "attrs": [] } ], 14 ] }, { "params": [], "where_clause": { "id": 15, "predicates": [], "span": { "lo": 0, "hi": 0 } }, "span": { "lo": 0, "hi": 0 } } ] }, "vis": { "node": "Inherited", "span": { "lo": 0, "hi": 0 } }, "span": { "lo": 0, "hi": 24 }, "tokens": [ { "variant": "Token", "fields": [ { 
"lo": 0, "hi": 6 }, { "variant": "Ident", "fields": [ "struct", false ] } ] }, { "variant": "Token", "fields": [ { "lo": 7, "hi": 8 }, { "variant": "Ident", "fields": [ "S", false ] } ] }, { "variant": "Delimited", "fields": [ { "open": { "lo": 9, "hi": 10 }, "close": { "lo": 23, "hi": 24 } }, "Brace", [ { "variant": "Token", "fields": [ { "lo": 15, "hi": 16 }, { "variant": "Ident", "fields": [ "x", false ] } ] }, { "variant": "Token", "fields": [ { "lo": 16, "hi": 17 }, "Colon" ] }, { "variant": "Token", "fields": [ { "lo": 18, "hi": 21 }, { "variant": "Ident", "fields": [ "i32", false ] } ] }, { "variant": "Token", "fields": [ { "lo": 21, "hi": 22 }, "Comma" ] } ] ] } ] }, { "ident": "main", "attrs": [], "id": 16, "node": { "variant": "Fn", "fields": [ { "inputs": [], "output": { "variant": "Default", "fields": [ { "lo": 36, "hi": 36 } ] }, "variadic": false }, { "unsafety": "Normal", "asyncness": "NotAsync", "constness": { "node": "NotConst", "span": { "lo": 26, "hi": 28 } }, "abi": "Rust" }, { "params": [], "where_clause": { "id": 17, "predicates": [], "span": { "lo": 0, "hi": 0 } }, "span": { "lo": 0, "hi": 0 } }, { "stmts": [ { "id": 65, "node": { "variant": "Local", "fields": [ { "pat": { "id": 20, "node": { "variant": "Ident", "fields": [ { "variant": "ByValue", "fields": [ "Immutable" ] }, "s", null ] }, "span": { "lo": 46, "hi": 47 } }, "ty": null, "init": { "id": 23, "node": { "variant": "Struct", "fields": [ { "span": { "lo": 50, "hi": 51 }, "segments": [ { "ident": "S", "id": 21, "args": null } ] }, [ { "ident": "x", "expr": { "id": 22, "node": { "variant": "Lit", "fields": [ { "node": { "variant": "Int", "fields": [ 0, "Unsuffixed" ] }, "span": { "lo": 57, "hi": 58 } } ] }, "span": { "lo": 57, "hi": 58 }, "attrs": { "_field0": null } }, "span": { "lo": 54, "hi": 58 }, "is_shorthand": false, "attrs": { "_field0": null } } ], null ] }, "span": { "lo": 50, "hi": 60 }, "attrs": { "_field0": null } }, "id": 19, "span": { "lo": 42, "hi": 61 }, "attrs": { 
"_field0": null } } ] }, "span": { "lo": 42, "hi": 61 } }, { "id": 66, "node": { "variant": "Semi", "fields": [ { "id": 30, "node": { "variant": "Block", "fields": [ { "stmts": [ { "id": 64, "node": { "variant": "Semi", "fields": [ { "id": 29, "node": { "variant": "Call", "fields": [ { "id": 28, "node": { "variant": "Path", "fields": [ null, { "span": { "lo": 8017743, "hi": 8017766 }, "segments": [ { "ident": "$crate", "id": 25, "args": null }, { "ident": "io", "id": 26, "args": null }, { "ident": "_print", "id": 27, "args": null } ] } ] }, "span": { "lo": 8017743, "hi": 8017766 }, "attrs": { "_field0": null } }, [ { "id": 63, "node": { "variant": "Call", "fields": [ { "id": 35, "node": { "variant": "Path", "fields": [ null, { "span": { "lo": 8017769, "hi": 8017803 }, "segments": [ { "ident": "$crate", "id": 31, "args": null }, { "ident": "fmt", "id": 32, "args": null }, { "ident": "Arguments", "id": 33, "args": null }, { "ident": "new_v1", "id": 34, "args": null } ] } ] }, "span": { "lo": 8017769, "hi": 8017803 }, "attrs": { "_field0": null } }, [ { "id": 39, "node": { "variant": "AddrOf", "fields": [ "Immutable", { "id": 38, "node": { "variant": "Array", "fields": [ [ { "id": 36, "node": { "variant": "Lit", "fields": [ { "node": { "variant": "Str", "fields": [ "", "Cooked" ] }, "span": { "lo": 75, "hi": 79 } } ] }, "span": { "lo": 75, "hi": 79 }, "attrs": { "_field0": null } }, { "id": 37, "node": { "variant": "Lit", "fields": [ { "node": { "variant": "Str", "fields": [ "\n", "Cooked" ] }, "span": { "lo": 75, "hi": 79 } } ] }, "span": { "lo": 75, "hi": 79 }, "attrs": { "_field0": null } } ] ] }, "span": { "lo": 75, "hi": 79 }, "attrs": { "_field0": null } } ] }, "span": { "lo": 75, "hi": 79 }, "attrs": { "_field0": null } }, { "id": 62, "node": { "variant": "AddrOf", "fields": [ "Immutable", { "id": 61, "node": { "variant": "Match", "fields": [ { "id": 44, "node": { "variant": "Tup", "fields": [ [ { "id": 43, "node": { "variant": "AddrOf", "fields": [ 
"Immutable", { "id": 42, "node": { "variant": "Field", "fields": [ { "id": 41, "node": { "variant": "Path", "fields": [ null, { "span": { "lo": 81, "hi": 82 }, "segments": [ { "ident": "s", "id": 40, "args": null } ] } ] }, "span": { "lo": 81, "hi": 82 }, "attrs": { "_field0": null } }, "x" ] }, "span": { "lo": 81, "hi": 84 }, "attrs": { "_field0": null } } ] }, "span": { "lo": 81, "hi": 84 }, "attrs": { "_field0": null } } ] ] }, "span": { "lo": 75, "hi": 79 }, "attrs": { "_field0": null } }, [ { "attrs": [], "pats": [ { "id": 45, "node": { "variant": "Tuple", "fields": [ [ { "id": 46, "node": { "variant": "Ident", "fields": [ { "variant": "ByValue", "fields": [ "Immutable" ] }, "arg0", null ] }, "span": { "lo": 0, "hi": 0 } } ], null ] }, "span": { "lo": 75, "hi": 79 } } ], "guard": null, "body": { "id": 60, "node": { "variant": "Array", "fields": [ [ { "id": 59, "node": { "variant": "Call", "fields": [ { "id": 51, "node": { "variant": "Path", "fields": [ null, { "span": { "lo": 8017769, "hi": 8017803 }, "segments": [ { "ident": "$crate", "id": 47, "args": null }, { "ident": "fmt", "id": 48, "args": null }, { "ident": "ArgumentV1", "id": 49, "args": null }, { "ident": "new", "id": 50, "args": null } ] } ] }, "span": { "lo": 8017769, "hi": 8017803 }, "attrs": { "_field0": null } }, [ { "id": 53, "node": { "variant": "Path", "fields": [ null, { "span": { "lo": 81, "hi": 84 }, "segments": [ { "ident": "arg0", "id": 52, "args": null } ] } ] }, "span": { "lo": 81, "hi": 84 }, "attrs": { "_field0": null } }, { "id": 58, "node": { "variant": "Path", "fields": [ null, { "span": { "lo": 81, "hi": 84 }, "segments": [ { "ident": "$crate", "id": 54, "args": null }, { "ident": "fmt", "id": 55, "args": null }, { "ident": "Display", "id": 56, "args": null }, { "ident": "fmt", "id": 57, "args": null } ] } ] }, "span": { "lo": 81, "hi": 84 }, "attrs": { "_field0": null } } ] ] }, "span": { "lo": 8017769, "hi": 8017803 }, "attrs": { "_field0": null } } ] ] }, "span": { "lo": 75, 
"hi": 79 }, "attrs": { "_field0": null } } } ] ] }, "span": { "lo": 75, "hi": 79 }, "attrs": { "_field0": null } } ] }, "span": { "lo": 75, "hi": 79 }, "attrs": { "_field0": null } } ] ] }, "span": { "lo": 8017769, "hi": 8017803 }, "attrs": { "_field0": null } } ] ] }, "span": { "lo": 8017743, "hi": 8017805 }, "attrs": { "_field0": null } } ] }, "span": { "lo": 8017743, "hi": 8017807 } } ], "id": 24, "rules": "Default", "span": { "lo": 8017741, "hi": 8017809 } }, null ] }, "span": { "lo": 8017741, "hi": 8017809 }, "attrs": { "_field0": null } } ] }, "span": { "lo": 8017741, "hi": 8017809 } } ], "id": 18, "rules": "Default", "span": { "lo": 36, "hi": 88 } } ] }, "vis": { "node": "Inherited", "span": { "lo": 26, "hi": 26 } }, "span": { "lo": 26, "hi": 88 }, "tokens": [ { "variant": "Token", "fields": [ { "lo": 26, "hi": 28 }, { "variant": "Ident", "fields": [ "fn", false ] } ] }, { "variant": "Token", "fields": [ { "lo": 29, "hi": 33 }, { "variant": "Ident", "fields": [ "main", false ] } ] }, { "variant": "Delimited", "fields": [ { "open": { "lo": 33, "hi": 34 }, "close": { "lo": 34, "hi": 35 } }, "Paren", [] ] }, { "variant": "Delimited", "fields": [ { "open": { "lo": 36, "hi": 37 }, "close": { "lo": 87, "hi": 88 } }, "Brace", [ { "variant": "Token", "fields": [ { "lo": 42, "hi": 45 }, { "variant": "Ident", "fields": [ "let", false ] } ] }, { "variant": "Token", "fields": [ { "lo": 46, "hi": 47 }, { "variant": "Ident", "fields": [ "s", false ] } ] }, { "variant": "Token", "fields": [ { "lo": 48, "hi": 49 }, "Eq" ] }, { "variant": "Token", "fields": [ { "lo": 50, "hi": 51 }, { "variant": "Ident", "fields": [ "S", false ] } ] }, { "variant": "Delimited", "fields": [ { "open": { "lo": 52, "hi": 53 }, "close": { "lo": 59, "hi": 60 } }, "Brace", [ { "variant": "Token", "fields": [ { "lo": 54, "hi": 55 }, { "variant": "Ident", "fields": [ "x", false ] } ] }, { "variant": "Token", "fields": [ { "lo": 55, "hi": 56 }, "Colon" ] }, { "variant": "Token", "fields": [ { "lo": 
57, "hi": 58 }, { "variant": "Literal", "fields": [ { "variant": "Integer", "fields": [ "0" ] }, null ] } ] } ] ] }, { "variant": "Token", "fields": [ { "lo": 60, "hi": 61 }, "Semi" ] }, { "variant": "Token", "fields": [ { "lo": 66, "hi": 73 }, { "variant": "Ident", "fields": [ "println", false ] } ] }, { "variant": "Token", "fields": [ { "lo": 73, "hi": 74 }, "Not" ] }, { "variant": "Delimited", "fields": [ { "open": { "lo": 74, "hi": 75 }, "close": { "lo": 84, "hi": 85 } }, "Paren", [ { "variant": "Token", "fields": [ { "lo": 75, "hi": 79 }, { "variant": "Literal", "fields": [ { "variant": "Str_", "fields": [ "{}" ] }, null ] } ] }, { "variant": "Token", "fields": [ { "lo": 79, "hi": 80 }, "Comma" ] }, { "variant": "Token", "fields": [ { "lo": 81, "hi": 82 }, { "variant": "Ident", "fields": [ "s", false ] } ] }, { "variant": "Token", "fields": [ { "lo": 82, "hi": 83 }, "Dot" ] }, { "variant": "Token", "fields": [ { "lo": 83, "hi": 84 }, { "variant": "Ident", "fields": [ "x", false ] } ] } ] ] }, { "variant": "Token", "fields": [ { "lo": 85, "hi": 86 }, "Semi" ] } ] ] } ] } ], "inline": true }, "attrs": [], "span": { "lo": 0, "hi": 88 } } ```


I think we can take a somewhat more concise approach that aligns better with Syn's syntax tree, something like this:

{
  "items": [
    {
      "struct": {
        "ident": "S",
        "fields": {
          "named": [
            {
              "ident": "x",
              "ty": {
                "path": {
                  "segments": [
                    {
                      "ident": "i32"
                    }
                  ]
                }
              }
            }
          ]
        }
      }
    },
    {
      "fn": {
        "ident": "main",
        "inputs": [],
        "output": null,
        "stmts": [
          {
            "let": {
              "pats": [
                {
                  "ident": {
                    "ident": "s"
                  }
                }
              ]
            },
            "init": {
              "struct": {
                "path": {
                  "segments": [
                    {
                      "ident": "S"
                    }
                  ]
                },
                "fields": [
                  {
                    "ident": "x",
                    "expr": {
                      "lit": {
                        "int": "0"
                      }
                    }
                  }
                ]
              }
            }
          },
          {
            "semi": {
              "macro": {
                "path": {
                  "segments": [
                    {
                      "ident": "println"
                    }
                  ]
                },
                "delimiter": "paren",
                "tts": [
                  {
                    "lit": "\"{}\""
                  },
                  {
                    "punct": {
                      "op": ",",
                      "spacing": "alone"
                    }
                  },
                  {
                    "ident": "s"
                  },
                  {
                    "punct": {
                      "op": ".",
                      "spacing": "alone"
                    }
                  },
                  {
                    "ident": "x"
                  }
                ]
              }
            }
          }
        ]
      }
    }
  ]
}

I would prefer not to do this with derive(Serialize, Deserialize) attributes directly in Syn because the data structures that work best for manipulating a syntax tree from Rust code are not necessarily the same as the data structures that make for a nice serialized representation to work with across languages. Serialization will need a similar but not identical set of data structures that derive Serialize and Deserialize, with conversion logic back and forth to Syn types. I would expect it to work something like:

/// Renders a Syn file as pretty-printed JSON.
///
/// The borrowed `syn::File` is first converted into the serializable `File`
/// adapter, which is then handed to `serde_json`.
///
/// # Panics
///
/// Panics if `serde_json::to_string_pretty` returns an error.
pub fn serialize(syn_file: &syn::File) -> String {
    serde_json::to_string_pretty(&File::from(syn_file)).unwrap()
}

/// Parses JSON text back into a Syn file.
///
/// The JSON is decoded into the serializable `File` adapter and, on success,
/// converted into a `syn::File`.
///
/// # Errors
///
/// Returns the `serde_json` error if `json` cannot be decoded into `File`.
pub fn deserialize(json: &str) -> serde_json::Result<syn::File> {
    serde_json::from_str::<File>(json).map(syn::File::from)
}
taiki-e commented 5 years ago

I started writing an implementation of this: https://github.com/taiki-e/serde-syn

taiki-e commented 5 years ago

I think the output of serde-syn is still a little redundant.

{
  "items": [
    {
      "struct": {
        "ident": "S",
        "fields": {
          "named": [
            {
              "ident": "x",
              "colon_token": true,
              "ty": {
                "path": {
                  "segments": [
                    {
                      "ident": "i32"
                    }
                  ]
                }
              }
            }
          ]
        }
      }
    },
    {
      "fn": {
        "ident": "main",
        "inputs": [],
        "output": null,
        "stmts": [
          {
            "let": {
              "pats": [
                {
                  "ident": {
                    "ident": "s"
                  }
                }
              ],
              "init": {
                "struct": {
                  "path": {
                    "segments": [
                      {
                        "ident": "S"
                      }
                    ]
                  },
                  "fields": [
                    {
                      "ident": "x",
                      "colon_token": true,
                      "expr": {
                        "lit": {
                          "int": "0"
                        }
                      }
                    }
                  ]
                }
              }
            }
          },
          {
            "semi": {
              "macro": {
                "path": {
                  "segments": [
                    {
                      "ident": "println"
                    }
                  ]
                },
                "delimiter": "paren",
                "tts": [
                  {
                    "lit": "\"{}\""
                  },
                  {
                    "punct": {
                      "op": ",",
                      "spacing": "alone"
                    }
                  },
                  {
                    "ident": "s"
                  },
                  {
                    "punct": {
                      "op": ".",
                      "spacing": "alone"
                    }
                  },
                  {
                    "ident": "x"
                  }
                ]
              }
            }
          }
        ]
      }
    }
  ]
}
dtolnay commented 5 years ago

Thanks for working on this! This is terrific.

My first impression is that the public API is too large:

taiki-e commented 5 years ago

Thanks for the feedback!

  • It is pretty unlikely that someone would want to convert specifically a ForeignItemStatic (for example) to JSON as their top-level data structure.

Indeed.

  • You could start with just File, which is the only use case I know about, and only add other types upon request.
  • Alternatively you could remove all the types from the public API including File, and expose them through an associated type only: for example <syn::File as Node>::Adapter.

The first way is easier, but I'm interested in the second way. Would it require a trait like the following?

/// A syntax tree node that can be converted to and from a serializable
/// adapter type.
pub trait Node {
    /// The stand-in type used for (de)serialization; it must implement both
    /// `Serialize` and `Deserialize` for any lifetime `'de`.
    type Adapter: Serialize + for<'de> Deserialize<'de>;
    /// Builds the adapter from a borrowed node.
    // Open question: should this be #[doc(hidden)]?
    fn to_adapter(&self) -> Self::Adapter;
    /// Builds a node from a borrowed adapter.
    // Open question: should this be #[doc(hidden)]?
    fn from_adapter(adapter: &Self::Adapter) -> Self;
}

#[doc(hidden)]
pub use self::file::File;

// Sketch: uses `File` as the adapter type for `syn::File`.
// NOTE(review): the `{ .. }` bodies look like placeholders for conversion
// logic not shown here; as written they are not compilable Rust.
impl Node for syn::File {
    type Adapter = File;
    fn to_adapter(&self) -> Self::Adapter { .. }
    fn from_adapter(adapter: &Self::Adapter) -> Self { .. }
}

/// Renders any `Node` as pretty-printed JSON by way of its adapter type.
///
/// # Panics
///
/// Panics if `serde_json::to_string_pretty` returns an error.
pub fn serialize<N: Node>(syn_node: &N) -> String {
    serde_json::to_string_pretty(&syn_node.to_adapter()).unwrap()
}

/// Parses JSON text into any `Node` by way of its adapter type.
///
/// # Errors
///
/// Returns the `serde_json` error if `json` cannot be decoded into
/// `N::Adapter`.
pub fn deserialize<N: Node>(json: &str) -> serde_json::Result<N> {
    serde_json::from_str::<N::Adapter>(json).map(|adapter| N::from_adapter(&adapter))
}

I think it would be better to focus the crate on serializing and deserializing. It should not be a goal to support other unrelated workflows like cloning the types, hashing, traversing fields, etc.

That makes sense. I opened https://github.com/taiki-e/serde-syn/pull/2 to remove these.

dtolnay commented 5 years ago

Yep, that trait would work! The name Syn for the trait could also be a good name since it will be implemented for all of Syn's types.

If you feel adventurous, you could try using syn.json and whatever we come up with for https://github.com/dtolnay/syn/issues/607 to generate most of the trait impls programmatically, leaving handwritten impls only where something needs to be specially handled.

taiki-e commented 5 years ago

Yep, that trait would work! The name Syn for the trait could also be a good name since it will be implemented for all of Syn's types.

That's a good name!

If you feel adventurous, you could try using syn.json and whatever we come up with for dtolnay/syn#607 to generate most of the trait impls programmatically, leaving handwritten impls only where something needs to be specially handled.

I will try that! It would be very nice if I could generate most of the trait implementations programmatically!

dtolnay commented 5 years ago

Here is a programmatic description of the Syn syntax tree that could be useful for generating a lot of the serialization/deserialization library: https://docs.rs/syn-codegen

taiki-e commented 4 years ago

@dtolnay: Sorry for the delay. I've updated the crate to syn 1.0 and published 0.1: https://crates.io/crates/syn-serde

dtolnay commented 4 years ago

Nice! This crate is great for what I need, and the API is small and easy to understand. Thanks so much for working on this. I know it was an annoying library to write at times.

I added a link to your crate from the readme, and also filed https://github.com/taiki-e/syn-serde/issues/6 for one small issue to follow up.