yaml / libyaml

Canonical source repository for LibYAML
http://pyyaml.org/wiki/LibYAML
MIT License
951 stars 316 forks source link

Event based YAML parsing #255

Open Montana opened 2 years ago

Montana commented 2 years ago

Hey all,

I'm well aware token-based parsing (specifically when it comes to YAML) is not useful at all unless you are implementing a syntax highlighter. Let's say I had another use case though, would I want to use event-based parsing?

My next question: does libyaml provide a way to maintain this relationship in a tree?

I know event-based parsing does maintain the tree structure: sequence and mapping events are the features that describe the structure of the input. If I remember correctly, it should be pretty simple to build a list of `struct Input` by walking the event stream, something like this:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <yaml.h>

/*
 * One flattened entry: `key` is the dot-separated path of mapping keys
 * leading to a scalar, `value` is that scalar's text. Both are
 * NUL-terminated C strings stored inline.
 */
struct Input {
  char key[100];
  char value[100];
};

/*
 * Builds an Input from two NUL-terminated strings.
 *
 * Each string is copied into its fixed-size field; text that does not fit
 * is truncated so the field is always NUL-terminated and never overrun.
 * Neither argument may be NULL.
 */
struct Input gen(const char *key, const char *value) {
  struct Input ret;
  // snprintf bounds the copy and always terminates, unlike strcpy.
  snprintf(ret.key, sizeof ret.key, "%s", key);
  snprintf(ret.value, sizeof ret.value, "%s", value);
  return ret;
}

void append_all(yaml_parser_t *p, struct Input **target,
        char cur_key[100], size_t len) {
  yaml_event_t e;
  yaml_parser_parse(p, &e);
  switch (e.type) {
    case YAML_MAPPING_START_EVENT:
      yaml_event_delete(&e);
      yaml_parser_parse(p, &e);
      while (e.type != YAML_MAPPING_END_EVENT) {
        // assume scalar key
        assert(e.type == YAML_SCALAR_EVENT);
        if (len != 0) cur_key[len++] = '.';
        memcpy(cur_key + len, e.data.scalar.value,
            strlen(e.data.scalar.value) + 1);
        const size_t new_len = len + strlen(e.data.scalar.value);
        yaml_event_delete(&e);
        append_all(p, target, cur_key, new_len);
        if (len != 0) --len;
        cur_key[len] = '\0'; // remove key part
        yaml_parser_parse(p, &e);
      }
      break;
    case YAML_SCALAR_EVENT:
      *(*target)++ = gen(cur_key, e.data.scalar.value);
      break;
    default: assert(false);
  }
  yaml_event_delete(&e);
}

int main(int argc, char *argv[]) {
  yaml_parser_t p;
  yaml_event_t e;
  yaml_parser_initialize(&p);
  FILE *f = fopen("foo.yaml", "r");
  yaml_parser_set_input_file(&p, f);
  // skip stream start and document start
  yaml_parser_parse(&p, &e);
  yaml_event_delete(&e);
  yaml_parser_parse(&p, &e);
  yaml_event_delete(&e);

  char cur_key[100] = {'\0'};
  struct Input input[100];
  struct Input *input_end = input;
  append_all(&p, &input_end, cur_key, 0);

  // skip document end and stream end
  yaml_parser_parse(&p, &e);
  yaml_event_delete(&e);
  yaml_parser_parse(&p, &e);
  yaml_event_delete(&e);

  yaml_parser_delete(&p);
  fclose(f);

  // print out input items
  for (struct Input *cur = input; cur < input_end; ++cur) {
    printf("%s = %s\n", cur->key, cur->value);
  }
}