mity / md4c

C Markdown parser. Fast. SAX-like interface. Compliant to CommonMark specification.
MIT License
814 stars 150 forks source link

Callbacks returning positive numbers #266

Open nyankers opened 3 months ago

nyankers commented 3 months ago

Per md4c.h, a callback may abort further processing by returning non-zero.

This is partially implemented in the various direct calls to the callback functions, but because all of these functions ultimately just pass along whatever value the callback returns, it'll eventually get caught by a MD_CHECK() call. Unfortunately, MD_CHECK() only aborts on negative values, not positive non-zero values, which can cause some pretty strange behavior.

Some possible fixes include:

step- commented 3 months ago

@nyankers, can you please show me a markdown example that shows the issue at work, is it possible? Thank you.

nyankers commented 3 months ago

This is an issue with the API, so virtually any markdown can demonstrate it, but the following C code should show what I mean:

#include <stdio.h>
#include <string.h>
#include <md4c.h>

struct return_values {
    int enter_block;
    int leave_block;
    int enter_span;
    int leave_span;
    int text;
};

static int handle_enter_block(MD_BLOCKTYPE type, void *detail, void *userdata) {
    int ret = ((struct return_values*) userdata)->enter_block;
    printf("\tenter_block: %d\n", ret);
    return ret;
}

static int handle_leave_block(MD_BLOCKTYPE type, void *detail, void *userdata) {
    int ret = ((struct return_values*) userdata)->leave_block;
    printf("\tleave_block: %d\n", ret);
    return ret;
}

static int handle_enter_span(MD_SPANTYPE type, void *detail, void *userdata) {
    int ret = ((struct return_values*) userdata)->enter_span;
    printf("\tenter_span: %d\n", ret);
    return ret;
}

static int handle_leave_span(MD_SPANTYPE type, void *detail, void *userdata) {
    int ret = ((struct return_values*) userdata)->leave_span;
    printf("\tleave_span: %d\n", ret);
    return ret;
}

static int handle_text(MD_TEXTTYPE type, const MD_CHAR *text, MD_SIZE size, void *userdata) {
    int ret = ((struct return_values*) userdata)->text;
    printf("\ttext: %d (%.*s)\n", ret, size, text);
    return ret;
}

static int test(MD_PARSER *parser, struct return_values *ret, const char *str, int *n, const char *name) {
    printf("\nTesting %s:\n", name);
    for (*n = -1; *n <= 1; *n += 2) {
        printf("\n%s: %d\n", name, *n);
        if (md_parse(str, strlen(str), parser, ret) != 0) {
            printf("Parser failed or aborted.\n");
        } else {
            printf("Parser succeeded.\n");
        }
    }
    *n = 0;
}

int main(int argc, const char **argv) {
    MD_PARSER parser;
    struct return_values ret = {0,0,0,0,0};
    const char *md = "foo *bar*\n\nfoo **bar**";

    parser.abi_version = 0;
    parser.flags = 0;
    parser.enter_block = &handle_enter_block;
    parser.leave_block = &handle_leave_block;
    parser.enter_span = &handle_enter_span;
    parser.leave_span = &handle_leave_span;
    parser.text = &handle_text;
    parser.debug_log = NULL;
    parser.syntax = NULL;

    test(&parser, &ret, md, &ret.enter_block, "enter_block");
    test(&parser, &ret, md, &ret.leave_block, "leave_block");
    test(&parser, &ret, md, &ret.enter_span, "enter_span");
    test(&parser, &ret, md, &ret.leave_span, "leave_span");
    test(&parser, &ret, md, &ret.text, "text");

    return 0;
}

Given the comments in md4c.h, I would expect returning -1 or 1 to give the same behavior for each parser function, but the output with the latest code is:


Testing enter_block:

enter_block: -1
    enter_block: -1
Parser failed or aborted.

enter_block: 1
    enter_block: 1
Parser failed or aborted.

Testing leave_block:

leave_block: -1
    enter_block: 0
    enter_block: 0
    text: 0 (foo )
    enter_span: 0
    text: 0 (bar)
    leave_span: 0
    leave_block: -1
Parser failed or aborted.

leave_block: 1
    enter_block: 0
    enter_block: 0
    text: 0 (foo )
    enter_span: 0
    text: 0 (bar)
    leave_span: 0
    leave_block: 1
    enter_block: 0
    text: 0 (foo )
    enter_span: 0
    text: 0 (bar)
    leave_span: 0
    leave_block: 1
    leave_block: 1
Parser failed or aborted.

Testing enter_span:

enter_span: -1
    enter_block: 0
    enter_block: 0
    text: 0 (foo )
    enter_span: -1
Parser failed or aborted.

enter_span: 1
    enter_block: 0
    enter_block: 0
    text: 0 (foo )
    enter_span: 1
    leave_block: 0
    enter_block: 0
    text: 0 (foo )
    enter_span: 1
    leave_block: 0
    leave_block: 0
Parser succeeded.

Testing leave_span:

leave_span: -1
    enter_block: 0
    enter_block: 0
    text: 0 (foo )
    enter_span: 0
    text: 0 (bar)
    leave_span: -1
Parser failed or aborted.

leave_span: 1
    enter_block: 0
    enter_block: 0
    text: 0 (foo )
    enter_span: 0
    text: 0 (bar)
    leave_span: 1
    leave_block: 0
    enter_block: 0
    text: 0 (foo )
    enter_span: 0
    text: 0 (bar)
    leave_span: 1
    leave_block: 0
    leave_block: 0
Parser succeeded.

Testing text:

text: -1
    enter_block: 0
    enter_block: 0
    text: -1 (foo )
Parser failed or aborted.

text: 1
    enter_block: 0
    enter_block: 0
    text: 1 (foo )
    leave_block: 0
    enter_block: 0
    text: 1 (foo )
    leave_block: 0
    leave_block: 0
Parser succeeded.

As you can see, aside from enter_block, returning 1 instead of -1 will often cause the parser to keep going after the first attempt to abort (and in fairly unpredictable ways), and may even result md_parse() indicating a success by returning 0.

step- commented 3 months ago

I see. Thank you for taking the time to add the demonstration C code.