katef / libfsm

DFA regular expression library & friends
BSD 2-Clause "Simplified" License
931 stars 52 forks source link

Escape ? as a way to avoid emitting trigraphs. #433

Closed katef closed 1 year ago

katef commented 1 year ago

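This applies to the inline memcmp()/strncmp() calls recently added to the vmc codegen. A string literal that contains `??` followed by certain characters forms a C trigraph (for example, `??-` is replaced by `~`), so the generated comparison would silently test against the wrong bytes on compilers that process trigraphs. Each `?` in these literals is now emitted as the octal escape `\077` instead.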

Before:

; ./build/bin/re -k str -pl vmc '^abc\?\?-$'
int
fsm_main(const char *s)
{
    const char *p;
    int c;

    p = s;
    if (0 != strncmp(p, "abc??-", 6)) return -1;
    p += 6;

    if (c = (unsigned char) *p++, c == '\0') return 0x1; /* "^abc\?\?-$" */
    (void) c;

    return -1;
}

After:

; ./build/bin/re -k str -pl vmc '^abc\?\?-$' | grep strncmp 
    if (0 != strncmp(p, "abc\077\077-", 6)) return -1;
; ./build/bin/re -k pair -pl vmc '^abc\?\?-$' | grep memcmp 
    if (e - p < 6 || 0 != memcmp(p, "abc\077\077-", 6)) return -1;
;