VirusTotal / yara

The pattern matching swiss knife
https://virustotal.github.io/yara/
BSD 3-Clause "New" or "Revised" License
8.3k stars 1.45k forks source link

Grammar railroad diagram #1830

Open mingodad opened 1 year ago

mingodad commented 1 year ago

Using online tools such as https://www.bottlecaps.de/rr/ui and https://www.bottlecaps.de/convert/, plus a few manual fixes, we can get a nice navigable railroad diagram.

Copy the EBNF shown below and paste it into the "Edit Grammar" tab at https://www.bottlecaps.de/rr/ui, then click the "View Diagram" tab to see/download a navigable railroad diagram.

/* converted on Mon Dec 5, 2022, 13:41 (UTC+01) by bison-to-w3c v0.62 which is Copyright (c) 2011-2022 by Gunther Rademacher <grd@gmx.net> */
// From https://github.com/VirusTotal/yara/blob/65feab41d4cbf4a75338561d8506fc1fa9fa6ba6/libyara/grammar.y
/* Top level: a possibly empty sequence of rules, imports, error alternatives
   and end-of-included-file markers.
   NOTE(review): `error` has no production in this grammar; presumably it is
   bison's built-in error-recovery token carried over from grammar.y. Confirm. */
rules    ::= ( rule | import | error ( rule | import | 'include' ) | _END_OF_INCLUDED_FILE_ )*
/* An import is the import keyword followed by one text string. */
import   ::= _IMPORT_ _TEXT_STRING_
/* A rule: zero or more modifiers, the rule keyword, an identifier, the tags,
   then a braced body holding meta, strings and condition in that fixed order. */
rule     ::= rule_modifier* _RULE_ _IDENTIFIER_ tags '{' meta strings condition '}'
/* The meta section may be omitted; when present it needs at least one declaration. */
meta     ::= ( _META_ ':' meta_declaration+ )?
/* The strings section may be omitted; when present it needs at least one declaration. */
strings  ::= ( _STRINGS_ ':' string_declaration+ )?
/* The condition section is not optional: keyword, ':' and one boolean expression. */
condition
         ::= _CONDITION_ ':' boolean_expression
/* The two modifiers that may precede the rule keyword. */
rule_modifier
         ::= _PRIVATE_
           | _GLOBAL_
/* Tags may be omitted; when present they are ':' followed by one or more identifiers. */
tags     ::= ( ':' _IDENTIFIER_+ )?
/* One meta entry: identifier '=' value, where the value is a text string,
   a number with an optional leading '-', or true/false. */
meta_declaration
         ::= _IDENTIFIER_ '=' ( _TEXT_STRING_ | '-'? _NUMBER_ | _TRUE_ | _FALSE_ )
/* One string entry: string identifier '=' followed by a text string, a regexp
   or a hex string. Each kind accepts zero or more modifiers from its own
   modifier production. */
string_declaration
         ::= _STRING_IDENTIFIER_ '=' ( _TEXT_STRING_ string_modifier* | _REGEXP_ regexp_modifier* | _HEX_STRING_ hex_modifier* )
/* Modifiers allowed after a text string. xor may carry a parenthesised number
   or number '-' number pair; base64 and base64wide may carry a parenthesised
   text string. */
string_modifier
         ::= _WIDE_
           | _ASCII_
           | _NOCASE_
           | _FULLWORD_
           | _PRIVATE_
           | _XOR_ ( '(' _NUMBER_ ( '-' _NUMBER_ )? ')' )?
           | ( _BASE64_ | _BASE64_WIDE_ ) ( '(' _TEXT_STRING_ ')' )?
/* Modifiers allowed after a regexp: the same first five alternatives as
   string_modifier, without the xor and base64 forms. */
regexp_modifier
         ::= _WIDE_
           | _ASCII_
           | _NOCASE_
           | _FULLWORD_
           | _PRIVATE_
/* The only modifier allowed after a hex string. */
hex_modifier
         ::= _PRIVATE_
/* An identifier followed by any number of suffixes: '.' member access,
   '[' primary_expression ']' indexing, or '(' arguments ')' call syntax. */
identifier
         ::= _IDENTIFIER_ ( '.' _IDENTIFIER_ | '[' primary_expression ']' | '(' arguments ')' )*
/* A possibly empty, comma-separated list of expressions. */
arguments
         ::= ( expression ( ',' expression )* )?
/* A regexp is a single regexp token. */
regexp   ::= _REGEXP_
/* boolean_expression is a plain alias for expression in this grammar. */
boolean_expression
         ::= expression
/* Alternatives, in order:
   - the true / false keywords;
   - a primary expression, optionally followed by `matches` regexp, by one of
     the string/comparison operators and a second primary expression, or by
     '%' `of` and a string set or rule set;
   - a string identifier, optionally followed by `at` primary_expression or
     `in` range;
   - a `for` loop: for_expression, then either error or an iteration header
     followed by ':' and a parenthesised boolean expression
     (NOTE(review): `error` has no production here; presumably bison's
     error-recovery token. Confirm.);
   - for_expression `of` a string set (optionally restricted with `in` range
     or `at` primary_expression) or a rule set;
   - `not` / `defined` prefixes and `and` / `or` combinations, which recurse
     through boolean_expression;
   - a parenthesised expression. */
expression
         ::= _TRUE_
           | _FALSE_
           | primary_expression ( _MATCHES_ regexp | ( _CONTAINS_ | _ICONTAINS_ | _STARTSWITH_ | _ISTARTSWITH_ | _ENDSWITH_ | _IENDSWITH_ | _IEQUALS_ | _LT_ | _GT_ | _LE_ | _GE_ | _EQ_ | _NEQ_ ) primary_expression | '%' _OF_ ( string_set | rule_set ) )?
           | _STRING_IDENTIFIER_ ( _AT_ primary_expression | _IN_ range )?
           | _FOR_ for_expression ( error | for_iteration ':' '(' boolean_expression ')' )
           | for_expression _OF_ ( string_set ( _IN_ range | _AT_ primary_expression )? | rule_set )
           | ( _NOT_ | _DEFINED_ | boolean_expression ( _AND_ | _OR_ ) ) boolean_expression
           | '(' expression ')'
/* Header of a for loop: either one or more comma-separated identifiers
   followed by `in` and an iterator, or `of` followed by a string iterator. */
for_iteration
         ::= _IDENTIFIER_ ( ',' _IDENTIFIER_ )* _IN_ iterator
           | _OF_ string_iterator
/* Something that can be iterated: an identifier expression or a set. */
iterator ::= identifier
           | set
/* A set is a parenthesised, comma-separated list of one or more primary
   expressions, or a range. */
set      ::= '(' primary_expression ( ',' primary_expression )* ')'
           | range
/* A range is two primary expressions separated by the _DOT_DOT_ token,
   wrapped in parentheses. */
range    ::= '(' primary_expression _DOT_DOT_ primary_expression ')'
/* string_iterator is a plain alias for string_set. */
string_iterator
         ::= string_set
/* A string set is a parenthesised, comma-separated list of one or more
   string enumeration items, or the `them` keyword. */
string_set
         ::= '(' string_enumeration_item ( ',' string_enumeration_item )* ')'
           | _THEM_
/* A string identifier, either plain or in its wildcard token form. */
string_enumeration_item
         ::= _STRING_IDENTIFIER_
           | _STRING_IDENTIFIER_WITH_WILDCARD_
/* A rule set is a parenthesised, comma-separated list of one or more
   rule enumeration items. */
rule_set ::= '(' rule_enumeration_item ( ',' rule_enumeration_item )* ')'
/* An identifier optionally followed by a literal '*'. */
rule_enumeration_item
         ::= _IDENTIFIER_ '*'?
/* The quantity part of `for` / `of` expressions: either a primary expression
   or one of the quantifier keywords. */
for_expression
         ::= primary_expression
           | for_quantifier
/* The three quantifier keywords. */
for_quantifier
         ::= _ALL_
           | _ANY_
           | _NONE_
/* Value-level expressions. Alternatives, in order:
   - a parenthesised primary expression, optionally preceded by an
     integer-function token;
   - the filesize and entrypoint keywords;
   - number, double and text-string literals;
   - a string count, optionally restricted with `in` range;
   - a string offset or string length, optionally indexed with
     '[' primary_expression ']';
   - an identifier expression;
   - a unary minus or tilde prefix, or a binary operator between two primary
     expressions (left-recursive);
   - a regexp.
   The backslash operator is written as a one-character literal: W3C EBNF
   literals have no escape sequences, so the doubled, C-escaped spelling used
   in the bison source would denote two backslash characters here. */
primary_expression
         ::= _INTEGER_FUNCTION_? '(' primary_expression ')'
           | _FILESIZE_
           | _ENTRYPOINT_
           | _NUMBER_
           | _DOUBLE_
           | _TEXT_STRING_
           | _STRING_COUNT_ ( _IN_ range )?
           | ( _STRING_OFFSET_ | _STRING_LENGTH_ ) ( '[' primary_expression ']' )?
           | identifier
           | ( '-' | '~' | primary_expression ( '+' | '-' | '*' | '\' | '%' | '^' | '&' | '|' | _SHIFT_LEFT_ | _SHIFT_RIGHT_ ) ) primary_expression
           | regexp

//
// Tokens
//
// From https://raw.githubusercontent.com/VirusTotal/yara/65feab41d4cbf4a75338561d8506fc1fa9fa6ba6/libyara/lexer.l

// \("[^"]+"\)\s+{ return \(\S[^;]+\);.+ -> \2 ::= \1

/* Literal spellings of the fixed tokens used by the grammar productions.
   Tokens the grammar references that have no fixed spelling (for example
   _IDENTIFIER_, _NUMBER_, _TEXT_STRING_, _REGEXP_, _HEX_STRING_) have no
   definition in this list. */

/* Range, comparison and shift operators. */
_DOT_DOT_ ::= ".."
_LT_ ::= "<"
_GT_ ::= ">"
_LE_ ::= "<="
_GE_ ::= ">="
_EQ_ ::= "=="
_NEQ_ ::= "!="
_SHIFT_LEFT_ ::= "<<"
_SHIFT_RIGHT_ ::= ">>"
/* Rule modifiers, the rule keyword and section keywords. */
_PRIVATE_ ::= "private"
_GLOBAL_ ::= "global"
_RULE_ ::= "rule"
_META_ ::= "meta"
_STRINGS_ ::= "strings"
/* String modifier keywords. */
_ASCII_ ::= "ascii"
_WIDE_ ::= "wide"
_XOR_ ::= "xor"
_BASE64_ ::= "base64"
_BASE64_WIDE_ ::= "base64wide"
_FULLWORD_ ::= "fullword"
_NOCASE_ ::= "nocase"
/* Condition section keyword. */
_CONDITION_ ::= "condition"
/* Boolean literals and boolean operators. */
_TRUE_ ::= "true"
_FALSE_ ::= "false"
_NOT_ ::= "not"
_AND_ ::= "and"
_OR_ ::= "or"
/* Keywords used by string-match, `of` and `for` expressions. */
_AT_ ::= "at"
_IN_ ::= "in"
_OF_ ::= "of"
_THEM_ ::= "them"
_FOR_ ::= "for"
_ALL_ ::= "all"
_ANY_ ::= "any"
_NONE_ ::= "none"
/* Keywords that stand alone as primary expressions. */
_ENTRYPOINT_ ::= "entrypoint"
_FILESIZE_ ::= "filesize"
/* Word operators that may follow a primary expression in `expression`. */
_MATCHES_ ::= "matches"
_CONTAINS_ ::= "contains"
_STARTSWITH_ ::= "startswith"
_ENDSWITH_ ::= "endswith"
_ICONTAINS_ ::= "icontains"
_ISTARTSWITH_ ::= "istartswith"
_IENDSWITH_ ::= "iendswith"
_IEQUALS_ ::= "iequals"
/* Import keyword and the `defined` prefix operator. */
_IMPORT_ ::= "import"
_DEFINED_ ::= "defined"
plusvic commented 1 year ago

This is really nice!