chimpler / pyhocon

HOCON parser for Python
Apache License 2.0
502 stars 118 forks source link

Trailing Element on String Value Raises ConfigWrongTypeException #233

Closed tonio-m closed 4 years ago

tonio-m commented 4 years ago
import pyhocon

text = '{someVersion=8.3.2-hmg-dev, someUnitName=IB, someMessage=Test. [BL056], someOrigin=MOBILE, someStatus=TEST, duration=3500, someNumber=9872329, someAppOrigin=APP_PADRAO, someId=c3ASAUSQTiWvl_YA9DYpDV:APA91bGfVcLNNGL20hfmaDDS0D8TuzJDuCjj4tgbRNcJcYASIBRVEE2FnA4exnE4ZWTuupRX7FQkdcJiMWkNEatk8lktkFcpR7P7mehb4r_SVnabIabGInjagGZ6pGyweDkxW2JUGK8g, someType=00001, someOriginOpen=null, someOS=null, eventSubType=TESTLOGON, someToken=, ip=error, somePair=0.4220043,-1.084015, eventType=SUCESSO, someMag=aWg4V01qSxDMjAvWmlEWGJ6aExnc2nZJbWZVPQ==, macAddress=33d94a3f7d2f8aff, someJSON={"ip":"error","hostname":null,"type":null,"concode":null,"continent":null,"country":null,"country_name":null,"code":null,"name":null,"city":null,"zip":null,"latitude":null,"longitude":null,"anotherJSON":{"id":null,"capital":null,"languages":null,"flag":null,"flag_emoji":null,"flag_emoji_unicode":null,"calling_code":null,"is_eu":null},"time_zone":{"id":null,"current_time":null,"gmt_offset":null,"code":null,"is_daylight_saving":null},"currency":{"code":null,"name":null,"plural":null,"symbol":null,"symbol_native":null},"connection":{"asn":null,"isp":null},"security":{"is_proxy":null,"proxy_type":null,"is_crawler":null,"crawler_name":null,"crawler_type":null,"is_tor":null,"threat_level":null,"threat_types":null}}, organization=IBPF, codigoCliente=440149, device=Android SDK built for x86, eventDate=6/1/20 4:03 PM}'

conf = pyhocon.ConfigParser.parse(text)

print(conf)

Raises Error:

raise ConfigWrongTypeException(
pyhocon.exceptions.ConfigWrongTypeException: Token '['BL056']' of type ConfigList (index 4) must be of type str (line: 1, col: 99)

If I add a line break or two to the brackets, the traceback goes completely bonkers:

Traceback (most recent call last):
  File "/tmp/regex.py", line 6, in <module>
    conf = pyhocon.ConfigParser.parse(text)
  File "/Users/fo012531/.pyenv/versions/3.8.0/lib/python3.8/site-packages/pyhocon/config_parser.py", line 429, in parse
    config = config_expr.parseString(content, parseAll=True)[0]
  File "/Users/fo012531/.pyenv/versions/3.8.0/lib/python3.8/site-packages/pyparsing.py", line 1955, in parseString
    raise exc
  File "/Users/fo012531/.pyenv/versions/3.8.0/lib/python3.8/site-packages/pyparsing.py", line 4065, in parseImpl
    raise ParseSyntaxException._from_exception(pe)
pyparsing.ParseSyntaxException: Expected {: ... | {{{"=" | ":" | "+="} - [Suppress:({{{"#" | "//"} - SkipTo:({Suppress:(W:(
)) | StringEnd})} | Suppress:(W:(
))})]... -} ConcatenatedValueParser:([{{{{Suppress:({[Suppress:(W:(
,))] {"#" | "//"} - SkipTo:({Suppress:(W:(
)) | StringEnd})}) | {Suppress:("include") {Re:('"(?:[^"\\\\\\n]|\\\\.)*"[ \\t]*') | {{"url" | "file" | "package"} - Suppress:("(") - Re:('"(?:[^"\\\\\\n]|\\\\.)*"[ \\t]*') - Suppress:(")")} | {"required" - Suppress:("(") - {Re:('"(?:[^"\\\\\\n]|\\\\.)*"[ \\t
]*') | {{"url" | "file" | "package"} - Suppress:("(") - Re:('"(?:[^"\\\\\\n]|\\\\.)*"[ \\t]*') - Suppress:(")")}} - Suppress:(")")}}} | Re:('[ \\t]*\\$\\{[^\\}]+\\}[ \\t]*') | : ...} | Forward: {{{{Suppress:("[") -} ListParser:({{ConcatenatedValueParser:([{{{
{Suppress:({[Suppress:(W:(
,))] {"#" | "//"} - SkipTo:({Suppress:(W:(
)) | StringEnd})}) | {Suppress:("include") {Re:('"(?:[^"\\\\\\n]|\\\\.)*"[ \\t]*') | {{"url" | "file" | "package"} - Suppress:("(") - Re:('"(?:[^"\\\\\\n]|\\\\.)*"[ \\t]*') - Suppress:(")")} | {"required" - Suppress:("(") - {Re:('"(?:[^"\\\\\\n]|\\\\.)*"[ \\t
]*') | {{"url" | "file" | "package"} - Suppress:("(") - Re:('"(?:[^"\\\\\\n]|\\\\.)*"[ \\t]*') - Suppress:(")")}} - Suppress:(")")}}} | Re:('[ \\t]*\\$\\{[^\\}]+\\}[ \\t]*') | : ...} | : ...} | {{{{{Re:('(?P<value>\\d+)\\s*(?P<unit>ns|nano|nanos|nanosecond|na
noseconds|us|micro|micros|microsecond|microseconds|ms|milli|millis|millisecond|milliseconds|s|second|seconds|m|minute|minutes|h|hour|hours|w|week|weeks|d|day|days|mo|month|months|y|year|years)$') | Re:('[+-]?(\\d*\\.\\d+|\\d+(\\.\\d+)?)([eE][+\\-]?\\d+)?(?=$|
[ \\t]*([\\$\\}\\],#\\n\\r]|//))')} | "true"} | "false"} | "null"} | {{Re:('""".*?"*""} | {{{{{Re:('(?P<value>\\d+)\\s*(?P<unit>ns|nano|nanos|nanosecond|nanoseconds|us|micro|micros|microsecond|microseconds|ms|milli|millis|millisecond|milliseconds|s|second|sec
onds|m|minute|minutes|h|hour|hours|w|week|weeks|d|day|days|mo|month|months|y|year|years)$') | Re:('[+-]?(\\d*\\.\\d+|\\d+(\\.\\d+)?)([eE][+\\-]?\\d+)?(?=$|[ \\t]*([\\$\\}\\],#\\n\\r]|//))')} | "true"} | "false"} | "null"} | {{Re:('""".*?"*"""') | Re:('"(?:[^"
\\\\\\n]|\\\\.)*"[ \\t]*')} | Re:('(?:[^^`+?!@*&"\\[\\{\\s\\]\\}#,=\\$\\\\]|\\\\.)+[ \\t]*')}}} | Suppress:({{"\" -} Suppress:(W:(
))})}]...)}}, found ','  (at char 530), (line:1, col:531)

Obviously the problem is with the field someMessage=Test. [BL056]. But how exactly should I treat this data so that the bracketed part is read together with the rest of the string value? Any idea how to fix or escape it?

tonio-m commented 4 years ago

This was actually not valid HOCON, but a serialized Java object. There is no way it can be parsed as HOCON.