Closed stelmo closed 1 year ago
OK anyway, this is one possible parser:
import PikaParser as P
"""
    LogicalExpr

Common supertype of all nodes of the logical-expression tree.
"""
abstract type LogicalExpr end

"""
    AndExpr(l, r)

Conjunction node with the left operand `l` and the right operand `r`.
"""
struct AndExpr <: LogicalExpr
    l::LogicalExpr
    r::LogicalExpr
end

"""
    OrExpr(l, r)

Disjunction node with the left operand `l` and the right operand `r`.
"""
struct OrExpr <: LogicalExpr
    l::LogicalExpr
    r::LogicalExpr
end

"""
    NotExpr(x)

Negation node wrapping the single operand `x`.
"""
struct NotExpr <: LogicalExpr
    x::LogicalExpr
end

"""
    IdentExpr(id)

Leaf node that stores an identifier as the symbol `id`.
"""
struct IdentExpr <: LogicalExpr
    id::Symbol
end
"""
    isident(x::Char)

Return `true` when `x` is a letter, a digit, or one of the characters
`_`, `-`, `:` and `.`; these are the characters accepted inside identifiers.
"""
isident(x::Char) = isletter(x) || isdigit(x) || x in ('_', '-', ':', '.')
"""
    noglue(x)

Build a clause that matches `x` only when it is not immediately followed by
an `:identpart` match, by sequencing `x` with a `P.not_followed_by` lookahead.
"""
function noglue(x)
    lookahead = P.not_followed_by(:identpart)
    return P.seq(x, lookahead)
end
# Grammar clauses keyed by rule name. A `name => clause` pair nested inside
# another clause gives that sub-clause its own rule name (e.g. `:or`, `:and`,
# `:not`, `:parenexpr`); these names are what `openfn`/`foldfn` dispatch on.
rules = Dict(
    # Single characters that may occur in identifiers, and optional whitespace.
    :identpart => P.satisfy(isident),
    :space => P.many(P.satisfy(isspace)),
    # An identifier is a run of identifier characters.
    :id => noglue(:in_id => P.some(:identpart)),
    # Operators: symbolic forms first, then keywords that must not be glued
    # to a following identifier character (so `order` is not read as `or`).
    :orop => P.first(
        P.tokens("||"),
        P.token('|'),
        noglue(P.tokens("OR")),
        noglue(P.tokens("or")),
    ),
    :andop => P.first(
        P.tokens("&&"),
        P.token('&'),
        noglue(P.tokens("AND")),
        noglue(P.tokens("and")),
    ),
    # The negation operator also consumes the whitespace that follows it.
    # Without this, `not A` and `! A` can never match as a negation: the
    # keyword forms forbid an identifier character right after them, and
    # `:not` allowed nothing else in between. The whitespace is kept inside
    # `:notop` so that `:not` still has exactly two submatches.
    :notop => P.seq(
        P.first(P.token('!'), noglue(P.tokens("NOT")), noglue(P.tokens("not"))),
        :space,
    ),
    # Entry rule: one expression with optional surrounding whitespace.
    :expr => P.seq(:space, :orexpr, :space),
    # Precedence, loosest to tightest binding: or, and, not, base expression.
    :orexpr => P.first(
        :or => P.seq(:andexpr, :space, :orop, :space, :orexpr),
        :andexpr,
    ),
    :andexpr => P.first(
        :and => P.seq(:notexpr, :space, :andop, :space, :andexpr),
        :notexpr,
    ),
    :notexpr => P.first(:not => P.seq(:notop, :notexpr), :baseexpr),
    # A base expression is an identifier or a parenthesized expression.
    :baseexpr => P.first(
        :id,
        :parenexpr => P.seq(P.token('('), :space, :orexpr, :space, P.token(')')),
    ),
)
# Flatten the nested rule definitions and build the grammar starting at :expr.
grmr = P.make_grammar([:expr], P.flatten(rules, Char))
input = " Mbar_A0384 and Mbar_A0385 and Mbar_A0389 and (Mbar_A0391 or Mbar_A0379) and (Mbar_A3102 or Mbar_A0386) and (Mbar_A0390 or Mbar_A0378) and Mbar_A0387 and Mbar_A0392 and (Mbar_A3101 or Mbar_A0388) "
# Parse the input and look up the :expr match that starts at position 1.
s = P.parse(grmr, input)
m = P.find_match_at!(s, :expr, 1)
# The parse only counts as successful when the :expr match spans the whole
# input. This is checked explicitly rather than with `@assert`, because
# assertions are meant for internal invariants and may be disabled.
# NOTE(review): `m == 0` assumes PikaParser reports "no match" as index 0;
# confirm against the PikaParser documentation.
if m == 0 || s.matches[m].len != length(input)
    error("match not found")
end
"""
    openfn(m, _)

Select which submatches of the match `m` should be opened during traversal.

The result is a collection of booleans, one per submatch, chosen by `m.rule`:
only the operand positions of `:expr`, `:parenexpr`, `:not`, `:or` and `:and`
are opened, the single submatch of the pass-through rules is opened, and for
every other rule no submatch is opened. The second argument is ignored.
"""
function openfn(m, _)
    rule = m.rule
    if rule == :expr
        return Bool[0, 1, 0]
    elseif rule == :parenexpr
        return Bool[0, 0, 1, 0, 0]
    elseif rule == :not
        return Bool[0, 1]
    elseif rule == :or || rule == :and
        return Bool[1, 0, 0, 0, 1]
    elseif rule in (:andexpr, :orexpr, :notexpr, :baseexpr)
        return Bool[1]
    else
        # Any other rule: open nothing, with one `false` per submatch.
        return (false for _ in m.submatches)
    end
end
"""
    foldfn(m, _, subvals)

Turn the match `m` into a value, given the values `subvals` of its submatches.

Identifiers become `IdentExpr` built from the matched text `m.view`; `:not`,
`:and` and `:or` become the corresponding expression nodes built from their
operand positions; `:parenexpr` and `:expr` pass their inner expression
through. Any other rule yields its first subvalue, or `nothing` when there
are no subvalues. The second argument is ignored.
"""
function foldfn(m, _, subvals)
    rule = m.rule
    rule == :id && return IdentExpr(Symbol(m.view))
    rule == :not && return NotExpr(subvals[2])
    rule == :and && return AndExpr(subvals[1], subvals[5])
    rule == :or && return OrExpr(subvals[1], subvals[5])
    rule == :parenexpr && return subvals[3]
    rule == :expr && return subvals[2]
    # Pass-through rules forward their only subvalue; leaves produce nothing.
    return isempty(subvals) ? nothing : subvals[1]
end
# Traverse the match tree rooted at `m`, using `openfn` to choose which
# submatches to visit and `foldfn` to build the resulting expression value.
grr = P.traverse_match(s, m; open = openfn, fold = foldfn)
Questions arise:
(the `@assert` there will get simplified once I get time to implement this: https://github.com/LCSB-BioCore/PikaParser.jl/issues/15)
EDIT: we don't need `not`, because apparently no one actually uses it; the gene logic is purely constructive.
I just checked, this works! nice!
Additional fun:
also:
'
s gene
This was addressed by #731.
Currently, if genes are not in DNF (disjunctive normal form), they are not parsed correctly. E.g., if a model has:
then this is yielded by the accessor:
From `IAF692.json` on BiGG.