Open shailavij opened 3 years ago
Can anyone let me know how to write a custom parser that fetches each chemical molecule name together with its constituent details in the desired format?
[Chemical name + addition : Constituents],[Chemical name + addition : Constituents]
# Build a paragraph that names two reagents, each followed by bracketed amounts.
doc = Paragraph(
    '4-Methylmorpholine N-oxide (1.76 mL, 8.42 mmol) and potassium osmate dihydrate (97.3 mg, 0.38 mmol) '
)
# Print the serialized records extracted from the paragraph.
print(doc.records.serialize())
class BoilingPoint(BaseModel):
    """Model with three string fields: ``name``, ``Quan`` and ``units``.

    NOTE(review): despite the class name, the surrounding snippet stores
    reagent amounts (mg / mL / mmol) in this model, not boiling points.
    """

    name = StringType()
    Quan = StringType()
    units = StringType()
# Attach a list-of-BoilingPoint field named ``addition`` to the Compound model.
Compound.addition = ListType(ModelType(BoilingPoint))
import re
from chemdataextractor.parse import R, I, W, Optional, merge

# Unit token: exactly one of mg, mL or mmol.
units = R('^(mg|mL|mmol)$')(u'units').add_action(merge)  # Define all units in parser
# Numeric token: digits with an optional decimal part. The pattern is a raw
# string and the dot is escaped, so only a literal "." is accepted as the
# decimal separator (an unescaped "." would match any character, e.g. "1x5").
Quan = R(r'^\d+(\.\d+)?$')(u'value')
# A quantity is a number followed by a unit, e.g. "1.76 mL".
# NOTE(review): the match is tagged 'mL' even when the unit is mg or mmol.
bp = (Quan + units)(u'mL')
from chemdataextractor.parse.base import BaseParser
from chemdataextractor.utils import first


class BpParser(BaseParser):
    """Parser that turns each match of the ``bp`` grammar into a Compound."""

    root = bp

    def interpret(self, result, start, end):
        """Yield one Compound whose ``addition`` holds the matched amount.

        ``result`` is queried with relative xpaths for the ``value`` and
        ``units`` children; ``start`` and ``end`` are not used.

        NOTE(review): the yielded Compound carries no ``names``, which is
        consistent with the reported output where every amount appears as its
        own record. To tie amounts to a compound, the root grammar would
        presumably need to capture the chemical name as well - TODO confirm.
        """
        quantity_text = first(result.xpath('./value/text()'))
        unit_text = first(result.xpath('./units/text()'))
        # The ``name`` field of BoilingPoint is left unset here.
        amount = BoilingPoint(Quan=quantity_text, units=unit_text)
        yield Compound(addition=[amount])
# Register the compound parser together with the custom BpParser on Paragraph.
Paragraph.parsers = [CompoundParser(), BpParser()]
Result : [{'names': ['4-Methylmorpholine N-oxide']}, {'names': ['potassium osmate dihydrate']}, {'addition': [{'Quan': '1.76', 'units': 'mL'}]}, {'addition': [{'Quan': '8.42', 'units': 'mmol'}]}, {'addition': [{'Quan': '97.3', 'units': 'mg'}]}, {'addition': [{'Quan': '0.38', 'units': 'mmol'}]}]
Expected Result: Chemical name + addition : Constituents
[{'names': ['4-Methylmorpholine N-oxide'], 'addition': [{'Quan': '1.76', 'units': 'mL'}, {'Quan': '8.42', 'units': 'mmol'}]}]
Did you happen to figure this out?
Can anyone let me know how to write a custom parser that fetches each chemical molecule name together with its constituent details in the desired format?
[Chemical name + addition : Constituents],[Chemical name + addition : Constituents]
# Build a paragraph that names two reagents, each followed by bracketed amounts.
doc = Paragraph(
    '4-Methylmorpholine N-oxide (1.76 mL, 8.42 mmol) and potassium osmate dihydrate (97.3 mg, 0.38 mmol) '
)
# Print the serialized records extracted from the paragraph.
print(doc.records.serialize())
class BoilingPoint(BaseModel):
    """Model with three string fields: ``name``, ``Quan`` and ``units``.

    NOTE(review): despite the class name, the surrounding snippet stores
    reagent amounts (mg / mL / mmol) in this model, not boiling points.
    """

    name = StringType()
    Quan = StringType()
    units = StringType()
# Attach a list-of-BoilingPoint field named ``addition`` to the Compound model.
Compound.addition = ListType(ModelType(BoilingPoint))
import re
from chemdataextractor.parse import R, I, W, Optional, merge

# Unit token: exactly one of mg, mL or mmol.
units = R('^(mg|mL|mmol)$')(u'units').add_action(merge)  # Define all units in parser
# Numeric token: digits with an optional decimal part. The pattern is a raw
# string and the dot is escaped, so only a literal "." is accepted as the
# decimal separator (an unescaped "." would match any character, e.g. "1x5").
Quan = R(r'^\d+(\.\d+)?$')(u'value')
# A quantity is a number followed by a unit, e.g. "1.76 mL".
# NOTE(review): the match is tagged 'mL' even when the unit is mg or mmol.
bp = (Quan + units)(u'mL')
from chemdataextractor.parse.base import BaseParser
from chemdataextractor.utils import first


class BpParser(BaseParser):
    """Parser whose root grammar is ``bp``."""

    root = bp
# Register the compound parser together with the custom BpParser on Paragraph.
Paragraph.parsers = [CompoundParser(), BpParser()]
Result : [{'names': ['4-Methylmorpholine N-oxide']}, {'names': ['potassium osmate dihydrate']}, {'addition': [{'Quan': '1.76', 'units': 'mL'}]}, {'addition': [{'Quan': '8.42', 'units': 'mmol'}]}, {'addition': [{'Quan': '97.3', 'units': 'mg'}]}, {'addition': [{'Quan': '0.38', 'units': 'mmol'}]}]
Expected Result: Chemical name + addition : Constituents
[{'names': ['4-Methylmorpholine N-oxide'], 'addition': [{'Quan': '1.76', 'units': 'mL'}, {'Quan': '8.42', 'units': 'mmol'}]}]