fumiama / go-docx

One of the most functional libraries to partially read and write .docx files (a.k.a. Microsoft Word documents or ECMA-376 Office Open XML) in Go.
GNU Affero General Public License v3.0
109 stars 14 forks source link

how to parse math? #12

Open lytdev opened 1 year ago

lytdev commented 1 year ago

docx documents support omath and Equation.DSMT4, how to extract them? OMath like this:

m:oMathPara>
<m:oMath>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>f</m:t>
</m:r>
<m:d>
<m:dPr>
<m:ctrlPr>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
</m:ctrlPr>
</m:dPr>
<m:e>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>x</m:t>
</m:r>
</m:e>
</m:d>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>=</m:t>
</m:r>
<m:sSub>
<m:sSubPr>
<m:ctrlPr>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
</m:ctrlPr>
</m:sSubPr>
<m:e>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>a</m:t>
</m:r>
</m:e>
<m:sub>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>0</m:t>
</m:r>
</m:sub>
</m:sSub>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>+</m:t>
</m:r>
<m:nary>
<m:naryPr>
<m:chr m:val="∑"/>
<m:grow m:val="1"/>
<m:ctrlPr>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
</m:ctrlPr>
</m:naryPr>
<m:sub>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>n=1</m:t>
</m:r>
</m:sub>
<m:sup>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>∞</m:t>
</m:r>
</m:sup>
<m:e>
<m:d>
<m:dPr>
<m:ctrlPr>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
</m:ctrlPr>
</m:dPr>
<m:e>
<m:sSub>
<m:sSubPr>
<m:ctrlPr>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
</m:ctrlPr>
</m:sSubPr>
<m:e>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>a</m:t>
</m:r>
</m:e>
<m:sub>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>n</m:t>
</m:r>
</m:sub>
</m:sSub>
<m:func>
<m:funcPr>
<m:ctrlPr>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
</m:ctrlPr>
</m:funcPr>
<m:fName>
<m:r>
<m:rPr>
<m:sty m:val="p"/>
</m:rPr>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>cos</m:t>
</m:r>
</m:fName>
<m:e>
<m:f>
<m:fPr>
<m:ctrlPr>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
</m:ctrlPr>
</m:fPr>
<m:num>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>nπx</m:t>
</m:r>
</m:num>
<m:den>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>L</m:t>
</m:r>
</m:den>
</m:f>
</m:e>
</m:func>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>+</m:t>
</m:r>
<m:sSub>
<m:sSubPr>
<m:ctrlPr>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
</m:ctrlPr>
</m:sSubPr>
<m:e>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>b</m:t>
</m:r>
</m:e>
<m:sub>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>n</m:t>
</m:r>
</m:sub>
</m:sSub>
<m:func>
<m:funcPr>
<m:ctrlPr>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
</m:ctrlPr>
</m:funcPr>
<m:fName>
<m:r>
<m:rPr>
<m:sty m:val="p"/>
</m:rPr>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>sin</m:t>
</m:r>
</m:fName>
<m:e>
<m:f>
<m:fPr>
<m:ctrlPr>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:eastAsia="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
</m:ctrlPr>
</m:fPr>
<m:num>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>nπx</m:t>
</m:r>
</m:num>
<m:den>
<m:r>
<w:rPr>
<w:rFonts w:ascii="Cambria Math" w:hAnsi="Cambria Math"/>
</w:rPr>
<m:t>L</m:t>
</m:r>
</m:den>
</m:f>
</m:e>
</m:func>
</m:e>
</m:d>
</m:e>
</m:nary>
</m:oMath>
</m:oMathPara>

Equation like this:

<w:object w:dxaOrig="4320" w:dyaOrig="1520" w14:anchorId="194DA0B5">
<v:shapetype id="_x0000_t75" coordsize="21600,21600" o:spt="75" o:preferrelative="t" path="m@4@5l@4@11@9@11@9@5xe" filled="f" stroked="f">
<v:stroke joinstyle="miter"/>
<v:formulas>
<v:f eqn="if lineDrawn pixelLineWidth 0"/>
<v:f eqn="sum @0 1 0"/>
<v:f eqn="sum 0 0 @1"/>
<v:f eqn="prod @2 1 2"/>
<v:f eqn="prod @3 21600 pixelWidth"/>
<v:f eqn="prod @3 21600 pixelHeight"/>
<v:f eqn="sum @0 0 1"/>
<v:f eqn="prod @6 1 2"/>
<v:f eqn="prod @7 21600 pixelWidth"/>
<v:f eqn="sum @8 21600 0"/>
<v:f eqn="prod @7 21600 pixelHeight"/>
<v:f eqn="sum @10 21600 0"/>
</v:formulas>
<v:path o:extrusionok="f" gradientshapeok="t" o:connecttype="rect"/>
<o:lock v:ext="edit" aspectratio="t"/>
</v:shapetype>
<v:shape id="_x0000_i1025" type="#_x0000_t75" style="width:3in;height:76.4pt" o:ole="">
<v:imagedata r:id="rId9" o:title=""/>
</v:shape>
<o:OLEObject Type="Embed" ProgID="Equation.DSMT4" ShapeID="_x0000_i1025" DrawAspect="Content" ObjectID="_1753715041" r:id="rId10"/>
</w:object>
fumiama commented 1 year ago

Now math has not been supported for its complexity, but you can add it by editing the code.