python-openxml / python-docx

Create and modify Word documents with Python
MIT License
4.66k stars 1.14k forks source link

feature: mailmerge fields #262

Open pravinkumars opened 8 years ago

pravinkumars commented 8 years ago

Add the ability to find mail merge fields in a document and update their values.

taylorfturner commented 7 years ago

Having the ability to find mailmerge fields and update said fields would be totally awesome!

jeremy886 commented 7 years ago

I found an alternative, docx-mailmerge: https://github.com/Bouke/docx-mailmerge — it's a small project, so maybe it can be re-implemented here.

User65k commented 5 years ago

I use this:

class DocxTemplate():
    """Locate and create complex fields in a Word document.

    A complex field is a sequence of runs inside one container element:
    a ``w:fldChar`` marked ``begin``, the ``w:instrText`` holding the field
    code, optionally a ``separate`` marker followed by the displayed result,
    and a ``w:fldChar`` marked ``end``.
    """
    def __init__(self, path):
        """Open the document at ``path``; it is passed straight to ``Document``."""
        self.root = Document(path)

    def _findFields(self, of_type = None, search_in = None):
        """Collect the fields whose instruction text starts with ``of_type``.

        :param of_type: prefix the field code must start with (compared
            verbatim, so include any leading space the document uses, e.g.
            ``" AUTHOR"``). A falsy value matches every field.
        :param search_in: element to search below; defaults to the document
            body element.
        :returns: list of ``(instruction_text, DocxField)`` tuples, one per
            matching ``w:instrText`` whose begin and end markers were both
            found in the same container.
        """
        if search_in is None:
            search_in = self.root._body._element

        fields = []
        for instrText in search_in.xpath('.//w:instrText'):
            field_val = instrText.text
            # Filter in Python rather than splicing of_type into the XPath
            # expression, so a quote character in of_type cannot break it.
            if of_type and not (field_val or '').startswith(of_type):
                continue
            bounds = self._field_bounds(instrText)
            if bounds is not None:
                begin, end = bounds
                fields.append( (field_val, DocxField(begin, end)) )
        return fields

    @staticmethod
    def _field_bounds(instrText):
        """Return the ``(begin, end)`` fldChar pair of the field owning ``instrText``.

        Returns ``None`` when the instruction is not preceded by a ``begin``
        marker, or the matching ``end`` marker is missing, within the element
        that contains the instruction's run.
        """
        run = instrText.getparent()
        container = run.getparent() if run is not None else None
        if container is None:
            return None

        run_tag = qn('w:r')
        fld_char_tag = qn('w:fldChar')
        type_attr = qn('w:fldCharType')

        open_begins = []  # begin markers of fields not yet closed (innermost last)
        owner = None      # begin marker of the field that contains instrText
        for candidate_run in container:
            # Exact tag match: a suffix test on 'r' would also accept w:pPr.
            if candidate_run.tag != run_tag:
                continue
            for part in candidate_run:
                if part is instrText:
                    if not open_begins:
                        return None  # instruction outside any open field
                    owner = open_begins[-1]
                elif part.tag == fld_char_tag:
                    kind = part.get(type_attr)
                    if kind == 'begin':
                        open_begins.append(part)
                    elif kind == 'end' and open_begins:
                        begin = open_begins.pop()
                        # Stop at the end marker of *this* field so later
                        # fields in the same container cannot overwrite it.
                        if begin is owner:
                            return (begin, part)
        return None

    def _create_field(self, paragraph, value, text="F9"):
        """Append a complex field to ``paragraph`` and return it as a ``DocxField``.

        :param paragraph: paragraph proxy providing ``_p.add_r()`` and
            ``add_run()``.
        :param value: field code stored in the ``w:instrText`` element.
        :param text: result text placed between the ``separate`` and ``end``
            markers. NOTE(review): the default "F9" is presumably a reminder
            to refresh fields in Word - confirm.
        """
        # Each marker lives in a run of its own, appended in document order.
        begin = OxmlElement('w:fldChar')
        begin.set(qn('w:fldCharType'), "begin")
        paragraph._p.add_r().append(begin)

        instrText = OxmlElement('w:instrText')
        instrText.text = value
        paragraph._p.add_r().append(instrText)

        separate = OxmlElement('w:fldChar')
        separate.set(qn('w:fldCharType'), "separate")
        paragraph._p.add_r().append(separate)

        # The run between "separate" and "end" is the field's visible result.
        paragraph.add_run(text)

        end = OxmlElement('w:fldChar')
        end.set(qn('w:fldCharType'), "end")
        paragraph._p.add_r().append(end)
        return DocxField(begin, end)

class DocxField():
    """
    A field delimited by its opening and closing fldChar elements.
    """
    def __init__(self, sfldChar, efldChar):
        # Only the marker elements are stored; their positions are looked up
        # on demand because the document may be edited in the meantime.
        self.s = sfldChar
        self.e = efldChar

    def getparent(self):
        """Return the element containing the run that holds the begin marker."""
        begin_run = self.s.getparent()
        return begin_run.getparent()

    def _calc(self):
        """Return ``(start, end)`` indexes of the marker runs within the container."""
        container = self.getparent()
        start = container.index(self.s.getparent())
        end = container.index(self.e.getparent())
        return (start, end)

    def replace(self, value):
        """Replace the whole field with a single run whose text is ``value``."""
        first, last = self._calc()
        container = self.getparent()
        # Drop every run from the begin marker up to, but excluding, the run
        # that carries the end marker.
        for stale_run in container[first:last]:
            container.remove(stale_run)
        # Strip the end marker and reuse its run for the replacement text.
        end_run = self.e.getparent()
        end_run.remove(self.e)
        Run(end_run, container).text = value

    def set(self, value):
        """Update the field's displayed text (not implemented yet; does nothing)."""
        pass # TODO get or add separate and update run

# Usage example: load the template document.
p = DocxTemplate('templ.docx')

# Swap every field whose instruction starts with " AUTHOR" for plain text.
for _instruction, field in p._findFields(" AUTHOR"):
    field.replace("me")

# Write the result to a new file.
p.root.save("filled.docx")

For mail merge, one would of course search for the field type MERGEFIELD. I guess this could also be a way to implement #99.

I'm not sure how or where to integrate it here.