funderburkjim / testing

For testing various features of github. Nothing important here.
0 stars 0 forks source link

use lxml for xml validation against a dtd #23

Open funderburkjim opened 7 years ago

funderburkjim commented 7 years ago

Here is a simple Python program for validating an XML document against a given DTD document. Note that lxml is not part of the Python standard library, so it must be installed separately. This validation code is gleaned from the lxml documentation:

"""xmlvalidate.py
   Use additional lxml module to validate an xml file against an
   external DTD file.
   Usage:
   python xmlvalidate.py <xmlfilename> <dtdfilename>
"""
from lxml import etree
import sys

def validate(xmlfile, dtdfile):
    """Validate an XML file against an external DTD and print the outcome.

    Prints "ok" when the document's root element validates against the DTD.
    Otherwise prints "Problem validating" followed by the first error entry
    from the DTD's error log.

    xmlfile -- passed to etree.parse (the script supplies a file name)
    dtdfile -- passed to etree.DTD (the script supplies a file name)

    Exceptions raised by lxml while loading either file are not caught here.
    """
    schema = etree.DTD(dtdfile)
    document = etree.parse(xmlfile)
    # validate() yields a Boolean; the details of a failure are kept in
    # the DTD object's error log.
    if schema.validate(document.getroot()):
        print("ok")
        return
    # Fetch the error entry before printing anything, so that nothing is
    # emitted if the lookup itself fails.
    first_error = schema.error_log.filter_from_errors()[0]
    print("Problem validating")
    print(first_error)

if __name__ == "__main__":
    # Both file names are required. Report a usage message instead of
    # failing with an IndexError traceback when one is missing. Extra
    # arguments are ignored.
    if len(sys.argv) < 3:
        print("Usage: python xmlvalidate.py <xmlfilename> <dtdfilename>")
        sys.exit(1)
    xmlfile = sys.argv[1]
    dtdfile = sys.argv[2]
    validate(xmlfile,dtdfile)
funderburkjim commented 7 years ago

Here is an improved version of the program.

"""xmlvalidate.py
   Use additional lxml module to validate an xml file against an
   external DTD file.
   Usage:
   python xmlvalidate.py <xmlfilename> [<dtdfilename>]
"""
from lxml import etree
import sys

def validate(xmlfile, dtdfile):
    """Check an XML file for well-formedness and, optionally, DTD validity.

    The XML file is parsed first. On an XML syntax error the message
    "XMLSyntaxError:<details>" is printed and the program exits with
    status 1.

    If dtdfile is None, only well-formedness is checked and a confirmation
    line is printed. Otherwise the document's root element is validated
    against the DTD: "ok" is printed on success, and on failure
    "Problem validating" is printed followed by the first error entry from
    the DTD's error log.

    xmlfile -- passed to etree.parse (the script supplies a file name)
    dtdfile -- passed to etree.DTD, or None to skip DTD validation
    """
    try:
        tree = etree.parse(xmlfile)
    except etree.XMLSyntaxError as err:
        print("XMLSyntaxError:%s" % err)
        # sys.exit rather than the bare exit() helper, which is only
        # injected by the site module and is not guaranteed to exist.
        sys.exit(1)
    if dtdfile is None:
        print("%s is well-formed xml" % xmlfile)
        return
    dtd = etree.DTD(dtdfile)
    root = tree.getroot()
    status = dtd.validate(root)  # status is a Boolean
    if status:
        print("ok")
    else:
        # Only the first logged error is reported.
        errmsg = dtd.error_log.filter_from_errors()[0]
        print("Problem validating")
        print(errmsg)

if __name__ == "__main__":
    # The XML file name is required; the DTD file name is optional.
    if len(sys.argv) < 2:
        print("Usage: python xmlvalidate.py <xmlfilename> [<dtdfilename>]")
        sys.exit(1)
    xmlfile = sys.argv[1]
    # An explicit length check replaces the former bare "except:", which
    # would also have swallowed unrelated errors such as KeyboardInterrupt.
    # With no DTD given, validate() only checks well-formedness.
    dtdfile = sys.argv[2] if len(sys.argv) > 2 else None
    validate(xmlfile,dtdfile)
funderburkjim commented 7 years ago

revision of 12/22/2016

"""xmlvalidate.py
   Use additional lxml module to validate an xml file against an
   external DTD file.
   Usage:
   python xmlvalidate.py <xmlfilename> <dtdfilename>
"""
from lxml import etree
import sys

def validate(xmlfile, dtdfile):
    """Report whether an XML file conforms to an external DTD.

    Output is written with print: "ok" if the root element of the parsed
    XML validates against the DTD; otherwise "Problem validating" and then
    the first error entry recorded in the DTD's error log.

    xmlfile -- handed to etree.parse (a file name when run as a script)
    dtdfile -- handed to etree.DTD (a file name when run as a script)

    No lxml exceptions are handled in this function.
    """
    # The DTD is loaded before the XML document is parsed.
    dtd = etree.DTD(dtdfile)
    root = etree.parse(xmlfile).getroot()
    is_valid = dtd.validate(root)  # Boolean result
    if not is_valid:
        # Look up the first logged error before any output is produced.
        problem = dtd.error_log.filter_from_errors()[0]
        print("Problem validating")
        print(problem)
    else:
        print("ok")

if __name__ == "__main__":
    # Exactly two arguments are expected: the XML file and the DTD file.
    if len(sys.argv) != 3:
        # print() as a function: the former Python 2 print statement is a
        # SyntaxError on Python 3 and did not match the print(...) calls
        # used in validate().
        print("Usage: python xmlvalidate.py <xmlfilename> <dtdfilename>")
        # sys.exit rather than the bare exit() helper, which is only
        # injected by the site module.
        sys.exit(1)
    xmlfile = sys.argv[1]
    dtdfile = sys.argv[2]
    validate(xmlfile,dtdfile)