FasterXML / jackson-dataformat-xml

Extension for Jackson JSON processor that adds support for serializing POJOs as XML (and deserializing from XML) as an alternative to JSON
Apache License 2.0
561 stars 221 forks source link

unexpected escaping when serializing objects with dom nodes #582

Closed potoo0 closed 1 year ago

potoo0 commented 1 year ago

full code:

package com.example;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.dataformat.xml.XmlMapper;
import com.fasterxml.jackson.module.jaxb.JaxbAnnotationModule;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAnyElement;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;

/**
 * Reproduction test comparing how JAXB and jackson-dataformat-xml serialize a bean
 * that carries a DOM {@link Element} field.
 *
 * <p>The test only prints both outputs; the output reported for each serializer is
 * recorded in the comments next to the corresponding {@code println} call.
 */
public class JacksonXmlIssueTest {
    private static final XmlMapper xmlMapper = buildXmlMapper();
    private static final JAXBContext jaxbContext = buildJAXBContext(RpcData.class);
    private static final TransformerFactory transformerFactory = buildTransformerFactory();

    /** Bean under test: one plain string field plus an arbitrary DOM element. */
    @XmlRootElement(name = "data")
    @XmlAccessorType(XmlAccessType.FIELD)
    static class RpcData {
        public String name = "nameString";

        @XmlAnyElement
        public Element domElement;
    }

    /**
     * Parses a small XML string into a DOM element, attaches it to an {@link RpcData}
     * instance and serializes that instance with both JAXB and Jackson.
     */
    @Test
    void testSer() throws Exception {
        String xml = "<L2VPN><VSIs>fake</VSIs></L2VPN>";
        // 1. parse string to dom
        DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        InputStream inputStream = new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8));
        Element element = db.parse(inputStream).getDocumentElement();
        // Sanity check: the DOM round-trips back to the original string.
        Assertions.assertEquals(xml, transform(element));

        // 2.0 init javaBean
        RpcData rpcData = new RpcData();
        rpcData.domElement = element;

        // 2.1 test jaxb
        String xmlByJaxb = marshal(rpcData);
        System.out.println(xmlByJaxb);
        //<data><name>nameString</name><L2VPN><VSIs>fake</VSIs></L2VPN></data>

        // 2.2 test jackson-xml
        String xmlByJackson = xmlMapper.writeValueAsString(rpcData);
        System.out.println(xmlByJackson);
        //<data><name>nameString</name><domElement>&lt;L2VPN>&lt;VSIs>fake&lt;/VSIs>&lt;/L2VPN></domElement></data>
    }

    /**
     * Marshals {@code data} to a string with JAXB, with {@link Marshaller#JAXB_FRAGMENT}
     * enabled.
     *
     * @param data the object to marshal
     * @return the marshalled XML
     * @throws JAXBException if marshalling fails
     */
    String marshal(Object data) throws JAXBException {
        Marshaller marshaller = jaxbContext.createMarshaller();
        marshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);

        StringWriter sw = new StringWriter();
        marshaller.marshal(data, sw);
        return sw.toString();
    }

    /**
     * Renders a DOM node as a string, without XML declaration and without indentation.
     *
     * @param node the node to render
     * @return the XML text of {@code node}
     * @throws Exception if the transformer cannot be created or the transformation fails
     */
    String transform(Node node) throws Exception {
        Transformer transformer = transformerFactory.newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        transformer.setOutputProperty(OutputKeys.INDENT, "no");
        StreamResult result = new StreamResult(new StringWriter());
        transformer.transform(new DOMSource(node), result);
        return result.getWriter().toString();
    }

    /** Creates the shared {@link TransformerFactory} used by {@link #transform(Node)}. */
    static TransformerFactory buildTransformerFactory() {
        return TransformerFactory.newInstance();
    }

    /**
     * Creates a {@link JAXBContext} for the given class.
     *
     * @param clz the class the context must know about
     * @return the new context
     * @throws RuntimeException wrapping the {@link JAXBException} if the context cannot be created
     */
    static JAXBContext buildJAXBContext(Class<?> clz) {
        try {
            return JAXBContext.newInstance(clz);
        } catch (JAXBException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds the {@link XmlMapper} used by the test: no default wrapper, nulls omitted,
     * JAXB annotations enabled, root wrapping/unwrapping disabled.
     */
    static XmlMapper buildXmlMapper() {
        return XmlMapper.builder()
                .defaultUseWrapper(false)
                .serializationInclusion(JsonInclude.Include.NON_NULL)
                .addModule(new JaxbAnnotationModule())
                .disable(SerializationFeature.WRAP_ROOT_VALUE)
                .disable(DeserializationFeature.UNWRAP_ROOT_VALUE)
                .build();
    }
}

JAXB does not produce the unexpected `&lt;` escaping, but jackson-dataformat-xml does. How can I fix it? Output:

# xmlByJaxb
<data><name>nameString</name><L2VPN><VSIs>fake</VSIs></L2VPN></data>

# xmlByJackson
<data><name>nameString</name><domElement>&lt;L2VPN>&lt;VSIs>fake&lt;/VSIs>&lt;/L2VPN></domElement></data>
pjfanning commented 1 year ago
potoo0 commented 1 year ago
  • public Element domElement; is serialized as an element with escaped text by jackson-dataformat-xml
  • JAXB seems to serialize the dom element as an embedded XML structure
  • both approaches are valid encodings
  • if you don't like jackson-dataformat-xml's solution, you can stick with JAXB - or you can write your own custom Jackson serializer

In fact, the `<` escaping does not fit my needs here: the escaped output cannot be parsed by the NETCONF server.

pjfanning commented 1 year ago

Still not a bug in jackson-dataformat-xml. It is possible that jackson-dataformat-xml has a configuration that gets it to behave the way you need, but I am not aware of one. You still have the option to use another library (i.e., stop using jackson-dataformat-xml) or to write a custom Jackson serializer for the Element class and register it in ObjectMapper.

potoo0 commented 1 year ago

Thanks. Just replace g.writeString(result.getWriter().toString()); with

// Emit the already-serialized XML text via writeRaw instead of writeString
// (writeString is the call that produced the escaped output reported above).
g.writeRaw(result.getWriter().toString());
// NOTE(review): this cast assumes the generator is always a ToXmlGenerator.
ToXmlGenerator toXmlGenerator = (ToXmlGenerator) g;
// Flag the next value as unwrapped, then write a null value for the property.
// Presumably this keeps the property's own element from being emitted - confirm
// against the ToXmlGenerator documentation.
toXmlGenerator.setNextIsUnwrapped(true);
g.writeNull();

This fits my needs.

full code:

package com.example.netconf;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JavaType;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.jsonFormatVisitors.JsonFormatVisitorWrapper;
import com.fasterxml.jackson.databind.ser.std.StdSerializer;
import com.fasterxml.jackson.dataformat.xml.ser.ToXmlGenerator;
import org.w3c.dom.Node;

import javax.xml.XMLConstants;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.IOException;
import java.io.StringWriter;

/**
 * Serializes a DOM {@link Node} as an embedded XML structure instead of escaped text.
 *
 * <p>When the target generator is a {@link ToXmlGenerator}, the node is rendered to
 * XML text and written with {@code writeRaw}. For any other generator the XML text is
 * written as an ordinary string value, so the serializer stays usable outside of XML
 * output.
 *
 * <p>TODO:
 * <ol>
 *   <li>deserialize an embedded XML structure</li>
 *   <li>fix serializing a DOM {@code Node} directly as the root value</li>
 * </ol>
 */
@SuppressWarnings("serial")
public class DOMEmbeddedSerializer extends StdSerializer<Node> {
    /** Factory for the transformers that render DOM nodes to text; secure processing is enabled. */
    protected final TransformerFactory transformerFactory;

    /**
     * Creates the serializer and its {@link TransformerFactory}.
     *
     * @throws IllegalStateException if the factory cannot be created or configured
     */
    public DOMEmbeddedSerializer() {
        super(Node.class);
        try {
            transformerFactory = TransformerFactory.newInstance();
            transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        } catch (Exception e) {
            throw new IllegalStateException("Could not instantiate `TransformerFactory`: " + e.getMessage(), e);
        }
    }

    /**
     * Renders {@code value} to XML text (no XML declaration, no indentation) and writes it.
     *
     * @param value the DOM node to serialize
     * @param g the generator to write to
     * @param provider used to report transformation failures as mapping problems
     * @throws IOException if writing to the generator fails or a mapping problem is reported
     * @throws IllegalStateException if no XML transformer can be created
     */
    @Override
    public void serialize(Node value, JsonGenerator g, SerializerProvider provider)
            throws IOException {
        try {
            Transformer transformer = transformerFactory.newTransformer();
            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
            transformer.setOutputProperty(OutputKeys.INDENT, "no");
            StringWriter buffer = new StringWriter();
            transformer.transform(new DOMSource(value), new StreamResult(buffer));
            String xml = buffer.toString();

            // Check the generator type BEFORE writing anything: the raw-embedding trick
            // only makes sense for XML output. Other generators get a plain string value
            // instead of raw XML followed by a ClassCastException.
            if (!(g instanceof ToXmlGenerator)) {
                g.writeString(xml);
                return;
            }

            g.writeRaw(xml);
            // todo: will throw an error(`document has no root (ie. trying to output empty document)`)
            //      if a dom.Node is serialized directly (as the root value)
            ToXmlGenerator toXmlGenerator = (ToXmlGenerator) g;
            toXmlGenerator.setNextIsUnwrapped(true);
            g.writeNull();
        } catch (TransformerConfigurationException e) {
            throw new IllegalStateException("Could not create XML Transformer for writing DOM `Node` value: " + e.getMessage(), e);
        } catch (TransformerException e) {
            provider.reportMappingProblem(e, "DOM `Node` value serialization failed: %s", e.getMessage());
        }
    }

    /** Reports the schema type as a string. */
    @Override
    public JsonNode getSchema(SerializerProvider provider, java.lang.reflect.Type typeHint) {
        // Well... it is serialized as String
        return createSchemaNode("string", true);
    }

    /** Tells the visitor, when one is supplied, to expect any format for this type. */
    @Override
    public void acceptJsonFormatVisitor(JsonFormatVisitorWrapper visitor, JavaType typeHint) throws JsonMappingException {
        if (visitor != null) {
            visitor.expectAnyFormat(typeHint);
        }
    }
}
cowtowncoder commented 1 year ago

@potoo0 Thank you for sharing this trick! That makes sense and could be useful for others who want to forcibly embed pre-encoded content. There are some possible challenges with it, but it probably works for many cases.

One other idea: given the work-around (which by-passes serialization), I wonder if use of @JsonRawValue would work? It should do about the same thing (just replace @XmlElement with @JsonRawValue -- "json" here doesn't mean format is JSON)

potoo0 commented 1 year ago

@potoo0 Thank you for sharing this trick! That makes sense and could be useful for others who want to forcibly embed pre-encoded content. There are some possible challenges with it, but it probably works for many cases.

One other idea: given the work-around (which by-passes serialization), I wonder if use of @JsonRawValue would work? It should do about the same thing (just replace @XmlElement with @JsonRawValue -- "json" here doesn't mean format is JSON)

@JsonRawValue does not work: its serializer calls .toString() on the value, which does not produce the XML string. The code used for @JsonRawValue:

/**
 * This is a simple dummy serializer that will just output raw values by calling
 * toString() on value to serialize.
 */
@SuppressWarnings("serial")
public class RawSerializer<T>
    extends StdSerializer<T>
{
    // .................... (rest of the class omitted in this excerpt)
    /**
     * Writes {@code value.toString()} as a raw value; the only conversion applied
     * to the value here is the {@code toString()} call.
     */
    @Override
    public void serialize(T value, JsonGenerator jgen, SerializerProvider provider) throws IOException {
        jgen.writeRawValue(value.toString());
    }
    // .................... (rest of the class omitted in this excerpt)
}
cowtowncoder commented 1 year ago

Ah ok. Makes sense; Element.toString() does not produce valid XML output.

Thank you for verifying this, @potoo0 . So to use that annotation, one would need to use a wrapper type that implemented serialization similar to code you showed earlier.

potoo0 commented 1 year ago

Sharing a simple serializer that writes dynamic XML attributes from a map:

/**
 * Serializes the entries of a map as XML attributes.<br>
 * Note: combine with {@code @JsonInclude(value = JsonInclude.Include.NON_EMPTY)}
 * or {@code @JacksonXmlProperty(isAttribute = true)} to avoid incorrect output when
 * the value is {@code null}.
 *
 * <p>
 * Usage:
 * <pre>{@code
 *   // Do not register this serializer on the mapper directly; use it on fields.
 *   class Srv {
 *      @JacksonXmlProperty(isAttribute = true)
 *      @JsonSerialize(using = DynaXmlAttributeSerializer.class)
 *      Map<String, String> attrs;
 *
 *      @JsonInclude(value = JsonInclude.Include.NON_EMPTY)
 *      @JsonSerialize(using = DynaXmlAttributeSerializer.class)
 *      Map<String, String> attrsOther;
 *   }
 * }</pre>
 **/
public class DynaXmlAttributeSerializer extends StdSerializer<Map<Object, Object>> {
    protected DynaXmlAttributeSerializer() {
        super(Map.class, true);
    }

    /**
     * Writes every entry of {@code value} whose key and value are both non-null as an
     * XML attribute, using {@code toString()} of the key as the attribute name and
     * {@code toString()} of the value as the attribute value.
     *
     * @param value the map to write; {@code null} or empty maps produce no attributes
     * @param g the generator; must be a {@link ToXmlGenerator}
     * @param provider unused
     * @throws IOException if writing to the generator fails
     * @throws IllegalStateException if {@code g} is not a {@link ToXmlGenerator}
     */
    @Override
    public void serialize(Map<Object, Object> value, JsonGenerator g, SerializerProvider provider) throws IOException {
        if (!(g instanceof ToXmlGenerator)) {
            throw new IllegalStateException("only support xml!");
        }
        ToXmlGenerator xgen = (ToXmlGenerator) g;
        xgen.setNextIsUnwrapped(true);
        xgen.writeNull();
        // Reset explicitly so that the next field is not wrongly unwrapped when this field is
        // additionally annotated with `@JacksonXmlProperty(isAttribute = true)`.
        // Open question from the author: does writeNull not clear the flag?
        xgen.setNextIsUnwrapped(false);
        if (value == null || value.isEmpty()) {
            return;
        }
        xgen.setNextIsAttribute(true);
        try {
            for (Map.Entry<Object, Object> entry : value.entrySet()) {
                if (entry.getKey() == null || entry.getValue() == null) {
                    continue;
                }
                xgen.writeStringField(entry.getKey().toString(), entry.getValue().toString());
            }
        } finally {
            // Always leave attribute mode, even if writing one of the entries failed.
            xgen.setNextIsAttribute(false);
        }
    }
}

example:

/** Example bean: one fixed attribute plus a map whose entries are written as attributes. */
class A {
    // Declared as an XML attribute through the annotation.
    @JacksonXmlProperty(isAttribute = true)
    public String name = "Rick";

    // Written by DynaXmlAttributeSerializer; the two entries show up as the
    // attributes k1 and xmlns:xc in the output below.
    @JsonSerialize(using = DynaXmlAttributeSerializer.class)
    public Map<Object, Object> attrs = ImmutableMap.of("k1","v1", "xmlns:xc", "h3c");
}
// serialize output
// <A name="Rick" k1="v1" xmlns:xc="h3c"/>