droyo / go-xml

utility and code-generation libraries for XML
MIT License
302 stars 114 forks source link

XML from MarshalIndent includes unwanted whitespaces #82

Closed bombsimon closed 5 years ago

bombsimon commented 5 years ago

Thank you for a great library! It's been really helpful for me to use while traversing XML.

I'm using the library to marshal XML from xmltree.Element. The XML generated will in some cases be read by a human and because of that I'm using xmltree.MarshalIndent. The problem is that the XML generated includes too much leading and trailing whitespaces so when being unmarshalled back to the original type (with standard library) the content includes unwanted newlines.

Example to reproduce

package main

import (
    "encoding/xml"
    "fmt"

    "aqwari.net/xml/xmltree"
)

type Test struct {
    Field1 string `xml:"parent>child1"`
    Field2 int    `xml:"parent>child2"`
    Field3 string `xml:"parent>child3"`
}

func main() {
    t := Test{"test string", 10, "another string"}

    // MarshalIndent with standard library
    std, _ := xml.MarshalIndent(t, "", "  ")

    // Parse the mashalled XML with xml tree and MarshalIndent the parsed tree
    elements, _ := xmltree.Parse(std)
    xt := xmltree.MarshalIndent(elements, "", "  ")

    // Print result from both libraries
    fmt.Println(string(std))
    fmt.Println("-")
    fmt.Println(string(xt))

    // Unmarshal the two generated byte slices with standard library
    t1 := Test{}
    t2 := Test{}

    xml.Unmarshal(std, &t1)
    xml.Unmarshal(xt, &t2)

    // The XML from stdandard library contains no whitespaces
    fmt.Printf(
        "Stdlib:  f1 '%s', f2 '%d', f3: '%s'\n",
        t1.Field1, t1.Field2, t1.Field3,
    )

    // The XML from xmltree library contains too much whitespaces
    fmt.Printf(
        "Xmltree: f1 '%s', f2 '%d', f3: '%s'\n",
        t2.Field1, t2.Field2, t2.Field3,
    )
}

Output

<Test>
  <parent>
    <child1>test string</child1>
    <child2>10</child2>
    <child3>another string</child3>
  </parent>
</Test>
-
<Test>
  <parent>
    <child1>
      test string
    </child1>
    <child2>
      10
    </child2>
    <child3>
      another string
    </child3>
  </parent>
</Test>

Stdlib:  f1 'test string', f2 '10', f3: 'another string'
Xmltree: f1 '
      test string
    ', f2 '10', f3: '
      another string
    '

This change to xmltree/marshal.go seems to fix my issues:

diff --git a/xmltree/marshal.go b/xmltree/marshal.go
index 001b63a..1c12495 100644
--- a/xmltree/marshal.go
+++ b/xmltree/marshal.go
@@ -115,16 +115,8 @@ func (e *encoder) encode(el, parent *Element, visited map[*Element]struct{}) err
 }

 func (e *encoder) WriteContent(content []byte, depth int) error {
-   if e.pretty {
-       io.WriteString(e.w, e.prefix)
-       for i := 0; i < depth; i++ {
-           io.WriteString(e.w, e.indent)
-       }
-   }
    e.w.Write(content)
-   if e.pretty && !bytes.HasSuffix(content, []byte("\n")) {
-       io.WriteString(e.w, "\n")
-   }
+
    return nil
 }

@@ -161,7 +153,9 @@ func (e *encoder) encodeOpenTag(el *Element, scope Scope, depth int) error {
        return err
    }
    if e.pretty {
-       io.WriteString(e.w, "\n")
+       if len(el.Children) > 0 || len(el.Content) == 0 {
+           io.WriteString(e.w, "\n")
+       }
    }
    return nil
 }
@@ -169,7 +163,9 @@ func (e *encoder) encodeOpenTag(el *Element, scope Scope, depth int) error {
 func (e *encoder) encodeCloseTag(el *Element, depth int) error {
    if e.pretty {
        for i := 0; i < depth; i++ {
-           io.WriteString(e.w, e.indent)
+           if len(el.Children) > 0 {
+               io.WriteString(e.w, e.indent)
+           }
        }
    }
    if err := tagTmpl.ExecuteTemplate(e.w, "end", el); err != nil {

So while running the example again with the above changes this is the output:

<Test>
  <parent>
    <child1>test string</child1>
    <child2>10</child2>
    <child3>another string</child3>
  </parent>
</Test>
-
<Test>
  <parent>
    <child1>test string</child1>
    <child2>10</child2>
    <child3>another string</child3>
  </parent>
</Test>

Stdlib:  f1 'test string', f2 '10', f3: 'another string'
Xmltree: f1 'test string', f2 '10', f3: 'another string'

Am I missing something or do you agree that this is a bug? If you agree, do you want me to create a PR?

Thanks!

droyo commented 5 years ago

Thanks for the report! Your fix looks reasonable. Can you make a PR?

bombsimon commented 5 years ago

@droyo Cool! Yeah of course, I created a PR with the changes and some tests to confirm this.

The Travis build failed. Do you want me to look into that or do you know why that is?