michaelrsweet / mxml

Tiny XML library.
https://www.msweet.org/mxml
Apache License 2.0
428 stars 157 forks source link

[LOTS OF PATCHES] Build + EBCDIC #104

Closed michaelrsweet closed 13 years ago

michaelrsweet commented 14 years ago

Version: -feature Original reporter:

Hello,

I've just found out that libxml2 cannot work in an EBCDIC environment. The reason is that the source is encoded in ASCII and the library assumes it can use '<' == UTF_CHAR, while in EBCDIC '<' in fact has a different code value.

The same goes for mxml; however, it is a much simpler library, so I added an ENCODE_NATIVE encoding, which does not perform any conversion. And it works now.

In order to support cross-compilation, Windows MinGW, a proper "make check", "make dist", and "make distcheck", and a separate builddir, I also converted the proprietary Makefile to automake.

I also used libtool to create the libraries, which is much simpler and more generic than the manual approach.

Then I reorganized the autoconf, to finish with the rewrite.

I also removed the generated files, there is no reason to store them in subversion.

The remaining work is to move the library files into src/lib and the tools into src/tools, to make it even more organized.

Please review, I will be happy to work with you in order to merge this work.

I hope I got this right.

michaelrsweet commented 14 years ago

Original reporter:

mxml-native-encoding.patch contains only the native-encoding changes. It works in an EBCDIC environment, which is important as libxml2 does not and cannot.

It just uses native encoding, no translation, so it actually works.

The patch does not yet add a --with-native-encoding option to autoconf to define ENABLE_NATIVE_ENCODING in config.h.

michaelrsweet commented 14 years ago

Original reporter:

I posted the latest diff against trunk.

Builds on Linux-2.6 (Intel, ppc), Linux-2.4, Windows(MSVC+mingw), Solaris-8, Solaris-10 (intel, sparc), AIX-5.3, AIX-6.1, OS/390.

Supports cross-compilation, "make dist", "make check" and more.

It uses libtool and automake; any other way would make it much harder.

michaelrsweet commented 13 years ago

Original reporter: Michael Sweet

Moving to Future release; after looking at the changes it is unlikely I'll be adding them to the regular release anytime soon, sorry...

michaelrsweet commented 13 years ago

Original reporter:

What will you add? Is the native support acceptable?

michaelrsweet commented 13 years ago

Original reporter: Michael Sweet

At this point even the native support is not acceptable due to the changes involved.

michaelrsweet commented 14 years ago

"mxml-native-encoding.patch":

Index: mxml-file.c
===================================================================
--- mxml-file.c (revision 406)
+++ mxml-file.c (working copy)
@@ -68,13 +71,18 @@
 #define ENCODE_UTF8    0       /* UTF-8 */
 #define ENCODE_UTF16BE 1       /* UTF-16 Big-Endian */
 #define ENCODE_UTF16LE 2       /* UTF-16 Little-Endian */
+#define ENCODE_NATIVE  3       /* Native encoding, no conversion */

 /*
  * Macro to test for a bad XML character...
  */

+#if ' ' == 0x20        /* ASCII */
 #define mxml_bad_char(ch) ((ch) < ' ' && (ch) != '\n' && (ch) != '\r' && (ch) != '\t')
+#else
+#define mxml_bad_char(ch) (!isprint(ch) && (ch) != '\n' && (ch) != '\r' && (ch) != '\t')
+#endif

 /*
@@ -98,7 +106,7 @@
  */

 static int     mxml_add_char(int ch, char **ptr, char **buffer,
-                         int *bufsize);
+                         int *bufsize, int *encoding);
 static int     mxml_fd_getc(void *p, int *encoding);
 static int     mxml_fd_putc(int ch, void *p);
 static int     mxml_fd_read(_mxml_fdbuf_t *buf);
@@ -628,7 +636,8 @@
 mxml_add_char(int  ch,         /* I  - Character to add */
               char **bufptr,       /* IO - Current position in buffer */
          char **buffer,        /* IO - Current buffer */
-         int  *bufsize)        /* IO - Current buffer size */
+         int  *bufsize,        /* IO - Current buffer size */
+         int  *encoding)       /* I  - Encoding */
 {
   char *newbuffer;         /* New buffer value */

@@ -657,7 +666,7 @@
     *buffer = newbuffer;
   }

-  if (ch < 0x80)
+  if (ch < 0x80 || *encoding == ENCODE_NATIVE)
   {
    /*
     * Single byte ASCII...
@@ -727,12 +736,13 @@

   switch (*encoding)
   {
+    case ENCODE_NATIVE :
     case ENCODE_UTF8 :
        /*
    * Got a UTF-8 character; convert UTF-8 to Unicode and return...
    */

-   if (!(ch & 0x80))
+   if (!(ch & 0x80) || *encoding == ENCODE_NATIVE)
    {
 #if DEBUG > 1
           printf("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
@@ -1145,12 +1155,13 @@

   switch (*encoding)
   {
+    case ENCODE_NATIVE :
     case ENCODE_UTF8 :
        /*
    * Got a UTF-8 character; convert UTF-8 to Unicode and return...
    */

-   if (!(ch & 0x80))
+   if (!(ch & 0x80) || *encoding == ENCODE_NATIVE)
    {
      if (mxml_bad_char(ch))
      {
@@ -1366,7 +1377,7 @@
   entptr = entity;

   while ((ch = (*getc_cb)(p, encoding)) != EOF)
-    if (ch > 126 || (!isalnum(ch) && ch != '#'))
+    if ((*encoding != ENCODE_NATIVE && ch > 126) || (!isalnum(ch) && ch != '#'))
       break;
     else if (entptr < (entity + sizeof(entity) - 1))
       *entptr++ = ch;
@@ -1459,7 +1470,11 @@
   parent     = top;
   first      = NULL;
   whitespace = 0;
+#ifdef ENABLE_NATIVE_ENCODING
+  encoding   = ENCODE_NATIVE;
+#else
   encoding   = ENCODE_UTF8;
+#endif

   if (cb && parent)
     type = (*cb)(parent);
@@ -1604,10 +1619,10 @@
      if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
        goto error;

-     if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+     if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
        goto error;
    }
-   else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+   else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
      goto error;
    else if (((bufptr - buffer) == 1 && buffer[0] == '?') ||
             ((bufptr - buffer) == 3 && !strncmp(buffer, "!--", 3)) ||
@@ -1627,7 +1642,7 @@
      if (ch == '>' && bufptr > (buffer + 4) &&
          bufptr[-3] != '-' && bufptr[-2] == '-' && bufptr[-1] == '-')
        break;
-     else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+     else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
        goto error;
    }

@@ -1684,7 +1699,7 @@
    {
      if (ch == '>' && !strncmp(bufptr - 2, "]]", 2))
        break;
-     else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+     else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
        goto error;
    }

@@ -1741,7 +1756,7 @@
    {
      if (ch == '>' && bufptr > buffer && bufptr[-1] == '?')
        break;
-     else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+     else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
        goto error;
    }

@@ -1814,7 +1829,7 @@
          if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
        goto error;

-       if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+       if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
          goto error;
      }
    }
@@ -1989,7 +2004,7 @@
       if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
    goto error;

-      if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+      if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
    goto error;
     }
     else if (type == MXML_OPAQUE || type == MXML_CUSTOM || !mxml_isspace(ch))
@@ -1998,7 +2013,7 @@
       * Add character to current buffer...
       */

-      if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+      if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
    goto error;
     }
   }
@@ -2159,7 +2174,7 @@
      if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
        goto error;

-   if (mxml_add_char(ch, &ptr, &name, &namesize))
+   if (mxml_add_char(ch, &ptr, &name, &namesize, encoding))
      goto error;

    if (ch == quote)
@@ -2182,7 +2197,7 @@
        if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
          goto error;

-     if (mxml_add_char(ch, &ptr, &name, &namesize))
+     if (mxml_add_char(ch, &ptr, &name, &namesize, encoding))
        goto error;
    }
     }
@@ -2228,7 +2243,7 @@
          if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
            goto error;

-       if (mxml_add_char(ch, &ptr, &value, &valsize))
+       if (mxml_add_char(ch, &ptr, &value, &valsize, encoding))
          goto error;
      }

@@ -2252,7 +2267,7 @@
          if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
            goto error;

-       if (mxml_add_char(ch, &ptr, &value, &valsize))
+       if (mxml_add_char(ch, &ptr, &value, &valsize, encoding))
          goto error;
      }

@@ -2343,8 +2358,9 @@

     switch (*encoding)
     {
+      case ENCODE_NATIVE :
       case ENCODE_UTF8 :
-     if (!(ch & 0x80))
+     if (!(ch & 0x80) || *encoding == ENCODE_NATIVE)
      {
 #if DEBUG > 1
             printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);