vsemenov / protobuf-java-format

Automatically exported from code.google.com/p/protobuf-java-format
BSD 3-Clause "New" or "Revised" License
0 stars 0 forks source link

If a serialized byte array has unescaped chars, deserialization will fail (even in trunk) #20

Open GoogleCodeExporter opened 8 years ago

GoogleCodeExporter commented 8 years ago
What steps will reproduce the problem?
1. Serialize an object with bytes "P\001"
2. Try to deserialize it (XML)
3. Cannot deserialize

A small fix for that (alphanumeric bytes are still readable, and other bytes are
serialized as hex escapes by default):

--- XmlFormat.java  2010-06-03 17:06:18.000000000 +0100
+++ XmlFormat.java  2010-10-14 15:46:09.792528214 +0100
@@ -39,6 +39,7 @@

 import com.google.protobuf.Descriptors.EnumValueDescriptor;
 import com.google.protobuf.Descriptors.FieldDescriptor;
+import com.google.protobuf.util.*;

 /**
  * Provide ascii text parsing and formatting support for proto2 instances. The implementation
@@ -361,10 +362,11 @@
         private static final Pattern WHITESPACE =
           Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
         private static final Pattern TOKEN = Pattern.compile(
-          "extension|" + "[a-zA-Z_\\s;@][0-9a-zA-Z_\\s;@+-]*+|" +        // an 
identifier with special handling for 'extension'
+          "extension|" +                                // special handling 
for 'extension'
+          "[0-9a-zA-Z\\\\\\\"\\\']++|" +                // a \000 byte 
sequence for bytes handling
+          "[a-zA-Z_\\s;@][0-9a-zA-Z_\\s;@+-]*+|" +      // an identifier
           "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" +             // a number
           "</|" +                                       // an '</' closing element marker
-          "[\\\\0-9]++|" +                              // a \000 byte 
sequence for bytes handling
           "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" +       // a double-quoted string
           "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)",         // a single-quoted string
           Pattern.MULTILINE);
@@ -1041,13 +1043,11 @@
                     builder.append("\\\"");
                     break;
                 default:
-                    if (b >= 0x20) {
+                    if ((b >= 0x30 && b <= 0x39) || (b >= 0x41 && b <= 0x5A) 
|| (b >= 0x61 && b <= 0x7A)) {
                         builder.append((char) b);
                     } else {
-                        builder.append('\\');
-                        builder.append((char) ('0' + ((b >>> 6) & 3)));
-                        builder.append((char) ('0' + ((b >>> 3) & 7)));
-                        builder.append((char) ('0' + (b & 7)));
+                        builder.append("\\x");
+                        builder.append(HexUtils.getHexString(b, 2));
                     }
                     break;
             }

Original issue reported on code.google.com by mateus...@gmail.com on 14 Oct 2010 at 2:47