protocolbuffers / protobuf

Protocol Buffers - Google's data interchange format
http://protobuf.dev
Other
65.52k stars 15.46k forks source link

Generated python script file import error: SyntaxError: Non-ASCII character #2109

Closed lipixun closed 7 years ago

lipixun commented 8 years ago

Importing the generated Python file fails with `SyntaxError: Non-ASCII character xxxx in file xxx` when the file contains non-ASCII characters that came from my gRPC proto file. (In my case, the non-ASCII characters are in comments.)

According to: https://www.python.org/dev/peps/pep-0263/

I came up with two ideas:

How can I solve this problem? BTW, I'm using protoc 3.0.0.

xfxyjwf commented 8 years ago

So Python doesn't support non-ASCII comments? Can you attach the Python file generated by protoc?

lipixun commented 8 years ago

@xfxyjwf

sure, here is the proto file test.proto:

// Just a sample

syntax = "proto3";

package test;

// Message with a single string field; used below as both the request and the
// response type of SomeService.SomeMethod.

message SomeMessage {
    string field = 1;
}

// Service with one unary RPC. The comment on the rpc below deliberately ends
// with non-ASCII text (a Chinese phrase meaning "a method").

service SomeService {
  // Just comment this line in chinese: 一个方法
  rpc SomeMethod(SomeMessage) returns(SomeMessage) {}
}

Run protoc:

python -m grpc.tools.protoc -I=. --python_out=. --grpc_python_out=. test.proto

Run the generated file by python:

python test_pb2.py

This gives an error:

  File "test_pb2.py", line 94
SyntaxError: Non-ASCII character '\xe4' in file test_pb2.py on line 94, but no encoding declared; see http://python.org/dev/peps/pep-0263/ for details

The generated python file:

# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: test.proto

import sys
# Bytes helper: the identity function when the major Python version is below 3;
# otherwise a function that encodes its argument with latin1.
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)

# Default symbol database; the file descriptor and the message class defined in
# this module are registered with it.
_sym_db = _symbol_database.Default()

# File-level descriptor for test.proto (package 'test', proto3 syntax).
# NOTE(review): serialized_pb presumably holds the serialized file descriptor
# for test.proto; it is passed through _b before being handed over -- confirm.
DESCRIPTOR = _descriptor.FileDescriptor(
  name='test.proto',
  package='test',
  syntax='proto3',
  serialized_pb=_b('\n\ntest.proto\x12\x04test\"\x1c\n\x0bSomeMessage\x12\r\n\x05\x66ield\x18\x01 \x01(\t2C\n\x0bSomeService\x12\x34\n\nSomeMethod\x12\x11.test.SomeMessage\x1a\x11.test.SomeMessage\"\x00\x62\x06proto3')
)
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Descriptor for the message test.SomeMessage, which declares a single field
# named 'field' with field number 1.
_SOMEMESSAGE = _descriptor.Descriptor(
  name='SomeMessage',
  full_name='test.SomeMessage',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # NOTE(review): type=9 / cpp_type=9 presumably denote the string type;
    # verify against the FieldDescriptor constants. The default value is the
    # empty string obtained by decoding _b("") as UTF-8.
    _descriptor.FieldDescriptor(
      name='field', full_name='test.SomeMessage.field', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # NOTE(review): presumably the byte offsets of this message's definition
  # within DESCRIPTOR's serialized_pb -- confirm.
  serialized_start=20,
  serialized_end=48,
)

# Make the message descriptor reachable by name from the file descriptor.
DESCRIPTOR.message_types_by_name['SomeMessage'] = _SOMEMESSAGE

# Build the concrete SomeMessage class from its descriptor via
# GeneratedProtocolMessageType (base class: _message.Message), then register
# the class with the symbol database.
SomeMessage = _reflection.GeneratedProtocolMessageType('SomeMessage', (_message.Message,), dict(
  DESCRIPTOR = _SOMEMESSAGE,
  __module__ = 'test_pb2'
  # @@protoc_insertion_point(class_scope:test.SomeMessage)
  ))
_sym_db.RegisterMessage(SomeMessage)

import grpc
from grpc.beta import implementations as beta_implementations
from grpc.beta import interfaces as beta_interfaces
from grpc.framework.common import cardinality
from grpc.framework.interfaces.face import utilities as face_utilities

class SomeServiceStub(object):
  """Client-side stub that exposes SomeMethod as a callable built from a channel."""

  def __init__(self, channel):
    """Constructor.

    Args:
      channel: A grpc.Channel.
    """
    # Store the callable returned by channel.unary_unary for the
    # '/test.SomeService/SomeMethod' path. SomeMessage.SerializeToString is
    # supplied as the request serializer and SomeMessage.FromString as the
    # response deserializer.
    self.SomeMethod = channel.unary_unary(
        '/test.SomeService/SomeMethod',
        request_serializer=SomeMessage.SerializeToString,
        response_deserializer=SomeMessage.FromString,
        )

class SomeServiceServicer(object):
  """Servicer whose SomeMethod reports UNIMPLEMENTED and raises NotImplementedError."""

  def SomeMethod(self, request, context):
    """Just comment this line in chinese: 一个方法
    """
    # The docstring above mirrors the comment on the rpc in test.proto; its
    # trailing Chinese phrase means "a method". It is non-ASCII source text in
    # a file that carries no encoding declaration.
    # Mark the call as unimplemented on the context, then raise.
    context.set_code(grpc.StatusCode.UNIMPLEMENTED)
    context.set_details('Method not implemented!')
    raise NotImplementedError('Method not implemented!')

def add_SomeServiceServicer_to_server(servicer, server):
  """Register servicer's SomeMethod with server under 'test.SomeService'.

  Args:
    servicer: Object providing a SomeMethod attribute used as the handler.
    server: Object providing add_generic_rpc_handlers; it receives a
      one-element tuple containing the generic handler built here.
  """
  # Map each method name to a unary-unary handler that deserializes requests
  # with SomeMessage.FromString and serializes responses with
  # SomeMessage.SerializeToString.
  rpc_method_handlers = {
      'SomeMethod': grpc.unary_unary_rpc_method_handler(
          servicer.SomeMethod,
          request_deserializer=SomeMessage.FromString,
          response_serializer=SomeMessage.SerializeToString,
      ),
  }
  generic_handler = grpc.method_handlers_generic_handler(
      'test.SomeService', rpc_method_handlers)
  server.add_generic_rpc_handlers((generic_handler,))

class BetaSomeServiceServicer(object):
  """Beta-API servicer whose SomeMethod only reports UNIMPLEMENTED on the context."""
  def SomeMethod(self, request, context):
    """Just comment this line in chinese: 一个方法
    """
    # The docstring above mirrors the comment on the rpc in test.proto; its
    # trailing Chinese phrase means "a method".
    # Unlike SomeServiceServicer.SomeMethod, this does not raise; it only calls
    # context.code with the beta UNIMPLEMENTED status.
    context.code(beta_interfaces.StatusCode.UNIMPLEMENTED)

class BetaSomeServiceStub(object):
  """Beta-API stub placeholder; calling SomeMethod raises NotImplementedError."""
  def SomeMethod(self, request, timeout, metadata=None, with_call=False, protocol_options=None):
    """Just comment this line in chinese: 一个方法
    """
    # The docstring above mirrors the comment on the rpc in test.proto; its
    # trailing Chinese phrase means "a method".
    raise NotImplementedError()
  # Attach a `future` attribute to the method, initialised to None.
  SomeMethod.future = None

def beta_create_SomeService_server(servicer, pool=None, pool_size=None, default_timeout=None, maximum_timeout=None):
  """Build a beta-API server for test.SomeService backed by servicer.

  Args:
    servicer: Object whose SomeMethod is wrapped with
      face_utilities.unary_unary_inline.
    pool: Forwarded to server_options as thread_pool.
    pool_size: Forwarded to server_options as thread_pool_size.
    default_timeout: Forwarded to server_options as default_timeout.
    maximum_timeout: Forwarded to server_options as maximum_timeout.

  Returns:
    The value returned by beta_implementations.server.
  """
  # All three tables are keyed by the (service name, method name) pair.
  request_deserializers = {
    ('test.SomeService', 'SomeMethod'): SomeMessage.FromString,
  }
  response_serializers = {
    ('test.SomeService', 'SomeMethod'): SomeMessage.SerializeToString,
  }
  method_implementations = {
    ('test.SomeService', 'SomeMethod'): face_utilities.unary_unary_inline(servicer.SomeMethod),
  }
  server_options = beta_implementations.server_options(request_deserializers=request_deserializers, response_serializers=response_serializers, thread_pool=pool, thread_pool_size=pool_size, default_timeout=default_timeout, maximum_timeout=maximum_timeout)
  return beta_implementations.server(method_implementations, options=server_options)

def beta_create_SomeService_stub(channel, host=None, metadata_transformer=None, pool=None, pool_size=None):
  """Build a beta-API dynamic stub for test.SomeService on channel.

  Args:
    channel: Passed as the first argument to beta_implementations.dynamic_stub.
    host: Forwarded to stub_options as host.
    metadata_transformer: Forwarded to stub_options as metadata_transformer.
    pool: Forwarded to stub_options as thread_pool.
    pool_size: Forwarded to stub_options as thread_pool_size.

  Returns:
    The value returned by beta_implementations.dynamic_stub.
  """
  # Serializer tables are keyed by the (service name, method name) pair.
  request_serializers = {
    ('test.SomeService', 'SomeMethod'): SomeMessage.SerializeToString,
  }
  response_deserializers = {
    ('test.SomeService', 'SomeMethod'): SomeMessage.FromString,
  }
  # Cardinalities are keyed by the bare method name only.
  cardinalities = {
    'SomeMethod': cardinality.Cardinality.UNARY_UNARY,
  }
  stub_options = beta_implementations.stub_options(host=host, metadata_transformer=metadata_transformer, request_serializers=request_serializers, response_deserializers=response_deserializers, thread_pool=pool, thread_pool_size=pool_size)
  return beta_implementations.dynamic_stub(channel, 'test.SomeService', cardinalities, options=stub_options)
# @@protoc_insertion_point(module_scope)

Actually, the error is caused by the comment "Just comment this line in chinese: 一个方法", which contains non-ASCII characters.

The workaround is to insert `#encoding=utf8` as the first line of each generated file, like this: `sed -i "1s/^/#encoding=utf8\n/" *.py`

anandolee commented 7 years ago

Protobuf's protoc does not print comments. The comments are printed by the gRPC plugin's PrintAllComments: https://github.com/grpc/grpc/blob/d86080d402d1e254bae8c001013527651db4f33a/src/compiler/python_generator.cc#L208

You may want to file an issue with the gRPC team. Closing this issue for cleanup.