Closed CaptSolo closed 9 years ago
The library was installed from this Git repository using pip.
Installation log:
Collecting git+https://github.com/gfairchild/pyxDamerauLevenshtein
Cloning https://github.com/gfairchild/pyxDamerauLevenshtein to /tmp/pip-CbcD1E-build
Installing collected packages: pyxDamerauLevenshtein
Running setup.py install for pyxDamerauLevenshtein
building 'pyxdameraulevenshtein' extension
/usr/bin/clang -fno-strict-aliasing -fno-common -dynamic -pipe -Os -fwrapv -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -I/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy/core/include -I/opt/local/Library/Frameworks/Python.framework/Versions/2.7/include/python2.7 -c pyxdameraulevenshtein/pyxdameraulevenshtein.c -o build/temp.macosx-10.9-x86_64-2.7/pyxdameraulevenshtein/pyxdameraulevenshtein.o
In file included from pyxdameraulevenshtein/pyxdameraulevenshtein.c:250:
In file included from /opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy/core/include/numpy/arrayobject.h:4:
In file included from /opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy/core/include/numpy/ndarrayobject.h:17:
In file included from /opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy/core/include/numpy/ndarraytypes.h:1804:
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h:15:2: warning: "Using deprecated NumPy API, disable it by " "#defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" [-W#warnings]
#warning "Using deprecated NumPy API, disable it by " \
^
In file included from pyxdameraulevenshtein/pyxdameraulevenshtein.c:250:
In file included from /opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy/core/include/numpy/arrayobject.h:4:
In file included from /opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy/core/include/numpy/ndarrayobject.h:26:
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy/core/include/numpy/__multiarray_api.h:1629:1: warning: unused function '_import_array' [-Wunused-function]
_import_array(void)
^
In file included from pyxdameraulevenshtein/pyxdameraulevenshtein.c:251:
In file included from /opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy/core/include/numpy/ufuncobject.h:317:
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy/core/include/numpy/__ufunc_api.h:241:1: warning: unused function '_import_umath' [-Wunused-function]
_import_umath(void)
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:7260:28: warning: unused function '__Pyx_PyObject_AsString' [-Wunused-function]
static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:7257:32: warning: unused function '__Pyx_PyUnicode_FromString' [-Wunused-function]
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:313:29: warning: unused function '__Pyx_Py_UNICODE_strlen' [-Wunused-function]
static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u)
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:7372:33: warning: unused function '__Pyx_PyIndex_AsSsize_t' [-Wunused-function]
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:7397:33: warning: unused function '__Pyx_PyInt_FromSize_t' [-Wunused-function]
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:5251:27: warning: unused function '__Pyx_ExceptionSave' [-Wunused-function]
static CYTHON_INLINE void __Pyx_ExceptionSave(PyObject **type, PyObject **value, PyObject **tb) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6052:32: warning: unused function '__Pyx_GetItemInt_List_Fast' [-Wunused-function]
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i,
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6066:32: warning: unused function '__Pyx_GetItemInt_Tuple_Fast' [-Wunused-function]
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i,
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6639:48: warning: unused function '__pyx_t_float_complex_from_parts' [-Wunused-function]
static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6649:30: warning: unused function '__Pyx_c_eqf' [-Wunused-function]
static CYTHON_INLINE int __Pyx_c_eqf(__pyx_t_float_complex a, __pyx_t_float_complex b) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6652:48: warning: unused function '__Pyx_c_sumf' [-Wunused-function]
static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sumf(__pyx_t_float_complex a, __pyx_t_float_complex b) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6658:48: warning: unused function '__Pyx_c_difff' [-Wunused-function]
static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_difff(__pyx_t_float_complex a, __pyx_t_float_complex b) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6670:48: warning: unused function '__Pyx_c_quotf' [-Wunused-function]
static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quotf(__pyx_t_float_complex a, __pyx_t_float_complex b) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6677:48: warning: unused function '__Pyx_c_negf' [-Wunused-function]
static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_negf(__pyx_t_float_complex a) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6683:30: warning: unused function '__Pyx_c_is_zerof' [-Wunused-function]
static CYTHON_INLINE int __Pyx_c_is_zerof(__pyx_t_float_complex a) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6686:48: warning: unused function '__Pyx_c_conjf' [-Wunused-function]
static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conjf(__pyx_t_float_complex a) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6700:52: warning: unused function '__Pyx_c_powf' [-Wunused-function]
static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_powf(__pyx_t_float_complex a, __pyx_t_float_complex b) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6759:49: warning: unused function '__pyx_t_double_complex_from_parts' [-Wunused-function]
static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6769:30: warning: unused function '__Pyx_c_eq' [-Wunused-function]
static CYTHON_INLINE int __Pyx_c_eq(__pyx_t_double_complex a, __pyx_t_double_complex b) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6772:49: warning: unused function '__Pyx_c_sum' [-Wunused-function]
static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum(__pyx_t_double_complex a, __pyx_t_double_complex b) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6778:49: warning: unused function '__Pyx_c_diff' [-Wunused-function]
static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff(__pyx_t_double_complex a, __pyx_t_double_complex b) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6790:49: warning: unused function '__Pyx_c_quot' [-Wunused-function]
static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot(__pyx_t_double_complex a, __pyx_t_double_complex b) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6797:49: warning: unused function '__Pyx_c_neg' [-Wunused-function]
static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg(__pyx_t_double_complex a) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6803:30: warning: unused function '__Pyx_c_is_zero' [-Wunused-function]
static CYTHON_INLINE int __Pyx_c_is_zero(__pyx_t_double_complex a) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6806:49: warning: unused function '__Pyx_c_conj' [-Wunused-function]
static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj(__pyx_t_double_complex a) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:6820:53: warning: unused function '__Pyx_c_pow' [-Wunused-function]
static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow(__pyx_t_double_complex a, __pyx_t_double_complex b) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:7010:32: warning: unused function '__Pyx_PyInt_From_long' [-Wunused-function]
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:7036:27: warning: function '__Pyx_PyInt_As_long' is not needed and will not be emitted [-Wunneeded-internal-declaration]
static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:3610:32: warning: unused function '__pyx_f_5numpy_PyArray_MultiIterNew1' [-Wunused-function]
static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:3660:32: warning: unused function '__pyx_f_5numpy_PyArray_MultiIterNew2' [-Wunused-function]
static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:3710:32: warning: unused function '__pyx_f_5numpy_PyArray_MultiIterNew3' [-Wunused-function]
static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:3760:32: warning: unused function '__pyx_f_5numpy_PyArray_MultiIterNew4' [-Wunused-function]
static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:3810:32: warning: unused function '__pyx_f_5numpy_PyArray_MultiIterNew5' [-Wunused-function]
static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:4577:27: warning: unused function '__pyx_f_5numpy_set_array_base' [-Wunused-function]
static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) {
^
pyxdameraulevenshtein/pyxdameraulevenshtein.c:4665:32: warning: unused function '__pyx_f_5numpy_get_array_base' [-Wunused-function]
static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) {
^
38 warnings generated.
/usr/bin/clang -bundle -undefined dynamic_lookup -L/opt/local/lib -Wl,-headerpad_max_install_names -L/opt/local/lib/db48 build/temp.macosx-10.9-x86_64-2.7/pyxdameraulevenshtein/pyxdameraulevenshtein.o -o build/lib.macosx-10.9-x86_64-2.7/pyxdameraulevenshtein.so
Successfully installed pyxDamerauLevenshtein-1.2.1
I wonder if @ovarene has seen this (he made the pull request - #3). I haven't had a chance to test this yet (which is why it's not in the PyPI install). I'm hoping to be able to test it in the next couple of days.
The specified dtype is incorrect; you should use dtype='S':
In [17]: import numpy as np
In [18]: from pyxdameraulevenshtein import damerau_levenshtein_distance_withNPArray
In [19]: store = { "key1":"value1","key2":"value2" }
In [20]: keys = np.array(store.keys(),dtype='S')
In [21]: damerau_levenshtein_distance_withNPArray("test",keys)
Out[21]: array([3, 3])
(Python 2.7.6, NumPy 1.9.2)
The use of dtype="S" is not an obvious choice when the strings are Unicode (though I am not experienced with NumPy, so I might be mistaken), and it throws encoding errors if there are non-ASCII characters:
a = (u"Lācis", u"lieliskā piektdiena")
b = numpy.array(a, dtype="S")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
UnicodeEncodeError: 'ascii' codec can't encode character u'\u0101' in position 1: ordinal not in range(128)
Does this mean that strings should be encoded as UTF-8 before being stored in the NumPy array, and that the library will know to convert them back to Unicode?
In [1]: from pyxdameraulevenshtein import damerau_levenshtein_distance_withNPArray
In [2]: import numpy as np
In [3]: v = ["éllo","màlo"]
In [4]: d = np.array(v)
In [6]: damerau_levenshtein_distance_withNPArray("hello",d)
Out[6]: array([2, 3])
You have to set it to dtype='S' because of this function, which is called inside damerau_levenshtein_distance():
cdef unicode to_unicode(s):
    """
    Convert s to a proper unicode type (handles difference between Python 2 and 3).
    Code comes from https://groups.google.com/d/msg/cython-users/ofT3fo48ohs/rrf3dtbHkm4J
    """
    if isinstance(s, bytes):
        return (<bytes>s).decode('UTF-8')
    return s
That's the way I had to make it work for my use-case. (My use-case is reading data files via pandas with utf-8 encoding)
Hope it helps
If you have any idea how to make it work for your own use case, please feel free to provide a patch; that would indeed be a very welcome contribution.
best regards
The function to_unicode() expects a bytestring, so to be able to use this with your Unicode array, you could do something like:
In [15]: import numpy as np
In [16]: from pyxdameraulevenshtein import damerau_levenshtein_distance_withNPArray as dlda
In [17]: a = (u"Lācis", u"lieliskā piektdiena")
In [18]: b = np.array( [ x.encode('utf8') for x in a ] )
In [19]: dlda("test",b)
Out[19]: array([ 5, 16])
@CaptSolo, are you happy with the resolution of this issue? Can it be closed?
I'm going to go ahead and close this issue since there's been no activity on it in about 2 months.
Found a problem running damerau_levenshtein_distance_withNPArray with an array of Unicode strings under Python 2.7.
Environment: Python 2.7 (under Mac OS X), NumPy 1.9.2