blaze / odo

Data Migration for the Blaze Project
http://odo.readthedocs.org/
BSD 3-Clause "New" or "Revised" License
1.01k stars 138 forks source link

Support Nullable Foreign-Keys #554

Closed dhirschfeld closed 7 years ago

dhirschfeld commented 7 years ago

A database I'm trying to connect to has a nullable foreign-key which odo doesn't currently handle:

Test Case:

# Minimal reproduction: build two SQLite tables linked by a nullable
# foreign key, insert one row whose foreign key is NULL, then wrap the
# referencing table with blaze.
import blaze as bz
from numpy.random import randint
import sqlalchemy as sa

# A SQLite URL with no file path; per the SQLAlchemy docs this is an
# in-memory database.
engine = sa.create_engine('sqlite://')
# The metadata is bound to the engine so that `metadata.create_all()` below
# can be called without arguments.
# NOTE(review): `MetaData(bind=...)` is a legacy SQLAlchemy API -- confirm the
# SQLAlchemy version before running this against a newer release.
metadata = sa.MetaData(bind=engine)

# Referencing table: `label_id` points at ForeignKeyLabels.pkid and is
# explicitly declared nullable.
T1 = sa.Table(
    'NullableForeignKeyDemo',
    metadata,
    sa.Column('pkid', sa.Integer, primary_key=True),
    sa.Column('label_id', sa.Integer, sa.ForeignKey("ForeignKeyLabels.pkid"), nullable=True),
)

# Referenced table: one text label per primary key.
T2 = sa.Table(
    'ForeignKeyLabels',
    metadata,
    sa.Column('pkid', sa.Integer, primary_key=True),
    sa.Column('label', sa.String),
)

# Emit CREATE TABLE for both tables on the bound engine.
metadata.create_all()

# Ten rows with pkid 0..9; each label_id is a random integer drawn from
# [0, 10). `int(value)` converts the numpy integer to a plain Python int
# before it is handed to the database driver.
records1 = [
    {'pkid': idx, 'label_id': int(value)}
    for idx, value
    in enumerate(randint(0, 10, 10))
]
# Overwrite the last row so that exactly one record has a NULL foreign key.
records1[-1]['label_id'] = None  # foreign-key is nullable!

# Ten label rows with pkid 0..9 and labels 'A'..'J' (chr(65) == 'A').
records2 = [
    {'pkid': pkid, 'label': chr(pkid+65)}
    for pkid in range(10)
]
# Bulk-insert both record lists over a single connection.
# NOTE(review): the referencing rows are inserted before the rows they point
# at; this presumably relies on the foreign key not being enforced at insert
# time -- confirm before reusing with another backend.
with engine.connect() as conn:
    conn.execute(T1.insert(), records1)
    conn.execute(T2.insert(), records2)

# Wrap the referencing table with blaze; the datashape blaze infers for it is
# what the issue is about.
data = bz.data(T1)

As can be seen, the inferred datashape types the `label_id` column as a plain `map[int32, ...]` and doesn't recognize the map key as being nullable:

In [42]: data
Out[42]: <'Table' data; _name='_4', dshape='var * {  pkid: int32,  label_id: map[int32, {pkid:...'>

...which then raises a `TypeError` when odo tries to convert the `None` foreign-key value to an integer:

In [43]: odo(data, pd.DataFrame)
Traceback (most recent call last):

  File "<ipython-input-43-9399f6fc5f8e>", line 1, in <module>
    odo(data, pd.DataFrame)

  File "C:\Miniconda3\lib\site-packages\odo\odo.py", line 91, in odo
    return into(target, source, **kwargs)

  File "C:\Miniconda3\lib\site-packages\multipledispatch\dispatcher.py", line 164, in __call__
    return func(*args, **kwargs)

  File "C:\Miniconda3\lib\site-packages\blaze\compute\core.py", line 372, in into
    return into(a, b.data, **kwargs)

  File "C:\Miniconda3\lib\site-packages\multipledispatch\dispatcher.py", line 164, in __call__
    return func(*args, **kwargs)

  File "C:\Miniconda3\lib\site-packages\odo\into.py", line 43, in wrapped
    return f(*args, **kwargs)

  File "C:\Miniconda3\lib\site-packages\odo\into.py", line 53, in into_type
    return convert(a, b, dshape=dshape, **kwargs)

  File "C:\Miniconda3\lib\site-packages\odo\core.py", line 83, in __call__
    return _transform(self.graph, *args, **kwargs)

  File "C:\Miniconda3\lib\site-packages\odo\core.py", line 106, in _transform
    x = f(x, excluded_edges=excluded_edges, **kwargs)

  File "C:\Miniconda3\lib\site-packages\odo\backends\sql.py", line 744, in select_or_selectable_to_frame
    dtype=[(str(c), dtypes[c]) for c in columns]))

TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'