Open scottiegarcia opened 4 months ago
@scottiegarcia good to close?
Unfortunately no. I needed to test loading as well and am running into errors there.
Code snippet below:
from dataprofiler import Profiler
import pandas as pd
import os
df = pd.DataFrame({"a": [130277]})
dp = Profiler(df)
dp.save("test.json", save_method="json")
dp = Profiler.load("test.json", load_method="json")
Stack trace:
Traceback (most recent call last):
File "/helpful-scripts/bug.py", line 12, in <module>
dp = Profiler.load("test.json", load_method="json")
File "/DataProfiler/dataprofiler/profilers/profile_builder.py", line 3160, in load
return BaseProfiler.load(filepath, load_method)
File "/DataProfiler/dataprofiler/profilers/profile_builder.py", line 1201, in load
return load_profiler(json.load(infile), {})
File "/DataProfiler/dataprofiler/profilers/json_decoder.py", line 246, in load_profiler
return profiler_cls.load_from_dict(serialized_json["data"], config)
File "/DataProfiler/dataprofiler/profilers/profile_builder.py", line 2142, in load_from_dict
structured_profiler = super().load_from_dict(data, config)
File "/DataProfiler/dataprofiler/profilers/profile_builder.py", line 913, in load_from_dict
value[idx] = load_structured_col_profiler(profile, config)
File "/DataProfiler/dataprofiler/profilers/json_decoder.py", line 276, in load_structured_col_profiler
return profiler_cls.load_from_dict(serialized_json["data"], config)
File "/DataProfiler/dataprofiler/profilers/profile_builder.py", line 413, in load_from_dict
value[profile_key] = load_compiler(profile_value, config)
File "/DataProfiler/dataprofiler/profilers/json_decoder.py", line 191, in load_compiler
return column_profiler_cls.load_from_dict(serialized_json["data"], config)
File "/DataProfiler/dataprofiler/profilers/column_profile_compilers.py", line 242, in load_from_dict
value[col_type] = load_column_profile(profile_as_dict, config)
File "/DataProfiler/dataprofiler/profilers/json_decoder.py", line 159, in load_column_profile
return column_profiler_cls.load_from_dict(serialized_json["data"], config)
File "/DataProfiler/dataprofiler/profilers/datetime_column_profile.py", line 152, in load_from_dict
profile._dt_obj_min = pd.Timestamp(profile._dt_obj_min)
File "pandas/_libs/tslibs/timestamps.pyx", line 1698, in pandas._libs.tslibs.timestamps.Timestamp.__new__
File "pandas/_libs/tslibs/conversion.pyx", line 249, in pandas._libs.tslibs.conversion.convert_to_tsobject
File "pandas/_libs/tslibs/conversion.pyx", line 523, in pandas._libs.tslibs.conversion._convert_str_to_tsobject
File "pandas/_libs/tslibs/conversion.pyx", line 506, in pandas._libs.tslibs.conversion._convert_str_to_tsobject
File "pandas/_libs/tslibs/np_datetime.pyx", line 212, in pandas._libs.tslibs.np_datetime.check_dts_bounds
pandas._libs.tslibs.np_datetime.OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 277-01-03 00:00:00
General Information:
Describe the bug:
JSON serialization errors occur because datetime objects are not serialized beforehand.
To Reproduce:
Expected behavior: DataProfiler should serialize all datetime attributes to strings before saving to JSON, and deserialize them back on load.
Screenshots:
Traceback