chmp / serde_arrow

Convert sequences of Rust objects to Arrow tables
MIT License
60 stars 17 forks source link

Unable to serialize std::net::IpAddr #220

Closed ahassany closed 1 month ago

ahassany commented 1 month ago

I've tried the following code, but I got a strange error. I don't have enough experience with the ecosystem to tell whether this is a bug or not.

use std::net::IpAddr;
use serde::{Deserialize, Deserializer, Serialize};
use arrow::datatypes::FieldRef;
use serde_arrow::schema::{SchemaLike, TracingOptions};

/// Minimal record used to reproduce the problem: a single field `a` of type
/// `std::net::IpAddr`, with serde's `Serialize` and `Deserialize` derived.
#[derive(Debug, Serialize, Deserialize)]
struct Record {
    a: IpAddr,
}

/// Traces the Arrow fields for `Record` via `from_type` with default
/// `TracingOptions` and prints them to stderr.
///
/// # Errors
///
/// Propagates any error returned by `from_type`. In this report that call
/// fails with `invalid IP address syntax`, so the `eprintln!` is never reached.
pub fn main() -> serde_arrow::Result<()> {
    // The schema is derived from the type alone; no `Record` value is supplied.
    let fields = Vec::<FieldRef>::from_type::<Record>(TracingOptions::default())?;
    eprintln!("Fields: {fields:?}");
    Ok(())
}

And the error I got:

Error: <Error: serde::de::Error: invalid IP address syntax
Backtrace:
   0: std::backtrace_rs::backtrace::libunwind::trace
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/../../backtrace/src/backtrace/libunwind.rs:116:5
   1: std::backtrace_rs::backtrace::trace_unsynchronized
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
   2: std::backtrace::Backtrace::create
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/backtrace.rs:331:13
   3: std::backtrace::Backtrace::capture
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/backtrace.rs:296:9
   4: serde_arrow::internal::error::Error::custom
             at /Users/ahassany/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde_arrow-0.11.6/src/internal/error.rs:30:24
   5: <serde_arrow::internal::error::Error as serde::de::Error>::custom
             at /Users/ahassany/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde_arrow-0.11.6/src/internal/error.rs:129:9
   6: core::ops::function::FnOnce::call_once
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/ops/function.rs:250:5
   7: core::result::Result<T,E>::map_err
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/result.rs:854:27
   8: <serde::de::impls::FromStrVisitor<T> as serde::de::Visitor>::visit_str
             at /Users/ahassany/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde-1.0.204/src/de/impls.rs:3195:9
   9: serde::de::Visitor::visit_borrowed_str
             at /Users/ahassany/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde-1.0.204/src/de/mod.rs:1515:9
  10: <serde_arrow::internal::schema::from_type::TraceAny as serde::de::Deserializer>::deserialize_str
             at /Users/ahassany/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde_arrow-0.11.6/src/internal/schema/from_type/mod.rs:122:9
  11: serde::de::impls::<impl serde::de::Deserialize for core::net::ip_addr::IpAddr>::deserialize
             at /Users/ahassany/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde-1.0.204/src/de/impls.rs:1727:13
  12: <core::marker::PhantomData<T> as serde::de::DeserializeSeed>::deserialize
             at /Users/ahassany/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde-1.0.204/src/de/mod.rs:801:9
  13: <serde_arrow::internal::schema::from_type::TraceStruct as serde::de::MapAccess>::next_value_seed
             at /Users/ahassany/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde_arrow-0.11.6/src/internal/schema/from_type/mod.rs:342:21
  14: serde::de::MapAccess::next_value
             at /Users/ahassany/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde-1.0.204/src/de/mod.rs:1874:9
  15: <example::_::<impl serde::de::Deserialize for example::Record>::deserialize::__Visitor as serde::de::Visitor>::visit_map
             at ./example/examples/example.rs:98:28
  16: <serde_arrow::internal::schema::from_type::TraceAny as serde::de::Deserializer>::deserialize_struct
             at /Users/ahassany/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde_arrow-0.11.6/src/internal/schema/from_type/mod.rs:228:9
  17: example::_::<impl serde::de::Deserialize for example::Record>::deserialize
             at ./example/examples/example.rs:98:28
  18: serde_arrow::internal::schema::from_type::<impl serde_arrow::internal::schema::tracer::Tracer>::from_type
             at /Users/ahassany/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde_arrow-0.11.6/src/internal/schema/from_type/mod.rs:35:13
  19: <serde_arrow::internal::schema::SerdeArrowSchema as serde_arrow::internal::schema::SchemaLike>::from_type
             at /Users/ahassany/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde_arrow-0.11.6/src/internal/schema/mod.rs:304:9
  20: serde_arrow::arrow_impl::schema::<impl serde_arrow::internal::schema::SchemaLike for alloc::vec::Vec<alloc::sync::Arc<arrow_schema::field::Field>>>::from_type
             at /Users/ahassany/.cargo/registry/src/index.crates.io-6f17d22bba15001f/serde_arrow-0.11.6/src/arrow_impl/schema.rs:140:12
  21: example::main
             at ./example/examples/example.rs:104:18
  22: core::ops::function::FnOnce::call_once
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/ops/function.rs:250:5
  23: std::sys_common::backtrace::__rust_begin_short_backtrace
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/sys_common/backtrace.rs:155:18
  24: std::rt::lang_start::{{closure}}
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/rt.rs:159:18
  25: core::ops::function::impls::<impl core::ops::function::FnOnce<A> for &F>::call_once
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/ops/function.rs:284:13
  26: std::panicking::try::do_call
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panicking.rs:559:40
  27: std::panicking::try
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panicking.rs:523:19
  28: std::panic::catch_unwind
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panic.rs:149:14
  29: std::rt::lang_start_internal::{{closure}}
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/rt.rs:141:48
  30: std::panicking::try::do_call
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panicking.rs:559:40
  31: std::panicking::try
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panicking.rs:523:19
  32: std::panic::catch_unwind
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panic.rs:149:14
  33: std::rt::lang_start_internal
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/rt.rs:141:20
  34: std::rt::lang_start
             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/rt.rs:158:17
  35: _main
  36: <unknown>
chmp commented 1 month ago

Hi @ahassany

Thanks for the report. The issue is the same as in #203: the IpAddr type is not self-describing, i.e., you need to know it's an IP address to supply the correct data. As a workaround, you could use from_samples, or use from_value to describe the schema yourself.

I will add IpAddr to the growing number of types not supported by from_type.

I also need to figure out a better error message for from_type so it's clear from the error how to debug it.

ahassany commented 1 month ago

Thank you @chmp for the clarification. I imagine it's hard to chase down every single type. A more informative error message would definitely be a step in the right direction.