Closed paulip1792 closed 1 month ago
Hey, thanks for the investigation. It is true that DefaultOnError
(and some other types) have overhead, but I think it might be unavoidable. The problem with your code snippet is that TAs::deserialize_as(deserializer).unwrap_or_default()
is not a correct implementation. In general, serde errors are non-recoverable, so continuing after a deserialization error by just ignoring it, will lead to future trouble. I do see the suggestion to use unwrap_or_default
from time-to-time, but without explanation that it is not generic.
You see this is you change the benchmark to use this JSON, with an empty object in the middle.
["1720679940000","57974.6","57978.4",{},"57955.4","321.1","3.211","186129.0766","0"]
What happens here now is that the Deserializer
processes the {
token, then returns an error. If you continue using the Deserializer
, then next the }
token comes up and leads to a new error.
Here is an updated benchmark to see the issue. expect
the result of serde_json::from_str
to ensure that the deserialization was actually successful and did not produce an Err
. I also reordered the bench_function
s to make it easier to see.
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use serde::{de::Error, Deserialize, Deserializer};
use serde_with::{As, DefaultOnError, DeserializeAs, DisplayFromStr};
use std::marker::PhantomData;
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CandlestickDefaultOnError {
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub ts: u64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub open: f64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub high: f64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub low: f64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub close: f64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub volume: f64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub volume_ccy: f64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub volume_ccy_quote: f64,
#[serde(with = "As::<DefaultOnError<BoolFromStr>>")]
pub confirm: bool,
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CandlestickDefaultOnErrorFast {
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub ts: u64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub open: f64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub high: f64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub low: f64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub close: f64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub volume: f64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub volume_ccy: f64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub volume_ccy_quote: f64,
#[serde(with = "As::<DefaultOnErrorFast<BoolFromStr>>")]
pub confirm: bool,
}
struct DefaultOnErrorFast<T>(PhantomData<T>);
impl<'de, T, TAs> DeserializeAs<'de, T> for DefaultOnErrorFast<TAs>
where
T: Default,
TAs: DeserializeAs<'de, T>,
{
#[inline]
fn deserialize_as<D>(deserializer: D) -> Result<T, D::Error>
where
D: Deserializer<'de>,
{
Ok(TAs::deserialize_as(deserializer).unwrap_or_default())
}
}
struct BoolFromStr;
impl<'de> DeserializeAs<'de, bool> for BoolFromStr {
#[inline]
fn deserialize_as<D>(deserializer: D) -> Result<bool, D::Error>
where
D: Deserializer<'de>,
{
let s: &str = Deserialize::deserialize(deserializer)?;
match s {
"1" => Ok(true),
"0" => Ok(false),
variant => Err(Error::unknown_variant(variant, &["1", "0"])),
}
}
}
fn criterion_benchmark(c: &mut Criterion) {
let s = r#"["1720679940000","57974.6","57978.4",{},"57955.4","321.1","3.211","186129.0766","0"]"#;
let mut group = c.benchmark_group("bench");
group.bench_function(BenchmarkId::new("CandlestickDefaultOnError", 0), |b| {
b.iter(|| {
let _ = black_box(serde_json::from_str::<CandlestickDefaultOnError>(
black_box(s),
)).expect("Deserialization must work");
})
});
group.bench_function(BenchmarkId::new("CandlestickDefaultOnErrorFast", 0), |b| {
b.iter(|| {
let _ = black_box(serde_json::from_str::<CandlestickDefaultOnErrorFast>(
black_box(s),
)).expect("Deserialization must work");
})
});
group.finish();
}
bench/CandlestickDefaultOnError/0
time: [1.3799 µs 1.4135 µs 1.4499 µs]
change: [+6.9098% +10.277% +13.891%] (p = 0.00 < 0.05)
Performance has regressed.
Found 4 outliers among 100 measurements (4.00%)
3 (3.00%) high mild
1 (1.00%) high severe
Benchmarking bench/CandlestickDefaultOnErrorFast/0: Warming up for 3.0000 sthread 'main' panicked at benches/my_benchmark.rs:102:16:
Deserialization must work: Error("expected `,` or `]`", line: 1, column: 38)
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
error: bench failed, to rerun pass `--bench my_benchmark`
I'd love to know if you find something faster than DefaultOnError
that handles the updated benchmark.
["1720679940000","57974.6","57978.4",{},"57955.4","321.1","3.211","186129.0766","0"]
The following implementation should perform better:
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use serde::{
de::{Error, Visitor},
Deserialize, Deserializer,
};
use serde_with::{As, DefaultOnError, DeserializeAs, DisplayFromStr, Same};
use std::marker::PhantomData;
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Candlestick {
#[serde(with = "As::<DisplayFromStr>")]
pub ts: u64,
#[serde(with = "As::<DisplayFromStr>")]
pub open: f64,
#[serde(with = "As::<DisplayFromStr>")]
pub high: f64,
#[serde(with = "As::<DisplayFromStr>")]
pub low: f64,
#[serde(with = "As::<DisplayFromStr>")]
pub close: f64,
#[serde(with = "As::<DisplayFromStr>")]
pub volume: f64,
#[serde(with = "As::<DisplayFromStr>")]
pub volume_ccy: f64,
#[serde(with = "As::<DisplayFromStr>")]
pub volume_ccy_quote: f64,
#[serde(with = "As::<BoolFromStr>")]
pub confirm: bool,
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CandlestickDefaultOnError {
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub ts: u64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub open: f64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub high: f64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub low: f64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub close: f64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub volume: f64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub volume_ccy: f64,
#[serde(with = "As::<DefaultOnError<DisplayFromStr>>")]
pub volume_ccy_quote: f64,
#[serde(with = "As::<DefaultOnError<BoolFromStr>>")]
pub confirm: bool,
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CandlestickDefaultOnErrorFast {
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub ts: u64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub open: f64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub high: f64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub low: f64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub close: f64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub volume: f64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub volume_ccy: f64,
#[serde(with = "As::<DefaultOnErrorFast<DisplayFromStr>>")]
pub volume_ccy_quote: f64,
#[serde(with = "As::<DefaultOnErrorFast<BoolFromStr>>")]
pub confirm: bool,
}
struct DefaultOnErrorFast<T = Same>(PhantomData<T>);
impl<'de, T, TAs> DeserializeAs<'de, T> for DefaultOnErrorFast<TAs>
where
T: Default,
TAs: DeserializeAs<'de, T>,
{
#[inline]
fn deserialize_as<D>(deserializer: D) -> Result<T, D::Error>
where
D: Deserializer<'de>,
{
Ok(TAs::deserialize_as(DefaultOnErrorDeserializer { deserializer }).unwrap_or_default())
}
}
struct DefaultOnErrorDeserializer<D> {
deserializer: D,
}
impl<'de, D> Deserializer<'de> for DefaultOnErrorDeserializer<D>
where
D: Deserializer<'de>,
{
type Error = D::Error;
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_i8<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_i16<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_i32<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_u8<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_u16<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_unit_struct<V>(
self,
_: &'static str,
visitor: V,
) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_newtype_struct<V>(
self,
_: &'static str,
visitor: V,
) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_tuple<V>(self, _: usize, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_tuple_struct<V>(
self,
_: &'static str,
_: usize,
visitor: V,
) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_struct<V>(
self,
_: &'static str,
_: &'static [&'static str],
visitor: V,
) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_enum<V>(
self,
_: &'static str,
_: &'static [&'static str],
visitor: V,
) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
self.deserializer.deserialize_any(visitor)
}
}
struct BoolFromStr;
impl<'de> DeserializeAs<'de, bool> for BoolFromStr {
#[inline]
fn deserialize_as<D>(deserializer: D) -> Result<bool, D::Error>
where
D: Deserializer<'de>,
{
let s: &str = Deserialize::deserialize(deserializer)?;
match s {
"1" => Ok(true),
"0" => Ok(false),
variant => Err(Error::unknown_variant(variant, &["1", "0"])),
}
}
}
fn criterion_benchmark(c: &mut Criterion) {
let valid_data = r#"["1720679940000","57974.6","57978.4","57955.4","57955.4","321.1","3.211","186129.0766","0"]"#;
let invalid_data =
r#"["1720679940000","57974.6","57978.4",{},"57955.4","321.1","3.211","186129.0766","0"]"#;
let mut group = c.benchmark_group("bench");
group.bench_function(BenchmarkId::new("Candlestick", 0), |b| {
b.iter(|| {
let _ = black_box(serde_json::from_str::<CandlestickDefaultOnErrorFast>(
black_box(valid_data),
));
})
});
group.bench_function(BenchmarkId::new("CandlestickDefaultOnErrorFast", 0), |b| {
b.iter(|| {
let _ = black_box(serde_json::from_str::<CandlestickDefaultOnErrorFast>(
black_box(valid_data),
));
})
});
group.bench_function(
BenchmarkId::new("CandlestickDefaultOnErrorFastInvalidData", 0),
|b| {
b.iter(|| {
let _ = black_box(serde_json::from_str::<CandlestickDefaultOnErrorFast>(
black_box(invalid_data),
));
})
},
);
group.bench_function(BenchmarkId::new("CandlestickDefaultOnError", 0), |b| {
b.iter(|| {
let _ = black_box(serde_json::from_str::<CandlestickDefaultOnError>(
black_box(valid_data),
));
})
});
group.bench_function(
BenchmarkId::new("CandlestickDefaultOnErrorInvalidData", 0),
|b| {
b.iter(|| {
let _ = black_box(serde_json::from_str::<CandlestickDefaultOnError>(
black_box(invalid_data),
));
})
},
);
group.finish();
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
bench/Candlestick/0 time: [215.27 ns 218.86 ns 223.35 ns]
Found 17 outliers among 100 measurements (17.00%)
4 (4.00%) high mild
13 (13.00%) high severe
bench/CandlestickDefaultOnErrorFast/0
time: [212.43 ns 215.31 ns 218.99 ns]
change: [+0.9075% +2.6560% +4.5866%] (p = 0.00 < 0.05)
Change within noise threshold.
Found 13 outliers among 100 measurements (13.00%)
4 (4.00%) high mild
9 (9.00%) high severe
bench/CandlestickDefaultOnErrorFastInvalidData/0
time: [302.02 ns 311.48 ns 322.10 ns]
change: [-8.9891% -6.7644% -4.5054%] (p = 0.00 < 0.05)
Performance has improved.
Found 16 outliers among 100 measurements (16.00%)
5 (5.00%) high mild
11 (11.00%) high severe
bench/CandlestickDefaultOnError/0
time: [302.25 ns 304.31 ns 307.16 ns]
change: [+1.0376% +4.6617% +8.8266%] (p = 0.01 < 0.05)
Performance has regressed.
Found 19 outliers among 100 measurements (19.00%)
1 (1.00%) high mild
18 (18.00%) high severe
bench/CandlestickDefaultOnErrorInvalidData/0
time: [410.68 ns 423.84 ns 438.98 ns]
change: [+4.3424% +6.8453% +9.3650%] (p = 0.00 < 0.05)
Performance has regressed.
Found 17 outliers among 100 measurements (17.00%)
1 (1.00%) low mild
4 (4.00%) high mild
12 (12.00%) high severe
With a small modification of the JSON, the new code still fails: {}
-> {"foo":false}
.