dtolnay / serde-yaml

Strongly typed YAML library for Rust
Apache License 2.0
965 stars 164 forks source link

Yaml 1.1 octal parsing quirk #225

Closed CertainLach closed 2 years ago

CertainLach commented 3 years ago

I don't know how to handle this better without reimplementing deserialization myself, and I think other people need this too.

Per the YAML 1.2 spec, octals should be written as follows: 0o123. However, the Go YAML implementation, which is used for a lot of DevOps tooling, also supports octals in the 0123 format: https://github.com/go-yaml/yaml/issues/420

I propose adding an optional ability to deserialize octals in the same way, because a lot of people expect the Go parser's behavior.

This setting only affects deserialization; serialization is still performed in a standard-compliant way. Fixes: #134

dtolnay commented 2 years ago

Example code:

use serde::de::{
    Deserialize, DeserializeSeed, Deserializer, EnumAccess, Error, MapAccess, SeqAccess,
    VariantAccess, Visitor,
};
use std::fmt;

/// Target type for the demo: a struct with a single unsigned integer field.
#[derive(serde::Deserialize, Debug)]
pub struct MyStruct {
    /// Value read from the YAML key `field`.
    pub field: u64,
}

fn main() {
    let input_data = "field: 0777";
    let de = serde_yaml::Deserializer::from_str(input_data);
    let de2 = YeOldeOctalDeserializer::new(de);
    let v: MyStruct = Deserialize::deserialize(de2).unwrap();
    println!("field={:o}", v.field);
}

/// Transparent newtype wrapper. The same wrapper type is given impls of
/// `Deserializer`, `Visitor`, `DeserializeSeed`, `SeqAccess`, `MapAccess`,
/// `EnumAccess` and `VariantAccess`, so it can re-wrap every nested value it
/// hands on and stay in the call chain for the whole deserialization.
pub struct YeOldeOctalDeserializer<D>(D);

impl<D> YeOldeOctalDeserializer<D> {
    pub fn new(de: D) -> Self {
        YeOldeOctalDeserializer(de)
    }
}

impl<'de, D> Deserializer<'de> for YeOldeOctalDeserializer<D>
where
    D: Deserializer<'de>,
{
    type Error = D::Error;

    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_any(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_bool(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_i8<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_any(ConvertYeOldeOctal(visitor))
    }
    fn deserialize_i16<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_any(ConvertYeOldeOctal(visitor))
    }
    fn deserialize_i32<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_any(ConvertYeOldeOctal(visitor))
    }
    fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_any(ConvertYeOldeOctal(visitor))
    }
    fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_any(ConvertYeOldeOctal(visitor))
    }
    fn deserialize_u8<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_any(ConvertYeOldeOctal(visitor))
    }
    fn deserialize_u16<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_any(ConvertYeOldeOctal(visitor))
    }
    fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_any(ConvertYeOldeOctal(visitor))
    }
    fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_any(ConvertYeOldeOctal(visitor))
    }
    fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_any(ConvertYeOldeOctal(visitor))
    }
    fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_f32(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_f64(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_char(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_str(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_string(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_bytes(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0
            .deserialize_byte_buf(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_option(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_unit(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_unit_struct<V>(
        self,
        name: &'static str,
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0
            .deserialize_unit_struct(name, YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_newtype_struct<V>(
        self,
        name: &'static str,
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0
            .deserialize_newtype_struct(name, YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_seq(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_tuple<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0
            .deserialize_tuple(len, YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_tuple_struct<V>(
        self,
        name: &'static str,
        len: usize,
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0
            .deserialize_tuple_struct(name, len, YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0.deserialize_map(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_struct<V>(
        self,
        name: &'static str,
        fields: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0
            .deserialize_struct(name, fields, YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_enum<V>(
        self,
        name: &'static str,
        variants: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0
            .deserialize_enum(name, variants, YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0
            .deserialize_identifier(YeOldeOctalDeserializer(visitor))
    }
    fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.0
            .deserialize_ignored_any(YeOldeOctalDeserializer(visitor))
    }
    fn is_human_readable(&self) -> bool {
        self.0.is_human_readable()
    }
}

impl<'de, V> Visitor<'de> for YeOldeOctalDeserializer<V>
where
    V: Visitor<'de>,
{
    type Value = V::Value;

    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        self.0.expecting(formatter)
    }
    fn visit_bool<E>(self, v: bool) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_bool(v)
    }
    fn visit_i8<E>(self, v: i8) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_i8(v)
    }
    fn visit_i16<E>(self, v: i16) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_i16(v)
    }
    fn visit_i32<E>(self, v: i32) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_i32(v)
    }
    fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_i64(v)
    }
    fn visit_i128<E>(self, v: i128) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_i128(v)
    }
    fn visit_u8<E>(self, v: u8) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_u8(v)
    }
    fn visit_u16<E>(self, v: u16) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_u16(v)
    }
    fn visit_u32<E>(self, v: u32) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_u32(v)
    }
    fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_u64(v)
    }
    fn visit_u128<E>(self, v: u128) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_u128(v)
    }
    fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_f32(v)
    }
    fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_f64(v)
    }
    fn visit_char<E>(self, v: char) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_char(v)
    }
    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_str(v)
    }
    fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_borrowed_str(v)
    }
    fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_string(v)
    }
    fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_bytes(v)
    }
    fn visit_borrowed_bytes<E>(self, v: &'de [u8]) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_borrowed_bytes(v)
    }
    fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_byte_buf(v)
    }
    fn visit_none<E>(self) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_none()
    }
    fn visit_some<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'de>,
    {
        self.0.visit_some(YeOldeOctalDeserializer(deserializer))
    }
    fn visit_unit<E>(self) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_unit()
    }
    fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'de>,
    {
        self.0
            .visit_newtype_struct(YeOldeOctalDeserializer(deserializer))
    }
    fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
    where
        A: SeqAccess<'de>,
    {
        self.0.visit_seq(YeOldeOctalDeserializer(seq))
    }
    fn visit_map<A>(self, map: A) -> Result<Self::Value, A::Error>
    where
        A: MapAccess<'de>,
    {
        self.0.visit_map(YeOldeOctalDeserializer(map))
    }
    fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error>
    where
        A: EnumAccess<'de>,
    {
        self.0.visit_enum(YeOldeOctalDeserializer(data))
    }
}

impl<'de, T> DeserializeSeed<'de> for YeOldeOctalDeserializer<T>
where
    T: DeserializeSeed<'de>,
{
    type Value = T::Value;

    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'de>,
    {
        self.0.deserialize(YeOldeOctalDeserializer(deserializer))
    }
}

/// Sequence facade: each element seed is wrapped before it reaches the inner
/// accessor; the size hint is passed through untouched.
impl<'de, A> SeqAccess<'de> for YeOldeOctalDeserializer<A>
where
    A: SeqAccess<'de>,
{
    type Error = A::Error;

    fn next_element_seed<T: DeserializeSeed<'de>>(
        &mut self,
        seed: T,
    ) -> Result<Option<T::Value>, Self::Error> {
        let wrapped = YeOldeOctalDeserializer(seed);
        self.0.next_element_seed(wrapped)
    }

    fn size_hint(&self) -> Option<usize> {
        self.0.size_hint()
    }
}

/// Map facade: key and value seeds are wrapped before they reach the inner
/// accessor; the size hint is passed through untouched.
impl<'de, A> MapAccess<'de> for YeOldeOctalDeserializer<A>
where
    A: MapAccess<'de>,
{
    type Error = A::Error;

    fn next_key_seed<K: DeserializeSeed<'de>>(
        &mut self,
        seed: K,
    ) -> Result<Option<K::Value>, Self::Error> {
        let wrapped = YeOldeOctalDeserializer(seed);
        self.0.next_key_seed(wrapped)
    }

    fn next_value_seed<V: DeserializeSeed<'de>>(
        &mut self,
        seed: V,
    ) -> Result<V::Value, Self::Error> {
        let wrapped = YeOldeOctalDeserializer(seed);
        self.0.next_value_seed(wrapped)
    }

    fn next_entry_seed<K: DeserializeSeed<'de>, V: DeserializeSeed<'de>>(
        &mut self,
        kseed: K,
        vseed: V,
    ) -> Result<Option<(K::Value, V::Value)>, Self::Error> {
        let key_seed = YeOldeOctalDeserializer(kseed);
        let value_seed = YeOldeOctalDeserializer(vseed);
        self.0.next_entry_seed(key_seed, value_seed)
    }

    fn size_hint(&self) -> Option<usize> {
        self.0.size_hint()
    }
}

/// Enum facade: wraps the variant seed on the way in and the returned variant
/// accessor on the way out.
impl<'de, A> EnumAccess<'de> for YeOldeOctalDeserializer<A>
where
    A: EnumAccess<'de>,
{
    type Error = A::Error;
    type Variant = YeOldeOctalDeserializer<A::Variant>;

    fn variant_seed<V: DeserializeSeed<'de>>(
        self,
        seed: V,
    ) -> Result<(V::Value, Self::Variant), Self::Error> {
        self.0
            .variant_seed(YeOldeOctalDeserializer(seed))
            .map(|(tag, access)| (tag, YeOldeOctalDeserializer(access)))
    }
}

/// Variant facade: forwards to the inner variant accessor, wrapping any seed
/// or visitor it is given.
impl<'de, A> VariantAccess<'de> for YeOldeOctalDeserializer<A>
where
    A: VariantAccess<'de>,
{
    type Error = A::Error;

    fn unit_variant(self) -> Result<(), Self::Error> {
        self.0.unit_variant()
    }

    fn newtype_variant_seed<T: DeserializeSeed<'de>>(
        self,
        seed: T,
    ) -> Result<T::Value, Self::Error> {
        let wrapped = YeOldeOctalDeserializer(seed);
        self.0.newtype_variant_seed(wrapped)
    }

    fn tuple_variant<V: Visitor<'de>>(
        self,
        len: usize,
        visitor: V,
    ) -> Result<V::Value, Self::Error> {
        let wrapped = YeOldeOctalDeserializer(visitor);
        self.0.tuple_variant(len, wrapped)
    }

    fn struct_variant<V: Visitor<'de>>(
        self,
        fields: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, Self::Error> {
        let wrapped = YeOldeOctalDeserializer(visitor);
        self.0.struct_variant(fields, wrapped)
    }
}

/// Visitor wrapper used for integer-hinted values: its `visit_str` tries to
/// parse the string as a base-8 integer before falling back to the wrapped
/// visitor's `visit_str`.
struct ConvertYeOldeOctal<V>(V);

impl<'de, V> Visitor<'de> for ConvertYeOldeOctal<V>
where
    V: Visitor<'de>,
{
    type Value = V::Value;

    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        self.0.expecting(formatter)
    }
    fn visit_bool<E>(self, v: bool) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_bool(v)
    }
    fn visit_i8<E>(self, v: i8) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_i8(v)
    }
    fn visit_i16<E>(self, v: i16) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_i16(v)
    }
    fn visit_i32<E>(self, v: i32) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_i32(v)
    }
    fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_i64(v)
    }
    fn visit_i128<E>(self, v: i128) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_i128(v)
    }
    fn visit_u8<E>(self, v: u8) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_u8(v)
    }
    fn visit_u16<E>(self, v: u16) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_u16(v)
    }
    fn visit_u32<E>(self, v: u32) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_u32(v)
    }
    fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_u64(v)
    }
    fn visit_u128<E>(self, v: u128) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_u128(v)
    }
    fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_f32(v)
    }
    fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_f64(v)
    }
    fn visit_char<E>(self, v: char) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_char(v)
    }
    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: Error,
    {
        if v.starts_with('-') {
            if let Ok(octal) = i64::from_str_radix(v, 8) {
                return self.0.visit_i64(octal);
            }
        } else {
            if let Ok(octal) = u64::from_str_radix(v, 8) {
                return self.0.visit_u64(octal);
            }
        }
        self.0.visit_str(v)
    }
    fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_bytes(v)
    }
    fn visit_borrowed_bytes<E>(self, v: &'de [u8]) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_borrowed_bytes(v)
    }
    fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_byte_buf(v)
    }
    fn visit_none<E>(self) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_none()
    }
    fn visit_some<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'de>,
    {
        self.0.visit_some(YeOldeOctalDeserializer(deserializer))
    }
    fn visit_unit<E>(self) -> Result<Self::Value, E>
    where
        E: Error,
    {
        self.0.visit_unit()
    }
    fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'de>,
    {
        self.0
            .visit_newtype_struct(YeOldeOctalDeserializer(deserializer))
    }
    fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
    where
        A: SeqAccess<'de>,
    {
        self.0.visit_seq(YeOldeOctalDeserializer(seq))
    }
    fn visit_map<A>(self, map: A) -> Result<Self::Value, A::Error>
    where
        A: MapAccess<'de>,
    {
        self.0.visit_map(YeOldeOctalDeserializer(map))
    }
    fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error>
    where
        A: EnumAccess<'de>,
    {
        self.0.visit_enum(YeOldeOctalDeserializer(data))
    }
}
CertainLach commented 2 years ago

I should clarify: this PR is intended for self-describing deserialization (i.e. into serde_json::Value), so a deserializer wrapper won't help, because the octal literal has already been decoded to a string by serde_yaml:

/// Parses `input_data` as YAML through the octal-aware wrapper into a
/// self-describing `serde_json::Value`, panicking on any error.
fn de(input_data: &str) -> serde_json::Value {
    let wrapped = YeOldeOctalDeserializer::new(serde_yaml::Deserializer::from_str(input_data));
    serde_json::Value::deserialize(wrapped).unwrap()
}

/// Checks that the legacy `0777` spelling and the `0o777` spelling
/// deserialize to the same value.
fn main() {
    let legacy = de("field: 0777");
    let modern = de("field: 0o777");
    assert_eq!(legacy, modern)
}
thread 'main' panicked at 'assertion failed: `(left == right)`
  left: `Object({"field": String("0777")})`,
 right: `Object({"field": Number(511)})`', src/main.rs:15:5
dtolnay commented 2 years ago

(Untested) It should work equally well for that with:

    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
-       self.0.deserialize_any(YeOldeOctalDeserializer(visitor))
+       self.0.deserialize_any(ConvertYeOldeOctal(visitor))
    }
CertainLach commented 2 years ago

This way the quoted string literal "0777" also gets decoded as an octal number, but it should be kept as a string:

    assert_ne!(de("field: \"0777\""), de("field: 0777"));
thread 'main' panicked at 'assertion failed: `(left != right)`
  left: `Object({"field": Number(511)})`,
 right: `Object({"field": Number(511)})`', src/main.rs:16:5