p-ranav / alpaca

Serialization library written in C++17 - Pack C++ structs into a compact byte-array without any macros or boilerplate code
MIT License
464 stars 43 forks source link

feat(options): Add `strict` option to return false if reaching the end of input early #51

Closed finger563 closed 1 day ago

finger563 commented 1 day ago

We needed to reject old versions of serialized data structures, but even turning on both `fixed_length_encoding` and `with_checksum` did not prevent older versions of the structures from deserializing as newer versions of the structures. This PR adds a new `strict` option, which makes deserialization fail when the end of the input is reached early and therefore properly disables that behavior. This lets us use a fold expression to control what happens when we deserialize an older structure as a newer structure (ensuring properly defined movement / defaults).

Manually handling versions:

template <alpaca::options O, typename CurrentVersionType, typename... OldVersionTypes>
class Storage {
    ...

    bool read() {
        if (!std::filesystem::exists(file_)) {
            logger_.warn("No file exists at '{}'", file_.string());
            return false;
        }

        // read the data from the file
        logger_.info("Reading file '{}'", file_.string());
        size_t file_size = std::filesystem::file_size(file_);
        std::ifstream ifs(file_, std::ios::in | std::ios::binary);
        ifs.seekg(0, std::ios::beg);
        std::vector<uint8_t> bytes;
        bytes.resize(file_size);
        ifs.read(reinterpret_cast<char *>(bytes.data()), file_size);
        ifs.close();
        logger_.debug("Read bytes: {::#02X}", bytes);

        // deserialize the data, trying each type in the list until one works
        std::optional<CurrentVersionType> maybe_new_data;
        bool did_deserialize = deserialize_type<CurrentVersionType>(bytes, maybe_new_data);
        if (!did_deserialize) {
            // use a fold expression (https://en.cppreference.com/w/cpp/language/fold) to
            // try each old version type in order
            did_deserialize = (deserialize_type<OldVersionTypes>(bytes, maybe_new_data) || ...);
        }

        if (did_deserialize) {
            data_ = maybe_new_data.value();
            logger_.info("Deserialized {} bytes from '{}'", file_size, file_.string());
            return true;
        } else {
            return false;
        }
    }

    template <typename U>
    bool deserialize_type(const std::vector<uint8_t> &bytes,
                          std::optional<CurrentVersionType> &new_data) {
        std::error_code ec;
        U test_data = bb::deserialize<O, U>(bytes, ec);
        if (!ec) {
            // NOTE: since alpaca requires aggregate types (no constructors), we
            //      can't use copy initialization / converting constructor here.
            //      Instead, we use default initialization and then use a
            //      converting assignment operator
            CurrentVersionType raw_data;
            // the next line requires that a conversion operator exists from U
            // to CurrentVersionType, such as:
            //   CurrentVersionType &operator=(const U &old_version_u) { ... }
            raw_data = std::move(test_data);
            // then we can set the optional value using the latest data
            new_data = raw_data;
            return true;
        } else {
            logger_.error(
                "Could not deserialize data from file '{}' - {}", file_.string(), ec.message());
            logger_.error("\traw bytes: {::#02X}", bytes);
            new_data = std::nullopt;
            return false;
        }
    }

    ...

};

Example code:

    // Write a version 1 structure to a file, then try to load that same file
    // as a version 2 structure (the load should fail).
    {
        logger.info("Running binary persistent data example with versioning!");
        //! [persistent data versioning example]
        // serialization options shared by the version 1 writer and the
        // version 2 reader
        constexpr auto kOptions = alpaca::options::fixed_length_encoding |
                                  alpaca::options::with_checksum | alpaca::options::strict;
        // the filesystem must have been initialized
        auto &fs = espp::FileSystem::get();
        // where will we store the data?
        std::string filename = "version.pac";
        const auto file_path = fs.get_root_path() / filename;

        // now actually create it!
        bb::PersistentData<kOptions, DeviceInfoDataV1> pd_v1(
            {.file_path = file_path, .log_level = espp::Logger::Verbosity::INFO});
        // get the data; it is written back unmodified
        auto v1_data = pd_v1.get();
        // now set the persistent data back!
        pd_v1.set(v1_data);

        logger.info("Writing version 1...");
        const bool wrote_v1 = pd_v1.write();
        if (!wrote_v1) {
            logger.error("Failed!");
        }

        logger.info("Reading version 1 as version 2...");
        // now try to load that same file as a version 2
        bb::PersistentData<kOptions, DeviceInfoDataV2> pd_v2(
            {.file_path = file_path,
             .auto_load = false,
             .log_level = espp::Logger::Verbosity::INFO});
        const bool read_as_v2 = pd_v2.read();
        if (!read_as_v2) {
            logger.info("Success: properly failed to read version 1 as version 2!");
        } else {
            logger.error("Should have failed to read version 1 as version 2!");
        }
        auto updated_pd_data = pd_v2.get();
    }

Before: CleanShot 2024-09-30 at 15 16 02

After: CleanShot 2024-09-30 at 15 05 34