jacebrowning / datafiles

A file-based ORM for Python dataclasses.
https://datafiles.readthedocs.io
MIT License
198 stars 18 forks source link

Custom Mapper e.g. for use with S3 #320

Closed PeterSR closed 6 months ago

PeterSR commented 7 months ago

Hi!

I am wondering if there is a way to change the part of the system that reads and writes the yaml files such that it can be used with an alternative filesystem, for instance reading and writing to an S3 bucket.

If there is no official way to do it, it seems that it might be as simple as monkey patching create_mapper. There also seems to be something about a root mapper in the code, but I am not entirely sure how it works.

jacebrowning commented 6 months ago

Unfortunately the library wasn't designed with alternative file systems in mind. It won't be automatic but one option is to manipulate the text property:

content = my_object.datafile.text
write_to_s3(content)

content = read_from_s3()
my_object.datafile.text = content
PeterSR commented 6 months ago

Thank you for the reply and the idea. Hadn't thought about the text property :)

Here's my own approach:

import os
import boto3
import datafiles.mapper
from ..logger import create_logger

class S3Mapper(datafiles.mapper.Mapper):
    """Mapper that mirrors the local datafile to an S3 object.

    The local path is reused as the S3 key with any leading ``/tmp/``
    stripped; the target bucket is read from the ``S3_BUCKET_NAME``
    environment variable at construction time.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # LastModified timestamp of the S3 object at the last successful
        # load; None means "never loaded", which forces the next load.
        self._last_load = None

        self.logger = create_logger()
        self.logger.info("S3Mapper.__init__")

        self.s3_client = boto3.client("s3")
        self.bucket_name = os.getenv("S3_BUCKET_NAME")
        if not self.bucket_name:
            # Fail loudly up front rather than with a confusing
            # Bucket=None error on the first S3 call.
            self.logger.warning("S3_BUCKET_NAME is not set")

    @property
    def s3_key(self):
        """Return the S3 object key derived from the local file path."""
        str_path = str(self.path)
        prefix = "/tmp/"
        if str_path.startswith(prefix):
            str_path = str_path[len(prefix):]
        return str_path

    @property
    def s3_object_stat(self):
        """Return the HeadObject response for the mirrored object.

        Returns None when the object does not exist; re-raises any
        other client error.
        """
        try:
            return self.s3_client.head_object(Bucket=self.bucket_name, Key=self.s3_key)
        except self.s3_client.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                return None
            raise  # bare raise preserves the original traceback

    @property
    def exists(self):
        """True if the mirrored object currently exists in S3."""
        self.logger.info("S3Mapper.exists")

        return self.s3_object_stat is not None

    @property
    def modified(self):
        """True if the S3 object changed since the last load (or never loaded)."""
        self.logger.info("S3Mapper.modified")

        if self._last_load is None:
            return True

        stat = self.s3_object_stat

        # NOTE(review): a remotely deleted object reports "not modified"
        # here; confirm that is the intended behavior.
        return stat["LastModified"] != self._last_load if stat else False

    @modified.setter
    def modified(self, modified: bool):
        """Mark the mapping dirty (True) or record the current S3 timestamp."""
        self.logger.info("S3Mapper.modified.setter")

        if modified:
            self._last_load = None
        else:
            stat = self.s3_object_stat
            self._last_load = stat["LastModified"] if stat else None

    def save(self, *args, **kwargs):
        """Save the local file as usual, then upload it to S3."""
        self.logger.info("S3Mapper.save")

        # First save the file as normal
        super().save(*args, **kwargs)

        # Next, write the local file to the S3 bucket; boto3's
        # upload_file expects a filename string, not a Path.
        self.s3_client.upload_file(str(self.path), self.bucket_name, self.s3_key)

    def load(self, *args, **kwargs):
        """Download the object from S3, then load the local file as usual.

        Raises FileNotFoundError when the object is missing, matching
        the local-filesystem behavior callers expect.
        """
        self.logger.info("S3Mapper.load")

        # Ensure the local target directory exists (pathlib idiom).
        self.path.parent.mkdir(parents=True, exist_ok=True)

        # First, download the file from the S3 bucket; download_file
        # also expects a filename string.
        try:
            self.s3_client.download_file(self.bucket_name, self.s3_key, str(self.path))
        except self.s3_client.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                raise FileNotFoundError(self.s3_key) from e
            raise

        # Next, load the file as normal
        super().load(*args, **kwargs)

# ---

import gorilla   # Framework for monkey patching

# Replace the Mapper class inside datafiles.mapper with S3Mapper so that
# the library's own factory code (e.g. create_mapper) picks up the
# subclass.  allow_hit=True permits overwriting the existing attribute.
# NOTE(review): this assumes the library looks up Mapper through the
# module attribute at call time — confirm against the datafiles source.
patch = gorilla.Patch(
    datafiles.mapper,
    "Mapper",
    S3Mapper,
    settings=gorilla.Settings(allow_hit=True),
)

gorilla.apply(patch)

# ---

# Example model: each instance is backed by /tmp/samples/<key>.yml locally,
# and (via the patched S3Mapper) mirrored to samples/<key>.yml in the bucket.
@datafile("/tmp/samples/{self.key}.yml")
class Sample:

    key: int    # primary key; interpolated into the file path above
    name: str
    value: float = 0.0

# The bucket name must be set BEFORE instantiation: S3Mapper.__init__
# reads S3_BUCKET_NAME when the mapper is created for the new object.
os.environ["S3_BUCKET_NAME"] = "my-bucket-name"
sample = Sample(42, "widget")

Could probably be improved a fair bit, but it seems to be good enough for me for now. Will leave it here for anyone else that might stumble upon it.