Closed PeterSR closed 6 months ago
Unfortunately, the library wasn't designed with alternative file systems in mind. It won't be automatic, but one option is to manipulate the text
property:
content = my_object.datafile.text
write_to_s3(content)
content = read_from_s3()
my_object.datafile.text = content
Thank you for the reply and the idea. Hadn't thought about the text property :)
Here's my own approach:
import os
import boto3
import datafiles.mapper
from ..logger import create_logger
class S3Mapper(datafiles.mapper.Mapper):
    """Mapper that mirrors the local datafile to an S3 bucket.

    The local file lives under ``/tmp``; the S3 object key is the local
    path with the leading ``/tmp/`` stripped.  The bucket name is read
    from the ``S3_BUCKET_NAME`` environment variable at construction time.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # LastModified timestamp of the S3 object at the last load; None
        # means "never loaded", which makes `modified` report True.
        self._last_load = None
        self.logger = create_logger()
        self.logger.info("S3Mapper.__init__")
        self.s3_client = boto3.client("s3")
        self.bucket_name = os.getenv("S3_BUCKET_NAME")

    @property
    def s3_key(self):
        """Return the S3 object key: the local path minus any ``/tmp/`` prefix."""
        str_path = str(self.path)
        if str_path.startswith("/tmp/"):
            str_path = str_path[len("/tmp/") :]
        return str_path

    @property
    def s3_object_stat(self):
        """Return ``head_object`` metadata for the object, or None if absent."""
        try:
            return self.s3_client.head_object(Bucket=self.bucket_name, Key=self.s3_key)
        except self.s3_client.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                return None
            raise  # bare raise keeps the original traceback intact

    @property
    def exists(self):
        """True if the object currently exists in the bucket."""
        self.logger.info("S3Mapper.exists")
        return self.s3_object_stat is not None

    @property
    def modified(self):
        """True if the S3 object changed since the last load (or was never loaded)."""
        self.logger.info("S3Mapper.modified")
        if self._last_load is None:
            return True
        stat = self.s3_object_stat
        # A missing object is treated as "not modified" so a stale local
        # copy is not reloaded from a deleted key.
        return stat["LastModified"] != self._last_load if stat else False

    @modified.setter
    def modified(self, modified: bool):
        """Record (or reset) the LastModified timestamp used by the getter."""
        self.logger.info("S3Mapper.modified.setter")
        if modified:
            self._last_load = None
        else:
            stat = self.s3_object_stat
            self._last_load = stat["LastModified"] if stat else None

    def save(self, *args, **kwargs):
        """Save locally via the base Mapper, then upload the file to S3."""
        self.logger.info("S3Mapper.save")
        # First save the file as normal
        super().save(*args, **kwargs)
        # Next, write the local file to the S3 bucket.  boto3's Filename
        # parameter expects a str; older boto3 versions reject pathlib.Path.
        self.s3_client.upload_file(str(self.path), self.bucket_name, self.s3_key)

    def load(self, *args, **kwargs):
        """Download the file from S3, then load it via the base Mapper.

        Raises:
            FileNotFoundError: if the object does not exist in the bucket.
        """
        self.logger.info("S3Mapper.load")
        os.makedirs(self.path.parent, exist_ok=True)
        # First, download the file from the S3 bucket (str() as in save()).
        try:
            self.s3_client.download_file(self.bucket_name, self.s3_key, str(self.path))
        except self.s3_client.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                raise FileNotFoundError(self.s3_key) from e
            raise
        # Next, load the file as normal
        super().load(*args, **kwargs)
# ---
import gorilla # Framework for monkey patching
patch = gorilla.Patch(
datafiles.mapper,
"Mapper",
S3Mapper,
settings=gorilla.Settings(allow_hit=True),
)
gorilla.apply(patch)
# ---
@datafile("/tmp/samples/{self.key}.yml")
class Sample:
key: int
name: str
value: float = 0.0
os.environ["S3_BUCKET_NAME"] = "my-bucket-name"
sample = Sample(42, "widget")
Could probably be improved a fair bit, but it seems to be good enough for me for now. Will leave it here for anyone else that might stumble upon it.
Hi!
I am wondering if there is a way to change the part of the system that reads and writes the yaml files such that it can be used with an alternative filesystem, for instance reading and writing to an S3 bucket.
If there is no official way to do it, it seems that it might be as simple as monkey patching
create_mapper
. There also seems to be something about a root mapper in the code, but I am not entirely sure how it works.