Open eycheu opened 4 years ago
import json
import threading
import uuid
from pathlib import Path
import sys
from time import time
import asyncio
import aiofiles
from aiohttp_session import AbstractStorage, Session
import logging
logger = logging.getLogger(__package__)
__version__ = '0.0.2'
async def get_expiration(expiration_filepath):
"""Read expiration time in expiration file.
Parameters
----------
expiration_filepath : Path
Path to expiration file.
Returns
-------
int | None
Expiration time in seconds since the epoch else None.
"""
async with aiofiles.open(expiration_filepath, 'r') as fp:
try:
expiration = int(await fp.read())
except (TypeError, ValueError):
expiration = None
return expiration
class FileRemovalThread(threading.Thread):
def __init__(self, dirpath, check_interval=60, group=None, target=None, name=None,
args=(), kwargs=None, *, daemon=None):
"""[summary]
Parameters
----------
dirpath : pathlib.Path
Path to folder that store session files
check_interval : int, optional
Duration in seconds, after previous check for files that have expired, by default 60
group : refer to threading.Thread
target : refer to threading.Thread
name : refer to threading.Thread
args : refer to threading.Thread
kwargs : refer to threading.Thread
daemon : refer to threading.Thread
"""
super().__init__(group=group, target=target, name=name,
args=args, kwargs=kwargs, daemon=daemon)
self.dirpath = dirpath
self.check_interval = check_interval
self._thread_stop_event = threading.Event()
self._loop = None
def __del__(self):
self.stop()
def start(self):
self._thread_stop_event.clear()
super().start()
def stop(self):
self._thread_stop_event.set()
async def remove_file(self):
current_time = int(time())
for expiration_filepath in self.dirpath.glob('*.expiration'):
if self._thread_stop_event.is_set():
break
try:
expiration = await get_expiration(expiration_filepath)
if expiration and expiration < current_time: # expired
filepath = expiration_filepath.parent / expiration_filepath.stem
try:
# pathlib.Path.unlink(missing_ok=False) is only for Python version 3.8
filepath.unlink()
except Exception as ex:
logger.warning(f"Unable to remove {filepath} : {ex}")
try:
expiration_filepath.unlink()
except Exception as ex:
logger.warning(f"Unable to remove {expiration_filepath} : {ex}")
except Exception as ex:
logger.exception(ex)
def run(self):
try:
self._loop = asyncio.new_event_loop()
while not self._thread_stop_event.is_set():
self._loop.run_until_complete(self.remove_file())
self._thread_stop_event.wait(self.check_interval)
except:
pass
finally:
try:
self._loop.stop()
except:
pass
try:
self._loop.close()
except:
pass
class FileStorage(AbstractStorage):
"""File storage"""
def __init__(self, dirpath, *, cookie_name="AIOHTTP_SESSION",
domain=None, max_age=None, path='/',
secure=None, httponly=True,
key_factory=lambda: uuid.uuid4().hex,
encoder=json.dumps, decoder=json.loads,
expired_removal_interval=60):
super().__init__(cookie_name=cookie_name, domain=domain,
max_age=max_age, path=path, secure=secure,
httponly=httponly,
encoder=encoder, decoder=decoder)
self._key_factory = key_factory
self.dirpath = Path(dirpath)
self.dirpath.mkdir(parents=True, exist_ok=True)
# expired file removal thread
self._efr_thread = FileRemovalThread(self.dirpath,
check_interval=expired_removal_interval)
self._efr_thread.start()
def __del__(self):
try:
# stop expired file removal thread
self._efr_thread.stop()
except:
pass
async def shutdown(self, app):
"""Shutdown File Storage
To be called by app.on_shutdown.append(self.shutdown).
Parameters
----------
app : aiohttp.web
Application
"""
self.__del__()
async def load_session(self, request):
cookie = self.load_cookie(request)
if cookie is None:
return Session(None, data=None, new=True, max_age=self.max_age)
else:
key = str(cookie)
stored_key = self.cookie_name + '_' + key
filepath = self.dirpath / stored_key
# expiration file
expiration_filepath = filepath.with_suffix('.expiration')
if expiration_filepath.exists():
# get the expiration time in expiration file
expiration = await get_expiration(expiration_filepath)
if expiration is None:
# remove invalid expiration file
try:
# pathlib.Path.unlink(missing_ok=False) is only for Python version 3.8
expiration_filepath.unlink()
except Exception as ex:
logger.warning(f"Unable to remove {expiration_filepath} : {ex}")
if expiration and expiration < int(time()): # expired
try:
# pathlib.Path.unlink(missing_ok=False) is only for Python version 3.8
filepath.unlink()
except Exception as ex:
logger.warning(f"Unable to remove {filepath} : {ex}")
try:
# pathlib.Path.unlink(missing_ok=False) is only for Python version 3.8
expiration_filepath.unlink()
except Exception as ex:
logger.warning(f"Unable to remove {expiration_filepath} : {ex}")
return Session(None, data=None,
new=True, max_age=self.max_age)
if filepath.exists():
async with aiofiles.open(filepath, 'r') as fp:
data = await fp.read()
else:
return Session(None, data=None,
new=True, max_age=self.max_age)
try:
data = self._decoder(data)
except ValueError:
data = None
return Session(key, data=data, new=False, max_age=self.max_age)
async def save_session(self, request, response, session):
key = session.identity
if key is None:
key = self._key_factory()
self.save_cookie(response, key,
max_age=session.max_age)
else:
if session.empty:
self.save_cookie(response, '',
max_age=session.max_age)
else:
key = str(key)
self.save_cookie(response, key,
max_age=session.max_age)
data = self._encoder(self._get_session_data(session))
max_age = session.max_age
expiration = int(time()) + max_age if max_age is not None else 0
stored_key = self.cookie_name + '_' + key
filepath = self.dirpath / stored_key
if expiration:
# expiration file
expiration_filepath = filepath.with_suffix('.expiration')
async with aiofiles.open(expiration_filepath, 'w') as fp:
await fp.write(str(expiration))
async with aiofiles.open(filepath, 'w') as fp:
await fp.write(data)
Hi @eycheu . Yes. You are right. I had considered this case. But I don't think it is a good idea to run such a thread in the asynchronous environment. As far as I know, all the official storages (cookie storage, memcached storage, redis storage, etc.) and known third party extensions (mongodb storage, dynamodb storage) have their own mechanism to clean up expired data automatically outside of the libraries. So I think the better way is to write a script to periodically clean up the expired session files outside of this tiny library (e.g. a cron job for the clean-up script). But for short-running applications I think the clean-up task can be done while startup.
Hi @zhangkaizhao , would you be able to point to me the document or code in aiohttp-session that is doing the cleanup. I didn't find abstract method from AbstractStorage that we need to implement for the cleanup. I could have simply run another process or thread (not using the coroutine in a new asyncio loop of that thread) in reading out the expiration.
By the way, pathlib unlink method for python 3.8 has an additional keyword argument missing_ok. I had issue running on Python 3.7.
I am using tempfile as your example to hook into aiohttp.web on_cleanup and on_shutdown to remove the temporary folder and content.
Oh! There must be a understanding. I meant the memcached server, redis server, mongodb server, HTTP client-side cookie management, etc. themselves all have their own mechanism for expiry clean-up but not the corresponding aiohttp-session extensions.
By the way, pathlib unlink method for python 3.8 has an additional keyword argument missing_ok. I had issue running on Python 3.7.
Good catch! Yes. There is no keyword argument missing_ok
for pathlib.Path.unlink
method on Python <3.8 . I will fix it later. Thank you!
After some investigations I think there is no need to check the expiration file after loading cookie and the related lines are commented out. And a note of cleaning up expiry session files is added to README file.
Revert checking session expiration after loading cookie. The session key in cookies can be reused in some attack. So we still need to verify expiration of sessions for security consideration. A test case is added for this scenario.
The example shows the use of aiohttp.web on_cleanup trigger to remove the temporary session directory and contents within it. Orphan files will remain in the session folder as long as aiohttp is not cleaned up even the corresponding sessions have expired. Would like to have the following features :