Radiance-Technologies / prism

Utilities for creating and curating a dataset for automated proof repair in the Coq Proof Assistant.
GNU Lesser General Public License v3.0
5 stars 2 forks source link

Implement OpamSwitch locking mechanisms to protect against corruption. #55

Open a-gardner1 opened 1 year ago

a-gardner1 commented 1 year ago

A rough sketch of a solution involves creating a read lock for the switch directory that lasts for the lifetime of each OpamSwitch object. Cloned switches would need to also place a lock on their origin switch to prevent it from being deleted.

Below is an example that got thrown out in early discussions. Note that it is INCORRECT in that it deletes the actual switch directory upon deletion of the OpamSwitch object. Instead, a switch

Ideally, we would mimic opam's locking mechanisms so that external opam processes are also locked out from modifying switches in use by our switch managers.

"""
Provides an object-oriented abstraction of OPAM switches.
"""
import fcntl
from io import TextIOWrapper
import logging
import os
import re
import shutil
from dataclasses import InitVar, dataclass, field
from functools import cached_property
from pathlib import Path
from typing import Any, ClassVar, List, Optional
__all__ = ['OpamSwitch']
logger = logging.getLogger(__file__)
logger.setLevel(logging.DEBUG)
_allow_unsafe_clone: List[Any] = []
# Private module variable to allow fallback in OpamSwitch.run for clones
# that cannot invoke bwrap in an unprivileged container (e.g., a docker
# container runner within Gitlab CI)
# Very hacky and ugly.

@dataclass(frozen=True)
class OpamSwitch:
    """
    An OPAM switch.
    Note that OPAM must be installed to use all of the features of this
    class.
    """
    _whitespace_regex: ClassVar[re.Pattern] = re.compile(r"\s+")
    _newline_regex: ClassVar[re.Pattern] = re.compile("\n")
    _external_dirname: ClassVar[str] = "_opam"
    switch_name: InitVar[Optional[str]] = None
    """
    The name of the switch, by default None.
    If None, then this implies usage of the "default" switch.
    """
    switch_root: InitVar[Optional[str]] = None
    """
    The root path in which to create the switch, by default None.
    If None, then this implies usage of the "default" root.
    """
    name: str = field(init=False)
    """
    The name of the switch.
    Equivalent to setting ``$OPAMSWITCH`` to `name`.
    """
    root: str = field(init=False)
    """
    The root in which the switch was created.
    Equivalent to setting ``$OPAMROOT`` to `root`.
    """
    _is_external: bool = field(init=False)
    """
    Whether this is an external (i.e., local) switch.
    """
    _rlock_file: TextIOWrapper = field(init=False)
    """

    """

    def __post_init__(
            self,
            switch_name: Optional[str],
            switch_root: Optional[str]):
        """
        Realize switch name and root and perform validation.
        """
        assert isinstance(switch_name, str)
        if self.is_external(switch_name):
            # ensure local switch name is unambiguous
            switch_name = str(Path(switch_name).resolve())
        object.__setattr__(self, 'name', switch_name)
        object.__setattr__(self, 'root', switch_root)
        object.__setattr__(self, '_is_external', self.is_external(self.name))
        ##### NEW CODE PART 1 START ###########
        # Aquire read lock to prevent switch deletion
        if self.is_clone:
            self.in_use_path.touch(exist_ok=True)
            _rlock_file = open(str(self.in_use_path), "r")
            fcntl.flock(_rlock_file, fcntl.LOCK_SH)
            object.__setattr__(self, "_rlock_file", _rlock_file)
        ##### NEW CODE PART 1 END ###########
        ##### NEW CODE PART 2 START ###########

    def __del__(self):
        """
        Release instance's lock and delete unlocked switch files.
        """
        # Setting attribute to None unlocks the file.
        object.__setattr__(self, "_rlock_file", None)
        try:
            fcntl.flock(open(str(self.in_use_path), "w"), fcntl.LOCK_EX | fcntl.LOCK_NB)
            shutil.rmtree(self.path)
        except OSError:
            pass
        ##### NEW CODE PART 2 END ###########

    def __str__(self) -> str:
        """
        Return the name of the switch.
        """
        return self.name

    @property
    def is_clone(self) -> bool:
        """
        Return whether this switch is a clone or not.
        """
        return True

    @cached_property
    def path(self) -> Path:
        """
        Get the path to the switch directory.
        """
        return self.get_root(self.root, self.name)

    @cached_property
    def in_use_path(self) -> Path:
        """
        Get the path to lock file in the switch directory.
        """
        return self.path / "in_use.lock"

    @classmethod
    def get_root(cls, root: str, name: str) -> Path:
        """
        Get the root directory of the switch's files.
        Note that this is not to be confused with the Opam root, which
        may differ in the case of external (local) switches.
        Parameters
        ----------
        root : PathLike
            The Opam root with respect to which the hypothetical switch
            `name` should be evaluated.
        name : str
            The name of a hypothetical switch.
        Returns
        -------
        Path
            The absolute path to the switch's directory.
        """
        # based on `get_root` at src/format/opamSwitch.ml in Opam's
        # GitHub repository
        if cls.is_external(name):
            path = Path(name).resolve() / cls._external_dirname
        else:
            path = Path(root) / name
        return path.resolve()

    @classmethod
    def is_external(cls, name: str) -> bool:
        """
        Return whether `name` denotes an external (i.e., local) switch.
        """
        # based on `is_external` at src/format/opamSwitch.ml in Opam's
        # GitHub repository
        return name.startswith(".") or os.path.sep in name

if os.path.exists("./switch-1"):
    shutil.rmtree("./switch-1")
os.makedirs("./switch-1", exist_ok=True)
switch = OpamSwitch("switch-1", "./")
same_switch = OpamSwitch("switch-1", "./")
assert switch.in_use_path
in_use_path = switch.in_use_path
switch_path = switch.path
del switch
assert switch_path.exists()
assert in_use_path.exists()
del same_switch
assert not in_use_path.exists()
assert not switch_path.exists()
SimpleConjugate commented 1 year ago

I didn't consider the possibility of a sigkill preventing the successful removal of a switch after the write lock is in place, but I would assume the write lock on the file disappears with the process.

A comprehensive solution should provide a clean up method to finish any unfinished removal due to sigkill. One example is at the start of a switch manager, which would delete any switches that were cloned but currently do not have any active read locks.