guardicore / monkey

Infection Monkey - An open-source adversary emulation platform
https://www.guardicore.com/infectionmonkey/
GNU General Public License v3.0
6.63k stars 775 forks source link

Write a script to avoid building up unused dependencies #1624

Open shreyamalviya opened 2 years ago

shreyamalviya commented 2 years ago

Related to #1599.

Since there are no existing options that could help us track and make sure that unused dependencies don't keep building up (see comment on #1599 for more), write a script that can also be used with Travis and pre-commit to check and notify us of any unused dependencies.

VakarisZ commented 2 years ago

A working, cross-platform script with whitelisting (not production-ready though):

from pathlib import Path
import os
from typing import List
import logging

# Configure the root logger so that INFO-level records and above are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by the functions in this script.
logger = logging.getLogger(__name__)

# Island packages that are allowed to have no matching import line; passed as
# the whitelist when the island codebase is checked.
ISLAND_WHITELIST = [
    "pyinstaller",  # used to package, so not imported
    "Flask-JWT-Extended",  # Some kind of flask dependency?
    "Flask-PyMongo",  # Some kind of flask dependency?
    "Flask-RESTful",  # Some kind of flask dependency?
]

# Agent packages that are allowed to have no matching import line
# (currently empty); passed as the whitelist when the agent codebase is checked.
AGENT_WHITELIST = [

]

def get_package_list_from_pipfile(pipfile_path: Path) -> List[str]:
    """Return the package names declared in the ``[packages]`` section of a Pipfile.

    The file is scanned line by line. Reading starts after the ``[packages]``
    header and stops at the next section header (or end of file), so blank
    lines and comments inside the section do not truncate or pollute the result.

    :param pipfile_path: Path to the Pipfile to read.
    :return: Package names in the order they appear, without surrounding quotes.
    :raises OSError: If the Pipfile cannot be opened.
    """
    logger.debug(f"Opening {pipfile_path} pip file")
    package_list = []
    in_packages_section = False

    # Pipfiles are TOML, which is always UTF-8 encoded.
    with open(pipfile_path, "r", encoding="utf-8") as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue

            if line.startswith("["):
                if in_packages_section:
                    # Any new section header ends the [packages] section.
                    break
                header = line.split("#", 1)[0].strip()
                in_packages_section = header == "[packages]"
                continue

            if in_packages_section:
                # Entries look like `name = "spec"`; the key may be quoted and
                # the spaces around `=` are optional.
                name = line.split("=", 1)[0].strip().strip("\"'")
                if name:
                    package_list.append(name)

    logger.debug(f"Packages found in the pipfile: {package_list}")
    return package_list

def find_in_dir(keyword: str, dir_path: Path) -> List[Path]:
    """Find the ``.py`` files under ``dir_path`` that import ``keyword``.

    Each file is read line by line and counted at most once: scanning of a file
    stops at the first line for which ``package_import_in_line`` reports a match.

    :param keyword: Package name to look for.
    :param dir_path: Root directory that is walked recursively.
    :return: Paths of the files that contain an import of ``keyword``.
    """
    files_with_keyword = []

    for root, dirs, files in os.walk(dir_path, onerror=None):  # walk the root dir
        # Skip directories whose path contains both a "cc" and a "ui"
        # component. Comparing path components instead of "\\"-delimited
        # substrings makes the check work on every platform.
        root_parts = Path(root).parts
        if "cc" in root_parts and "ui" in root_parts:
            dirs[:] = []  # prune in place so os.walk does not descend further
            continue

        for filename in files:
            if not str(filename).endswith(".py"):
                continue
            file_path = Path(root) / filename
            try:
                # Read as bytes and decode per line so that one undecodable
                # line does not make the whole file unreadable.
                with open(file_path, "rb") as f:
                    for raw_line in f:
                        try:
                            line = raw_line.decode("utf-8")
                        except UnicodeDecodeError:
                            continue
                        if package_import_in_line(line, keyword):
                            files_with_keyword.append(file_path)
                            logger.debug(f"Found usage of {keyword} in {file_path}")
                            break
            except OSError as err:
                # Best effort: an unreadable file is reported and skipped
                # rather than aborting the whole scan.
                logger.warning(f"Could not read {file_path}: {err}")
    return files_with_keyword

def package_import_in_line(line: str, package_name: str) -> bool:
    """Tell whether ``line`` is an import statement that imports ``package_name``.

    The comparison is case-insensitive, and hyphens in the package name are
    treated as underscores (``Flask-RESTful`` matches ``import flask_restful``).
    Only whole module names are compared, so ``flask`` does not match
    ``import flask_restful``, and text after ``#`` is ignored.

    :param line: A single line of Python source code.
    :param package_name: Package name as written in the Pipfile.
    :return: True if the line starts with ``import``/``from`` and names the
        package (or one of its submodules) as the imported module.
    """
    # Distribution names use "-" where the importable module uses "_".
    target = package_name.strip().strip("\"'").lower().replace("-", "_")
    if not target:
        return False

    statement = line.split("#", 1)[0].lower()
    for separator in (",", "(", ";"):
        statement = statement.replace(separator, " ")
    words = statement.split()
    if "import" not in words:
        return False

    if words[0] == "from":
        # `from pkg.sub import name`: only the source module identifies the package.
        modules = words[1:2]
    elif words[0] == "import":
        # `import a, b as c`: collect module names, skipping `as` aliases.
        modules = []
        skip_alias = False
        for word in words[1:]:
            if skip_alias:
                skip_alias = False
            elif word == "as":
                skip_alias = True
            else:
                modules.append(word)
    else:
        return False

    return any(
        module == target or module.startswith(target + ".") for module in modules
    )

def find_unused_packages(pipfile_path: Path, dir_path: Path, whitelist: List[str]) -> None:
    """Log, for every package in a Pipfile, whether the codebase imports it.

    Packages with at least one importing file are logged at INFO level.
    Packages without imports are logged at INFO level when whitelisted and at
    ERROR level otherwise.

    :param pipfile_path: Path to the Pipfile whose packages are checked.
    :param dir_path: Root directory of the codebase to search for imports.
    :param whitelist: Package names that are allowed to have no imports.
    """
    # Package names are case-insensitive, so the whitelist is compared lowercased.
    whitelisted = {name.lower() for name in whitelist}

    for package in get_package_list_from_pipfile(pipfile_path):
        usages = find_in_dir(package, dir_path)
        if usages:
            logger.info(f"Package {package} used in {len(usages)} files.")
        elif package.lower() in whitelisted:
            logger.info(f"Package {package} doesn't have imports but is in the whitelist.")
        else:
            # Reported only for packages that are neither imported nor whitelisted.
            logger.error(f"Package {package} usages not found!!!!!!!!!!!!!!!!!!!!!")

if __name__ == "__main__":
    # One entry per codebase: (label used in the log header, Pipfile location,
    # directory to scan, whitelist of packages allowed to have no imports).
    # The paths are placeholders and must be adjusted before running.
    codebases = (
        (
            "island",
            Path("C:\\Path\\to\\Pipfile\\"),
            Path("C:\\Path\\to\\island_dir\\"),
            ISLAND_WHITELIST,
        ),
        (
            "agent",
            Path("C:\\Path\\to\\Pipfile\\"),
            Path("C:\\Path\\to\\agent_dir\\"),
            AGENT_WHITELIST,
        ),
    )

    for label, pipfile, codebase_dir, allowed in codebases:
        logger.info(f"Package usages in the {label} codebase:")
        find_unused_packages(pipfile, codebase_dir, allowed)