tweag / FawltyDeps

Python dependency checker
Other
201 stars 14 forks source link

PyPI experiment run results enhancements #417

Closed mknorps closed 10 months ago

mknorps commented 10 months ago

This PR can be reviewed commit by commit.

The guiding idea was to record more data during the experiment run, so that it can be filtered and transformed in the later stages of the analysis.

This PR introduces:

  1. metadata of the FD run:
    • project_name
    • fawltydeps_version - the version is also bumped. Every time we change the schema of the results or the way they are computed, we should bump the last number in the version. This way we know that we branched off from the original FD at version 0.13.1, and still have a distinct version for the experiment.
    • repo_url - added in the execution stage (PyPI_analysis/download_and_analyze.py)
    • creation_timestamp - added in the execution stage (PyPI_analysis/download_and_analyze.py)
  2. Information about all imports.
  3. Counts of .py and .ipynb files for each directory. Note that each count covers only the .py or .ipynb files directly contained in that directory (files in subdirectories are not included). This example:
    │   ├── mixed_project
    │   │   ├── expected.toml
    │   │   ├── main.py
    │   │   ├── pyproject.toml
    │   │   ├── subdir1
    │   │   │   ├── notebook.ipynb
    │   │   │   ├── script.py
    │   │   │   └── setup.cfg

    will give us:

    {
        "mixed_project": {
          "py": 1,
          "ipynb": 0,
          "total": 1
        },
        "mixed_project/subdir1": {
          "py": 1,
          "ipynb": 1,
          "total": 2
        }
    }

For reference, here is the result of the run on FawltyDeps:

{
  "metadata": {
    "project_name": "",
    "fawltydeps_version": "0.14.0"
  },
  "code_dirs": {
    "tests": {
      "py": 26,
      "ipynb": 0,
      "total": 26
    },
    "fawltydeps": {
      "py": 13,
      "ipynb": 0,
      "total": 13
    },
    "PyPI_analysis": {
      "py": 6,
      "ipynb": 0,
      "total": 6
    },
    "tests/sample_projects/mixed_project/subdir2": {
      "py": 2,
      "ipynb": 1,
      "total": 3
    },
    "tests/sample_projects/mixed_project/subdir1": {
      "py": 1,
      "ipynb": 1,
      "total": 2
    },
    "PyPI_analysis/tests/main": {
      "py": 2,
      "ipynb": 0,
      "total": 2
    },
    "PyPI_analysis/notebooks": {
      "py": 0,
      "ipynb": 2,
      "total": 2
    },
    "PyPI_analysis/tests": {
      "py": 1,
      "ipynb": 0,
      "total": 1
    },
    "tests/sample_projects/file__requirements_unused": {
      "py": 1,
      "ipynb": 0,
      "total": 1
    },
    "tests/sample_projects/mixed_project": {
      "py": 1,
      "ipynb": 0,
      "total": 1
    },
    "tests/sample_projects/blog_post_example": {
      "py": 1,
      "ipynb": 0,
      "total": 1
    },
    "tests/sample_projects/file__requirements_undeclared": {
      "py": 1,
      "ipynb": 0,
      "total": 1
    },
    "tests/sample_projects/beautifulsoup_and_html5lib": {
      "py": 1,
      "ipynb": 0,
      "total": 1
    },
    ".": {
      "py": 1,
      "ipynb": 0,
      "total": 1
    },
    "tests/sample_projects/file__requirements_undeclared_unused": {
      "py": 1,
      "ipynb": 0,
      "total": 1
    },
    "PyPI_analysis/experiments/biomedical_projects_experiment": {
      "py": 1,
      "ipynb": 0,
      "total": 1
    },
    "tests/sample_projects/no_issues": {
      "py": 1,
      "ipynb": 0,
      "total": 1
    }
  },
  "deps_file": [
    {
      "source_type": "DepsSource",
      "path": "tests/sample_projects/beautifulsoup_and_html5lib/requirements.txt",
      "parser_choice": "requirements.txt",
      "deps_count": 2,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "tests/sample_projects/file__requirements_undeclared_unused/requirements.txt",
      "parser_choice": "requirements.txt",
      "deps_count": 2,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "tests/sample_projects/no_issues/requirements.txt",
      "parser_choice": "requirements.txt",
      "deps_count": 2,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "tests/sample_projects/file__requirements_undeclared/requirements.txt",
      "parser_choice": "requirements.txt",
      "deps_count": 2,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "tests/sample_projects/mixed_project/subdir2/setup.py",
      "parser_choice": "setup.py",
      "deps_count": 4,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "tests/sample_projects/mixed_project/subdir1/setup.cfg",
      "parser_choice": "setup.cfg",
      "deps_count": 3,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "PyPI_analysis/tests/pyproject.toml",
      "parser_choice": "pyproject.toml",
      "deps_count": 1,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "tests/sample_projects/blog_post_example/requirements.txt",
      "parser_choice": "requirements.txt",
      "deps_count": 1,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "PyPI_analysis/tests/setup.py",
      "parser_choice": "setup.py",
      "deps_count": 1,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "PyPI_analysis/tests/requirements-dev.txt",
      "parser_choice": "requirements.txt",
      "deps_count": 1,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "tests/sample_projects/mixed_project/pyproject.toml",
      "parser_choice": "pyproject.toml",
      "deps_count": 3,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "tests/sample_projects/beautifulsoup_and_html5lib/requirements.workaround.txt",
      "parser_choice": "requirements.txt",
      "deps_count": 1,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "tests/sample_projects/file__requirements_unused/requirements.txt",
      "parser_choice": "requirements.txt",
      "deps_count": 2,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "tests/sample_projects/no_issues/subdir/requirements.txt",
      "parser_choice": "requirements.txt",
      "deps_count": 2,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "tests/sample_projects/blog_post_example/dev-requirements.txt",
      "parser_choice": "requirements.txt",
      "deps_count": 1,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "PyPI_analysis/tests/requirements.txt",
      "parser_choice": "requirements.txt",
      "deps_count": 2,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "pyproject.toml",
      "parser_choice": "pyproject.toml",
      "deps_count": 37,
      "warnings": false
    },
    {
      "source_type": "DepsSource",
      "path": "PyPI_analysis/tests/setup.cfg",
      "parser_choice": "setup.cfg",
      "deps_count": 1,
      "warnings": false
    }
  ],
  "imports": [
    {
      "Regular": {
        "name": "pip_requirements_parser",
        "source": {
          "path": "fawltydeps/extract_declared_dependencies.py",
          "lineno": 13
        }
      }
    },
...
    {
      "Docstring": {
        "name": "docstring_2",
        "source": {
          "path": "PyPI_analysis/tests/main/docstrings.py",
          "lineno": 12
        }
      }
    },
    {
      "Docstring": {
        "name": "docstring_3",
        "source": {
          "path": "PyPI_analysis/tests/main/docstrings.py",
          "lineno": 12
        }
      }
    },
...
]
}