pepkit / peppy

Project metadata manager for PEPs in Python
https://pep.databio.org/peppy
BSD 2-Clause "Simplified" License
37 stars 13 forks source link

Using a sample table as input does not initialize all object variables #392

Closed khoroshevskyi closed 2 years ago

khoroshevskyi commented 2 years ago

Using a sample table as input does not initialize all object variables, e.g., _config. That leads to downstream problems when reinitializing the project. Example:

peppy.Project("example/proj.csv")

Object variables:

{
  "name": null,
  "_samples": [
    {
      "time": "0",
      "organism": "frog",
      "protocol": "ATAC-seq",
      "file_path": "data/lab/project/frog_0h.fastq",
      "sample_name": "frog_0h"
    },
    {
      "time": "1",
      "organism": "frog",
      "protocol": "ATAC-seq",
      "file_path": "data/lab/project/frog_1h.fastq",
      "sample_name": "frog_1h"
    },
    {
      "time": "1",
      "organism": "human",
      "protocol": "ATAC-seq",
      "file_path": "data/lab/project/human_1h.fastq",
      "sample_name": "human_1h"
    },
    {
      "time": "0",
      "organism": "human",
      "protocol": "ATAC-seq",
      "file_path": "data/lab/project/human_0h.fastq",
      "sample_name": "human_0h"
    },
    {
      "time": "1",
      "organism": "mouse",
      "protocol": "ATAC-seq",
      "file_path": "data/lab/project/mouse_1h.fastq",
      "sample_name": "mouse_1h"
    },
    {
      "time": "0",
      "organism": "mouse",
      "protocol": "ATAC-seq",
      "file_path": "data/lab/project/mouse_1h.fastq",
      "sample_name": "mouse_0h"
    }
  ],
  "st_index": "sample_name",
  "sst_index": [
    "sample_name",
    "subsample_name"
  ],
  "_sample_df": {
    "time": {
      "frog_0h": "0",
      "frog_1h": "1",
      "human_0h": "0",
      "human_1h": "1",
      "mouse_0h": "0",
      "mouse_1h": "1"
    },
    "organism": {
      "frog_0h": "frog",
      "frog_1h": "frog",
      "human_0h": "human",
      "human_1h": "human",
      "mouse_0h": "mouse",
      "mouse_1h": "mouse"
    },
    "protocol": {
      "frog_0h": "ATAC-seq",
      "frog_1h": "ATAC-seq",
      "human_0h": "ATAC-seq",
      "human_1h": "ATAC-seq",
      "mouse_0h": "ATAC-seq",
      "mouse_1h": "ATAC-seq"
    },
    "file_path": {
      "frog_0h": "data/lab/project/frog_0h.fastq",
      "frog_1h": "data/lab/project/frog_1h.fastq",
      "human_0h": "data/lab/project/human_0h.fastq",
      "human_1h": "data/lab/project/human_1h.fastq",
      "mouse_0h": "data/lab/project/mouse_1h.fastq",
      "mouse_1h": "data/lab/project/mouse_1h.fastq"
    },
    "sample_name": {
      "frog_0h": "frog_0h",
      "frog_1h": "frog_1h",
      "human_0h": "human_0h",
      "human_1h": "human_1h",
      "mouse_0h": "mouse_0h",
      "mouse_1h": "mouse_1h"
    }
  },
  "description": null,
  "_sample_table": {
    "time": {
      "frog_0h": "0",
      "frog_1h": "1",
      "human_0h": "0",
      "human_1h": "1",
      "mouse_0h": "0",
      "mouse_1h": "1"
    },
    "organism": {
      "frog_0h": "frog",
      "frog_1h": "frog",
      "human_0h": "human",
      "human_1h": "human",
      "mouse_0h": "mouse",
      "mouse_1h": "mouse"
    },
    "protocol": {
      "frog_0h": "ATAC-seq",
      "frog_1h": "ATAC-seq",
      "human_0h": "ATAC-seq",
      "human_1h": "ATAC-seq",
      "mouse_0h": "ATAC-seq",
      "mouse_1h": "ATAC-seq"
    },
    "file_path": {
      "frog_0h": "data/lab/project/frog_0h.fastq",
      "frog_1h": "data/lab/project/frog_1h.fastq",
      "human_0h": "data/lab/project/human_0h.fastq",
      "human_1h": "data/lab/project/human_1h.fastq",
      "mouse_0h": "data/lab/project/mouse_1h.fastq",
      "mouse_1h": "data/lab/project/mouse_1h.fastq"
    },
    "sample_name": {
      "frog_0h": "frog_0h",
      "frog_1h": "frog_1h",
      "human_0h": "human_0h",
      "human_1h": "human_1h",
      "mouse_0h": "mouse_0h",
      "mouse_1h": "mouse_1h"
    }
  },
  "_subsample_df": null,
  "_large_sample_df": false,
  "_samples_touched": true,
  "_sample_table_path": "/home/bnt4me/Virginia/pephub_db/sample_pep/amendments2/sample_table.csv",
  "_subsample_tables_path": null
}
khoroshevskyi commented 2 years ago

Fixed!