Closed biomystery closed 3 years ago
django_data=# select tenx_pipeline_status, count(*) from singlecell_app_singlecellobject group by tenx_pipeline_status;
tenx_pipeline_status | count
----------------------+-------
Yes | 312
InProcess | 1
ClickToSubmit | 1368
No | 1
InQueue | 44
django_data=# select * from singlecell_app_singlecellobject where seqinfo_id=3721;
id | experiment_type | date_last_modified | tenx_pipeline_status | path_to_websummary | random_string_link | object_id | content_type_id | cooladminsubmiss
ion_id | seqinfo_id
------+-----------------+--------------------+----------------------+--------------------+----------------------------------------------------------------------+-----------+-----------------+-----------------
-------+------------
746 | 10xATAC | 2020-09-09 | Yes | | http://epigenomics.sdsc.edu/zhc268/lims_singlecell/0Ih2zd4w_JYH_1046 | 416 | 31 |
| 3721
1688 | 10xATAC | 2019-06-24 | Yes | | | 629 | 31 |
| 3721
(2 rows)
with cte as (select seqinfo_id,count(*) as vcount from singlecell_app_singlecellobject where tenx_pipeline_status='Yes' group by seqinfo_id ) select vcount,count(vcount) from cte group by vcount;
vcount | count
--------+-------
2 | 156
(1 row)
select random_string_link,count(random_string_link) from singlecell_app_singlecellobject where random_string_link='' and tenx_pipeline_status='Yes' group by random_string_link;
delete from singlecell_app_singlecellobject where random_string_link='' and tenx_pipeline_status='Yes';
with cte as (select seqinfo_id,count(*) as vcount from singlecell_app_singlecellobject where tenx_pipeline_status='Yes' group by seqinfo_id ) select vcount,count(vcount) from cte group by vcount;
2.1 check number of duplicates
django_data=# with cte as (select seqinfo_id,count(*) as vcount from singlecell_app_singlecellobject group by seqinfo_id ) select vcount,count(vcount) from cte group by vcount; vcount | count
--------+-------
1 | 1456
2 | 56
Check the items to be deleted:
django_data=# with cte as(select experiment_type,date_last_modified,seqinfo_id, row_number() over (partition by seqinfo_id order by date_last_modified) rownum from singlecell_app_singlecellobject) select rownum,count(rownum) from cte where rownum>1 group by rownum;
rownum | count
--------+-------
2 | 56
verified 2 (in postgres)
django_data=# with cte as(select *,row_number() over (partition by seqinfo_id order by date_last_modified) as rownum from singlecell_app_singlecellobject) select count(*) from singlecell_app_singlecellobject where id in (select id from cte where rownum>1) ;
count
-------
56
(1 row)
delete items that the rownum >1 :
django_data=# with cte as(select *,row_number() over (partition by seqinfo_id order by date_last_modified) as rownum from singlecell_app_singlecellobject) delete from singlecell_app_singlecellobject where id in (select id from cte where rownum>1) ;
DELETE 56
select * from singlecell_app_singlecellobject where seqinfo_id=2256;
- two copiesselect * from singlecell_app_tenxqcinfo where num_fragments=6530765;
- multiple copiesfor the second task, need delete ones without being referenced in
singlecell_app_singlecellobject
table.