ecotaxa / ecotaxa_front

Front end of the EcoTaxa application
Other
6 stars 6 forks source link

Duplicates in taxonomic tree #640

Closed grololo06 closed 3 years ago

grololo06 commented 3 years ago

As apparently ecotaxoserver does not prevent creation of 2+ children with same name inside same parent.

select * from taxonomy txo 
where exists (select 1 from taxonomy txo2 
               where txo2.parent_id = txo.parent_id 
                 and txo2.name = txo.name 
                 and txo2.id != txo.id)
order by name, nbrobj, parent_id

125 lines

Record is 'Aloconta' which is 3 times for its parent.

grololo06 commented 3 years ago
with dup_names as (select txo.id,txo.parent_id,txo.name from taxonomy txo 
where exists (select 1 from taxonomy txo2 
               where txo2.parent_id = txo.parent_id 
                 and txo2.name = txo.name 
                 and txo2.id != txo.id))
select dpn.id,(select count(1) from objects where classif_id=dpn.id)+(select count(1) from objectsclassifhisto where classif_id=dpn.id), dpn.name, dpn.parent_id 
from dup_names dpn order by dpn.parent_id,dpn.name;
  id   | ?column? |                name                 | parent_id 
-------+----------+-------------------------------------+-----------
   380 |        0 | Katablepharidida                    |         7
   357 |        0 | Katablepharidida                    |         7
 18113 |        0 | Bryophyta X                         |     12440
 85778 |        0 | Bryophyta X                         |     12440
 18053 |        0 | Pythium monospermum                 |     13804
 18052 |        0 | Pythium monospermum                 |     13804
 20896 |        0 | Picochlorum oklahomensis            |     15364
 20897 |        0 | Picochlorum oklahomensis            |     15364
 23531 |        0 | Ophiocordyceps                      |     16213
 88863 |        0 | Ophiocordyceps                      |     16213
 25438 |        0 | Rhopalomyces                        |     16500
 25442 |        0 | Rhopalomyces                        |     16500
 26633 |        0 | Cosmocercoidea                      |     16857
 88153 |        0 | Cosmocercoidea                      |     16857
 88327 |        0 | Macrostomida                        |     16912
 26955 |        0 | Macrostomida                        |     16912
 31563 |        0 | Gyrodinium dominans                 |     18765
 78889 |        0 | Gyrodinium dominans                 |     18765
 31559 |        0 | Gyrodinium instriatum               |     18765
 78883 |        0 | Gyrodinium instriatum               |     18765
 31605 |        0 | Ostreopsis                          |     18800
 31697 |        0 | Ostreopsis                          |     18800
 31683 |        0 | Gonyaulax                           |     18802
 31681 |        1 | Gonyaulax                           |     18802
 36162 |        0 | Boeremia exigua var. exigua         |     22618
 36161 |        0 | Boeremia exigua var. exigua         |     22618
 37254 |        0 | Aspergillus sp.                     |     22798
 37280 |        0 | Aspergillus sp.                     |     22798
 38767 |        0 | Arthrobotrys oligospora             |     23278
 38756 |        0 | Arthrobotrys oligospora             |     23278
 86198 |        0 | Oedipodrilus                        |     25587
 43461 |        0 | Oedipodrilus                        |     25587
 44441 |        0 | Gasteracantha                       |     25792
 86374 |        0 | Gasteracantha                       |     25792
 87284 |        0 | Aloconota                           |     25849
 48973 |        0 | Aloconota                           |     25849
 87118 |        0 | Aloconota                           |     25849
 48599 |        0 | Ctenophthalmus                      |     25849
 87015 |        0 | Ctenophthalmus                      |     25849
 87021 |        0 | Schizonycha                         |     25849
 45591 |        0 | Schizonycha                         |     25849
 45346 |        0 | Toxorhynchites                      |     25849
 87225 |        0 | Toxorhynchites                      |     25849
 51403 |        0 | Zschokkella                         |     26017
 87904 |        0 | Zschokkella                         |     26017
 51436 |        0 | Myxosporea cf. Myxidium sp. MF-2010 |     26022
 51437 |        0 | Myxosporea cf. Myxidium sp. MF-2010 |     26022
 88252 |        0 | Hemicycliophora                     |     26760
 53234 |        0 | Hemicycliophora                     |     26760
 54934 |        0 | Holacanthida B1 X sp.               |     27723
 54935 |        0 | Holacanthida B1 X sp.               |     27723
 55638 |      219 | Nitzschia sp.                       |     28152
 55643 |        1 | Nitzschia sp.                       |     28152
 55971 |     2035 | Thalassiosira sp.                   |     28220
 85510 |       42 | Thalassiosira sp.                   |     28220
 55869 |        0 | Cyclotella striata                  |     28253
 55870 |        0 | Cyclotella striata                  |     28253
 35102 |        0 | Pinus koraiensis                    |     28825
 35147 |        0 | Pinus koraiensis                    |     28825
 35457 |        0 | Rosa hybrid cultivar                |     29760
 35458 |        0 | Rosa hybrid cultivar                |     29760
 78778 |     1001 | Dinophysis hastata                  |     31666
 58193 |      109 | Dinophysis hastata                  |     31666
 58242 |        0 | Gambierdiscus australes             |     31680
 31551 |        0 | Gambierdiscus australes             |     31680
 58239 |        0 | Gambierdiscus caribaeus             |     31680
 31550 |        0 | Gambierdiscus caribaeus             |     31680
 31549 |        0 | Gambierdiscus pacificus             |     31680
 58237 |        0 | Gambierdiscus pacificus             |     31680
 31548 |        0 | Gambierdiscus polynesiensis         |     31680
 58236 |        0 | Gambierdiscus polynesiensis         |     31680
 58234 |        0 | Gambierdiscus sp.                   |     31680
 31547 |        0 | Gambierdiscus sp.                   |     31680
 58232 |        0 | Gambierdiscus yasumotoi             |     31680
 31546 |        0 | Gambierdiscus yasumotoi             |     31680
 58270 |        0 | Alexandrium affine                  |     31692
 58279 |        0 | Alexandrium affine                  |     31692
 58274 |        0 | Alexandrium fundyense               |     31692
 58266 |        0 | Alexandrium fundyense               |     31692
 58271 |        0 | Alexandrium tamarense               |     31692
 58263 |        0 | Alexandrium tamarense               |     31692
 58273 |        8 | Alexandrium ostenfeldii             |     31693
 58250 |        0 | Alexandrium ostenfeldii             |     31693
 58249 |        0 | Alexandrium peruvianum              |     31693
 58272 |        0 | Alexandrium peruvianum              |     31693
 58165 |        0 | Coolia malayensis                   |     31698
 58299 |        0 | Coolia malayensis                   |     31698
 58297 |        0 | Coolia sp.                          |     31698
 31536 |        0 | Coolia sp.                          |     31698
 78831 |        0 | Gymnodinium catenatum               |     31777
 58397 |        0 | Gymnodinium catenatum               |     31777
 78829 |        0 | Gymnodinium sp.                     |     31777
 58394 |        0 | Gymnodinium sp.                     |     31777
 58422 |   118166 | Podolampas                          |     31795
 18749 |     3956 | Podolampas                          |     31795
 58512 |        0 | Protoperidinium conicum             |     31853
 78926 |        0 | Protoperidinium conicum             |     31853
 58548 |        0 | Prorocentrum cassubicum             |     31873
 79038 |        0 | Prorocentrum cassubicum             |     31873
 79046 |        0 | Prorocentrum consutum               |     31873
 58547 |        0 | Prorocentrum consutum               |     31873
 18953 |        5 | Symbiodinium                        |     31878
 58556 |        1 | Symbiodinium                        |     31878
 69738 |        0 | Schedorhinotermes sp.               |     45861
 86724 |        0 | Schedorhinotermes sp.               |     45861
 68119 |        0 | Mayetiola destructor                |     47055
 68120 |        0 | Mayetiola destructor                |     47055
 77104 |        0 | Paratoplana renatae                 |     53926
 88405 |        0 | Paratoplana renatae                 |     53926
 77867 |        0 | Acanthophracta E1E2 X sp.           |     54890
 77868 |        0 | Acanthophracta E1E2 X sp.           |     54890
 77903 |        0 | Acanthometridae F3 X sp.            |     54912
 77902 |        0 | Acanthometridae F3 X sp.            |     54912
 77921 |        0 | Stauracanthidae F3 X sp.            |     54915
 77922 |        0 | Stauracanthidae F3 X sp.            |     54915
 77926 |        0 | Beella digitata                     |     54955
 77927 |        0 | Beella digitata                     |     54955
 78885 |     1416 | Akashiwo sanguinea                  |     58375
 78886 |        2 | Akashiwo sanguinea                  |     58375
 78960 |        0 | Scrippsiella sp.                    |     58431
 78952 |        0 | Scrippsiella sp.                    |     58431
 78950 |        0 | Scrippsiella trochoidea             |     58431
 58494 |        0 | Scrippsiella trochoidea             |     58431
 84708 |        0 | Vibilia antarctica                  |     83737
 87686 |        0 | Vibilia antarctica                  |     83737
(125 lignes)
grololo06 commented 3 years ago

Fix script applied on prod DB 11 March 2021, on EcoTaxa DB and EcoTaxoServer DB. Of course obj_* tables are not on the taxo server.

begin;

WITH fromto (_from, _to) AS (VALUES (357,380),(85778,18113),(18052,18053),(20897,20896),(88863,23531),(25442,25438),(88153,26633),
(26955,88327),(78889,31563 ),(78883,31559 ),(31697,31605),(31683,31681),(36161,36162),(37280,37254),(38756,38767),(43461, 86198),
(86374, 44441),(87284,48973),(87118, 48973),(87015,48599),(87021,45591),(87225,45346),
(87904,51403),(51437,51436),(88252, 53234),(54935,54934),(55870,55869),(35147, 35102),(35458,  35457),(58242,31551),
(58239,31550),(58237,31549),(58236,31548),(58234,31547),(58232, 31546),(58279, 58270),(58274,58266),(58271, 58263),(58273,58250),
(58272,58249),(58299,58165),(58297,31536),(78831, 58397),(78829,58394),(78926,58512),(79038,58548),(79046,58547),
(86724,69738),(68120,68119),(88405,77104),(77868,77867),(77903,77902),(77922,77921),(77927,77926),
(78960,78952),(78950,58494),(87686,84708),(55643, 55638),(85510, 55971),(58193, 78778),(18749, 58422),(58556, 18953),(78886,78885))
update obj_head set classif_id=fromto._to
from fromto where classif_id=fromto._from;

WITH fromto (_from, _to) AS (VALUES (357,380),(85778,18113),(18052,18053),(20897,20896),(88863,23531),(25442,25438),(88153,26633),
(26955,88327),(78889,31563 ),(78883,31559 ),(31697,31605),(31683,31681),(36161,36162),(37280,37254),(38756,38767),(43461, 86198),
(86374, 44441),(87284,48973),(87118, 48973),(87015,48599),(87021,45591),(87225,45346),
(87904,51403),(51437,51436),(88252, 53234),(54935,54934),(55870,55869),(35147, 35102),(35458,  35457),(58242,31551),
(58239,31550),(58237,31549),(58236,31548),(58234,31547),(58232, 31546),(58279, 58270),(58274,58266),(58271, 58263),(58273,58250),
(58272,58249),(58299,58165),(58297,31536),(78831, 58397),(78829,58394),(78926,58512),(79038,58548),(79046,58547),
(86724,69738),(68120,68119),(88405,77104),(77868,77867),(77903,77902),(77922,77921),(77927,77926),
(78960,78952),(78950,58494),(87686,84708),(55643, 55638),(85510, 55971),(58193, 78778),(18749, 58422),(58556, 18953),(78886,78885))
update objectsclassifhisto set classif_id=fromto._to
from fromto where classif_id=fromto._from;

WITH fromto (_from, _to) AS (VALUES (357,380),(85778,18113),(18052,18053),(20897,20896),(88863,23531),(25442,25438),(88153,26633),
(26955,88327),(78889,31563 ),(78883,31559 ),(31697,31605),(31683,31681),(36161,36162),(37280,37254),(38756,38767),(43461, 86198),
(86374, 44441),(87284,48973),(87118, 48973),(87015,48599),(87021,45591),(87225,45346),
(87904,51403),(51437,51436),(88252, 53234),(54935,54934),(55870,55869),(35147, 35102),(35458,  35457),(58242,31551),
(58239,31550),(58237,31549),(58236,31548),(58234,31547),(58232, 31546),(58279, 58270),(58274,58266),(58271, 58263),(58273,58250),
(58272,58249),(58299,58165),(58297,31536),(78831, 58397),(78829,58394),(78926,58512),(79038,58548),(79046,58547),
(86724,69738),(68120,68119),(88405,77104),(77868,77867),(77903,77902),(77922,77921),(77927,77926),
(78960,78952),(78950,58494),(87686,84708),(55643, 55638),(85510, 55971),(58193, 78778),(18749, 58422),(58556, 18953),(78886,78885))
update taxonomy set parent_id=fromto._to
from fromto where parent_id=fromto._from;

WITH fromto (_from, _to) AS (VALUES (357,380),(85778,18113),(18052,18053),(20897,20896),(88863,23531),(25442,25438),(88153,26633),
(26955,88327),(78889,31563 ),(78883,31559 ),(31697,31605),(31683,31681),(36161,36162),(37280,37254),(38756,38767),(43461, 86198),
(86374, 44441),(87284,48973),(87118, 48973),(87015,48599),(87021,45591),(87225,45346),
(87904,51403),(51437,51436),(88252, 53234),(54935,54934),(55870,55869),(35147, 35102),(35458,  35457),(58242,31551),
(58239,31550),(58237,31549),(58236,31548),(58234,31547),(58232, 31546),(58279, 58270),(58274,58266),(58271, 58263),(58273,58250),
(58272,58249),(58299,58165),(58297,31536),(78831, 58397),(78829,58394),(78926,58512),(79038,58548),(79046,58547),
(86724,69738),(68120,68119),(88405,77104),(77868,77867),(77903,77902),(77922,77921),(77927,77926),
(78960,78952),(78950,58494),(87686,84708),(55643, 55638),(85510, 55971),(58193, 78778),(18749, 58422),(58556, 18953),(78886,78885))
delete from taxonomy
using fromto where id=fromto._from;

with dup_names as (select txo.id,txo.parent_id,txo.name from taxonomy txo
where exists (select 1 from taxonomy txo2
               where txo2.parent_id = txo.parent_id
                 and txo2.name = txo.name
                 and txo2.id != txo.id))
select dpn.id,(select count(1) from objects where classif_id=dpn.id)+(select count(1) from objectsclassifhisto where classif_id=dpn.id), dpn.name, dpn.parent_id
from dup_names dpn order by dpn.parent_id,dpn.name;

/*
  id   | ?column? |         name         | parent_id
-------+----------+----------------------+-----------
 30794 |        0 | Bryophyta X sp.      |     18113
 89205 |        0 | Bryophyta X sp.      |     18113
 79121 |        0 | Symbiodinium sp.     |     18953
 31891 |        0 | Symbiodinium sp.     |     18953
 43242 |        0 | Rhopalomyces elegans |     25438
 43243 |        0 | Rhopalomyces elegans |     25438
 58301 |        0 | Ostreopsis sp.       |     31605
 58166 |        0 | Ostreopsis sp.       |     31605
 58292 |        0 | Gonyaulax polygramma |     31681
 58296 |      136 | Gonyaulax polygramma |     31681
 */

WITH fromto (_from, _to) AS (VALUES (89205,30794),(79121,31891),(43242,43243),(58301,58166),(58292,58296))
update taxonomy set parent_id=fromto._to
from fromto where parent_id=fromto._from;

WITH fromto (_from, _to) AS (VALUES (89205,30794),(79121,31891),(43242,43243),(58301,58166),(58292,58296))
delete from taxonomy where id in (select _from from fromto);

create unique index is_taxo_parent_name on taxonomy (parent_id, name);