ufal / ParCzech

ParCzech is a project on compiling Czech parliamentary data into annotated corpora.
https://ufal.mff.cuni.cz/parczech
0 stars 1 forks source link

ParlaMint 3.1 #196

Open matyaskopp opened 1 year ago

matyaskopp commented 1 year ago
matyaskopp commented 1 year ago

ParlaMint-CZ fresh run notes

cd /opt/data/data-ParlaMint3.1-FRESH

mkdir parczech.tei.raw/final
mkdir parczech.tei.ana/final

copy all data

rsync -a parczech.tei.raw/consolidated/ parczech.tei.raw/final
rsync -a parczech.tei.ana/consolidated/ parczech.tei.ana/final

insert PetrFiala

vim parczech.tei.raw/final/ParCzech-listPerson.xml
   <person xml:id="PetrFiala">
      <persName><surname>Fiala</surname><forename>Petr</forename></persName>
      <sex value="M"/>
      <idno type="URI" subtype="parliament">https://www.psp.cz/sqw/detail.sqw?id=6870</idno>
      <idno type="URI" subtype="parliament">https://www.senat.cz/senatori/index.php?ke_dni=24.9.2022&amp;par_3=355</idno>
      <affiliation ref="#senate.SE14" role="member" from="2022-09-24T14:00:00">
         <roleName xml:lang="cs">Člen</roleName>
         <roleName xml:lang="en">Member</roleName>
      </affiliation>
   </person>

merge affiliations

svn export https://github.com/ufal/ParlaMint-UA/trunk/Scripts/affiliations-remove-overlaps.xsl affiliations-remove-overlaps.xsl
svn export https://github.com/ufal/ParlaMint-UA/trunk/Scripts/ParlaMint-UA-lib.xsl ParlaMint-UA-lib.xsl

mv parczech.tei.raw/final/ParCzech-listPerson.xml parczech.tei.raw/final/ParCzech-listPerson.xml.bak
java -cp /opt/tools/shared/saxon/saxon-he-10.1.jar net.sf.saxon.Transform -t -xsl:affiliations-remove-overlaps.xsl \
     -s:parczech.tei.raw/final/ParCzech-listPerson.xml.bak \
     -o:parczech.tei.raw/final/ParCzech-listPerson.xml

rm parczech.tei.raw/final/ParCzech-listPerson.xml.bak

cp parczech.tei.raw/final/ParCzech-listPerson.xml parczech.tei.ana/final/ParCzech-listPerson.xml

fix the empty date (at the end of the file)

sed -i '/0.02 hodin/s/when=""/when="2023-06-28T00:02:00"/' parczech.tei.raw/final/ps2021-070/ps2021-070-01-001-098.xml
sed -i '/0.02 hodin/s/when=""/when="2023-06-28T00:02:00"/' parczech.tei.ana/final/ps2021-070/ps2021-070-01-001-098.ana.xml

convert to ParlaMint

cd /opt/tools/current/
nohup ./run_parczech2parlamint.sh -c /opt/tools/shared/config-ParlaMint3.1-FRESH.sh -t /opt/data/data-ParlaMint3.1-FRESH/parczech.tei.raw/final/ -a /opt/data/data-ParlaMint3.1-FRESH/parczech.tei.ana/final/ > /opt/data/data-ParlaMint3.1-FRESH/run_parczech2parlamint.20230823.log 2>&1 &
matyaskopp commented 1 year ago

Final ParlaMint-CZ 3.1 patches

cd /opt/data/data-ParlaMint3.1-FRESH

edit parczech version

edit listPerson

cp parczech.tei.raw/final/ParCzech-listPerson.xml ParCzech-listPerson.xml.bak

sed -i '/ref=/s/ref="#president.PR.417"/ref="#republic.CZ"/' parczech.tei.raw/final/ParCzech-listPerson.xml
sed -i '/ref=/s/ref="#czechNationalCouncil.CNR6"/ref="#nationalCouncil.CNR"/' parczech.tei.raw/final/ParCzech-listPerson.xml
sed -i '/ana=/s/ana="#czechN\(ationalCouncil.CNR[0-9]\)\.[^" ]*"/ana="#n\1"/' parczech.tei.raw/final/ParCzech-listPerson.xml
vim parczech.tei.raw/final/ParCzech-listPerson.xml

remove the following affiliation (2x)

     <affiliation ref="#parliamentaryGroup.ODS"
                  role="member"
                  from="2013-11-06T00:00:00"
                  to="2017-10-26T23:00:00"
                  ana="#parliamentaryGroup.ODS.1106">
        <roleName xml:lang="cs">Člen předsednictva</roleName>
        <roleName xml:lang="en">Presidium Member</roleName>
     </affiliation>

fix PavelBelobradek.1976 government timespan (change the time of the member affiliation to correspond with the minister timespan)

cp parczech.tei.raw/final/ParCzech-listPerson.xml parczech.tei.ana/final/ParCzech-listPerson.xml

edit listOrg

cp parczech.tei.raw/final/ParCzech-listOrg.xml ParCzech-listOrg.xml.bak

add at the beginning

   <org xml:id="republic.CZ" role="republic">
      <orgName full="yes" xml:lang="cs">Česká republika</orgName>
      <orgName full="yes" xml:lang="en">The Czech Republic</orgName>
      <orgName full="abb">CZ</orgName>
      <event from="1993-01-01">
         <label xml:lang="en">existence</label>
      </event>
   </org>
sed -i '/role=/s/role="boardOfGovernors"/role="boardOfDirectors"/' parczech.tei.raw/final/ParCzech-listOrg.xml
sed -i '/role=/s/role="boardOfTrustees"/role="supervisoryBoard"/' parczech.tei.raw/final/ParCzech-listOrg.xml
sed -i '/role=/s/role="presidium"/role="institution"/' parczech.tei.raw/final/ParCzech-listOrg.xml
sed -i '/role=/s/role="supervisoryCommission"/role="institution"/' parczech.tei.raw/final/ParCzech-listOrg.xml

sed -i 's/chamberOfThePeople/chamberOfPeople/' parczech.tei.raw/final/ParCzech-listOrg.xml
sed -i 's/chamberOfTheNations/chamberOfNations/' parczech.tei.raw/final/ParCzech-listOrg.xml

cp parczech.tei.raw/final/ParCzech-listOrg.xml parczech.tei.ana/final/ParCzech-listOrg.xml

cp ParlaMint/20230823T140053/ParlaMint-CZ/ParlaMint-CZ.TEI/ParlaMint-CZ-listPerson.xml ParlaMint-CZ-listPerson.xml.bak
cp ParlaMint/20230823T140053/ParlaMint-CZ/ParlaMint-CZ.TEI/ParlaMint-CZ-listOrg.xml ParlaMint-CZ-listOrg.xml.bak
cp parczech.tei.raw/final/ParCzech-listPerson.xml ParlaMint-CZ-listPerson.xml
cp parczech.tei.raw/final/ParCzech-listOrg.xml ParlaMint-CZ-listOrg.xml

copy and reformat them to the ParlaMint version (add xml:id and xml:lang to the root element when missing)

xmlstarlet edit --inplace \
                  --insert '/*[not(@xml:id)]' \
                    --type attr -n 'xml:id' \
                    --value "ParlaMint-CZ-listPerson" \
                  --insert '/*[not(@xml:lang)]' \
                    --type attr -n 'xml:lang' \
                    --value "cs" \
                  "ParlaMint-CZ-listPerson.xml"
xmlstarlet edit --inplace \
                  --insert '/*[not(@xml:id)]' \
                    --type attr -n 'xml:id' \
                    --value "ParlaMint-CZ-listOrg" \
                  --insert '/*[not(@xml:lang)]' \
                    --type attr -n 'xml:lang' \
                    --value "cs" \
                  "ParlaMint-CZ-listOrg.xml"

copy to ParlaMint folders

cp ParlaMint-CZ-listPerson.xml ParlaMint/20230823T140053/ParlaMint-CZ/ParlaMint-CZ.TEI/ParlaMint-CZ-listPerson.xml
cp ParlaMint-CZ-listOrg.xml ParlaMint/20230823T140053/ParlaMint-CZ/ParlaMint-CZ.TEI/ParlaMint-CZ-listOrg.xml
cp ParlaMint-CZ-listPerson.xml ParlaMint/20230823T140053/ParlaMint-CZ.ana/ParlaMint-CZ.TEI.ana/ParlaMint-CZ-listPerson.xml
cp ParlaMint-CZ-listOrg.xml ParlaMint/20230823T140053/ParlaMint-CZ.ana/ParlaMint-CZ.TEI.ana/ParlaMint-CZ-listOrg.xml