Open sedmar opened 2 years ago
Ahoj, díky za PR!
diff --git a/data/ares/main.py b/data/ares/main.py
index 8ff002f..9a96cff 100644
--- a/data/ares/main.py
+++ b/data/ares/main.py
@@ -87,10 +87,13 @@ def remote_data(partial):
def main(outdir: str, partial: bool = False):
with open(os.path.join(outdir, "firmy.csv"), "w", encoding="utf8") as ud, open(
os.path.join(outdir, "fosoby.csv"), "w", encoding="utf8"
- ) as fo, open(os.path.join(outdir, "posoby.csv"), "w", encoding="utf8") as po:
+ ) as fo, open(os.path.join(outdir, "posoby.csv"), "w", encoding="utf8") as po, open(
+ os.path.join(outdir, "cinnosti.csv"), "w", encoding="utf8"
+ ) as co:
udc = csv.writer(ud, lineterminator="\n")
foc = csv.writer(fo, lineterminator="\n")
poc = csv.writer(po, lineterminator="\n")
+ coc = csv.writer(co, lineterminator="\n")
cols = [
"zdroj",
@@ -133,6 +136,13 @@ def main(outdir: str, partial: bool = False):
"adresa",
]
)
+ coc.writerow(
+ [
+ "ico",
+ "typ_cinnosti",
+ "text",
+ ]
+ )
for rw, (el, fl) in enumerate(remote_data(partial)):
et = lxml.etree.fromstring(fl)
@@ -203,6 +213,13 @@ def main(outdir: str, partial: bool = False):
for j in org["posoby"]:
poc.writerow(j)
+ for txt in vypis.iterfind(
+ ".//are:Cinnosti/*/are:Text", namespaces=et.nsmap
+ ):
+ text = txt.text.strip()
+ ctype = txt.getparent().tag.rpartition("}")[-1]
+ coc.writerow([ico, ctype, text])
+
if __name__ == "__main__":
main(".")
Celý soubor bez diffu je tady: https://gist.github.com/kokes/c2f155c26a03ac93a50fe7e52e7a15a4
Co myslíš?
Ahoj, sorry za zpoždění. Teď jsem zkoušel to tvoje navrhované řešení a líbí se mi víc; z pohledu datových struktur je to asi i čistší než to moje.
Přidání schopnosti zpracovat při parsování ARESu pole předmět činnosti a předmět podnikání. Do výstupního CSV je přidán sloupec "cinnosti", kde je v každém řádku JSON dictionary s klíči PredmetPodnikaniText a PredmetCinnostiText. Hodnoty u každého klíče jsou středníkem oddělené předměty činnosti, případně předměty podnikání (může jich být 0 až N).