Closed bhuffaker closed 1 year ago
Add the following script as an additional, alternative solution to the recipe at: https://catalog.caida.org/recipe/getting_an_asns_name_country_organization
"""Flatten a CAIDA ``as-org2info`` snapshot into one CSV row per ASN.

The input is a gzip-compressed, pipe-delimited text file that holds several
sections, each introduced by a ``# format:<col>|<col>|...`` header line.  This
script joins the ASN section (header starting with ``aut``) to the
organization section (header starting with ``org_id``) on ``org_id`` and
writes ``<YYYY-MM-DD>_org2info.csv`` into the current directory.

The snapshot path may be given as the first command-line argument; without
one, ``DEFAULT_FILENAME`` is used.
"""
from datetime import datetime
import gzip
import os
import sys
from typing import Iterable

import pandas as pd

DEFAULT_FILENAME = "20230401.as-org2info.txt.gz"
FILENAME_FORMAT = "%Y%m%d.as-org2info.txt.gz"
FORMAT_PREFIX = "# format:"
ORG_SECTION = "org_id"  # first column of the organization section header
ASN_SECTION = "aut"     # first column of the ASN section header


def _read_sections(lines: Iterable[str]) -> dict:
    """Split *lines* into sections keyed by the first column of each header.

    Returns a dict mapping that first column name to ``(columns, rows)``,
    where ``rows`` is a list of per-record field lists (all strings).
    """
    sections = {}
    columns = None
    rows = None
    for number, raw_line in enumerate(lines, start=1):
        line = raw_line.rstrip("\r\n")
        if line.startswith(FORMAT_PREFIX):
            columns = line[len(FORMAT_PREFIX):].strip().split("|")
            rows = []
            sections[columns[0]] = (columns, rows)
            continue
        # Ignore preamble before the first header, blank lines and comments.
        if rows is None or not line or line.startswith("#"):
            continue
        # Split by hand rather than via pandas' CSV reader: the data is not
        # CSV-quoted, so a '"' inside a name must be kept literally.
        fields = line.split("|")
        if len(fields) > len(columns):
            raise ValueError(
                f"line {number}: expected {len(columns)} fields, "
                f"saw {len(fields)}"
            )
        # Tolerate short records by padding the missing trailing fields.
        fields.extend([""] * (len(columns) - len(fields)))
        rows.append(fields)
    return sections


def _section_frame(sections: dict, key: str) -> pd.DataFrame:
    """Return the section named *key* as an all-string DataFrame."""
    try:
        columns, rows = sections[key]
    except KeyError:
        raise ValueError(
            f"no '{FORMAT_PREFIX}{key}|...' section found in input"
        ) from None
    return pd.DataFrame(rows, columns=columns)


def merge_as_org2info(lines: Iterable[str]) -> pd.DataFrame:
    """Join every ASN record in *lines* with its organization record.

    Args:
        lines: Decoded text lines of an ``as-org2info`` file.

    Returns:
        A DataFrame with the columns ``asn``, ``asn_name``, ``org_id``,
        ``org_name``, ``country`` and ``source`` (the organization's source),
        one row per ASN whose ``org_id`` appears in the organization section.
        All values are strings, so a country code such as ``"NA"`` is kept
        verbatim instead of being interpreted as a missing value.

    Raises:
        ValueError: If either section header is missing, or a record has
            more fields than its section header declares.
    """
    sections = _read_sections(lines)
    org_df = _section_frame(sections, ORG_SECTION)[
        ["org_id", "org_name", "country", "source"]
    ]
    asn_df = _section_frame(sections, ASN_SECTION).rename(
        columns={"aut": "asn", "aut_name": "asn_name"}
    )[["asn", "asn_name", "org_id"]]
    return asn_df.merge(org_df, on="org_id")


def main(argv=None) -> None:
    """Convert the snapshot named in *argv* (default: ``sys.argv[1:]``)."""
    args = sys.argv[1:] if argv is None else list(argv)
    filename = args[0] if args else DEFAULT_FILENAME
    # The snapshot date comes from the file name itself, so strip any
    # directory part before parsing it.
    date = datetime.strptime(os.path.basename(filename), FILENAME_FORMAT)
    # Decompress once and stream the lines instead of re-reading the archive
    # for every section.
    with gzip.open(filename, "rt", encoding="utf-8") as fin:
        merged = merge_as_org2info(fin)
    output = str(date.date()) + '_org2info.csv'
    merged.to_csv(output, index=False)
    print(len(merged), "rows written to", output)


if __name__ == "__main__":
    main()
Add the script above as an additional, alternative solution to the recipe at: https://catalog.caida.org/recipe/getting_an_asns_name_country_organization