Open mangiafrangette opened 5 years ago
Versione con liste: Questa ritorna velocemente il grafo. Bisogna però aggiungere shortest_path e controllare che sia corretto.
def do_aut_distance(data, sse, aut):
    """Build the co-authorship graph reachable from ``aut`` (list-based version).

    Starting from ``aut``, every author that can be reached through a chain
    of shared papers is visited exactly once. For each visited author an
    edge is added to each of their co-authors, carrying the number of
    shared papers in the ``co_authored_papers`` edge attribute.

    Args:
        data: Unused; kept so existing callers keep working.
        sse: Object whose ``data`` attribute is iterated as records with an
            ``'authors'`` entry holding names separated by ``'; '``.
        aut: Name of the author the exploration starts from.

    Returns:
        The graph containing ``aut`` and every author connected to it.
        If ``aut`` appears in no record, the graph holds only that node.
    """
    # Split every author string once, instead of once per visited author.
    # dict.fromkeys drops a name repeated inside one record (keeping order),
    # so a paper is never counted twice and no self-loop is created.
    author_groups = [
        list(dict.fromkeys(article['authors'].split('; ')))
        for article in sse.data
    ]

    coauthors_graph = Graph()
    coauthors_graph.add_node(aut)

    coauthors_to_do = [aut]
    coauthors_done = set()
    while coauthors_to_do:
        # Pop instead of removing items from a list that is being iterated.
        a = coauthors_to_do.pop()
        if a in coauthors_done:
            continue
        coauthors_done.add(a)

        current_coauthors = []
        for group in author_groups:
            if a in group:
                current_coauthors.extend(name for name in group if name != a)

        for name, count in Counter(current_coauthors).items():
            coauthors_graph.add_edge(a, name, co_authored_papers=count)
            if name not in coauthors_done:
                coauthors_to_do.append(name)
    return coauthors_graph
Versione ricorsiva:
def do_aut_distance(data, sse, aut, dist=1):
    """Build the co-authorship graph reachable from ``aut`` (recursive version).

    The graph is grown one hop at a time: a nested helper adds the edges of
    all authors at the current hop and then calls itself on the authors
    discovered for the next hop. The graph and the set of visited authors
    are shared by every recursion step, so the recursion terminates and a
    single graph is always returned.

    Each edge carries two attributes:

    * ``co_authored_papers``: number of records listing both endpoints.
    * ``distance``: the hop level at which the edge was first reached:
      ``dist`` for edges touching ``aut``, ``dist + 1`` for edges first
      reached from a direct co-author of ``aut``, and so on. For the edge
      through which an author is first discovered, this is that author's
      hop count from ``aut`` (when ``dist`` is 1).

    Args:
        data: Unused; kept so existing callers keep working.
        sse: Object whose ``data`` attribute is iterated as records with an
            ``"authors"`` entry holding names separated by ``"; "``.
        aut: Name of the author the exploration starts from.
        dist: Value stored as ``distance`` on the edges touching ``aut``.

    Returns:
        The graph containing ``aut`` and every author connected to it.
    """
    # Compare against split names: a substring test on the raw string can
    # match part of another author's name. dict.fromkeys drops a name that
    # is repeated inside one record, keeping the original order.
    author_groups = [
        list(dict.fromkeys(article["authors"].split("; ")))
        for article in sse.data
    ]

    coauthors_done = {aut}
    coauthors_graph = Graph()
    coauthors_graph.add_node(aut)

    def expand(frontier, level):
        """Add the edges of every author in ``frontier``, then recurse one hop out."""
        next_frontier = []
        for author in frontier:
            coauthors = []
            for group in author_groups:
                if author in group:
                    coauthors.extend(name for name in group if name != author)

            for name, count in Counter(coauthors).items():
                # Keep the attributes of the first (lowest-level) discovery.
                if not coauthors_graph.has_edge(author, name):
                    coauthors_graph.add_edge(
                        author, name, co_authored_papers=count, distance=level
                    )
                if name not in coauthors_done:
                    coauthors_done.add(name)
                    next_frontier.append(name)

        # One recursion step per hop level, not per author.
        if next_frontier:
            expand(next_frontier, level + 1)

    expand([aut], dist)
    return coauthors_graph
Versione con dizionari:
def do_aut_distance(data, sse, aut):
    """Build the co-authorship graph reachable from ``aut`` (dict-based version).

    A dictionary maps every discovered author to the list of their
    co-authors (one entry per shared record). An author becomes a key the
    moment they are discovered, which is what guarantees each author is
    expanded exactly once and the loop terminates.

    Args:
        data: Unused; kept so existing callers keep working.
        sse: Object whose ``data`` attribute is iterated as records with an
            ``"authors"`` entry holding names separated by ``"; "``.
        aut: Name of the author the exploration starts from.

    Returns:
        The graph containing ``aut`` and every author connected to it; each
        edge stores the number of shared records in ``co_authored_papers``.
    """
    coauthors = {aut: []}
    coauthors_graph = Graph()
    coauthors_graph.add_node(aut)

    # Authors that are already keys of ``coauthors`` but not yet expanded.
    pending = [aut]
    while pending:
        a = pending.pop()
        for article in sse.data:
            names = article["authors"].split("; ")
            if a in names:
                # dict.fromkeys drops a name repeated inside one record so
                # the record is counted once and no self-loop is created.
                coauthors[a].extend(
                    name for name in dict.fromkeys(names) if name != a
                )

        for name, count in Counter(coauthors[a]).items():
            coauthors_graph.add_edge(a, name, co_authored_papers=count)
            if name not in coauthors:
                coauthors[name] = []
                pending.append(name)
    return coauthors_graph
Versione con dizionari integrata con versione di Filippo:
def do_aut_distance(data, sse, aut):
    """Build the co-authorship graph reachable from ``aut``.

    Dict-based version with the author groups computed up front: every
    author string is split once, and an index from author name to the
    groups they appear in avoids rescanning all records for each author.

    Args:
        data: Unused; kept so existing callers keep working.
        sse: Object whose ``data`` attribute is iterated as records with an
            ``'authors'`` entry holding names separated by ``'; '``.
        aut: Name of the author the exploration starts from.

    Returns:
        The graph containing ``aut`` and every author connected to it; each
        edge stores the number of shared records in ``co_authored_papers``.
    """
    coauthor_groups = [article['authors'].split('; ') for article in sse.data]

    # author name -> groups (one per record) that include that author.
    groups_by_author = defaultdict(list)
    for group in coauthor_groups:
        # Drop a name repeated inside one record, keeping the order.
        members = list(dict.fromkeys(group))
        for member in members:
            groups_by_author[member].append(members)

    # Discovered authors -> their co-authors, one entry per shared record.
    authors = {aut: []}
    coauthors_graph = Graph()
    coauthors_graph.add_node(aut)

    # Authors that are already keys of ``authors`` but not yet expanded.
    pending = [aut]
    while pending:
        a = pending.pop()
        # .get avoids inserting an empty entry for an unknown start author.
        for group in groups_by_author.get(a, ()):
            authors[a].extend(name for name in group if name != a)

        for name, count in Counter(authors[a]).items():
            coauthors_graph.add_edge(a, name, co_authored_papers=count)
            if name not in authors:
                authors[name] = []
                pending.append(name)
    return coauthors_graph
Versione con liste, shortest_path_length aggiunto: controllare che tutti i nodi abbiano l'informazione della distanza
def do_aut_distance(data, sse, aut):
    """Build the co-authorship graph reachable from ``aut``, with distances.

    The graph is built first; only afterwards is the shortest-path length
    from ``aut`` computed for every node. Computing it while the graph is
    still growing can store a distance that is too large, because a shorter
    path may be added later.

    Args:
        data: Unused; kept so existing callers keep working.
        sse: Object whose ``data`` attribute is iterated as records with an
            ``'authors'`` entry holding names separated by ``'; '``.
        aut: Name of the author the exploration starts from.

    Returns:
        The graph containing ``aut`` and every author connected to it.
        Edges store the number of shared records in ``co_authored_papers``;
        every node stores its hop count from ``aut`` in ``distance``
        (0 for ``aut`` itself).
    """
    # Split every author string once, instead of once per visited author.
    # dict.fromkeys drops a name repeated inside one record (keeping order),
    # so a paper is never counted twice and no self-loop is created.
    author_groups = [
        list(dict.fromkeys(article['authors'].split('; ')))
        for article in sse.data
    ]

    coauthors_graph = nx.Graph()
    coauthors_graph.add_node(aut)

    coauthors_to_do = [aut]
    coauthors_done = set()
    while coauthors_to_do:
        # Pop instead of removing items from a list that is being iterated.
        a = coauthors_to_do.pop()
        if a in coauthors_done:
            continue
        coauthors_done.add(a)

        current_coauthors = []
        for group in author_groups:
            if a in group:
                current_coauthors.extend(name for name in group if name != a)

        for name, count in Counter(current_coauthors).items():
            coauthors_graph.add_edge(a, name, co_authored_papers=count)
            if name not in coauthors_done:
                coauthors_to_do.append(name)

    # With only ``source`` given, shortest_path_length returns a mapping
    # from each reachable node to its distance. Every node was reached
    # from ``aut``, so every node receives a ``distance`` attribute.
    distances = nx.shortest_path_length(coauthors_graph, source=aut)
    for name, number_edges in distances.items():
        coauthors_graph.add_node(name, distance=number_edges)
    return coauthors_graph