mangiafrangette / the_fellowship_of_the_loop

Computational thinking project - DHDK 2018/19
1 stars 1 forks source link

do_distance_aut #3

Open mangiafrangette opened 5 years ago

mangiafrangette commented 5 years ago
def do_aut_distance(data, sse, aut):
    """Build the co-authorship graph of every author reachable from ``aut``.

    Starting from ``aut``, repeatedly pick a pending author, look at every
    article that lists them, and connect them to each listed author that has
    not been processed yet.

    Args:
        data: Unused by this function.
        sse: Object whose ``data`` attribute iterates over mappings that have
            an ``'authors'`` entry holding names separated by ``'; '``.
        aut: Name of the author the exploration starts from.

    Returns:
        The graph built via ``Graph()`` (presumably ``networkx.Graph`` --
        confirm against the file's imports). It always contains ``aut``;
        edges carry no attributes.
    """
    groups = [record['authors'].split('; ') for record in sse.data]

    graph = Graph()
    graph.add_node(aut)

    pending = {aut}
    visited = set()
    while pending:
        current = pending.pop()
        visited.add(current)
        for members in groups:
            # Articles that do not list the current author say nothing
            # about their co-authors.
            if current not in members:
                continue
            for other in members:
                # `current` is already in `visited`, so this guard also
                # prevents self-loops.
                if other in visited:
                    continue
                pending.add(other)
                graph.add_edge(current, other)
    return graph
federicabologna commented 5 years ago

Versione con liste: Questa ritorna velocemente il grafo. Bisogna però aggiungere shortest_path e controllare che sia corretto.

def do_aut_distance(data, sse, aut):
    """Build the weighted co-authorship graph of everyone reachable from ``aut``.

    Every author connected to ``aut`` through a chain of shared articles is
    visited exactly once. For each visited author an edge is added to each of
    their co-authors, with the attribute ``co_authored_papers`` set to the
    number of times that co-author appears in the visited author's articles.

    The work list is consumed with ``pop()`` instead of being extended and
    trimmed while a ``for`` loop walks over it, which made the original skip
    elements and re-queue the same names many times.

    Args:
        data: Unused by this function.
        sse: Object whose ``data`` attribute iterates over mappings that have
            an ``'authors'`` entry holding names separated by ``'; '``.
        aut: Name of the author the exploration starts from.

    Returns:
        The graph built via ``Graph()`` (presumably ``networkx.Graph`` --
        confirm against the file's imports). It always contains ``aut``.
    """
    # Split each author string once instead of once per visited author.
    author_groups = [article['authors'].split('; ') for article in sse.data]

    coauthors_graph = Graph()
    coauthors_graph.add_node(aut)

    coauthors_done = set()
    coauthors_to_do = [aut]
    while coauthors_to_do:
        author = coauthors_to_do.pop()
        if author in coauthors_done:
            # The same name can be queued by several of its co-authors.
            continue
        coauthors_done.add(author)

        counted_coauthors = Counter()
        for group in author_groups:
            if author in group:
                # Excluding the author here means a name listed twice in one
                # article can never produce a self-loop.
                counted_coauthors.update(
                    name for name in group if name != author
                )

        for name, count in counted_coauthors.items():
            coauthors_graph.add_edge(author, name, co_authored_papers=count)
            if name not in coauthors_done:
                coauthors_to_do.append(name)
    return coauthors_graph

Versione ricorsiva:

def do_aut_distance(data, sse, aut, dist=1):
    """Build the co-authorship graph around ``aut`` with hop distances on edges.

    The original recursive draft created a new graph and a new "done" set on
    every call, so nested results were lost and two co-authors recursed into
    each other forever; it also matched ``aut`` as a substring of the raw
    author string and normally fell off the end returning ``None``. This
    version walks the network breadth-first with a single shared graph, so it
    terminates and always returns the graph.

    Each edge gets two attributes:

    * ``co_authored_papers``: how many times the co-author appears in the
      articles of the author the edge was discovered from;
    * ``distance``: ``dist`` plus the breadth-first depth of the author the
      edge was discovered from (``aut`` has depth 0). With the default
      ``dist=1`` the edges touching ``aut`` therefore have distance 1.

    Args:
        data: Unused by this function.
        sse: Object whose ``data`` attribute iterates over mappings that have
            an ``'authors'`` entry holding names separated by ``'; '``.
        aut: Name of the author the exploration starts from.
        dist: Distance value assigned to the edges that touch ``aut``.

    Returns:
        The graph built via ``Graph()`` (presumably ``networkx.Graph`` --
        confirm against the file's imports). It always contains ``aut``.
    """
    # Exact list membership is used below; a substring test on the raw
    # string would also match authors whose name merely contains ``aut``.
    author_groups = [article["authors"].split("; ") for article in sse.data]

    coauthors_graph = Graph()
    coauthors_graph.add_node(aut)

    coauthors_done = {aut}
    frontier = [aut]
    level = dist
    while frontier:
        next_frontier = []
        for author in frontier:
            counted_coauthors = Counter()
            for group in author_groups:
                if author in group:
                    counted_coauthors.update(
                        name for name in group if name != author
                    )

            for name, count in counted_coauthors.items():
                # Keep the attributes from the first (closest) discovery
                # rather than overwriting them from the far endpoint.
                if not coauthors_graph.has_edge(author, name):
                    coauthors_graph.add_edge(
                        author,
                        name,
                        co_authored_papers=count,
                        distance=level,
                    )
                if name not in coauthors_done:
                    coauthors_done.add(name)
                    next_frontier.append(name)
        frontier = next_frontier
        level += 1
    return coauthors_graph

Versione con dizionari:

def do_aut_distance(data, sse, aut):
    """Build the weighted co-authorship graph of everyone reachable from ``aut``.

    A dictionary maps every discovered author to the list of their
    co-authors (one entry per shared article). The original draft looped on
    ``while coauthors`` without ever emptying the dictionary or adding keys,
    so it never terminated; here a separate work list drives the traversal
    and the dictionary doubles as the "already discovered" record.

    Each edge carries ``co_authored_papers``, the number of times the
    co-author appears in the articles of the author being processed.

    Args:
        data: Unused by this function.
        sse: Object whose ``data`` attribute iterates over mappings that have
            an ``'authors'`` entry holding names separated by ``'; '``.
        aut: Name of the author the exploration starts from.

    Returns:
        The graph built via ``Graph()`` (presumably ``networkx.Graph`` --
        confirm against the file's imports). It always contains ``aut``.
    """
    coauthors_graph = Graph()
    coauthors_graph.add_node(aut)

    coauthors = {aut: []}
    pending = [aut]
    while pending:
        author = pending.pop()
        for article in sse.data:
            names = article["authors"].split("; ")
            if author in names:
                coauthors[author].extend(
                    name for name in names if name != author
                )

        # Count only after every article has been scanned, so each edge is
        # written once with its final weight.
        for name, count in Counter(coauthors[author]).items():
            # Always add the edge: two authors that were both discovered
            # earlier may still have papers together.
            coauthors_graph.add_edge(author, name, co_authored_papers=count)
            if name not in coauthors:
                coauthors[name] = []
                pending.append(name)
    return coauthors_graph

Versione con dizionari integrata con versione di Filippo:

def do_aut_distance(data, sse, aut):
    """Build the weighted co-authorship graph of everyone reachable from ``aut``.

    The author lists are split once and indexed by author name, so finding
    the articles of an author is a dictionary lookup. The original draft
    looped on ``while authors`` without ever emptying the dictionary or
    adding keys, so it never terminated; here a separate work list drives
    the traversal.

    Each edge carries ``co_authored_papers``, the number of times the
    co-author appears in the articles of the author being processed.

    Args:
        data: Unused by this function.
        sse: Object whose ``data`` attribute iterates over mappings that have
            an ``'authors'`` entry holding names separated by ``'; '``.
        aut: Name of the author the exploration starts from.

    Returns:
        The graph built via ``Graph()`` (presumably ``networkx.Graph`` --
        confirm against the file's imports). It always contains ``aut``.
    """
    # author name -> author lists of every article that mentions them
    groups_by_author = defaultdict(list)
    for article in sse.data:
        group = article['authors'].split('; ')
        # set() so a name repeated within one article registers it once.
        for name in set(group):
            groups_by_author[name].append(group)

    coauthors_graph = Graph()
    coauthors_graph.add_node(aut)

    authors_seen = {aut}
    pending = [aut]
    while pending:
        author = pending.pop()

        counted_coauthors = Counter()
        # .get() avoids creating an empty entry for an author with no articles.
        for group in groups_by_author.get(author, ()):
            counted_coauthors.update(name for name in group if name != author)

        for name, count in counted_coauthors.items():
            # Always add the edge: two authors that were both discovered
            # earlier may still have papers together.
            coauthors_graph.add_edge(author, name, co_authored_papers=count)
            if name not in authors_seen:
                authors_seen.add(name)
                pending.append(name)

    return coauthors_graph
federicabologna commented 5 years ago

Versione con liste, shortest_path_length aggiunto: controllare che tutti i nodi abbiano l'informazione della distanza

def do_aut_distance(data, sse, aut):
    """Build the co-authorship graph around ``aut`` with a distance on every node.

    The network is explored breadth-first, one level at a time, so the level
    at which an author is first reached is exactly their shortest
    co-authorship distance from ``aut``. The original draft mutated its work
    list while iterating over it and measured the distance on a partially
    built graph, so the stored value could be larger than the true shortest
    distance.

    Node attribute ``distance`` is the number of co-authorship hops from
    ``aut`` (0 for ``aut`` itself). Edge attribute ``co_authored_papers`` is
    the number of times the co-author appears in the articles of the author
    being processed.

    Args:
        data: Unused by this function.
        sse: Object whose ``data`` attribute iterates over mappings that have
            an ``'authors'`` entry holding names separated by ``'; '``.
        aut: Name of the author the exploration starts from.

    Returns:
        An ``nx.Graph`` in which every node has a ``distance`` attribute.
    """
    author_groups = [article['authors'].split('; ') for article in sse.data]

    coauthors_graph = nx.Graph()
    coauthors_graph.add_node(aut, distance=0)

    frontier = [aut]
    depth = 0
    while frontier:
        depth += 1
        next_frontier = []
        for author in frontier:
            counted_coauthors = Counter()
            for group in author_groups:
                if author in group:
                    counted_coauthors.update(
                        name for name in group if name != author
                    )

            for name, count in counted_coauthors.items():
                if name not in coauthors_graph:
                    # First time this author is reached: the current level
                    # is their shortest distance from `aut`.
                    coauthors_graph.add_node(name, distance=depth)
                    next_frontier.append(name)
                coauthors_graph.add_edge(
                    author, name, co_authored_papers=count
                )
        frontier = next_frontier
    return coauthors_graph