ropensci / openalexR

Getting bibliographic records from OpenAlex
https://docs.ropensci.org/openalexR/
Other
98 stars 21 forks source link

Another oa_snowball vignette #119

Closed trangdata closed 2 weeks ago

trangdata commented 1 year ago

An example use case contributed by @rkrug.

Note: to keep the demo small, the "step backward" is based on only the first 5 articles citing W1964141474.

library(openalexR)
library(ggraph)
#> Loading required package: ggplot2
library(tidygraph)
#> 
#> Attaching package: 'tidygraph'
#> The following object is masked from 'package:stats':
#> 
#>     filter
library(tidyverse)

# First snowball step, seeded with a single work; verbose = TRUE prints
# each API request as it is made.
snowball_docs <- oa_snowball(identifier = "W1964141474", verbose = TRUE)
#> Requesting url: https://api.openalex.org/works/W1964141474
#> Collecting all documents citing the target papers...
#> Requesting url: https://api.openalex.org/works?filter=cites%3AW1964141474
#> Getting 1 page of results with a total of 177 records...
#> Collecting all documents cited by the target papers...
#> Requesting url: https://api.openalex.org/works?filter=cited_by%3AW1964141474
#> Getting 1 page of results with a total of 61 records...

# Seeds for the second step: IDs of works with an edge pointing to the
# original paper. Only the first five rows are kept, for demo purposes.
next_works <- snowball_docs$edges |>
  filter(to == "W1964141474") |>
  slice_head(n = 5) |>
  pull(from)

# Second snowball step, seeded with the works selected above.
snowball_next <- oa_snowball(identifier = next_works, verbose = TRUE)
#> Requesting url: https://api.openalex.org/works?filter=openalex_id%3AW2200229034%7CW2614350785%7CW2256910178%7CW2744994535%7CW2144288786
#> Getting 1 page of results with a total of 5 records...
#> Collecting all documents citing the target papers...
#> Requesting url: https://api.openalex.org/works?filter=cites%3AW2200229034%7CW2614350785%7CW2256910178%7CW2744994535%7CW2144288786
#> Getting 4 pages of results with a total of 628 records...
#> Collecting all documents cited by the target papers...
#> Requesting url: https://api.openalex.org/works?filter=cited_by%3AW2200229034%7CW2614350785%7CW2256910178%7CW2744994535%7CW2144288786
#> Getting 3 pages of results with a total of 596 records...

# From the second step, keep only the edges that point at one of the
# seed works.
filter_edges <- filter(snowball_next$edges, to %in% next_works)

# Keep only the nodes that sit on either end of a retained edge.
kept_works <- unique(c(filter_edges$from, filter_edges$to))
filter_nodes <- filter(snowball_next$nodes, id %in% kept_works)

# Merge the second snowball step into the first.
# Nodes are de-duplicated on `id`; distinct() keeps the first occurrence,
# so rows already present from the first step take precedence.
snowball_docs$nodes <- bind_rows(snowball_docs$nodes, filter_nodes) |>
  distinct(id, .keep_all = TRUE)

# Edges are de-duplicated too, so that a link present in both steps is not
# added to the graph (and drawn) twice.
snowball_docs$edges <- bind_rows(snowball_docs$edges, filter_edges) |>
  distinct()

# Plot the combined network: point fill follows `oa_input`, point size
# follows `cited_by_count`, and labels are drawn only where `oa_input`
# is TRUE. All legends are suppressed.
ggraph(graph = as_tbl_graph(snowball_docs), layout = "stress") +
  geom_edge_link(aes(alpha = after_stat(index)), show.legend = FALSE) +
  geom_node_point(
    aes(fill = oa_input, size = cited_by_count),
    shape = 21,
    color = "white"
  ) +
  geom_node_label(
    aes(filter = oa_input, label = id),
    nudge_y = 0.2,
    size = 3
  ) +
  scale_fill_manual(
    values = c("#a3ad62", "#d46780"),
    na.value = "grey",
    name = ""
  ) +
  scale_size(range = c(3, 10), guide = "none") +
  scale_edge_width(range = c(0.1, 1.5), guide = "none") +
  guides(fill = "none") +
  theme_graph() +
  # Must come after theme_graph() so these overrides are not replaced.
  theme(
    plot.background = element_rect(fill = "transparent", colour = NA),
    panel.background = element_rect(fill = "transparent", colour = NA),
    legend.position = "bottom"
  )

Created on 2023-06-28 with reprex v2.0.2

trangdata commented 2 weeks ago

Closing. Now tracking in #284.