bnosac / udpipe

R package for Tokenization, Parts of Speech Tagging, Lemmatization and Dependency Parsing Based on the UDPipe Natural Language Processing Toolkit
https://bnosac.github.io/udpipe/en
Mozilla Public License 2.0
209 stars 33 forks source link

plot dependency parsing #59

Closed jwijffels closed 4 years ago

jwijffels commented 5 years ago
library(udpipe)
library(igraph)
library(ggraph)
library(ggplot2)

#' Plot the dependency parse of one sentence as an arc diagram
#'
#' Draws the tokens of a single sentence on a line, with labelled arcs for the
#' dependency relations and the universal part-of-speech tag under each token.
#' Only one sentence is drawn: the one with the lowest `sentence_id` (within
#' the first `doc_id` when that column is present).
#'
#' @param x A data.frame with at least the columns `sentence_id`, `token_id`,
#'   `head_token_id`, `dep_rel`, `token`, `lemma`, `upos`, `xpos` and `feats`
#'   (the script in this file passes the result of `udpipe()`).
#' @param size Text size used for the edge labels, token labels and tag text.
#' @return The plot object built by `ggraph::ggraph()` plus the added layers.
plot_annotation <- function(x, size = 3) {
  required <- c("sentence_id", "token_id", "head_token_id", "dep_rel",
                "token", "lemma", "upos", "xpos", "feats")
  # Separate conditions so a failure names the check that did not hold.
  stopifnot(is.data.frame(x), all(required %in% colnames(x)))

  # Rows without a head cannot be placed in the tree.
  # NOTE(review): presumably these are multi-word token range rows -- confirm.
  x <- x[!is.na(x$head_token_id), ]
  if (nrow(x) == 0L) {
    stop("plot_annotation: 'x' has no tokens with a non-missing head_token_id",
         call. = FALSE)
  }

  # sentence_id and token_id can repeat across documents; keeping a single
  # document avoids duplicated vertex names when the graph is built.
  if ("doc_id" %in% colnames(x)) {
    x <- x[x$doc_id %in% x$doc_id[1L], ]
  }
  x <- x[x$sentence_id %in% min(x$sentence_id), ]

  # head_token_id 0 marks the root, which has no incoming arc to draw.
  # The first two columns are used as the edge endpoints (token -> head).
  edges <- x[x$head_token_id != 0, c("token_id", "head_token_id", "dep_rel")]
  vertices <- x[, c("token_id", "token", "lemma", "upos", "xpos", "feats")]
  g <- igraph::graph_from_data_frame(edges, vertices = vertices, directed = TRUE)

  ggraph::ggraph(g, layout = "linear") +
    ggraph::geom_edge_arc(
      ggplot2::aes(label = dep_rel, vjust = -0.20),
      arrow = grid::arrow(length = grid::unit(4, "mm"), ends = "last",
                          type = "closed"),
      # Fixed-width cap so arrow heads stop short of the token labels.
      end_cap = ggraph::label_rect("wordswordswords"),
      label_colour = "red", check_overlap = TRUE, label_size = size
    ) +
    ggraph::geom_node_label(ggplot2::aes(label = token), col = "darkgreen",
                            size = size, fontface = "bold") +
    ggraph::geom_node_text(ggplot2::aes(label = upos), nudge_y = -0.35,
                           size = size) +
    ggraph::theme_graph(base_family = "Arial Narrow") +
    ggplot2::labs(title = "udpipe output",
                  subtitle = "tokenisation, parts of speech tagging & dependency relations")
}

# Example usage: annotate a sentence with udpipe() and draw its dependency
# parse. Each plot_annotation() call sits at top level so that its result is
# auto-printed (i.e. the plot is drawn) when the script is run interactively.
# The second argument of udpipe() is the language of the model to use.
# NOTE(review): presumably the model is fetched on first use -- confirm in the
# udpipe documentation.

# Two coordinated clauses.
x <- udpipe("The economy is weak but the outlook is bright", "english")
plot_annotation(x, size = 4)
# Hyphenated modifiers; smaller text size for the longer tokens.
x <- udpipe("Rats are various medium-sized long-tailed animals", "english")
plot_annotation(x, size = 3)
# German example with a different language model.
x <- udpipe("Wir gehen zum kino", "german")
plot_annotation(x, size = 5)
# Longer sentence with an enumeration.
x <- udpipe("UDPipe provides tokenization, tagging, lemmatization and dependency parsing of raw text", "english")
plot_annotation(x, size = 3)
# Sentence with a prepositional phrase attached to the subject.
x <- udpipe("His speech about marshmallows in New York is utter bullshit", "english")
plot_annotation(x, size = 4)

dependency-parsing-example2

jwijffels commented 4 years ago

Closing. Now available in package https://github.com/bnosac/textplot.