Closed iangow closed 4 years ago
library(dplyr, warn.conflicts = FALSE) library(DBI) pg <- dbConnect(RPostgres::Postgres()) dbExecute(pg, "SET search_path TO streetevents") crsp_link <- tbl(pg, "crsp_link") company_ids <- tbl(pg, "company_ids") calls <- tbl(pg, "calls") stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames")) permcos <- stocknames %>% select(permno, permco, comnam) %>% distinct() %>% group_by(permno, permco) %>% arrange(comnam) %>% summarize(comnams = array_agg(comnam)) crsp_link_permco <- crsp_link %>% inner_join(permcos, by = "permno") crsp_link_auto <- company_ids %>% mutate(ncusip = substr(cusip, 1, 8)) %>% select(file_name, last_update, ncusip) %>% inner_join(stocknames, by = "ncusip") %>% select(file_name, last_update, permno) %>% distinct() %>% compute() merged <- crsp_link_auto %>% inner_join(permcos, by = "permno") %>% inner_join(crsp_link_permco, by = "file_name") %>% mutate(same_permno = permno.x == permno.y, same_permco = permco.x == permco.y) %>% compute() merged %>% count(same_permco, match_type_desc) %>% arrange(desc(n)) merged %>% filter(!same_permco | is.na(same_permco)) %>% count(match_type_desc) %>% arrange(desc(n)) merged %>% filter(same_permno) %>% count(match_type_desc) %>% arrange(desc(n)) event_titles <- calls %>% select(file_name, last_update, event_title) diff_permcos <- merged %>% filter(match_type == 1L, !same_permco) %>% select(-starts_with("match_type"), -starts_with("same_")) %>% inner_join(event_titles, by = c("file_name", "last_update")) %>% arrange(permco.x, permco.y) %>% collect() %>% write_csv("diff_permcos.csv")