iangow / se_links

Database links for StreetEvents
1 stars 0 forks source link

Check cases with different PERMCOs #2

Closed iangow closed 4 years ago

iangow commented 4 years ago
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

dbExecute(pg, "SET search_path TO streetevents")

crsp_link <- tbl(pg, "crsp_link")
company_ids <- tbl(pg, "company_ids")
calls <- tbl(pg, "calls")

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

permcos <- 
    stocknames %>%
    select(permno, permco, comnam) %>%
    distinct() %>%
    group_by(permno, permco) %>%
    arrange(comnam) %>%
    summarize(comnams = array_agg(comnam)) 

crsp_link_permco <- 
    crsp_link %>%
    inner_join(permcos, by = "permno")

crsp_link_auto <-
    company_ids %>%
    mutate(ncusip = substr(cusip, 1, 8)) %>%
    select(file_name, last_update, ncusip) %>%
    inner_join(stocknames, by = "ncusip") %>%
    select(file_name, last_update, permno) %>%
    distinct() %>%
    compute()

merged <-
    crsp_link_auto %>%
    inner_join(permcos, by = "permno") %>%
    inner_join(crsp_link_permco, by = "file_name") %>%
    mutate(same_permno = permno.x == permno.y,
           same_permco = permco.x == permco.y) %>%
    compute()

merged %>% 
    count(same_permco, match_type_desc) %>%
    arrange(desc(n))

merged %>% 
    filter(!same_permco | is.na(same_permco)) %>%
    count(match_type_desc) %>%
    arrange(desc(n))

merged %>% 
    filter(same_permno) %>%
    count(match_type_desc) %>%
    arrange(desc(n))

event_titles <-
    calls %>%
    select(file_name, last_update, event_title)

diff_permcos <-
    merged %>%
    filter(match_type == 1L, !same_permco) %>%
    select(-starts_with("match_type"), -starts_with("same_")) %>%
    inner_join(event_titles, by = c("file_name", "last_update")) %>%
    arrange(permco.x, permco.y) %>%
    collect() %>%
    write_csv("diff_permcos.csv")