iangow / se_links

Database links for StreetEvents
1 stars 0 forks source link

Check additional unclear cases #8

Closed iangow closed 3 years ago

iangow commented 4 years ago

This is an extension of #3 for an additional 18 cases uncovered when I removed some dubious code. No need to do these all in one day!

Here is the code I used to make the additional rows:

library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

dbExecute(pg, "SET search_path TO streetevents")
dbExecute(pg, "SET work_mem = '8GB'")

calls <- tbl(pg, "calls")
crsp_link <- tbl(pg, "crsp_link")
crsp_link_new <- tbl(pg, "crsp_link_new")
bad_matches <- tbl(pg, "bad_matches")

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

event_titles <-
    calls %>%
    select(file_name, last_update, event_title) %>%
    compute()

permcos <- 
    stocknames %>%
    select(permno, permco, comnam) %>%
    distinct() %>%
    group_by(permno, permco) %>%
    arrange(comnam) %>%
    summarize(comnams = array_agg(comnam)) %>%
    compute()

crsp_link_permco <- 
    crsp_link %>%
    inner_join(permcos, by = "permno") %>%
    compute()

crsp_link_new_permno <-
    crsp_link_new %>%
    inner_join(permcos, by = "permno") %>%
    compute()

merged <-
    crsp_link_new_permno %>%
    right_join(crsp_link_permco, by = "file_name") %>%
    mutate(same_permno = permno.x == permno.y,
           same_permco = permco.x == permco.y) %>%
    inner_join(event_titles, by = c("file_name", "last_update")) %>%
    compute()

merged %>% 
    count(same_permco, match_type_desc) %>%
    arrange(desc(n))

merged %>% 
    filter(!same_permco | is.na(same_permco)) %>%
    count(match_type_desc) %>%
    arrange(desc(n))

merged %>% 
    filter(same_permno) %>%
    count(match_type_desc) %>%
    arrange(desc(n))

diff_permcos <-
    merged %>%
    filter(!same_permco | is.na(permco.x)) %>%
    select(-starts_with("match_type"), -starts_with("same_")) %>%
    arrange(permco.x, permco.y) 

diff_permcos %>% 
    collect() %>%
    write_csv("diff_permcos.csv")
iangow commented 4 years ago

Confirmation that we have 18 cases here.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
    diff_permcos %>%
    filter(is.na(correct))

to_investigate %>% 
    select(permno.x, permno.y) %>% 
    distinct() %>% 
    count()
#> # A tibble: 1 x 1
#>       n
#>   <int>
#> 1    18

Created on 2020-01-08 by the reprex package (v0.3.0)

iangow commented 4 years ago

Case 1/18. Here most cases are clearly "X" (LKQ Corp), but some relate to "Keystone Automotive" (PERMNO 83669).

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

diff_permcos %>% count(correct)
#> # A tibble: 4 x 2
#>   correct     n
#>   <chr>   <int>
#> 1 X         211
#> 2 Y        1733
#> 3 Z          30
#> 4 <NA>      244

to_investigate <-
    diff_permcos %>%
    filter(is.na(correct))

sample <-
    to_investigate %>%
    group_by(permno.x, permno.y) %>%
    count() %>%
    ungroup() %>%
    top_n(1, wt=n) 

sample_permcos <-
    to_investigate %>%
    semi_join(sample) %>%
    select(starts_with("permco")) %>%
    distinct() %>%
    .[1, ] %>% 
    as.integer()
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
    filter(permco %in% sample_permcos) %>%
    select(permno, namedt, nameenddt, ncusip, comnam, ticker) %>% 
    print(n=Inf)
#> # Source:   lazy query [?? x 6]
#> # Database: postgres [igow@10.101.13.99:5432/crsp]
#>   permno namedt     nameenddt  ncusip   comnam                 ticker
#>    <int> <date>     <date>     <chr>    <chr>                  <chr> 
#> 1  21178 1939-12-09 1962-07-01 <NA>     LOCKHEED AIRCRAFT CORP <NA>  
#> 2  21178 1962-07-02 1968-01-01 <NA>     LOCKHEED AIRCRAFT CORP LK    
#> 3  21178 1968-01-02 1977-10-02 53982110 LOCKHEED AIRCRAFT CORP LK    
#> 4  21178 1977-10-03 1995-03-15 53982110 LOCKHEED CORP          LK    
#> 5  21178 1995-03-16 2018-12-31 53983010 LOCKHEED MARTIN CORP   LMT   
#> 6  89866 2003-10-03 2012-05-21 50188920 L K Q CORP             LKQX  
#> 7  89866 2012-05-22 2018-12-31 50188920 L K Q CORP             LKQ

Created on 2020-01-08 by the reprex package (v0.3.0)

iangow commented 4 years ago

Case 2/18. Clearly bad ticker matches (ticker PPL was reused).

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

diff_permcos %>% count(correct)
#> # A tibble: 4 x 2
#>   correct     n
#>   <chr>   <int>
#> 1 X         270
#> 2 Y        1733
#> 3 Z          46
#> 4 <NA>      169

to_investigate <-
    diff_permcos %>%
    filter(is.na(correct))

sample <-
    to_investigate %>%
    group_by(permno.x, permno.y) %>%
    count() %>%
    ungroup() %>%
    top_n(1, wt=n) 

sample_permcos <-
    to_investigate %>%
    semi_join(sample) %>%
    select(starts_with("permco")) %>%
    distinct() %>%
    .[1, ] %>% 
    as.integer()
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
    filter(permco %in% sample_permcos) %>%
    select(permno, namedt, nameenddt, ncusip, comnam, ticker) %>% 
    print(n=Inf)
#> # Source:   lazy query [?? x 6]
#> # Database: postgres [igow@10.101.13.99:5432/crsp]
#>   permno namedt     nameenddt  ncusip   comnam                       ticker
#>    <int> <date>     <date>     <chr>    <chr>                        <chr> 
#> 1  22517 1945-12-03 1945-12-30 <NA>     PENNSYLVANIA POWER & LIGHT … <NA>  
#> 2  22517 1945-12-31 1968-01-01 <NA>     PENNSYLVANIA POWER & LIGHT … PPL   
#> 3  22517 1968-01-02 1997-09-11 70905110 PENNSYLVANIA POWER & LIGHT … PPL   
#> 4  22517 1997-09-12 2000-02-14 69349910 P P & L RESOURCES INC        PPL   
#> 5  22517 2000-02-15 2018-12-31 69351T10 P P L CORP                   PPL   
#> 6  35051 1962-07-02 1968-01-01 <NA>     PALL CORP                    PPL   
#> 7  35051 1968-01-02 1972-11-30 69642910 PALL CORP                    PPL   
#> 8  35051 1972-12-01 1992-10-04 69642930 PALL CORP                    PLL   
#> 9  35051 1992-10-05 2015-08-28 69642930 PALL CORP                    PLL

Created on 2020-01-08 by the reprex package (v0.3.0)

iangow commented 4 years ago

Note that for Case 1/18, etc., I did the following check, but have not posted it here to keep things simple.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
    diff_permcos %>%
    filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(16529, 61955)

to_investigate %>% 
    filter(permno.x %in% permno_list,
           permno.y %in% permno_list) %>%
    select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

iangow commented 4 years ago

Case 3/18 is Prologis/AMB again (with some new Catellus). Same resolution as before.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
    diff_permcos %>%
    filter(is.na(correct))

sample <-
    to_investigate %>%
    group_by(permno.x, permno.y) %>%
    count() %>%
    ungroup() %>%
    top_n(1, wt=n) 

sample_permcos <-
    to_investigate %>%
    semi_join(sample) %>%
    select(starts_with("permco")) %>%
    distinct() %>%
    .[1, ] %>% 
    as.integer()
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
    filter(permco %in% sample_permcos) %>%
    select(permno, namedt, nameenddt, ncusip, comnam, ticker) %>% 
    print(n=Inf)
#> # Source:   lazy query [?? x 6]
#> # Database: postgres [igow@10.101.13.99:5432/crsp]
#>   permno namedt     nameenddt  ncusip   comnam                       ticker
#>    <int> <date>     <date>     <chr>    <chr>                        <chr> 
#> 1  80406 1994-03-31 1998-06-30 81413810 SECURITY CAPITAL INDUSTRIAL… SCN   
#> 2  80406 1998-07-01 2002-06-27 74341010 PROLOGIS TRUST               PLD   
#> 3  80406 2002-06-28 2011-06-02 74341010 PROLOGIS                     PLD   
#> 4  85592 1997-11-21 2011-06-02 00163T10 A M B PROPERTY CORP          AMB   
#> 5  85592 2011-06-03 2018-12-31 74340W10 PROLOGIS INC                 PLD

Created on 2020-01-08 by the reprex package (v0.3.0)

iangow commented 4 years ago

Case 4/18. Simple date-split case.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
    diff_permcos %>%
    filter(is.na(correct))

sample <-
    to_investigate %>%
    group_by(permno.x, permno.y) %>%
    count() %>%
    ungroup() %>%
    top_n(1, wt=n) 

sample_permcos <-
    to_investigate %>%
    semi_join(sample) %>%
    select(starts_with("permco")) %>%
    distinct() %>%
    .[1, ] %>% 
    as.integer()
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
    filter(permco %in% sample_permcos) %>%
    select(permno, namedt, nameenddt, ncusip, comnam, ticker) %>% 
    print(n=Inf)
#> # Source:   lazy query [?? x 6]
#> # Database: postgres [igow@10.101.13.99:5432/crsp]
#>   permno namedt     nameenddt  ncusip   comnam             ticker
#>    <int> <date>     <date>     <chr>    <chr>              <chr> 
#> 1  16112 2016-06-14 2018-12-31 92852W20 VIVEVE MEDICAL INC VIVE  
#> 2  77568 1992-03-03 1992-09-16 69341D10 P L C SYSTEMS INC  PLCSF 
#> 3  77568 1992-09-17 2008-11-07 69341D10 P L C SYSTEMS INC  PLC

Created on 2020-01-08 by the reprex package (v0.3.0)

iangow commented 4 years ago

Case 5/18: Additional observations on an earlier case. A bit more complicated, but straightforward to fix.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
    diff_permcos %>%
    filter(is.na(correct))

sample <-
    to_investigate %>%
    group_by(permno.x, permno.y) %>%
    count() %>%
    ungroup() %>%
    top_n(1, wt=n) 

sample_permcos <-
    to_investigate %>%
    semi_join(sample) %>%
    select(starts_with("permco")) %>%
    distinct() %>%
    .[1, ] %>% 
    as.integer()
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
    filter(permco %in% sample_permcos) %>%
    select(permno, namedt, nameenddt, ncusip, comnam, ticker) %>% 
    print(n=Inf)
#> # Source:   lazy query [?? x 6]
#> # Database: postgres [igow@10.101.13.99:5432/crsp]
#>   permno namedt     nameenddt  ncusip   comnam                       ticker
#>    <int> <date>     <date>     <chr>    <chr>                        <chr> 
#> 1  83583 1996-05-21 2004-11-30 94973H10 WELLPOINT HEALTH NETWRKS IN… WLP   
#> 2  89179 2001-10-30 2004-11-30 03674B10 ANTHEM INC                   ATH   
#> 3  89179 2004-12-01 2014-12-02 94973V10 WELLPOINT INC                WLP   
#> 4  89179 2014-12-03 2018-12-31 03675210 ANTHEM INC                   ANTM

Created on 2020-01-08 by the reprex package (v0.3.0)

iangow commented 4 years ago

@Yvonne-Han So the first four didn't take too long. Note that I have tweaked the code a bit here (now correct is simply NA rather than DN for the problematic cases).

Yvonne-Han commented 4 years ago

@Yvonne-Han So the first four didn't take too long. Note that I have tweaked the code a bit here (now correct is simply NA rather than DN for the problematic cases).

You actually did 5 instead of 4 for me (so I've updated your count in the comments). Confirmed that there are 13 left.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

to_investigate %>% 
  select(permno.x, permno.y) %>% 
  distinct() %>% 
  count()
#> # A tibble: 1 x 1
#>       n
#>   <int>
#> 1    13

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 6/18: An old one involving M&A and change of names. This one is a little bit complicated as 3 permnos are involved. Rayovac Corp used the 85625 permno from 1997-11-21 to 2005-05-01, changed their name (and ticker) to Spectrum Brands from 2005-05-02 to 2008-12-19 (permno was still 85625). Spectrum Brands changed their permno and used 93295 from 2010-03-18 to 2018-07-13. Zapata Corp used the 38295 permno from 2001-01-13 to 2009-12-23, and change the name to Harbinger Group from 2009-12-24 to 2015-03-10 (permno was still 38295). Later, it also changed its name to HRG group from 2015-03-11 to 2018-07-15, and merged with Spectrum from 2018-07-16 and onwards (changed the name to Spectrum too).

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Auto-refreshing stale OAuth token.
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>    permno permco namedt     nameenddt  ncusip   comnam               ticker
#>     <int>  <int> <date>     <date>     <chr>    <chr>                <chr> 
#>  1  38295  21948 1962-07-02 1968-01-01 <NA>     ZAPATA OFF SHORE CO  ZOS   
#>  2  38295  21948 1968-01-02 1968-04-14 98911010 ZAPATA OFF SHORE CO  ZOS   
#>  3  38295  21948 1968-04-15 1968-11-24 98911010 ZAPATA OFF SHORE CO  ZOS   
#>  4  38295  21948 1968-11-25 1972-02-14 98910210 ZAPATA NORNESS INC   ZOS   
#>  5  38295  21948 1972-02-15 1994-05-02 98907010 ZAPATA CORP          ZOS   
#>  6  38295  21948 1994-05-03 1996-02-14 98907050 ZAPATA CORP          ZOS   
#>  7  38295  21948 1996-02-15 2001-01-30 98907050 ZAPATA CORP          ZAP   
#>  8  85625  32162 1997-11-21 2005-05-01 75508110 RAYOVAC CORP         ROV   
#>  9  38295  21948 2001-01-31 2009-12-23 98907060 ZAPATA CORP          ZAP   
#> 10  85625  32162 2005-05-02 2008-12-19 84762L10 SPECTRUM BRANDS INC  SPC   
#> 11  38295  21948 2009-12-24 2015-03-10 41146A10 HARBINGER GROUP INC  HRG   
#> 12  93295  32162 2010-03-18 2010-06-16 84762L20 SPECTRUM BRANDS INC  SPB   
#> 13  93295  32162 2010-06-17 2018-07-13 84763R10 SPECTRUM BRANDS HOL… SPB   
#> 14  38295  21948 2015-03-11 2018-07-15 40434J10 H R G GROUP INC      HRG   
#> 15  38295  21948 2018-07-16 2018-12-31 84790A10 SPECTRUM BRANDS HOL… SPB

Created on 2020-01-09 by the reprex package (v0.3.0)

All 13 affected calls were held before 2005-05-01 by Rayovac, so we should use the original permno for Rayovac. The 85625 was neither permno.x nor permno.y, so I've marked it as Z and specified 85625 in the permno.z column. After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(38295L, 93295L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 7/18: I've handled this one yesterday too. See below. Thermo used the 62092 permno from 1980-08-27 to 2018-12-31, and changed their comnam/ticker slightly during the period. Invitrogen used the 86725 permno from 1999-02-26 to 2014-02-03, and changed their comnam to Life Tech on 2008-11-24. Google told me that

Life Technologies was acquired by Thermo Fisher Scientific in Jan 2014.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  62092   4557 1972-12-14 1980-08-26 88355610 THERMO ELECTRON CORP  TRMO  
#> 2  62092   4557 1980-08-27 2006-11-09 88355610 THERMO ELECTRON CORP  TMO   
#> 3  86725  16425 1999-02-26 2008-11-23 46185R10 INVITROGEN CORP       IVGN  
#> 4  62092   4557 2006-11-10 2018-12-31 88355610 THERMO FISHER SCIENT… TMO   
#> 5  86725  16425 2008-11-24 2014-02-03 53217V10 LIFE TECHNOLOGIES CO… LIFE

Created on 2020-01-09 by the reprex package (v0.3.0)

OK, this one seems a little bit complicated because the event title was related to Biosource and Bioreliance, rather than the comnams as listed above. Google told me that,

In 2004, BioReliance becomes a subsidiary of Invitrogen, but later was acquired by Avista Capital Partners in 2007 (divestiture).

In July 2005 Invitrogen Corp (now Thermo Fisher Scientific) acquired BioSource International. (The first joint announcement was on 2005-07-26).

I've marked these 9 cases (line 1976 to line 1984) as IDG_TO_CHECK because I believe both permno.x and permno.y are incorrect. For these earnings calls related to BioSource and BioReliance, they were all held before the M&A (and before the joint announcement), so they should be linked to their original permnos.

After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(62092L, 86725L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 8/18: This one was handled yesterday before. A series of M&As with inconsistent records.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  85304  15631 1997-08-05 2005-01-06 67574410 OCULAR SCIENCES INC   OCLR  
#> 2  88149  37145 2000-04-13 2004-09-09 09856Q10 BOOKHAM TECHNOLOGY P… BKHM  
#> 3  90354  37145 2004-09-20 2009-04-27 09856E10 BOOKHAM INC           BKHM  
#> 4  90354  37145 2009-04-28 2010-04-29 67555N10 OCLARO INC            OCLR  
#> 5  90354  37145 2010-04-30 2018-12-07 67555N20 OCLARO INC            OCLR

Created on 2020-01-09 by the reprex package (v0.3.0)

The 8 calls were held by Bookham between 2002-2004, but there was both permno.x and permno.y don't match with Bookham's permno during that period (should be 88149). I've marked them as Z and put 88149 in the permno.z column.

After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(85304L, 90354L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 9/18: A relatively complicated one. Involves M&A and name changes. Interland used permno 75993 from 2001-08-07 yo 2006-03-19, and changed its comnam to Web Com on 2006-03-20. Website Pros used permno 90984 from 2005-11-02 to 2008-06-08. It merged with Web Com on 2008-06-09 and changed its name a couple of times after that. The permno was always 90984.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  75993  10311 1989-09-25 1995-04-09 98943210 ZEOS INTERNATIONAL L… ZEOS  
#> 2  75993  10311 1995-04-10 2001-08-06 59510010 MICRON ELECTRONICS I… MUEI  
#> 3  75993  10311 2001-08-07 2003-08-03 45872710 INTERLAND INC NEW     INLD  
#> 4  75993  10311 2003-08-04 2006-03-19 45872720 INTERLAND INC NEW     INLD  
#> 5  90984  48782 2005-11-02 2008-06-08 94769V10 WEBSITE PROS INC      WSPI  
#> 6  75993  10311 2006-03-20 2007-09-28 94732Q10 WEB COM INC           WWWW  
#> 7  90984  48782 2008-06-09 2008-10-26 94769V10 WEBSITE PROS INC      WWWW  
#> 8  90984  48782 2008-10-27 2015-11-09 94733A10 WEB COM GROUP INC     WWWW  
#> 9  90984  48782 2015-11-10 2018-10-10 94733A10 WEB COM GROUP INC     WEB

Created on 2020-01-09 by the reprex package (v0.3.0)

The 6 affected calls were held by Website Pros, Inc. between 2005-11-02 and 2008-10-26. This relates to two periods, but the permno was always 90984, so I've marked them as X (using permno 90984).

After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(90984L, 75993L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 10/18: Caesars used the 86448 permno before 2005-06-13, and started to use the 13267 permno after 2012-02-08.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  86447  34953 1999-01-04 2004-01-05 70069010 PARK PLACE ENTERTAIN… PPE   
#> 2  86447  34953 2004-01-06 2005-06-13 12768710 CAESARS ENTERTAINMEN… CZR   
#> 3  13267  53998 2012-02-08 2018-12-31 12768610 CAESARS ENTERTAINMEN… CZR

Created on 2020-01-09 by the reprex package (v0.3.0)

The company changed their name from Harrah's to Caesar some time in the history, so the event_title was actually referring to Caesar. The 6 affected calls were held between 2004-01-06 and 2005-06-13, so I've marked them all as Y (using the 86447 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(86447L, 13267L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 11/18: There seems to be an overlap in the time frame.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  80399  30362 1994-03-10 2018-12-31 55438210 MACERICH CO           MAC   
#> 2  14177  54569 2013-09-09 2018-12-31 57538510 MASONITE INTERNATION… DOOR

Created on 2020-01-09 by the reprex package (v0.3.0)

The 6 affected calls were held by Masonite during 2007-2008 and 2012, but there was no records for Masonite before 2013-09-09. I've googled this and it seems that Masonite went bankrupt in 2009 and went public again in 2013 after 4 years. I've marked these as IDG_TO_CHECK as I can't find the permno for Masonite before 2013 and I don't think it is related to Macerich.

After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filterfilter(is.na(correct))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(80399L, 14177L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 12/18: Scansoft Inc used the 82759 permno from 1993-03-03 to 2005-10-17, while Nuance Communications used the 88180 permno from 2000-04-13 to 2005-09-16. On 2005-10-18, Scansoft Inc merged with Nuance Communications and started to use the 82759 permno. The merged comnam is always Nuance Communications, but the ticker changed on 2005-11-20.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  82759  14205 1995-12-11 1999-03-02 92830V10 VISIONEER INC         VSNR  
#> 2  82759  14205 1999-03-03 2005-10-17 80603P10 SCANSOFT INC          SSFT  
#> 3  88180  37174 2000-04-13 2005-09-16 66996710 NUANCE COMMUNICATIONS NUAN  
#> 4  82759  14205 2005-10-18 2005-11-20 67020Y10 NUANCE COMMUNICATION… SSFT  
#> 5  82759  14205 2005-11-21 2018-12-31 67020Y10 NUANCE COMMUNICATION… NUAN

Created on 2020-01-09 by the reprex package (v0.3.0)

2 calls were held by Scansoft during 2004-2005, so I've marked them as X (using the 82759 permno). One call was a joint announcement by Scansoft and Nuance regarding the M&A, and I've marked it under the Scansoft permno (that is, X, 82759). There are two calls related to SpeechWorks, which seem a little bit complicated. Google:

SpeechWorks was acquired by Scansoft in 2003.

Given that these two calls (file_name == "734964_T" and file_name == "619210_T") were held by SpeechWorks only (i.e., not a joint announcement, and the date was before the M&A I believe), I've marked them as IDG_TO_CHECK because I don't have the permno for SpeechWorks for now.

After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(82759L, 88180L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 13/18: A new ticker reuse case.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  82107   2253 1976-10-15 1983-06-30 45384010 INDEPENDENT BANKSHAR… IBCX  
#> 2  82107   2253 1983-07-01 1987-01-08 95709010 WESTAMERICA BANCORPO… WSAM  
#> 3  82107   2253 1987-01-09 1994-04-25 95709010 WESTAMERICA BANCORPO… WAB   
#> 4  82107   2253 1994-04-26 2018-12-31 95709010 WESTAMERICA BANCORPO… WABC  
#> 5  81677  30913 1995-06-16 2000-05-01 96038610 WESTINGHOUSE AIR BRA… WAB   
#> 6  81677  30913 2000-05-02 2018-12-31 92974010 WABTEC CORP           WAB

Created on 2020-01-09 by the reprex package (v0.3.0)

The 3 affected calls were held by Westinghouse in 2018, so I've marked them as X (using permno 81677). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(82107L, 81677L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 14/18: Seems to be caused by reusing tickers.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>    permno permco namedt     nameenddt  ncusip   comnam               ticker
#>     <int>  <int> <date>     <date>     <chr>    <chr>                <chr> 
#>  1  24803  21926 1952-02-18 1955-11-13 <NA>     WINN & LOVETT GROCE… <NA>  
#>  2  24803  21926 1955-11-14 1962-07-01 <NA>     WINN DIXIE STORES I… <NA>  
#>  3  24803  21926 1962-07-02 1968-01-01 <NA>     WINN DIXIE STORES I… WIN   
#>  4  24803  21926 1968-01-02 2005-02-18 97428010 WINN DIXIE STORES I… WIN   
#>  5  59475  21926 1969-03-17 1981-04-16 97428020 WINN DIXIE STORES I… WIN   
#>  6  91391  50964 2006-07-18 2009-12-09 97381W10 WINDSTREAM CORP      WIN   
#>  7  91624  21926 2006-12-22 2012-03-09 97428030 WINN DIXIE STORES I… WINN  
#>  8  91391  50964 2009-12-10 2013-09-02 97381W10 WINDSTREAM CORP      WIN   
#>  9  91391  50964 2013-09-03 2015-04-26 97382A10 WINDSTREAM HOLDINGS… WIN   
#> 10  91391  50964 2015-04-27 2018-05-28 97382A20 WINDSTREAM HOLDINGS… WIN   
#> 11  91391  50964 2018-05-29 2018-12-31 97382A30 WINDSTREAM HOLDINGS… WIN

Created on 2020-01-09 by the reprex package (v0.3.0)

2 affected calls were held by CT Communications - this seems to be weird. My best friend Google told me that,

On May 29, 2007, the company (Windstream Holdings) announced that it would acquire CT Communications for $585 million

So this is related to Windstream. I've marked these two as IDG_TO_CHECK because I don't have the permno for CT Communications and I believe that the calls were held before the M&A (2004).

After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(91391L, 91624L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 15/18: A M&A case. Google:

MIDLAND, Texas, Oct. 9, 2014 /PRNewswire/ -- Dawson Geophysical Company (Dawson) (NASDAQ: DWSN) and TGC Industries, Inc. (TGC) (NASDAQ: TGE) today jointly announced a proposed strategic business combination.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  29050   1433 1981-04-23 2015-02-11 23935910 DAWSON GEOPHYSICAL CO DWSN  
#> 2  80912  27333 1994-09-22 1998-11-08 87241710 T G C INDUSTRIES INC  TGCI  
#> 3  80912  27333 1998-11-09 1998-11-26 87241730 T G C INDUSTRIES INC  TGDC  
#> 4  80912  27333 1998-11-27 2002-05-23 87241730 T G C INDUSTRIES INC  TGCI  
#> 5  80912  27333 2002-05-24 2005-04-17 87241730 T G C INDUSTRIES INC  <NA>  
#> 6  80912  27333 2005-04-18 2007-11-05 87241730 T G C INDUSTRIES INC  TGE   
#> 7  80912  27333 2007-11-06 2015-02-11 87241730 T G C INDUSTRIES INC  TGE   
#> 8  80912  27333 2015-02-12 2018-12-31 23936010 DAWSON GEOPHYSICAL C… DWSN

Created on 2020-01-09 by the reprex package (v0.3.0)

The affected call was held by TGC in 2014, so I've marked it as X (using 80912 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(29050L, 80912L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 16/18: A case caused by reusing tickers.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam               ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                <chr> 
#> 1  30648  20470 1962-07-02 1968-01-01 <NA>     COHU ELECTRONICS INC COH   
#> 2  30648  20470 1968-01-02 1972-07-02 19257610 COHU ELECTRONICS INC COH   
#> 3  30648  20470 1972-07-03 1994-09-13 19257610 COHU INC             COH   
#> 4  30648  20470 1994-09-14 2018-12-31 19257610 COHU INC             COHU  
#> 5  88661  40143 2000-10-05 2017-10-30 18975410 COACH INC            COH   
#> 6  88661  40143 2017-10-31 2018-12-31 87603010 TAPESTRY INC         TPR

Created on 2020-01-09 by the reprex package (v0.3.0)

The affected call was held by Tapestry in 2018, so I've marked it as X (using the 88661 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(30648L, 88661L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 17/18: Seems to be a M&A one. Rackable Systems acquired Silicon Graphics on 2009-05-18 and used their name afterwards.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  10791   8685 1986-10-29 1990-02-05 82705610 SILICON GRAPHICS INC  SGIC  
#> 2  10791   8685 1990-02-06 2005-11-04 82705610 SILICON GRAPHICS INC  SGI   
#> 3  90742  46864 2005-06-10 2009-05-17 75007710 RACKABLE SYSTEMS INC  RACK  
#> 4  91506   8685 2006-10-23 2009-04-14 82705630 SILICON GRAPHICS INC  SGIC  
#> 5  90742  46864 2009-05-18 2016-10-31 82706L10 SILICON GRAPHICS INT… SGI

Created on 2020-01-09 by the reprex package (v0.3.0)

The affected call was held by Rackable Systems in 2005, so I've labeled it as X (using the 90742 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(90742L, 91506L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 18/18: I guess this one should be a restructuring case (divestiture or spin-off?). It seems that Alexander & Baldwin was split into Matson Inc and Alexander & Baldwin Inc New on 2012-07-02.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  11691    157 1972-12-14 2008-09-30 01448210 ALEXANDER & BALDWIN … ALEX  
#> 2  11691    157 2008-10-01 2009-05-05 01448210 ALEXANDER & BALDWIN … AXB   
#> 3  11691    157 2009-05-06 2012-06-06 01448210 ALEXANDER & BALDWIN … ALEX  
#> 4  11691    157 2012-06-07 2012-07-01 01448110 ALEXANDER & BALDWIN … ALEX  
#> 5  11691    157 2012-07-02 2018-12-31 57686G10 MATSON INC            MATX  
#> 6  13429  54099 2012-07-02 2018-12-31 01449110 ALEXANDER & BALDWIN … ALEX

Created on 2020-01-09 by the reprex package (v0.3.0)

The affected call was held in 2008, which was before the restructuring. I've marked this one as Y (using permno 11691). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(11691L, 13429L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Confirmed that there were no cases left.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(is.na(correct))

to_investigate %>% 
  select(permno.x, permno.y) %>% 
  distinct() %>% 
  count()
#> # A tibble: 1 x 1
#>       n
#>   <int>
#> 1     0

Created on 2020-01-09 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

@iangow As was done yesterday, I've prepared a summary.