iangow / se_links

Database links for StreetEvents
1 stars 0 forks source link

Check unclear cases #3

Closed iangow closed 4 years ago

iangow commented 4 years ago

In this Google Sheets document on the diff_permcos tab there are several entries where the value in the correct column is DN (meaning "I don't know whether permno.x or permno.y is correct") or NONE (meaning "neither permno.x nor permno.y appears correct"). I think we want to use other information about the call to try to resolve the ambiguity.

(Note that permno.y is based on older data and tickers, while permno.x is based on newer data on CUSIPs.)

iangow commented 4 years ago

Here I look at the first case. It seems that there are non-overlapping date ranges for the two PERMCOs, so this one should be easy.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
# You may need to edit this line
options(gargle_oauth_email = "iandgow@gmail.com")
stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
    diff_permcos %>%
    filter(correct %in% c('DN', 'NONE'))

sample <-
    to_investigate %>%
    group_by(permno.x, permno.y) %>%
    count() %>%
    ungroup() %>%
    top_n(1, wt=n) 

sample_permcos <-
    to_investigate %>%
    semi_join(sample) %>%
    select(starts_with("permco")) %>%
    distinct() %>%
    .[1, ] %>% 
    as.integer()
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
    filter(permco %in% sample_permcos)
#> # Source:   lazy query [?? x 16]
#> # Database: postgres [igow@10.101.13.99:5432/crsp]
#>   permno permco namedt     nameenddt  cusip ncusip ticker comnam hexcd
#>    <int>  <int> <date>     <date>     <chr> <chr>  <chr>  <chr>  <dbl>
#> 1  83559  14681 1996-05-31 2005-11-23 8522… 85227… SSNC   SS & …     3
#> 2  93312  53374 2010-03-31 2018-12-31 7846… 78467… SSNC   S S &…     3
#> # … with 7 more variables: exchcd <dbl>, siccd <int64>, shrcd <int64>,
#> #   shrcls <chr>, st_date <date>, end_date <date>, namedum <dbl>

Created on 2020-01-07 by the reprex package (v0.3.0)

So I used the date split to code the earlier calls as Y and the later ones as X (ones in the gap between seem to relate to the older CIK, so I coded those as Y too). After doing this, there are no more entries to process for these PERMNOs.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
    diff_permcos %>%
    filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(83559, 93312)

to_investigate %>% 
    filter(permno.x %in% permno_list,
           permno.y %in% permno_list) %>%
    select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-07 by the reprex package (v0.3.0)

iangow commented 4 years ago

Apart from a few days of overlap, the second case (obtained by running the same code given that the earlier case has "gone" from this) is similar, though there are a few days of overlap.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
    diff_permcos %>%
    filter(correct %in% c('DN', 'NONE'))

sample <-
    to_investigate %>%
    group_by(permno.x, permno.y) %>%
    count() %>%
    ungroup() %>%
    top_n(1, wt=n) 

sample_permcos <-
    to_investigate %>%
    semi_join(sample) %>%
    select(starts_with("permco")) %>%
    distinct() %>%
    .[1, ] %>% 
    as.integer()
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
    filter(permco %in% sample_permcos)
#> # Source:   lazy query [?? x 16]
#> # Database: postgres [igow@10.101.13.99:5432/crsp]
#>   permno permco namedt     nameenddt  cusip ncusip ticker comnam hexcd
#>    <int>  <int> <date>     <date>     <chr> <chr>  <chr>  <chr>  <dbl>
#> 1  16529  55825 2016-12-28 2018-12-31 7078… 70788… PVAC   PENN …     3
#> 2  61955   3730 1972-12-14 1997-09-10 7078… 70788… PVIR   PENN …     1
#> 3  61955   3730 1997-09-11 2016-01-12 7078… 70788… PVA    PENN …     1
#> # … with 7 more variables: exchcd <dbl>, siccd <int64>, shrcd <int64>,
#> #   shrcls <chr>, st_date <date>, end_date <date>, namedum <dbl>

Created on 2020-01-07 by the reprex package (v0.3.0)

iangow commented 4 years ago

The next case is a bit different. There are overlapping date ranges. But it seems that (in the opinion of CRSP) AMB Property Corp acquired Prologis on 2011-06-03 and took on its name and ticker from that date. So we need to look at the event_name of calls before that date and, if they refer to Prologis, the 80406 PERMNO is the correct one. If there are joint calls, I guess the success PERMNO should be used.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
    diff_permcos %>%
    filter(correct %in% c('DN', 'NONE'))

sample <-
    to_investigate %>%
    group_by(permno.x, permno.y) %>%
    count() %>%
    ungroup() %>%
    top_n(1, wt=n) 

sample_permcos <-
    to_investigate %>%
    semi_join(sample) %>%
    select(starts_with("permco")) %>%
    distinct() %>%
    .[1, ] %>% 
    as.integer()
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
    filter(permco %in% sample_permcos) %>%
    select(permno, namedt, nameenddt, ncusip, comnam, ticker)
#> # Source:   lazy query [?? x 6]
#> # Database: postgres [igow@10.101.13.99:5432/crsp]
#>   permno namedt     nameenddt  ncusip   comnam                       ticker
#>    <int> <date>     <date>     <chr>    <chr>                        <chr> 
#> 1  80406 1994-03-31 1998-06-30 81413810 SECURITY CAPITAL INDUSTRIAL… SCN   
#> 2  80406 1998-07-01 2002-06-27 74341010 PROLOGIS TRUST               PLD   
#> 3  80406 2002-06-28 2011-06-02 74341010 PROLOGIS                     PLD   
#> 4  85592 1997-11-21 2011-06-02 00163T10 A M B PROPERTY CORP          AMB   
#> 5  85592 2011-06-03 2018-12-31 74340W10 PROLOGIS INC                 PLD

Created on 2020-01-07 by the reprex package (v0.3.0)

After fixing this case, there are still four unresolved entries. Three where neither PERMNO seems right; one where further research would be required. I will defer this for now.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
    diff_permcos %>%
    filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(85592L, 80406L)

to_investigate %>% 
    filter(permno.x %in% permno_list,
           permno.y %in% permno_list) %>%
    select(correct, event_title)
#> # A tibble: 4 x 2
#>   correct event_title                                          
#>   <chr>   <chr>                                                
#> 1 NONE    Back to the Future – REITs Sponsoring Funds          
#> 2 NONE    Catellus Development Corporation Shareholders Meeting
#> 3 NONE    Catellus Development Corporation Shareholders Meeting
#> 4 NONE    Catellus Development Corporation Shareholders Meeting

Created on 2020-01-07 by the reprex package (v0.3.0)

iangow commented 4 years ago

This next one is a little less clear, except that prior to 2012-07-02 the only valid PERMNO is 11691. Let's see how far this gets us.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
    diff_permcos %>%
    filter(correct %in% c('DN', 'NONE'))

sample <-
    to_investigate %>%
    group_by(permno.x, permno.y) %>%
    count() %>%
    ungroup() %>%
    top_n(1, wt=n) 

sample_permcos <-
    to_investigate %>%
    semi_join(sample) %>%
    select(starts_with("permco")) %>%
    distinct() %>%
    .[1, ] %>% 
    as.integer()
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
    filter(permco %in% sample_permcos) %>%
    select(permno, namedt, nameenddt, ncusip, comnam, ticker)
#> # Source:   lazy query [?? x 6]
#> # Database: postgres [igow@10.101.13.99:5432/crsp]
#>   permno namedt     nameenddt  ncusip   comnam                       ticker
#>    <int> <date>     <date>     <chr>    <chr>                        <chr> 
#> 1  11691 1972-12-14 2008-09-30 01448210 ALEXANDER & BALDWIN INC      ALEX  
#> 2  11691 2008-10-01 2009-05-05 01448210 ALEXANDER & BALDWIN INC      AXB   
#> 3  11691 2009-05-06 2012-06-06 01448210 ALEXANDER & BALDWIN INC      ALEX  
#> 4  11691 2012-06-07 2012-07-01 01448110 ALEXANDER & BALDWIN HLDGS I… ALEX  
#> 5  11691 2012-07-02 2018-12-31 57686G10 MATSON INC                   MATX  
#> 6  13429 2012-07-02 2018-12-31 01449110 ALEXANDER & BALDWIN INC NEW  ALEX

Created on 2020-01-07 by the reprex package (v0.3.0)

It seems to cover things:

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
    diff_permcos %>%
    filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(11691, 13429)

to_investigate %>% 
    filter(permno.x %in% permno_list,
           permno.y %in% permno_list) %>%
    select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-07 by the reprex package (v0.3.0)

iangow commented 4 years ago

This one seems a bit more complicated, but there is only one name associated with the two PERMNOs ("STERLING BANCORP") and here the dates seem to split things nicely.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
    diff_permcos %>%
    filter(correct %in% c('DN', 'NONE'))

sample <-
    to_investigate %>%
    group_by(permno.x, permno.y) %>%
    count() %>%
    ungroup() %>%
    top_n(1, wt=n) 

sample_permcos <-
    to_investigate %>%
    semi_join(sample) %>%
    select(starts_with("permco")) %>%
    distinct() %>%
    .[1, ] %>% 
    as.integer()
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
    filter(permco %in% sample_permcos) %>%
    select(permno, namedt, nameenddt, ncusip, comnam, ticker) %>% 
    print(n=Inf)
#> # Source:   lazy query [?? x 6]
#> # Database: postgres [igow@10.101.13.99:5432/crsp]
#>    permno namedt     nameenddt  ncusip   comnam                      ticker
#>     <int> <date>     <date>     <chr>    <chr>                       <chr> 
#>  1  27254 1960-04-25 1962-07-01 <NA>     STANDARD FINANCIAL CORP     <NA>  
#>  2  27254 1962-07-02 1963-01-01 <NA>     STANDARD FINANCIAL CORP     SFR   
#>  3  27254 1963-01-02 1964-04-22 <NA>     S F C FINANCIAL CORP        SFR   
#>  4  27254 1964-04-23 1966-08-28 <NA>     STANDARD FINANCIAL CORP     SFR   
#>  5  27254 1966-08-29 1968-10-21 <NA>     STANDARD PRUDENTIAL UTD CO… STU   
#>  6  27254 1968-10-22 1978-10-23 85387010 STANDARD PRUDENTIAL CORP    STU   
#>  7  27254 1978-10-24 2013-10-31 85915810 STERLING BANCORP            STL   
#>  8  86582 1999-01-08 2004-01-14 74383510 PROVIDENT BANCORP INC NY    PBCP  
#>  9  86582 2004-01-15 2005-06-29 74383A10 PROVIDENT BANCORP INC DE    PBCP  
#> 10  86582 2005-06-30 2011-12-27 74402810 PROVIDENT NEW YORK BANCORP  PBNY  
#> 11  86582 2011-12-28 2013-10-31 74402810 PROVIDENT NEW YORK BANCORP  PBNY  
#> 12  86582 2013-11-01 2018-12-31 85917A10 STERLING BANCORP DEL        STL

Created on 2020-01-07 by the reprex package (v0.3.0)

After updating the spreadsheet, this is what I see:

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
    diff_permcos %>%
    filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(27254, 86582)

to_investigate %>% 
    filter(permno.x %in% permno_list,
           permno.y %in% permno_list) %>%
    select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-07 by the reprex package (v0.3.0)

iangow commented 4 years ago

Next case has a clear date split:

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
    diff_permcos %>%
    filter(correct %in% c('DN', 'NONE'))

sample <-
    to_investigate %>%
    group_by(permno.x, permno.y) %>%
    count() %>%
    ungroup() %>%
    top_n(1, wt=n) 

sample_permcos <-
    to_investigate %>%
    semi_join(sample) %>%
    select(starts_with("permco")) %>%
    distinct() %>%
    .[1, ] %>% 
    as.integer()
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
    filter(permco %in% sample_permcos) %>%
    select(permno, namedt, nameenddt, ncusip, comnam, ticker) %>% 
    print(n=Inf)
#> # Source:   lazy query [?? x 6]
#> # Database: postgres [igow@10.101.13.99:5432/crsp]
#>   permno namedt     nameenddt  ncusip   comnam                  ticker
#>    <int> <date>     <date>     <chr>    <chr>                   <chr> 
#> 1  30382 1972-12-14 1995-01-31 25666910 DOLLAR GENERAL CORP     DOLR  
#> 2  30382 1995-02-01 2007-07-06 25666910 DOLLAR GENERAL CORP     DG    
#> 3  93096 2009-11-13 2018-12-31 25667710 DOLLAR GENERAL CORP NEW DG

Created on 2020-01-07 by the reprex package (v0.3.0)

Again, edit the spreadsheet, then all works.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
    diff_permcos %>%
    filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(30382, 93096)

to_investigate %>% 
    filter(permno.x %in% permno_list,
           permno.y %in% permno_list) %>%
    select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-07 by the reprex package (v0.3.0)

iangow commented 4 years ago

After the work above, there are 33 combinations remaining:

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
    diff_permcos %>%
    filter(correct %in% c('DN'))

to_investigate %>% 
    select(permno.x, permno.y) %>% 
    distinct() %>% 
    count()
#> # A tibble: 1 x 1
#>       n
#>   <int>
#> 1    33

Created on 2020-01-07 by the reprex package (v0.3.0)

iangow commented 4 years ago

@Yvonne-Han Do you think you might find a little time to work on this today? I've done six (bigger) cases and there are 33 to go. No need to do all now. Once this is done, we should have a pretty good StreetEvents-PERMNO match.

There are no real instructions except these:

  1. I run the first set of code (using reprex::reprex() and on my own computer, as reprex is a bit difficult to run on RStudio Server) and paste the results in a new comment.
  2. I edit the spreadsheet to choose the correct PERMNO for each call. If in doubt, perhaps put some other value in correct (say, IDG to check).
  3. I run the second set of code (again using reprex::reprex()) and paste the results in the same comment.
  4. Repeat from 1 above.

You may need access to this spreadsheet (just ask and I will give it to you).

iangow commented 4 years ago

@Yvonne-Han Note that I ran something like this:

Sys.setenv(PGHOST = "10.101.13.99", PGDATABASE="crsp")

so that the reprex code connects to the right server, etc. Set PGUSER, PGPASSWORD, etc., as necessary using the same approach.

iangow commented 4 years ago

@Yvonne-Han Finally, you may need to run this part interactively (not with reprex) the first time to get authorization set up. Change options(gargle_oauth_email = "iandgow@gmail.com") to use the email address that you use in that process.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"
options(gargle_oauth_email = "iandgow@gmail.com")
diff_permcos <- read_sheet(gs, sheet = "diff_permcos")
Yvonne-Han commented 4 years ago

@iangow Thanks for the detailed documentation (I just finished discussing with Chung-Yu and Mario). I think I know what you are after now. Let me have a try and keep you updated below (I'm keeping track of everything that I did - In case I did something wrong, fixing things can probably be easier with the below comments).

Yvonne-Han commented 4 years ago

Case 1/33: In this case, it seems that the Midstream company used the 11003 permno while the Williams Partners company used the 90851 permno. On 2015-02-03, Midstream acquired Williams Partners and used their name but the original 11003 permno.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  print(n=Inf)
#> # Source:   lazy query [?? x 7]
#> # Database: postgres [yanzih1@10.101.13.99:5432/crsp]
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  11003  53459 2010-07-29 2012-07-23 16524K10 CHESAPEAKE MIDSTREAM… CHKM  
#> 2  11003  53459 2012-07-24 2015-02-02 00434L10 ACCESS MIDSTREAM PAR… ACMP  
#> 3  11003  53459 2015-02-03 2018-08-10 96949L10 WILLIAMS PARTNERS LP… WPZ   
#> 4  90851  47199 2005-08-18 2015-02-02 96950F10 WILLIAMS PARTNERS L P WPZ

Created on 2020-01-08 by the reprex package (v0.3.0)

To code this one, I had a look at the related 33 entries. All of them happened before 2015, and had Williams Partners in the event title, so I coded them as 'Y' (meaning that they should use the 90851 permno). One exception is file_name == 5553456_T, which is a joint conference call by Williams Partners and Midstream. According your rule above, I coded this as X (the 11003 permno from Midstream).

After editing the spreadsheet, it seems that all works.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(90851L, 11003L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 2/33: It seems that this one is a simple date-split case. Company name hasn't really changed (Knight xxx and KCG seem to refer to the same comname). Before 2013-07-01, the company used the 87131 permno; After 2013-07-05, the company started to use the 13998 permno.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  print(n=Inf)
#> # Source:   lazy query [?? x 7]
#> # Database: postgres [yanzih1@10.101.13.99:5432/crsp]
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  13998  54450 2013-07-05 2017-07-19 48244B10 K C G HOLDINGS INC    KCG   
#> 2  87131  16201 1998-07-08 2000-01-11 49906710 KNIGHT TRIMARK GROUP… NITE  
#> 3  87131  16201 2000-01-12 2000-05-18 49906810 KNIGHT TRIMARK GROUP… NITE  
#> 4  87131  16201 2000-05-19 2005-05-30 49906310 KNIGHT TRADING GROUP… NITE  
#> 5  87131  16201 2005-05-31 2010-05-24 49900510 KNIGHT CAPITAL GROUP… NITE  
#> 6  87131  16201 2010-05-25 2013-07-01 49900510 KNIGHT CAPITAL GROUP… KCG

Created on 2020-01-08 by the reprex package (v0.3.0)

It seems that the affected calls in the spreadsheet were all held before 2013 and had the Knight xx as the comnam, so I coded all of them as Y (using the 87131 permno). After doing all this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(13998L, 87131L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 3/33: This change of permno is related to one company only, Kinder Morgan Inc. This one seems to be a little bit unusual because there was a gap in the time (from 2007 to 2011). I googled the company and it launched an IPO in 2011 (should be the reason why the permno changed). Therefore, prior to 2011-02-11, the permno should be 51596 while after 2011-02-11, the permno should be 12558.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  51596  21029 1970-11-24 1983-03-24 48527810 KANSAS NEBRASKA NATU… KNB   
#> 2  51596  21029 1983-03-25 1999-10-07 48262010 K N ENERGY INC        KNE   
#> 3  51596  21029 1999-10-08 2007-05-30 49455P10 KINDER MORGAN INC KA… KMI   
#> 4  12558  53667 2011-02-11 2018-12-31 49456B10 KINDER MORGAN INC     KMI

Created on 2020-01-08 by the reprex package (v0.3.0)

In the spreadsheet, all calls happened before 2011 (in fact, they were all held no later than year 2007), so they are all coded as Y (meaning the 51596 permno). After doing this, again, we are all good.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(12558L, 51596L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 4/33: Prior to 2009-09-25, Bankrate should use the 86895 permno; After 2011-06-17, it should use the 12835 permno.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  86895  16528 1999-05-13 1999-12-15 45816V10 INTELLIGENT LIFE CORP ILIF  
#> 2  86895  16528 1999-12-16 2000-09-24 45172Q10 ILIFE COM INC         ILIF  
#> 3  86895  16528 2000-09-25 2001-01-28 06646V10 BANKRATE INC          RATE  
#> 4  86895  16528 2001-01-29 2003-01-08 06646V10 BANKRATE INC          <NA>  
#> 5  86895  16528 2003-01-09 2009-09-25 06646V10 BANKRATE INC          RATE  
#> 6  12835  53804 2011-06-17 2017-11-08 06647F10 BANKRATE INC DE       RATE

Created on 2020-01-08 by the reprex package (v0.3.0)

Affected calls were all held before 2009-09-25, so they are all coded as Y (using the 86895 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(86895L, 12835L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 5/33: This one involves both M&A and name changes. If I understand correctly, prior to 2004-11-30, Wellpoint used the 83583 permno, while Anthem used the 89179 permno. On 2004-11-30, Anthem merged with Wellpoint and used their name (Wellpoint) but the 89179 permno. On 2014-12-03, Wellpoint changed the name back to Anthem but the permno remains unchanged as 89179.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  83583  31650 1996-05-21 2004-11-30 94973H10 WELLPOINT HEALTH NET… WLP   
#> 2  89179  42273 2001-10-30 2004-11-30 03674B10 ANTHEM INC            ATH   
#> 3  89179  42273 2004-12-01 2014-12-02 94973V10 WELLPOINT INC         WLP   
#> 4  89179  42273 2014-12-03 2018-12-31 03675210 ANTHEM INC            ANTM

Created on 2020-01-08 by the reprex package (v0.3.0)

In the spreadsheet, most of the calls were held by WellPoint between 2003-2004, so I coded them as Y (permno 83583).

@iangow Special notice here: I've coded file_name == "617866_T" and file_name == "788853_T" as Y here. These two calls were held by Cobalt, NOT by WellPoint, but I checked that Wellpoint acquired Cobalt in 2003 (which was approx. when the two calls were held, on 2002-04-15 and 2003-10-06). One alternative might be to code them using Cobalt permno (not in the spreadsheet), so you need to make a decision here. After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(83583L, 89179L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 6/33: A simple one. Prior to 2008-08-13, Digimarc used 87482 permno; After 2008-10-17, it started to use 92808 permno.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam            ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>             <chr> 
#> 1  87482  17474 1999-12-02 2008-08-13 25380710 DIGIMARC CORP     DMRC  
#> 2  92808  53096 2008-10-17 2018-12-31 25381B10 DIGIMARC CORP NEW DMRC

Created on 2020-01-08 by the reprex package (v0.3.0)

In the spreadsheet, all affected calls were held before the date, so they are all coded as Y (using the 87482 permno). Again, we have this now:

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(87482L, 92808L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 7/33: Date-split one. Prior to 2008-12-01, Pilgrims Pride Corp used the 70965 permno; After 2009-12-29, it started to use the 93179 permno.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Auto-refreshing stale OAuth token.
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  70965  21407 1986-11-14 1988-09-29 72146710 PILGRIMS PRIDE CORP   CHX   
#> 2  70965  21407 1988-09-30 2001-08-23 72146710 PILGRIMS PRIDE CORP   PPC   
#> 3  87040  21407 1999-08-02 2003-11-21 72146720 PILGRIMS PRIDE CORP   CHX   
#> 4  70965  21407 2001-08-24 2003-11-23 72146710 PILGRIMS PRIDE CORP   CHX   
#> 5  70965  21407 2003-11-24 2008-12-01 72146710 PILGRIMS PRIDE CORP   PPC   
#> 6  93179  53305 2009-12-29 2012-12-27 72147K10 PILGRIMS PRIDE CORP … PPC   
#> 7  93179  53305 2012-12-28 2018-12-31 72147K10 PILGRIMS PRIDE CORP … PPC

Created on 2020-01-08 by the reprex package (v0.3.0)

In the spreadsheet, all affected rows are coded as Y (permno 70965) as the calls were all held before 2008-12-01. After doing this, we now have:

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(70965L, 91379L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 8/33: Prior to 2008-05-28, Bright Horizons used the 85531 permno; After 2013-01-25, it started to use the 13739 permno.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  85531  15812 1997-11-07 1998-07-26 10919010 BRIGHT HORIZONS INC   BRHZ  
#> 2  85531  15812 1998-07-27 2008-05-28 10919510 BRIGHT HORIZONS FAMI… BFAM  
#> 3  13739  54297 2013-01-25 2018-12-31 10919410 BRIGHT HORIZONS FAM … BFAM

Created on 2020-01-08 by the reprex package (v0.3.0)

In the spreadsheet, all calls were held before 2008-05-28 so they were all coded as Y (permno 85531). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(85531L, 13739L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 9/33: This seems to be a complicated one. The period that is of interest to us should be after 2000, so let's start from there. Rayovac Corp used the 85625 permno from 1997-11-21 to 2005-05-01, changed their name (and ticker) to Spectrum Brands from 2005-05-02 to 2008-12-19 (permno was still 85625). Spectrum Brands changed their permno and used 93295 from 2010-03-18 to 2018-07-13. Zapata Corp used the 38295 permno from 2001-01-13 to 2009-12-23, and change the name to Harbinger Group from 2009-12-24 to 2015-03-10 (permno was still 38295). Later, it also changed its name to HRG group from 2015-03-11 to 2018-07-15, and merged with Spectrum from 2018-07-16 and onwards (changed the name to Spectrum too).

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>    permno permco namedt     nameenddt  ncusip   comnam               ticker
#>     <int>  <int> <date>     <date>     <chr>    <chr>                <chr> 
#>  1  38295  21948 1962-07-02 1968-01-01 <NA>     ZAPATA OFF SHORE CO  ZOS   
#>  2  38295  21948 1968-01-02 1968-04-14 98911010 ZAPATA OFF SHORE CO  ZOS   
#>  3  38295  21948 1968-04-15 1968-11-24 98911010 ZAPATA OFF SHORE CO  ZOS   
#>  4  38295  21948 1968-11-25 1972-02-14 98910210 ZAPATA NORNESS INC   ZOS   
#>  5  38295  21948 1972-02-15 1994-05-02 98907010 ZAPATA CORP          ZOS   
#>  6  38295  21948 1994-05-03 1996-02-14 98907050 ZAPATA CORP          ZOS   
#>  7  38295  21948 1996-02-15 2001-01-30 98907050 ZAPATA CORP          ZAP   
#>  8  85625  32162 1997-11-21 2005-05-01 75508110 RAYOVAC CORP         ROV   
#>  9  38295  21948 2001-01-31 2009-12-23 98907060 ZAPATA CORP          ZAP   
#> 10  85625  32162 2005-05-02 2008-12-19 84762L10 SPECTRUM BRANDS INC  SPC   
#> 11  38295  21948 2009-12-24 2015-03-10 41146A10 HARBINGER GROUP INC  HRG   
#> 12  93295  32162 2010-03-18 2010-06-16 84762L20 SPECTRUM BRANDS INC  SPB   
#> 13  93295  32162 2010-06-17 2018-07-13 84763R10 SPECTRUM BRANDS HOL… SPB   
#> 14  38295  21948 2015-03-11 2018-07-15 40434J10 H R G GROUP INC      HRG   
#> 15  38295  21948 2018-07-16 2018-12-31 84790A10 SPECTRUM BRANDS HOL… SPB

Created on 2020-01-08 by the reprex package (v0.3.0)

@iangow This one needs a little bit of your attention here. From line 1743 to 1763: I've created 21 IDG_TO_CHECK because I think the correct permno should be neither X (93295) nor Y (38295). Given that these calls were held in 2003-2010 by Spectrum or Rayovac, in my opinion, they should follow these firms' original permno (that is, 85625). I've created another column in your spreadsheet (called permno.z and documented these proposed values so that you don't have to add them again if you agree with this).

This one is not really solved, but the correct column was no longer marked as either DN or NONE so you need to manually search for IDG_TO_CHECK to fetch these roles.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(93295L, 38295L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 10/33: A much simpler one. Education Management used the 84075 permno before 2006-06-01, and started to use the 93080 permno after 2009-10-02.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  84075  15098 1996-10-31 2006-06-01 28139T10 EDUCATION MANAGEMENT… EDMC  
#> 2  93080  53248 2009-10-02 2014-11-12 28140M10 EDUCATION MANAGEMENT… EDMC

Created on 2020-01-08 by the reprex package (v0.3.0)

All affected calls were held before 2006-06-01, so they are all coded as Y (using the 84075 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(93080L, 84075L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 11/33: This Universal xx company changed their comnam slightly during the period, and changed the permno from 79477 to 12785 on 2011-04-29.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  79477   5981 1983-05-12 1996-07-22 91359010 UNIVERSAL HOLDING CO… UHCO  
#> 2  79477   5981 1996-07-23 2007-12-02 91337710 UNIVERSAL AMERICAN F… UHCO  
#> 3  79477   5981 2007-12-03 2011-04-29 91337710 UNIVERSAL AMERICAN C… UAM   
#> 4  12785  53766 2011-05-02 2017-04-28 91338E10 UNIVERSAL AMERICAN C… UAM

Created on 2020-01-08 by the reprex package (v0.3.0)

All affected calls were held on or before 2011-04-29, so I've coded them all as Y (using the 79477 permno).

@iangow You might want to double confirm this specific call held on 2011-04-29 (file_name == 3997477_T). The reason why I chosed Y is because - the new permno started on 2011-05-02 so this should belong to the old one.

After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(79477L, 12785L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 12/33: Prior to 2007-09-21, this First Republic xx company used the 10623 permno; After 2010-12-09, it started to use the 12448 permno.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  10623   8523 1986-08-27 1988-02-29 33615F10 FIRST REPUBLIC BANCO… FRBC  
#> 2  10623   8523 1988-03-01 1992-07-27 33615F10 FIRST REPUBLIC BANCO… FRC   
#> 3  10623   8523 1992-07-28 1997-09-14 33615F10 FIRST REPUBLIC BANCO… FRC   
#> 4  10623   8523 1997-09-15 2007-09-21 33615810 FIRST REPUBLIC BANK … FRC   
#> 5  12448  53609 2010-12-09 2018-12-31 33616C10 FIRST REPUBLIC BANK … FRC

Created on 2020-01-08 by the reprex package (v0.3.0)

All affected calls were held before 2007-09-21, so they are all coded as Y (using the 10623 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(10623L, 12448L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 13/33: A M&A + name change case. Scansoft Inc used the 82759 permno from 1993-03-03 to 2005-10-17, while Nuance Communications used the 88180 permno from 2000-04-13 to 2005-09-16. On 2005-10-18, Scansoft Inc merged with Nuance Communications and started to use the 82759 permno. The merged comnam is always Nuance Communications, but the ticker changed on 2005-11-20.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  82759  14205 1995-12-11 1999-03-02 92830V10 VISIONEER INC         VSNR  
#> 2  82759  14205 1999-03-03 2005-10-17 80603P10 SCANSOFT INC          SSFT  
#> 3  88180  37174 2000-04-13 2005-09-16 66996710 NUANCE COMMUNICATIONS NUAN  
#> 4  82759  14205 2005-10-18 2005-11-20 67020Y10 NUANCE COMMUNICATION… SSFT  
#> 5  82759  14205 2005-11-21 2018-12-31 67020Y10 NUANCE COMMUNICATION… NUAN

Created on 2020-01-08 by the reprex package (v0.3.0)

All calls were held before 2005-10-18. Calls held by Nuance were coded as Y (using the 88180 permno) while calls held by ScanSoft were coded as X (using the 82759 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(10623L, 12448L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 14/33: Only one firm is involved here. This firm was called Premier Parks Inc before 2000-07-02, and changed its name to Six Flags afterwards. It used the 76209 permno before 2009-04-17, and changed the permno to 93423 from 2010-06-21.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  76209   4450 1977-10-28 1981-06-02 88650610 TIERCO                TIERS 
#> 2  76209   4450 1981-06-03 1990-10-08 88650610 TIERCO GROUP INC      TIER  
#> 3  76209   4450 1990-10-09 1996-05-29 88650610 TIERCO GROUP INC      <NA>  
#> 4  76209   4450 1996-05-30 1997-12-21 74054020 PREMIER PARKS INC     PARK  
#> 5  76209   4450 1997-12-22 2000-07-02 74054020 PREMIER PARKS INC     PKS   
#> 6  76209   4450 2000-07-03 2006-06-04 83001P10 SIX FLAGS INC         PKS   
#> 7  76209   4450 2006-06-05 2009-04-17 83001P10 SIX FLAGS INC         SIX   
#> 8  93423  53440 2010-06-21 2018-12-31 83001A10 SIX FLAGS ENTERTAINM… SIX

Created on 2020-01-08 by the reprex package (v0.3.0)

All affected calls were held before 2009-04-17, so I've coded them all as Y (using the 76209 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(76209L, 93423L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 15/33: A date-split one. Prior to 2007-05-10, Swift used the 76214 permno. After 2010-12-26, Swift used the 12473 permno. It also slightly changed its comnam on 2017-09-10.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  76214  10657 1990-06-29 2007-05-10 87075610 SWIFT TRANSPORTATION… SWFT  
#> 2  12473  53620 2010-12-16 2017-09-10 87074U10 SWIFT TRANSPORTATION… SWFT  
#> 3  12473  53620 2017-09-11 2018-12-31 49904910 KNIGHT SWIFT TRANSPO… KNX

Created on 2020-01-08 by the reprex package (v0.3.0)

All affected calls were held before 2007-05-10, so I've coded them all as Y (using the 76214 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(12473L, 76214L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 16/33: This company Cowen xx changed their permno from 93198 to 93128 on 2009-10-30.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam              ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>               <chr> 
#> 1  91398  50971 2006-07-13 2009-10-30 22362110 COWEN GROUP INC     COWN  
#> 2  93128  53282 2009-11-02 2016-12-04 22362210 COWEN GROUP INC NEW COWN  
#> 3  93128  53282 2016-12-05 2017-05-22 22362260 COWEN GROUP INC NEW COWN  
#> 4  93128  53282 2017-05-23 2018-12-31 22362260 COWEN INC           COWN

Created on 2020-01-08 by the reprex package (v0.3.0)

All calls were held before 2009-10-30, so I've coded almost of them as Y (using the 50971 permno). The only exception is file_name == 2262198_T" held on 2009-06-05, as it was a joint announcement from Cowen and Ramius for their upcoming M&A. I've coded this one as X (using permno 93128).

After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(93128L, 91398L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 17/33: A M&A one. Boise used the 42024 permno prior to 2004-10-31, while Officemax used the 81063 permno prior to 2003-12-09. On 2004-11-01, Boise merged with Officemax and change its comnam to Officemax. It continued to use the 42024 permno.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam             ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>              <chr> 
#> 1  42024  20316 1965-06-02 1968-01-01 <NA>     BOISE CASCADE CORP BCC   
#> 2  42024  20316 1968-01-02 2004-10-31 09738310 BOISE CASCADE CORP BCC   
#> 3  81063  30523 1994-11-02 2003-12-09 67622M10 OFFICEMAX INC      OMX   
#> 4  42024  20316 2004-11-01 2013-11-05 67622P10 OFFICEMAX INC NEW  OMX

Created on 2020-01-08 by the reprex package (v0.3.0)

All calls were held before 2003-12-09. I've coded all but one (joint announcement from Boise and Officemax about the M&A, file_name == "764524_T") asY(using permno 81063). The joint one was coded asX` (using permno 42024). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(81063L, 42024L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 18/33: A date-split one. Before 2005-02-24, Select Medical used the 88972 permno. After 2009-09-25, it started to use the 93035 permno.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  88972  41465 2001-04-05 2002-08-27 81619610 SELECT MEDICAL CORPO… SLMC  
#> 2  88972  41465 2002-08-28 2005-02-24 81619610 SELECT MEDICAL CORP   SEM   
#> 3  93035  53214 2009-09-25 2018-12-31 81619Q10 SELECT MEDICAL HOLDI… SEM

Created on 2020-01-08 by the reprex package (v0.3.0)

All calls were held before 2005, so I've coded them all as Y (using the 88972 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(41465L, 53214L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 19/33: A relatively complicated one. Involves M&A and name changes. Interland used permno 75993 from 2001-08-07 yo 2006-03-19, and changed its comnam to Web Com on 2006-03-20. Website Pros used permno 90984 from 2005-11-02 to 2008-06-08. It merged with Web Com on 2008-06-09 and changed its name a couple of times after that. The permno was always 90984.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  75993  10311 1989-09-25 1995-04-09 98943210 ZEOS INTERNATIONAL L… ZEOS  
#> 2  75993  10311 1995-04-10 2001-08-06 59510010 MICRON ELECTRONICS I… MUEI  
#> 3  75993  10311 2001-08-07 2003-08-03 45872710 INTERLAND INC NEW     INLD  
#> 4  75993  10311 2003-08-04 2006-03-19 45872720 INTERLAND INC NEW     INLD  
#> 5  90984  48782 2005-11-02 2008-06-08 94769V10 WEBSITE PROS INC      WSPI  
#> 6  75993  10311 2006-03-20 2007-09-28 94732Q10 WEB COM INC           WWWW  
#> 7  90984  48782 2008-06-09 2008-10-26 94769V10 WEBSITE PROS INC      WWWW  
#> 8  90984  48782 2008-10-27 2015-11-09 94733A10 WEB COM GROUP INC     WWWW  
#> 9  90984  48782 2015-11-10 2018-10-10 94733A10 WEB COM GROUP INC     WEB

Created on 2020-01-08 by the reprex package (v0.3.0)

All calls were coded as Y (using 75993) except for the joint announcement of the M&A (using 90984) - file_name == "1590821_T". After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(75993L, 90984L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 20/33: Caesars used the 86448 permno before 2005-06-13, and started to use t he 13267 permno after 2012-02-08.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  86447  34953 1999-01-04 2004-01-05 70069010 PARK PLACE ENTERTAIN… PPE   
#> 2  86447  34953 2004-01-06 2005-06-13 12768710 CAESARS ENTERTAINMEN… CZR   
#> 3  13267  53998 2012-02-08 2018-12-31 12768610 CAESARS ENTERTAINMEN… CZR

Created on 2020-01-08 by the reprex package (v0.3.0)

All calls were held before 2005-06-13, so they are all coded as Y (using the 86447 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Auto-refreshing stale OAuth token.
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(86447L, 13267L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 21/33: The records from stocknames don't seem to be very consistent.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  85304  15631 1997-08-05 2005-01-06 67574410 OCULAR SCIENCES INC   OCLR  
#> 2  88149  37145 2000-04-13 2004-09-09 09856Q10 BOOKHAM TECHNOLOGY P… BKHM  
#> 3  90354  37145 2004-09-20 2009-04-27 09856E10 BOOKHAM INC           BKHM  
#> 4  90354  37145 2009-04-28 2010-04-29 67555N10 OCLARO INC            OCLR  
#> 5  90354  37145 2010-04-30 2018-12-07 67555N20 OCLARO INC            OCLR

Created on 2020-01-08 by the reprex package (v0.3.0)

@iangow I've marked this case (6 entries, line 598-603) as IDG_TO_CHECK as this one was a little bit wired. The calls were held by Bookham between 2002-2004, but there was both permno.x and permno.y don't match with Bookham's permno during that period (should be 88149). Again, I've created a permno.z column and put it there. My google search on this group wasn't too successful either, so I will defer this for now. You need to manually search for IDG_TO_CHECK for this.

Yvonne-Han commented 4 years ago

Case 22/33: Thermo used the 62092 permno from 1980-08-27 to 2018-12-31, and changed their comnam/ticker slightly during the period. Invitrogen used the 86725 permno from 1999-02-26 to 2014-02-03, and changed their comnam to Life Tech on 2008-11-24. Honestly, I can't really see the connection here, but Google told me that

Life Technologies was acquired by Thermo Fisher Scientific in Jan 2014.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  62092   4557 1972-12-14 1980-08-26 88355610 THERMO ELECTRON CORP  TRMO  
#> 2  62092   4557 1980-08-27 2006-11-09 88355610 THERMO ELECTRON CORP  TMO   
#> 3  86725  16425 1999-02-26 2008-11-23 46185R10 INVITROGEN CORP       IVGN  
#> 4  62092   4557 2006-11-10 2018-12-31 88355610 THERMO FISHER SCIENT… TMO   
#> 5  86725  16425 2008-11-24 2014-02-03 53217V10 LIFE TECHNOLOGIES CO… LIFE

Created on 2020-01-08 by the reprex package (v0.3.0)

Affected calls were mostly held in 2009 (one exception: file_name == "1106406_T" held in 2005 regarding BioSource's potential M&A - but didn't proceed). I've marked them all as Y (using permno 86725). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(86725L, 62092L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 23/33: This one is actually the one you deferred before. I googled Catellus and Prologis, and here's some relevant info.:

From September 2005 through June 2011, Catellus merged with ProLogis, a leading owner, operator and developer of industrial real estate worldwide. ProLogis sold the majority of its retail and mixed-use assets, as well as rights to the Catellus name, to TPG Capital, a private entity, in 2011.

"Back to the Future" is an event organised by Prologis to highlight their revolution in the logistics field. See here if needed.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  80406  30369 1994-03-31 1998-06-30 81413810 SECURITY CAPITAL IND… SCN   
#> 2  85592  32135 1997-11-21 2011-06-02 00163T10 A M B PROPERTY CORP   AMB   
#> 3  80406  30369 1998-07-01 2002-06-27 74341010 PROLOGIS TRUST        PLD   
#> 4  80406  30369 2002-06-28 2011-06-02 74341010 PROLOGIS              PLD   
#> 5  85592  32135 2011-06-03 2018-12-31 74340W10 PROLOGIS INC          PLD

Created on 2020-01-08 by the reprex package (v0.3.0)

Given the above info., I've coded the 3 None cases related to Catellus Shareholders Meeting as IDG_TO_CHECK because I don't have the permno for Catellus and the calls were held prior to the M&A. The Back to the Future one was coded as Y (permno 80406) because it was related to Prologistics and it was in 2007. You need to manually search for IDG_TO_CHECK for these entries.

After doing this, we now have:

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(80406L, 85592L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 24/33: A simple date-split one. Prior to 2013-07-17, Spirit used the 13580 permno; After 2013-07-17, it started to use the 14039 permno.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  13580  54181 2012-09-20 2013-07-17 84860F10 SPIRIT REALTY CAPITA… SRC   
#> 2  14039  54472 2013-07-18 2018-12-12 84860W10 SPIRIT REALTY CAPITA… SRC   
#> 3  14039  54472 2018-12-13 2018-12-31 84860W30 SPIRIT REALTY CAPITA… SRC

Created on 2020-01-08 by the reprex package (v0.3.0)

Four affected calls were held before 2013-07-17, so I've marked them all as Y (using the 54181 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(13580L, 14039L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 25/33: Same company, different permnos over years. There was an overlap in 1986-12-24 to 2002-11-18.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam           ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>            <chr> 
#> 1  25022    962 1972-12-14 2002-11-18 20030010 COMCAST CORP     CMCSA 
#> 2  11997    962 1986-12-24 2002-11-18 20030020 COMCAST CORP     CMCSK 
#> 3  89525  43613 2002-11-19 2018-12-31 20030N10 COMCAST CORP NEW CMCSA 
#> 4  89565  43613 2002-11-19 2015-12-11 20030N20 COMCAST CORP NEW CMCSK

Created on 2020-01-08 by the reprex package (v0.3.0)

The affected calls were all held before 2002-11-18. Given that I have to choose between 89525 and 25022, I've marked them as Y because it occurred before the change of permno. After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(89525L, 25022L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 26/33: Prior to 2005-11-30, Metals used 89769 as permno; After 2010-04-09, Metals used 93323 as permno.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  85247  32040 1997-07-11 2001-11-14 59132410 METALS U S A INC      MUI   
#> 2  89769  32040 2003-06-04 2004-03-07 59132420 METALS U S A INC      MLT   
#> 3  89769  32040 2004-03-08 2005-11-30 59132420 METALS U S A INC      MUSA  
#> 4  93323  53380 2010-04-09 2013-04-12 59132A10 METALS USA HOLDINGS … MUSA

Created on 2020-01-08 by the reprex package (v0.3.0)

All three affected calls were held before 2005-11-30, so I've marked them as Y (using the 89769 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(89769L, 93323L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 27/33: Same company, change permno over time.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  39087  21833 1963-04-10 1968-01-01 <NA>     UNITED UTILITIES INC  UT    
#> 2  39087  21833 1968-01-02 1972-06-04 91313010 UNITED UTILITIES INC  UT    
#> 3  39087  21833 1972-06-05 1992-02-26 91302510 UNITED TELECOMMUNICA… UT    
#> 4  39087  21833 1992-02-27 2005-08-14 85206110 SPRINT CORP           FON   
#> 5  86414  21833 1998-11-24 2004-04-22 85206150 SPRINT CORP           PCS   
#> 6  39087  21833 2005-08-15 2013-07-10 85206110 SPRINT NEXTEL CORP    S     
#> 7  14040  54473 2013-07-12 2018-12-31 85207U10 SPRINT CORP NEW       S

Created on 2020-01-08 by the reprex package (v0.3.0)

Two calls were held before 2013-07-10, so I've marked them as Y (using permno 39087). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(39087L, 14040L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 28/33: AMSURG merged with Envision Healthcare on 2016-12-02.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  85675  15867 1997-12-04 2001-07-12 03232P10 AMSURG CORP           AMSGA 
#> 2  85676  15867 1997-12-04 2001-07-12 03232P20 AMSURG CORP           AMSGB 
#> 3  85675  15867 2001-07-13 2016-12-01 03232P40 AMSURG CORP           AMSG  
#> 4  14065  54492 2013-08-14 2016-12-01 29413U10 ENVISION HEALTHCARE … EVHC  
#> 5  85675  15867 2016-12-02 2018-10-10 29414D10 ENVISION HEALTHCARE … EVHC

Created on 2020-01-08 by the reprex package (v0.3.0)

I've marked the joint announcement as X (using permno 85675). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Auto-refreshing stale OAuth token.
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(85675L, 14065L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 29/33: Two companies, two permnos - I can't really tell the relationship for now. Can be an error from crsp. Google:

Two firms are parent and subsidiary. PAA Natural Gas Storage, L.P. is a publicly traded master limited partnership engaged in the development, acquisition, operation and commercial management of natural gas storage facilities. The Partnership owns and operates three natural gas storage facilities located in Louisiana, Mississippi and Michigan. The Partnership's general partner, as well as the majority of the Partnership's limited partner interests, is owned by Plains All American Pipeline, L.P. PNG is headquartered in Houston, Texas.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  89858  34920 1998-11-18 2018-12-31 72650310 PLAINS ALL AMERN PIP… PAA   
#> 2  93329  53382 2010-04-30 2013-12-31 69313910 P A A NATURAL GAS ST… PNG

Created on 2020-01-08 by the reprex package (v0.3.0)

I've coded this as IDG_TO_CHECK as I don't know which one should be used. I would suggest X as it refers to the parent one (I've put the permno for the parent in the permno.z column). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(89858L, 93329L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 30/33: Same firm changed permno over years.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam             ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>              <chr> 
#> 1  91120  50320 2006-02-14 2007-07-06 84651320 SPARK NETWORKS PLC LOV   
#> 2  92185  52531 2007-07-09 2017-11-02 84651P10 SPARK NETWORKS INC LOV

Created on 2020-01-08 by the reprex package (v0.3.0)

I've coded the call as Y (using permno 91120) because it was held before 2007-07-06. After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(91120L, 92185L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 31/33: Same firm changed permno over years.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam             ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>              <chr> 
#> 1  92185  52531 2007-07-09 2017-11-02 84651P10 SPARK NETWORKS INC LOV   
#> 2  17100  56145 2017-11-17 2018-12-31 84651710 SPARK NETWORKS S E LOV

Created on 2020-01-08 by the reprex package (v0.3.0)

The affected call was held after 2017-11-02, so I've coded it as Y (using the 17100 permno). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(17100L, 92185L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 32/33: A M&A case.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  90064  44969 2004-03-19 2007-08-01 14028810 CAPITAL LEASE FUNDIN… LSE   
#> 2  90064  44969 2007-08-02 2013-11-05 14028810 CAPLEASE INC          LSE   
#> 3  13003  53881 2011-09-07 2015-07-30 02917T10 AMERICAN REALTY CAPI… ARCP  
#> 4  13003  53881 2015-07-31 2018-12-31 92339V10 VEREIT INC            VER

Created on 2020-01-08 by the reprex package (v0.3.0)

I've marked the joint announcement of M&A as X (using permno 13003). After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(90064L, 13003L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Case 33/33: Hooray! This should be the last one. Seems to be a same company - but there was some overlap in the records. Google:

The company filed for Chapter 11 bankruptcy in 2012 after the SEC accused CEO Morten Arntzen and CFO Miles Itkin of falsifying financial statements.[7] In 2017, the company paid a $75,000 fine to the SEC to settle the securities fraud allegations.

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#>   permno permco namedt     nameenddt  ncusip   comnam                ticker
#>    <int>  <int> <date>     <date>     <chr>    <chr>                 <chr> 
#> 1  51457  21346 1970-11-05 1973-08-08 69036810 OVERSEAS SHIPHOLDING… OSG   
#> 2  51457  21346 1973-08-09 2012-11-13 69036810 OVERSEAS SHIPHOLDING… OSG   
#> 3  14976  55056 2014-10-09 2016-05-27 69036R40 OVERSEAS SHIPHOLDING… OSGB  
#> 4  15860  55056 2015-12-18 2016-06-12 69036R30 OVERSEAS SHIPHOLDING… OSG   
#> 5  15860  55056 2016-06-13 2016-06-27 69036R86 OVERSEAS SHIPHOLDING… OSG   
#> 6  15860  55056 2016-06-28 2018-12-31 69036R86 OVERSEAS SHIPHOLDING… OSG

Created on 2020-01-08 by the reprex package (v0.3.0)

This call was last updated on 2013-05-30 (which is not covered by the above records - should be due to the bankruptcy + fraud issues) and it was for the Q2 2012 OSG earnings. I would prefer the 51457 permno so I've marked it as Y (using 51457 permno). @iangow You might want to double check on this one as this is quite unusual.

After doing this,

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)

gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

# Edit this line to reflect the PERMNOs in the code above in this same comment
permno_list <- c(15860L, 51457L)

to_investigate %>% 
  filter(permno.x %in% permno_list,
         permno.y %in% permno_list) %>%
  select(correct, event_title)
#> # A tibble: 0 x 2
#> # … with 2 variables: correct <chr>, event_title <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

Checked that 33 cases are all done and there's nothing left!

library(googlesheets4)
library(dplyr, warn.conflicts = FALSE)
library(DBI)
library(reprex)

pg <- dbConnect(RPostgres::Postgres())

# You may need to run gs_auth() to set this up
gs <- "14F6zjJQZRsf5PonOfZ0GJrYubvx5e_eHMV_hCGe42Qg"

# You may need to edit this line
options(gargle_oauth_email = "yanzih1@student.unimelb.edu.au")
options(httr_oob_default=TRUE)

diff_permcos <- read_sheet(gs, sheet = "diff_permcos") 
#> Reading from 'streetevents.manual_permno_matches'
#> Range "'diff_permcos'"

stocknames <- tbl(pg, sql("SELECT * FROM crsp.stocknames"))

to_investigate <-
  diff_permcos %>%
  filter(correct %in% c('DN', 'NONE'))

sample <-
  to_investigate %>%
  group_by(permno.x, permno.y) %>%
  count() %>%
  ungroup() %>%
  top_n(1, wt=n) 

sample_permcos <-
  to_investigate %>%
  semi_join(sample) %>%
  select(starts_with("permco")) %>%
  distinct() %>%
  .[1, ] %>% 
  as.integer()
#> Joining, by = c("permno.x", "permno.y")
#> Joining, by = c("permno.x", "permno.y")

stocknames %>%
  filter(permco %in% sample_permcos) %>%
  select(permno, permco, namedt, nameenddt, ncusip, comnam, ticker) %>% 
  arrange(namedt) %>%
  print(n=Inf)
#> # Source:     lazy query [?? x 7]
#> # Database:   postgres [yanzih1@10.101.13.99:5432/crsp]
#> # Ordered by: namedt
#> # … with 7 variables: permno <int>, permco <int>, namedt <date>,
#> #   nameenddt <date>, ncusip <chr>, comnam <chr>, ticker <chr>

Created on 2020-01-08 by the reprex package (v0.3.0)

Yvonne-Han commented 4 years ago

@iangow I know I've posted too many things so here's a summary for you:

Todo list for you:

iangow commented 4 years ago

9/33: Went with 85625. 21/33: Went with your "Z" PERMNO. This is likely the result of a series of mergers. 23/33: Found PERMNO for Catellus (76513). 29/33: Used parent's PERMNO when both firms were involved.

iangow commented 4 years ago

@Yvonne-Han I think you've correctly handled the other three cases above (5, 11, 33). So I think this is all good.