iangow / annie_thesis

0 stars 0 forks source link

Get M&A data. #1

Open iangow opened 4 months ago

iangow commented 4 months ago
library(readxl)
library(tidyverse)

data_file <- str_c("~/Downloads/",
                   "SPGlobal_Export_6-20-2024",
                   "_a742c5d9-0985-410d-8617-307023545225.xlsx")

col_names <- c("target_name", "transacion_id", "announced_date", 
               "transaction_type", "transaction_status", "transaction_value",
               "buyers", "sellers", "target_geography", "buyer_geography", 
               "seller_geography", "deal_value",  "deal_value_reported", 
               "announcement_date", "termination_date", "target_name_alt", 
               "target_key", "target_entity_id", "target_spciq_id",
               "target_ticker",  "target_cusip")
ma_df <- read_excel(data_file, skip = 6, col_names = col_names)
ma_df
#> # A tibble: 3,881 × 21
#>    target_name                transacion_id announced_date      transaction_type
#>    <chr>                      <chr>         <dttm>              <chr>           
#>  1 0.5Ha of Prime Water loca… SPTRD1561234  2019-04-05 00:00:00 M&A - Asset     
#>  2 067 587 991 Pty Ltd        SPTRD712739   2009-03-31 00:00:00 M&A - Whole     
#>  3 1.2% NSR on Séguéla proje… SPTRD1819314  2021-03-17 00:00:00 M&A - Asset     
#>  4 10% of Karpatia Gas and 1… SPTRD447085   2011-12-23 00:00:00 M&A - Asset     
#>  5 100 Bedrooms of Camp Capa… SPTRD1636211  2019-05-14 00:00:00 M&A - Asset     
#>  6 100% in Once Life Pty, 10… SPTRD598920   2011-05-16 00:00:00 M&A - Asset     
#>  7 103km2 area                SPTRD1731295  2020-05-04 00:00:00 M&A - Asset     
#>  8 10557501 Canada Corp       SPTRD1566235  2018-11-27 00:00:00 M&A - Whole     
#>  9 11 Freehold Child Care Ce… SPTRD683513   2005-08-15 00:00:00 M&A - Asset     
#> 10 11 State Office Buildings… SPTRD619067   2010-10-11 00:00:00 M&A - Asset     
#> # ℹ 3,871 more rows
#> # ℹ 17 more variables: transaction_status <chr>, transaction_value <chr>,
#> #   buyers <chr>, sellers <chr>, target_geography <chr>, buyer_geography <chr>,
#> #   seller_geography <chr>, deal_value <chr>, deal_value_reported <chr>,
#> #   announcement_date <dttm>, termination_date <dttm>, target_name_alt <chr>,
#> #   target_key <chr>, target_entity_id <chr>, target_spciq_id <chr>,
#> #   target_ticker <chr>, target_cusip <chr>
ma_df |> count(transaction_type)
#> # A tibble: 4 × 2
#>   transaction_type              n
#>   <chr>                     <int>
#> 1 M&A - Asset                1706
#> 2 M&A - Minority              234
#> 3 M&A - Spinoff or Splitoff    35
#> 4 M&A - Whole                1906

ma_df |> 
  filter(transaction_type == "M&A - Whole") |>
  select(target_ticker)
#> # A tibble: 1,906 × 1
#>    target_ticker          
#>    <chr>                  
#>  1 067 587 991 Pty Ltd    
#>  2 Rossiter Mining Corp.  
#>  3 123 Careers Pty. Ltd.  
#>  4 123gaming Inc.         
#>  5 1344346 B.C. Ltd.      
#>  6 1-800 Bardots          
#>  7 24Nme, LLC             
#>  8 2Up Holdings Pty Ltd   
#>  9 3D Global Ltd.         
#> 10 3D Energi Limited (TDO)
#> # ℹ 1,896 more rows

ma_df |> 
  mutate(announced_date = as.Date(announced_date),
         ma = transaction_type == "M&A - Whole") |>
  ggplot(aes(x = announced_date, fill = ma)) +
  geom_histogram() +
  scale_x_date(breaks = "1 year", date_labels = "%Y") + 
  theme(axis.text.x = element_text(angle = 90))
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Created on 2024-06-22 with reprex v2.1.0

iangow commented 3 months ago

Perhaps you need to clean the data up to extract IDs. I'm not sure how to link these IDs to another database (e.g., one with prices).

library(readxl)
library(tidyverse)

data_file <- str_c("~/Downloads/",
                   "SPGlobal_Export_6-20-2024",
                   "_a742c5d9-0985-410d-8617-307023545225.xlsx")

col_names <- c("target_name", "transacion_id", "announced_date", 
               "transaction_type", "transaction_status", "transaction_value",
               "buyers", "sellers", "target_geography", "buyer_geography", 
               "seller_geography", "deal_value",  "deal_value_reported", 
               "announcement_date", "termination_date", "target_name_alt", 
               "target_key", "target_entity_id", "target_spciq_id",
               "target_ticker",  "target_cusip")
ma_df <- read_excel(data_file, skip = 6, col_names = col_names)
ma_df |>
  select(target_entity_id) 
#> # A tibble: 3,881 × 1
#>    target_entity_id                                                             
#>    <chr>                                                                        
#>  1 <NA>                                                                         
#>  2 067 587 991 Pty Ltd (5115720)                                                
#>  3 <NA>                                                                         
#>  4 Karpatia Gas s.r.o. (7600768); UNICA Exploration s.r.o. (7600770)            
#>  5 <NA>                                                                         
#>  6 Infochoice Ltd. (5045973); Bidmy Pty Ltd (5220737); Once Life Pty Limited (7…
#>  7 <NA>                                                                         
#>  8 Rossiter Mining Corp. (10421212)                                             
#>  9 <NA>                                                                         
#> 10 <NA>                                                                         
#> # ℹ 3,871 more rows

ma_df |>
  select(transacion_id, target_spciq_id) |>
  separate_longer_delim(target_spciq_id, ';') |>
  separate_wider_regex(target_spciq_id,
                       c(target_name = "^[^(]+", "\\(", 
                         target_spciq_id = ".*", "\\)$")) |>
  mutate(target_name = str_trim(target_name)) |>
  filter(!is.na(target_name))
#> # A tibble: 2,418 × 3
#>    transacion_id target_name              target_spciq_id
#>    <chr>         <chr>                    <chr>          
#>  1 SPTRD712739   067 587 991 Pty Ltd      IQ30518756     
#>  2 SPTRD447085   Karpatia Gas s.r.o.      IQ145130204    
#>  3 SPTRD447085   UNICA Exploration s.r.o. IQ145130265    
#>  4 SPTRD598920   Infochoice Ltd.          IQ4493603      
#>  5 SPTRD598920   Bidmy Pty Ltd            IQ132863239    
#>  6 SPTRD598920   Once Life Pty Limited    IQ60228659     
#>  7 SPTRD1566235  Rossiter Mining Corp.    IQ557631878    
#>  8 SPTRD1137523  123 Careers Pty. Ltd.    IQ49253923     
#>  9 SPTRD1695976  123gaming Inc.           IQ421074463    
#> 10 SPTRD1973134  1344346 B.C. Ltd.        IQ1773950522   
#> # ℹ 2,408 more rows

Created on 2024-06-26 with reprex v2.1.0

iangow commented 3 months ago

More code. Perhaps easiest to make separate tables then join up later if need be.

library(readxl)
library(tidyverse)

data_file <- str_c("~/Downloads/",
                   "SPGlobal_Export_6-20-2024",
                   "_a742c5d9-0985-410d-8617-307023545225.xlsx")

col_names <- c("target_name", "transacion_id", "announced_date", 
               "transaction_type", "transaction_status", "transaction_value",
               "buyers", "sellers", "target_geography", "buyer_geography", 
               "seller_geography", "deal_value",  "deal_value_reported", 
               "announcement_date", "termination_date", "target_name_alt", 
               "target_key", "target_entity_id", "target_spciq_id",
               "target_ticker",  "target_cusip")
ma_df <- read_excel(data_file, skip = 6, col_names = col_names)
ma_df |>
  select(target_entity_id) 
#> # A tibble: 3,881 × 1
#>    target_entity_id                                                             
#>    <chr>                                                                        
#>  1 <NA>                                                                         
#>  2 067 587 991 Pty Ltd (5115720)                                                
#>  3 <NA>                                                                         
#>  4 Karpatia Gas s.r.o. (7600768); UNICA Exploration s.r.o. (7600770)            
#>  5 <NA>                                                                         
#>  6 Infochoice Ltd. (5045973); Bidmy Pty Ltd (5220737); Once Life Pty Limited (7…
#>  7 <NA>                                                                         
#>  8 Rossiter Mining Corp. (10421212)                                             
#>  9 <NA>                                                                         
#> 10 <NA>                                                                         
#> # ℹ 3,871 more rows

target_spciq_ids <-
  ma_df |>
  select(transacion_id, target_spciq_id) |>
  separate_longer_delim(target_spciq_id, ';') |>
  separate_wider_regex(target_spciq_id,
                       c(target_name = "^[^(]+", "\\(", 
                         target_spciq_id = ".*", "\\)$")) |>
  mutate(target_name = str_trim(target_name)) |>
  filter(!is.na(target_name))

target_cusips <-
  ma_df |>
  select(transacion_id, target_cusip) |>
  separate_longer_delim(target_cusip, ';') |>
  separate_wider_regex(target_cusip,
                       c(target_name = "^[^(]+", "\\(", 
                         target_cusip = ".*?", "\\)$"),
                       too_few = "align_start") |>
  mutate(target_name = str_trim(target_name)) |>
  filter(!is.na(target_cusip), target_cusip != "")
target_cusips
#> # A tibble: 1,036 × 3
#>    transacion_id target_name                    target_cusip
#>    <chr>         <chr>                          <chr>       
#>  1 SPTRD598920   Infochoice Ltd.                Q4933U109   
#>  2 SPTRD1671620  3D Global Ltd.                 Q90344104   
#>  3 SPTRD650345   3D Energi Limited              Q9034H113   
#>  4 SPTRD1760711  3P Learning Limited            Q9034Z105   
#>  5 SPTRD296253   4DS Memory Limited             Q3946R103   
#>  6 SPTRD437754   Pursuit Minerals Limited       Q7778J105   
#>  7 SPTRD393679   A.C.N. 079 010 772 Pty Ltd     Q91940116   
#>  8 SPTRD535638   The a2 Milk Company Limited    Q2774Q104   
#>  9 SPTRD470714   Applabs Technologies Limited   Q0261S138   
#> 10 SPTRD1647227  Abano Healthcare Group Limited Q0010H102   
#> # ℹ 1,026 more rows

target_tickers <-
  ma_df |>
  select(transacion_id, target_ticker) |>
  separate_longer_delim(target_ticker, ';') |>
  separate_wider_regex(target_ticker,
                       c(target_name = "^[^(]+", "\\(", 
                         target_ticker = ".*?", "\\)$"),
                       too_few = "align_start") |>
  mutate(target_name = str_trim(target_name)) |>
  filter(!is.na(target_ticker), target_ticker != "")
target_tickers
#> # A tibble: 1,049 × 3
#>    transacion_id target_name                    target_ticker
#>    <chr>         <chr>                          <chr>        
#>  1 SPTRD598920   Infochoice Ltd.                ICH          
#>  2 SPTRD650345   3D Energi Limited              TDO          
#>  3 SPTRD1760711  3P Learning Limited            3PL          
#>  4 SPTRD296253   4DS Memory Limited             4DS          
#>  5 SPTRD437754   Pursuit Minerals Limited       PUR          
#>  6 SPTRD393679   A.C.N. 079 010 772 Pty Ltd     TVL          
#>  7 SPTRD535638   The a2 Milk Company Limited    ATM          
#>  8 SPTRD470714   Applabs Technologies Limited   ALA          
#>  9 SPTRD1647227  Abano Healthcare Group Limited ABA          
#> 10 SPTRD398358   Abano Healthcare Group Limited ABA          
#> # ℹ 1,039 more rows

Created on 2024-06-26 with reprex v2.1.0

iangow commented 3 months ago

Looking across the "global" tables on Compustat, I don't see any identifiers shared with the data file you sent. Below I picked on a2 Milk, as it was one name I recognised on the list.

library(readxl)
library(tidyverse)
library(farr)
#> 
#> Attaching package: 'farr'
#> The following object is masked from 'package:base':
#> 
#>     truncate
library(DBI)

db <- dbConnect(duckdb::duckdb())

g_secd <- load_parquet(db, schema = "comp", table = "g_secd")
g_names <- load_parquet(db, schema = "comp", table = "g_names")
g_company <- load_parquet(db, schema = "comp", table = "g_company")
g_security <- load_parquet(db, schema = "comp", table = "g_security")

g_secd |> filter(gvkey == "275289")  |> select(exchg:tpci) |> distinct()
#> # Source:   SQL [2 x 5]
#> # Database: DuckDB v1.0.0 [root@Darwin 23.6.0:R 4.4.0/:memory:]
#>   exchg isin         secstat sedol   tpci 
#>   <int> <chr>        <chr>   <chr>   <chr>
#> 1   225 NZATME0002S8 A       6287250 0    
#> 2   106 <NA>         A       BWSRTS7 0
g_names |> filter(gvkey == "275289")
#> # Source:   SQL [1 x 12]
#> # Database: DuckDB v1.0.0 [root@Darwin 23.6.0:R 4.4.0/:memory:]
#>   gvkey  conm     sedol isin  fic   costat sic   naics gsubind gind  year1 year2
#>   <chr>  <chr>    <chr> <chr> <chr> <chr>  <chr> <chr> <chr>   <chr> <dbl> <dbl>
#> 1 275289 THE A2 … 6287… NZAT… NZL   A      2020  3115… 302020… 3020…  2000  2023
g_company |> filter(gvkey == "275289")
#> # Source:   SQL [1 x 39]
#> # Database: DuckDB v1.0.0 [root@Darwin 23.6.0:R 4.4.0/:memory:]
#>   conm     gvkey add1  add2  add3  add4  addzip busdesc cik   city  conml costat
#>   <chr>    <chr> <chr> <chr> <chr> <chr> <chr>  <chr>   <chr> <chr> <chr> <chr> 
#> 1 THE A2 … 2752… 51 S… <NA>  <NA>  <NA>  1010   The a2… 0001… Auck… The … A     
#> # ℹ 27 more variables: county <chr>, dlrsn <chr>, ein <chr>, fax <chr>,
#> #   fic <chr>, fyrc <int>, ggroup <chr>, gind <chr>, gsector <chr>,
#> #   gsubind <chr>, idbflag <chr>, incorp <chr>, loc <chr>, naics <chr>,
#> #   phone <chr>, prican <chr>, prirow <chr>, priusa <chr>, sic <chr>,
#> #   spcindcd <int>, spcseccd <int>, spcsrc <chr>, state <chr>, stko <int>,
#> #   weburl <chr>, dldte <date>, ipodate <date>
g_security |> filter(gvkey == "275289")
#> # Source:   SQL [3 x 15]
#> # Database: DuckDB v1.0.0 [root@Darwin 23.6.0:R 4.4.0/:memory:]
#>   tic   gvkey  iid   cusip dlrsni dsci   epf   exchg excntry ibtic isin  secstat
#>   <chr> <chr>  <chr> <chr> <chr>  <chr>  <chr> <int> <chr>   <chr> <chr> <chr>  
#> 1 <NA>  275289 01W   <NA>  <NA>   ORD N… Y       225 NZL     @AAIX NZAT… A      
#> 2 <NA>  275289 02W   <NA>  <NA>   PARTL… Y         1 NZL     <NA>  <NA>  A      
#> 3 <NA>  275289 03W   <NA>  <NA>   ORD N… N       106 AUS     @00YX <NA>  A      
#> # ℹ 3 more variables: sedol <chr>, tpci <chr>, dldtei <date>

data_file <- str_c("~/Downloads/",
                   "SPGlobal_Export_6-20-2024",
                   "_a742c5d9-0985-410d-8617-307023545225.xlsx")

col_names <- c("target_name", "transacion_id", "announced_date", 
               "transaction_type", "transaction_status", "transaction_value",
               "buyers", "sellers", "target_geography", "buyer_geography", 
               "seller_geography", "deal_value",  "deal_value_reported", 
               "announcement_date", "termination_date", "target_name_alt", 
               "target_key", "target_entity_id", "target_spciq_id",
               "target_ticker",  "target_cusip")
ma_df <- read_excel(data_file, skip = 6, col_names = col_names)
ma_df |>
  select(target_entity_id) 
#> # A tibble: 3,881 × 1
#>    target_entity_id                                                             
#>    <chr>                                                                        
#>  1 <NA>                                                                         
#>  2 067 587 991 Pty Ltd (5115720)                                                
#>  3 <NA>                                                                         
#>  4 Karpatia Gas s.r.o. (7600768); UNICA Exploration s.r.o. (7600770)            
#>  5 <NA>                                                                         
#>  6 Infochoice Ltd. (5045973); Bidmy Pty Ltd (5220737); Once Life Pty Limited (7…
#>  7 <NA>                                                                         
#>  8 Rossiter Mining Corp. (10421212)                                             
#>  9 <NA>                                                                         
#> 10 <NA>                                                                         
#> # ℹ 3,871 more rows

target_keys <-
  ma_df |>
  select(transacion_id, target_key) |>
  separate_longer_delim(target_key, ';') |>
  separate_wider_regex(target_key,
                       c(target_name = "^[^(]+", "\\(", 
                         target_key = ".*?", "\\)$"),
                       too_few = "align_start") |>
  mutate(target_name = str_trim(target_name)) |>
  filter(!is.na(target_key), target_key != "")

target_keys |>
  filter(str_detect(target_name, "a2 Milk"))
#> # A tibble: 2 × 3
#>   transacion_id target_name                 target_key
#>   <chr>         <chr>                       <chr>     
#> 1 SPTRD535638   The a2 Milk Company Limited 4912768   
#> 2 SPTRD787585   The a2 Milk Company Limited 4912768

target_entity_ids <-
  ma_df |>
  select(transacion_id, target_entity_id) |>
  separate_longer_delim(target_entity_id, ';') |>
  separate_wider_regex(target_entity_id,
                       c(target_name = "^[^(]+", "\\(", 
                         target_entity_id = ".*?", "\\)$"),
                       too_few = "align_start") |>
  mutate(target_name = str_trim(target_name)) |>
  filter(!is.na(target_entity_id), target_entity_id != "")
target_entity_ids |>
  filter(str_detect(target_name, "a2 Milk"))
#> # A tibble: 2 × 3
#>   transacion_id target_name                 target_entity_id
#>   <chr>         <chr>                       <chr>           
#> 1 SPTRD535638   The a2 Milk Company Limited 4912768         
#> 2 SPTRD787585   The a2 Milk Company Limited 4912768

target_spciq_ids <-
  ma_df |>
  select(transacion_id, target_spciq_id) |>
  separate_longer_delim(target_spciq_id, ';') |>
  separate_wider_regex(target_spciq_id,
                       c(target_name = "^[^(]+", "\\(", 
                         target_spciq_id = ".*", "\\)$")) |>
  mutate(target_name = str_trim(target_name)) |>
  filter(!is.na(target_name))

target_cusips <-
  ma_df |>
  select(transacion_id, target_cusip) |>
  separate_longer_delim(target_cusip, ';') |>
  separate_wider_regex(target_cusip,
                       c(target_name = "^[^(]+", "\\(", 
                         target_cusip = ".*?", "\\)$"),
                       too_few = "align_start") |>
  mutate(target_name = str_trim(target_name)) |>
  filter(!is.na(target_cusip), target_cusip != "")

target_cusips |>
  filter(str_detect(target_name, "a2 Milk"))
#> # A tibble: 2 × 3
#>   transacion_id target_name                 target_cusip
#>   <chr>         <chr>                       <chr>       
#> 1 SPTRD535638   The a2 Milk Company Limited Q2774Q104   
#> 2 SPTRD787585   The a2 Milk Company Limited Q2774Q104

target_tickers <-
  ma_df |>
  select(transacion_id, target_ticker) |>
  separate_longer_delim(target_ticker, ';') |>
  separate_wider_regex(target_ticker,
                       c(target_name = "^[^(]+", "\\(", 
                         target_ticker = ".*?", "\\)$"),
                       too_few = "align_start") |>
  mutate(target_name = str_trim(target_name)) |>
  filter(!is.na(target_ticker), target_ticker != "")

target_tickers |>
  filter(str_detect(target_name, "a2 Milk"))
#> # A tibble: 2 × 3
#>   transacion_id target_name                 target_ticker
#>   <chr>         <chr>                       <chr>        
#> 1 SPTRD535638   The a2 Milk Company Limited ATM          
#> 2 SPTRD787585   The a2 Milk Company Limited ATM

Created on 2024-06-26 with reprex v2.1.0

iangow commented 3 months ago

You can probably get to returns from Compustat using tickers than ISINs. I put a mapping from current ASX tickers to ISIN in the far data directory. Below is some code using these data. I could explain the process to use in the meeting.

I would do ticker matches, then name matches for the rest, then approximate name matches, then perhaps some manual searches to fill out the balance. Each step would involve manual checks to confirm matches. I would recommend using Google Sheets to accumulate the data for reasons I can explain.

library(readxl)
library(tidyverse)
library(farr, warn.conflicts = FALSE)
library(DBI)
library(readxl)

db <- dbConnect(duckdb::duckdb())

isins <- load_parquet(db, schema = "asx", table = "isins")
g_secd <- load_parquet(db, schema = "comp", table = "g_secd")
g_names <- load_parquet(db, schema = "comp", table = "g_names")
g_company <- load_parquet(db, schema = "comp", table = "g_company")
g_security <- load_parquet(db, schema = "comp", table = "g_security")

data_file <- str_c("~/Downloads/",
                   "SPGlobal_Export_6-20-2024",
                   "_a742c5d9-0985-410d-8617-307023545225.xlsx")

col_names <- c("target_name", "transaction_id", "announced_date", 
               "transaction_type", "transaction_status", "transaction_value",
               "buyers", "sellers", "target_geography", "buyer_geography", 
               "seller_geography", "deal_value",  "deal_value_reported", 
               "announcement_date", "termination_date", "target_name_alt", 
               "target_key", "target_entity_id", "target_spciq_id",
               "target_ticker",  "target_cusip")
ma_df <- read_excel(data_file, skip = 6, col_names = col_names) 

target_tickers <-
  ma_df |>
  select(transaction_id, target_ticker) |>
  separate_longer_delim(target_ticker, ';') |>
  separate_wider_regex(target_ticker,
                       c(target_name = "^[^(]+", "\\(", 
                         target_ticker = ".*?", "\\)$"),
                       too_few = "align_start") |>
  mutate(target_name = str_trim(target_name)) |>
  filter(!is.na(target_ticker), target_ticker != "") |>
  copy_to(db, df = _, name = "target_tickers", overwrite = TRUE)

candidate_matches_ticker <-
  target_tickers |>
  inner_join(isins, join_by(target_ticker == asx_code)) |>
  collect() |>
  write_csv("~/Google Drive/My Drive/data/candidate_matches_ticker.csv")

candidate_matches_ticker
#> # A tibble: 448 × 6
#>    transaction_id target_name target_ticker company_name security_type isin_code
#>    <chr>          <chr>       <chr>         <chr>        <chr>         <chr>    
#>  1 SPTRD650345    3D Energi … TDO           3D ENERGI L… ORDINARY FUL… AU000000…
#>  2 SPTRD1760711   3P Learnin… 3PL           3P LEARNING… ORDINARY FUL… AU000000…
#>  3 SPTRD296253    4DS Memory… 4DS           4DS MEMORY … ORDINARY FUL… AU000000…
#>  4 SPTRD171741    Acrow Limi… ACF           ACROW LIMIT… ORDINARY FUL… AU000000…
#>  5 SPTRD696006    Acuvax Lim… ACU           ACUMENTIS G… ORDINARY FUL… AU000007…
#>  6 SPTRD311100    Ascension … ACE           ACUSENSUS L… ORDINARY FUL… AU000025…
#>  7 SPTRD952863    Adbri Limi… ABC           ADBRI LIMIT… ORDINARY FUL… AU000000…
#>  8 SPTRD1877350   Adherium L… ADR           ADHERIUM LTD ORDINARY FUL… AU000000…
#>  9 SPTRD767509    Admiralty … ADY           ADMIRALTY R… ORDINARY FUL… AU000000…
#> 10 SPTRD2055280   Adore Beau… ABY           ADORE BEAUTY ORDINARY FUL… AU000010…
#> # ℹ 438 more rows
candidate_matches_ticker |> count()
#> # A tibble: 1 × 1
#>       n
#>   <int>
#> 1   448

candidate_matches_name <-
  target_tickers |>
  mutate(target_name = str_to_upper(target_name)) |>
  inner_join(isins, join_by(target_name == company_name)) |>
  collect() |>
  write_csv("~/Google Drive/My Drive/data/candidate_matches_name.csv")

candidate_matches_name
#> # A tibble: 299 × 6
#>    transaction_id target_name     target_ticker asx_code security_type isin_code
#>    <chr>          <chr>           <chr>         <chr>    <chr>         <chr>    
#>  1 SPTRD296253    4DS MEMORY LIM… 4DS           4DS      ORDINARY FUL… AU000000…
#>  2 SPTRD296253    4DS MEMORY LIM… 4DS           4DSAAA   OPTION EXPIR… AU000032…
#>  3 SPTRD296253    4DS MEMORY LIM… 4DS           4DSAAB   OPTION EXPIR… AU000032…
#>  4 SPTRD296253    4DS MEMORY LIM… 4DS           4DSAW    OPTION EXPIR… AU000022…
#>  5 SPTRD296253    4DS MEMORY LIM… 4DS           4DSAX    OPTION EXPIR… AU000026…
#>  6 SPTRD296253    4DS MEMORY LIM… 4DS           4DSAY    OPTION EXPIR… AU000026…
#>  7 SPTRD296253    4DS MEMORY LIM… 4DS           4DSAZ    OPTION EXPIR… AU000027…
#>  8 SPTRD171741    ACROW LIMITED   ACF           ACF      ORDINARY FUL… AU000000…
#>  9 SPTRD171741    ACROW LIMITED   ACF           ACFXX    SHORTFALL PL… AU000033…
#> 10 SPTRD171741    ACROW LIMITED   ACF           ACFAD    OPTION EXPIR… AU000000…
#> # ℹ 289 more rows
candidate_matches_name |> count()
#> # A tibble: 1 × 1
#>       n
#>   <int>
#> 1   299

Created on 2024-06-27 with reprex v2.1.0

iangow commented 3 months ago

I recently did something similar to this project, but linking IPOs (in place of M&A deals) with Compustat.

Here is some code related to that. I can explain what I did and you could use it as a "template" of sorts (you will have different issues) to work from.

atjianguom commented 3 months ago

Thanks Ian for the guidance. I note you mentioned to match based on ISINs but I got a bit confused when trying to link databases. Hence, I've used an alternative approach, with the code below.

library(tidyverse)
library(DBI)
library(farr)
library(stringr)
library(writexl)

Sys.setenv(DATA_DIR = "C:/Users/Annie/Desktop/far")

db <- dbConnect(duckdb::duckdb())

isins <- load_parquet(db, schema = "asx", table = "isins")
wrds_ciqsymbol <- load_parquet(db, schema = "ciq", table = "wrds_ciqsymbol")
wrds_gvkey <- load_parquet(db, schema = "ciq", table = "wrds_gvkey")
g_secd <- load_parquet(db, schema = "comp", table = "g_secd")
g_names <- load_parquet(db, schema = "comp", table = "g_names")
g_company <- load_parquet(db, schema = "comp", table = "g_company")
g_security <- load_parquet(db, schema = "comp", table = "g_security")

data_file <-str_c("C:/Users/Annie/Documents/far/0 - THESIS",
                  "/240623 CapIQ Export.xlsm")

col_names <- c("target_name", "mi_transaction_id", "target_ticker", "announced_name", "transaction_type", "transaction_status", "transaction_value_audm", "buyer", "seller", "target_geography", "buyer_geography", "seller_geopgrahy", "snl_deal_key", "snl_offering_key", "ciq_transaction_id", "target_id", "transaction_geography", "transaction_industry_ciq_gics", "transaction_industry_mi", "announced_date", "definitive_agreement_date", "expected_completion_date_start", "expected_completion_date_end", "termination_date", "pe_involvement", "deal_summary", "transaction_value_audm1", "sponsor_backed", "consideration_description", "buyer_id", "buyer_ticker", "actual_acquirer_name", "actual_acquirer_ticker", "actual_acquirer_country", "target_country", "deal_value_audm", "minority_interest", "deal_conditions", "deal_attitude", "deal_approach", "m&a_announcement_date", "m&a_termination_date", "target_name1", "target_mi_key", "target_entity_id", "target_spciq_id", "target_cusip", "target_company_industry")

ma_df <- read_excel(data_file, skip = 6, col_names = col_names)
# Testing whether target_spciq_id is companyid using A2 Milk
wrds_gvkey |>
  filter(companyname == "The a2 Milk Company Limited") |>
  select(companyid, companyname)

target_spciq_id_table |>
  filter(target_name == "The a2 Milk Company Limited") |>
  select(target_spciq_id, target_name)
# Linking Compustat and Capital IQ using wrds_gvkey linking table
target_spciq_id_table <-
  ma_df|>
  select(mi_transaction_id, target_spciq_id, target_ticker) |>
  separate_longer_delim(target_spciq_id, ';') |>
  separate_wider_regex(target_spciq_id,
                       c(target_name = "^[^(]+", "\\(", 
                         target_spciq_id = ".*", "\\)$")) |>
  mutate(target_name = str_trim(target_name), 
         target_spciq_id = str_remove(target_spciq_id, "^IQ")) |>
  filter(!is.na(target_name))

wrds_gvkey <-
  wrds_gvkey |>
  mutate(companyid = as.character(companyid))

ma_df_linked <-
  target_spciq_id_table |>
  inner_join(wrds_gvkey, by = c("target_spciq_id" = "companyid"), copy = TRUE) |>
  select(mi_transaction_id, target_name, target_spciq_id, gvkey, target_ticker) |>
  separate_longer_delim(target_ticker, ';') |>
  separate_wider_regex(target_ticker,
                       c(target_name1 = "^[^(]+", "\\(", 
                         target_ticker = ".*?", "\\)$"),
                       too_few = "align_start") |>
  filter(!is.na(target_ticker) & target_ticker != "") |>
  select(mi_transaction_id, target_name, target_spciq_id, gvkey, target_ticker)

g_security <-
  g_security |>
  mutate(gvkey = as.character(gvkey))

final_linked <-
  ma_df_linked |>
  inner_join(g_security, by = "gvkey", copy = TRUE) |>
  select(mi_transaction_id, target_name, target_spciq_id, gvkey, target_ticker, isin) |>
  filter(!is.na(isin) & isin != "") 

final_linked 

write_xlsx(final_linked, "C:/Users/Annie/Documents/far/0 - THESIS/final_linked.xlsx")