Nonprofit-Open-Data-Collective / npcompete

R package to generate metrics describing competition in nonprofit markets.
1 stars 0 forks source link

metro and subsectors as factors #6

Open lecy opened 2 years ago

lecy commented 2 years ago

Metrics are not available for every geography-subsector pair in every year. The number of observations per year is:

1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 
2772 2863 2877 2894 2930 2964 2967 2927 2888 2942 2821 2922 2918 2968 2882 2875 2892 2934 2969 3703 2967 2939 2939 2943 
2014 2015 2016 2017 2018 2019 
2983 2995 2419 1860  587   52

We should have 4,428 observations each year.

length( unique( dat.allyears$geo ))
[1] 369
length( unique( dat.allyears$subsector ))
[1] 12
369*12
[1] 4428

The sample code for HHI includes a line to ensure geo and subsector are factors. Perhaps coverage varies by year, though, in which case we should include a global GEO.LEVELS with all metro areas and SUBSECTOR.LEVELS as well. We can add these to utils.R so they are available to all.

  # Summarise `df` to one row per geo-subsector group: the group's HHI,
  # its row count, and its total resource.
  dat.hhi <-
    df %>%
    # Without a `levels` argument, factor() takes its levels from the values
    # present in `df`, so the level set can differ between inputs.
    dplyr::mutate( geo=factor(geo),
                   subsector=factor(subsector),
                   resource = as.numeric(resource),
                   # bottomcode() is defined elsewhere; it receives the numeric
                   # resource produced on the previous line.
                   resource=bottomcode(resource) ) %>%
    dplyr::group_by( geo, subsector ) %>%
    # hhi: sum of squared within-group shares of `resource`.
    dplyr::summarize( hhi= sum( ( resource / sum(resource))^2 ),
                      n=dplyr::n(),
                      # Group total, stored in a column named via `resource.name`.
                      {{resource.name}} := sum(resource) )

To use a consistent set of levels we would add a levels argument to factor().

# Apply the shared level sets; both helpers must be *called* so that factor()
# receives their return values rather than the function objects themselves.
geo=factor( geo, levels=get_geo_levels() ),
subsector=factor( subsector, levels=get_subsector_levels() ),
lecy commented 2 years ago

I added these to utils.R:

#' Fixed set of geography (metro area) codes
#'
#' Returns a hard-coded factor of geography codes. Passing it as the
#' `levels` argument of `factor()` gives every data subset the same level
#' set, regardless of which codes actually occur in that subset.
#'
#' @return A factor of length 369 with 369 levels; each level occurs exactly
#'   once among the values.
#' @export
get_geo_levels <- function() {
  # Integer codes index into the `levels` vector that follows them.
  structure(c(4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 1L, 14L, 
  15L, 16L, 17L, 2L, 18L, 19L, 20L, 21L, 22L, 3L, 23L, 24L, 42L, 
  43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 
  56L, 25L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 
  26L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 27L, 76L, 77L, 78L, 
  79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 
  28L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 
  103L, 104L, 105L, 106L, 29L, 107L, 108L, 109L, 110L, 111L, 112L, 
  113L, 114L, 115L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 
  124L, 125L, 30L, 126L, 127L, 128L, 31L, 129L, 130L, 131L, 132L, 
  133L, 134L, 32L, 135L, 136L, 137L, 138L, 139L, 140L, 141L, 142L, 
  143L, 144L, 145L, 146L, 147L, 148L, 149L, 33L, 150L, 151L, 152L, 
  153L, 154L, 34L, 155L, 156L, 157L, 158L, 159L, 160L, 161L, 162L, 
  163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L, 173L, 
  174L, 175L, 35L, 176L, 177L, 178L, 179L, 180L, 181L, 182L, 183L, 
  184L, 185L, 186L, 187L, 188L, 189L, 190L, 191L, 192L, 193L, 36L, 
  194L, 195L, 196L, 197L, 37L, 198L, 199L, 200L, 38L, 201L, 202L, 
  39L, 203L, 204L, 205L, 206L, 40L, 207L, 208L, 41L, 215L, 210L, 
  216L, 217L, 218L, 219L, 220L, 211L, 221L, 222L, 223L, 224L, 225L, 
  226L, 227L, 228L, 229L, 230L, 212L, 231L, 232L, 233L, 234L, 235L, 
  236L, 237L, 238L, 239L, 240L, 241L, 242L, 243L, 244L, 245L, 209L, 
  246L, 247L, 213L, 248L, 249L, 250L, 251L, 252L, 253L, 254L, 255L, 
  256L, 257L, 258L, 259L, 260L, 214L, 261L, 262L, 263L, 264L, 265L, 
  266L, 267L, 268L, 269L, 270L, 271L, 272L, 273L, 274L, 275L, 277L, 
  278L, 279L, 280L, 281L, 282L, 283L, 284L, 285L, 286L, 287L, 288L, 
  276L, 289L, 290L, 300L, 301L, 302L, 303L, 304L, 305L, 306L, 307L, 
  308L, 309L, 310L, 311L, 312L, 313L, 314L, 315L, 316L, 317L, 318L, 
  319L, 320L, 321L, 322L, 323L, 324L, 325L, 326L, 293L, 327L, 294L, 
  328L, 329L, 330L, 331L, 332L, 333L, 334L, 335L, 336L, 337L, 338L, 
  339L, 295L, 340L, 341L, 296L, 342L, 343L, 291L, 344L, 345L, 346L, 
  347L, 348L, 349L, 350L, 351L, 297L, 298L, 352L, 353L, 354L, 355L, 
  356L, 292L, 357L, 358L, 359L, 360L, 361L, 362L, 299L, 363L, 364L, 
  365L, 366L, 367L, 368L, 369L), levels = c("520", "680", "760", 
  "1123", "1280", "1602", "1642", "1960", "2082", "2162", "3120", 
  "4472", "5120", "5602", "5960", "6280", "6442", "7040", "7320", 
  "7362", "7460", "7480", "7602", "8872", "160", "200", "240", 
  "320", "380", "460", "480", "500", "600", "640", "733", "840", 
  "860", "870", "880", "920", "960", "1000", "1020", "1040", "1080", 
  "1240", "1260", "1303", "1320", "1360", "1400", "1440", "1480", 
  "1520", "1540", "1560", "1660", "1692", "1720", "1740", "1760", 
  "1800", "1840", "1880", "1900", "1922", "1950", "2000", "2020", 
  "2040", "2120", "2180", "2290", "2320", "2335", "2400", "2520", 
  "2560", "2620", "2655", "2670", "2700", "2710", "2750", "2840", 
  "2975", "2985", "2995", "3000", "3040", "3160", "3240", "3283", 
  "3290", "3320", "3362", "3480", "3520", "3560", "3580", "3600", 
  "3605", "3660", "3710", "3720", "3760", "3810", "3840", "3850", 
  "3880", "3920", "3960", "3980", "4000", "4040", "4100", "4120", 
  "4150", "4200", "4280", "4320", "4360", "4400", "4420", "4520", 
  "4640", "4680", "4720", "4880", "4890", "4900", "4920", "4940", 
  "4992", "5082", "5170", "5240", "5280", "5330", "5345", "5360", 
  "5483", "5523", "5560", "5720", "5790", "5880", "5920", "5990", 
  "6080", "6120", "6162", "6200", "6323", "6403", "6483", "6560", 
  "6580", "6640", "6680", "6690", "6720", "6740", "6760", "6800", 
  "6840", "6880", "6895", "6922", "6960", "7000", "7120", "7160", 
  "7200", "7240", "7490", "7510", "7520", "7560", "7640", "7680", 
  "7800", "7840", "7880", "7920", "8003", "8050", "8080", "8120", 
  "8160", "8240", "8280", "8320", "8400", "8440", "8520", "8560", 
  "8600", "8640", "8680", "8750", "8780", "8800", "8960", "9040", 
  "9080", "9280", "9320", "40", "120", "220", "280", "450", "743", 
  "1010", "1350", "1580", "1620", "2030", "2190", "2200", "2240", 
  "2330", "2340", "2360", "2440", "2580", "2650", "2720", "2760", 
  "2880", "2900", "2980", "3080", "3150", "3285", "3350", "3400", 
  "3440", "3500", "3610", "3620", "3680", "3700", "3870", "4080", 
  "4243", "4600", "4800", "5160", "5200", "5800", "6015", "6020", 
  "6240", "6340", "6520", "6660", "6820", "6980", "7610", "7620", 
  "7720", "7760", "8140", "8360", "8920", "8940", "9000", "9140", 
  "9200", "9260", "9340", "9360", "0", "720", "1120", "1600", "1920", 
  "2160", "3280", "3360", "4480", "5600", "5640", "6160", "6440", 
  "6920", "7360", "8840", "60", "80", "440", "470", "560", "580", 
  "730", "740", "875", "1125", "1145", "1150", "1160", "1200", 
  "1305", "1310", "1640", "1680", "1890", "1930", "2080", "2281", 
  "2600", "2640", "2680", "2800", "2920", "2960", "3060", "3180", 
  "3200", "3640", "3740", "3800", "4160", "4240", "4560", "4760", 
  "4840", "5000", "5015", "5080", "5140", "5190", "5350", "5380", 
  "5400", "5480", "5520", "5660", "5775", "5910", "5945", "6320", 
  "6360", "6400", "6450", "6480", "6600", "6780", "7080", "7400", 
  "7440", "7485", "7500", "7600", "8000", "8040", "8200", "8480", 
  "8720", "8735", "8760", "8880", "9160", "9240", "9270", "1122", 
  "7442"), class = "factor")
}

#' Fixed set of nonprofit subsector names
#'
#' Returns a hard-coded factor of the 12 subsector names. Passing it as the
#' `levels` argument of `factor()` gives every data subset the same level
#' set, regardless of which subsectors actually occur in that subset.
#'
#' @return A factor of length 12 with 12 levels; each level occurs exactly
#'   once among the values.
#' @export
get_subsector_levels <- function() {
  # Integer codes index into `levels`. Only levels that are actually used are
  # listed, so nlevels() matches the number of subsectors.
  structure(
    c(2L, 7L, 10L, 9L, 11L, 1L, 4L, 6L, 8L, 3L, 5L, 12L),
    levels = c(
      "Arts", "Education", "Environmental", "Health",
      "Hospitals", "Human Services", "International", "Mutual Benefit",
      "Public Benefit", "Religion", "Universities", "Unknown"
    ),
    class = "factor"
  )
}