lhenneman / hyspdisp

4 stars 4 forks source link

PP.units.monthly1995_2017 how was this created? #53

Open schoolAccountMajaG opened 4 years ago

schoolAccountMajaG commented 4 years ago

PP.units.monthly1995_2017 is part of the package. How was this dataset created?

schoolAccountMajaG commented 4 years ago

@lhenneman https://github.com/lhenneman/hyspdisp/blob/master/data/PP.units.monthly1995_2017.RData

lhenneman commented 4 years ago

It's data downloaded from the EPA AMPD database. Unfortunately, it can't be downloaded with a script, but we do have bookmarks for the data queries (let me check on them quickly to make sure they still work).

Here's the code that creates the RData file (it can definitely be streamlined):

##----- Data sources
# All 2010-2015
#   http://ampd.epa.gov/ampd/#?bookmark=12264
# All 2000-2009
#   http://ampd.epa.gov/ampd/#?bookmark=12265  
# All 1980-1999
#  http://ampd.epa.gov/ampd/#?bookmark=12266

##----- Load R packages

library(data.table)
library(maps)
library(stringr)

##----- Read and aggregate AMPD data

# Column headers as they appear in the AMPD CSV exports, in export order.
ampd_column_labels <- c(
  "State", "Facility Name", "Facility ID (ORISPL)", "Unit ID",
  "Associated Stacks", "Month", "Year", "Program(s)",
  "SO2 (tons)", "Avg. NOx Rate (lb/MMBtu)", "NOx (tons)", "CO2 (short tons)",
  "Heat Input (MMBtu)", "Operating Time", "Gross Load (MW-h)",
  "Steam Load (1000lb)",
  "EPA Region", "NERC Region", "County", "Source Category",
  "Facility Latitude", "Facility Longitude",
  "Owner", "Operator",
  "Representative (Primary)", "Representative (Secondary)",
  "SO2 Phase", "NOx Phase", "Operating Status", "Unit Type",
  "Fuel Type (Primary)", "Fuel Type (Secondary)",
  "SO2 Control(s)", "NOx Control(s)", "PM Control(s)", "Hg Control(s)",
  "NA"
)

# Syntactically valid R names for those headers: spaces, parentheses, slashes
# and hyphens each become ".", e.g. "SO2 (tons)" -> "SO2..tons.".
variables <- make.names(ampd_column_labels)

# Files obtained from: http://ampd.epa.gov/ampd/
#
#   2016-2018 ## LRFH added 2019-01-10
# https://ampd.epa.gov/ampd/?bookmark=12264
# 
#   2010-2015
# http://ampd.epa.gov/ampd/#?bookmark=12264
#   
#   2000-2009
# http://ampd.epa.gov/ampd/#?bookmark=12265
#   
#   1980-1999
# http://ampd.epa.gov/ampd/#?bookmark=12266
#
# as emission_2010-2015.csv, emission_2000-2009.csv, emission_1980-1999.csv
#
# Each CSV file is opened in MS Excel and re-saved as CSV with a "_2" suffix
# to work around a duplicate row-name issue.
#
# LRFH added emission_2010-2015_3.csv with complete 2015 data 2019-01-10
# LRFH added emission_2016-2018_3.csv with complete 2018 data 2019-07-13

# Work from the directory holding the re-saved AMPD exports; the relative
# file names used here and by the later write.csv() calls resolve against it.
setwd('~/Dropbox/Harvard/ARP/Data_AMPD_EIA')

# Read one AMPD export and replace its headers with syntactically valid R
# names (same transformation as make.names() applied to `variables`).
# Alternative renaming kept for reference:
#   setnames(x, c("State", substring(names(x), 3)[-1]))
read_ampd_export <- function(csv_file) {
  export <- fread(csv_file)
  setnames(export, make.names(names(export)))
  export
}

e1 <- read_ampd_export("emission_1980-1999_2.csv")
e2 <- read_ampd_export("emission_2000-2009_2.csv")
e3 <- read_ampd_export("emission_2010-2015_3.csv")
e4 <- read_ampd_export("emission_2016-2018_3.csv")

# Stack the four period exports into one table.
#
# There is deliberately no per-file `eN[, by = variables]` step: with no `j`
# expression data.table ignores `by`, returns the table unchanged and emits a
# warning, so such a call neither aggregates nor reorders anything.
# `variables` remains the reference list of expected column names.
#
# use.names = TRUE (the rbind default for data.tables) is spelled out so that
# columns are matched by name rather than by position across exports.
emissions <- rbind(e1, e2, e3, e4, use.names = TRUE)

# Drop the ownership/contact columns by reference in a single pass; none of
# them is selected into the unit-level file built further down.
contact_columns <- c("Owner", "Operator",
                     "Representative..Primary.", "Representative..Secondary.")
emissions[, (contact_columns) := NULL]

# Keep a flat copy of the combined table (written to the working directory).
write.csv(emissions, "emissions_all.csv")

##----- Scrubber

# Work on a copy so the columns added by reference below do not alter
# `emissions`.
DTU <- copy(emissions)

# Parse scrubber information
# source("scrubber_parser.R")

# Parse operating status
# source("parse_operating_status.R")

##----- Add FIPS

# county.fips (maps package) pairs "state,county" polygon names with FIPS codes.
data(county.fips)

# Build the lower-case "state name,county" key from the state abbreviation and
# county, then look the code up. Rows whose key has no exact match in
# county.fips$polyname get FIPS = NA.
DTU[, FIPS := {
  state_full <- state.name[match(State, state.abb)]
  county_key <- tolower(paste(state_full, County, sep=","))
  county.fips$fips[match(county_key, county.fips$polyname)]
}]

##----- Coal-burning units

# Flag units whose primary / secondary fuel description mentions "Coal"
# (1 = yes, 0 = no).
DTU[, `:=`(
  Fuel1.IsCoal = as.numeric(grepl("Coal", Fuel.Type..Primary.)),
  Fuel2.IsCoal = as.numeric(grepl("Coal", Fuel.Type..Secondary.))
)]

# Where the primary fuel is blank, both flags are set to NA.
# NOTE(review): Fuel2.IsCoal is blanked on the *primary* fuel being empty, not
# the secondary one. This may be intentional (no fuel information at all) or a
# copy-paste slip; confirm before changing, since it alters AMPD_Unit.csv.
DTU[Fuel.Type..Primary. == "", `:=`(
  Fuel1.IsCoal = NA_real_,
  Fuel2.IsCoal = NA_real_
)]

# Columns carried into the unit-level file, in output order.
unit_columns <- c(
  # identification and time
  "Facility.Name", "Facility.ID..ORISPL.", "Unit.ID",
  "Year", "Month", "Program.s.",
  # location
  "State", "County", "FIPS",
  "Facility.Latitude", "Facility.Longitude",
  # emissions
  "SO2..tons.", "NOx..tons.", "Avg..NOx.Rate..lb.MMBtu.", "CO2..short.tons.",
  # activity
  "Heat.Input..MMBtu.", "Gross.Load..MW.h.", "Steam.Load..1000lb.",
  "Operating.Time",
  "Operating.Status",
  "Source.Category",
  # fuel
  "Fuel.Type..Primary.", "Fuel1.IsCoal",
  "Fuel.Type..Secondary.", "Fuel2.IsCoal",
  # program phases
  "SO2.Phase", "NOx.Phase"
)

# with = FALSE makes data.table treat `unit_columns` as column names to keep.
DTUS <- DTU[, unit_columns, with = FALSE]

##----- Write unit-level AMPD dataset

write.csv(DTUS, "AMPD_Unit.csv")

##----- Read unit-level AMPD dataset
# Column map for the package dataset: vector *names* are the names used in
# the package, vector *values* are the corresponding AMPD_Unit.csv columns.
PP.vars <- c("FacID" = "Facility.ID..ORISPL.",
             "Unit.ID" = "Unit.ID",
             "Latitude" = "Facility.Latitude",
             "Longitude" = "Facility.Longitude",
             "year" = "Year",
             "month" = "Month",
             "SO2.tons" = 'SO2..tons.',
             "NOx.tons" = 'NOx..tons.',
             "HeatIn.MMBtu" = 'Heat.Input..MMBtu.',
             "GrossLoad.MWh" = 'Gross.Load..MW.h.')

# Read only the mapped columns and keep records before 2018.
# The names are stripped from `select` because fread() interprets a *named*
# character vector as colname = type pairs; passing PP.vars as-is would make
# it look for columns called "FacID", "Latitude", ... in the CSV.
PP.units.monthly1995_2017 <- fread("~/Dropbox/Harvard/ARP/Data_AMPD_EIA/AMPD_Unit.csv",
                                   select = unname(PP.vars))[Year < 2018]

##----- Ensure naming conventions, uID convention consistent
# Rename the AMPD columns (values of PP.vars) to the package's column names
# (names of PP.vars).
setnames(PP.units.monthly1995_2017,
         old = unname(PP.vars),
         new = names(PP.vars))

# Unit identifier: "<FacID>.<Unit.ID>" with every '_', '-' and '*' replaced
# by '.'.
PP.units.monthly1995_2017[, uID := {
  raw_id <- paste(FacID, Unit.ID, sep = '.')
  gsub('_|-|\\*', '.', raw_id)
}]

##----- Save the dataset into the package's data/ directory
save(PP.units.monthly1995_2017,
     file = '~/Dropbox/Rpackages/hyspdisp/data/PP.units.monthly1995_2017.RData')
lhenneman commented 4 years ago

Updated bookmarks (including 2018 data now):

##----- Data sources
# All 2010-2018
#   http://ampd.epa.gov/ampd/#?bookmark=12264
# All 2000-2009
#   http://ampd.epa.gov/ampd/#?bookmark=12265  
# All 1980-1999
#  http://ampd.epa.gov/ampd/#?bookmark=12266