Open schoolAccountMajaG opened 4 years ago
It's data downloaded from the EPA AMPD database. Unfortunately, it can't be downloaded with a script, but we do have bookmarks for the data queries (let me check on them quickly to make sure they still work).
Here's the code that creates the RData file (it can definitely be streamlined):
##----- Data sources
# All 2010-2015
# http://ampd.epa.gov/ampd/#?bookmark=12264
# All 2000-2009
# http://ampd.epa.gov/ampd/#?bookmark=12265
# All 1980-1999
# http://ampd.epa.gov/ampd/#?bookmark=12266
##----- Load R packages
library(data.table)
library(maps)
library(stringr)
##----- Read and aggregate AMPD data
# Column headers of the AMPD export, passed through make.names() so that each
# one becomes a syntactically valid R name (spaces, parentheses, slashes and
# hyphens all turn into dots, e.g. "SO2 (tons)" -> "SO2..tons.").
# The order of the entries is kept exactly as listed in the export.
variables <- make.names(c(
  # Unit identification and reporting period
  "State", "Facility Name", "Facility ID (ORISPL)", "Unit ID",
  "Associated Stacks", "Month", "Year", "Program(s)",
  # Emissions, heat input and load
  "SO2 (tons)", "Avg. NOx Rate (lb/MMBtu)", "NOx (tons)", "CO2 (short tons)",
  "Heat Input (MMBtu)", "Operating Time", "Gross Load (MW-h)",
  "Steam Load (1000lb)",
  # Location
  "EPA Region", "NERC Region", "County", "Source Category",
  "Facility Latitude", "Facility Longitude",
  # Ownership and contacts
  "Owner", "Operator", "Representative (Primary)",
  "Representative (Secondary)",
  # Regulatory phase, status, unit and fuel description
  "SO2 Phase", "NOx Phase", "Operating Status", "Unit Type",
  "Fuel Type (Primary)", "Fuel Type (Secondary)",
  # Installed controls
  "SO2 Control(s)", "NOx Control(s)", "PM Control(s)", "Hg Control(s)",
  # Literal string "NA" (not a missing value).
  # NOTE(review): presumably a placeholder for an unnamed trailing column in
  # the export -- confirm.
  "NA"
))
# Files obtained from: http://ampd.epa.gov/ampd/
#
# 2016-2018 ## LRFH added 2019-01-10
# https://ampd.epa.gov/ampd/?bookmark=12264
#
# 2010-2015
# http://ampd.epa.gov/ampd/#?bookmark=12264
#
# 2000-2009
# http://ampd.epa.gov/ampd/#?bookmark=12265
#
# 1980-1999
# http://ampd.epa.gov/ampd/#?bookmark=12266
#
# as emission_2010-2015.csv, emission_2000-2009.csv, emission_1980-1999.csv
#
# The CSV files were opened in MS Excel and re-saved as CSV with a "_2" suffix
# to work around a duplicate row-name issue.
#
# LRFH added emission_2010-2015_3.csv with complete 2015 data 2019-01-10
# LRFH added emission_2016-2018_3.csv with complete 2018 data 2019-07-13
# All relative file names in this script (the CSVs read here and the CSVs
# written by write.csv()) resolve against this directory.
setwd('~/Dropbox/Harvard/ARP/Data_AMPD_EIA')

# One file per AMPD export period, in chronological order.
ampd_files <- c(
  "emission_1980-1999_2.csv",
  "emission_2000-2009_2.csv",
  "emission_2010-2015_3.csv",
  "emission_2016-2018_3.csv"
)

# Read every export and convert its headers to syntactically valid R names
# (same transformation as applied to `variables`), so that columns can be
# referenced unquoted inside data.table expressions.
ampd_parts <- lapply(ampd_files, function(path) {
  part <- fread(path)
  setnames(part, make.names(names(part)))
  part
})

# NOTE: the earlier `eN <- eN[, by = variables]` statements were removed.
# With no `j` expression data.table ignores `by`, emits a warning (slated to
# become an error) and returns the table unchanged, so they never selected or
# aggregated anything. All columns of each file are therefore kept, exactly
# as before.

# Stack the periods, matching columns by name (what rbind() on data.tables
# does by default).
emissions <- rbindlist(ampd_parts, use.names = TRUE)

# Drop the owner/operator/representative contact columns before the data are
# written out.
emissions[, c("Owner",
              "Operator",
              "Representative..Primary.",
              "Representative..Secondary.") := NULL]

write.csv(emissions, "emissions_all.csv")
##----- Scrubber
# Work on a copy: the `:=` updates below modify DTU by reference and must not
# touch `emissions`.
DTU <- copy(emissions)
# Parse scrubber information
# source("scrubber_parser.R")
# Parse operating status
# source("parse_operating_status.R")
##----- Add FIPS
# county.fips (maps package) is a lookup table with columns `fips` and
# `polyname`; the key built below is matched against `polyname`.
data(county.fips)
DTU[, FIPS := {
  # Two-letter state abbreviation -> full state name (NA if not in state.abb).
  full_state <- state.name[match(State, state.abb)]
  # Lower-case "state,county" key, the format matched against polyname.
  lookup_key <- tolower(paste(full_state, County, sep = ","))
  # Rows whose key is not found in county.fips get FIPS = NA.
  county.fips$fips[match(lookup_key, county.fips$polyname)]
}]
##----- Coal-burning units
# 1/0 indicators: does the primary / secondary fuel description contain "Coal"?
DTU[, `:=`(
  Fuel1.IsCoal = as.numeric(grepl("Coal", Fuel.Type..Primary.)),
  Fuel2.IsCoal = as.numeric(grepl("Coal", Fuel.Type..Secondary.))
)]
# Where the primary fuel type is an empty string, both indicators are set to
# missing instead of 0.
# NOTE(review): Fuel2.IsCoal is masked on the PRIMARY fuel being empty, not the
# secondary one. This reproduces the existing behaviour -- confirm whether
# Fuel.Type..Secondary. == "" was intended for Fuel2.IsCoal.
DTU[Fuel.Type..Primary. == "", c("Fuel1.IsCoal", "Fuel2.IsCoal") := NA_real_]

# Unit-level extract: keep only the columns below, in this order.
DTUS <- DTU[, c(
  "Facility.Name", "Facility.ID..ORISPL.", "Unit.ID",
  "Year", "Month", "Program.s.",
  "State", "County", "FIPS",
  "Facility.Latitude", "Facility.Longitude",
  "SO2..tons.", "NOx..tons.", "Avg..NOx.Rate..lb.MMBtu.", "CO2..short.tons.",
  "Heat.Input..MMBtu.", "Gross.Load..MW.h.", "Steam.Load..1000lb.",
  "Operating.Time",
  "Operating.Status",
  "Source.Category",
  "Fuel.Type..Primary.", "Fuel1.IsCoal",
  "Fuel.Type..Secondary.", "Fuel2.IsCoal",
  "SO2.Phase", "NOx.Phase"
), with = FALSE]

##----- Write unit-level AMPD dataset
# Written relative to the current working directory.
write.csv(DTUS, "AMPD_Unit.csv")
##----- Read unit-level AMPD dataset
# Named vector mapping short names (the names) to the column names as they
# appear in AMPD_Unit.csv (the values).
PP.vars <- c("FacID" = "Facility.ID..ORISPL.",
             "Unit.ID" = "Unit.ID",
             "Latitude" = "Facility.Latitude",
             "Longitude" = "Facility.Longitude",
             "year" = "Year",
             "month" = "Month",
             "SO2.tons" = 'SO2..tons.',
             "NOx.tons" = 'NOx..tons.',
             "HeatIn.MMBtu" = 'Heat.Input..MMBtu.',
             "GrossLoad.MWh" = 'Gross.Load..MW.h.')

# Pass the CSV column names WITHOUT their names attribute: recent data.table
# versions interpret a named `select` vector as colname = type pairs, which
# would treat the short names as (nonexistent) columns and the CSV names as
# column types. An unnamed vector means "keep these columns" in all versions.
# The Year filter runs before the rename, so it uses the CSV column name.
PP.units.monthly1995_2017 <- fread(
  "~/Dropbox/Harvard/ARP/Data_AMPD_EIA/AMPD_Unit.csv",
  select = unname(PP.vars)
)[Year < 2018]

##----- Ensure naming conventions, uID convention consistent
# Rename CSV column names -> short names.
setnames(PP.units.monthly1995_2017, unname(PP.vars), names(PP.vars))
# Unit identifier "<FacID>.<Unit.ID>", with every "_", "-" and "*" replaced
# by ".".
PP.units.monthly1995_2017[, uID := gsub('_|-|\\*', '.',
                                        paste(FacID, Unit.ID, sep = '.'))]

##----- Save unit-level AMPD dataset as package data (RData)
save(PP.units.monthly1995_2017,
     file = '~/Dropbox/Rpackages/hyspdisp/data/PP.units.monthly1995_2017.RData')
Updated bookmarks (including 2018 data now):
##----- Data sources
# All 2010-2018
# http://ampd.epa.gov/ampd/#?bookmark=12264
# All 2000-2009
# http://ampd.epa.gov/ampd/#?bookmark=12265
# All 1980-1999
# http://ampd.epa.gov/ampd/#?bookmark=12266
PP.units.monthly1995_2017 is part of the package. How was this dataset created?