Open braedenmc1 opened 5 years ago
I ran the code from `Clean_Census.Rmd`. It looks like `corr` contains rows with missing values, so `cor()` cannot calculate correlation coefficients when a variable has missing values. Additionally, `corr` is a simple-features data frame, which contains a geometry column. You need to remove that column from the data frame, then remove the observations with missing values. Only then can you estimate the correlation matrix. Try this code instead:
# start from the simple-features object and coerce it to a plain tibble
associations <- corr %>%
  as_tibble() %>%
  # the geometry column has to go before cor() can be applied
  select(-geometry) %>%
  # keep complete rows only
  drop_na() %>%
  # correlation matrix, rounded to three decimal places
  cor() %>%
  round(digits = 3)
ggcorrplot(corr = associations)
Reproducible example
library(tidyverse)
#> Registered S3 methods overwritten by 'ggplot2':
#> method from
#> [.quosures rlang
#> c.quosures rlang
#> print.quosures rlang
library(tidycensus)
library(ggcorrplot)
# get census data
# Three county-level pulls from the 2013-2017 5-year ACS, one variable each.
# Each call returns the estimate under the name given on the left-hand side
# of `variables`.
census_white <- get_acs(geography = "county",
                        variables = c(pop_white = "DP05_0037PE"),
                        year = 2017)
#> Getting data from the 2013-2017 5-year ACS
#> Using the ACS Data Profile
census_income <- get_acs(geography = "county",
                         variables = c(income = "B19013_001"),
                         year = 2017)
#> Getting data from the 2013-2017 5-year ACS
# `geometry` is spelled out in full: the previous `geo = TRUE` only reached
# this argument through partial argument matching, and the `geolabs` call
# later in this example already uses the full name.
# NOTE(review): census_grads is not referenced again in the code shown here;
# confirm whether this download is still needed.
census_grads <- get_acs(geography = "county",
                        variables = c(grads = "DP02_0064E"),
                        year = 2017,
                        geometry = TRUE)
#> Getting data from the 2013-2017 5-year ACS
#> Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
#> Using the ACS Data Profile
#>
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|==== | 7%
|
|===== | 7%
|
|===== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 13%
|
|========= | 14%
|
|========= | 15%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 16%
|
|=========== | 17%
|
|=========== | 18%
|
|============ | 18%
|
|============ | 19%
|
|============= | 19%
|
|============= | 20%
|
|============= | 21%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 22%
|
|=============== | 23%
|
|=============== | 24%
|
|================ | 24%
|
|================ | 25%
|
|================= | 25%
|
|================= | 26%
|
|================= | 27%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|=================== | 30%
|
|==================== | 30%
|
|==================== | 31%
|
|==================== | 32%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 33%
|
|====================== | 34%
|
|====================== | 35%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 36%
|
|======================== | 37%
|
|======================== | 38%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 39%
|
|========================== | 40%
|
|========================== | 41%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 42%
|
|============================ | 43%
|
|============================ | 44%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 45%
|
|============================== | 46%
|
|============================== | 47%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 53%
|
|=================================== | 54%
|
|=================================== | 55%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 56%
|
|===================================== | 57%
|
|===================================== | 58%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 59%
|
|======================================= | 60%
|
|======================================= | 61%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 62%
|
|========================================= | 63%
|
|========================================= | 64%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 65%
|
|=========================================== | 66%
|
|=========================================== | 67%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 68%
|
|============================================= | 69%
|
|============================================= | 70%
|
|============================================== | 70%
|
|============================================== | 71%
|
|============================================== | 72%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 73%
|
|================================================ | 74%
|
|================================================ | 75%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 76%
|
|================================================== | 77%
|
|================================================== | 78%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 79%
|
|==================================================== | 80%
|
|==================================================== | 81%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 82%
|
|====================================================== | 83%
|
|====================================================== | 84%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|======================================================== | 87%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 93%
|
|============================================================= | 94%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 96%
|
|=============================================================== | 97%
|
|=============================================================== | 98%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 99%
|
|=================================================================| 100%
# import data files
# Each local CSV is read with readr's guessed column types; the `#>` lines
# are the parser messages captured when the example was rendered.

# all columns numeric except the GeoName key
yale <- read_csv(file = "Yale 2018 - Sheet1.csv")
#> Parsed with column specification:
#> cols(
#> .default = col_double(),
#> GeoName = col_character()
#> )
#> See spec(...) for full column specifications.

# the first column has no header in the file, so readr names it X1
# NOTE(review): geoID is parsed as a double, which would drop any leading
# zeros in the identifier - confirm it is never used as a key
pop <- read_csv(file = "ACS Data - Population(1).csv")
#> Warning: Missing column names filled in: 'X1' [1]
#> Parsed with column specification:
#> cols(
#> X1 = col_double(),
#> geoID = col_double(),
#> county_name = col_character(),
#> moe_grad = col_double(),
#> median_income = col_double(),
#> moe_income = col_double(),
#> population = col_double()
#> )

# same unnamed first column as above; supplies ruralscore / urbanscore
geo <- read_csv(file = "ACS Data - Geo.csv")
#> Warning: Missing column names filled in: 'X1' [1]
#> Parsed with column specification:
#> cols(
#> X1 = col_double(),
#> GEOID = col_double(),
#> NAME = col_character(),
#> variable = col_character(),
#> total_land = col_double(),
#> rural_land = col_double(),
#> urban_land = col_double(),
#> percent_rural = col_double(),
#> percent_urban = col_double(),
#> ruralscore = col_double(),
#> urbanscore = col_double()
#> )

# trump_per keyed by elect_county
trump <- read_csv(file = "Presidential Elections - TrumpElect.csv")
#> Parsed with column specification:
#> cols(
#> trump_per = col_double(),
#> elect_county = col_character()
#> )

# labour-force counts and unemp_rate keyed by bls_county
blsdata <- read_csv(file = "County Unemployment Data - Sheet1.csv")
#> Parsed with column specification:
#> cols(
#> labor_force = col_number(),
#> employed = col_number(),
#> unemployed = col_number(),
#> unemp_rate = col_double(),
#> bls_county = col_character()
#> )
# combine data frames
# Fold the local files together one at a time; the left table's GeoName is
# matched against each file's own name column.
transform1 <- yale %>%
  left_join(pop, by = c(GeoName = "county_name"))
transform2 <- transform1 %>%
  left_join(geo, by = c(GeoName = "NAME"))
transform3 <- transform2 %>%
  left_join(trump, by = c(GeoName = "elect_county"))
# From here on the ACS tables sit on the left, so the key becomes NAME.
transform4 <- census_white %>%
  left_join(transform3, by = c(NAME = "GeoName"))
transform5 <- census_income %>%
  left_join(transform4, by = "NAME")
transform6 <- transform5 %>%
  left_join(blsdata, by = c(NAME = "bls_county"))
# County table downloaded with feature geometry; it is the left-hand side of
# the final join.
geolabs <- get_acs(
  geography = "county",
  variables = c(totalpops = "B00001_001E"),
  year = 2017,
  geometry = TRUE
)
#> Getting data from the 2013-2017 5-year ACS
#> Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
finalform <- geolabs %>%
  left_join(transform6, by = "NAME")
# generate correlation coefficients
# Restrict to the variables of interest; distinct() also collapses
# repeated observations.
corr <- distinct(
  finalform,
  ruralscore, urbanscore, estimate.x, estimate.y, estimate,
  trump_per, human, harmUS, supportRPS
)
# corr still carries a geometry column at this point, so it is coerced to a
# plain tibble first, which lets select() drop that column.
associations <- corr %>%
  as_tibble() %>%
  select(-geometry) %>%
  # only complete rows go into the correlation matrix
  drop_na() %>%
  cor() %>%
  # three decimal places
  round(digits = 3)
associations
#> ruralscore urbanscore estimate.x estimate.y estimate trump_per
#> ruralscore 1.000 -1.000 -0.369 0.198 -0.398 0.067
#> urbanscore -1.000 1.000 0.369 -0.198 0.398 -0.067
#> estimate.x -0.369 0.369 1.000 0.143 0.263 -0.077
#> estimate.y 0.198 -0.198 0.143 1.000 -0.174 0.134
#> estimate -0.398 0.398 0.263 -0.174 1.000 -0.082
#> trump_per 0.067 -0.067 -0.077 0.134 -0.082 1.000
#> human -0.440 0.440 0.306 -0.367 0.375 -0.162
#> harmUS -0.321 0.321 0.188 -0.508 0.326 -0.177
#> supportRPS -0.510 0.510 0.286 -0.465 0.367 -0.192
#> human harmUS supportRPS
#> ruralscore -0.440 -0.321 -0.510
#> urbanscore 0.440 0.321 0.510
#> estimate.x 0.306 0.188 0.286
#> estimate.y -0.367 -0.508 -0.465
#> estimate 0.375 0.326 0.367
#> trump_per -0.162 -0.177 -0.192
#> human 1.000 0.909 0.924
#> harmUS 0.909 1.000 0.807
#> supportRPS 0.924 0.807 1.000
# render the rounded correlation matrix as a correlation plot
ggcorrplot(corr = associations)
Created on 2019-07-17 by the reprex package (v0.3.0)
Dr. Soltoff,
I have added the specific code I wanted to use for my correlations. I found a tutorial online with a package similar to ggplot. I included the code and error.
This is my first time working with Git, so I don't think this showed up in the earlier code.
# Original attempt that produced the error quoted below this snippet.
# Keep the nine variables of interest from finalform, dropping duplicate rows.
corr <- finalform %>% distinct(ruralscore, urbanscore, estimate.x, estimate.y, estimate, trump_per, human, harmUS, supportRPS)
# The converted data frame is not assigned to anything, so corr itself is
# left unchanged by this line.
as.data.frame(corr)
# NOTE(review): several statements appear to have been collapsed onto one line
# here (and on the next line) when the code was pasted. cor(corr) is the call
# that raises "'x' must be numeric": per the reply in this thread, corr still
# contains a geometry column.
data(corr) associations <- round(cor(corr), 3) head(associations[, 1:6])
# NOTE(review): cor_pmat() is given the correlation matrix here; the
# ggcorrplot documentation applies it to the underlying data - confirm.
p.mat <- cor_pmat(associations) head(p.mat[, 1:4])
ggcorrplot(associations)
Error in cor(corr) : 'x' must be numeric