ruger001 / Rstudio-ass2

0 stars 0 forks source link

ass2 #1

Open ruger001 opened 2 years ago

ruger001 commented 2 years ago

# Packages ----
# One library() call per line: R needs a newline (or `;`) between statements.
# The load order is unchanged, because packages attached later mask
# same-named functions from packages attached earlier.
library(repr)
library(GGally)
library(lubridate)
library(gcookbook)
library(forecast)
library(ade4)
library(ggdendro)
library(arules)
library(tidyverse)
library(tree)
library(rpart)
library(rpart.plot)
library(caret)
library(precrec)
library(e1071)
library(ISLR)
library(ggcorrplot)

# Data import ----
# NOTE(review): absolute, machine-specific path. The script only runs where
# this directory exists; consider a project-relative path instead.
setwd("/Users/ruger/Downloads/Data_Extract_From_World_Development_Indicators-6")

# Both files are read relative to the working directory set above.
gdp_per_capital.df <- read.csv("GDP.csv")
co2_per_capital.df <- read.csv("co2.csv")

# Sum the `co2_emmission` column per country into a single `food_co2` value.
# NOTE(review): `food_consumption` is not created anywhere in the visible
# code; it must already exist in the session when this runs.
foodco2_per_capital <- food_consumption %>%
  group_by(country) %>%
  summarise(food_co2 = sum(co2_emmission))

# Keep only countries present in all three tables (inner joins on `country`),
# then drop every row that contains an NA.
food_co2_gdp.df <- gdp_per_capital.df %>%
  inner_join(co2_per_capital.df, by = "country") %>%
  inner_join(foodco2_per_capital, by = "country") %>%
  na.omit()

# Ratio of the food-related CO2 total to the `co2` column.
food_co2_gdp.df <- mutate(food_co2_gdp.df, food_proportion = food_co2 / co2)

summary(food_co2_gdp.df)

# Coerce GDP to numeric.
# NOTE(review): this runs after na.omit(), so any GDP entry that cannot be
# parsed as a number becomes NA here and stays in the data; confirm that the
# column has no such entries.
food_co2_gdp.df <- mutate(food_co2_gdp.df, GDP = as.numeric(GDP))

# Correlation between the variables ----

# Correlation matrix of every column except `country`, drawn with
# ggcorrplot(); hc.order = TRUE reorders the variables by hierarchical
# clustering.
pic1 <- food_co2_gdp.df %>%
  select(-country) %>%
  cor() %>%
  ggcorrplot(hc.order = TRUE)
pic1

# Scatterplot matrix of the same columns. `%>%` binds tighter than `+`, so
# the theme is added to the finished ggscatmat() plot.
pic2 <- food_co2_gdp.df %>%
  select(-country) %>%
  ggscatmat() +
  theme(axis.text.x = element_text(angle = 270, hjust = 1))
pic2

ruger001 commented 2 years ago

# predict1: columns used for modelling ----
# Columns are picked by position. The model fitted later uses `food_co2` as
# the response, so one of these must be that column.
# NOTE(review): presumably column 2 is GDP and column 4 is food_co2; confirm
# with names(food_co2_gdp.df).
selected.var <- c(2, 4)

# predict2: train/validation split ----
# Fixed seed so the random split is reproducible.
set.seed(1)

# Draw 55 training rows from however many rows the data actually has, rather
# than assuming exactly 113. With 113 rows this yields the same indices as
# sample(1:113, 55).
train.index <- sample(seq_len(nrow(food_co2_gdp.df)), 55)

# Training rows are the sampled indices; validation rows are all the others.
train.df <- food_co2_gdp.df[train.index, selected.var]
valid.df <- food_co2_gdp.df[-train.index, selected.var]

# predict3: fit the linear model ----
# Regress food_co2 on every other column of the training data.
food_co2_gdp.lm <- lm(food_co2 ~ ., data = train.df)

# Strongly prefer fixed over scientific notation when printing.
options(scipen = 999)

summary(food_co2_gdp.lm)

# predict4: predictions on the validation set ----
food_co2_gdp.lm.pred <- predict(food_co2_gdp.lm, valid.df)

# Print the comparison table with minimal significant digits.
options(scipen = 999, digits = 0)

# Show at most the first 55 validation rows. head() stops at the end of the
# data, whereas `[1:55]` would pad with NA if fewer than 55 rows exist.
some.residuals <- head(valid.df$food_co2, 55) - head(food_co2_gdp.lm.pred, 55)

data.frame(
  "Predicted" = head(food_co2_gdp.lm.pred, 55),
  "Actual" = head(valid.df$food_co2, 55),
  "Residual" = some.residuals
)

# Back to 3 significant digits for the accuracy measures.
options(scipen = 999, digits = 3)

# Error measures of the predictions against the actual validation values.
accuracy(food_co2_gdp.lm.pred, valid.df$food_co2)

# predict5: distribution of validation residuals ----
# One statement per line: R needs a newline (or `;`) between statements.
food_co2_gdp.lm.pred <- predict(food_co2_gdp.lm, valid.df)

# Actual minus predicted, over the whole validation set.
# NOTE(review): this variable shadows stats::residuals() in the session.
residuals <- valid.df$food_co2 - food_co2_gdp.lm.pred
residuals.df <- data.frame(residuals)

# Histogram of the residuals in 20 bins.
ggplot(data = residuals.df, aes(x = residuals)) +
  geom_histogram(bins = 20)

ruger001 commented 2 years ago
# Bubble chart of food_co2 against co2, with point size mapped to GDP and a
# fitted trend line.
ggplot(food_co2_gdp.df, aes(x = co2, y = food_co2)) +
  # Semi-transparent points so overlapping bubbles remain visible.
  geom_point(aes(size = GDP), alpha = 0.5) +
  scale_size(range = c(0, 15)) +
  # Linear model of y on log(x), drawn without a confidence band.
  geom_smooth(method = 'lm', formula = y ~ log(x), se = FALSE)