# Polishing my skills with Udemy. It's easy and fun, and it reminds me that there are a lot of ways to write code in R. I like the instructor's nice, easy style, and I may run a course for girls who want to learn R and try statistical analysis with my favorite tool, one of the most powerful languages in math and data science.
## Ex 1: Recreate the target data frame from plain vectors.
## Two equivalent constructions are shown below: `a` and `d`.

# Column vectors; Name doubles as the row labels.
Name <- c("Sam", "Frank", "Ami")
Age <- c(22, 25, 26)
Weight <- c(150, 165, 120)
Sex <- c("M", "M", "F")

# Method 1: bind the columns together, then attach the row names afterwards.
a <- cbind.data.frame(Age, Weight, Sex)
rownames(a) <- Name
a

# Method 2: let data.frame() set the row names while building the frame.
d <- data.frame(Age, Weight, Sex, row.names = Name)
d
## Ex 2: Check whether mtcars is a data frame.
is.data.frame(mtcars)

## Ex 3: Convert a matrix into a data frame with as.data.frame().
# A 5 x 5 integer matrix (filled column by column) is converted in one step.
mat <- as.data.frame(matrix(seq_len(25), nrow = 5))
mat
## Ex 4: Set the built-in data frame mtcars as a variable df.
## The df variable is used for the rest of these exercises.
df <- mtcars

## Ex 5: Display the first 6 rows of df.
head(df)

## Ex 6: What is the average mpg value for all the cars?
avg <- mean(df$mpg)
avg

## Ex 7: Select the rows where the car has 6 cylinders (cyl column).
df[df$cyl == 6, ]

## Ex 8: Select the columns am, gear, and carb.
# Note: calling df(...) here would invoke stats::df (the F-distribution
# density), not select columns, so only bracket indexing is used.
df[, c("am", "gear", "carb")]

## Ex 9: Create a new column called performance, calculated as hp / wt.
# Method 1: build the vector and bind it on. cbind() returns a new data
# frame that is only displayed here; df itself is not modified.
performance <- df$hp / df$wt
cbind(df, performance)
# Method 2: assign the column directly, which does modify df.
df$performance <- df$hp / df$wt

## Ex 9 (continued): the performance column has many decimal places.
## Use round() (see help(round)) to keep only 2 of them.
## Signature for reference: round(x, digits = 0)
df$performance <- round(df$performance, digits = 2)
head(df)

## Ex 10: What is the average mpg for cars that have more than 100 hp
## AND a wt value of more than 2.5?
# Method 1: logical mask on the mpg vector.
avg.mpg <- mean(df$mpg[df$hp > 100 & df$wt > 2.5])
avg.mpg
# Method 2: subset() evaluates the condition inside df, so bare column
# names are enough.
mean(subset(df, hp > 100 & wt > 2.5)$mpg)

## Ex 11: What is the mpg of the Hornet Sportabout?
# Two-index [[ ]] extracts a single cell by row name and column name.
df[["Hornet Sportabout", "mpg"]]
##install.packages('ggplot2')
#install.packages('ggplot2movies')
library(ggplot2)
library(ggplot2movies)
# Data & aesthetics: look at the movies data before plotting it.
colnames(movies)
head(movies)

## Base plot object: rating is mapped to the x axis. Every histogram below
## is built by adding layers to this object.
pl <- ggplot(data = movies, mapping = aes(x = rating))

## plot1: histogram with the default binning
print(pl + geom_histogram())

## plot2: finer bins (width 0.1) for a more precise picture
print(pl + geom_histogram(binwidth = 0.1))

## plot3: same bins with a red outline around each bar
print(pl + geom_histogram(binwidth = 0.1, color = "red"))

## plot4: red outline, pink fill, partly transparent
pl2 <- pl +
  geom_histogram(binwidth = 0.1, color = "red", fill = "pink", alpha = 0.4)
print(pl2)

## plot5: alpha sets the transparency; at 0 the fill is fully transparent
pl3 <- pl +
  geom_histogram(binwidth = 0.1, color = "red", fill = "pink", alpha = 0)
print(pl3)

## plot6: plot4 with named x and y axes
pl4 <- pl2 + labs(x = "Movie Rating", y = "Count")
print(pl4)

## plot7: bars filled according to their count, plus axis names
# NOTE(review): `..count..` is the older notation; recent ggplot2 releases
# spell this after_stat(count) -- confirm the installed version before switching.
pl5 <- pl + geom_histogram(binwidth = 0.1, mapping = aes(fill = ..count..))
pl7 <- pl5 + labs(x = "Movie Rating", y = "Count")
print(pl7)

## plot8: plot7 with the legend at the bottom and a title
pl8 <- pl7 +
  labs(x = "Movie Rating", y = "Count") +
  theme(legend.position = "bottom")
print(pl8 + ggtitle("MY TITLE"))
library(ggplot2)
# Scatter plots of weight (wt) against fuel economy (mpg) from mtcars.
df <- mtcars
pl_sc <- ggplot(data = df, mapping = aes(x = wt, y = mpg))

## plot10: fixed point size; alpha runs from 0 to 1 and sets the opacity
print(pl_sc + geom_point(size = 5, alpha = 0.5))

## plot11: point size mapped to horsepower
print(pl_sc + geom_point(mapping = aes(size = hp)))

## plot12: point size mapped to the number of cylinders (numeric)
print(pl_sc + geom_point(mapping = aes(size = cyl)))

## plot13: point size mapped to cylinders treated as a factor
print(pl_sc + geom_point(mapping = aes(size = factor(cyl))))

## plot14: point shape mapped to the cylinder factor, fixed size
print(pl_sc + geom_point(mapping = aes(shape = factor(cyl)), size = 5))

## plot15: as plot14, with one fixed hex colour for every point
print(
  pl_sc +
    geom_point(mapping = aes(shape = factor(cyl)), size = 5, color = "#8470ff")
)

## plot16: both colour and size mapped to horsepower
pl_sc2 <- pl_sc + geom_point(mapping = aes(color = hp, size = hp))
print(pl_sc2)

## plot17: replace the default colour scale with a black-to-green gradient
pl_sc3 <- pl_sc2 + scale_color_gradient(low = "black", high = "green")
print(pl_sc3)
# Bar charts of the mpg data set, counting rows per vehicle class.
df <- mpg

# class is a categorical column in df, so it goes on the x axis.
pl.b <- ggplot(data = df, mapping = aes(x = class))
print(pl.b + geom_bar())

# color draws the outline around each bar (optional); fill colours the bar.
print(pl.b + geom_bar(color = "red", fill = "blue"))

# Split every class bar by drive type (drv) using the fill colour.
print(pl.b + geom_bar(mapping = aes(fill = drv)))

# position = "dodge" puts the drv bars next to each other instead of
# stacking them, which makes them easier to compare.
print(pl.b + geom_bar(mapping = aes(fill = drv), position = "dodge"))

# position = "fill" shows each class as proportions (percentages) by drv.
print(pl.b + geom_bar(mapping = aes(fill = drv), position = "fill"))
# Box plots of mpg grouped by cylinder count, using mtcars.
df <- mtcars

# First attempt: cyl is passed as-is (numeric) on the x axis.
pl.box <- ggplot(data = df, mapping = aes(x = cyl, y = mpg))
print(pl.box + geom_boxplot())

# cyl is really a categorical value, so x should be factor(cyl).
# This base object is reused for every plot that follows.
pl.box1 <- ggplot(data = df, mapping = aes(x = factor(cyl), y = mpg))

# Fill the boxes with a single colour.
print(pl.box1 + geom_boxplot(fill = "blue"))

# Flip the coordinates to draw the box plot horizontally.
print(pl.box1 + geom_boxplot() + coord_flip())

# Colour the boxes by the cyl factor and switch to a dark background theme.
print(pl.box1 + geom_boxplot(mapping = aes(fill = factor(cyl))) + theme_dark())
library(ggplot2)
library(ggplot2movies)
library(dplyr)
# 2-D binned plots of rating against year for the movies data.
pl <- ggplot(data = movies, mapping = aes(x = year, y = rating))

## First version: rectangular 2-D bins with the default settings.
pl2 <- pl + geom_bin2d()
pl2 %>% print()
# The plot can also be built and piped in one go:
# pl1 <- (pl + geom_bin2d()) %>% print()

# Recolour the bin counts with a green (low) to red (high) gradient.
pl3 <- pl2 + scale_fill_gradient(low = "green", high = "red")
print(pl3)

# Add a second bin layer with an explicit bin size (3 on x, 1 on y).
pl4 <- pl3 + geom_bin2d(binwidth = c(3, 1))
print(pl4)

# Swap the gradient to blue (low) to red (high).
pl5 <- pl4 + scale_fill_gradient(low = "blue", high = "red")
print(pl5)
# geom_hex() relies on the hexbin package. Install it only when it is
# missing, so re-running the script does not reinstall it every time.
if (!requireNamespace("hexbin", quietly = TRUE)) {
  install.packages("hexbin")
}
# stat_binhex is a ggplot2 stat, not a package, so there is nothing else
# to install for it.
# library() stops with an error if the package is unavailable, whereas
# require() would only return FALSE and let the script carry on.
library(hexbin)

# Hexagonal bins of rating against year for the movies data.
pl <- ggplot(movies, aes(x = year, y = rating))
pl6 <- pl + geom_hex()

# Colour the hexagon counts from blue (low) to red (high).
pl7 <- pl6 + scale_fill_gradient(high = "red", low = "blue")
print(pl7)

# 2-D density contours of the same data.
pl8 <- pl + geom_density2d()
print(pl8)

# The same density plot through the qplot() shortcut. It must read the
# `year` and `rating` columns from `movies`; `df` holds mtcars at this point
# and has neither column.
# NOTE(review): qplot() is deprecated in recent ggplot2 releases; pl8 above
# is the equivalent ggplot() form.
print(qplot(x = year, y = rating, data = movies, geom = "density2d"))