dgomezv / coding_R

Statistical R programming
0 stars 0 forks source link

Basic R Programming #1

Open dgomezv opened 10 years ago

dgomezv commented 10 years ago

2+2 search() epicalc package library(epicalc) help.start() help(help) ?help

dgomezv commented 10 years ago

USING R 1+1 sqrt(25) exp(1) exp(-5) log(3.8)

dgomezv commented 10 years ago

R OBJECTS a = 3 + 5 a b <- sqrt(36) a + b a + 3*b -> c c xyx <- 1 xyx baht.per.dollar <- 40 baht.per.dollar A <- "Prince of Songkla University" A

PUTTING COMMANDS IN A COMMAND LINE 3*3 == 3^2 3*2 == 3^2 3*2 < 3^2

LOGICAL CONNECTION TRUE & TRUE TRUE & FALSE FALSE & FALSE (FALSE & TRUE) == (TRUE & FALSE)

FALSE & TRUE == TRUE & FALSE

LOGICAL CONNECTION WITH | (logical 'or')

TRUE | TRUE TRUE | FALSE 3*3 == 3^2 | 3*2 == 3^2

dgomezv commented 10 years ago

c(1,2,3) [1] 1 2 3

c(1,2,3) -> d d

d + 4 d - 3 d * 7 d / 10 d * d d ^ 2 d / d d == d

B <- c("Faculty of Medicine","Prince of Songkla University") B [1] "Faculty of Medicine" "Prince of Songkla University"

VECTORS AND SYSTEMATIC NUMBERS

x <- 1:10; x [1] 1 2 3 4 5 6 7 8 9 10

rep(13, times=5) [1] 13 13 13 13 13

seq(from = -1, to = 11, by = 3) [1] -1 2 5 8 11

seq(10, 23) [1] 10 11 12 13 14 15 16 17 18 19 20 21 22 23 seq(10, -3) [1] 10 9 8 7 6 5 4 3 2 1 0 -1 -2 -3

seq(by=-1, to=-3, from=10)

seq(from=3, to=100, by=7) -> x x [1] 3 10 17 24 31 38 45 52 59 66 73 80 87 94

x[5] [1] 31

x[c(4,6,7)] [1] 24 38 45

dgomezv commented 10 years ago

x[4,6,7] Error in x[4, 6, 7] : incorrect number of dimensions

x[-(1:4)] [1] 31 38 45 52 59 66 73 80 87 94

B[2] [1] "Prince of Songkla University"

x[x/2 == trunc(x/2)] [1] 10 24 38 52 66 80 94

subset(x, x/2==trunc(x/2))

subset(x, x/2!=trunc(x/2)) [1] 3 17 31 45 59 73 87

x[x>30] [1] 31 38 45 52 59 66 73 80 87 94

dgomezv commented 10 years ago

FUNCTIONS RELATED TO MANIPULATION OF VECTORS

fruits <- c(5, 10, 1, 20) summary(fruits) Min. 1st Qu. Median Mean 3rd Qu. Max. 1.0 4.0 7.5 9.0 12.5 20.0 sum(fruits) [1] 36

length(fruits) # number of different types of fruits [1] 4 mean(fruits) # mean of number of fruits [1] 9 sd(fruits) # standard deviation [1] 8.205689 var(fruits) # variance [1] 67.33333

NON NUMERIC VECTORS

person <- c("A","B","C","D","E","F","G","H","I","J","K")

person <- LETTERS[1:11]

class(person) [1] "character" class(fruits) [1] "numeric"

sex <- c(1,2,1,1,1,1,1,1,1,1,2) class(sex) [1] "numeric" sex1 <- as.factor(sex) # Creating sex1 from sex

sex1 [1] 1 2 1 1 1 1 1 1 1 1 2 Levels: 1 2

class(sex1) [1] "factor" is.factor(sex) [1] FALSE is.factor(sex1) [1] TRUE

levels(sex1) <- c("male", "female")

sex1 [1] male female male male male male male [8] male male male female Levels: male female

ORDERING ELEMENTS OF A VECTOR

age <- c(10,23,48,56,15,25,40,21,60,59,80)

sort(age) [1] 10 15 21 23 25 40 48 56 59 60 80

median(age) [1] 40

quantile(age) 0% 25% 50% 75% 100% 10.0 22.0 40.0 57.5 80.0

quantile(age, prob = .3) 30% 23

agegr <- cut(age, breaks=c(0,15,60,100))

is.factor(agegr) [1] TRUE attributes(agegr) $levels [1] "(0,15]" "(15,60]" "(60,100]" $class [1] "factor"

data.frame(age, agegr) age agegr 1 10 (0,15] 2 23 (15,60] 3 48 (15,60] 4 56 (15,60] 5 15 (0,15] 6 25 (15,60] 7 40 (15,60] 8 21 (15,60] 9 60 (15,60] 10 59 (15,60] 11 80 (60,100]

table(agegr) agegr (0,15] (15,60] (60,100] 2 8 1

summary(agegr) # same result as the preceding command class(agegr) [1] "factor"

agegr1 <- unclass(agegr) summary(agegr1) Min. 1st Qu. Median Mean 3rd Qu. Max. 1.000 2.000 2.000 1.909 2.000 3.000 class(agegr1) [1] "integer"

MISSING VALUES

b <- NA b * 3 [1] NA c <- 3 + b c [1] NA

height <- c(100,150,NA,160) height [1] 100 150 NA 160 weight <- c(33, 45, 60,55) weight [1] 33 45 60 55

mean(weight) [1] 48.25 mean(height) [1] NA

length(height) [1] 4

mean(height, na.rm=TRUE) [1] 136.6667

length(na.omit(height)) [1] 3 mean(na.omit(height)) [1] 136.6667

dgomezv commented 10 years ago

ARRAYS MATRICES AND TABLES

Usually a vector has no dimensions

a <- (1:10) a [1] 1 2 3 4 5 6 7 8 9 10 dim(a) NULL

dim(a) <- c(2,5) a [,1] [,2] [,3] [,4] [,5] [1,] 1 3 5 7 9 [2,] 2 4 6 8 10

a[1,] # for the first row and all columns of array 'a' a[,3] # for all rows of the third column a[2,4] # extract 1 cell from the 2nd row and 4th column a[2,2:4] # 2nd row, from 2nd to 4th columns

b <- 1:24 dim(b) <- c(3,4,2) # or b <- array(1:24, c(3,4,2)) b , , 1 [,1] [,2] [,3] [,4] [1,] 1 4 7 10 [2,] 2 5 8 11 [3,] 3 6 9 12 , , 2 [,1] [,2] [,3] [,4] [1,] 13 16 19 22 [2,] 14 17 20 23 [3,] 15 18 21 24

b[1:3,1:2,2] [,1] [,2] [1,] 13 16 [2,] 14 17 [3,] 15 18

VECTOR BINDING

fruits <- c(5, 10, 1, 20) fruits2 <- c(1, 5, 3, 4) Col.fruit <- cbind(fruits, fruits2) rownames(Col.fruit) <- c("orange","banana","durian","mango") Col.fruit fruits fruits2 orange 5 1 banana 10 5 durian 1 3 mango 20 4

Row.fruit <- rbind(fruits, fruits2) colnames(Row.fruit) <- c("orange","banana","durian","mango") Row.fruit orange banana durian mango fruits 5 10 1 20 fruits2 1 5 3 4

TRANSPOSITION OF AN ARRAY

t(Col.fruit) t(Row.fruit)

BASIC STATISTICS OF AN ARRAY

sum(Col.fruit) sum(Col.fruit[2,])

summary(Col.fruit)

summary(Row.fruit)

fruits3 <- c(20, 15, 3, 5, 8) cbind(Col.fruit, fruits3) fruits fruits2 fruits3 orange 5 1 20 banana 10 5 15 durian 1 3 3 mango 20 4 5 Warning message: number of rows of result is not a multiple of vector length (arg 2) in: cbind(Col.fruit, fruits3)

fruits4 <- c(1,2,3) cbind(Col.fruit, fruits4) fruits fruits2 fruits4 orange 5 1 1 banana 10 5 2 durian 1 3 3 mango 20 4 1 Warning message: number of rows of result is not a multiple of vector length (arg 2) in: cbind(Col.fruit, fruits4)

STRING ARRAYS

Thais <- c("Somsri", "Daeng", "Somchai", "Veena") dim(Thais) <- c(2,2); Thais [,1] [,2] [1,] "Somsri" "Somchai" [2,] "Daeng" "Veena"

dgomezv commented 10 years ago

IMPLICIT ARRAY OF TWO VECTORS OF EQUAL LENGTH

cities <- c("Bangkok","Hat Yai","Chiang Mai") postcode <- c(10000, 90110, 50000) postcode[cities=="Bangkok"] [1] 10000

subset(postcode, cities=="Bangkok") [1] 10000

(1:length(cities))[cities=="Hat Yai"] subset(1:3, cities=="Hat Yai") which(cities=="Hat Yai")

cbind(cities,postcode) cities postcode [1,] "Bangkok" "10000" [2,] "Hat Yai" "90110" [3,] "Chiang Mai" "50000"

MATRICES

TABLES

sex <- c(1,2,2,1,2,2)

age <- c(1,1,1,2,2,1)

visits <- c(1,2,3,4,5,6) table1 <- table(sex, age); table1 age sex 1 2 1 1 1 2 3 1

table2 <- tapply(visits, list(Sex=sex, Age=age), FUN=sum) table2 Age Sex 1 2 1 1 4 2 11 5

tapply(visits, list(Sex=sex, Age=age), FUN=mean) Age Sex 1 2 1 1.000 4 2 3.667 5

table2 <- as.table(table2)

SUMMARY OF TABLE VS SUMMARY OF ARRAY

summary(table1) Number of cases in table: 6 Number of factors: 2 Test for independence of all factors: Chisq = 0.375, df = 1, p-value = 0.5403 Chi-squared approximation may be incorrect

is.table(Col.fruit) [1] FALSE summary(Col.fruit) fruits fruits2 Min. : 1.0 Min. :1.00 1st Qu.: 4.0 1st Qu.:2.50 Median : 7.5 Median :3.50 Mean : 9.0 Mean :3.25 3rd Qu.:12.5 3rd Qu.:4.25 Max. :20.0 Max. :5.00 fruits.table <- as.table(Col.fruit) summary(fruits.table) Number of cases in table: 49 Number of factors: 2 Test for independence of all factors: Chisq = 6.675, df = 3, p-value = 0.08302 Chi-squared approximation may be incorrect fisher.test(fruits.table) Fisher's Exact Test for Count Data data: fruits.table p-value = 0.07728 alternative hypothesis: two.sided

LISTS

list1 <- list(a=1, b=fruits, c=cities) list1 $a [1] 1 $b [1] 5 10 1 20 $c [1] "Bangkok" "Hat Yai" "Chiang Mai"

rm(list=c("list1", "fruits"))

rm(list1); rm(fruits) # alternative to the preceding command; run one or the other

sample1 <- rnorm(10)

qqnorm(sample1)

list2 <- qqnorm(sample1)

list2 $x [1] 0.123 -1.547 -0.375 0.655 1.000 0.375 -0.123 [8] -1.000 -0.655 1.547 $y [1] -0.4772 -0.9984 -0.7763 0.0645 0.9595 -0.1103 [7] -0.5110 -0.9112 -0.8372 2.4158