jlin47 / mp4

0 stars 0 forks source link

Working on code #4

Closed dasom1348 closed 6 years ago

dasom1348 commented 6 years ago

title: "mp4" author: "Dasom An & Janell Lin" output: html_document

library(mdsr)
library(RMySQL)
db <- dbConnect_scidb(dbname = "imdb")
sql <- "
SELECT t.id, t.title, t.production_year, mii.info AS votes, mii2.info AS rating, movie_id
FROM title t
JOIN movie_info_idx mii ON mii.movie_id = t.id
JOIN movie_info_idx mii2 ON mii2.movie_id = t.id

WHERE t.kind_id = 1
  AND mii.info_type_id = 100
  AND mii2.info_type_id = 101
  AND mii.info > 100000
ORDER BY mii2.info desc;
"
best_movies <- db %>%
  dbGetQuery(sql)
head(best_movies)

best_movies100 <- best_movies %>%
  head(100)
sql_2 <- "SELECT n.name, ci.role_id, person_id, ci.id, movie_id
FROM cast_info ci
JOIN name n ON n.id = ci.person_id
WHERE role_id = '8'
AND gender = 'f';
"
peopleF_info <- db %>%
  dbGetQuery(sql_2)
head(peopleF_info)
sql_3 <- "SELECT n.name, ci.role_id, person_id, ci.id, movie_id 
FROM cast_info ci
JOIN name n ON n.id = ci.person_id
WHERE role_id = '8'
AND gender = 'm';
"
peopleM_info <- db %>%
  dbGetQuery(sql_3)
head(peopleM_info)
Female_info<- peopleF_info %>%
  select(name, person_id, id, movie_id)%>%
  group_by(name, person_id, id, movie_id)%>%
  summarise(num=n()) 

Male_info<- peopleM_info %>%
  select(name, person_id, id, movie_id)%>%
  group_by(name, person_id, id, movie_id)%>%
  summarise(num=n()) 
sql_4 <- "
SELECT movie_id, title, production_year 
FROM aka_title;
"
movie_id <- db %>%
  dbGetQuery(sql_4)

head(movie_id)

movie_id2 <- movie_id %>%
  inner_join(best_movies100, by = "title") %>%
  filter(movie_id, title, name, production_year.y, votes, rating, person_id) 

#Join people info data frames with best movies data frame
Movie_F<- movie_id2 %>%
  inner_join(Female_info, by = "movie_id")

Movie_M <- movie_id2 %>%
   inner_join(Male_info, by= "movie_id")

# Removing repeatition 
Movie_F <- Movie_f %>%
  group_by(movie_id, title, name, production_year.y, votes, rating, person_id)