ixxmu / mp_duty

抓取网络文章到github issues保存
https://archives.duty-machine.now.sh/
120 stars 30 forks source link

[GBD数据库挖掘] 3.R自动绘制复杂三线表 #2649

Closed ixxmu closed 2 years ago

ixxmu commented 2 years ago

https://mp.weixin.qq.com/s/_5B-xUIabLKmBvglhiOhxw

ixxmu commented 2 years ago

[GBD数据库挖掘] 3.R自动绘制复杂三线表 by R语言数据分析指南

欢迎关注R语言数据分析指南

本节继续来进行GBD数据库的挖掘,来通过「gt」包来绘制「Incidence三线表」;在之前的基础上进行了一些加工,各位观众老爷们细细品味,「数据代码已经上传VIP群,请自行下载」(

有需要学习R数据可视化的观众老爷欢迎加入小编的VIP群,目前已经上传「2021-2022两年公众号文档数据+代码约190篇左右」包含付费文档,扫描文中尾二维码加小编微信「付费99元」后邀请进群,「由于群名额有限人满之后将不在添加新成员」,有需要的请尽早加入,早进早享受;「一定让你感受到物超所值」加入小编的VIP如果你有一些让我感兴趣的图表提供示例数据小编若有时间会写成推文发送

加载R包

library(tidyverse)
library(magrittr)
library(data.table)
library(gt)

定义函数

myread <- function(path,metric,year,age) {
  df <- path %>% read_tsv() %>%
    filter(measure == 'Incidence',
           location %in% c("Global","High SDI","High-middle SDI","Middle SDI",
                           "Low-middle SDI","Low SDI","Andean Latin America",
                           "Australasia","Caribbean","Central Asia",
                           "Central Europe","Central Latin America",
                           "Central Sub-Saharan Africa","East Asia",
                           "Eastern Europe","Eastern Sub-Saharan Africa",
                           "High-income Asia Pacific","High-income North America",
                           "North Africa and Middle East","Oceania","South Asia",
                           "Southeast Asia","Southern Latin America",
                           "Southern Sub-Saharan Africa","Tropical Latin America",
                           "Western Europe","Western Sub-Saharan Africa")) %>%
    filter(metric=={{metric}},year == {{year}},age=={{age}}) %>% 
    select(location,sex,val,upper,lower) %>%
    return()
}

数据清洗

A <- myread("liver_cancer.xls",'Number',1990,"All ages") %>%
  select(location,sex,val,upper,lower) %>% 
  mutate_if(is.numeric,function(x) x/100) %>%
  mutate(across(where(is.numeric), ~ round(.,2))) %>%
  unite(.,col="A",lower,upper,sep="-",remove = T,na.rm = F) %>% 
  mutate(A= paste0(" [",A,"]")) %>% 
  unite(.,col="1990-1",val,A,sep="",remove = T,na.rm = F)

B <- myread("liver_cancer.xls",'Rate',1990,"Age-standardized")  %>%
  select(location,sex,val,upper,lower) %>% 
  mutate(across(where(is.numeric), ~ round(.,2))) %>% 
  unite(.,col="A",lower,upper,sep="-",remove = T,na.rm = F) %>% 
  mutate(A= paste0(" [",A,"]")) %>%
  unite(.,col="1990-2",val,A,sep="",remove = T,na.rm = F)

C <- myread("liver_cancer.xls",'Number',1990,"All ages")  %>%
  select(location,sex,val,upper,lower) %>% 
  mutate_if(is.numeric,function(x) x/100) %>%
  mutate(across(where(is.numeric), ~ round(.,2))) %>%
  unite(.,col="A",lower,upper,sep="-",remove = T,na.rm = F) %>% 
  mutate(A= paste0(" [",A,"]")) %>% 
  unite(.,col="2019-1",val,A,sep="",remove = T,na.rm = F)

D <- myread("liver_cancer.xls",'Rate',2019,"Age-standardized")  %>%
  select(location,sex,val,upper,lower) %>% 
  mutate(across(where(is.numeric), ~ round(.,2))) %>% 
  unite(.,col="A",lower,upper,sep="-",remove = T,na.rm = F) %>% 
  mutate(A= paste0(" [",A,"]")) %>%
  unite(.,col="2019-2",val,A,sep="",remove = T,na.rm = F)

整合数据

table1 <- A %>% left_join(.,B,by=c("location","sex")) %>%
  left_join(.,C,by=c("location","sex")) %>%
  left_join(.,D,by=c("location","sex")) %>%
  filter(location=="Global"|sex=="Both") %>% 
  unite(.,col="type",location,sex,sep="-",remove = T,na.rm = F) %>% 
  mutate(across("type",str_replace,"-Both","")) %>% 
  mutate(across("type",str_replace,"Global","Overall")) %>% 
  mutate(across("type",str_replace,"Overall-","")) %>% distinct()

筛选数据

这里「location」根据自己需要来进行选择,由于数据库更新变得名称会有些许变换已实际为准

years <- fread("liver_cancer.xls")

df2 <- years[measure == 'Incidence' &
               age == 'Age-standardized' &
               metric == 'Rate' &
               location %in% c("Global","High SDI","High-middle SDI","Middle SDI",
                                    "Low-middle SDI","Low SDI",
                                    "Andean Latin America","Australasia",
                                    "Caribbean","Central Asia","Central Europe",
                                    "Central Latin America","Central Sub-Saharan Africa",
                                    "East Asia","Eastern Europe","Eastern Sub-Saharan Africa",
                                    "High-income Asia Pacific","High-income North America",
                                    "North Africa and Middle East","Oceania","South Asia",
                                    "Southeast Asia","Southern Latin America",
                                    "Southern Sub-Saharan Africa","Tropical Latin America",
                                    "Western Europe","Western Sub-Saharan Africa")] %>%
  dplyr::rename(location_id="location")

计算Both-EAPC值

此处应该打包成函数方便以后调用,有需要的观众老爷可自行整理

regions_years_ASIR <- df2 %>% filter(sex %in% c("Both"))

EAPC_ASIR <- data.table() 
for (i in unique(regions_years_ASIR$location_id)) {
  temp <- regions_years_ASIR[location_id==i,]
  temp$lograte <- log(temp$val)
  fit <- lm(lograte ~ year, temp)
  b <- coefficients(fit)
  CI <- confint(fit)
  EAPC0.5 <- format(round(100 * (exp(b[2]) - 1), digits = 2))
  EAPC0.025 <- format(round(100 * (exp(CI[2]) - 1), digits = 2))
  EAPC0.975 <- format(round(100 * (exp(CI[4]) - 1), digits = 2))
  location_id <- regions_years_ASIR$location_id[regions_years_ASIR$location_id == i] %>% unique()
  EAPC <- cbind(location_id, EAPC0.025, EAPC0.5, EAPC0.975)
  EAPC_ASIR <- rbind(EAPC_ASIR,EAPC)
}

EAPC_ASIR1 <- EAPC_ASIR[,.(location_id,
                           EAPC_ASIR=paste(EAPC_ASIR$EAPC0.5,' [',EAPC_ASIR$EAPC0.025,' to ',
                                           EAPC_ASIR$EAPC0.975,']',sep=''))] %>% 
  mutate(across("location_id",str_replace,"Global","Overall"))

计算Male-EAPC值

regions_years_ASIR <- df2 %>% filter(sex %in% c("Male"))

EAPC_ASIR <- data.table()
for (i in unique(regions_years_ASIR$location_id)) {
  temp <- regions_years_ASIR[location_id==i,]
  temp$lograte <- log(temp$val)
  fit <- lm(lograte ~ year, temp)
  b <- coefficients(fit)
  CI <- confint(fit)
  EAPC0.5 <- format(round(100 * (exp(b[2]) - 1), digits = 2))
  EAPC0.025 <- format(round(100 * (exp(CI[2]) - 1), digits = 2))
  EAPC0.975 <- format(round(100 * (exp(CI[4]) - 1), digits = 2))
  location_id <- regions_years_ASIR$location_id[regions_years_ASIR$location_id == i] %>% unique()
  EAPC <- cbind(location_id, EAPC0.025, EAPC0.5, EAPC0.975)
  EAPC_ASIR <- rbind(EAPC_ASIR,EAPC)
}

EAPC_ASIR2 <- EAPC_ASIR[,.(location_id,
                           EAPC_ASIR = paste(EAPC_ASIR$EAPC0.5,' [',EAPC_ASIR$EAPC0.025,' to ',
                                             EAPC_ASIR$EAPC0.975,']',sep=''))] %>% 
  filter(location_id=="Global") %>% mutate(across("location_id",str_replace,"Global","Male"))
regions_years_ASIR <- df2 %>% filter(sex %in% c("Female"))
EAPC_ASIR <- data.table() 
for (i in unique(regions_years_ASIR$location_id)) {
  temp <- regions_years_ASIR[location_id==i,]
  temp$lograte <- log(temp$val)
  fit <- lm(lograte ~ year, temp)
  b <- coefficients(fit)
  CI <- confint(fit)
  EAPC0.5 <- format(round(100 * (exp(b[2]) - 1), digits = 2))
  EAPC0.025 <- format(round(100 * (exp(CI[2]) - 1), digits = 2))
  EAPC0.975 <- format(round(100 * (exp(CI[4]) - 1), digits = 2))
  location_id <- regions_years_ASIR$location_id[regions_years_ASIR$location_id == i] %>% unique()
  EAPC <- cbind(location_id, EAPC0.025, EAPC0.5, EAPC0.975)
  EAPC_ASIR <- rbind(EAPC_ASIR,EAPC)
}

EAPC_ASIR3 <- EAPC_ASIR[,.(location_id,
                           EAPC_ASIR = paste(EAPC_ASIR$EAPC0.5,' [',EAPC_ASIR$EAPC0.025,' to ',
                                             EAPC_ASIR$EAPC0.975,']',sep=''))] %>% 
  filter(location_id=="Global") %>% mutate(across("location_id",str_replace,"Global","Female")) 

整合EAPC

EAPC <- EAPC_ASIR1 %>% bind_rows(EAPC_ASIR2) %>% bind_rows(EAPC_ASIR3) %>% as.data.frame() %>% 
  dplyr::rename(type=location_id)

gt绘制三线表

t1 <- table1 %>% left_join(.,EAPC,by="type") %>% 
  arrange(match(type,c("Overall","Female","Male","High SDI",
                       "High-middle SDI","Middle SDI",
                       "Low-middle SDI","Low SDI",
                       "Andean Latin America","Australasia",
                       "Caribbean","Central Asia","Central Europe",
                       "Central Latin America","Central Sub-Saharan Africa",
                       "East Asia","Eastern Europe","Eastern Sub-Saharan Africa",
                       "High-income Asia Pacific","High-income North America",
                       "North Africa and Middle East","Oceania","South Asia",
                       "Southeast Asia","Southern Latin America",
                       "Southern Sub-Saharan Africa","Tropical Latin America",
                       "Western Europe","Western Sub-Saharan Africa")))

t1 %>%
  add_row(`type`="Sex",`1990-1`="",`1990-2`="",`2019-1`="",`2019-2`="",
          `EAPC_ASIR`="",.before=2) %>%
  add_row(`type`="Socio-demographic index",`1990-1`="",`1990-2`="",`2019-1`="",`2019-2`="",
          `EAPC_ASIR`="",.before=5) %>% 
  add_row(`type`="Region",`1990-1`="",`1990-2`="",`2019-1`="",`2019-2`="",
          `EAPC_ASIR`="",.before=11) %>% 
  dplyr::rename(" "="type") %>% 
  gt() %>% 
  cols_label(`1990-1`= md("Incident cases<br>No. *10<sup>2</sup> (95% UI)")) %>% 
  cols_label(`1990-2`=md("ASIR per 100,000<br>No. (95% UI)")) %>% 
  cols_label(`2019-1`=md("Incident cases<br>No. *10<sup>2</sup> (95% UI)")) %>% 
  cols_label(`2019-2`=md("ASIR per 100,000<br>No.(95% UI)")) %>% 
  cols_label(`EAPC_ASIR`=md("EAPC<br>No. (95% CI)")) %>% 
  tab_style(style = list(cell_text(weight = "bold")),
            locations = cells_column_labels(everything())) %>% 
  tab_spanner(label=md("**1990**"),columns=2:3) %>% 
  tab_spanner(label=md("**2019**"),columns=4:5) %>% 
  tab_spanner(label=md("**1990-2019**"),columns=6) %>%
  tab_options(
    column_labels.border.top.color = "grey",
    column_labels.border.top.width = px(2.5),
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width=px(2.5),
    table_body.hlines.color = "black",
    table_body.hlines.width=px(1),
    table.border.bottom.color = "grey",
    table.border.bottom.width = px(2.5))

此处可以将表格保存为PDF格式之后在通过软件转换为「word」格式插入文章中,「Deaths,DALYs」的表格制作方法基本类似;好的本文介绍到此介绍,相信看过相关文档的都很了解具体内容,小编也不过多阐述了;「当然更推荐大家加入我的VIP交流群」扫描下方二维码加小编微信「付费99元」后邀请进群,「一定让你感受到物超所值」「添加小编微信请备注来意,以便高效处理」

小编微信

关注下方公众号下回更新不迷路



往期推荐

[GBD数据库挖掘] 1.数据的下载与整合

[GBD数据库挖掘2] ggplot2优雅的展示发病率