Closed HughParsonage closed 5 years ago
Age_range | number_of_taxpayers | persons | %_taxpayers | %_taxpayers_at_least_this_old |
---|---|---|---|---|
<18 | 14,542 | 5,492,297 | 0.3% | 43.4% |
18 - 24 | 992,674 | 1,860,733 | 53.3% | 56.6% |
25 - 29 | 1,202,706 | 1,664,620 | 72.3% | 57.0% |
30 - 34 | 1,244,144 | 1,703,855 | 73.0% | 55.2% |
35 - 39 | 1,119,227 | 1,561,686 | 71.7% | 52.8% |
40 - 44 | 1,137,549 | 1,583,259 | 71.8% | 50.2% |
45 - 49 | 1,128,651 | 1,581,447 | 71.4% | 46.6% |
50 - 54 | 1,073,431 | 1,523,542 | 70.5% | 41.6% |
55 - 59 | 952,360 | 1,454,322 | 65.5% | 34.8% |
60 - 64 | 661,796 | 1,299,411 | 50.9% | 25.8% |
65 - 69 | 312,680 | 1,189,001 | 26.3% | 17.0% |
70 - 74 | 126,700 | 887,707 | 14.3% | 12.5% |
75+ | 184,107 | 1,600,053 | 11.5% | 11.5% |
library(readxl)
library(magrittr)
library(hutils)
library(data.table)
library(knitr)
URL_Table2 <-
"https://data.gov.au/dataset/d170213c-4391-4d10-ac24-b0c11768da3f/resource/ca3d7752-e689-47fa-a746-20cfee9f2630/download/taxstats2016individual02lodgmentmethodgendertaxablestatusstateage.xlsx"
destfile <- "data-raw/Table2.xlsx"
if (!file.exists("DESCRIPTION") || !dir.exists(basename(destfile))) {
message("Using non-interactive mode => xlsx file will not be cached")
destfile <- tempfile(fileext = ".xlsx")
}
res <-
download.file(URL_Table2,
destfile = destfile,
mode = "wb")
if (res) {
if (HTTP_ERROR <- httr::http_error(URL_Table2)) {
stop(HTTP_ERROR)
} else {
stop("Unable to download \n\t",
URL_Table2)
}
}
individuals_table2_201516_raw <-
read_excel(destfile,
sheet = "Individuals Table 2A",
skip = 2) %>%
as.data.table
individuals_table2_201516 <-
individuals_table2_201516_raw %>%
melt.data.table(id.vars = c("Lodgment method",
"Gender",
"Taxable status",
"State / Territory1",
"Age range2"),
variable.factor = FALSE,
variable.name = "raw_variable") %>%
.[, c("variable", "suffix") := tstrsplit(raw_variable, split = "\r\n")] %>%
.[, c("variable", "suffix") := lapply(.SD, trimws), .SDcols = c("variable", "suffix")] %>%
setnames(c("Lodgment method",
"Gender",
"Taxable status",
"State / Territory1",
"Age range2"),
c("Lodgment_method",
"Gender",
"TaxableStatus",
"State",
"Age_range")) %>%
.[]
number_of_taxpayers <-
individuals_table2_201516 %>%
.[variable %ein% "Number of individuals" & TaxableStatus %ein% "Taxable"] %>%
.[suffix %ein% "no."] %>%
.[, .(number_of_taxpayers = sum(value)), keyby = "Age_range"] %>%
.[, Age_range := sub("^[^0-9]+", "", Age_range)] %>%
.[, c("min_age", "max_age") := tstrsplit(sub("^18$",
"0 - 18",
sub(" and over",
" - 100",
Age_range)),
split = " - ",
fixed = TRUE)] %>%
.[]
persons_by_age <-
Census2016.DataPack::STE__Age.min %>%
.[, .(persons = sum(persons)),
keyby = .(Age_range = cut(Age.min,
breaks = c(-Inf, number_of_taxpayers[, unique(max_age)]),
labels = number_of_taxpayers[, unique(Age_range)]))] %>%
.[]
format_numbers <- function(x) {
if (!is.numeric(x)) {
return(x)
}
if (min(x) > -1 && max(x) < 1) {
rx <- round(x * 100, 1)
return(paste0(formatC(rx, width = 3, flag = "#", digits = 1, format = "f",
drop0trailing = FALSE),
"%"))
}
if (max(abs(x)) > 1e3) {
return(prettyNum(round(x), big.mark = ","))
}
x
}
stopifnot(identical(format_numbers(c(0.17, 0.115)),
c("17.0%", "11.5%")))
number_of_taxpayers[persons_by_age, on = "Age_range"] %>%
.[, "%_taxpayers" := number_of_taxpayers / persons] %>%
.[, "%_taxpayers_at_least_this_old" := rev(cumsum(rev(number_of_taxpayers)) / cumsum(rev(persons)))] %>%
.[] %>%
.[, c("min_age", "max_age") := NULL] %>%
.[Age_range %ein% "18", Age_range := "<18"] %>%
.[Age_range %ein% "75 and over", Age_range := "75+"] %T>%
.[, stopifnot(sum(number_of_taxpayers) %between% c(10e6, 11e6),
sum(persons) %between% c(23e6, 24e6))] %>%
.[, lapply(.SD, format_numbers)] %>%
kable(align = "rrrrr")
Proportion of each age group paying income tax
fy_year | Age | nTaxpayers | Population | P |
---|---|---|---|---|
2003-04 | under 20 | 618,400 | 5,342,113 | 11.6% |
2003-04 | 20 to 24 | 968,800 | 1,380,726 | 70.2% |
2003-04 | 25 to 29 | 1,009,200 | 1,343,718 | 75.1% |
2003-04 | 30 to 34 | 1,080,100 | 1,509,100 | 71.6% |
2003-04 | 35 to 39 | 1,045,200 | 1,451,389 | 72.0% |
2003-04 | 40 to 44 | 1,097,700 | 1,530,232 | 71.7% |
2003-04 | 45 to 49 | 1,020,100 | 1,424,317 | 71.6% |
2003-04 | 50 to 54 | 882,100 | 1,310,016 | 67.3% |
2003-04 | 55 to 59 | 697,400 | 1,186,934 | 58.8% |
2003-04 | 60 to 64 | 404,500 | 895,320 | 45.2% |
2003-04 | 65 to 69 | 137,700 | 732,674 | 18.8% |
2003-04 | 70 and over | 246,900 | 1,826,183 | 13.5% |
2004-05 | under 20 | 643,100 | 5,362,923 | 12.0% |
2004-05 | 20 to 24 | 1,043,700 | 1,414,648 | 73.8% |
2004-05 | 25 to 29 | 997,800 | 1,352,553 | 73.8% |
2004-05 | 30 to 34 | 1,080,400 | 1,500,518 | 72.0% |
2004-05 | 35 to 39 | 1,054,900 | 1,468,398 | 71.8% |
2004-05 | 40 to 44 | 1,122,700 | 1,527,646 | 73.5% |
2004-05 | 45 to 49 | 1,032,700 | 1,451,092 | 71.2% |
2004-05 | 50 to 54 | 925,400 | 1,325,456 | 69.8% |
2004-05 | 55 to 59 | 742,400 | 1,226,445 | 60.5% |
2004-05 | 60 to 64 | 448,600 | 935,286 | 48.0% |
2004-05 | 65 to 69 | 154,000 | 754,865 | 20.4% |
2004-05 | 70 and over | 271,900 | 1,857,014 | 14.6% |
2005-06 | under 20 | 666,200 | 5,395,069 | 12.3% |
2005-06 | 20 to 24 | 1,057,200 | 1,448,435 | 73.0% |
2005-06 | 25 to 29 | 1,009,500 | 1,381,588 | 73.1% |
2005-06 | 30 to 34 | 1,103,000 | 1,474,144 | 74.8% |
2005-06 | 35 to 39 | 1,047,400 | 1,508,828 | 69.4% |
2005-06 | 40 to 44 | 1,133,200 | 1,516,444 | 74.7% |
2005-06 | 45 to 49 | 1,061,200 | 1,477,732 | 71.8% |
2005-06 | 50 to 54 | 929,600 | 1,347,835 | 69.0% |
2005-06 | 55 to 59 | 786,300 | 1,257,992 | 62.5% |
2005-06 | 60 to 64 | 478,400 | 978,835 | 48.9% |
2005-06 | 65 to 69 | 169,900 | 773,121 | 22.0% |
2005-06 | 70 and over | 294,900 | 1,890,943 | 15.6% |
2006-07 | under 20 | 589,200 | 5,467,582 | 10.8% |
2006-07 | 20 to 24 | 1,067,900 | 1,483,141 | 72.0% |
2006-07 | 25 to 29 | 1,026,000 | 1,431,018 | 71.7% |
2006-07 | 30 to 34 | 1,073,100 | 1,457,084 | 73.6% |
2006-07 | 35 to 39 | 1,073,300 | 1,555,685 | 69.0% |
2006-07 | 40 to 44 | 1,119,800 | 1,504,232 | 74.4% |
2006-07 | 45 to 49 | 1,060,800 | 1,509,850 | 70.3% |
2006-07 | 50 to 54 | 925,100 | 1,373,058 | 67.4% |
2006-07 | 55 to 59 | 809,900 | 1,254,193 | 64.6% |
2006-07 | 60 to 64 | 495,300 | 1,055,169 | 46.9% |
2006-07 | 65 to 69 | 158,500 | 800,638 | 19.8% |
2006-07 | 70 and over | 270,700 | 1,935,972 | 14.0% |
2007-08 | under 20 | 613,200 | 5,548,508 | 11.1% |
2007-08 | 20 to 24 | 1,150,400 | 1,526,362 | 75.4% |
2007-08 | 25 to 29 | 1,104,400 | 1,500,008 | 73.6% |
2007-08 | 30 to 34 | 1,135,200 | 1,458,344 | 77.8% |
2007-08 | 35 to 39 | 1,159,300 | 1,589,553 | 72.9% |
2007-08 | 40 to 44 | 1,170,000 | 1,499,409 | 78.0% |
2007-08 | 45 to 49 | 1,140,300 | 1,537,823 | 74.2% |
2007-08 | 50 to 54 | 997,000 | 1,397,815 | 71.3% |
2007-08 | 55 to 59 | 820,100 | 1,268,595 | 64.6% |
2007-08 | 60 to 64 | 494,800 | 1,117,615 | 44.3% |
2007-08 | 65 to 69 | 136,600 | 827,160 | 16.5% |
2007-08 | 70 and over | 244,700 | 1,978,007 | 12.4% |
2008-09 | under 20 | 237,500 | 5,620,478 | 4.2% |
2008-09 | 20 to 24 | 967,700 | 1,581,376 | 61.2% |
2008-09 | 25 to 29 | 1,110,000 | 1,577,309 | 70.4% |
2008-09 | 30 to 34 | 1,026,900 | 1,476,377 | 69.6% |
2008-09 | 35 to 39 | 1,105,300 | 1,604,280 | 68.9% |
2008-09 | 40 to 44 | 1,069,900 | 1,512,063 | 70.8% |
2008-09 | 45 to 49 | 1,125,300 | 1,554,430 | 72.4% |
2008-09 | 50 to 54 | 1,004,700 | 1,430,082 | 70.3% |
2008-09 | 55 to 59 | 836,500 | 1,287,172 | 65.0% |
2008-09 | 60 to 64 | 570,600 | 1,157,520 | 49.3% |
2008-09 | 65 to 69 | 151,500 | 865,863 | 17.5% |
2008-09 | 70 and over | 228,200 | 2,024,703 | 11.3% |
2009-10 | under 20 | 197,800 | 5,658,746 | 3.5% |
2009-10 | 20 to 24 | 914,000 | 1,605,054 | 56.9% |
2009-10 | 25 to 29 | 1,116,800 | 1,627,256 | 68.6% |
2009-10 | 30 to 34 | 1,042,800 | 1,498,197 | 69.6% |
2009-10 | 35 to 39 | 1,105,100 | 1,600,546 | 69.0% |
2009-10 | 40 to 44 | 1,072,500 | 1,537,102 | 69.8% |
2009-10 | 45 to 49 | 1,095,200 | 1,554,804 | 70.4% |
2009-10 | 50 to 54 | 1,006,800 | 1,460,583 | 68.9% |
2009-10 | 55 to 59 | 845,500 | 1,308,403 | 64.6% |
2009-10 | 60 to 64 | 592,100 | 1,194,384 | 49.6% |
2009-10 | 65 to 69 | 171,600 | 908,395 | 18.9% |
2009-10 | 70 and over | 211,600 | 2,078,280 | 10.2% |
2010-11 | under 20 | 74,500 | 5,687,072 | 1.3% |
2010-11 | 20 to 24 | 828,900 | 1,611,663 | 51.4% |
2010-11 | 25 to 29 | 1,134,100 | 1,658,170 | 68.4% |
2010-11 | 30 to 34 | 1,091,500 | 1,536,161 | 71.1% |
2010-11 | 35 to 39 | 1,044,300 | 1,573,910 | 66.4% |
2010-11 | 40 to 44 | 1,129,400 | 1,587,244 | 71.2% |
2010-11 | 45 to 49 | 1,093,500 | 1,541,837 | 70.9% |
2010-11 | 50 to 54 | 1,064,900 | 1,494,063 | 71.3% |
2010-11 | 55 to 59 | 913,300 | 1,335,993 | 68.4% |
2010-11 | 60 to 64 | 675,600 | 1,226,000 | 55.1% |
2010-11 | 65 to 69 | 235,000 | 954,260 | 24.6% |
2010-11 | 70 and over | 265,600 | 2,133,651 | 12.4% |
2011-12 | under 20 | 193,400 | 5,764,298 | 3.4% |
2011-12 | 20 to 24 | 946,650 | 1,628,778 | 58.1% |
2011-12 | 25 to 29 | 1,208,700 | 1,698,768 | 71.2% |
2011-12 | 30 to 34 | 1,130,650 | 1,589,894 | 71.1% |
2011-12 | 35 to 39 | 1,093,000 | 1,557,083 | 70.2% |
2011-12 | 40 to 44 | 1,150,000 | 1,634,211 | 70.4% |
2011-12 | 45 to 49 | 1,104,000 | 1,533,749 | 72.0% |
2011-12 | 50 to 54 | 1,088,150 | 1,523,432 | 71.4% |
2011-12 | 55 to 59 | 912,850 | 1,363,695 | 66.9% |
2011-12 | 60 to 64 | 652,200 | 1,225,704 | 53.2% |
2011-12 | 65 to 69 | 304,200 | 1,022,037 | 29.8% |
2011-12 | 70 and over | 388,900 | 2,191,816 | 17.7% |
2012-13 | under 20 | 126,150 | 5,844,248 | 2.2% |
2012-13 | 20 to 24 | 839,050 | 1,645,662 | 51.0% |
2012-13 | 25 to 29 | 1,154,750 | 1,732,064 | 66.7% |
2012-13 | 30 to 34 | 1,116,750 | 1,652,555 | 67.6% |
2012-13 | 35 to 39 | 1,055,200 | 1,552,804 | 68.0% |
2012-13 | 40 to 44 | 1,127,050 | 1,659,367 | 67.9% |
2012-13 | 45 to 49 | 1,050,550 | 1,531,499 | 68.6% |
2012-13 | 50 to 54 | 1,051,850 | 1,547,308 | 68.0% |
2012-13 | 55 to 59 | 892,000 | 1,389,209 | 64.2% |
2012-13 | 60 to 64 | 611,100 | 1,243,035 | 49.2% |
2012-13 | 65 to 69 | 231,250 | 1,079,676 | 21.4% |
2012-13 | 70 and over | 250,200 | 2,250,702 | 11.1% |
2013-14 | under 20 | 126,850 | 5,910,431 | 2.1% |
2013-14 | 20 to 24 | 834,850 | 1,661,633 | 50.2% |
2013-14 | 25 to 29 | 1,163,500 | 1,756,185 | 66.3% |
2013-14 | 30 to 34 | 1,179,250 | 1,704,549 | 69.2% |
2013-14 | 35 to 39 | 1,068,700 | 1,555,049 | 68.7% |
2013-14 | 40 to 44 | 1,149,750 | 1,663,547 | 69.1% |
2013-14 | 45 to 49 | 1,064,400 | 1,539,898 | 69.1% |
2013-14 | 50 to 54 | 1,064,100 | 1,561,634 | 68.1% |
2013-14 | 55 to 59 | 905,800 | 1,417,422 | 63.9% |
2013-14 | 60 to 64 | 634,900 | 1,263,190 | 50.3% |
2013-14 | 65 to 69 | 250,850 | 1,118,120 | 22.4% |
2013-14 | 70 and over | 277,400 | 2,324,028 | 11.9% |
(Wide format)
Age | 2003-04 | 2007-08 | 2012-13 |
---|---|---|---|
under 20 | 11.6% | 11.1% | 2.2% |
20 to 24 | 70.2% | 75.4% | 51.0% |
25 to 29 | 75.1% | 73.6% | 66.7% |
30 to 34 | 71.6% | 77.8% | 67.6% |
35 to 39 | 72.0% | 72.9% | 68.0% |
40 to 44 | 71.7% | 78.0% | 67.9% |
45 to 49 | 71.6% | 74.2% | 68.6% |
50 to 54 | 67.3% | 71.3% | 68.0% |
55 to 59 | 58.8% | 64.6% | 64.2% |
60 to 64 | 45.2% | 44.3% | 49.2% |
65 to 69 | 18.8% | 16.5% | 21.4% |
70 and over | 13.5% | 12.4% | 11.1% |
library(data.table)
library(knitr)
library(readxl)
library(magrittr)
library(ggplot2)
library(taxstats)
library(taxstats1516)
# Table 2 individuals has age and taxable status
individuals_table1_1516.tsv <- "~/taxstats/data-raw/2015-16/individuals_table2_201516.tsv"
if (!file.exists(individuals_table1_1516.tsv)) {
individuals_table1_1516.tsv <- tempfile(fileext = ".tsv")
res <-
download.file("https://github.com/HughParsonage/taxstats/raw/master/data-raw/2015-16/individuals_table1_201516.tsv",
destfile = individuals_table1_1516.tsv,
method = "wb")
if (res) {
if (HTTP_ERROR <- httr::http_error("https://github.com/HughParsonage/taxstats/raw/master/data-raw/2015-16/individuals_table1_201516.tsv")) {
stop(HTTP_ERROR)
} else {
stop("Unable to download file.")
}
}
}
individuals_table1_1516 <-
fread(file = individuals_table1_1516.tsv, sep = "\t")
individuals_table1_1516 %>%
.[startsWith(Selected_items, "18")]
age2age_range <- function(age) {
if_else(age >= 70,
0,
pmin(11 - {(age - 15) %/% 5}, 11))
}
stopifnot(age2age_range(65) == 1)
stopifnot(age2age_range(64) == 2)
stopifnot(age2age_range(60) == 2)
stopifnot(age2age_range(24) == 10)
stopifnot(age2age_range(20) == 10)
stopifnot(age2age_range(19) == 11)
aus_pop_by_fy_age_range <-
grattan:::aus_pop_fy_age(tbl = TRUE) %>%
.[, .(Population = sum(Population),
maxAge = max(Age)),
keyby = .(fy_year, age_range = age2age_range(Age))]
sfa <- get_sample_files_all2()
sfa[, tax := income_tax(Taxable_Income, fy.year = .BY[[1L]], .dots.ATO = .SD),
keyby = .(fy_year)]
nTaxpayers_by_fy_age_range <-
sfa[tax > 0][, .(nTaxpayers = sum(WEIGHT)), keyby = c("fy_year", "age_range")]
format_numbers <- function(x) {
if (!is.numeric(x)) {
return(x)
}
if (min(x) > -1 && max(x) < 1) {
rx <- round(x * 100, 1)
return(paste0(formatC(rx, width = 3, flag = "#", digits = 1, format = "f",
drop0trailing = FALSE),
"%"))
}
if (max(abs(x)) > 1e3) {
return(prettyNum(round(x), big.mark = ","))
}
x
}
nTaxpayers_by_fy_age_range[aus_pop_by_fy_age_range,
on = c("fy_year", "age_range"),
nomatch = 0L] %>%
.[, P := nTaxpayers / Population] %>%
.[age_range_decoder, on = "age_range"] %>%
setnames("age_range_description", "Age") %>%
.[, age_range := NULL] %>%
.[, maxAge := NULL] %>%
setkey(fy_year, Age) %>%
.[, nTaxpayers := prettyNum(nTaxpayers, big.mark = ",")] %>%
.[, Population := prettyNum(Population, big.mark = ",")] %>%
.[, lapply(.SD, format_numbers)] %>%
set_cols_first(key(.)) %>%
kable(align = "r")
nTaxpayers_by_fy_age_range[aus_pop_by_fy_age_range,
on = c("fy_year", "age_range"),
nomatch = 0L] %>%
.[, P := nTaxpayers / Population] %>%
.[age_range_decoder, on = "age_range"] %>%
setnames("age_range_description", "Age") %>%
.[, age_range := NULL] %>%
.[, maxAge := NULL] %>%
setkey(fy_year, Age) %>%
set_cols_first(key(.)) %>%
.[, lapply(.SD, format_numbers)] %>%
.[fy_year %in% yr2fy(c(2004, 2008, 2013)),
.(Age, fy_year, P)] %>%
dcast(Age ~ fy_year, value.var = "P") %>%
kable(align = "r")
nTaxpayers_by_fy_age_range[aus_pop_by_fy_age_range,
on = c("fy_year", "age_range"),
nomatch = 0L] %>%
.[, P := nTaxpayers / Population] %>%
.[, Year := fy2yr(fy_year)] %>%
.[age_range_decoder, on = "age_range"] %>%
ggplot(aes(x = Year,
y = P,
group = age_range_description,
color = age_range_description)) +
geom_line(size = 1.4) +
theme_dark() +
scale_y_continuous(labels = format_numbers)
Update:
for age groups