Open genghiskhanofnz opened 1 year ago
Hi, thank you for your interest in the package. Here is my response:
library("PriceIndices") df.final <- tibble::tribble( ~time, ~prices, ~quantities, ~prodID, ~retID, ~group, "2022-01-01", 5.71420076094582, 0.537123656386962, 84L, 2L, "colorectal", "2022-02-01", 6.1670657020864, 0.579692072025658, 66L, 2L, "colorectal", "2022-03-01", 6.61993064322699, 0.622260487664354, 79L, 1L, "colorectal", "2022-04-01", 6.79763468634815, 0.638964318941747, 10L, 1L, "colorectal", "2022-05-01", 6.13458008378699, 0.576638487664354, 72L, 3L, "colorectal", "2022-06-01", 6.07391126385699, 0.570935737664354, 12L, 3L, "colorectal", "2022-07-01", 5.6817151426464, 0.534070072025658, 82L, 3L, "colorectal", "2022-08-01", 6.07391126385699, 0.570935737664354, 53L, 2L, "colorectal", "2022-09-01", 5.6210463227164, 0.528367322025658, 80L, 3L, "colorectal", "2022-10-01", 5.22885020150582, 0.491501656386962, 23L, 3L, "colorectal", "2022-11-01", 5.83123598413699, 0.548124737664354, 24L, 2L, "colorectal", "2022-12-01", 6.88648670790873, 0.647316234580443, 61L, 3L, "colorectal", "2022-01-01", 7.97852546664873, 0.749965734580443, 49L, 1L, "ortho", "2022-02-01", 7.70766698529815, 0.724505568941747, 51L, 3L, "ortho", "2022-03-01", 7.97852546664873, 0.749965734580443, 34L, 2L, "ortho", "2022-04-01", 8.31005276792931, 0.781128650219139, 30L, 2L, "ortho", "2022-05-01", 7.52566052550815, 0.707397318941747, 64L, 1L, "ortho", "2022-06-01", 7.91785664671873, 0.744262984580443, 98L, 2L, "ortho", "2022-07-01", 8.12804630813931, 0.764020400219139, 22L, 2L, "ortho", "2022-08-01", 8.3989047894899, 0.789480565857835, 52L, 2L, "ortho", "2022-09-01", 7.79651900685873, 0.732857484580443, 74L, 2L, "ortho", "2022-10-01", 7.90161383756902, 0.742736192399791, 35L, 1L, "ortho", "2022-11-01", 7.67518136699873, 0.721451984580443, 74L, 2L, "ortho", "2022-12-01", 6.0457280622264, 0.568286572025658, 33L, 1L, "ortho" )
df.final$time<-as.Date(df.final$time) price_indices(data=df.final, start="2022-01", end="2022-12", formula="fisher", interval=TRUE)
final_index(data=df.final, start = '2022-01', end = '2022-12', formula = 'fisher', groups = TRUE, outlets = FALSE, aggr = 'laspeyres', by = 'group', interval = TRUE)
df_matched<-matched(df.final, period1="2022-01", period2="2022-12", interval=TRUE, type="retID") nrow(df_matched) #you have no matched retIDs - that is the problem on the side of the data set
@JacekBialek Thank you so much for your very quick response. Sorted and thanks. G
@JacekBialek I have carefully built this dataset based on what I learnt from your comment yesterday: each prodID must have outlets.
Even though it complies with your specs, I am facing 'empty data frame' and 'matched' function returns NULL.
Please explain and advise how to solve. Thank you very much. I need this dataset for my paper for submission.
My reprex is here.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tibble)
library(readxl)
library(PriceIndices)
library(ggpubr)
#> Loading required package: ggplot2
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:lubridate':
#>
#> hour, isoweek, mday, minute, month, quarter, second, wday, week,
#> yday, year
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
th.sun.txt <- fread('D:\\GTI\\theatre_sunday.txt')
th.sun.txt %>%
tibble()
#> # A tibble: 360 × 4
#> time prodID retID description
#> <chr> <int> <int> <chr>
#> 1 1/01/2022 1 1 gensurg
#> 2 1/01/2022 1 1 gensurg
#> 3 1/01/2022 1 1 gensurg
#> 4 1/01/2022 1 2 ortho
#> 5 1/01/2022 1 2 ortho
#> 6 1/01/2022 2 1 gensurg
#> 7 1/01/2022 2 1 gensurg
#> 8 1/01/2022 2 1 gensurg
#> 9 1/01/2022 2 2 ortho
#> 10 1/01/2022 2 2 ortho
#> # ℹ 350 more rows
set.seed(20)
# add p and q
th.sun.txt$time <- dmy(th.sun.txt$time)
th.sun.txt$prices <- rnorm(n = 360, mean = 38, sd = 1.8)
th.sun.txt$quantities <- rnorm(n = 360, mean = 370, sd = 30)
th.sun.txt$retID <- factor(th.sun.txt$retID, levels = c('1', '2'))
th.sun.txt$prodID <- factor(th.sun.txt$prodID, levels = c('1', '2', '3'))
th.sun.txt %>%
tibble()
#> # A tibble: 360 × 6
#> time prodID retID description prices quantities
#> <date> <fct> <fct> <chr> <dbl> <dbl>
#> 1 2022-01-01 1 1 gensurg 40.1 305.
#> 2 2022-01-01 1 1 gensurg 36.9 384.
#> 3 2022-01-01 1 1 gensurg 41.2 374.
#> 4 2022-01-01 1 2 ortho 35.6 371.
#> 5 2022-01-01 1 2 ortho 37.2 371.
#> 6 2022-01-01 2 1 gensurg 39.0 353.
#> 7 2022-01-01 2 1 gensurg 32.8 423.
#> 8 2022-01-01 2 1 gensurg 36.4 418.
#> 9 2022-01-01 2 2 ortho 37.2 338.
#> 10 2022-01-01 2 2 ortho 37.0 352.
#> # ℹ 350 more rows
price_indices(data = th.sun.txt,
start = '2022-01',
end = '2023-12',
formula = 'fisher',
interval = T)
#> time fisher
#> 1 2022-01 1.000000
#> 2 2022-02 1.002509
#> 3 2022-03 1.012294
#> 4 2022-04 1.037292
#> 5 2022-05 1.013678
#> 6 2022-06 1.024017
#> 7 2022-07 1.035660
#> 8 2022-08 1.007630
#> 9 2022-09 1.020173
#> 10 2022-10 1.019776
#> 11 2022-11 1.035899
#> 12 2022-12 1.012325
#> 13 2023-01 1.024042
#> 14 2023-02 1.012208
#> 15 2023-03 1.017154
#> 16 2023-04 1.017222
#> 17 2023-05 1.011207
#> 18 2023-06 1.021264
#> 19 2023-07 1.012819
#> 20 2023-08 1.034836
#> 21 2023-09 1.014474
#> 22 2023-10 1.009947
#> 23 2023-11 1.028053
#> 24 2023-12 1.002978
final_index(data = th.sun.txt,
start = '2022-01',
end = '2023-12',
formula = 'fisher',
outlets = T,
by = 'description',
aggr = 'laspeyres',
groups = T,
interval = T)
#> Error in price_index(data, start, end, formula[form], window = p_window[form], : A data frame is empty
Created on 2023-06-25 with reprex v2.0.2
Hi again. I will check your data set but I need the file theatre_sunday.txt. I am sure that something must (still) be wrong with this data set.... Please note that if you want to aggregate results over outlets, you must make sure that each outlet is available in each period. Regards, Jacek.
@JacekBialek theatre_sunday.txt is attached in my post. All outlets available in each period. Thanks. G
Hi, as I suspected, the problem is with the data. The data set is not continuous. Please note that after filtering:
d1<-dplyr::filter(th.sun.txt, th.sun.txt$description=="gensurg") d1g2<-dplyr::filter(d1, d1$retID=="2")
you will get an empty data set. So there are no 'gensurg' products in outlet no. 2. Please fix it and then it should work.
The same problem concerns the following filtration:
d2<-dplyr::filter(th.sun.txt, th.sun.txt$description=="ortho") d2g1<-dplyr::filter(d2, d2$retID=="1")
Regards, Jacek.
*with data
Hi again, I modified final_index and now it skips data frames with zero rows. Please install it from GitHub:
library("remotes") remotes::install_github("JacekBialek/PriceIndices")
It should be fine now even with your data set. Regards - Jacek Białek
@JacekBialek First and foremost, thanks for paying so much attention to my issue. Appreciated. Now works like a charm with 0.1.8 version installed from your update.
My situation is quite different to your comments.
you will get an empty data set. So there are no 'gensurg' products in outlet no. 2. Please fix it and then it should work. The same problem concerns the following filtration: d2<-dplyr::filter(th.sun.txt, th.sun.txt$description=="ortho") d2g1<-dplyr::filter(d2, d2$retID=="1")
nrow(d2g1) is zero.....
I have 3 prodIDs (acute, elective and emergency operations) and 2 retIDs (1 and 2), which correspond to the description column: the specialties of general surgery (gensurg) and orthopaedics (ortho). So, outlet (retID) 1 is gensurg and outlet 2 is ortho. They both operate on the 3 prodIDs of acute, elective and emergency operations. This is a real-world situation in our hospital.
It is absolutely true that outlet (retID) 2 is not gensurg; it is the ortho specialty.
Now that you have fixed final_index, it copes with my situation very well indeed, and the PriceIndices package is now very useful to us.
I will be using your package in my paper and surely acknowledge you. Thank you so much. G
@genghiskhanofnz Hi, I am really glad I could help. Your specific data set made me improve the package, so I would like to thank you too :) Anyway, if it works now, please consider citing the following publications:
Białek, J. (2021). PriceIndices – a New R Package for Bilateral and Multilateral Price Index Calculations, Statistika – Statistics and Economy Journal, Vol. 2/2021, 122-141, Czech Statistical Office, Praga. Białek, J. (2022). Scanner data processing in a newest version of the PriceIndices package, Statistical Journal of the IAOS, 38 (4), 1369-1397, DOI: 10.3233/SJI-220963.
And by the way - the version from GitHub will be available on CRAN in 24 hours.
Best regards and good luck with your publications and study, Jacek.
@JacekBialek Your references are already in my reference section, but with version 0.1.7. See here: Białek, J. (2023). PriceIndices: Calculating Bilateral and Multilateral Price Indexes (Version 0.1.7). This was produced using the citation() function. My citation appears to be wrong in comparison to yours above. If I had the chance, I would talk to you in person online to say "thank you". I see huge potential in your PriceIndices package. See you soon. I will send you a link to my paper when it is published. G
@JacekBialek How can I interpret the price and quantity contributions from your bennet function? There is not much about this in your manual. I need your advice, as I am weak in the area of contributions. Thanks.
Bennet, T. L., (1920). The Theory of Measurement of Changes in Cost of Living. Journal of the Royal Statistical Society, 83, 455-462
I read the book but fail to understand it.
Hi, please give me your private email - I will send you interesting papers concerning the Bennet indicators. It should help:) Regards - Jacek.
Od: Genghis Khan @.> Wysłane: wtorek, 4 lipca 2023 01:39 Do: JacekBialek/PriceIndices @.> DW: Jacek Białek @.>; Mention @.> Temat: Re: [JacekBialek/PriceIndices] A data frame is empty from final_index function (Issue #4)
Bennet, T. L., (1920). The Theory of Measurement of Changes in Cost of Living. Journal of the Royal Statistical Society, 83, 455-462
I read the book but fail to understand it.
— Reply to this email directly, view it on GitHub <https://github.com/JacekBialek/PriceIndices/issues/4#issuecomment-1619338032>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/APFLCXDXLGYZBCGARMAINU3XONX6DANCNFSM6AAAAAAZSLOZPA>. You are receiving this because you were mentioned. Message ID: @.***>
I need to delete it after receiving your papers. Thanks. G
I am facing this error from the above function on df.final dataframe object. I hope you will help. Thank you very much. G
Sample data reprex.
df.final <- tibble::tribble( ~time, ~prices, ~quantities, ~prodID, ~retID, ~group, "2022-01-01", 5.71420076094582, 0.537123656386962, 84L, 2L, "colorectal", "2022-02-01", 6.1670657020864, 0.579692072025658, 66L, 2L, "colorectal", "2022-03-01", 6.61993064322699, 0.622260487664354, 79L, 1L, "colorectal", "2022-04-01", 6.79763468634815, 0.638964318941747, 10L, 1L, "colorectal", "2022-05-01", 6.13458008378699, 0.576638487664354, 72L, 3L, "colorectal", "2022-06-01", 6.07391126385699, 0.570935737664354, 12L, 3L, "colorectal", "2022-07-01", 5.6817151426464, 0.534070072025658, 82L, 3L, "colorectal", "2022-08-01", 6.07391126385699, 0.570935737664354, 53L, 2L, "colorectal", "2022-09-01", 5.6210463227164, 0.528367322025658, 80L, 3L, "colorectal", "2022-10-01", 5.22885020150582, 0.491501656386962, 23L, 3L, "colorectal", "2022-11-01", 5.83123598413699, 0.548124737664354, 24L, 2L, "colorectal", "2022-12-01", 6.88648670790873, 0.647316234580443, 61L, 3L, "colorectal", "2022-01-01", 7.97852546664873, 0.749965734580443, 49L, 1L, "ortho", "2022-02-01", 7.70766698529815, 0.724505568941747, 51L, 3L, "ortho", "2022-03-01", 7.97852546664873, 0.749965734580443, 34L, 2L, "ortho", "2022-04-01", 8.31005276792931, 0.781128650219139, 30L, 2L, "ortho", "2022-05-01", 7.52566052550815, 0.707397318941747, 64L, 1L, "ortho", "2022-06-01", 7.91785664671873, 0.744262984580443, 98L, 2L, "ortho", "2022-07-01", 8.12804630813931, 0.764020400219139, 22L, 2L, "ortho", "2022-08-01", 8.3989047894899, 0.789480565857835, 52L, 2L, "ortho", "2022-09-01", 7.79651900685873, 0.732857484580443, 74L, 2L, "ortho", "2022-10-01", 7.90161383756902, 0.742736192399791, 35L, 1L, "ortho", "2022-11-01", 7.67518136699873, 0.721451984580443, 74L, 2L, "ortho", "2022-12-01", 6.0457280622264, 0.568286572025658, 33L, 1L, "ortho" )
final_index(df.final, start = '2022-01', end = '2022-12', formula = 'fisher', groups = TRUE, outlets = TRUE, aggr = 'laspeyres', by = 'group', interval = TRUE)
Error in prices(data2, period = end, set = id) : A data frame is empty