Open hidekoji opened 4 years ago
I am also on Windows, but it is working fine for me:
date <- c("18/08", "17/11", "18/08", "18/05", "18/05", "18/07")
data.frame(
date = date,
date = lubridate::ymd(date, truncated = 1)
)
#> date date.1
#> 1 18/08 2018-08-01
#> 2 17/11 2017-11-01
#> 3 18/08 2018-08-01
#> 4 18/05 2018-05-01
#> 5 18/05 2018-05-01
#> 6 18/07 2018-07-01
Created on 2019-12-13 by the reprex package (v0.3.0)
However, I think this is a locale issue. If I change the locale, I can reproduce the problem:
Sys.setlocale("LC_TIME", "Japanese_Japan.932")
#> [1] "Japanese_Japan.932"
date <- c("18/08", "17/11", "18/08", "18/05", "18/05", "18/07")
data.frame(
date = date,
date = lubridate::ymd(date, truncated = 1)
)
#> date date.1
#> 1 18/08 2018-08-01
#> 2 17/11 2017-01-01
#> 3 18/08 2018-08-01
#> 4 18/05 2018-05-01
#> 5 18/05 2018-05-01
#> 6 18/07 2018-07-01
Here is a minimal reprex:
lubridate::ymd("17/11", truncated = 1, locale = "Japanese_Japan.932")
#> [1] "2017-01-01"
lubridate::ymd("17/11", truncated = 1, locale = "French_France.1252")
#> [1] "2017-11-01"
lubridate::ymd("17/11", truncated = 1, locale = "English_United States.1252")
#> [1] "2017-11-01"
@cderv could you please post the value of `lubridate:::.get_locale_regs("Japanese_Japan.932")`?
I don't see this problem with the Japanese locale on Linux.
Here is the result
> lubridate:::.get_locale_regs("Japanese_Japan.932")
$alpha_flex
b
"((?<b_b>1|2|3|4|5|6|7|8|9|10|11|12)|(?<B_b>1\\?|2\\?|3\\?|4\\?|5\\?|6\\?|7\\?|8\\?|9\\?|10\\?|11\\?|12\\?))(?![[:alpha:]])"
B
"(?<B_B>1\\?|2\\?|3\\?|4\\?|5\\?|6\\?|7\\?|8\\?|9\\?|10\\?|11\\?|12\\?)(?![[:alpha:]])"
a
"((?<a_a>\\?)|(?<A_a>\\?\\?\\?))(?![[:alpha:]])"
A
"(?<A_A>\\?\\?\\?)(?![[:alpha:]])"
Ou
"(?<Ou_Ou>Z)(?![[:alpha:]])"
p
"(?<p>\\?\\?)(?![[:alpha:]])"
$num_flex
d
"(?<d>[012]?[1-9]|3[01]|[12]0)(?!\\d)"
q
"(?<q>[0]?[1-4])(?!\\d)"
H
"(?<H>2[0-4]|[01]?\\d)(?!\\d)"
h
"(?<H>2[0-4]|[01]?\\d)(?!\\d)"
I
"(?<I>1[0-2]|0?[1-9])(?!\\d)"
j
"(?<j>[0-3]?\\d?\\d)(?!\\d)"
M
"(?<M>[0-5]?\\d)(?!\\d)"
S
"((?<OS_S>[0-5]?\\d\\.\\d+)|(?<S>[0-6]?\\d))(?!\\d)"
s
"((?<OS_S>[0-5]?\\d\\.\\d+)|(?<S>[0-6]?\\d))(?!\\d)"
U
"(?<U>[0-5]?\\d)(?!\\d)"
w
"(?<w>[0-6])(?!\\d)"
u
"(?<u>[1-7])(?!\\d)"
W
"(?<W>[0-5]?\\d)(?!\\d)"
Y
"(?<Y>\\d{4})(?!\\d)"
y
"((?<Y_y>\\d{4})|(?<y>\\d{2}))(?!\\d)"
Oz
"(?<Oz_Oz>[-+]\\d{4})(?!\\d)"
OO
"(?<OO>[-+]\\d{2}:\\d{2})(?!\\d)"
Oo
"(?<Oo>[-+]\\d{2})(?!\\d)"
T
"(((?<I_s>1[0-2]|0?[1-9])\\D+(?<M_s_T>[0-5]?\\d)\\D+((?<OS_s_T_S>[0-5]?\\d\\.\\d+)|(?<S_s_T>[0-6]?\\d))\\D*(?<p_s>\\?\\?)(?![[:alpha:]]))|((?<H_s>2[0-4]|[01]?\\d)\\D+(?<M_s>[0-5]?\\d)\\D+((?<OS_s_S>[0-5]?\\d\\.\\d+)|(?<S_s>[0-6]?\\d))))(?!\\d)"
R
"(((?<I_s>1[0-2]|0?[1-9])\\D+(?<M_s_T>[0-5]?\\d)\\D*(?<p_s>\\?\\?)(?![[:alpha:]]))|((?<H_s>2[0-4]|[01]?\\d)\\D+(?<M_s>[0-5]?\\d)))(?!\\d)"
r
"(((?<I_s>1[0-2]|0?[1-9])\\D*(?<p_s>\\?\\?)(?![[:alpha:]]))|(?<H_s>2[0-4]|[01]?\\d))(?!\\d)"
m
"((?<m>1[0-2]|0?[1-9](?!\\d))|(((?<b_m>1|2|3|4|5|6|7|8|9|10|11|12)|(?<B_m>1\\?|2\\?|3\\?|4\\?|5\\?|6\\?|7\\?|8\\?|9\\?|10\\?|11\\?|12\\?))(?![[:alpha:]])))"
OS
"(?<OS_f>[0-5]\\d\\.\\d+)"
z
"((?<Ou_Ou>Z)(?![[:alpha:]])|(?<Oz_Oz>[-+]\\d{4})(?!\\d)|(?<OO>[-+]\\d{2}:\\d{2})(?!\\d)|(?<Oo>[-+]\\d{2})(?!\\d))"
$alpha_exact
b
"((?<b_b_e>1|2|3|4|5|6|7|8|9|10|11|12)|(?<B_b_e>1\\?|2\\?|3\\?|4\\?|5\\?|6\\?|7\\?|8\\?|9\\?|10\\?|11\\?|12\\?))(?![[:alpha:]])"
B
"(?<B_B_e>1\\?|2\\?|3\\?|4\\?|5\\?|6\\?|7\\?|8\\?|9\\?|10\\?|11\\?|12\\?)(?![[:alpha:]])"
a
"((?<a_a_e>\\?)|(?<A_a_e>\\?\\?\\?))(?![[:alpha:]])"
A
"(?<A_A_e>\\?\\?\\?)(?![[:alpha:]])"
Ou
"(?<Ou_Ou_e>Z)(?![[:alpha:]])"
p
"(?<p_e>\\?\\?)(?![[:alpha:]])"
$num_exact
d
"(?<d_e>[012][1-9]|3[01]|[12]0)"
q
"(?<q_e>[0][1-4])"
H
"(?<H_e>2[0-4]|[01]\\d)"
h
"(?<H_e>2[0-4]|[01]\\d)"
I
"(?<I_e>1[0-2]|0[1-9])"
j
"(?<j_e>[0-3]\\d\\d)"
M
"(?<M_e>[0-5]\\d)"
S
"((?<OS_S_e>[0-5]\\d\\.\\d*)|(?<S_e>[0-6]\\d))"
s
"((?<OS_S_e>[0-5]\\d\\.\\d*)|(?<S_e>[0-6]\\d))"
U
"(?<U_e>[0-5]\\d)"
w
"(?<w_e>[0-6])"
u
"(?<u_e>[1-7])"
W
"(?<W_e>[0-5]\\d)"
Y
"(?<Y_e>\\d{4})"
y
"((?<Y_y_e>\\d{4})|(?<y_e>\\d{2}))"
Oz
"(?<Oz_Oz_e>[-*]\\d{4})"
OO
"(?<OO_e>[-*]\\d{2}:\\d{2})"
Oo
"(?<Oo_e>[-*]\\d{2})"
T
"(((?<I_s_e>1[0-2]|0[1-9])\\D*(?<M_s_T_e>[0-5]\\d)\\D*((?<OS_s_T_S_e>[0-5]\\d\\.\\d*)|(?<S_s_T_e>[0-6]\\d))\\D*(?<p_s_e>\\\\)(?![[:alpha:]]))|((?<H_s_e>2[0-4]|[01]\\d)\\D*(?<M_s_e>[0-5]\\d)\\D*((?<OS_s_S_e>[0-5]\\d\\.\\d*)|(?<S_s_e>[0-6]\\d))))"
R
"(((?<I_s_e>1[0-2]|0[1-9])\\D*(?<M_s_T_e>[0-5]\\d)\\D*(?<p_s_e>\\\\)(?![[:alpha:]]))|((?<H_s_e>2[0-4]|[01]\\d)\\D*(?<M_s_e>[0-5]\\d)))"
r
"(((?<I_s_e>1[0-2]|0[1-9])\\D*(?<p_s_e>\\\\)(?![[:alpha:]]))|(?<H_s_e>2[0-4]|[01]\\d))"
m
"((?<m_e>1[0-2]|0[1-9])|(((?<b_m_e>1|2|3|4|5|6|7|8|9|10|11|12)|(?<B_m_e>1\\?|2\\?|3\\?|4\\?|5\\?|6\\?|7\\?|8\\?|9\\?|10\\?|11\\?|12\\?))(?![[:alpha:]])))"
OS
"(?<OS_e>[0-5]\\d\\.\\d+)"
z
"((?<Ou_Ou_e>Z)(?![[:alpha:]])|(?<Oz_Oz_e>[-*]\\d{4})|(?<OO_e>[-*]\\d{2}:\\d{2})|(?<Oo_e>[-*]\\d{2}))"
$wday_names
$wday_names$abr
[1] "\\?"
$wday_names$full
[1] "\\?\\?\\?"
$month_names
$month_names$abr
[1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12"
$month_names$full
[1] "1\\?" "2\\?" "3\\?" "4\\?" "5\\?" "6\\?" "7\\?" "8\\?" "9\\?" "10\\?"
[11] "11\\?" "12\\?"
Apologies for the belated update. Here is my result for `lubridate:::.get_locale_regs("Japanese_Japan.932")` on Windows.
lubridate:::.get_locale_regs("Japanese_Japan.932")
#> $alpha_flex
#> b
#> "((?<b_b>1|2|3|4|5|6|7|8|9|10|11|12)|(?<B_b>1月|2月|3月|4月|5月|6月|7月|8月|9月|10月|11月|12月))(?![[:alpha:]])"
#> B
#> "(?<B_B>1月|2月|3月|4月|5月|6月|7月|8月|9月|10月|11月|12月)(?![[:alpha:]])"
#> a
#> "((?<a_a>木|月|火|日|金|水|土)|(?<A_a>木曜日|月曜日|火曜日|日曜日|金曜日|水曜日|土曜日))(?![[:alpha:]])"
#> A
#> "(?<A_A>木曜日|月曜日|火曜日|日曜日|金曜日|水曜日|土曜日)(?![[:alpha:]])"
#> Ou
#> "(?<Ou_Ou>Z)(?![[:alpha:]])"
#> p
#> "(?<p>午前|午後)(?![[:alpha:]])"
#>
#> $num_flex
#> d
#> "(?<d>[012]?[1-9]|3[01]|[12]0)(?!\\d)"
#> q
#> "(?<q>[0]?[1-4])(?!\\d)"
#> H
#> "(?<H>2[0-4]|[01]?\\d)(?!\\d)"
#> h
#> "(?<H>2[0-4]|[01]?\\d)(?!\\d)"
#> I
#> "(?<I>1[0-2]|0?[1-9])(?!\\d)"
#> j
#> "(?<j>[0-3]?\\d?\\d)(?!\\d)"
#> M
#> "(?<M>[0-5]?\\d)(?!\\d)"
#> S
#> "((?<OS_S>[0-5]?\\d\\.\\d+)|(?<S>[0-6]?\\d))(?!\\d)"
#> s
#> "((?<OS_S>[0-5]?\\d\\.\\d+)|(?<S>[0-6]?\\d))(?!\\d)"
#> U
#> "(?<U>[0-5]?\\d)(?!\\d)"
#> w
#> "(?<w>[0-6])(?!\\d)"
#> u
#> "(?<u>[1-7])(?!\\d)"
#> W
#> "(?<W>[0-5]?\\d)(?!\\d)"
#> Y
#> "(?<Y>\\d{4})(?!\\d)"
#> y
#> "((?<Y_y>\\d{4})|(?<y>\\d{2}))(?!\\d)"
#> Oz
#> "(?<Oz_Oz>[-+]\\d{4})(?!\\d)"
#> OO
#> "(?<OO>[-+]\\d{2}:\\d{2})(?!\\d)"
#> Oo
#> "(?<Oo>[-+]\\d{2})(?!\\d)"
#> T
#> "(((?<I_s>1[0-2]|0?[1-9])\\D+(?<M_s_T>[0-5]?\\d)\\D+((?<OS_s_T_S>[0-5]?\\d\\.\\d+)|(?<S_s_T>[0-6]?\\d))\\D*(?<p_s>午前|午後)(?![[:alpha:]]))|((?<H_s>2[0-4]|[01]?\\d)\\D+(?<M_s>[0-5]?\\d)\\D+((?<OS_s_S>[0-5]?\\d\\.\\d+)|(?<S_s>[0-6]?\\d))))(?!\\d)"
#> R
#> "(((?<I_s>1[0-2]|0?[1-9])\\D+(?<M_s_T>[0-5]?\\d)\\D*(?<p_s>午前|午後)(?![[:alpha:]]))|((?<H_s>2[0-4]|[01]?\\d)\\D+(?<M_s>[0-5]?\\d)))(?!\\d)"
#> r
#> "(((?<I_s>1[0-2]|0?[1-9])\\D*(?<p_s>午前|午後)(?![[:alpha:]]))|(?<H_s>2[0-4]|[01]?\\d))(?!\\d)"
#> m
#> "((?<m>1[0-2]|0?[1-9](?!\\d))|(((?<b_m>1|2|3|4|5|6|7|8|9|10|11|12)|(?<B_m>1月|2月|3月|4月|5月|6月|7月|8月|9月|10月|11月|12月))(?![[:alpha:]])))"
#> OS
#> "(?<OS_f>[0-5]\\d\\.\\d+)"
#> z
#> "((?<Ou_Ou>Z)(?![[:alpha:]])|(?<Oz_Oz>[-+]\\d{4})(?!\\d)|(?<OO>[-+]\\d{2}:\\d{2})(?!\\d)|(?<Oo>[-+]\\d{2})(?!\\d))"
#>
#> $alpha_exact
#> b
#> "((?<b_b_e>1|2|3|4|5|6|7|8|9|10|11|12)|(?<B_b_e>1月|2月|3月|4月|5月|6月|7月|8月|9月|10月|11月|12月))(?![[:alpha:]])"
#> B
#> "(?<B_B_e>1月|2月|3月|4月|5月|6月|7月|8月|9月|10月|11月|12月)(?![[:alpha:]])"
#> a
#> "((?<a_a_e>木|月|火|日|金|水|土)|(?<A_a_e>木曜日|月曜日|火曜日|日曜日|金曜日|水曜日|土曜日))(?![[:alpha:]])"
#> A
#> "(?<A_A_e>木曜日|月曜日|火曜日|日曜日|金曜日|水曜日|土曜日)(?![[:alpha:]])"
#> Ou
#> "(?<Ou_Ou_e>Z)(?![[:alpha:]])"
#> p
#> "(?<p_e>午前|午後)(?![[:alpha:]])"
#>
#> $num_exact
#> d
#> "(?<d_e>[012][1-9]|3[01]|[12]0)"
#> q
#> "(?<q_e>[0][1-4])"
#> H
#> "(?<H_e>2[0-4]|[01]\\d)"
#> h
#> "(?<H_e>2[0-4]|[01]\\d)"
#> I
#> "(?<I_e>1[0-2]|0[1-9])"
#> j
#> "(?<j_e>[0-3]\\d\\d)"
#> M
#> "(?<M_e>[0-5]\\d)"
#> S
#> "((?<OS_S_e>[0-5]\\d\\.\\d*)|(?<S_e>[0-6]\\d))"
#> s
#> "((?<OS_S_e>[0-5]\\d\\.\\d*)|(?<S_e>[0-6]\\d))"
#> U
#> "(?<U_e>[0-5]\\d)"
#> w
#> "(?<w_e>[0-6])"
#> u
#> "(?<u_e>[1-7])"
#> W
#> "(?<W_e>[0-5]\\d)"
#> Y
#> "(?<Y_e>\\d{4})"
#> y
#> "((?<Y_y_e>\\d{4})|(?<y_e>\\d{2}))"
#> Oz
#> "(?<Oz_Oz_e>[-*]\\d{4})"
#> OO
#> "(?<OO_e>[-*]\\d{2}:\\d{2})"
#> Oo
#> "(?<Oo_e>[-*]\\d{2})"
#> T
#> "(((?<I_s_e>1[0-2]|0[1-9])\\D*(?<M_s_T_e>[0-5]\\d)\\D*((?<OS_s_T_S_e>[0-5]\\d\\.\\d*)|(?<S_s_T_e>[0-6]\\d))\\D*(?<p_s_e>午前|午後)(?![[:alpha:]]))|((?<H_s_e>2[0-4]|[01]\\d)\\D*(?<M_s_e>[0-5]\\d)\\D*((?<OS_s_S_e>[0-5]\\d\\.\\d*)|(?<S_s_e>[0-6]\\d))))"
#> R
#> "(((?<I_s_e>1[0-2]|0[1-9])\\D*(?<M_s_T_e>[0-5]\\d)\\D*(?<p_s_e>午前|午後)(?![[:alpha:]]))|((?<H_s_e>2[0-4]|[01]\\d)\\D*(?<M_s_e>[0-5]\\d)))"
#> r
#> "(((?<I_s_e>1[0-2]|0[1-9])\\D*(?<p_s_e>午前|午後)(?![[:alpha:]]))|(?<H_s_e>2[0-4]|[01]\\d))"
#> m
#> "((?<m_e>1[0-2]|0[1-9])|(((?<b_m_e>1|2|3|4|5|6|7|8|9|10|11|12)|(?<B_m_e>1月|2月|3月|4月|5月|6月|7月|8月|9月|10月|11月|12月))(?![[:alpha:]])))"
#> OS
#> "(?<OS_e>[0-5]\\d\\.\\d+)"
#> z
#> "((?<Ou_Ou_e>Z)(?![[:alpha:]])|(?<Oz_Oz_e>[-*]\\d{4})|(?<OO_e>[-*]\\d{2}:\\d{2})|(?<Oo_e>[-*]\\d{2}))"
#>
#> $wday_names
#> $wday_names$abr
#> [1] "日" "月" "火" "水" "木" "金" "土"
#>
#> $wday_names$full
#> [1] "日曜日" "月曜日" "火曜日" "水曜日" "木曜日" "金曜日" "土曜日"
#>
#>
#> $month_names
#> $month_names$abr
#> [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12"
#>
#> $month_names$full
#> [1] "1月" "2月" "3月" "4月" "5月" "6月" "7月" "8月" "9月" "10月"
#> [11] "11月" "12月"
Created on 2019-12-27 by the reprex package (v0.3.0)
It's a bug in `strptime`. On Windows with the above Japanese locale it gives:
> x <- c("18/08", "17/11", "18/08", "18/05", "18/05", "18/07")
> strptime(x, "%y/%b%d")
[1] NA "2017-01-01 PST" NA NA NA NA
I am afraid I cannot do much about this at the moment. The only solution is to drop the dependency on strptime in our parser, which is planned but is unlikely to happen before summer.
A workaround is to avoid training during the parsing:
> parse_date_time(x, c("ym","ymd"), train=F)
[1] "2018-08-01 UTC" "2017-11-01 UTC" "2018-08-01 UTC" "2018-05-01 UTC" "2018-05-01 UTC" "2018-07-01 UTC"
In the example below, I expect 17/11 to be parsed as "2017-11-01", but it is parsed as "2017-01-01".
Created on 2019-12-12 by the reprex package (v0.3.0)