Open mdavis-xyz opened 1 year ago
from datetime import datetime
from itertools import combinations
# Delimiters that mark an optional section inside a format string.
opening_char, closing_char = '[', ']'
def parse_datetime(date_string, date_formats):
    """Parse *date_string* with the first entry of *date_formats* that fits.

    The formats are handed to ``datetime.strptime`` one at a time, in the
    order given. A ``ValueError`` from ``strptime`` means that layout does
    not match, and the next one is tried.

    Returns:
        The parsed ``datetime``, or ``None`` when no format matches (a
        notice is printed to stdout in that case).
    """
    for candidate in date_formats:
        try:
            return datetime.strptime(date_string, candidate)
        except ValueError:
            # This layout does not fit the input; move on to the next one.
            pass
    print(f"Unable to parse date with any given format for string: {date_string}")
    return None
def _extract_optional_components(format_string):
    """Return the optional (bracketed) sections of *format_string*.

    Each returned string is the text between one pair of delimiters, without
    that pair itself but with any nested delimiters left in place.

    Returns:
        A list of the bracketed sections as produced by
        ``_get_bracketed_strings``, or an empty list when *format_string*
        contains no opening delimiter.
    """
    if opening_char not in format_string:
        return []
    # The components are deliberately returned untouched. Strings are
    # immutable, so the former bare ``s.replace(...)`` calls had no effect,
    # and stripping nested delimiters for real would stop
    # _generate_date_formats from finding "[component]" in the format.
    return _get_bracketed_strings(format_string)
def _get_bracketed_strings(input_string):
    """Return the text enclosed by every delimiter pair in *input_string*.

    One entry is produced per opening delimiter, ordered by the position of
    that opening delimiter, so an enclosing group always comes before the
    groups nested inside it. Each entry excludes its own delimiters but keeps
    nested ones: with ``[`` and ``]`` as delimiters, ``"a[b[c]]"`` yields
    ``["b[c]", "c"]``. Closing delimiters with no matching opener are ignored.

    Raises:
        ValueError: if an opening delimiter is never closed.
    """
    sub_strings = []
    length = len(input_string)
    for open_pos, char in enumerate(input_string):
        if char != opening_char:
            continue
        # Walk forward from this opener until its matching closer is found,
        # tracking nesting depth so inner pairs are skipped over.
        depth = 1
        close_pos = open_pos
        while depth > 0:
            close_pos += 1
            if close_pos >= length:
                raise ValueError(
                    f"Unbalanced {opening_char!r} at position {open_pos} "
                    f"in {input_string!r}"
                )
            # Scan the argument itself, not a module-level variable.
            current = input_string[close_pos]
            if current == opening_char:
                depth += 1
            elif current == closing_char:
                depth -= 1
        sub_strings.append(input_string[open_pos + 1:close_pos])
    return sub_strings
def _generate_date_formats(format_string):
    """Expand *format_string* into every concrete format it allows.

    Sections wrapped in the optional-section delimiters may be present or
    absent. Every subset of the optional sections is tried, smallest subsets
    first, and for each subset the kept sections have their delimiters
    removed while the dropped sections are removed entirely.

    With nested optional sections, several subsets collapse to the same
    format (dropping an outer section makes the choice for its inner sections
    irrelevant). Such duplicates are removed, keeping the first occurrence so
    the original ordering is preserved.

    Note: the number of subsets is 2**n for n optional sections.

    Returns:
        A list of unique format strings with no optional-section delimiters
        left for the sections that were found.
    """
    optional_components = _extract_optional_components(format_string)
    num_optionals = len(optional_components)

    output_formats = []
    for size in range(num_optionals + 1):
        for chosen in combinations(range(num_optionals), size):
            new_format = format_string
            # Components are ordered outermost-first, so an enclosing section
            # is resolved before the sections nested inside it.
            for i, component in enumerate(optional_components):
                bracketed = f'{opening_char}{component}{closing_char}'
                replacement = component if i in chosen else ''
                new_format = new_format.replace(bracketed, replacement)
            output_formats.append(new_format)

    # dict.fromkeys drops repeats while keeping first-seen order.
    return list(dict.fromkeys(output_formats))
if __name__ == "__main__":
    # Example usage.
    # The trailing "Z" is matched as a literal character (the format has no
    # %z directive), so the parsed datetimes are naive, not UTC-aware.
    format_string = "%Y-%m-%d[T%H:%M:%S[.%f]][Z]"
    optional_format_list = _generate_date_formats(format_string)

    date_string1 = "2023-06-16T03:09:23.155Z"
    date_string2 = "2023-06-16T02:53:18Z"
    date_string3 = "2023-06-16"

    datetime_obj1 = parse_datetime(date_string1, optional_format_list)
    datetime_obj2 = parse_datetime(date_string2, optional_format_list)
    datetime_obj3 = parse_datetime(date_string3, optional_format_list)

    print(datetime_obj1)  # 2023-06-16 03:09:23.155000
    print(datetime_obj2)  # 2023-06-16 02:53:18
    print(datetime_obj3)  # 2023-06-16 00:00:00
Workaround for this problem, in case anyone else ends up here and just wants something that works: a combination of proposed options C and D in the OP. Apologies if dropping a code block isn't proper etiquette.
Feature or enhancement
I propose that one or more new % format codes be added to strptime to allow parsing of timestamps which are either whole seconds, or fractions of a second.
The current
%f
format code will throw an error if the input string has no microsecond component.

Pitch
As an example, I would like to parse both of the following strings with the same code.
This currently fails with:
In my code I could write a
try/except
block to catch the error and try with a different format string. I think it would be nice if strptime
could handle that for me, although I do know that the implementation of this function depends on the system, so this might be a tricky change.

There are a few options.
Option A
Add a new format code which is similar to
%f
, but can handle an empty microsecond component. I note that %F
is not yet taken, so we could use that. Note that %f
today doesn't expect a leading ".".
So in the above example you'd use
%Y-%m-%d %H:%M:%S%F
.

Option B
Add a new format code for parsing seconds and optionally microseconds in one go. I note that
%s
is not yet taken, so we could use that. So in the above example you'd use
%Y-%m-%d %H:%M:%s
.

Option C
Add
[]
to make a component optional. So in the above example you'd use
%Y-%m-%d %H:%M:%S[.%f]
. My guess is that this would be quite a substantial change.
Option D
Modify the arguments to strptime so that it can take a list of format strings. It tries them in order until one succeeds.
So in the above example you'd use
Since a list of strings and a single string are both iterables of strings, this might be a bit fiddly to implement. You could add a new
formats
argument, and require that either format
or formats
is passed in, although that kind of clutters the function signature.

Previous discussion
https://bugs.python.org/issue1982 @abalkin https://bugs.python.org/issue1158