paulbartrum / jurassic

A .NET library to parse and execute JavaScript code.
MIT License
873 stars 122 forks source link

DateParser #179

Closed zgsxtyzgd closed 4 years ago

zgsxtyzgd commented 4 years ago

in Library/Date/DateParser.cs "2020-10-01 05:12+1" In v8 is correct, but is wrong in Jurassic Regex regex = new Regex( @"^( (?<year> [0-9]{4} ) (- (?<month> [0-9]{2} ) (- (?<day> [0-9]{2} ))?)?) (T (?<hour> [0-9]{2} ) : (?<minute> [0-9]{2} ) (: (?<second> [0-9]{2} ) (\. (?<millisecond> [0-9]{1,3} ) [0-9]* )?)? (?<zone> Z | (?<zoneHours> [+-][0-9]{2} ) (: (?<zoneMinutes> [0-9]{2} ))? )?)?$", RegexOptions.ExplicitCapture | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace) Should be modified: Regex regex = new Regex( @"^( (?<year> [0-9]{4} ) (- (?<month> [0-9]{1,2} ) (- (?<day> [0-9]{1,2} ))?)?) ((T|(\s+)) (?<hour> [0-9]{1,2} ) : (?<minute> [0-9]{1,2} ) (: (?<second> [0-9]{1,2} ) (\. (?<millisecond> [0-9]{1,3} ) [0-9]* )?)? (?<zone> Z | (?<zoneHours> [\+\-][0-9]{1,2} ) (: (?<zoneMinutes> [0-9]{1,2} ))? )?)?$", RegexOptions.ExplicitCapture | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace) Day time string should contain a T part used in faster processing instead of using the regx expression, it is too slow DateTime d; if (Str2UTCDate(input, out d)) return d; // Match the regex. 
var match = regex_Parser.Match(input); if (match.Success == true) { Str2UTCDate: `
    /// <summary>
    /// Converts an ASCII decimal digit character to its numeric value.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <param name="v">Receives 0-9 when <paramref name="c"/> is a digit, otherwise -1.</param>
    /// <returns><c>true</c> if <paramref name="c"/> is in the range '0'..'9'.</returns>
    static bool checkNumber(char c, out int v)
    {
        int digit = c - '0';
        if (digit < 0 || digit > 9)
        {
            v = -1;
            return false;
        }
        v = digit;
        return true;
    }

    /// <summary>
    /// Reads exactly <paramref name="count"/> decimal digits starting at <paramref name="offset"/>.
    /// </summary>
    /// <param name="str">The string being parsed.</param>
    /// <param name="offset">The read position; advanced past every character that was examined.</param>
    /// <param name="count">The number of digits required.</param>
    /// <param name="value">Receives the decoded number (only meaningful when the method returns <c>true</c>).</param>
    /// <returns><c>false</c> if the string ends early or a non-digit is encountered.</returns>
    static bool readDigits(string str, ref int offset, int count, out int value)
    {
        value = 0;
        for (int i = 0; i < count; i++)
        {
            int digit;
            if (offset >= str.Length || !checkNumber(str[offset++], out digit))
            {
                return false;
            }
            value = value * 10 + digit;
        }
        return true;
    }

    /// <summary>
    /// Returns <c>true</c> if the character at <paramref name="offset"/> is the date/time
    /// separator 'T' (either case).
    /// </summary>
    static bool isTimeSeparator(string str, int offset)
    {
        return offset < str.Length && (str[offset] == 'T' || str[offset] == 't');
    }

    /// <summary>
    /// Returns <c>true</c> if <paramref name="offset"/> addresses the final character of the
    /// string and that character is 'Z' (either case).
    /// </summary>
    static bool isTrailingZ(string str, int offset)
    {
        return offset == str.Length - 1 && (str[offset] == 'Z' || str[offset] == 'z');
    }

    /// <summary>
    /// Fast, strict parser for date-time strings of the form
    /// <c>YYYY[-MM[-DD]]THH:mm[:ss[.fff...]][Z]</c>. The 'T' separator and the time part are
    /// mandatory; numeric zone offsets such as <c>+01:00</c> are not handled here.
    /// </summary>
    /// <param name="str">The text to parse.</param>
    /// <param name="date">
    /// Receives the parsed value: <see cref="DateTimeKind.Utc"/> when the string ends in 'Z',
    /// otherwise <see cref="DateTimeKind.Local"/>.  Set to <c>default(DateTime)</c> on failure.
    /// </param>
    /// <returns>
    /// <c>true</c> if the string was recognised; <c>false</c> if it does not match this strict
    /// format or a component is out of range.
    /// </returns>
    static bool Str2UTCDate(string str, out DateTime date)
    {
        date = new DateTime();
        if (str == null)
        {
            return false;
        }

        int length = str.Length;
        int offset = 0;
        int year, month = 1, day = 1;
        bool utc = false;

        // Year: four digits.  DateTime cannot represent year 0, so reject it here
        // instead of letting the constructor throw.
        if (!readDigits(str, ref offset, 4, out year) || year < 1)
        {
            return false;
        }

        if (!isTimeSeparator(str, offset))
        {
            // Month: "-MM".
            if (offset >= length || str[offset++] != '-')
            {
                return false;
            }
            if (!readDigits(str, ref offset, 2, out month) || month < 1 || month > 12)
            {
                return false;
            }

            if (!isTimeSeparator(str, offset))
            {
                // Day: "-DD".  DaysInMonth applies the full Gregorian leap-year rule
                // (divisible by 4, except centuries not divisible by 400).
                if (offset >= length || str[offset++] != '-')
                {
                    return false;
                }
                if (!readDigits(str, ref offset, 2, out day) || day < 1 || day > DateTime.DaysInMonth(year, month))
                {
                    return false;
                }

                // A full date must be followed by the time separator.
                if (!isTimeSeparator(str, offset))
                {
                    return false;
                }
            }
        }

        // Skip the 'T'.
        offset++;

        // Hour: 24 is tolerated here; the total is range-checked further down so that
        // only exactly 24:00:00 survives.
        int hour;
        if (!readDigits(str, ref offset, 2, out hour) || hour > 24)
        {
            return false;
        }
        int ms = hour * 60 * 60 * 1000;

        // Minute: ":mm".
        if (offset >= length || str[offset++] != ':')
        {
            return false;
        }
        int minute;
        if (!readDigits(str, ref offset, 2, out minute) || minute > 59)
        {
            return false;
        }
        ms += minute * 60 * 1000;

        bool done = offset >= length;
        if (!done && isTrailingZ(str, offset))
        {
            utc = true;
            done = true;
        }

        if (!done)
        {
            // Second: ":ss".
            if (str[offset++] != ':')
            {
                return false;
            }
            int second;
            if (!readDigits(str, ref offset, 2, out second) || second > 59)
            {
                return false;
            }
            ms += second * 1000;

            done = offset >= length;
            if (!done && isTrailingZ(str, offset))
            {
                utc = true;
                done = true;
            }
        }

        if (!done)
        {
            // Fraction: "." followed by at least one digit.  Only the first three digits
            // contribute; the scale reaches zero after that, so further digits are
            // validated but ignored.
            if (str[offset++] != '.')
            {
                return false;
            }
            int scale = 100;
            while (!done)
            {
                int digit;
                if (offset >= length || !checkNumber(str[offset++], out digit))
                {
                    return false;
                }
                ms += digit * scale;
                scale /= 10;

                done = offset >= length;
                if (!done && isTrailingZ(str, offset))
                {
                    utc = true;
                    done = true;
                }
            }
        }

        // Rejects e.g. 24:00:01 while still allowing 24:00:00 (midnight of the next day).
        if (ms > 86400000)
        {
            return false;
        }

        DateTime midnight = new DateTime(year, month, day, 0, 0, 0, utc ? DateTimeKind.Utc : DateTimeKind.Local);

        // Guard against running past DateTime.MaxValue (9999-12-31T24:00).
        if (ms > (DateTime.MaxValue - midnight).TotalMilliseconds)
        {
            return false;
        }

        date = midnight.AddMilliseconds(ms);
        return true;
    }`
zgsxtyzgd commented 4 years ago

On line 115 of the source code the UTC handling is not quite right; it should be: `bool utc = false; // Parse the zone information (the default is UTC). if (match.Groups["zone"].Value != string.Empty) { utc = true; if (match.Groups["zone"].Value != "Z") { // Parse the numeric values. int zoneHours, zoneMinutes = 0; if (int.TryParse(match.Groups["zoneHours"].Value, out zoneHours) == false) return DateTime.MinValue; if (match.Groups["zoneMinutes"].Value != string.Empty) { if (int.TryParse(match.Groups["zoneMinutes"].Value, out zoneMinutes) == false) return DateTime.MinValue; } // Validate the components. if (zoneHours >= 24) return DateTime.MinValue; if (zoneMinutes >= 60) return DateTime.MinValue;

                    // Calculate the zone offset, in minutes.
                    offsetInMinutes -= zoneHours < 0 ? zoneHours * 60 - zoneMinutes : zoneHours * 60 + zoneMinutes;
                }
            }
           `
zgsxtyzgd commented 4 years ago

I'm sorry, my English is very bad

paulbartrum commented 4 years ago

I've checked in a fix, and "2020-10-01 05:12+1" now parses successfully. Thanks for the bug report!

zgsxtyzgd commented 4 years ago

I've checked in a fix, and "2020-10-01 05:12+1" now parses successfully. Thanks for the bug report!

For date strings that contain a t separator, such as 2020-04-20t20:34 or 2020t20:34, you should try a strict, hand-written parse first and fall back to the regular expression only when the strict parse fails; the strict check is faster than a regular expression. There is also a time zone issue: a string with a trailing Z should be treated as UTC, while a string without a Z should be treated as local time. Thank you for your work; these are only my suggestions. Thank you.