euro-cordex / cc-plugin-cc6

Compliance checker plugin to test compliance with CORDEX-CMIP6 archive specifications
Apache License 2.0
0 stars 0 forks source link

Convert POSIX regex to Python regex #18

Open larsbuntemeyer opened 1 month ago

larsbuntemeyer commented 1 month ago

Some prototype code:

import re

def convert_posix_to_python(posix_regex):
    r"""
    Convert common POSIX regular expressions to Python regular expressions.

    Two rewrites are applied, in this order:

    1. A POSIX character class that forms a whole bracket expression, such
       as ``[[:digit:]]``, is replaced by its Python counterpart (``\d``).
       Class names that are not in the table below are left untouched, and
       so are classes embedded in a larger bracket expression such as
       ``[[:alpha:]_]``.
    2. Escaped interval braces (``\{`` and ``\}``) are turned into plain
       ``{`` and ``}``.

    Note that for ``str`` patterns Python's ``\d``, ``\s`` and ``\w`` are
    Unicode-aware; compile the result with ``re.ASCII`` if ASCII-only
    matching is required.

    Parameters
    ----------
    posix_regex : str
        The POSIX-style pattern to translate.

    Returns
    -------
    str
        The translated pattern.
    """
    # POSIX class name -> Python equivalent of ``[[:name:]]``.
    python_classes = {
        'alnum': r'[a-zA-Z0-9]',
        'alpha': r'[a-zA-Z]',
        'digit': r'\d',
        'xdigit': r'[0-9a-fA-F]',
        'lower': r'[a-z]',
        'upper': r'[A-Z]',
        'blank': r'[ \t]',
        'space': r'\s',
        'punct': r'[!"#$%&\'()*+,\-./:;<=>?@[\\\]^_`{|}~]',
        'word': r'\w',
    }

    def _translate_class(match):
        # Fall back to the matched text so unknown classes pass through.
        return python_classes.get(match.group(1), match.group(0))

    # A callable replacement is used on purpose: its return value is
    # inserted verbatim. A replacement *string* would be parsed as a
    # template, where ``\d``/``\s``/``\w`` raise ``re.error`` (bad escape)
    # and ``\\`` collapses to a single backslash.
    converted = re.sub(r'\[\[:([a-z]+):\]\]', _translate_class, posix_regex)

    # Replace POSIX escaped interval braces with Python's plain braces.
    return converted.replace(r'\{', '{').replace(r'\}', '}')

# Demonstration: translate a pattern of the form r<N>i<N>p<N>f<N>, written
# with POSIX digit classes and escaped interval braces, and print the result.
posix_regex = (
    r'r[[:digit:]]\{1,\}i[[:digit:]]\{1,\}'
    r'p[[:digit:]]\{1,\}f[[:digit:]]\{1,\}$'
)
python_regex = convert_posix_to_python(posix_regex)
print("Converted Python regex:", python_regex)
sol1105 commented 1 week ago

That looks nice, we can add this in the next PR.