h0tk3y / better-parse

A nice parser combinator library for Kotlin
Apache License 2.0
419 stars 42 forks source link

"null cannot be cast to non-null type" when parsing generates > 16 token matches #64

Open nikclayton opened 1 year ago

nikclayton commented 1 year ago

This might be a duplicate of https://github.com/h0tk3y/better-parse/issues/29. But there's no solution in that issue, and it's almost three years old.

Using Kotlin 1.8.20, and better-parse 0.4.4, this code (which is supposed to parse an Android resource qualifier string, like values-en-rGB-land-v28) has a runtime error:

/** The `mcc`/`mnc` parts of a resource qualifier; [mnc] is null when it is absent. */
data class MobileCodes(
    val mcc: String,
    val mnc: String? = null
)

/** A locale qualifier: a language plus an optional region and an optional script. */
data class Locale(
    val lang: String,
    val region: String? = null,
    val script: String? = null
)

/** The parsed qualifiers of a `values-*` directory name; each part is null when absent. */
data class ConfigurationQualifier(
    val mobileCodes: MobileCodes? = null,
    val locale: Locale? = null
)

/**
 * Parse an Android `values-*` resource directory name and extract the configuration qualifiers.
 *
 * The directory name has the following components in a specific order, listed in
 * https://developer.android.com/guide/topics/resources/providing-resources#table2
 *
 * Only the mobile-code and locale qualifiers are surfaced in the resulting
 * [ConfigurationQualifier]; the other qualifiers are recognised but their matches are not used.
 */
class ValuesParser : Grammar<ConfigurationQualifier>() {
    // Tokenizers

    private val values by literalToken("values")
    private val sep by literalToken("-")

    // `mcc310` or `mcc310-mnc004`. The hyphen before `mnc` is part of the token so that
    // `mobileCodesParser` can split the two halves on "-".
    private val mobileCodes by regexToken("(?i:mcc\\d+)(?i:-mnc\\d+)?")

    // `en` or `en-rGB`; the look-ahead requires the token to end at a separator or end of input.
    // NOTE(review): a bare three-letter qualifier such as `car` also satisfies this pattern;
    // confirm how the tokenizer prioritises it against `uiMode`.
    private val locale by regexToken("(?i:[a-z]{2,3})(?i:-r([a-z]{2,3}))?(?=-|$)")
    private val bcpStartTag by regexToken("(?i:b\\+[a-z]{2,3})")
    private val bcpSubtag by regexToken("(?i:\\+[a-z]+)")

    private val layoutDirection by regexToken("(?i:ldrtl|ldltr)")
    private val smallestWidth by regexToken("(?i:sw\\d+dp)")
    private val availableDimen by regexToken("(?i:[wh]\\d+dp)")
    private val screenSize by regexToken("(?i:small|normal|large|xlarge)")
    private val screenAspect by regexToken("(?i:long|notlong)")
    private val roundScreen by regexToken("(?i:round|notround)")
    private val wideColorGamut by regexToken("(?i:widecg|nowidecg)")
    private val highDynamicRange by regexToken("(?i:highdr|lowdr)")
    private val screenOrientation by regexToken("(?i:port|land)")
    private val uiMode by regexToken("(?i:car|desk|television|appliance|watch|vrheadset)")
    private val nightMode by regexToken("(?i:night|notNight)")

    // Density buckets are ldpi, mdpi, hdpi, xhdpi, xxhdpi, xxxhdpi, nodpi, tvdpi, anydpi, <n>dpi.
    private val screenDpi by regexToken("(?i:(?:l|m|h|xh|xxh|xxxh|no|tv|any|\\d+)dpi)")
    private val touchScreen by regexToken("(?i:notouch|finger)")
    private val keyboardAvailability by regexToken("(?i:keysexposed|keyshidden|keyssoft)")
    private val inputMethod by regexToken("(?i:nokeys|qwerty|12key)")
    private val navKeyAvailability by regexToken("(?i:navexposed|navhidden)")
    private val navMethod by regexToken("(?i:nonav|dpad|trackball|wheel)")
    private val platformVersion by regexToken("(?i:v\\d+)")

    // Parsers

    // Splits `mcc310-mnc004` into its two halves; `mnc` is null when only the `mcc` part is present.
    private val mobileCodesParser by mobileCodes use {
        val parts = this.text.split("-")
        MobileCodes(mcc = parts[0], mnc = parts.getOrNull(1))
    }

    // Splits `en-rGB` into language and region. The split ignores case because the `locale`
    // token itself is matched case-insensitively.
    private val localeParser by locale use {
        val parts = this.text.split("-r".toRegex(RegexOption.IGNORE_CASE), 2)
        Locale(lang = parts[0], region = parts.getOrNull(1))
    }

    // `b+lang` followed by any number of `+subtag` tokens.
    // NOTE(review): the first subtag is always taken as the script and the second as the region,
    // so `b+en+US` would yield script = "US"; confirm whether that is intended.
    private val bcpLocaleParser = bcpStartTag and zeroOrMore(bcpSubtag) use {
        Locale(
            lang = this.t1.text.split("+")[1],
            script = this.t2.getOrNull(0)?.text?.split("+")?.get(1),
            region = this.t2.getOrNull(1)?.text?.split("+")?.get(1)
        )
    }

    // NOTE: this chain combines 20 value parsers (the leading `skip(values)` adds no tuple
    // element). The result type below nests as Tuple5<Tuple16<...>, ...>, and parsing fails at
    // runtime with "null cannot be cast to non-null type ...Tuple16<...>". Keeping the chain to
    // 16 value parsers or fewer avoids the failure.
    private val qualifier = skip(values) and
        optional(skip(sep) and mobileCodesParser) and
        optional(skip(sep) and (localeParser or bcpLocaleParser)) and
        optional(skip(sep) and layoutDirection) and
        optional(skip(sep) and smallestWidth) and
        optional(skip(sep) and availableDimen) and
        optional(skip(sep) and screenSize) and
        optional(skip(sep) and screenAspect) and
        optional(skip(sep) and roundScreen) and
        optional(skip(sep) and wideColorGamut) and
        optional(skip(sep) and highDynamicRange) and
        optional(skip(sep) and screenOrientation) and
        optional(skip(sep) and uiMode) and
        optional(skip(sep) and nightMode) and
        optional(skip(sep) and screenDpi) and
        optional(skip(sep) and touchScreen) and
        optional(skip(sep) and keyboardAvailability) and
        optional(skip(sep) and inputMethod) and
        optional(skip(sep) and navKeyAvailability) and
        optional(skip(sep) and navMethod) and
        optional(skip(sep) and platformVersion)

    private val qualifierParser by qualifier use {
        // Here, the type of `this` is
        // Tuple5<Tuple16<MobileCodes?, Locale?, TokenMatch?, TokenMatch?, TokenMatch?, TokenMatch?, TokenMatch?, TokenMatch?, TokenMatch?, TokenMatch?, TokenMatch?, TokenMatch?, TokenMatch?, TokenMatch?, TokenMatch?, TokenMatch?>, TokenMatch?, TokenMatch?, TokenMatch?, TokenMatch?>.`<anonymous>`(): ConfigurationQualifier
        // so the first two qualifiers live inside the nested Tuple16 at `t1`.
        ConfigurationQualifier(
            mobileCodes = this.t1.t1,
            locale = this.t1.t2
        )
    }

    override val rootParser by qualifierParser
}

The error is:

null cannot be cast to non-null type com.github.h0tk3y.betterParse.utils.Tuple16<app.tusky.mklanguages.MobileCodes?, app.tusky.mklanguages.Locale?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?>
java.lang.NullPointerException: null cannot be cast to non-null type com.github.h0tk3y.betterParse.utils.Tuple16<app.tusky.mklanguages.MobileCodes?, app.tusky.mklanguages.Locale?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?, com.github.h0tk3y.betterParse.lexer.TokenMatch?>
    at app.tusky.mklanguages.ValuesParser$special$$inlined$and4$2.invoke(andFunctions.kt:42)
    at app.tusky.mklanguages.ValuesParser$special$$inlined$and4$2.invoke(andFunctions.kt:41)
    at com.github.h0tk3y.betterParse.combinators.AndCombinator.tryParse(AndCombinator.kt:72)
    at com.github.h0tk3y.betterParse.combinators.MapCombinator.tryParse(MapCombinator.kt:14)
    at com.github.h0tk3y.betterParse.parser.ParserKt.tryParseToEnd(Parser.kt:18)
    at com.github.h0tk3y.betterParse.parser.ParserKt.parseToEnd(Parser.kt:29)
    at com.github.h0tk3y.betterParse.grammar.GrammarKt.parseToEnd(Grammar.kt:70)
    at app.tusky.mklanguages.ValuesParserTest$ParseLocale.returns the expected locale(ValuesParserTest.kt:54)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.base/java.lang.reflect.Method.invoke(Method.java:566)
    at org.junit.platform.commons.util.ReflectionUtils.invokeMethod(ReflectionUtils.java:727)
    at org.junit.jupiter.engine.execution.MethodInvocation.proceed(MethodInvocation.java:60)
    at org.junit.jupiter.engine.execution.InvocationInterceptorChain$ValidatingInvocation.proceed(InvocationInterceptorChain.java:131)
    at org.junit.jupiter.engine.extension.TimeoutExtension.intercept(TimeoutExtension.java:156)
    at org.junit.jupiter.engine.extension.TimeoutExtension.interceptTestableMethod(TimeoutExtension.java:147)
    at org.junit.jupiter.engine.extension.TimeoutExtension.interceptTestTemplateMethod(TimeoutExtension.java:94)
    at org.junit.jupiter.engine.execution.InterceptingExecutableInvoker$ReflectiveInterceptorCall.lambda$ofVoidMethod$0(InterceptingExecutableInvoker.java:103)
    at org.junit.jupiter.engine.execution.InterceptingExecutableInvoker.lambda$invoke$0(InterceptingExecutableInvoker.java:93)
    at org.junit.jupiter.engine.execution.InvocationInterceptorChain$InterceptedInvocation.proceed(InvocationInterceptorChain.java:106)
    at org.junit.jupiter.engine.execution.InvocationInterceptorChain.proceed(InvocationInterceptorChain.java:64)
    at org.junit.jupiter.engine.execution.InvocationInterceptorChain.chainAndInvoke(InvocationInterceptorChain.java:45)
    at org.junit.jupiter.engine.execution.InvocationInterceptorChain.invoke(InvocationInterceptorChain.java:37)
    at org.junit.jupiter.engine.execution.InterceptingExecutableInvoker.invoke(InterceptingExecutableInvoker.java:92)
    at org.junit.jupiter.engine.execution.InterceptingExecutableInvoker.invoke(InterceptingExecutableInvoker.java:86)
    at org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.lambda$invokeTestMethod$7(TestMethodTestDescriptor.java:217)
    at org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
    at org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.invokeTestMethod(TestMethodTestDescriptor.java:213)
    at org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.execute(TestMethodTestDescriptor.java:138)
    at org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.execute(TestMethodTestDescriptor.java:68)
    at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$6(NodeTestTask.java:151)
    at org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
    at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$8(NodeTestTask.java:141)
    at org.junit.platform.engine.support.hierarchical.Node.around(Node.java:137)
    at org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$9(NodeTestTask.java:139)
    at org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
    at org.junit.platform.engine.support.hierarchical.NodeTestTask.executeRecursively(NodeTestTask.java:138)
    at org.junit.platform.engine.support.hierarchical.NodeTestTask.execute(NodeTestTask.java:95)
    at org.junit.platform.engine.support.hierarchical.ForkJoinPoolHierarchicalTestExecutorService$ExclusiveTask.compute(ForkJoinPoolHierarchicalTestExecutorService.java:202)
    at java.base/java.util.concurrent.RecursiveAction.exec(RecursiveAction.java:189)
    at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:290)
    at java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1020)
    at java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1656)
    at java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1594)
    at java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:183)

If the number of parsers is kept to 16 or below it works. E.g., by making the following modifications to the bottom half of the file:

// Replace `qualifier` and `qualifierParser` with:

    // Workaround variant: the chain stops after 16 value parsers (the leading `skip(values)`
    // adds no tuple element), so the result is a single flat tuple rather than a nested one.
    // The last four qualifiers are commented out below and are therefore not parsed.
    private val qualifier = skip(values) and
        optional(skip(sep) and mobileCodesParser) and
        optional(skip(sep) and (localeParser or bcpLocaleParser)) and
        optional(skip(sep) and layoutDirection) and
        optional(skip(sep) and smallestWidth) and
        optional(skip(sep) and availableDimen) and
        optional(skip(sep) and screenSize) and
        optional(skip(sep) and screenAspect) and
        optional(skip(sep) and roundScreen) and
        optional(skip(sep) and wideColorGamut) and
        optional(skip(sep) and highDynamicRange) and
        optional(skip(sep) and screenOrientation) and
        optional(skip(sep) and uiMode) and
        optional(skip(sep) and nightMode) and
        optional(skip(sep) and screenDpi) and
        optional(skip(sep) and touchScreen) and
        optional(skip(sep) and keyboardAvailability) // and
//        optional(skip(sep) and inputMethod) and
//        optional(skip(sep) and navKeyAvailability) and
//        optional(skip(sep) and navMethod) and
//        optional(skip(sep) and platformVersion)

    // The mobile codes and the locale are read directly from the parsed tuple as its first
    // two components.
    private val qualifierParser by qualifier use {
        ConfigurationQualifier(mobileCodes = t1, locale = t2)
    }

then the string values-en-rGB-land successfully parses.

h0tk3y commented 1 year ago

Thanks for this report! Now that a use case has appeared for such a long and-chain, I think I will have to increase the number of generated tuple classes and the supported chain length. If building the library from sources in an included build is fine for you, you can try to tweak this in the build script (https://github.com/h0tk3y/better-parse/blob/af4599c04f84463a4b708e7e1385217b41ae7b9e/build.gradle.kts#L65). As an alternative workaround, consider splitting the rule into several shorter ones.

nikclayton commented 1 year ago

Thanks -- I'll wait for a new release with a fix at the moment.

nikclayton commented 1 year ago

Hi -- do you have plans to bump maxTupleSize and include that in a release any time soon?