apache / fury

A blazingly fast multi-language serialization framework powered by JIT and zero-copy.
https://fury.apache.org/
Apache License 2.0
3.08k stars 247 forks source link

Long meta string than 32767 is not allowed #1829

Closed davidnadeau closed 3 weeks ago

davidnadeau commented 2 months ago

Search before asking

Version

0.7.0

Component(s)

Java, Other

Minimal reproduce step

FuryUtils.scala

package testing

import testing.ListingFeatures
import org.apache.fury._
import org.apache.fury.config._
import org.apache.fury.resolver.MetaContext

/** Shared Fury configuration plus round-trip helpers for [[ListingFeatures]].
  *
  * The object builds a pooled, thread-safe Fury handle with class registration
  * required and meta sharing enabled, then registers every class listed at the
  * bottom of this object during its own initialization.
  */
object FuryUtils {
  // Number of available processors; passed as both size arguments of the pool below.
  val cores = Runtime.getRuntime().availableProcessors()

  // Built lazily; the registration loop at the end of this object is its first use.
  lazy val fury = Fury
    .builder()
    .withLanguage(Language.JAVA)
    .withScalaOptimizationEnabled(true)
    .requireClassRegistration(true)
    .withMetaShare(true)
    .buildThreadSafeFuryPool(cores, cores)

  // A single MetaContext instance, installed before every serialize and deserialize call.
  // NOTE(review): the same context is handed to whichever pooled Fury instance runs the
  // callback, for both directions — confirm against the Fury meta-share documentation
  // that sharing one context this way is supported.
  val context: MetaContext = new MetaContext()

  /** Serializes `value` to bytes after installing the shared [[context]].
    *
    * @param value the features to encode
    * @return the bytes produced by Fury
    */
  def toFury(value: ListingFeatures): Array[Byte] =
    fury.execute { instance =>
      instance.getSerializationContext.setMetaContext(context)
      instance.serialize(value)
    }

  /** Deserializes `bytes` after installing the shared [[context]] and casts the
    * result to [[ListingFeatures]].
    *
    * @param bytes a payload previously produced by [[toFury]]
    * @return the decoded features
    */
  def fromFury(bytes: Array[Byte]): ListingFeatures =
    fury.execute { instance =>
      instance.getSerializationContext.setMetaContext(context)
      val decoded = instance.deserialize(bytes)
      decoded.asInstanceOf[ListingFeatures]
    }

  // Classes are looked up by name and registered one at a time, in the listed order.
  // Presumably the order determines the ids Fury assigns — keep it stable; verify.
  Seq(
    "scala.collection.generic.DefaultSerializationProxy",
    "scala.collection.immutable.$colon$colon",
    "scala.collection.immutable.Nil$",
    "scala.collection.IterableFactory$ToFactory",
    "scala.collection.immutable.List$",
    "scala.collection.generic.SerializeEnd$",
    "scala.Some",
    "testing.TimestampedUserId",
    "testing.ListingCountFeatures",
    "testing.ListingTimeseriesFeatures",
    "testing.ListingFeatures"
  ).foreach(className => fury.register(Class.forName(className)))
}

ListingFeatures.scala

package testing

import java.time.Instant

/** A user id paired with the instant it was recorded at.
  *
  * @param timestamp the instant attached to this entry
  * @param userId    the user id attached to this entry
  */
case class TimestampedUserId(timestamp: Instant, userId: Long)

/** Common interface of the feature families declared in this file. */
trait FamilyType {

  /** Reports whether this family is empty; each implementation defines what that means. */
  def isEmpty: Boolean

}

/** Four optional `Long` features; every field defaults to `None`. */
case class ListingCountFeatures(
    feature1: Option[Long] = None,
    feature2: Option[Long] = None,
    feature3: Option[Long] = None,
    feature4: Option[Long] = None
) extends FamilyType {

  /** True exactly when all four features are `None`, i.e. the instance equals
    * the all-defaults value.
    */
  def isEmpty: Boolean = this match {
    case ListingCountFeatures(None, None, None, None) => true
    case _                                            => false
  }
}

/** Four optional lists of [[TimestampedUserId]]; every field defaults to `None`. */
case class ListingTimeseriesFeatures(
    feature5: Option[List[TimestampedUserId]] = None,
    feature6: Option[List[TimestampedUserId]] = None,
    feature7: Option[List[TimestampedUserId]] = None,
    feature8: Option[List[TimestampedUserId]] = None
) extends FamilyType {

  /** True exactly when all four features are `None`, i.e. the instance equals
    * the all-defaults value. A `Some(Nil)` field does not count as empty.
    */
  def isEmpty: Boolean = this match {
    case ListingTimeseriesFeatures(None, None, None, None) => true
    case _                                                 => false
  }
}

/** Top-level record that gets serialized: an id plus two optional feature families.
  *
  * @param id         the listing id
  * @param counts     optional count features; defaults to `None`
  * @param timeseries optional time-series features; defaults to `None`
  */
case class ListingFeatures(
    id: Long,
    counts: Option[ListingCountFeatures] = None,
    timeseries: Option[ListingTimeseriesFeatures] = None
)

Generators.scala (generates data for the test)

package testing

package testing

import java.time.Instant

/** Builds the single [[ListingFeatures]] value used as test data. */
object Generators {
  // Fixed seed, so the sequence of random longs and list lengths is repeatable.
  val random = new scala.util.Random(0)

  /** Draws a length in `[0, 51)` and then builds that many entries, each stamped
    * with `Instant.now()` and a fresh random user id.
    */
  private def randomEvents(): Option[List[TimestampedUserId]] = {
    val count = random.between(0, 51)
    Some(List.fill(count)(TimestampedUserId(Instant.now(), random.nextLong())))
  }

  // Draw order matters for reproducibility: id first, then the four counts,
  // then the four event lists in field order.
  val listingFeatures = {
    val listingId = random.nextLong()

    val countFeatures = ListingCountFeatures(
      feature1 = Some(random.nextLong()),
      feature2 = Some(random.nextLong()),
      feature3 = Some(random.nextLong()),
      feature4 = Some(random.nextLong())
    )

    val timeseriesFeatures = ListingTimeseriesFeatures(
      feature5 = randomEvents(),
      feature6 = randomEvents(),
      feature7 = randomEvents(),
      feature8 = randomEvents()
    )

    ListingFeatures(
      id = listingId,
      counts = Some(countFeatures),
      timeseries = Some(timeseriesFeatures)
    )
  }
}

SerializationBenchmarks.scala

package testing

import testing.serialization._
import org.openjdk.jmh.annotations._

import java.util.concurrent.TimeUnit

/** JMH benchmark measuring a Fury serialize-then-deserialize round trip of one
  * [[ListingFeatures]] value.
  */
@State(Scope.Benchmark)
class SerializationBenchmarks {
  // Assigned in setup(); left at its default until then.
  var features: ListingFeatures = _

  /** Loads the generated test value into [[features]]. */
  @Setup(Level.Trial)
  def setup(): Unit =
    features = Generators.listingFeatures

  /** Encodes [[features]] with Fury and decodes the resulting bytes again.
    *
    * @return the decoded value
    */
  @Benchmark
  @BenchmarkMode(Array(Mode.AverageTime))
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  @Warmup(iterations = 100)
  @Measurement(iterations = 10000)
  def furySerializationRoundTrip(): ListingFeatures = {
    val encoded = FuryUtils.toFury(features)
    FuryUtils.fromFury(encoded)
  }
}

build.sbt

// Build definition for the reproduction project.
ThisBuild / version      := "0.0.0"
ThisBuild / scalaVersion := "2.13.14"

// Single root project with Fury core and the JMH core / annotation-processor artifacts.
lazy val root = project
  .in(file("."))
  .settings(
    name := "testing",
    libraryDependencies ++= Seq(
      "org.apache.fury" % "fury-core"                % "0.7.0",
      "org.openjdk.jmh" % "jmh-core"                 % "1.37",
      "org.openjdk.jmh" % "jmh-generator-annprocess" % "1.37"
    )
  )

What did you expect to see?

The benchmark to run.

What did you see instead?

[info] java.lang.RuntimeException: java.lang.IllegalArgumentException: Long meta string than 32767 is not allowed
[info]  at org.apache.fury.pool.ThreadPoolFury.execute(ThreadPoolFury.java:82)
[info]  at testing.serialization.FuryUtils$.fromFury(FuryUtils.scala:29)
[info]  at testing.SerializationBenchmarks.furySerializationRoundTrip(SerializationBenchmarks.scala:23)
[info]  at com.etsy.cachetesting.jmh_generated.SerializationBenchmarks_furySerializationRoundTrip_jmhTest.furySerializationRoundTrip_avgt_jmhStub(SerializationBenchmarks_furySerializationRoundTrip_jmhTest.java:236)
[info]  at com.etsy.cachetesting.jmh_generated.SerializationBenchmarks_furySerializationRoundTrip_jmhTest.furySerializationRoundTrip_AverageTime(SerializationBenchmarks_furySerializationRoundTrip_jmhTest.java:176)
[info]  at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
[info]  at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
[info]  at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
[info]  at java.base/java.lang.reflect.Method.invoke(Method.java:568)
[info]  at org.openjdk.jmh.runner.BenchmarkHandler$BenchmarkTask.call(BenchmarkHandler.java:527)
[info]  at org.openjdk.jmh.runner.BenchmarkHandler$BenchmarkTask.call(BenchmarkHandler.java:504)
[info]  at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
[info]  at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:539)
[info]  at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
[info]  at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
[info]  at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
[info]  at java.base/java.lang.Thread.run(Thread.java:833)
[info] Caused by: java.lang.IllegalArgumentException: Long meta string than 32767 is not allowed
[info]  at org.apache.fury.util.Preconditions.checkArgument(Preconditions.java:58)
[info]  at org.apache.fury.meta.MetaStringEncoder.encode(MetaStringEncoder.java:80)
[info]  at org.apache.fury.resolver.ClassResolver.populateBytesToClassInfo(ClassResolver.java:1713)
[info]  at org.apache.fury.resolver.ClassResolver.loadBytesToClassInfo(ClassResolver.java:1699)
[info]  at org.apache.fury.resolver.ClassResolver.readClassInternal(ClassResolver.java:1588)
[info]  at org.apache.fury.serializer.ReplaceResolveSerializer.readObject(ReplaceResolveSerializer.java:310)
[info]  at org.apache.fury.serializer.ReplaceResolveSerializer.read(ReplaceResolveSerializer.java:305)
[info]  at org.apache.fury.Fury.readData(Fury.java:923)
[info]  at org.apache.fury.serializer.ReplaceResolveSerializer.read(ReplaceResolveSerializer.java:284)
[info]  at org.apache.fury.serializer.collection.CollectionSerializers$JDKCompatibleCollectionSerializer.read(CollectionSerializers.java:743)
[info]  at scala.SomeFuryCodec_1_1108411398_52533857.read(SomeFuryCodec_1_1108411398_52533857.java:70)
[info]  at testing.ListingTimeseriesFeaturesFuryCodec_1_1108411398_981481512.readFields$(ListingTimeseriesFeaturesFuryCodec_1_1108411398_981481512.java:137)
[info]  at testing.ListingTimeseriesFeaturesFuryCodec_1_1108411398_981481512.read(ListingTimeseriesFeaturesFuryCodec_1_1108411398_981481512.java:171)
[info]  at scala.SomeFuryCodec_1_1108411398_52533857.read(SomeFuryCodec_1_1108411398_52533857.java:70)
[info]  at com.etsy.cachetesting.ListingFeaturesFuryCodec_1_1108411398_857240609.readFields$(ListingFeaturesFuryCodec_1_1108411398_857240609.java:97)
[info]  at com.etsy.cachetesting.ListingFeaturesFuryCodec_1_1108411398_857240609.read(ListingFeaturesFuryCodec_1_1108411398_857240609.java:118)
[info]  at org.apache.fury.Fury.readDataInternal(Fury.java:955)
[info]  at org.apache.fury.Fury.readRef(Fury.java:857)
[info]  at org.apache.fury.Fury.deserialize(Fury.java:789)
[info]  at org.apache.fury.Fury.deserialize(Fury.java:711)
[info]  at testing.serialization.FuryUtils$.$anonfun$fromFury$1(FuryUtils.scala:31)
[info]  at org.apache.fury.pool.ThreadPoolFury.execute(ThreadPoolFury.java:79)
[info]  ... 16 more

Anything Else?

This error occurs when forcing class registration. When I turn class registration off, I instead get:

[info] Caused by: java.lang.ArrayIndexOutOfBoundsException: Index 447 out of bounds for length 14
[info]  at org.apache.fury.collection.ObjectArray.get(ObjectArray.java:62)
[info]  at org.apache.fury.resolver.ClassResolver.readClassInfoWithMetaShare(ClassResolver.java:1348)
[info]  at org.apache.fury.resolver.ClassResolver.readClassInfo(ClassResolver.java:1638)
[info]  at scala.collection.immutable._colon_colonFuryMetaShared8782623466212560208Codec_1_1108411398_975194116.readFields$(_colon_colonFuryMetaShared8782623466212560208Codec_1_1108411398_975194116.java:73)
[info]  at scala.collection.immutable._colon_colonFuryMetaShared8782623466212560208Codec_1_1108411398_975194116.read(_colon_colonFuryMetaShared8782623466212560208Codec_1_1108411398_975194116.java:122)
[info]  at scala.SomeFuryMetaShared2848185738884915280Codec_1_1108411398_52533857.read(SomeFuryMetaShared2848185738884915280Codec_1_1108411398_52533857.java:47)
[info]  at com.etsy.cachetesting.ListingTimeseriesFeaturesFuryMetaShared7510468037580857040Codec_1_1108411398_981481512.readFields$(ListingTimeseriesFeaturesFuryMetaShared7510468037580857040Codec_1_1108411398_981481512.java:51)
[info]  at com.etsy.cachetesting.ListingTimeseriesFeaturesFuryMetaShared7510468037580857040Codec_1_1108411398_981481512.read(ListingTimeseriesFeaturesFuryMetaShared7510468037580857040Codec_1_1108411398_981481512.java:80)
[info]  at scala.SomeFuryMetaShared2848185738884915280Codec_1_1108411398_52533857.read(SomeFuryMetaShared2848185738884915280Codec_1_1108411398_52533857.java:47)
[info]  at com.etsy.cachetesting.ListingFeaturesFuryMetaShared8713304253468170576Codec_1_1108411398_857240609.readFields$(ListingFeaturesFuryMetaShared8713304253468170576Codec_1_1108411398_857240609.java:53)
[info]  at com.etsy.cachetesting.ListingFeaturesFuryMetaShared8713304253468170576Codec_1_1108411398_857240609.read(ListingFeaturesFuryMetaShared8713304253468170576Codec_1_1108411398_857240609.java:66)
[info]  at org.apache.fury.Fury.readDataInternal(Fury.java:955)
[info]  at org.apache.fury.Fury.readRef(Fury.java:857)
[info]  at org.apache.fury.Fury.deserialize(Fury.java:789)
[info]  at org.apache.fury.Fury.deserialize(Fury.java:711)
[info]  at com.etsy.cachetesting.serialization.FuryUtils$.$anonfun$fromFury$1(FuryUtils.scala:31)
[info]  at org.apache.fury.pool.ThreadPoolFury.execute(ThreadPoolFury.java:79)
[info]  ... 16 more

Are you willing to submit a PR?

chaokunyang commented 1 month ago

Hi @davidnadeau, could you try the Fury snapshot jar instead? I believe this issue has already been fixed on the main branch by #1812.