Closed chuwy closed 4 years ago
Benchmark in Enrich:
package com.snowplowanalytics.snowplow.enrich.bench
import org.openjdk.jmh.annotations._
import java.util.concurrent.TimeUnit
import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent
/**
* @example
* {{{
* jmh:run -i 15 -wi 10 -f1 -t1 EnrichBench
* }}}
*/
@State(Scope.Thread)
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MICROSECONDS)
class ToTSVBench {
@Benchmark
def toTSV(state: ToTSVBench.BenchState) = {
ToTSVBench.tabSeparatedEnrichedEvent(state.enriched)
}
}
object ToTSVBench {
@State(Scope.Benchmark)
class BenchState {
var enriched: EnrichedEvent = _
@Setup(Level.Trial)
def setup(): Unit = {
enriched = new EnrichedEvent()
}
}
def tabSeparatedEnrichedEvent(enrichedEvent: EnrichedEvent): String =
enrichedEvent.getClass.getDeclaredFields
.map { field =>
field.setAccessible(true)
Option(field.get(enrichedEvent)).getOrElse("")
}
.mkString("\t")
}
Results with -i 10 -wi 3 -f2 -t2
:
[info] Result "com.snowplowanalytics.snowplow.enrich.bench.ToTSVBench.toTSV":
[info] 8.322 ±(99.9%) 0.372 us/op [Average]
[info] (min, avg, max) = (7.610, 8.322, 9.323), stdev = 0.429
[info] CI (99.9%): [7.950, 8.695] (assumes normal distribution)
I encourage you to implement tabSeparatedEnrichedEvent
as:
private val EnrichedFields =
classOf[EnrichedEvent].getDeclaredFields
.filterNot(_.getName.equals("pii"))
.map { field => field.setAccessible(true); field }
.toList
def tabSeparatedEnrichedEvent(enrichedEvent: EnrichedEvent): String =
EnrichedFields
.map { field =>
val prop = field.get(enrichedEvent)
if (prop == null) "" else prop.toString
}
.mkString("\t")
It is how it implemented in FS2 Enrich.
Also worth adding some fields to the event
to make it actually doing something - ideally into beginning, middle and end of an object as JVM might make some experiment-breaking hotpath optimizations.
With this implementation :
@State(Scope.Thread)
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MICROSECONDS)
class ToTSVBench {
@Benchmark
def toTSV(state: ToTSVBench.BenchState) = {
ToTSVBench.tabSeparatedEnrichedEvent(state.enriched)
}
}
object ToTSVBench {
@State(Scope.Benchmark)
class BenchState {
var enriched: EnrichedEvent = _
@Setup(Level.Trial)
def setup(): Unit = {
enriched = new EnrichedEvent()
enriched.app_id = "app_id"
enriched.geo_country = "FR"
enriched.geo_country = "FR"
enriched.mkt_campaign = "campaign"
enriched.os_name = "Android"
enriched.event_vendor = "vendor"
}
}
private val EnrichedFields =
classOf[EnrichedEvent].getDeclaredFields
.filterNot(_.getName.equals("pii"))
.map { field => field.setAccessible(true); field }
.toList
def tabSeparatedEnrichedEvent(enrichedEvent: EnrichedEvent): String =
EnrichedFields
.map { field =>
val prop = field.get(enrichedEvent)
if (prop == null) "" else prop.toString
}
.mkString("\t")
}
Results for +jmh:run -i 10 -wi 3 -f2 -t2 .*ToTSVBench.*
:
[info] Result "com.snowplowanalytics.snowplow.enrich.bench.ToTSVBench.toTSV":
[info] 8.267 ±(99.9%) 0.830 us/op [Average]
[info] (min, avg, max) = (5.829, 8.267, 9.701), stdev = 0.956
[info] CI (99.9%): [7.437, 9.098] (assumes normal distribution)
Results with the manual implementation with List(...).mkString
:
[info] Result "com.snowplowanalytics.snowplow.analytics.scalasdk.benchmark.ToTsvBenchmark.toTsv":
[info] 4.389 ±(99.9%) 0.085 us/op [Average]
[info] (min, avg, max) = (4.264, 4.389, 4.610), stdev = 0.098
[info] CI (99.9%): [4.304, 4.474] (assumes normal distribution)
Only 2x improvement
Results with the manual implementation with ... + '\t' + ... + '\t' + ...
:
[info] Result "com.snowplowanalytics.snowplow.analytics.scalasdk.benchmark.ToTsvBenchmark.toTsv":
[info] 3.982 ±(99.9%) 0.381 us/op [Average]
[info] (min, avg, max) = (3.486, 3.982, 5.379), stdev = 0.439
[info] CI (99.9%): [3.600, 4.363] (assumes normal distribution)
A little bit better
In order to convert
Event
back to TSV.We can write a property-based test with Event -> TSV -> Event check.