flier / gohs

GoLang Binding of HyperScan https://www.hyperscan.io/
Other
280 stars 51 forks source link

cgo call C.hs_scan performance is much lower than hyperscan in C #39

Closed ze2o closed 2 years ago

ze2o commented 3 years ago

I wanted to analyse the time consumed by hs_scan, and I found a big gap between cgo and C. I wrote test code based on the "simplegrep" example, using a single rule (matching the string "food" against the pattern "o{2,}") and running the scan 200,000 times. This is my result:

| func | total time | time per call |
| --- | --- | --- |
| func (bs *blockScanner) Scan(...) | 1028389 us | 5141 ns |
| hs_scan | 10633 us | 53 ns |

The Go func Scan takes around 100 times longer per call. Are there any mistakes in my test?

1. gohs test code https://github.com/flier/gohs/blob/master/examples/simplegrep/main.go

package main_test

import (
    _ "bytes"
    "flag"
    "fmt"
    "github.com/flier/gohs/hyperscan"
    "os"
    "testing"
    "time"
)

// Command-line flags registered with the flag package.
var (
    // flagNoColor is the boolean -C flag (default false); its help text reads
    // "Disable colorized output."
    flagNoColor    = flag.Bool("C", false, "Disable colorized output.")
    // flagByteOffset is the boolean -b flag (default false); per its help text
    // it asks for the byte offset of a match to be shown before the matched line.
    flagByteOffset = flag.Bool("b", false, "The offset in bytes of a matched pattern is displayed in front of the respective matched line")
)

// theme is a string decorator that returns its argument unchanged.
// NOTE(review): neither theme nor the flags above are referenced by the code
// visible in this snippet.
var theme = func(s string) string { return s }

// highlight returns s surrounded by the terminal escape sequences "\033[35m"
// (prefix) and "\033[0m" (suffix).
func highlight(s string) string {
    const (
        prefix = "\033[35m"
        suffix = "\033[0m"
    )

    decorated := prefix + s
    decorated += suffix
    return decorated
}

// eventHandler is the match callback passed to database.Scan in TestGoHs.
// It ignores all of its arguments and always reports success, so the callback
// body itself adds no work to the timed loop.
func eventHandler(
    id uint,
    from uint64,
    to uint64,
    flags uint,
    context interface{},
) error {
    // Nothing is recorded for a match; there is never an error to report.
    return nil
}

// TestGoHs times repeated block-mode scans of a tiny input through the gohs
// binding. It compiles the pattern "o{2,}", allocates scratch space, performs
// one untimed scan of "food" to validate the setup, then measures a fixed
// number of further scans and prints the total and per-scan wall-clock time.
//
// Setup or scan failures are reported with t.Fatalf, which marks the test as
// failed and still runs the deferred Close/Free calls (os.Exit would skip them).
func TestGoHs(t *testing.T) {
    // iterations is the number of timed scans.
    const iterations = 200000

    expr := "o{2,}"
    pattern := hyperscan.NewPattern(expr, hyperscan.Caseless|hyperscan.SingleMatch|hyperscan.AllowEmpty)

    database, err := hyperscan.NewBlockDatabase(pattern)
    if err != nil {
        t.Fatalf("ERROR: Unable to compile pattern \"%s\" : %s", pattern.String(), err.Error())
    }
    defer database.Close()

    scratch, err := hyperscan.NewScratch(database)
    if err != nil {
        t.Fatalf("ERROR: Unable to allocate scratch space: %s", err.Error())
    }
    defer scratch.Free()

    inputData := "food"
    // Convert the input once, outside the timed loop, so the measurement does
    // not include a string-to-[]byte conversion on every iteration.
    data := []byte(inputData)

    // os.Stdout is named explicitly so the file-level "os" import stays in use
    // now that the os.Exit calls are gone.
    fmt.Fprintf(os.Stdout, "Scanning %d bytes with Hyperscan\n", len(data))

    // Untimed scan: fail early if the database/scratch pair is unusable.
    if err := database.Scan(data, scratch, eventHandler, inputData); err != nil {
        t.Fatalf("ERROR: Unable to scan input buffer: %s", err.Error())
    }

    // Measure the elapsed time of the scan loop.
    t1 := time.Now()
    for i := 0; i < iterations; i++ {
        // A failing scan would make the timing meaningless, so stop at once.
        if err := database.Scan(data, scratch, eventHandler, inputData); err != nil {
            t.Fatalf("ERROR: scan %d failed: %s", i, err.Error())
        }
    }
    elapsed := time.Since(t1)

    fmt.Println("App elapsed: ", elapsed)
    fmt.Println("Per scan: ", elapsed/iterations)
}

2. c code https://github.com/intel/hyperscan/blob/master/examples/simplegrep.c

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#include <hs.h>

/*
 * Match callback handed to hs_scan() in main(). Every argument is ignored and
 * 0 is always returned, so the callback body adds no work of its own to the
 * timed loop.
 */
static int eventHandler(unsigned int id, unsigned long long from,
                        unsigned long long to, unsigned int flags, void *ctx) {
    /* Explicitly discard the unused parameters. */
    (void)id;
    (void)from;
    (void)to;
    (void)flags;
    (void)ctx;

    return 0;
}

/*
 * Micro-benchmark for hs_scan() in block mode.
 *
 * Compiles the single pattern "o{2,}", allocates scratch space, scans the
 * hard-coded input "food" once to validate the setup, then times ITERATIONS
 * further scans with gettimeofday() and prints the total time in microseconds
 * and the average time per scan in nanoseconds.
 *
 * Returns 0 on success and -1 if compilation, scratch allocation or any scan
 * fails. The database and scratch space are released on every exit path.
 *
 * NOTE(review): the Go test in this issue compiles its pattern with
 * Caseless|SingleMatch|AllowEmpty while this program uses HS_FLAG_DOTALL, so
 * the two measurements do not use identical databases.
 */
int main(int argc, char *argv[]) {
    /* Command-line arguments are not used by this benchmark. */
    (void)argc;
    (void)argv;

    /* Number of timed scans; used by both the loop and the per-scan average. */
    enum { ITERATIONS = 200000 };

    char *pattern = "o{2,}";
    hs_database_t *database;
    hs_compile_error_t *compile_err;
    if (hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK, NULL, &database,
                   &compile_err) != HS_SUCCESS) {
        printf("ERROR: Unable to compile pattern \"%s\": %s\n",
                pattern, compile_err->message);
        hs_free_compile_error(compile_err);
        return -1;
    }

    /* The input is a fixed literal; derive its length instead of hard-coding it. */
    const char *inputData = "food";
    unsigned int length = (unsigned int)strlen(inputData);

    hs_scratch_t *scratch = NULL;
    if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) {
        printf("ERROR: Unable to allocate scratch space. Exiting.\n");
        hs_free_database(database);
        return -1;
    }

    printf("Scanning %u bytes with Hyperscan\n", length);

    /* Untimed scan: fail early if the database/scratch pair is unusable. */
    if (hs_scan(database, inputData, length, 0, scratch, eventHandler,
                pattern) != HS_SUCCESS) {
        printf("ERROR: Unable to scan input buffer. Exiting.\n");
        hs_free_scratch(scratch);
        hs_free_database(database);
        return -1;
    }

    struct timeval start, end;
    gettimeofday(&start, NULL);

    for (int i = 0; i < ITERATIONS; i++) {
        /* A failing scan would make the timing meaningless, so stop at once. */
        if (hs_scan(database, inputData, length, 0, scratch, eventHandler,
                    pattern) != HS_SUCCESS) {
            printf("ERROR: Unable to scan input buffer. Exiting.\n");
            hs_free_scratch(scratch);
            hs_free_database(database);
            return -1;
        }
    }

    gettimeofday(&end, NULL);

    /* Use long long so the microsecond-to-nanosecond multiplication below
     * cannot overflow on platforms where long is 32 bits wide. */
    long long timeuse = (long long)(end.tv_usec - start.tv_usec)
                      + 1000000LL * (long long)(end.tv_sec - start.tv_sec);

    printf("total time run %lld us \n", timeuse);
    printf("per time run %lld ns \n", timeuse * 1000 / ITERATIONS);

    hs_free_scratch(scratch);
    hs_free_database(database);
    return 0;
}
flier commented 2 years ago

I've added a simple benchmark for hyperscan.BlockScanner, hyperscan.StreamScanner and regexp.Match.

If your application scenario uses simple regular expressions to match short strings, perhaps regexp would be a better choice.

// benchData lists the named regular expressions exercised by the benchmarks;
// each entry's name appears as a component of the benchmark names in the
// output (for example "BenchmarkBlockScan/Easy0/16-8").
var benchData = []struct {
    name string
    re   string
}{
    {name: "Easy0", re: "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
    {name: "Easy0i", re: "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
    {name: "Easy1", re: "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
    {name: "Medium", re: "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
    {name: "Hard", re: "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
    {name: "Hard1", re: "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
}
$ PKG_CONFIG_PATH=<path>/lib/pkgconfig go test -bench .
goos: darwin
goarch: amd64
pkg: github.com/flier/gohs/tests
cpu: Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz
BenchmarkBlockScan/Easy0/16-8            1598247           774.0 ns/op    20.67 MB/s
BenchmarkBlockScan/Easy0/32-8            1491242          1031 ns/op      31.05 MB/s
BenchmarkBlockScan/Easy0/1K-8            1000000          1071 ns/op     956.42 MB/s
BenchmarkBlockScan/Easy0/32K-8            397480          4343 ns/op    7544.27 MB/s
BenchmarkBlockScan/Easy0/1M-8              17223         71234 ns/op    14720.25 MB/s
BenchmarkBlockScan/Easy0/32M-8               362       3394656 ns/op    9884.49 MB/s
BenchmarkBlockScan/Easy0i/16-8           1528971           772.0 ns/op    20.72 MB/s
BenchmarkBlockScan/Easy0i/32-8           1482652           826.3 ns/op    38.73 MB/s
BenchmarkBlockScan/Easy0i/1K-8           1000000          1053 ns/op     972.65 MB/s
BenchmarkBlockScan/Easy0i/32K-8           368038          3340 ns/op    9811.18 MB/s
BenchmarkBlockScan/Easy0i/1M-8             14882         79754 ns/op    13147.69 MB/s
BenchmarkBlockScan/Easy0i/32M-8              368       3395654 ns/op    9881.58 MB/s
BenchmarkBlockScan/Easy1/16-8            1624982           728.9 ns/op    21.95 MB/s
BenchmarkBlockScan/Easy1/32-8            1579255           757.7 ns/op    42.23 MB/s
BenchmarkBlockScan/Easy1/1K-8            1265148           978.9 ns/op  1046.05 MB/s
BenchmarkBlockScan/Easy1/32K-8            188155          7485 ns/op    4377.74 MB/s
BenchmarkBlockScan/Easy1/1M-8               6889        183689 ns/op    5708.44 MB/s
BenchmarkBlockScan/Easy1/32M-8               192       6977544 ns/op    4808.92 MB/s
BenchmarkBlockScan/Medium/16-8           1645785           751.7 ns/op    21.29 MB/s
BenchmarkBlockScan/Medium/32-8           1530352           774.5 ns/op    41.32 MB/s
BenchmarkBlockScan/Medium/1K-8           1345494           913.9 ns/op  1120.49 MB/s
BenchmarkBlockScan/Medium/32K-8           363076          3150 ns/op    10404.05 MB/s
BenchmarkBlockScan/Medium/1M-8             17047         78425 ns/op    13370.37 MB/s
BenchmarkBlockScan/Medium/32M-8              397       2986380 ns/op    11235.82 MB/s
BenchmarkBlockScan/Hard/16-8             1639332           724.1 ns/op    22.10 MB/s
BenchmarkBlockScan/Hard/32-8             1560104           811.7 ns/op    39.43 MB/s
BenchmarkBlockScan/Hard/1K-8             1369534           960.1 ns/op  1066.60 MB/s
BenchmarkBlockScan/Hard/32K-8             373824          3287 ns/op    9968.22 MB/s
BenchmarkBlockScan/Hard/1M-8               15556         87712 ns/op    11954.77 MB/s
BenchmarkBlockScan/Hard/32M-8                100      13579221 ns/op    2471.01 MB/s
BenchmarkBlockScan/Hard1/16-8             951288          1542 ns/op      10.37 MB/s
BenchmarkBlockScan/Hard1/32-8            1392430           854.4 ns/op    37.45 MB/s
BenchmarkBlockScan/Hard1/1K-8            1213184           982.7 ns/op  1042.01 MB/s
BenchmarkBlockScan/Hard1/32K-8            185710          6501 ns/op    5040.36 MB/s
BenchmarkBlockScan/Hard1/1M-8               6640        172784 ns/op    6068.71 MB/s
BenchmarkBlockScan/Hard1/32M-8               194       6075786 ns/op    5522.65 MB/s
BenchmarkStreamScan/Easy0/16-8            628572          1936 ns/op       8.27 MB/s
BenchmarkStreamScan/Easy0/32-8            626991          1882 ns/op      17.01 MB/s
BenchmarkStreamScan/Easy0/1K-8            602320          1948 ns/op     525.60 MB/s
BenchmarkStreamScan/Easy0/32K-8           117933         10274 ns/op    3189.30 MB/s
BenchmarkStreamScan/Easy0/1M-8              3704        431607 ns/op    2429.47 MB/s
BenchmarkStreamScan/Easy0/32M-8               92      11703300 ns/op    2867.09 MB/s
BenchmarkStreamScan/Easy0i/16-8           522352          2435 ns/op       6.57 MB/s
BenchmarkStreamScan/Easy0i/32-8           652843          1869 ns/op      17.12 MB/s
BenchmarkStreamScan/Easy0i/1K-8           594235          1988 ns/op     515.12 MB/s
BenchmarkStreamScan/Easy0i/32K-8          113883          9934 ns/op    3298.66 MB/s
BenchmarkStreamScan/Easy0i/1M-8             3816        288265 ns/op    3637.55 MB/s
BenchmarkStreamScan/Easy0i/32M-8             122       9712375 ns/op    3454.81 MB/s
BenchmarkStreamScan/Easy1/16-8            633846          1882 ns/op       8.50 MB/s
BenchmarkStreamScan/Easy1/32-8            635702          1950 ns/op      16.41 MB/s
BenchmarkStreamScan/Easy1/1K-8            609622          2000 ns/op     512.01 MB/s
BenchmarkStreamScan/Easy1/32K-8           105356         11300 ns/op    2899.92 MB/s
BenchmarkStreamScan/Easy1/1M-8              3052        346826 ns/op    3023.35 MB/s
BenchmarkStreamScan/Easy1/32M-8               93      11009010 ns/op    3047.91 MB/s
BenchmarkStreamScan/Medium/16-8           666102          1851 ns/op       8.64 MB/s
BenchmarkStreamScan/Medium/32-8           637526          1882 ns/op      17.00 MB/s
BenchmarkStreamScan/Medium/1K-8           582178          1979 ns/op     517.39 MB/s
BenchmarkStreamScan/Medium/32K-8          119418          9954 ns/op    3292.09 MB/s
BenchmarkStreamScan/Medium/1M-8             3974        286697 ns/op    3657.44 MB/s
BenchmarkStreamScan/Medium/32M-8             122       9698514 ns/op    3459.75 MB/s
BenchmarkStreamScan/Hard/16-8             653290          1832 ns/op       8.73 MB/s
BenchmarkStreamScan/Hard/32-8             652280          1852 ns/op      17.28 MB/s
BenchmarkStreamScan/Hard/1K-8             631614          1926 ns/op     531.73 MB/s
BenchmarkStreamScan/Hard/32K-8            120248         10745 ns/op    3049.64 MB/s
BenchmarkStreamScan/Hard/1M-8               4154        285726 ns/op    3669.87 MB/s
BenchmarkStreamScan/Hard/32M-8               122       9988984 ns/op    3359.14 MB/s
BenchmarkStreamScan/Hard1/16-8            675237          1822 ns/op       8.78 MB/s
BenchmarkStreamScan/Hard1/32-8            667137          1851 ns/op      17.29 MB/s
BenchmarkStreamScan/Hard1/1K-8            612369          1985 ns/op     515.85 MB/s
BenchmarkStreamScan/Hard1/32K-8            89500         13165 ns/op    2488.99 MB/s
BenchmarkStreamScan/Hard1/1M-8              2901        388390 ns/op    2699.80 MB/s
BenchmarkStreamScan/Hard1/32M-8               72      15704594 ns/op    2136.60 MB/s
BenchmarkMatch/Easy0/16-8               280097866            4.296 ns/op    3724.22 MB/s
BenchmarkMatch/Easy0/32-8               24614971            48.80 ns/op  655.74 MB/s
BenchmarkMatch/Easy0/1K-8                4749654           253.3 ns/op  4042.64 MB/s
BenchmarkMatch/Easy0/32K-8                254571          4382 ns/op    7477.52 MB/s
BenchmarkMatch/Easy0/1M-8                   4681        254353 ns/op    4122.53 MB/s
BenchmarkMatch/Easy0/32M-8                   130       8712851 ns/op    3851.14 MB/s
BenchmarkMatch/Easy0i/16-8              279434544            4.271 ns/op    3745.90 MB/s
BenchmarkMatch/Easy0i/32-8               1523772           766.7 ns/op    41.74 MB/s
BenchmarkMatch/Easy0i/1K-8                 52428         22901 ns/op      44.72 MB/s
BenchmarkMatch/Easy0i/32K-8                 1234        971739 ns/op      33.72 MB/s
BenchmarkMatch/Easy0i/1M-8                    37      32107996 ns/op      32.66 MB/s
BenchmarkMatch/Easy0i/32M-8                    2     995575008 ns/op      33.70 MB/s
BenchmarkMatch/Easy1/16-8               271861591            4.512 ns/op    3546.21 MB/s
BenchmarkMatch/Easy1/32-8               26305459            48.14 ns/op  664.76 MB/s
BenchmarkMatch/Easy1/1K-8                1889414           628.6 ns/op  1629.08 MB/s
BenchmarkMatch/Easy1/32K-8                 41116         34497 ns/op     949.87 MB/s
BenchmarkMatch/Easy1/1M-8                   1178       1100574 ns/op     952.75 MB/s
BenchmarkMatch/Easy1/32M-8                    34      42833052 ns/op     783.38 MB/s
BenchmarkMatch/Medium/16-8              244246960            4.925 ns/op    3248.57 MB/s
BenchmarkMatch/Medium/32-8               1263304           843.4 ns/op    37.94 MB/s
BenchmarkMatch/Medium/1K-8                 45930         24356 ns/op      42.04 MB/s
BenchmarkMatch/Medium/32K-8                 1006       1093250 ns/op      29.97 MB/s
BenchmarkMatch/Medium/1M-8                    36      34104154 ns/op      30.75 MB/s
BenchmarkMatch/Medium/32M-8                    1    1085179024 ns/op      30.92 MB/s
BenchmarkMatch/Hard/16-8                266092641            5.439 ns/op    2941.86 MB/s
BenchmarkMatch/Hard/32-8                  967228          1193 ns/op      26.83 MB/s
BenchmarkMatch/Hard/1K-8                   32984         37836 ns/op      27.06 MB/s
BenchmarkMatch/Hard/32K-8                    813       1535033 ns/op      21.35 MB/s
BenchmarkMatch/Hard/1M-8                      24      51340615 ns/op      20.42 MB/s
BenchmarkMatch/Hard/32M-8                      1    1584898005 ns/op      21.17 MB/s
BenchmarkMatch/Hard1/16-8                 351211          3474 ns/op       4.61 MB/s
BenchmarkMatch/Hard1/32-8                 174608          6872 ns/op       4.66 MB/s
BenchmarkMatch/Hard1/1K-8                   5761        202412 ns/op       5.06 MB/s
BenchmarkMatch/Hard1/32K-8                   163       7273184 ns/op       4.51 MB/s
BenchmarkMatch/Hard1/1M-8                      5     234971990 ns/op       4.46 MB/s
BenchmarkMatch/Hard1/32M-8                     1    7629507335 ns/op       4.40 MB/s
PASS
ok      github.com/flier/gohs/tests 128.323s