rode / grafeas-elasticsearch

An implementation of the Grafeas storage backend based on Elasticsearch
Apache License 2.0

Allow for paging through more results than the limit set by `index.max_result_window` #67

Open · alexashley opened this issue 3 years ago

alexashley commented 3 years ago

Came out of this discussion.

The documentation that Elasticsearch provides on pagination makes it sound like there is a hard cap on the number of results that can be paged through using `from` and `size`:

> By default, you cannot use `from` and `size` to page through more than 10,000 hits. This limit is a safeguard set by the `index.max_result_window` index setting. If you need to page through more than 10,000 hits, use the `search_after` parameter instead.

We need to determine whether that's actually the case by loading more notes or occurrences than `index.max_result_window` allows and attempting to page through them.

If it is, we'll need to grab the sort value from the last hit in the results, encode it in the page token, and send it along in future requests as the `search_after` parameter.
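For reference, a minimal sketch of that idea, not the actual implementation: the helper names and the sort field below are illustrative, and it assumes we sort on a unique keyword field and serialize the last hit's `sort` values into the opaque page token:

```go
package paging

import (
	"encoding/base64"
	"encoding/json"
)

// encodePageToken serializes the last hit's sort values (whatever
// Elasticsearch returned in the hit's "sort" array) into an opaque
// token the client echoes back on the next request.
func encodePageToken(lastHitSort []interface{}) (string, error) {
	raw, err := json.Marshal(lastHitSort)
	if err != nil {
		return "", err
	}
	return base64.URLEncoding.EncodeToString(raw), nil
}

// decodePageToken recovers the sort values to replay as search_after.
func decodePageToken(token string) ([]interface{}, error) {
	raw, err := base64.URLEncoding.DecodeString(token)
	if err != nil {
		return nil, err
	}
	var sortValues []interface{}
	if err := json.Unmarshal(raw, &sortValues); err != nil {
		return nil, err
	}
	return sortValues, nil
}

// buildSearchBody assembles the _search request body. Unlike from/size,
// search_after only needs the page size plus the previous page's sort
// values, so it isn't subject to index.max_result_window.
func buildSearchBody(pageSize int, pageToken string) (map[string]interface{}, error) {
	body := map[string]interface{}{
		"size": pageSize,
		// search_after requires a deterministic sort; sorting on a
		// unique field ("name" here is an assumption) keeps pages stable.
		"sort": []interface{}{
			map[string]interface{}{"name": "asc"},
		},
	}
	if pageToken != "" {
		sortValues, err := decodePageToken(pageToken)
		if err != nil {
			return nil, err
		}
		body["search_after"] = sortValues
	}
	return body, nil
}
```

The tradeoff is that `search_after` can only move forward through results, but since the page-token API is already forward-only, that should be fine.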

alexashley commented 3 years ago

Used this script to bulk load 15,000 occurrences and try to page through them:

bulk.go

```go
package main

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"time"

	"github.com/brianvoe/gofakeit/v6"
	"github.com/grafeas/grafeas/proto/v1beta1/build_go_proto"
	"github.com/grafeas/grafeas/proto/v1beta1/common_go_proto"
	"github.com/grafeas/grafeas/proto/v1beta1/grafeas_go_proto"
	"github.com/grafeas/grafeas/proto/v1beta1/provenance_go_proto"
	"github.com/grafeas/grafeas/proto/v1beta1/source_go_proto"
	"google.golang.org/grpc"
)

const (
	chunkSize           = 1000
	numberOfOccurrences = 15000
	project             = "rode"
	grafeasUrl          = "localhost:8080"
)

var (
	fake = gofakeit.New(0)
)

func main() {
	conn, client := createGrafeasClient()
	defer conn.Close()

	createProject()
	log.Println("created project")

	loadOccurrences(client)
	log.Println("loaded occurrences")

	pageThroughOccurrences(client)
}

// createProject creates the parent project through the Grafeas REST API.
func createProject() {
	client := http.Client{
		Timeout: time.Minute,
	}
	projectPayload := map[string]string{
		"name": "projects/" + project,
	}

	response, err := client.Post(fmt.Sprintf("%s/v1beta1/projects", "http://"+grafeasUrl), "application/json", jsonBody(&projectPayload))
	if err != nil {
		log.Fatal("error creating project", err)
	}

	if response.StatusCode != http.StatusOK {
		log.Fatal("unexpected response creating project", response.StatusCode)
	}
}

func createGrafeasClient() (*grpc.ClientConn, grafeas_go_proto.GrafeasV1Beta1Client) {
	connection, err := grpc.DialContext(context.Background(), grafeasUrl, grpc.WithInsecure(), grpc.WithBlock())
	if err != nil {
		log.Fatal("error creating grafeas client", err)
	}

	grafeasClient := grafeas_go_proto.NewGrafeasV1Beta1Client(connection)

	return connection, grafeasClient
}

// loadOccurrences generates random build occurrences and creates them
// in batches of chunkSize.
func loadOccurrences(client grafeas_go_proto.GrafeasV1Beta1Client) {
	occurrences := make([]*grafeas_go_proto.Occurrence, numberOfOccurrences)
	for i := 0; i < len(occurrences); i++ {
		occurrences[i] = createRandomBuildOccurrence()
	}

	var occurrenceChunks [][]*grafeas_go_proto.Occurrence
	for i := 0; i < len(occurrences); i += chunkSize {
		end := i + chunkSize

		if end > len(occurrences) {
			end = len(occurrences)
		}

		occurrenceChunks = append(occurrenceChunks, occurrences[i:end])
	}

	for i := range occurrenceChunks {
		o := occurrenceChunks[i]
		_, err := client.BatchCreateOccurrences(context.Background(), &grafeas_go_proto.BatchCreateOccurrencesRequest{
			Parent:      "projects/" + project,
			Occurrences: o,
		})
		if err != nil {
			log.Fatal("error batch creating occurrences", err)
		}
	}
}

// pageThroughOccurrences lists occurrences 1,000 at a time, following the
// page token until it receives an empty page.
func pageThroughOccurrences(client grafeas_go_proto.GrafeasV1Beta1Client) {
	currentPage := 1
	pageToken := ""
	for {
		log.Println("requesting page", currentPage)
		request := &grafeas_go_proto.ListOccurrencesRequest{
			Parent:    "projects/" + project,
			Filter:    "",
			PageSize:  1000,
			PageToken: pageToken,
		}
		response, err := client.ListOccurrences(context.Background(), request)
		if err != nil {
			log.Fatal("failed to list occurrences", err)
		}

		currentPage++
		pageToken = response.NextPageToken
		log.Printf("got %d occurrences\n", len(response.Occurrences))
		if len(response.Occurrences) == 0 {
			log.Println("reached the end of the result set")
			break
		}
	}
}

// createRandomBuildOccurrence fakes a build occurrence with gofakeit.
func createRandomBuildOccurrence() *grafeas_go_proto.Occurrence {
	return &grafeas_go_proto.Occurrence{
		Name: fake.Name(),
		Resource: &grafeas_go_proto.Resource{
			Uri: fake.URL(),
		},
		NoteName:    fmt.Sprintf("projects/%s/notes/%s", project, fake.UUID()),
		Kind:        common_go_proto.NoteKind_BUILD,
		Remediation: "",
		CreateTime:  nil,
		UpdateTime:  nil,
		Details: &grafeas_go_proto.Occurrence_Build{
			Build: &build_go_proto.Details{
				Provenance: &provenance_go_proto.BuildProvenance{
					Id:        fake.UUID(),
					ProjectId: "projects/rode",
					Commands:  nil,
					BuiltArtifacts: []*provenance_go_proto.Artifact{
						{
							Checksum: fake.LetterN(5),
							Id:       fake.UUID(),
							Names: []string{
								fake.URL(),
								fake.URL(),
							},
						},
					},
					SourceProvenance: &provenance_go_proto.Source{
						ArtifactStorageSourceUri: fake.URL(),
						Context: &source_go_proto.SourceContext{
							Context: &source_go_proto.SourceContext_Git{
								Git: &source_go_proto.GitSourceContext{
									Url:        fake.URL(),
									RevisionId: fake.LetterN(7),
								},
							},
							Labels: nil,
						},
					},
				},
			},
		},
	}
}

// jsonBody serializes a value so it can be used as an HTTP request body.
func jsonBody(val interface{}) io.Reader {
	jsonBytes, err := json.Marshal(val)
	if err != nil {
		log.Fatal("serialization error", err)
	}

	return bytes.NewReader(jsonBytes)
}
```
output

```shell
$ go run bulk.go
WARNING: Package "github.com/golang/protobuf/protoc-gen-go/generator" is deprecated.
	A future release of golang/protobuf will delete this package,
	which has long been excluded from the compatibility promise.

2021/04/09 16:52:11 requesting page 1
2021/04/09 16:52:11 got 1000 occurrences
2021/04/09 16:52:11 requesting page 2
2021/04/09 16:52:12 got 1000 occurrences
2021/04/09 16:52:12 requesting page 3
2021/04/09 16:52:12 got 1000 occurrences
2021/04/09 16:52:12 requesting page 4
2021/04/09 16:52:12 got 1000 occurrences
2021/04/09 16:52:12 requesting page 5
2021/04/09 16:52:12 got 1000 occurrences
2021/04/09 16:52:12 requesting page 6
2021/04/09 16:52:12 got 1000 occurrences
2021/04/09 16:52:12 requesting page 7
2021/04/09 16:52:12 got 1000 occurrences
2021/04/09 16:52:12 requesting page 8
2021/04/09 16:52:13 got 1000 occurrences
2021/04/09 16:52:13 requesting page 9
2021/04/09 16:52:13 got 1000 occurrences
2021/04/09 16:52:13 requesting page 10
2021/04/09 16:52:13 got 1000 occurrences
2021/04/09 16:52:13 requesting page 11
2021/04/09 16:52:13 failed to list occurrencesrpc error: code = Internal desc = unexpected response from elasticsearch
exit status 1
```

Requesting the 11th page means `from` is 10,000 and `size` is 1,000, so `from + size` = 11,000 exceeds the window, and Elasticsearch returns this error:

```json
{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "Result window is too large, from + size must be less than or equal to: [10000] but was [11000]. See the scroll api for a more efficient way to request large data sets. This limit can be set by changing the [index.max_result_window] index level setting."
      }
    ],
    "type": "search_phase_execution_exception",
    "reason": "all shards failed",
    "phase": "query",
    "grouped": true,
    "failed_shards": [
      {
        "shard": 0,
        "index": "grafeas-v1beta2-rode-occurrences",
        "node": "y40fPpNDRm648olC-Ut-tA",
        "reason": {
          "type": "illegal_argument_exception",
          "reason": "Result window is too large, from + size must be less than or equal to: [10000] but was [11000]. See the scroll api for a more efficient way to request large data sets. This limit can be set by changing the [index.max_result_window] index level setting."
        }
      }
    ],
    "caused_by": {
      "type": "illegal_argument_exception",
      "reason": "Result window is too large, from + size must be less than or equal to: [10000] but was [11000]. See the scroll api for a more efficient way to request large data sets. This limit can be set by changing the [index.max_result_window] index level setting.",
      "caused_by": {
        "type": "illegal_argument_exception",
        "reason": "Result window is too large, from + size must be less than or equal to: [10000] but was [11000]. See the scroll api for a more efficient way to request large data sets. This limit can be set by changing the [index.max_result_window] index level setting."
      }
    }
  },
  "status": 400
}
```
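As the error message notes, `index.max_result_window` is an index-level setting, so bumping it is a possible stopgap at the cost of more memory and CPU per deep query. For example (assuming Elasticsearch is reachable on `localhost:9200`):

```shell
# Stopgap only: raise the result window on the occurrences index.
# The index name matches the failed shard in the error above.
curl -X PUT "localhost:9200/grafeas-v1beta2-rode-occurrences/_settings" \
  -H 'Content-Type: application/json' \
  -d '{"index": {"max_result_window": 20000}}'
```

That only moves the cliff further out, though; switching pagination to `search_after` as described above is the real fix.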