arangodb / arangodb-docker

Docker container for ArangoDB
Apache License 2.0
106 stars 32 forks source link

Several calls to createDocuments() crash Arango docker instance #86

Open naulacambra opened 4 years ago

naulacambra commented 4 years ago

I'm trying to upload a bunch of json data to an arango server using Arango GoDriver

Here is my code

package main

import (
    "context"
    "encoding/csv"
    "encoding/json"
    "fmt"
    "io"
    "io/ioutil"
    "os"
    "path/filepath"
    "regexp"
    "strconv"
    "time"

    driver "github.com/arangodb/go-driver"
    "github.com/arangodb/go-driver/http"
)

// ChannelInfoJson mirrors one entry of the on-disk JSON files: a single
// wifi-channel measurement window, as described in the surrounding post.
type ChannelInfoJson struct {
    Channel int           `json:"Channel"` // channel number (decoded from a JSON float64 in main)
    Values  []interface{} `json:"Values"`  // raw samples; element type is not fixed by this snippet
    From    time.Time     `json:"From"`    // window start (parsed with layout "02-Jan-2006 15:04:05")
    To      time.Time     `json:"To"`      // window end (same layout)
}

// getClient builds an ArangoDB client for the configured endpoint using
// basic authentication.
//
// The original version discarded both errors; a failed connection then
// surfaced later as a confusing nil-pointer panic. Since this is a
// one-shot loader, we report the error and exit instead.
func getClient() driver.Client {
    conn, err := http.NewConnection(http.ConnectionConfig{
        Endpoints: []string{"http://[url]:8529/"},
    })
    if err != nil {
        fmt.Println("Error creating HTTP connection:", err)
        os.Exit(1)
    }

    client, err := driver.NewClient(driver.ClientConfig{
        Connection:     conn,
        Authentication: driver.BasicAuthentication("root", "root"),
    })
    if err != nil {
        fmt.Println("Error creating client:", err)
        os.Exit(1)
    }

    return client
}

// getDb opens the named database on a freshly created client.
//
// The original version ignored the error, so a missing database or bad
// credentials produced a nil Database and a later panic; fail fast instead.
func getDb(ctx context.Context, dbName string) driver.Database {
    db, err := getClient().Database(ctx, dbName)
    if err != nil {
        fmt.Println("Error opening database:", err)
        os.Exit(1)
    }

    return db
}

// main loads every JSON file from a directory, converts each entry into a
// ChannelInfoJson document, and uploads them to ArangoDB in batches of
// batchSize files.
//
// Fixes relative to the posted snippet:
//   - `colName` was referenced but never declared; declared here.
//   - L102-103 used `field[...]` while the variable is `fields` (compile error).
//   - The trailing partial batch (file count not a multiple of batchSize)
//     was never uploaded; it is flushed after the loop.
//   - Two different directories were used for ReadDir and ReadFile; a single
//     directory constant is used for both.
//   - Errors from CollectionExists/ReadDir/ReadFile/Unmarshal were ignored.
func main() {
    ctx := context.Background()
    db := getDb(ctx, "dbName")

    // Declared here so the program compiles; the original referenced an
    // undefined identifier.
    const colName = "colName"

    var col driver.Collection
    exists, err := db.CollectionExists(ctx, colName)
    if err != nil {
        fmt.Println("Error checking collection existence:", err)
        os.Exit(1)
    }

    if exists {
        col, err = db.Collection(ctx, colName)
    } else {
        col, err = db.CreateCollection(ctx, colName, &driver.CreateCollectionOptions{
            WaitForSync: true,
        })
    }
    if err != nil {
        fmt.Println("Error obtaining collection:", err)
        os.Exit(1)
    }

    // One directory for both listing and reading (the original mixed
    // D:\Documents\json and D:\Documentos\TFG\json).
    const dir = `D:\Documents\json`
    files, err := ioutil.ReadDir(dir)
    if err != nil {
        fmt.Println("Error reading directory:", err)
        os.Exit(1)
    }

    docs := []ChannelInfoJson{}
    batchSize := 10

    // flush uploads the accumulated documents and resets the batch,
    // keeping the backing array for reuse.
    flush := func() {
        if len(docs) == 0 {
            return
        }
        if _, _, err := col.CreateDocuments(ctx, docs); err != nil {
            fmt.Println("Error in batch creation", err)
            os.Exit(-1)
        }
        docs = docs[:0]
    }

    for fileIndex, f := range files {
        // Load one file.
        data, err := ioutil.ReadFile(filepath.Join(dir, f.Name()))
        if err != nil {
            fmt.Println("Error reading file:", err)
            os.Exit(1)
        }

        // Each file holds an array of channel objects.
        var channelInfoArr []interface{}
        if err := json.Unmarshal(data, &channelInfoArr); err != nil {
            fmt.Println("Error unmarshalling", f.Name(), ":", err)
            os.Exit(1)
        }

        for _, channelInfo := range channelInfoArr {
            fields, ok := channelInfo.(map[string]interface{})
            if !ok {
                continue // skip malformed entries instead of building an empty doc
            }

            // NOTE(review): the assertions below still panic on unexpected
            // field types, as in the original — acceptable for a one-off
            // loader, but worth hardening if the input is untrusted.
            from, _ := time.Parse("02-Jan-2006 15:04:05", fields["From"].(string))
            to, _ := time.Parse("02-Jan-2006 15:04:05", fields["To"].(string))
            docs = append(docs, ChannelInfoJson{
                Channel: int(fields["Channel"].(float64)), // original had undefined `field`
                Values:  fields["Values"].([]interface{}),
                From:    from,
                To:      to,
            })
        }

        // Upload every batchSize files.
        if (fileIndex+1)%batchSize == 0 {
            flush()
        }
    }

    // Original bug: any trailing files not filling a complete batch were
    // silently dropped — flush the remainder.
    flush()
}

I'm trying to upload 2000 json files.

Each one has info of N channels (wifi channels).

This code works against a local (non-Docker) server, but when I run it against a Docker instance, the first batch loads fine and the second one makes the Docker container crash. I'm unable to get more information from the Docker logs

docker logs [containerId]

Neither from the Arango logs.

Does anyone know why this is happening?

Thank you

dothebart commented 4 years ago

Hi, there are several possible reasons and ways to proceed. Docker may restrict resources without these restrictions being visible in /proc. Since ArangoDB 3.6.3 you may specify ARANGODB_OVERRIDE_DETECTED_TOTAL_MEMORY with a number for this, to work around it.

A second way to get more information about what's going on could be to try the devel / nightly container; it has a crash handler which will print the position it is in on SEGFAULTs.