milvus-io / milvus

A cloud-native vector database, storage for next generation AI applications
https://milvus.io
Apache License 2.0
30.03k stars 2.88k forks source link

[Bug]: [GoSDK] When inserting rows without primary key data, all the returned IDs are zero #33460

Open ThreadDao opened 4 months ago

ThreadDao commented 4 months ago

Is there an existing issue for this?

Environment

- Milvus version: master-20240528-b138ae74-amd64
- Deployment mode(standalone or cluster):
- MQ type(rocksmq, pulsar or kafka):    
- SDK version(e.g. pymilvus v2.0.0rc2): go-sdk v2
- OS(Ubuntu or CentOS): 
- CPU/Memory: 
- GPU: 
- Others:

Current Behavior

  1. Create a collection with autoID true, an int64 (pk) field and a floatVec field.
  2. Inserting rows without int64 data returns ids: [0, ..., 0]

    • case:
      
      // test insert rows enable or disable dynamic field
      func TestInsertDefaultRows(t *testing.T) {
      t.Parallel()
      ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
      mc := createDefaultMilvusClient(ctx, t)

    for , autoId := range []bool{true} { cp := hp.NewCreateCollectionParams(hp.Int64Vec) , schema := hp.CollPrepare.CreateCollection(ctx, t, mc, cp, hp.TNewFieldsOption(), hp.TNewSchemaOption().TWithAutoID(autoId)) log.Info("fields", zap.Any("FieldNames", schema.Fields))

    // insert rows
    rows := hp.GenInt64VecRows(common.DefaultNb, false, autoId, *hp.TNewDataOption())
    log.Info("rows data", zap.Any("rows[8]", rows[8]))
    ids, err := mc.Insert(ctx, clientv2.NewRowBasedInsertOption(schema.CollectionName, rows...))
    common.CheckErr(t, err, true)
    
    int64Values := make([]int64, 0, common.DefaultNb)
    for i := 0; i < common.DefaultNb; i++ {
        int64Values = append(int64Values, int64(i))
    }
    common.CheckInsertResult(t, ids.IDs, column.NewColumnInt64(common.DefaultInt64FieldName, int64Values))
    
    // flush and check row count
    flushTask, errFlush := mc.Flush(ctx, clientv2.NewFlushOption(schema.CollectionName))
    common.CheckErr(t, errFlush, true)
    errFlush = flushTask.Await(ctx)
    common.CheckErr(t, errFlush, true)

    } }

Expected Behavior

No response

Steps To Reproduce

No response

Milvus Log

No response

Anything else?

No response

yanliang567 commented 4 months ago

/assign @congqixia /unassign

congqixia commented 4 months ago

Tested with the following code:

package main

import (
    "context"
    "log"
    "math/rand"

    milvusclient "github.com/milvus-io/milvus/client/v2"
    "github.com/milvus-io/milvus/client/v2/row"
)

// Data is the row type of the reproduction program: main passes it to
// row.ParseSchema to derive the collection schema and inserts pointers to it
// as rows.
//
// The milvus struct tags name the fields "id" and "vector". The id field is
// tagged primary_key and auto_id, and the vector field is tagged dim:128.
type Data struct {
    ID     int64     `milvus:"name:id;primary_key;auto_id"`
    Vector []float32 `milvus:"name:vector;dim:128"`
}

const (
    // Server address, number of rows inserted / length of each generated
    // vector, and the name of the collection that main drops and recreates.
    milvusAddr     = `localhost:19530`
    nEntities, dim = 10, 128
    collectionName = "hello_milvus"

    // msgFmt is the banner format main uses for its progress log line.
    // idCol, randomCol, embeddingCol and topK are not referenced by the code
    // shown in this program.
    msgFmt                         = "==== %s ====\n"
    idCol, randomCol, embeddingCol = "id", "random", "vector"
    topK                           = 3
)

// main runs the auto-ID insert scenario end to end:
//
//  1. derive a schema from the Data struct and log its fields,
//  2. connect to the Milvus server at milvusAddr,
//  3. drop collectionName if it already exists, then create it from the schema,
//  4. insert nEntities rows that set only Vector (ID is left at its zero value),
//  5. log the insert result's IDs.
//
// Any failure terminates the program through log.Fatalf. Note that log.Fatalf
// exits the process, so the deferred client Close does not run on those paths.
func main() {
    schema, err := row.ParseSchema(&Data{})
    if err != nil {
        log.Fatalf("failed to parse schema from struct: %v", err)
    }

    for _, field := range schema.Fields {
        log.Printf("Field name: %s, FieldType %s, IsPrimaryKey: %t", field.Name, field.DataType, field.PrimaryKey)
    }
    schema.WithName(collectionName)

    ctx := context.Background()

    log.Printf(msgFmt, "start connecting to Milvus")
    c, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
        Address: milvusAddr,
    })
    if err != nil {
        log.Fatalf("failed to connect to milvus: %v", err)
    }
    defer c.Close(ctx)

    // Start from a clean slate: remove a leftover collection from an earlier run.
    has, err := c.HasCollection(ctx, milvusclient.NewHasCollectionOption(collectionName))
    if err != nil {
        log.Fatalf("failed to check collection exists or not: %v", err)
    }
    if has {
        // A failed drop would make the following create misleading, so stop here.
        if err := c.DropCollection(ctx, milvusclient.NewDropCollectionOption(collectionName)); err != nil {
            log.Fatalf("failed to drop collection: %v", err)
        }
    }

    err = c.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(collectionName, schema))
    if err != nil {
        log.Fatalf("failed to create collection: %v", err)
    }

    // Build the rows with random vectors only; the primary key is deliberately
    // not populated.
    rows := make([]any, 0, nEntities)
    for i := 0; i < nEntities; i++ {
        vec := make([]float32, 0, dim)
        for j := 0; j < dim; j++ {
            vec = append(vec, rand.Float32())
        }
        rows = append(rows, &Data{
            Vector: vec,
        })
    }

    insertResult, err := c.Insert(ctx, milvusclient.NewRowBasedInsertOption(collectionName, rows...))
    if err != nil {
        log.Fatalf("failed to insert data: %v", err)
    }
    log.Println(insertResult.IDs)
}

The output was as follows:

2024/05/29 20:58:28 Field name: id, FieldType int64, IsPrimaryKey: true
2024/05/29 20:58:28 Field name: vector, FieldType []float32, IsPrimaryKey: false
2024/05/29 20:58:28 ==== start connecting to Milvus ====
2024/05/29 20:58:28 &{{} id [450094383872654028 450094383872654029 450094383872654030 450094383872654031 450094383872654032 450094383872654033 450094383872654034 450094383872654035 450094383872654036 450094383872654037]}
congqixia commented 4 months ago

It looks like the schema in your test case does not set auto id of pk field to true /assign @ThreadDao /unassign

ThreadDao commented 4 months ago

It looks like the schema in your test case does not set auto id of pk field to true /assign @ThreadDao /unassign

@congqixia Oh~ I only set schema AutoId to true.

In other words, the above test is equivalent to setting AutoID to false. In this case, inserting a row without pk data should also result in an error instead of returning zero IDs.

congqixia commented 4 months ago

@ThreadDao BaseRow has a field named Int64, so the test case is equivalent to passing a zero-value pk column.

ThreadDao commented 4 months ago

@congqixia I should not have passed an int64 value. My struct is set to ignore zero values via `omitempty`. You can also see that I printed a log line showing that there is no int64 value:

[2024/05/29 18:27:11.321 +08:00] [INFO] [testcases/insert_test.go:50] ["rows data"] ["rows[8]"="{\"floatVec\":[0.6178241,0.21087281,0.44867408,0.7325401,0.16834813,0.93801135,0.7424177,0.45748225,0.31714168,0.52351075,0.23468941,0.20924239,0.39932206,0.27672815,0.94168675,0.15122142,0.67313886,0.33818346,0.490186,0.37304404,0.8647382,0.984301,0.86432034,0.6576149,0.034932584,0.11069491,0.9180387,0.27059072,0.7769772,0.4492993,0.9091361,0.49744037,0.79990107,0.947381,0.79302114,0.9386312,0.08769191,0.08857079,0.6660659,0.63789123,0.8110448,0.6232179,0.06447095,0.7867628,0.011526427,0.23668459,0.31532583,0.930082,0.5526945,0.35251418,0.7554627,0.6498429,0.8466115,0.15021151,0.6578219,0.6485041,0.36646286,0.022119252,0.6632878,0.90742284,0.024551086,0.69591314,0.041073326,0.6479019,0.0280014,0.5167739,0.76998764,0.96678704,0.3818616,0.7679683,0.58915144,0.75805616,0.8849203,0.3163661,0.6954391,0.29520997,0.012191617,0.817875,0.84755325,0.89885163,0.9787547,0.7072435,0.14403586,0.124327034,0.67088,0.24942382,0.40041453,0.6156071,0.57584834,0.0073799007,0.52532995,0.416126,0.18008909,0.14110237,0.99673915,0.13406962,0.33921134,0.41489682,0.442895,0.5816908,0.80970913,0.6648036,0.26837,0.5643006,0.53988045,0.02402428,0.16577701,0.91346854,0.61975455,0.2552258,0.6679491,0.01729772,0.6181179,0.4244343,0.31080252,0.36212626,0.89749485,0.5521082,0.40784967,0.87652504,0.87272465,0.47998312,0.28501573,0.16392118,0.8223444,0.761234,0.7927178,0.7222227]}"]
// BaseRow is the row struct used by the test helpers in this thread. Every
// named field carries a json tag with omitempty and a milvus tag giving the
// field name; Array and Dynamic are embedded.
type BaseRow struct {
    Bool      bool                   `json:"bool,omitempty" milvus:"name:bool"`
    Int8      int8                   `json:"int8,omitempty" milvus:"name:int8"`
    Int16     int16                  `json:"int16,omitempty" milvus:"name:int16"`
    Int32     int32                  `json:"int32,omitempty" milvus:"name:int32"`
    Int64     int64                  `json:"int64,omitempty" milvus:"name:int64"`
    Float     float32                `json:"float,omitempty" milvus:"name:float"`
    Double    float64                `json:"double,omitempty" milvus:"name:double"`
    Varchar   string                 `json:"varchar,omitempty" milvus:"name:varchar"`
    JSON      *JSONStruct            `json:"json,omitempty" milvus:"name:json"`
    FloatVec  []float32              `json:"floatVec,omitempty" milvus:"name:floatVec"`
    Fp16Vec   []byte                 `json:"fp16Vec,omitempty" milvus:"name:fp16Vec"`
    Bf16Vec   []byte                 `json:"bf16Vec,omitempty" milvus:"name:bf16Vec"`
    BinaryVec []byte                 `json:"binaryVec,omitempty" milvus:"name:binaryVec"`
    SparseVec entity.SparseEmbedding `json:"sparseVec,omitempty" milvus:"name:sparseVec"`
    Array
    Dynamic
}
congqixia commented 4 months ago

@ThreadDao JSON `omitempty` is not supported by the client for now. Maybe we should add support later?