aerospike / aerospike-client-go

Aerospike Client Go
Apache License 2.0
430 stars 198 forks source link

How to check if aerospike set is empty fast #309

Closed andot closed 4 years ago

khaf commented 4 years ago

Use the following info command: `asinfo -v sets/<namespace>/<set_name>`. It will return the total number of objects in the set.

andot commented 4 years ago

How can I quickly check whether an Aerospike set is empty in Go? Could you add an `Empty(namespace string, setName string) (bool, error)` method to the Go client?

khaf commented 4 years ago
package main

import (
    "fmt"
    "strconv"
    "strings"
    "time"

    as "github.com/aerospike/aerospike-client-go"
)

// main connects to an Aerospike cluster and prints the number of unique
// objects stored in a set, as computed by countSetUniqueObjects.
//
// The <host>, <port>, <namespace> and <set> placeholders must be replaced
// with real values before this example will compile.
func main() {
    cp := as.NewClientPolicy()
    asc, err := as.NewClientWithPolicy(cp, <host>, <port>)
    if err != nil {
        panic(err)
    }
    // Release the client's cluster connections when main returns; deferred
    // calls also run while a panic unwinds.
    defer asc.Close()

    totalUniqueObjects, err := countSetUniqueObjects(asc, <namespace>, <set>)
    if err != nil {
        panic(err)
    }

    println("Total Unique Object Count:", totalUniqueObjects)
}

// countSetUniqueObjects returns the number of unique objects in the given set
// of namespace ns.
//
// It sends the "sets/<ns>/<set>" info command to every node returned by
// client.GetNodes, sums the "objects" value found in each response, and
// divides that total by the value returned by replicationFactor. A node whose
// response contains no "objects" entry contributes nothing to the total.
//
// On failure it returns -1 and a non-nil error: an info request failed, an
// "objects" value was not a valid integer, the replication factor lookup
// failed, or the replication factor was not a positive number.
func countSetUniqueObjects(client *as.Client, ns, set string) (int, error) {
    const statKey = "objects"

    // The command is the same for every node, so build it once.
    cmd := fmt.Sprintf("sets/%s/%s", ns, set)
    infop := as.NewInfoPolicy()

    objCount := 0

    // iterate over nodes
    for _, n := range client.GetNodes() {
        info, err := n.RequestInfo(infop, cmd)
        if err != nil {
            return -1, err
        }

        // The response is parsed as ':'-separated key=value fields. Any
        // trailing ';' is dropped so it cannot end up inside the last value.
        fields := strings.Split(strings.TrimRight(info[cmd], ";"), ":")
        for _, field := range fields {
            // Compare the whole key rather than searching for a substring, so
            // a field that merely contains "objects" (in its key or its
            // value) is never mistaken for the counter.
            kv := strings.SplitN(field, "=", 2)
            if len(kv) != 2 || kv[0] != statKey {
                continue
            }

            cnt, err := strconv.Atoi(kv[1])
            if err != nil {
                return -1, err
            }
            objCount += cnt
            break
        }
    }

    // find replication factor
    replFactor, err := replicationFactor(client, ns)
    if err != nil {
        return -1, err
    }
    // Guard the division below: a zero or negative factor would either panic
    // or produce a meaningless count.
    if replFactor <= 0 {
        return -1, fmt.Errorf("invalid replication factor %d for namespace %s", replFactor, ns)
    }

    // unique objects count
    return objCount / replFactor, nil
}

// replicationFactor returns the "effective_replication_factor" value reported
// for namespace ns.
//
// It sends the "namespace/<ns>" info command to every node returned by
// client.GetNodes and parses each response as ';'-separated key=value
// fields. All nodes that report the value must agree on it.
//
// It returns -1 and a non-nil error when an info request fails, when a
// reported value is not a valid integer, when two nodes report different
// values, or when no node reports the value at all.
func replicationFactor(client *as.Client, ns string) (int, error) {
    const statKey = "effective_replication_factor"

    // The command is the same for every node, so build it once.
    cmd := fmt.Sprintf("namespace/%s", ns)
    infop := as.NewInfoPolicy()

    replFactor := -1
    found := false

    // iterate over nodes
    for _, n := range client.GetNodes() {
        info, err := n.RequestInfo(infop, cmd)
        if err != nil {
            return -1, err
        }

        for _, field := range strings.Split(info[cmd], ";") {
            // Compare the whole key rather than searching for a substring, so
            // a malformed or unrelated field can neither be mis-parsed nor
            // trigger an out-of-range slice.
            kv := strings.SplitN(field, "=", 2)
            if len(kv) != 2 || kv[0] != statKey {
                continue
            }

            rf, err := strconv.Atoi(kv[1])
            if err != nil {
                return -1, err
            }

            if found && replFactor != rf {
                return -1, fmt.Errorf("Inconsistent replication factor for namespace %s in cluster.", ns)
            }
            replFactor = rf
            found = true

            break
        }
    }

    // Report a missing value explicitly instead of handing the -1 sentinel to
    // callers with a nil error.
    if !found {
        return -1, fmt.Errorf("replication factor for namespace %s was not reported by any node", ns)
    }

    return replFactor, nil
}
andot commented 4 years ago

Thank you! It's very useful!

khaf commented 4 years ago

This is simplistic code meant to show you what's happening, but it is not efficient, since it iterates through the nodes and sends info commands twice. It is also not concurrent, which will be a problem for large clusters. I'll leave coding it properly to you, though.

andot commented 4 years ago

Thank you. I just need to check if the collection is empty. So I simplified the code:

// IsSetEmpty reports whether the given set of namespace ns holds no objects.
//
// It sends the "sets/<ns>/<set>" info command to the nodes returned by
// client.GetNodes, one at a time, and returns false as soon as any node
// reports an "objects" value greater than zero. It returns true when no node
// does, which includes the case where no response contains an "objects"
// entry.
//
// If an info request fails or an "objects" value is not a valid integer, it
// returns true together with the error; the boolean carries no meaning then.
func IsSetEmpty(client *as.Client, ns, set string) (bool, error) {
    const statKey = "objects"

    // The command is the same for every node, so build it once.
    cmd := fmt.Sprintf("sets/%s/%s", ns, set)
    infop := as.NewInfoPolicy()

    // iterate over nodes
    for _, n := range client.GetNodes() {
        info, err := n.RequestInfo(infop, cmd)
        if err != nil {
            return true, err
        }

        // The response is parsed as ':'-separated key=value fields. Any
        // trailing ';' is dropped so it cannot end up inside the last value.
        fields := strings.Split(strings.TrimRight(info[cmd], ";"), ":")
        for _, field := range fields {
            // Compare the whole key rather than searching for a substring, so
            // a field that merely contains "objects" (in its key or its
            // value) is never mistaken for the counter.
            kv := strings.SplitN(field, "=", 2)
            if len(kv) != 2 || kv[0] != statKey {
                continue
            }

            cnt, err := strconv.Atoi(kv[1])
            if err != nil {
                return true, err
            }
            if cnt > 0 {
                return false, nil
            }
            break
        }
    }
    return true, nil
}

countSetUniqueObjects needs 8-9ms in my production environment, and IsSetEmpty only needs 2ms. That's enough 😄.