go-faker / faker

Go (Golang) Fake Data Generator for Struct, previously https://github.com/bxcodec/faker
https://pkg.go.dev/github.com/go-faker/faker/v4
MIT License
613 stars 30 forks source link

Performance issue when generating 10k elements in array #42

Open mdrokz opened 5 months ago

mdrokz commented 5 months ago

Problem

Hello, and first of all, thanks for the great library! It is really useful.

I'm using faker to seed my local database, and I want to generate 10k elements. The issue is that if I use faker's default method, faker.FakeData, it hangs and takes up all of my memory:

// Reproduction: one FakeData call is asked to fill the whole slice, with the
// slice-size options set to a minimum of 5000 and a maximum of 10000.
// NOTE(review): these size options presumably also apply to any untagged nested
// slices/maps reachable from domain.User — confirm against faker's option handling.
users := []domain.User{}

err = faker.FakeData(&users, options.WithRandomMapAndSliceMinSize(5000), options.WithRandomMapAndSliceMaxSize(10000))

But if I do it manually, like this, it finishes in under 1 second:

// Build the 10000 users one at a time, filling the scalar fields through the
// dedicated faker helpers and only the two association slices through FakeData.
for i := 0; i < 10000; i++ {
    user := domain.User{}
    // err = faker.FakeData(&user)
    user.Name = faker.Name()
    user.Email = faker.Email()
    user.PhoneNumber = faker.Phonenumber()
    user.OrganizationID = org.ID

    user.Locations = []domain.Location{}
    user.Services = []domain.Service{}

    // Check each FakeData result directly: previously both errors were discarded
    // with `_ =`, so the `err != nil` test that followed could never fire here.
    if err := faker.FakeData(&user.Locations, options.WithRandomMapAndSliceMinSize(5), options.WithRandomMapAndSliceMaxSize(10)); err != nil {
        panic(err)
    }
    if err := faker.FakeData(&user.Services, options.WithRandomMapAndSliceMinSize(5), options.WithRandomMapAndSliceMaxSize(10)); err != nil {
        panic(err)
    }

    users = append(users, user)
}

Here is the full code and the models:

domain.go

// User is the user model. The json/gorm/sql tags describe serialization and
// persistence; the faker tags choose how each field is filled by faker:
// "orgID", "locations" and "services" are custom providers registered in main.go,
// and fields tagged faker:"-" are excluded from generation.
type User struct {
    ID             uuid.UUID      `json:"id" gorm:"primary_key; unique; type:uuid; column:id; default:uuid_generate_v4();"`
    OrganizationID uuid.UUID      `json:"organization_id" faker:"orgID"`
    // NOTE(review): no faker tag here, so faker presumably populates this pointer
    // (and whatever is reachable from Organization, which is not shown) on every
    // FakeData call for a User — a likely contributor to the reported slowdown; confirm.
    Organization   *Organization  `json:"organization"`
    Name           string         `json:"name" faker:"name"`
    Email          string         `json:"email" faker:"email"`
    PhoneNumber    string         `json:"phone_number" faker:"phone_number"`
    Photo          string         `json:"photo" faker:"-"`
    AuditLogs      []AuditLog     `json:"audit_logs" gorm:"foreignKey:PerformerID;references:ID;" faker:"-"`
    // Locations and Services are many-to-many associations filled by the
    // "locations" / "services" providers rather than by faker's default slice handling.
    Locations      []Location     `json:"locations" gorm:"many2many:location_user;" faker:"locations"`
    Services       []Service      `json:"services" gorm:"many2many:service_user;" faker:"services"`
    FilterPresets  datatypes.JSON `json:"filter_presets" gorm:"type:jsonb" faker:"-"`
    Role           userRole       `json:"role" sql:"type:user_role" faker:"-"`
    CreatedAt      time.Time      `json:"created_at" gorm:"autoCreateTime" faker:"-"`
}

// Location is the location model. For fake-data generation only OrganizationID,
// Name, Description and the four jsonb settings columns carry a generator tag;
// every other field, including the ID and all associations, is tagged faker:"-".
type Location struct {
    ID              uuid.UUID      `json:"id" gorm:"primary_key; unique; type:uuid; column:id; default:uuid_generate_v4();" faker:"-"`
    OrganizationID  uuid.UUID      `json:"organization_id" gorm:"type:uuid;" faker:"orgID"`
    // "cityName" is a custom provider registered in main.go.
    Name            string         `json:"name" gorm:"type:varchar(200); not null;" faker:"cityName"`
    Description     string         `json:"description" faker:"sentence"`
    Addresses       pq.StringArray `json:"addresses" gorm:"type:text[]" faker:"-"`
    Logo            string         `json:"logo" faker:"-"`
    SocialLinks     pq.StringArray `json:"social_links" gorm:"type:text[]" faker:"-"`
    // The "jsonb" provider (main.go) returns the bytes of an empty JSON object,
    // which satisfies the not-null constraint on these columns.
    WorkingHours    datatypes.JSON `json:"working_hours" gorm:"type:jsonb; not null;" faker:"jsonb"`
    ServiceSettings datatypes.JSON `json:"service_settings" gorm:"type:jsonb; not null;" faker:"jsonb"`
    Kiosks          datatypes.JSON `json:"kiosks" gorm:"type:jsonb; not null;" faker:"jsonb"`
    TV              datatypes.JSON `json:"tv" gorm:"type:jsonb; not null;" faker:"jsonb"`
    Bookings        []*Booking     `json:"bookings" gorm:"foreignKey:LocationID;references:ID;" faker:"-"`
    Services        []*Service     `json:"services" gorm:"many2many:location_service;" faker:"-"`
    Users           []User         `json:"users" gorm:"many2many:location_user;" faker:"-"`
    CreatedAt       time.Time      `json:"created_at" gorm:"autoCreateTime" faker:"-"`
}

// Service is the service model. For fake-data generation the ID, Name,
// Description and the three jsonb page/form columns carry a generator tag;
// the associations and CreatedAt are tagged faker:"-".
type Service struct {
    // NOTE(review): the primary key is tagged faker:"orgID", so every generated
    // Service receives the organization's ID as its own ID, whereas Location.ID is
    // tagged faker:"-". This looks unintended — confirm.
    ID               uuid.UUID      `json:"id" gorm:"primary_key; unique; type:uuid; column:id; default:uuid_generate_v4();" faker:"orgID"`
    Name             string         `json:"name" gorm:"type:varchar(200); not null;" faker:"name"`
    Description      string         `json:"description" faker:"sentence"`
    Bookings         []*Booking     `json:"bookings" gorm:"foreignKey:ServiceID;references:ID;" faker:"-"`
    Locations        []*Location    `json:"locations" gorm:"many2many:location_service;" faker:"-"`
    Users            []User         `json:"users" gorm:"many2many:service_user;" faker:"-"`
    // The "jsonb" provider (main.go) returns the bytes of an empty JSON object.
    BookingForm      datatypes.JSON `json:"booking_form" gorm:"type:jsonb" faker:"jsonb"`
    WelcomePage      datatypes.JSON `json:"welcome_page" gorm:"type:jsonb" faker:"jsonb"`
    ConfirmationPage datatypes.JSON `json:"confirmation_page" gorm:"type:jsonb" faker:"jsonb"`
    CreatedAt        time.Time      `json:"created_at" gorm:"autoCreateTime" faker:"-"`
}

main.go

package main

import (
    "log"
    "os"
    "reflect"
    "time"

    "github.com/IBM/fp-go/option"
    "github.com/go-faker/faker/v4"
    "github.com/go-faker/faker/v4/pkg/options"
    "github.com/makerstudio-io/livqueue/libs/domain"
    "github.com/makerstudio-io/livqueue/libs/utils"
    "gorm.io/driver/postgres"
    "gorm.io/gorm"
    "gorm.io/gorm/logger"
)

// Custom faker providers backing the `faker:"..."` struct tags used in domain.go.
// They are registered while the package initialises; the error returned by each
// AddProvider call is discarded.
var (
    // "cityName" yields the City of an address returned by faker.GetRealAddress.
    _ = faker.AddProvider("cityName", func(_ reflect.Value) (interface{}, error) {
        address := faker.GetRealAddress()
        return address.City, nil
    })

    // "locations" yields a []domain.Location generated with the slice-size
    // options set to min 5 / max 10.
    _ = faker.AddProvider("locations", func(_ reflect.Value) (interface{}, error) {
        generated := make([]domain.Location, 0)
        genErr := faker.FakeData(
            &generated,
            options.WithRandomMapAndSliceMinSize(5),
            options.WithRandomMapAndSliceMaxSize(10),
        )
        return generated, genErr
    })

    // "services" yields a []domain.Service generated with the same size options.
    _ = faker.AddProvider("services", func(_ reflect.Value) (interface{}, error) {
        generated := make([]domain.Service, 0)
        genErr := faker.FakeData(
            &generated,
            options.WithRandomMapAndSliceMinSize(5),
            options.WithRandomMapAndSliceMaxSize(10),
        )
        return generated, genErr
    })

    // "jsonb" yields the two bytes of an empty JSON object.
    _ = faker.AddProvider("jsonb", func(_ reflect.Value) (interface{}, error) {
        return []byte("{}"), nil
    })

    // "users" yields a single domain.User filled by faker with default options.
    _ = faker.AddProvider("users", func(_ reflect.Value) (interface{}, error) {
        var fakeUser domain.User
        genErr := faker.FakeData(&fakeUser)
        return fakeUser, genErr
    })
)

// _ = faker.AddProvider()

// main seeds the local database with fake users.
//
// It opens a Postgres connection through gorm, loads the first Organization,
// registers the "orgID" faker provider so generated rows point at that
// organization, builds 10000 users in memory (each with faker-generated
// Locations and Services), and inserts them in batches of 500.
// Any failure along the way panics.
func main() {
    conn := utils.ConstructDBConnString(utils.PostgresConfig{
        SslMode: option.Some("disable"),
    })

    newLogger := logger.New(
        log.New(os.Stdout, "\r\n", log.LstdFlags), // io writer
        logger.Config{
            SlowThreshold:             time.Second,   // Slow SQL threshold
            LogLevel:                  logger.Silent, // Log level
            IgnoreRecordNotFoundError: true,          // Ignore ErrRecordNotFound error for logger
            ParameterizedQueries:      false,         // false: keep params in the SQL log
            Colorful:                  true,          // Enable color
        },
    )

    opts := gorm.Config{
        Logger: newLogger,
    }

    db, err := gorm.Open(postgres.Open(conn), &opts)
    if err != nil {
        panic(err)
    }

    var org domain.Organization

    res := db.Model(&domain.Organization{}).First(&org)
    if res.Error != nil {
        panic(res.Error)
    }

    // Registered here rather than at package level because the provider needs
    // the organization loaded above.
    if err := faker.AddProvider("orgID", func(v reflect.Value) (interface{}, error) {
        return org.ID, nil
    }); err != nil {
        panic(err)
    }

    // Generating the whole slice in one call is what hangs and exhausts memory:
    // err = faker.FakeData(&users, options.WithRandomMapAndSliceMinSize(5000), options.WithRandomMapAndSliceMaxSize(10000))

    const userCount = 10000

    // The final length is known, so allocate the backing array once.
    users := make([]domain.User, 0, userCount)

    for i := 0; i < userCount; i++ {
        user := domain.User{}
        // err = faker.FakeData(&user)
        user.Name = faker.Name()
        user.Email = faker.Email()
        user.PhoneNumber = faker.Phonenumber()
        user.OrganizationID = org.ID

        user.Locations = []domain.Location{}
        user.Services = []domain.Service{}

        // Check each FakeData result directly: previously both errors were
        // discarded with `_ =`, which left the loop's `err != nil` test dead.
        if err := faker.FakeData(&user.Locations, options.WithRandomMapAndSliceMinSize(5), options.WithRandomMapAndSliceMaxSize(10)); err != nil {
            panic(err)
        }
        if err := faker.FakeData(&user.Services, options.WithRandomMapAndSliceMinSize(5), options.WithRandomMapAndSliceMaxSize(10)); err != nil {
            panic(err)
        }

        users = append(users, user)
    }

    println(len(users), len(users[0].Locations), users[0].Locations[0].WorkingHours, len(users[0].Services))

    req := db.CreateInBatches(&users, 500)
    if req.Error != nil {
        panic(req.Error)
    }
}
bayaderpack commented 3 months ago

Do it in goroutine try like this func GenerateData() []domain.User{}{ var wg sync.WaitGroup users := []domain.User{} for i := 0; i < 10000; i++ { wg.Add(1) go func(no int) { defer wg.Done()

                user := domain.User{}
    // err = faker.FakeData(&user)
    user.Name = faker.Name()
    user.Email = faker.Email()
    user.PhoneNumber = faker.Phonenumber()
    user.OrganizationID = org.ID

    user.Locations = []domain.Location{}
    user.Services = []domain.Service{}

    _ = faker.FakeData(&user.Locations, options.WithRandomMapAndSliceMinSize(5), options.WithRandomMapAndSliceMaxSize(10))
    _ = faker.FakeData(&user.Services, options.WithRandomMapAndSliceMinSize(5), options.WithRandomMapAndSliceMaxSize(10))

    if err != nil {
        panic(err)
    }
    users = append(users, user)
    }(i + 1)
}

wg.Wait()
return users

} I dont test the code I just thinking about something like this

bxcodec commented 2 weeks ago

Interesting. Will need a focus weekend to work on this. I'll pick up and investigate whenever I'm free. But if you have the fixes, please help raise the PR whenever you're ready.