Closed angelbeltran closed 5 years ago
Thank you for this report @angelbeltran and welcome to the Google-Cloud-Go project!
In deed, I can confirm this bug with this repro below:
package main
import (
"context"
"fmt"
"log"
"strings"
"cloud.google.com/go/bigtable"
)
func main() {
ctx := context.Background()
projectID := "odeke-sandbox"
iac, err := bigtable.NewInstanceAdminClient(ctx, projectID)
if err != nil {
msg := err.Error()
switch {
case strings.Contains(msg, "already exists"):
case strings.Contains(msg, "unimplemented"):
default:
log.Fatalf("Failed to create an instance admin client: %v", err)
}
}
defer iac.Close()
instanceID := "issue-1499"
err = iac.CreateInstance(ctx, &bigtable.InstanceConf{
InstanceId: instanceID,
ClusterId: "c1499-99",
Zone: "us-central1-c",
DisplayName: "Issue-1499 google-cloud-go",
InstanceType: bigtable.DEVELOPMENT,
})
if err != nil && !strings.Contains(err.Error(), "already exists") {
msg := err.Error()
switch {
case strings.Contains(msg, "already exists"):
case strings.Contains(msg, "unimplemented"):
default:
log.Fatalf("Failed to create the test instance: %v", err)
}
}
adminClient, err := bigtable.NewAdminClient(ctx, projectID, instanceID)
if err != nil {
log.Fatalf("Failed to create adminClient: %v", err)
}
defer adminClient.Close()
client, err := bigtable.NewClient(ctx, projectID, instanceID)
if err != nil {
log.Fatalf("Failed to create NewClient: %v", err)
}
defer client.Close()
tblName := "mytable"
err = adminClient.CreateTable(ctx, tblName)
if err != nil && !strings.Contains(err.Error(), "already exists") {
log.Printf("Failed to create table: %v", err)
return
}
columnFamilyName := "cf"
err = adminClient.CreateColumnFamily(ctx, tblName, columnFamilyName)
if err != nil && !strings.Contains(err.Error(), "existing") {
log.Printf("Failed to create column family: %v", err)
return
}
var muts []*bigtable.Mutation
var rowKeys []string
rowValues := []string{"", "a", "A", "abc"}
for i, rowValue := range rowValues {
mut := bigtable.NewMutation()
mut.Set(columnFamilyName, fmt.Sprintf("col%d", i), bigtable.Now(), []byte(rowValue))
muts = append(muts, mut)
rowKeys = append(rowKeys, fmt.Sprintf("row%d", i))
}
tbl := client.Open(tblName)
// Write the mutations.
if _, err := tbl.ApplyBulk(ctx, rowKeys, muts); err != nil {
log.Printf("Failed to apply mutations: %v", err)
return
}
err = tbl.ReadRows(ctx, bigtable.InfiniteRange(""), func(r bigtable.Row) bool {
log.Printf("Matched row: %#v\n", r)
return true
}, bigtable.RowFilter(bigtable.ValueFilter("|a")))
log.Printf("Final error: %v\n", err)
}
Matched row: bigtable.Row{"cf":[]bigtable.ReadItem{bigtable.ReadItem{Row:"row0", Column:"cf:col0", Timestamp:1563061458586000, Value:[]uint8(nil)}, bigtable.ReadItem{Row:"row0", Column:"cf:col0", Timestamp:1563060811982000, Value:[]uint8(nil)}, bigtable.ReadItem{Row:"row0", Column:"cf:col0", Timestamp:1563060541800000, Value:[]uint8(nil)}}}
Matched row: bigtable.Row{"cf":[]bigtable.ReadItem{bigtable.ReadItem{Row:"row1", Column:"cf:col1", Timestamp:1563061458586000, Value:[]uint8{0x61}}, bigtable.ReadItem{Row:"row1", Column:"cf:col1", Timestamp:1563060811982000, Value:[]uint8{0x61}}, bigtable.ReadItem{Row:"row1", Column:"cf:col1", Timestamp:1563060541800000, Value:[]uint8{0x61}}}}
Final error: <nil>
$ BIGTABLE_EMULATOR_HOST=localhost:9000 go run main.go
Matched row: bigtable.Row{"cf":[]bigtable.ReadItem{bigtable.ReadItem{Row:"row0", Column:"cf:col0", Timestamp:1563061354963000, Value:[]uint8(nil)}, bigtable.ReadItem{Row:"row0", Column:"cf:col0", Timestamp:1563061083337000, Value:[]uint8(nil)}}}
Matched row: bigtable.Row{"cf":[]bigtable.ReadItem{bigtable.ReadItem{Row:"row1", Column:"cf:col1", Timestamp:1563061354963000, Value:[]uint8{0x61}}, bigtable.ReadItem{Row:"row1", Column:"cf:col1", Timestamp:1563061083337000, Value:[]uint8{0x61}}}}
Matched row: bigtable.Row{"cf":[]bigtable.ReadItem{bigtable.ReadItem{Row:"row2", Column:"cf:col2", Timestamp:1563061354963000, Value:[]uint8{0x41}}, bigtable.ReadItem{Row:"row2", Column:"cf:col2", Timestamp:1563061083337000, Value:[]uint8{0x41}}}}
Matched row: bigtable.Row{"cf":[]bigtable.ReadItem{bigtable.ReadItem{Row:"row3", Column:"cf:col3", Timestamp:1563061354963000, Value:[]uint8{0x61, 0x62, 0x63}}, bigtable.ReadItem{Row:"row3", Column:"cf:col3", Timestamp:1563061083337000, Value:[]uint8{0x61, 0x62, 0x63}}}}
Final error: <nil>
So the problem here exists because it seems like Bigtable on production splits regex parts by the OR operator "|" if it isn't in or detectes "|" while in the Bigtable emulator we try to match the whole regex ^<PATTERN>$
and translated ^|a$
therefore matches all the values, since in re2 and PCRE, |a
will also match the empty character and the same applies for Python too.
@igorbernstein2 I am going to send a CL that mimicks the functionality of Bigtable that I am deriving from the outside just by examining its responses, but please feel free to chime in.
From the offline discussion with @igorbernstein2, he has let me know that Bigtable's regex translation is according to this rule
ANY_USER_REGEX
-> ^(?:ANY_USER_REGEX)$
and that makes the CL a whole lot simpler but also matches the cases properly. Thank you Igor!
Client
golang Bigtable emulator, bttest
Describe Your Environment
macOS Mojave Version 10.14.5
Inserted several rows, each with a single column with various values. Used a ValueFilter with pattern "|a" and RowFilter to filter the rows by value.
Expected Behavior
Only rows whose column's value was "" or "a" would be retrieved.
Actual Behavior
All rows were retrieved.
Note
This behavior is not present with Bigtable, i.e. when tested against Bigtable, only rows whose column values are "" or "a" are retrieved.