Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding pushdown filters, enum column #2

Merged
merged 1 commit into from
Jun 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -427,16 +427,15 @@ The benchmarks below were run on a collection of *500 items* containing a dozen

```
cpu: Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz
BenchmarkCollection/insert-8 27589314 43.05 ns/op 1 B/op 0 allocs/op
BenchmarkCollection/fetch-8 21041593 56.84 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/count-slow-8 109107 11001 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/count-8 9300270 128.6 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/range-8 1871557 641.0 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/select-8 1214799 975.8 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/update-at-8 28573945 41.99 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/update-all-8 184694 6481 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/delete-at-8 2613982 459.1 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/delete-all-8 296901 3762 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/insert-8 5013795 239.9 ns/op 27 B/op 0 allocs/op
BenchmarkCollection/fetch-8 23730796 50.63 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/scan-8 234990 4743 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/count-8 7965873 152.7 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/range-8 1512513 799.9 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/update-at-8 5409420 224.7 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/update-all-8 196626 6099 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/delete-at-8 2006052 594.9 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/delete-all-8 1889685 643.2 ns/op 0 B/op 0 allocs/op
```

## Contributing
Expand Down
35 changes: 24 additions & 11 deletions collection_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,17 @@ import (
"github.com/stretchr/testify/assert"
)

// BenchmarkCollection/insert-8 5717271 210.1 ns/op 3 B/op 0 allocs/op
// BenchmarkCollection/fetch-8 23014076 52.73 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/unindexed-8 144264 7534 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/count-8 8954762 132.2 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/range-8 1760739 682.5 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/update-at-8 9917469 122.9 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/update-all-8 200008 6014 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/delete-at-8 2208020 544.8 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/delete-all-8 2013384 599.2 ns/op 0 B/op 0 allocs/op
/*
BenchmarkCollection/insert-8 5013795 239.9 ns/op 27 B/op 0 allocs/op
BenchmarkCollection/fetch-8 23730796 50.63 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/scan-8 234990 4743 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/count-8 7965873 152.7 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/range-8 1512513 799.9 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/update-at-8 5409420 224.7 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/update-all-8 196626 6099 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/delete-at-8 2006052 594.9 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/delete-all-8 1889685 643.2 ns/op 0 B/op 0 allocs/op
*/
func BenchmarkCollection(b *testing.B) {
players := loadPlayers()
obj := Object{
Expand Down Expand Up @@ -62,7 +64,7 @@ func BenchmarkCollection(b *testing.B) {
assert.NotEmpty(b, name)
})

b.Run("unindexed", func(b *testing.B) {
b.Run("scan", func(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
for n := 0; n < b.N; n++ {
Expand Down Expand Up @@ -392,7 +394,18 @@ func loadPlayers() *Collection {

// Load the items into the collection
players := loadFixture("players.json")
out.CreateColumnsOf(players[0])
out.CreateColumn("serial", ForAny())
out.CreateColumn("name", ForAny())
out.CreateColumn("active", ForBool())
out.CreateColumn("class", ForEnum())
out.CreateColumn("race", ForEnum())
out.CreateColumn("age", ForFloat64())
out.CreateColumn("hp", ForFloat64())
out.CreateColumn("mp", ForFloat64())
out.CreateColumn("balance", ForFloat64())
out.CreateColumn("gender", ForEnum())
out.CreateColumn("guild", ForEnum())
out.CreateColumn("location", ForAny())
out.Query(func(txn *Txn) error {
for _, p := range players {
txn.Insert(p)
Expand Down
205 changes: 117 additions & 88 deletions columns.go → column.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for details.

//go:generate genny -pkg=column -in=generic.go -out=z_numbers.go gen "number=float32,float64,int,int16,int32,int64,uint,uint16,uint32,uint64"
//go:generate genny -pkg=column -in=generic_test.go -out=z_numbers_test.go gen "number=float32,float64,int,int16,int32,int64,uint,uint16,uint32,uint64"
//go:generate genny -pkg=column -in=column_generate.go -out=column_numbers.go gen "number=float32,float64,int,int16,int32,int64,uint,uint16,uint32,uint64"

package column

Expand All @@ -14,6 +13,28 @@ import (
"github.com/kelindar/column/commit"
)

// columnType is a bit set describing which optional contracts a column
// implementation supports. Flags can be OR-ed together.
type columnType uint8

// Flags that may be combined inside a columnType value.
const (
	typeGeneric columnType = 0      // Generic column, the baseline every column should support
	typeNumeric columnType = 1 << 0 // Numeric column, supports float64, int64 or uint64
	typeTextual columnType = 1 << 1 // Textual column, supports strings
)

// typeOf inspects a column implementation and returns a bit set with one flag
// per optional contract it satisfies: typeNumeric when it implements Numeric,
// typeTextual when it implements Textual. A column satisfying neither yields
// the zero value.
func typeOf(column Column) (typ columnType) {
	_, isNumeric := column.(Numeric)
	_, isTextual := column.(Textual)

	if isNumeric {
		typ |= typeNumeric
	}
	if isTextual {
		typ |= typeTextual
	}
	return typ
}

// --------------------------- Contracts ----------------------------

// Column represents a column implementation
type Column interface {
Grow(idx uint32)
Expand All @@ -24,11 +45,22 @@ type Column interface {
Index() *bitmap.Bitmap
}

// Numerical represents a numerical column implementation
type numerical interface {
Float64(uint32) (float64, bool)
Uint64(uint32) (uint64, bool)
Int64(uint32) (int64, bool)
// Numeric is the contract implemented by columns that store numbers. It
// extends Column with typed loads and typed filters for the three supported
// numeric representations.
type Numeric interface {
	Column

	// Typed point reads at a given index; the boolean is forwarded as the
	// "ok" result by the column wrapper.
	LoadFloat64(idx uint32) (float64, bool)
	LoadUint64(idx uint32) (uint64, bool)
	LoadInt64(idx uint32) (int64, bool)

	// Typed filters that apply a predicate against the supplied bitmap.
	FilterFloat64(index *bitmap.Bitmap, predicate func(v float64) bool)
	FilterUint64(index *bitmap.Bitmap, predicate func(v uint64) bool)
	FilterInt64(index *bitmap.Bitmap, predicate func(v int64) bool)
}

// Textual is the contract implemented by columns that store strings. It
// extends Column with a string load and a string filter.
type Textual interface {
	Column

	// LoadString reads the string at idx; the boolean is forwarded as the
	// "ok" result by the column wrapper.
	LoadString(idx uint32) (string, bool)

	// FilterString applies a string predicate against the supplied bitmap.
	FilterString(index *bitmap.Bitmap, predicate func(v string) bool)
}

// --------------------------- Constructors ----------------------------
Expand All @@ -48,6 +80,7 @@ var (
ForUint32 = makeUint32s
ForUint64 = makeUint64s
ForBool = makeBools
ForEnum = makeEnum
)

// ForKind creates a new column instance for a specified reflect.Kind
Expand Down Expand Up @@ -85,18 +118,30 @@ func ForKind(kind reflect.Kind) Column {
// column represents a column wrapper that synchronizes operations on the
// embedded Column implementation using the embedded read/write mutex.
// NOTE(review): `name` appears twice in this view, which looks like leftover
// old/new diff lines — confirm against the actual file.
type column struct {
	sync.RWMutex // Guards access to the wrapped column
	name string
	Column
	kind columnType // The type of the column
	name string     // The name of the column
}

// columnFor wraps a column implementation into a synchronized column. It
// records the given name and the capability flags computed by typeOf.
func columnFor(name string, v Column) *column {
	wrapped := new(column)
	wrapped.Column = v
	wrapped.name = name
	wrapped.kind = typeOf(v)
	return wrapped
}

// IsNumeric reports whether the column supports numerical operations, that
// is, whether the typeNumeric flag is set in its kind.
func (c *column) IsNumeric() bool {
	return c.kind&typeNumeric != 0
}

// IsTextual reports whether the column supports string operations, that is,
// whether the typeTextual flag is set in its kind.
func (c *column) IsTextual() bool {
	return c.kind&typeTextual != 0
}

// Intersect performs a logical and operation and updates the destination bitmap.
func (c *column) Intersect(dst *bitmap.Bitmap) {
c.RLock()
Expand Down Expand Up @@ -147,6 +192,14 @@ func (c *column) Value(idx uint32) (v interface{}, ok bool) {
return
}

// String retrieves a string value at a specified index. The read lock is held
// for the duration of the lookup, which is delegated to loadString.
func (c *column) String(idx uint32) (v string, ok bool) {
	c.RLock()
	text, found := c.loadString(idx)
	c.RUnlock()
	return text, found
}

// Float64 retrieves a float64 value at a specified index
func (c *column) Float64(idx uint32) (v float64, ok bool) {
c.RLock()
Expand Down Expand Up @@ -177,26 +230,34 @@ func (c *column) loadValue(idx uint32) (v interface{}, ok bool) {
return
}

// loadString (unlocked) retrieves a string value at a specified index. The
// caller is expected to hold the lock. If the wrapped column does not
// implement Textual, it returns ("", false).
func (c *column) loadString(idx uint32) (v string, ok bool) {
	// The type-assertion flag needs its own name. Reusing `ok` with := would
	// shadow the named result, and the function would always report false.
	if s, contains := c.Column.(Textual); contains {
		v, ok = s.LoadString(idx)
	}
	return v, ok
}

// loadFloat64 (unlocked) retrieves a float64 value at a specified index
func (c *column) loadFloat64(idx uint32) (v float64, ok bool) {
if n, contains := c.Column.(numerical); contains {
v, ok = n.Float64(idx)
if n, contains := c.Column.(Numeric); contains {
v, ok = n.LoadFloat64(idx)
}
return
}

// loadInt64 (unlocked) retrieves an int64 value at a specified index
func (c *column) loadInt64(idx uint32) (v int64, ok bool) {
if n, contains := c.Column.(numerical); contains {
v, ok = n.Int64(idx)
if n, contains := c.Column.(Numeric); contains {
v, ok = n.LoadInt64(idx)
}
return
}

// loadUint64 (unlocked) retrieves an uint64 value at a specified index
func (c *column) loadUint64(idx uint32) (v uint64, ok bool) {
if n, contains := c.Column.(numerical); contains {
v, ok = n.Uint64(idx)
if n, contains := c.Column.(Numeric); contains {
v, ok = n.LoadUint64(idx)
}
return
}
Expand All @@ -219,11 +280,18 @@ func makeAny() Column {

// Grow grows the size of the column until we have enough to store
func (c *columnAny) Grow(idx uint32) {
// TODO: also grow the bitmap
size := uint32(len(c.data))
for i := size; i <= idx; i++ {
c.data = append(c.data, nil)
if idx < uint32(len(c.data)) {
return
}

if idx < uint32(cap(c.data)) {
c.data = c.data[:idx+1]
return
}

clone := make([]interface{}, idx+1, capacityFor(idx+1))
copy(clone, c.data)
c.data = clone
}

// Update performs a series of updates at once
Expand Down Expand Up @@ -261,6 +329,26 @@ func (c *columnAny) Index() *bitmap.Bitmap {
return &c.fill
}

// LoadString retrieves the value at a specified index as a string. The
// boolean is true only when Value reports the index as present and the
// stored value is a string.
func (c *columnAny) LoadString(idx uint32) (string, bool) {
	value, present := c.Value(idx)
	text, isString := value.(string)
	if !present || !isString {
		return text, false
	}
	return text, true
}

// FilterString filters the supplied index using a string predicate. An entry
// matches only if it lies within the stored data, is marked as filled, holds
// a string, and the predicate accepts that string. Everything else is
// reported to Filter as a non-match.
func (c *columnAny) FilterString(index *bitmap.Bitmap, predicate func(v string) bool) {
	index.Filter(func(idx uint32) bool {
		// Reject anything out of range or not filled before loading it.
		if idx >= uint32(len(c.data)) || !c.fill.Contains(idx) {
			return false
		}

		text, isString := c.LoadString(idx)
		return isString && predicate(text)
	})
}

// --------------------------- booleans ----------------------------

// columnBool represents a boolean column
Expand Down Expand Up @@ -315,75 +403,16 @@ func (c *columnBool) Index() *bitmap.Bitmap {
return &c.data
}

// --------------------------- computed index ----------------------------
// --------------------------- funcs ----------------------------

// computed represents a computed column, i.e. one that is derived from
// another column.
type computed interface {
	// Column returns the name of the column this computed column targets.
	Column() string
}

// index represents the index implementation: a bitmap of the row indices
// whose value satisfied a rule, together with the name of the target column.
type index struct {
	fill bitmap.Bitmap               // Bits set for the indices where rule returned true
	prop string                      // Name of the target column, as returned by Column()
	rule func(v interface{}) bool    // Predicate evaluated against each put value in Update
}

// newIndex creates a new index named indexName over the column columnName.
// The index starts with an empty fill bitmap (capacity 4) and is wrapped in a
// synchronized column through columnFor.
func newIndex(indexName, columnName string, rule func(v interface{}) bool) *column {
	impl := &index{
		fill: make(bitmap.Bitmap, 0, 4),
		prop: columnName,
		rule: rule,
	}
	return columnFor(indexName, impl)
}

// Grow is meant to grow the size of the column until there is enough room to
// store idx. It is currently a no-op for the index.
func (c *index) Grow(idx uint32) {
	// TODO: not implemented; nothing is resized here.
}

// Column returns the target name of the column on which this index should
// apply (the columnName the index was created with).
func (c *index) Column() string {
	return c.prop
}

// Update applies a batch of updates to the index. Only put operations are
// considered, since the index can only be derived from the final stored
// value. For each put, the rule is evaluated against the value and the bit at
// the update's index is set or removed accordingly.
func (c *index) Update(updates []commit.Update) {
	for _, u := range updates {
		// Anything other than a put carries no final value to evaluate.
		if u.Type != commit.Put {
			continue
		}

		if !c.rule(u.Value) {
			c.fill.Remove(u.Index)
			continue
		}
		c.fill.Set(u.Index)
	}
}

// Delete deletes a set of items from the column by applying AndNot with the
// given bitmap to the fill bitmap.
func (c *index) Delete(items *bitmap.Bitmap) {
	c.fill.AndNot(*items)
}

// Value retrieves the value at a specified index. For indices covered by the
// fill bitmap it returns whether the bit is set (as a bool) and ok == true.
// For indices beyond it, it returns (nil, false).
func (c *index) Value(idx uint32) (v interface{}, ok bool) {
	// Each element of fill accounts for 1<<6 indices.
	limit := uint32(len(c.fill)) << 6
	if idx >= limit {
		return nil, false
	}
	return c.fill.Contains(idx), true
}

// Contains checks whether the column has a value at a specified index, i.e.
// whether the fill bitmap contains idx.
func (c *index) Contains(idx uint32) bool {
	return c.fill.Contains(idx)
}

// Index returns the fill list for the column
func (c *index) Index() *bitmap.Bitmap {
return &c.fill
// capacityFor rounds v up to the next power of 2 (values that already are a
// power of 2 are returned unchanged). The arithmetic is done in uint32, so an
// input of 0, or one above 1<<31, wraps around and yields 0.
func capacityFor(v uint32) int {
	v--
	// Smear the highest set bit into every lower position.
	for shift := uint(1); shift <= 16; shift <<= 1 {
		v |= v >> shift
	}
	return int(v + 1)
}
Loading