Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pat: Correct number matching (again) #330

Merged
merged 3 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions PATTERNS.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,19 @@ Thus, the following Pattern would match both JSON events above:
{"alpha": {"beta": [1]}}
```

### Numeric Values

It would be convenient if Quamina knew, for matching purposes, that 35,
35.00, and 3.5e1 were all the same number.

In many cases, Quamina can manage this. Specifically, for numbers that:

* are between -5.0e9 and 5.0e9 inclusive.
* have five or fewer fractional digits.

Numbers which do not meet these criteria will be treated as strings, which
usually produces good results.

## Extended Patterns
An **Extended Pattern** **MUST** be a JSON object containing
a single field whose name is called the **Pattern Type**.
Expand Down
9 changes: 2 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,8 @@ The `"exists":true` and `"exists":false` patterns
have corner cases; details are covered in
[Patterns in Quamina](PATTERNS.md).

Number matching is weak - the number has to appear
exactly the same in the Pattern and the Event. I.e.,
Quamina doesn't know that 35, 35.000, and 3.5e1 are the
same number. There's a fix for this in the code which
is not yet activated because it causes a
significant performance penalty, so the API needs to
be enhanced to only ask for it when you need it.
Quamina can match numeric values correctly, subject to
certain limits; details are in [Patterns in Quamina](PATTERNS.md).

## Flattening and Matching

Expand Down
128 changes: 64 additions & 64 deletions cl2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,127 +16,93 @@ var (
cl2LineCount int
)

/* This test adopted, with thanks, from aws/event-ruler */

func getCL2Lines(t *testing.T) [][]byte {
t.Helper()

cl2Lock.Lock()
defer cl2Lock.Unlock()
if cl2Lines != nil {
return cl2Lines
}
file, err := os.Open("testdata/citylots2.json.gz")
if err != nil {
t.Fatalf("Can't open citylots2.json.gz: %v", err.Error())
}
defer func(file *os.File) {
_ = file.Close()
}(file)
zr, err := gzip.NewReader(file)
if err != nil {
t.Fatalf("Can't open zip reader: %v", err.Error())
}

scanner := bufio.NewScanner(zr)
buf := make([]byte, oneMeg)
scanner.Buffer(buf, oneMeg)
for scanner.Scan() {
cl2LineCount++
cl2Lines = append(cl2Lines, []byte(scanner.Text()))
}
return cl2Lines
}

func TestRulerCl2(t *testing.T) {
exactRules := []string{
// test rules pulled out so that it's easy to write test funcs focusing in on one set of htem.
var (
anythingButRules = []string{
"{\n" +
" \"properties\": {\n" +
" \"MAPBLKLOT\": [ \"1430022\" ]\n" +
" }" +
" \"STREET\": [ { \"anything-but\": [ \"FULTON\" ] } ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"MAPBLKLOT\": [ \"2607117\" ]\n" +
" \"STREET\": [ { \"anything-but\": [ \"MASON\" ] } ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"MAPBLKLOT\": [ \"2607218\" ]\n" +
" \"ST_TYPE\": [ { \"anything-but\": [ \"ST\" ] } ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"MAPBLKLOT\": [ \"3745012\" ]\n" +
" \"geometry\": {\n" +
" \"type\": [ {\"anything-but\": [ \"Polygon\" ] } ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"MAPBLKLOT\": [ \"VACSTWIL\" ]\n" +
" \"FROM_ST\": [ { \"anything-but\": [ \"441\" ] } ]\n" +
" }\n" +
"}",
}
exactMatches := []int{1, 101, 35, 655, 1}

prefixRules := []string{
anythingButMatches = []int{211158, 210411, 96682, 120, 210615}
exactRules = []string{
"{\n" +
" \"properties\": {\n" +
" \"STREET\": [ { \"prefix\": \"AC\" } ]\n" +
" }\n" +
" \"MAPBLKLOT\": [ \"1430022\" ]\n" +
" }" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"STREET\": [ { \"prefix\": \"BL\" } ]\n" +
" \"MAPBLKLOT\": [ \"2607117\" ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"STREET\": [ { \"prefix\": \"DR\" } ]\n" +
" \"MAPBLKLOT\": [ \"2607218\" ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"STREET\": [ { \"prefix\": \"FU\" } ]\n" +
" \"MAPBLKLOT\": [ \"3745012\" ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"STREET\": [ { \"prefix\": \"RH\" } ]\n" +
" \"MAPBLKLOT\": [ \"VACSTWIL\" ]\n" +
" }\n" +
"}",
}
prefixMatches := []int{24, 442, 38, 2387, 328}

anythingButRules := []string{
exactMatches = []int{1, 101, 35, 655, 1}
prefixRules = []string{
"{\n" +
" \"properties\": {\n" +
" \"STREET\": [ { \"anything-but\": [ \"FULTON\" ] } ]\n" +
" \"STREET\": [ { \"prefix\": \"AC\" } ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"STREET\": [ { \"anything-but\": [ \"MASON\" ] } ]\n" +
" \"STREET\": [ { \"prefix\": \"BL\" } ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"ST_TYPE\": [ { \"anything-but\": [ \"ST\" ] } ]\n" +
" \"STREET\": [ { \"prefix\": \"DR\" } ]\n" +
" }\n" +
"}",
"{\n" +
" \"geometry\": {\n" +
" \"type\": [ {\"anything-but\": [ \"Polygon\" ] } ]\n" +
" \"properties\": {\n" +
" \"STREET\": [ { \"prefix\": \"FU\" } ]\n" +
" }\n" +
"}",
"{\n" +
" \"properties\": {\n" +
" \"FROM_ST\": [ { \"anything-but\": [ \"441\" ] } ]\n" +
" \"STREET\": [ { \"prefix\": \"RH\" } ]\n" +
" }\n" +
"}",
}
anythingButMatches := []int{211158, 210411, 96682, 120, 210615}

shellstyleRules := []string{
prefixMatches = []int{24, 442, 38, 2387, 328}
shellstyleRules = []string{
"{\n" +
" \"properties\": {\n" +
" \"MAPBLKLOT\": [ { \"shellstyle\": \"143*\" } ]\n" +
Expand All @@ -163,8 +129,7 @@ func TestRulerCl2(t *testing.T) {
" }\n" +
"}",
}
shellstyleMatches := []int{490, 713, 43, 2540, 1}

shellstyleMatches = []int{490, 713, 43, 2540, 1}
/* will add when we have numeric
complexArraysRules := []string{
"{\n" +
Expand Down Expand Up @@ -211,6 +176,41 @@ func TestRulerCl2(t *testing.T) {
complexArraysMatches := []int{227, 2, 149444, 64368, 127485}
*/

)

/* This test adopted, with thanks, from aws/event-ruler */

func getCL2Lines(t *testing.T) [][]byte {
t.Helper()

cl2Lock.Lock()
defer cl2Lock.Unlock()
if cl2Lines != nil {
return cl2Lines
}
file, err := os.Open("testdata/citylots2.json.gz")
if err != nil {
t.Fatalf("Can't open citylots2.json.gz: %v", err.Error())
}
defer func(file *os.File) {
_ = file.Close()
}(file)
zr, err := gzip.NewReader(file)
if err != nil {
t.Fatalf("Can't open zip reader: %v", err.Error())
}

scanner := bufio.NewScanner(zr)
buf := make([]byte, oneMeg)
scanner.Buffer(buf, oneMeg)
for scanner.Scan() {
cl2LineCount++
cl2Lines = append(cl2Lines, []byte(scanner.Text()))
}
return cl2Lines
}

func TestRulerCl2(t *testing.T) {
lines := getCL2Lines(t)
fmt.Printf("lines: %d\n", len(lines))

Expand Down
18 changes: 1 addition & 17 deletions field_matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,21 +132,6 @@ func (m *fieldMatcher) addTransition(field *patternField, printer printer) []*fi
var nextFieldMatchers []*fieldMatcher
for _, val := range field.vals {
nextFieldMatchers = append(nextFieldMatchers, vm.addTransition(val, printer))

// if the val is a number, let's add a transition on the canonicalized number
// TODO: Only do this if asked
/*
if val.vType == numberType {
c, err := canonicalize([]byte(val.val))
if err == nil {
number := typedVal{
vType: literalType,
val: c,
}
nextFieldMatchers = append(nextFieldMatchers, vm.addTransition(number))
}
}
*/
}
m.update(freshStart)
return nextFieldMatchers
Expand All @@ -162,6 +147,5 @@ func (m *fieldMatcher) transitionOn(field *Field, bufs *bufpair) []*fieldMatcher
if !ok {
return nil
}

return valMatcher.transitionOn(field.Val, bufs)
return valMatcher.transitionOn(field, bufs)
}
Loading
Loading