-
Notifications
You must be signed in to change notification settings - Fork 23
/
Copy pathvalue_matcher.go
275 lines (245 loc) · 9.5 KB
/
value_matcher.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
package quamina
import (
"bytes"
"fmt"
"sync/atomic"
)
type bufpair struct {
buf1, buf2 []*faState
}
// valueMatcher represents a byte-driven finite automaton (FA). The table needs to be the
// equivalent of a map[byte][]nextState and is represented by smallTable.
// In this implementation all the FAs are nondeterministic, which means each
// byte can cause transfers to multiple other states. We compute the FA
// for a pattern and merge with any existing FA.
// In some (common) cases there is only one matching value present for some field,
// e.g. a string-valued field with only one string match. In this case, the FA
// will be null and the value being matched has to exactly equal the singletonMatch
// field; if so, the singletonTransition is the return value. This is to avoid
// having a long chain of smallTables each with only one entry.
// To allow for concurrent access between one thread running AddPattern and many
// others running MatchesForEvent, the valueMatcher payload is stored in an
// atomic.Pointer
type valueMatcher struct {
updateable atomic.Pointer[vmFields]
}
type vmFields struct {
startTable *smallTable
singletonMatch []byte
singletonTransition *fieldMatcher
hasNumbers bool
isNondeterministic bool
}
func (m *valueMatcher) fields() *vmFields {
return m.updateable.Load()
}
func (m *valueMatcher) getFieldsForUpdate() *vmFields {
current := m.updateable.Load()
freshState := *current // struct copy
return &freshState
}
func (m *valueMatcher) update(state *vmFields) {
m.updateable.Store(state)
}
func newValueMatcher() *valueMatcher {
var vm valueMatcher
vm.update(&vmFields{})
return &vm
}
func (m *valueMatcher) transitionOn(eventField *Field, bufs *bufpair) []*fieldMatcher {
vmFields := m.fields()
var transitions []*fieldMatcher
val := eventField.Val
switch {
case vmFields.singletonMatch != nil:
// if there's a singleton entry here, we either match the val or we're
// done Note: We have to check this first because addTransition might be
// busy constructing an automaton, but it's not ready for use yet. When
// it's done it'll zero out the singletonMatch
if bytes.Equal(vmFields.singletonMatch, val) {
transitions = append(transitions, vmFields.singletonTransition)
}
return transitions
case vmFields.startTable != nil:
// if there is a potential for a numeric match, try making a Q number from the event
if vmFields.hasNumbers && eventField.IsNumber {
qNum, err := qNumFromBytes(val)
if err == nil {
if vmFields.isNondeterministic {
return traverseNFA(vmFields.startTable, qNum, transitions, bufs)
} else {
return traverseDFA(vmFields.startTable, qNum, transitions)
}
}
}
// if it doesn't work as a Q number for some reason, go ahead and compare the string values
if vmFields.isNondeterministic {
return traverseNFA(vmFields.startTable, val, transitions, bufs)
} else {
return traverseDFA(vmFields.startTable, val, transitions)
}
default:
// no FA, no singleton, nothing to do, this probably can't happen because a flattener
// shouldn't preserve a field that hasn't appeared in a pattern
return transitions
}
}
func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatcher {
valBytes := []byte(val.val)
fields := m.getFieldsForUpdate()
// special case - virgin state and this is a string match
if fields.startTable == nil && fields.singletonMatch == nil && (val.vType == stringType || val.vType == literalType) {
fields.singletonMatch = valBytes
fields.singletonTransition = newFieldMatcher()
m.update(fields)
return fields.singletonTransition
}
// special case: singleton match is here and this value matches it
if val.vType == stringType || val.vType == literalType {
if bytes.Equal(fields.singletonMatch, valBytes) {
return fields.singletonTransition
}
}
// no dodges, we have to build an automaton to match this value
var nextField *fieldMatcher
var newFA *smallTable
switch val.vType {
case stringType, literalType:
newFA, nextField = makeStringFA(valBytes, nil, false)
case numberType:
newFA, nextField = makeStringFA(valBytes, nil, true)
fields.hasNumbers = true
case anythingButType:
newFA, nextField = makeMultiAnythingButFA(val.list)
case shellStyleType:
newFA, nextField = makeShellStyleFA(valBytes, printer)
fields.isNondeterministic = true
case wildcardType:
newFA, nextField = makeWildcardFA(valBytes, printer)
fields.isNondeterministic = true
case prefixType:
newFA, nextField = makePrefixFA(valBytes)
case monocaseType:
newFA, nextField = makeMonocaseFA(valBytes, printer)
case regexpType:
newFA, nextField = makeRegexpNFA(val.parsedRegexp, true)
printer.labelTable(newFA, "RX start")
default:
panic("unknown value type")
}
// there's already a table, thus an out-degree > 1
if fields.startTable != nil {
fields.startTable = mergeFAs(fields.startTable, newFA, sharedNullPrinter)
m.update(fields)
return nextField
}
// no start table, maybe singletons …
if fields.singletonMatch != nil {
// singleton is here, we don't match, so our outdegree becomes 2, so we have
// to build an automaton with two values in it
singletonAutomaton, _ := makeStringFA(fields.singletonMatch, fields.singletonTransition, false)
// now table is ready for use, nuke singleton to signal threads to use it
fields.startTable = mergeFAs(singletonAutomaton, newFA, sharedNullPrinter)
fields.singletonMatch = nil
fields.singletonTransition = nil
} else {
// empty valueMatcher, no special cases, just jam in the new FA
fields.startTable = newFA
}
m.update(fields)
return nextField
}
func (m *valueMatcher) gatherMetadata(meta *nfaMetadata) {
start := m.fields().startTable
if start != nil {
start.gatherMetadata(meta)
}
}
// TODO: make these simple FA builders iterative not recursive, this will recurse as deep as the longest string match
func makePrefixFA(val []byte) (*smallTable, *fieldMatcher) {
nextField := newFieldMatcher()
return makeOnePrefixFAStep(val, 0, nextField), nextField
}
func makeOnePrefixFAStep(val []byte, index int, nextField *fieldMatcher) *smallTable {
var nextStep *faNext
// have to stop one short to skip the closing "
var nextState *faState
if index == len(val)-2 {
nextState = &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{nextField}}
} else {
nextState = &faState{table: makeOnePrefixFAStep(val, index+1, nextField)}
}
nextStep = &faNext{states: []*faState{nextState}}
return makeSmallTable(nil, []byte{val[index]}, []*faNext{nextStep})
}
// makeStringFA creates a utf8-based automaton from a literal string
// using smallTables. Note the addition of a valueTerminator. The implementation
// is recursive because this allows the use of the makeSmallTable call, which
// reduces memory churn. Converting from a straightforward implementation to
// this approximately doubled the fields/second rate in addPattern
func makeStringFA(val []byte, useThisTransition *fieldMatcher, isNumber bool) (*smallTable, *fieldMatcher) {
var nextField *fieldMatcher
if useThisTransition != nil {
nextField = useThisTransition
} else {
nextField = newFieldMatcher()
}
stringFA := makeOneStringFAStep(val, 0, nextField)
// if the field is numeric, *and* if it can be converted to a float, *and* can be
// made into a Q number, equip the NFA with the Q number form
if isNumber {
qNum, err := qNumFromBytes(val)
if err == nil {
numberFA := makeOneStringFAStep(qNum, 0, nextField)
stringFA = mergeFAs(stringFA, numberFA, sharedNullPrinter)
}
}
return stringFA, nextField
}
// makeFAFragment makes the simplest possible byte-chain FA with its last transition being to the provided
// endAt value. It is designed to help higher-level automaton builders.
// suppose you need a few steps to match "cat". You call makeFAFragment and it'll make two *faState instances, one
// which matches 'a' and transitions to the second, which matches 't' and transitions to the provided endAt
// argument. Then you transition to what makeFAFragment returns on 'c' from your current faState.
func makeFAFragment(val []byte, endAt *faNext, pp printer) *faNext {
firstStep := &faNext{}
step := firstStep
// no-op on one-byte values, but should still work so caller can just call this without worrying
// about slice length
if len(val) == 1 {
return endAt
}
for index := 1; index < len(val); index++ {
if index == len(val)-1 {
table := makeSmallTable(nil, []byte{val[index]}, []*faNext{endAt})
pp.labelTable(table, fmt.Sprintf("exiting on %v", val[index]))
step.states = []*faState{{table: table}}
} else {
nextState := &faNext{}
table := makeSmallTable(nil, []byte{val[index]}, []*faNext{nextState})
pp.labelTable(table, fmt.Sprintf("stepping on %c", val[index]))
step.states = []*faState{{table: table}}
step = nextState
}
}
return firstStep
}
func makeOneStringFAStep(val []byte, index int, nextField *fieldMatcher) *smallTable {
// TODO: Turn this into a simple back-to-front construction and remove the recursion
var nextStepList *faNext
if index == len(val)-1 {
lastStep := &faState{
table: newSmallTable(),
fieldTransitions: []*fieldMatcher{nextField},
}
lastStepList := &faNext{states: []*faState{lastStep}}
nextStep := &faState{
table: makeSmallTable(nil, []byte{valueTerminator}, []*faNext{lastStepList}),
}
nextStepList = &faNext{states: []*faState{nextStep}}
} else {
nextStep := &faState{table: makeOneStringFAStep(val, index+1, nextField)}
nextStepList = &faNext{states: []*faState{nextStep}}
}
return makeSmallTable(nil, []byte{val[index]}, []*faNext{nextStepList})
}