From 62f68b7a4d994703d7e9eceb989dd64ea73d540c Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 3 Jul 2025 20:08:22 +0300 Subject: [PATCH 01/75] Make funcs private --- pipeline/doif/check_type_op.go | 2 +- pipeline/doif/check_type_test.go | 6 +++--- pipeline/doif/ctor.go | 10 +++++----- pipeline/doif/ctor_test.go | 10 +++++----- pipeline/doif/do_if_test.go | 8 ++++---- pipeline/doif/field_op.go | 2 +- pipeline/doif/len_cmp_op.go | 2 +- pipeline/doif/logical_op.go | 2 +- pipeline/doif/ts_cmp_op.go | 2 +- 9 files changed, 22 insertions(+), 22 deletions(-) diff --git a/pipeline/doif/check_type_op.go b/pipeline/doif/check_type_op.go index fff31eec9..2e78d0321 100644 --- a/pipeline/doif/check_type_op.go +++ b/pipeline/doif/check_type_op.go @@ -79,7 +79,7 @@ type checkTypeOpNode struct { checkTypeFns []checkTypeFn } -func NewCheckTypeOpNode(field string, values [][]byte) (Node, error) { +func newCheckTypeOpNode(field string, values [][]byte) (Node, error) { if len(values) == 0 { return nil, errors.New("values are not provided") } diff --git a/pipeline/doif/check_type_test.go b/pipeline/doif/check_type_test.go index f4dd33c34..e1a8b1bab 100644 --- a/pipeline/doif/check_type_test.go +++ b/pipeline/doif/check_type_test.go @@ -204,7 +204,7 @@ func TestCheckType(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() var eventRoot *insaneJSON.Root - node, err := NewCheckTypeOpNode(tt.node.field, tt.node.values) + node, err := newCheckTypeOpNode(tt.node.field, tt.node.values) require.NoError(t, err) for _, d := range tt.data { if d.eventStr == "" { @@ -357,8 +357,8 @@ func TestCheckTypeDuplicateValues(t *testing.T) { tt := tt t.Run(tt.name, func(t *testing.T) { t.Parallel() - node, err := NewCheckTypeOpNode(tt.node.field, tt.node.values) - require.NoError(t, err, "must be no error on NewCheckTypeOpNode") + node, err := newCheckTypeOpNode(tt.node.field, tt.node.values) + require.NoError(t, err, "must be no error on newCheckTypeOpNode") ctnode, ok := node.(*checkTypeOpNode) require.True(t, ok, "must be *checkTypeOpNode type") assert.Equal(t, tt.expectedVals, len(ctnode.checkTypeFns)) diff --git a/pipeline/doif/ctor.go b/pipeline/doif/ctor.go index 56f08f36b..464795a31 100644 --- a/pipeline/doif/ctor.go +++ b/pipeline/doif/ctor.go @@ -95,7 +95,7 @@ func extractFieldOpNode(opName string, node map[string]any) (Node, error) { return nil, fmt.Errorf("extract field op values: %w", err) } - result, err = NewFieldOpNode(opName, fieldPath, caseSensitive, vals) + result, err = newFieldOpNode(opName, fieldPath, caseSensitive, vals) if err != nil { return nil, fmt.Errorf("init field op: %w", err) } @@ -159,7 +159,7 @@ func extractLengthCmpOpNode(opName string, node map[string]any) (Node, error) { return nil, err } - return NewLenCmpOpNode(opName, fieldPath, cmpOp, cmpValue) + return newLenCmpOpNode(opName, fieldPath, cmpOp, cmpValue) } func extractTsCmpOpNode(_ string, node map[string]any) (Node, error) { @@ -225,7 +225,7 @@ func extractTsCmpOpNode(_ string, node map[string]any) (Node, error) { return nil, err } - return NewTsCmpOpNode(fieldPath, format, cmpOp, cmpMode, cmpValue, cmpValueShift, updateInterval) + return newTsCmpOpNode(fieldPath, format, cmpOp, cmpMode, cmpValue, cmpValueShift, updateInterval) } func extractCheckTypeOpNode(_ string, node map[string]any) (Node, error) { @@ -239,7 +239,7 @@ func extractCheckTypeOpNode(_ string, node map[string]any) (Node, error) { return nil, fmt.Errorf("extract check type op values: %w", err) } - result, err := NewCheckTypeOpNode(fieldPath, vals) + 
result, err := newCheckTypeOpNode(fieldPath, vals) if err != nil { return nil, fmt.Errorf("init check_type op: %w", err) } @@ -270,7 +270,7 @@ func extractLogicalOpNode(opName string, node map[string]any) (Node, error) { operands = append(operands, operand) } - result, err := NewLogicalNode(opName, operands) + result, err := newLogicalNode(opName, operands) if err != nil { return nil, fmt.Errorf("init logical node: %w", err) } diff --git a/pipeline/doif/ctor_test.go b/pipeline/doif/ctor_test.go index 7a52453ab..b8eb430c3 100644 --- a/pipeline/doif/ctor_test.go +++ b/pipeline/doif/ctor_test.go @@ -39,7 +39,7 @@ type doIfTreeNode struct { func buildDoIfTree(node *doIfTreeNode) (Node, error) { switch { case node.fieldOp != "": - return NewFieldOpNode( + return newFieldOpNode( node.fieldOp, node.fieldName, node.caseSensitive, @@ -54,14 +54,14 @@ func buildDoIfTree(node *doIfTreeNode) (Node, error) { } operands = append(operands, operand) } - return NewLogicalNode( + return newLogicalNode( node.logicalOp, operands, ) case node.lenCmpOp != "": - return NewLenCmpOpNode(node.lenCmpOp, node.fieldName, node.cmpOp, node.cmpValue) + return newLenCmpOpNode(node.lenCmpOp, node.fieldName, node.cmpOp, node.cmpValue) case node.tsCmpOp: - return NewTsCmpOpNode( + return newTsCmpOpNode( node.fieldName, node.tsFormat, node.cmpOp, @@ -71,7 +71,7 @@ func buildDoIfTree(node *doIfTreeNode) (Node, error) { node.tsUpdateInterval, ) case node.checkTypeOp: - return NewCheckTypeOpNode( + return newCheckTypeOpNode( node.fieldName, node.values, ) diff --git a/pipeline/doif/do_if_test.go b/pipeline/doif/do_if_test.go index 045c0b06b..79b5438e7 100644 --- a/pipeline/doif/do_if_test.go +++ b/pipeline/doif/do_if_test.go @@ -38,7 +38,7 @@ type treeNode struct { func buildTree(node treeNode) (Node, error) { switch { case node.fieldOp != "": - return NewFieldOpNode( + return newFieldOpNode( node.fieldOp, node.fieldName, node.caseSensitive, @@ -53,14 +53,14 @@ func buildTree(node treeNode) (Node, error) { } operands = append(operands, operand) } - return NewLogicalNode( + return newLogicalNode( node.logicalOp, operands, ) case node.lenCmpOp != "": - return NewLenCmpOpNode(node.lenCmpOp, node.fieldName, node.cmpOp, node.cmpValue) + return newLenCmpOpNode(node.lenCmpOp, node.fieldName, node.cmpOp, node.cmpValue) case node.tsCmpOp: - return NewTsCmpOpNode( + return newTsCmpOpNode( node.fieldName, node.tsFormat, node.cmpOp, diff --git a/pipeline/doif/field_op.go b/pipeline/doif/field_op.go index 589f674ff..68a314848 100644 --- a/pipeline/doif/field_op.go +++ b/pipeline/doif/field_op.go @@ -201,7 +201,7 @@ type fieldOpNode struct { maxValLen int } -func NewFieldOpNode(op string, field string, caseSensitive bool, values [][]byte) (Node, error) { +func newFieldOpNode(op string, field string, caseSensitive bool, values [][]byte) (Node, error) { if len(values) == 0 { return nil, errors.New("values are not provided") } diff --git a/pipeline/doif/len_cmp_op.go b/pipeline/doif/len_cmp_op.go index c8904dca9..7ed89168d 100644 --- a/pipeline/doif/len_cmp_op.go +++ b/pipeline/doif/len_cmp_op.go @@ -96,7 +96,7 @@ type lenCmpOpNode struct { cmpValue int } -func NewLenCmpOpNode(op string, field string, cmpOp string, cmpValue int) (Node, error) { +func newLenCmpOpNode(op string, field string, cmpOp string, cmpValue int) (Node, error) { var lenCmpOp lenCmpOpType switch op { case byteLenCmpOpTag: diff --git a/pipeline/doif/logical_op.go b/pipeline/doif/logical_op.go index 70930b077..0efaabfad 100644 --- a/pipeline/doif/logical_op.go +++ 
b/pipeline/doif/logical_op.go @@ -151,7 +151,7 @@ type logicalNode struct { operands []Node } -func NewLogicalNode(op string, operands []Node) (Node, error) { +func newLogicalNode(op string, operands []Node) (Node, error) { if len(operands) == 0 { return nil, errors.New("logical op must have at least one operand") } diff --git a/pipeline/doif/ts_cmp_op.go b/pipeline/doif/ts_cmp_op.go index f24753c9d..b5e85ed8f 100644 --- a/pipeline/doif/ts_cmp_op.go +++ b/pipeline/doif/ts_cmp_op.go @@ -84,7 +84,7 @@ type tsCmpOpNode struct { updateInterval time.Duration } -func NewTsCmpOpNode(field string, format string, cmpOp string, cmpValChangeMode string, cmpValue time.Time, cmpValueShift time.Duration, updateInterval time.Duration) (Node, error) { +func newTsCmpOpNode(field string, format string, cmpOp string, cmpValChangeMode string, cmpValue time.Time, cmpValueShift time.Duration, updateInterval time.Duration) (Node, error) { typedCmpOp, err := newCmpOp(cmpOp) if err != nil { return nil, err From 95e33dc3d64d25cc2dd7974b54b0880fc893646d Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 3 Jul 2025 20:21:33 +0300 Subject: [PATCH 02/75] Make consts private --- pipeline/doif/README.md | 19 ------------------- pipeline/doif/check_type_op.go | 4 ++-- pipeline/doif/do_if.go | 16 ++++++++-------- pipeline/doif/do_if_test.go | 8 ++++---- pipeline/doif/field_op.go | 4 ++-- pipeline/doif/len_cmp_op.go | 4 ++-- pipeline/doif/logical_op.go | 4 ++-- pipeline/doif/ts_cmp_op.go | 4 ++-- 8 files changed, 22 insertions(+), 41 deletions(-) diff --git a/pipeline/doif/README.md b/pipeline/doif/README.md index 759545365..eb9b47605 100755 --- a/pipeline/doif/README.md +++ b/pipeline/doif/README.md @@ -6,25 +6,6 @@ When Do If Checker's Match func is called it calls to the root Match func and th the chain of Match func calls are performed across the whole tree. ## Node types -**`FieldOp`** Type of node where matching rules for fields are stored. - -
- -**`LengthCmpOp`** Type of node where matching rules for byte length and array length are stored. - -<br>
- -**`TimestampCmpOp`** Type of node where matching rules for timestamps are stored. - -<br>
- -**`CheckTypeOp`** Type of node where matching rules for check types are stored. - -<br>
- -**`LogicalOp`** Type of node where logical rules for applying other rules are stored. - -<br>
## Field op node diff --git a/pipeline/doif/check_type_op.go b/pipeline/doif/check_type_op.go index 2e78d0321..a6cdf79e1 100644 --- a/pipeline/doif/check_type_op.go +++ b/pipeline/doif/check_type_op.go @@ -150,8 +150,8 @@ func newCheckTypeOpNode(field string, values [][]byte) (Node, error) { }, nil } -func (n *checkTypeOpNode) Type() NodeType { - return NodeCheckTypeOp +func (n *checkTypeOpNode) Type() nodeType { + return nodeCheckTypeOp } func (n *checkTypeOpNode) Check(eventRoot *insaneJSON.Root) bool { diff --git a/pipeline/doif/do_if.go b/pipeline/doif/do_if.go index 599fe1508..44f174b4e 100644 --- a/pipeline/doif/do_if.go +++ b/pipeline/doif/do_if.go @@ -7,29 +7,29 @@ import ( // ! do-if-node // ^ do-if-node -type NodeType int +type nodeType int const ( - NodeUnknownType NodeType = iota + nodeUnknownType nodeType = iota // > Type of node where matching rules for fields are stored. - NodeFieldOp // * + nodeFieldOp // * // > Type of node where matching rules for byte length and array length are stored. - NodeLengthCmpOp // * + nodeLengthCmpOp // * // > Type of node where matching rules for timestamps are stored. - NodeTimestampCmpOp // * + nodeTimestampCmpOp // * // > Type of node where matching rules for check types are stored. - NodeCheckTypeOp // * + nodeCheckTypeOp // * // > Type of node where logical rules for applying other rules are stored. - NodeLogicalOp // * + nodeLogicalOp // * ) type Node interface { - Type() NodeType + Type() nodeType Check(*insaneJSON.Root) bool isEqualTo(Node, int) error } diff --git a/pipeline/doif/do_if_test.go b/pipeline/doif/do_if_test.go index 79b5438e7..124927928 100644 --- a/pipeline/doif/do_if_test.go +++ b/pipeline/doif/do_if_test.go @@ -77,7 +77,7 @@ func buildTree(node treeNode) (Node, error) { func checkNode(t *testing.T, want, got Node) { require.Equal(t, want.Type(), got.Type()) switch want.Type() { - case NodeFieldOp: + case nodeFieldOp: wantNode := want.(*fieldOpNode) gotNode := got.(*fieldOpNode) assert.Equal(t, wantNode.op, gotNode.op) @@ -111,7 +111,7 @@ func checkNode(t *testing.T, want, got Node) { } assert.Equal(t, wantNode.minValLen, gotNode.minValLen) assert.Equal(t, wantNode.maxValLen, gotNode.maxValLen) - case NodeLogicalOp: + case nodeLogicalOp: wantNode := want.(*logicalNode) gotNode := got.(*logicalNode) assert.Equal(t, wantNode.op, gotNode.op) @@ -119,14 +119,14 @@ func checkNode(t *testing.T, want, got Node) { for i := 0; i < len(wantNode.operands); i++ { checkNode(t, wantNode.operands[i], gotNode.operands[i]) } - case NodeLengthCmpOp: + case nodeLengthCmpOp: wantNode := want.(*lenCmpOpNode) gotNode := got.(*lenCmpOpNode) assert.Equal(t, wantNode.lenCmpOp, gotNode.lenCmpOp) assert.Equal(t, wantNode.cmpValue, gotNode.cmpValue) assert.Equal(t, wantNode.cmpOp, gotNode.cmpOp) assert.Equal(t, 0, slices.Compare[[]string](wantNode.fieldPath, gotNode.fieldPath)) - case NodeTimestampCmpOp: + case nodeTimestampCmpOp: wantNode := want.(*tsCmpOpNode) gotNode := got.(*tsCmpOpNode) assert.Equal(t, wantNode.format, gotNode.format) diff --git a/pipeline/doif/field_op.go b/pipeline/doif/field_op.go index 68a314848..7f337fffd 100644 --- a/pipeline/doif/field_op.go +++ b/pipeline/doif/field_op.go @@ -280,8 +280,8 @@ func newFieldOpNode(op string, field string, caseSensitive bool, values [][]byte }, nil } -func (n *fieldOpNode) Type() NodeType { - return NodeFieldOp +func (n *fieldOpNode) Type() nodeType { + return nodeFieldOp } func (n *fieldOpNode) Check(eventRoot *insaneJSON.Root) bool { diff --git a/pipeline/doif/len_cmp_op.go 
b/pipeline/doif/len_cmp_op.go index 7ed89168d..67dc6ba34 100644 --- a/pipeline/doif/len_cmp_op.go +++ b/pipeline/doif/len_cmp_op.go @@ -125,8 +125,8 @@ func newLenCmpOpNode(op string, field string, cmpOp string, cmpValue int) (Node, }, nil } -func (n *lenCmpOpNode) Type() NodeType { - return NodeLengthCmpOp +func (n *lenCmpOpNode) Type() nodeType { + return nodeLengthCmpOp } func getNodeFieldsBytesSize(node *insaneJSON.Node) int { diff --git a/pipeline/doif/logical_op.go b/pipeline/doif/logical_op.go index 0efaabfad..72feaeca7 100644 --- a/pipeline/doif/logical_op.go +++ b/pipeline/doif/logical_op.go @@ -175,8 +175,8 @@ func newLogicalNode(op string, operands []Node) (Node, error) { }, nil } -func (n *logicalNode) Type() NodeType { - return NodeLogicalOp +func (n *logicalNode) Type() nodeType { + return nodeLogicalOp } func (n *logicalNode) Check(eventRoot *insaneJSON.Root) bool { diff --git a/pipeline/doif/ts_cmp_op.go b/pipeline/doif/ts_cmp_op.go index b5e85ed8f..8284a22e9 100644 --- a/pipeline/doif/ts_cmp_op.go +++ b/pipeline/doif/ts_cmp_op.go @@ -133,8 +133,8 @@ func (n *tsCmpOpNode) startUpdater() { } } -func (n *tsCmpOpNode) Type() NodeType { - return NodeTimestampCmpOp +func (n *tsCmpOpNode) Type() nodeType { + return nodeTimestampCmpOp } func (n *tsCmpOpNode) Check(eventRoot *insaneJSON.Root) bool { From 6ce88a5bc52d457735f48d0ca2f07b82818aff7c Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 3 Jul 2025 20:40:42 +0300 Subject: [PATCH 03/75] Make methods private --- pipeline/doif/check_type_op.go | 2 +- pipeline/doif/check_type_test.go | 4 ++-- pipeline/doif/do_if.go | 4 ++-- pipeline/doif/field_op.go | 2 +- pipeline/doif/len_cmp_op.go | 2 +- pipeline/doif/logical_op.go | 8 ++++---- pipeline/doif/ts_cmp_op.go | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pipeline/doif/check_type_op.go b/pipeline/doif/check_type_op.go index a6cdf79e1..baa054e79 100644 --- a/pipeline/doif/check_type_op.go +++ b/pipeline/doif/check_type_op.go @@ -154,7 +154,7 @@ func (n *checkTypeOpNode) Type() nodeType { return nodeCheckTypeOp } -func (n *checkTypeOpNode) Check(eventRoot *insaneJSON.Root) bool { +func (n *checkTypeOpNode) check(eventRoot *insaneJSON.Root) bool { node := eventRoot.Dig(n.fieldPath...) 
for _, checkFn := range n.checkTypeFns { if checkFn(node) { diff --git a/pipeline/doif/check_type_test.go b/pipeline/doif/check_type_test.go index e1a8b1bab..94d15357c 100644 --- a/pipeline/doif/check_type_test.go +++ b/pipeline/doif/check_type_test.go @@ -213,7 +213,7 @@ func TestCheckType(t *testing.T) { eventRoot, err = insaneJSON.DecodeString(d.eventStr) require.NoError(t, err) } - got := node.Check(eventRoot) + got := node.check(eventRoot) assert.Equal(t, d.want, got, "invalid result for event %q", d.eventStr) } }) @@ -366,7 +366,7 @@ func TestCheckTypeDuplicateValues(t *testing.T) { eventStr := logsMap[d.checkType] eventRoot, err := insaneJSON.DecodeString(eventStr) require.NoError(t, err, "must be no error on decode checkEvent") - got := ctnode.Check(eventRoot) + got := ctnode.check(eventRoot) assert.Equal(t, d.want, got, "invalid result for check %d of type %q", i, d.checkType) } }) diff --git a/pipeline/doif/do_if.go b/pipeline/doif/do_if.go index 44f174b4e..91ba2f9c0 100644 --- a/pipeline/doif/do_if.go +++ b/pipeline/doif/do_if.go @@ -30,7 +30,7 @@ const ( type Node interface { Type() nodeType - Check(*insaneJSON.Root) bool + check(*insaneJSON.Root) bool isEqualTo(Node, int) error } @@ -52,5 +52,5 @@ func (c *Checker) Check(eventRoot *insaneJSON.Root) bool { if eventRoot == nil { return false } - return c.root.Check(eventRoot) + return c.root.check(eventRoot) } diff --git a/pipeline/doif/field_op.go b/pipeline/doif/field_op.go index 7f337fffd..942e8c38c 100644 --- a/pipeline/doif/field_op.go +++ b/pipeline/doif/field_op.go @@ -284,7 +284,7 @@ func (n *fieldOpNode) Type() nodeType { return nodeFieldOp } -func (n *fieldOpNode) Check(eventRoot *insaneJSON.Root) bool { +func (n *fieldOpNode) check(eventRoot *insaneJSON.Root) bool { var data []byte node := eventRoot.Dig(n.fieldPath...) if node.IsArray() || node.IsObject() { diff --git a/pipeline/doif/len_cmp_op.go b/pipeline/doif/len_cmp_op.go index 67dc6ba34..d8c729de2 100644 --- a/pipeline/doif/len_cmp_op.go +++ b/pipeline/doif/len_cmp_op.go @@ -174,7 +174,7 @@ func getNodeBytesSize(node *insaneJSON.Node) int { return size } -func (n *lenCmpOpNode) Check(eventRoot *insaneJSON.Root) bool { +func (n *lenCmpOpNode) check(eventRoot *insaneJSON.Root) bool { value := 0 switch n.lenCmpOp { diff --git a/pipeline/doif/logical_op.go b/pipeline/doif/logical_op.go index 72feaeca7..b2c9e51cd 100644 --- a/pipeline/doif/logical_op.go +++ b/pipeline/doif/logical_op.go @@ -179,24 +179,24 @@ func (n *logicalNode) Type() nodeType { return nodeLogicalOp } -func (n *logicalNode) Check(eventRoot *insaneJSON.Root) bool { +func (n *logicalNode) check(eventRoot *insaneJSON.Root) bool { switch n.op { case logicalOr: for _, op := range n.operands { - if op.Check(eventRoot) { + if op.check(eventRoot) { return true } } return false case logicalAnd: for _, op := range n.operands { - if !op.Check(eventRoot) { + if !op.check(eventRoot) { return false } } return true case logicalNot: - return !n.operands[0].Check(eventRoot) + return !n.operands[0].check(eventRoot) } return false } diff --git a/pipeline/doif/ts_cmp_op.go b/pipeline/doif/ts_cmp_op.go index 8284a22e9..6f830cbcd 100644 --- a/pipeline/doif/ts_cmp_op.go +++ b/pipeline/doif/ts_cmp_op.go @@ -137,7 +137,7 @@ func (n *tsCmpOpNode) Type() nodeType { return nodeTimestampCmpOp } -func (n *tsCmpOpNode) Check(eventRoot *insaneJSON.Root) bool { +func (n *tsCmpOpNode) check(eventRoot *insaneJSON.Root) bool { node := eventRoot.Dig(n.fieldPath...) 
if node == nil { return false From 2f2f885b6c23dc35cd3aad6e2c9bff2d18b402a1 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 3 Jul 2025 22:28:54 +0300 Subject: [PATCH 04/75] Restore doc --- pipeline/doif/README.md | 19 +++++++++++++++++++ pipeline/doif/check_type_op.go | 2 +- pipeline/doif/do_if.go | 12 ++++++------ pipeline/doif/do_if_test.go | 8 ++++---- pipeline/doif/field_op.go | 2 +- pipeline/doif/len_cmp_op.go | 2 +- pipeline/doif/logical_op.go | 2 +- pipeline/doif/ts_cmp_op.go | 2 +- 8 files changed, 34 insertions(+), 15 deletions(-) diff --git a/pipeline/doif/README.md b/pipeline/doif/README.md index eb9b47605..759545365 100755 --- a/pipeline/doif/README.md +++ b/pipeline/doif/README.md @@ -6,6 +6,25 @@ When Do If Checker's Match func is called it calls to the root Match func and th the chain of Match func calls are performed across the whole tree. ## Node types +**`FieldOp`** Type of node where matching rules for fields are stored. + +
+ +**`LengthCmpOp`** Type of node where matching rules for byte length and array length are stored. + +<br>
+ +**`TimestampCmpOp`** Type of node where matching rules for timestamps are stored. + +<br>
+ +**`CheckTypeOp`** Type of node where matching rules for check types are stored. + +<br>
+ +**`LogicalOp`** Type of node where logical rules for applying other rules are stored. + +<br>
## Field op node diff --git a/pipeline/doif/check_type_op.go b/pipeline/doif/check_type_op.go index baa054e79..463526616 100644 --- a/pipeline/doif/check_type_op.go +++ b/pipeline/doif/check_type_op.go @@ -151,7 +151,7 @@ func newCheckTypeOpNode(field string, values [][]byte) (Node, error) { } func (n *checkTypeOpNode) Type() nodeType { - return nodeCheckTypeOp + return NodeCheckTypeOp } func (n *checkTypeOpNode) check(eventRoot *insaneJSON.Root) bool { diff --git a/pipeline/doif/do_if.go b/pipeline/doif/do_if.go index 91ba2f9c0..f2103bd51 100644 --- a/pipeline/doif/do_if.go +++ b/pipeline/doif/do_if.go @@ -10,22 +10,22 @@ import ( type nodeType int const ( - nodeUnknownType nodeType = iota + NodeUnknownType nodeType = iota // > Type of node where matching rules for fields are stored. - nodeFieldOp // * + NodeFieldOp // * // > Type of node where matching rules for byte length and array length are stored. - nodeLengthCmpOp // * + NodeLengthCmpOp // * // > Type of node where matching rules for timestamps are stored. - nodeTimestampCmpOp // * + NodeTimestampCmpOp // * // > Type of node where matching rules for check types are stored. - nodeCheckTypeOp // * + NodeCheckTypeOp // * // > Type of node where logical rules for applying other rules are stored. - nodeLogicalOp // * + NodeLogicalOp // * ) type Node interface { diff --git a/pipeline/doif/do_if_test.go b/pipeline/doif/do_if_test.go index 124927928..79b5438e7 100644 --- a/pipeline/doif/do_if_test.go +++ b/pipeline/doif/do_if_test.go @@ -77,7 +77,7 @@ func buildTree(node treeNode) (Node, error) { func checkNode(t *testing.T, want, got Node) { require.Equal(t, want.Type(), got.Type()) switch want.Type() { - case nodeFieldOp: + case NodeFieldOp: wantNode := want.(*fieldOpNode) gotNode := got.(*fieldOpNode) assert.Equal(t, wantNode.op, gotNode.op) @@ -111,7 +111,7 @@ func checkNode(t *testing.T, want, got Node) { } assert.Equal(t, wantNode.minValLen, gotNode.minValLen) assert.Equal(t, wantNode.maxValLen, gotNode.maxValLen) - case nodeLogicalOp: + case NodeLogicalOp: wantNode := want.(*logicalNode) gotNode := got.(*logicalNode) assert.Equal(t, wantNode.op, gotNode.op) @@ -119,14 +119,14 @@ func checkNode(t *testing.T, want, got Node) { for i := 0; i < len(wantNode.operands); i++ { checkNode(t, wantNode.operands[i], gotNode.operands[i]) } - case nodeLengthCmpOp: + case NodeLengthCmpOp: wantNode := want.(*lenCmpOpNode) gotNode := got.(*lenCmpOpNode) assert.Equal(t, wantNode.lenCmpOp, gotNode.lenCmpOp) assert.Equal(t, wantNode.cmpValue, gotNode.cmpValue) assert.Equal(t, wantNode.cmpOp, gotNode.cmpOp) assert.Equal(t, 0, slices.Compare[[]string](wantNode.fieldPath, gotNode.fieldPath)) - case nodeTimestampCmpOp: + case NodeTimestampCmpOp: wantNode := want.(*tsCmpOpNode) gotNode := got.(*tsCmpOpNode) assert.Equal(t, wantNode.format, gotNode.format) diff --git a/pipeline/doif/field_op.go b/pipeline/doif/field_op.go index 942e8c38c..a18d3b8c8 100644 --- a/pipeline/doif/field_op.go +++ b/pipeline/doif/field_op.go @@ -281,7 +281,7 @@ func newFieldOpNode(op string, field string, caseSensitive bool, values [][]byte } func (n *fieldOpNode) Type() nodeType { - return nodeFieldOp + return NodeFieldOp } func (n *fieldOpNode) check(eventRoot *insaneJSON.Root) bool { diff --git a/pipeline/doif/len_cmp_op.go b/pipeline/doif/len_cmp_op.go index d8c729de2..12af46fcb 100644 --- a/pipeline/doif/len_cmp_op.go +++ b/pipeline/doif/len_cmp_op.go @@ -126,7 +126,7 @@ func newLenCmpOpNode(op string, field string, cmpOp string, cmpValue int) (Node, } func (n *lenCmpOpNode) 
Type() nodeType { - return nodeLengthCmpOp + return NodeLengthCmpOp } func getNodeFieldsBytesSize(node *insaneJSON.Node) int { diff --git a/pipeline/doif/logical_op.go b/pipeline/doif/logical_op.go index b2c9e51cd..2ac198225 100644 --- a/pipeline/doif/logical_op.go +++ b/pipeline/doif/logical_op.go @@ -176,7 +176,7 @@ func newLogicalNode(op string, operands []Node) (Node, error) { } func (n *logicalNode) Type() nodeType { - return nodeLogicalOp + return NodeLogicalOp } func (n *logicalNode) check(eventRoot *insaneJSON.Root) bool { diff --git a/pipeline/doif/ts_cmp_op.go b/pipeline/doif/ts_cmp_op.go index 6f830cbcd..acf40ecf9 100644 --- a/pipeline/doif/ts_cmp_op.go +++ b/pipeline/doif/ts_cmp_op.go @@ -134,7 +134,7 @@ func (n *tsCmpOpNode) startUpdater() { } func (n *tsCmpOpNode) Type() nodeType { - return nodeTimestampCmpOp + return NodeTimestampCmpOp } func (n *tsCmpOpNode) check(eventRoot *insaneJSON.Root) bool { From 9def8ebf46bae2e1188dc2733bc46c3b2447c92b Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 3 Jul 2025 22:42:53 +0300 Subject: [PATCH 05/75] Rename package --- fd/util.go | 6 +++--- pipeline/{doif => do_if}/README.idoc.md | 0 pipeline/{doif => do_if}/README.md | 0 pipeline/{doif => do_if}/check_type_op.go | 2 +- pipeline/{doif => do_if}/check_type_test.go | 2 +- pipeline/{doif => do_if}/comparator.go | 2 +- pipeline/{doif => do_if}/ctor.go | 2 +- pipeline/{doif => do_if}/ctor_test.go | 2 +- pipeline/{doif => do_if}/ctor_utils.go | 2 +- pipeline/{doif => do_if}/do_if.go | 2 +- pipeline/{doif => do_if}/do_if_test.go | 2 +- pipeline/{doif => do_if}/field_op.go | 2 +- pipeline/{doif => do_if}/len_cmp_op.go | 2 +- pipeline/{doif => do_if}/logical_op.go | 2 +- pipeline/{doif => do_if}/ts_cmp_op.go | 2 +- pipeline/plugin.go | 4 ++-- plugin/action/mask/mask.go | 6 +++--- 17 files changed, 20 insertions(+), 20 deletions(-) rename pipeline/{doif => do_if}/README.idoc.md (100%) rename pipeline/{doif => do_if}/README.md (100%) rename pipeline/{doif => do_if}/check_type_op.go (99%) rename pipeline/{doif => do_if}/check_type_test.go (99%) rename pipeline/{doif => do_if}/comparator.go (98%) rename pipeline/{doif => do_if}/ctor.go (99%) rename pipeline/{doif => do_if}/ctor_test.go (99%) rename pipeline/{doif => do_if}/ctor_utils.go (98%) rename pipeline/{doif => do_if}/do_if.go (98%) rename pipeline/{doif => do_if}/do_if_test.go (99%) rename pipeline/{doif => do_if}/field_op.go (99%) rename pipeline/{doif => do_if}/len_cmp_op.go (99%) rename pipeline/{doif => do_if}/logical_op.go (99%) rename pipeline/{doif => do_if}/ts_cmp_op.go (99%) diff --git a/fd/util.go b/fd/util.go index 38c3ecccd..d138a98ea 100644 --- a/fd/util.go +++ b/fd/util.go @@ -11,7 +11,7 @@ import ( "github.com/ozontech/file.d/logger" "github.com/ozontech/file.d/pipeline" "github.com/ozontech/file.d/pipeline/antispam" - "github.com/ozontech/file.d/pipeline/doif" + "github.com/ozontech/file.d/pipeline/do_if" ) func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings { @@ -219,13 +219,13 @@ func extractMetrics(actionJSON *simplejson.Json) (string, []string, bool) { return metricName, metricLabels, skipStatus } -func extractDoIfChecker(actionJSON *simplejson.Json) (*doif.Checker, error) { +func extractDoIfChecker(actionJSON *simplejson.Json) (*do_if.Checker, error) { m := actionJSON.MustMap() if m == nil { return nil, nil } - return doif.NewFromMap(m) + return do_if.NewFromMap(m) } func makeActionJSON(actionJSON *simplejson.Json) []byte { diff --git a/pipeline/doif/README.idoc.md 
b/pipeline/do_if/README.idoc.md similarity index 100% rename from pipeline/doif/README.idoc.md rename to pipeline/do_if/README.idoc.md diff --git a/pipeline/doif/README.md b/pipeline/do_if/README.md similarity index 100% rename from pipeline/doif/README.md rename to pipeline/do_if/README.md diff --git a/pipeline/doif/check_type_op.go b/pipeline/do_if/check_type_op.go similarity index 99% rename from pipeline/doif/check_type_op.go rename to pipeline/do_if/check_type_op.go index 463526616..5b3a2607e 100644 --- a/pipeline/doif/check_type_op.go +++ b/pipeline/do_if/check_type_op.go @@ -1,4 +1,4 @@ -package doif +package do_if import ( "errors" diff --git a/pipeline/doif/check_type_test.go b/pipeline/do_if/check_type_test.go similarity index 99% rename from pipeline/doif/check_type_test.go rename to pipeline/do_if/check_type_test.go index 94d15357c..b06851db5 100644 --- a/pipeline/doif/check_type_test.go +++ b/pipeline/do_if/check_type_test.go @@ -1,4 +1,4 @@ -package doif +package do_if import ( "testing" diff --git a/pipeline/doif/comparator.go b/pipeline/do_if/comparator.go similarity index 98% rename from pipeline/doif/comparator.go rename to pipeline/do_if/comparator.go index a16e90844..7a35a4e5b 100644 --- a/pipeline/doif/comparator.go +++ b/pipeline/do_if/comparator.go @@ -1,4 +1,4 @@ -package doif +package do_if import "fmt" diff --git a/pipeline/doif/ctor.go b/pipeline/do_if/ctor.go similarity index 99% rename from pipeline/doif/ctor.go rename to pipeline/do_if/ctor.go index 464795a31..290d999fb 100644 --- a/pipeline/doif/ctor.go +++ b/pipeline/do_if/ctor.go @@ -1,4 +1,4 @@ -package doif +package do_if import ( "errors" diff --git a/pipeline/doif/ctor_test.go b/pipeline/do_if/ctor_test.go similarity index 99% rename from pipeline/doif/ctor_test.go rename to pipeline/do_if/ctor_test.go index b8eb430c3..a69d4713d 100644 --- a/pipeline/doif/ctor_test.go +++ b/pipeline/do_if/ctor_test.go @@ -1,4 +1,4 @@ -package doif +package do_if import ( "bytes" diff --git a/pipeline/doif/ctor_utils.go b/pipeline/do_if/ctor_utils.go similarity index 98% rename from pipeline/doif/ctor_utils.go rename to pipeline/do_if/ctor_utils.go index ace56edc2..6d7772e86 100644 --- a/pipeline/doif/ctor_utils.go +++ b/pipeline/do_if/ctor_utils.go @@ -1,4 +1,4 @@ -package doif +package do_if import ( "encoding/json" diff --git a/pipeline/doif/do_if.go b/pipeline/do_if/do_if.go similarity index 98% rename from pipeline/doif/do_if.go rename to pipeline/do_if/do_if.go index f2103bd51..95590a927 100644 --- a/pipeline/doif/do_if.go +++ b/pipeline/do_if/do_if.go @@ -1,4 +1,4 @@ -package doif +package do_if import ( insaneJSON "github.com/ozontech/insane-json" diff --git a/pipeline/doif/do_if_test.go b/pipeline/do_if/do_if_test.go similarity index 99% rename from pipeline/doif/do_if_test.go rename to pipeline/do_if/do_if_test.go index 79b5438e7..8cfeb897e 100644 --- a/pipeline/doif/do_if_test.go +++ b/pipeline/do_if/do_if_test.go @@ -1,4 +1,4 @@ -package doif +package do_if import ( "errors" diff --git a/pipeline/doif/field_op.go b/pipeline/do_if/field_op.go similarity index 99% rename from pipeline/doif/field_op.go rename to pipeline/do_if/field_op.go index a18d3b8c8..636a82f79 100644 --- a/pipeline/doif/field_op.go +++ b/pipeline/do_if/field_op.go @@ -1,4 +1,4 @@ -package doif +package do_if import ( "bytes" diff --git a/pipeline/doif/len_cmp_op.go b/pipeline/do_if/len_cmp_op.go similarity index 99% rename from pipeline/doif/len_cmp_op.go rename to pipeline/do_if/len_cmp_op.go index 12af46fcb..b0971df99 100644 --- 
a/pipeline/doif/len_cmp_op.go +++ b/pipeline/do_if/len_cmp_op.go @@ -1,4 +1,4 @@ -package doif +package do_if import ( "errors" diff --git a/pipeline/doif/logical_op.go b/pipeline/do_if/logical_op.go similarity index 99% rename from pipeline/doif/logical_op.go rename to pipeline/do_if/logical_op.go index 2ac198225..2fbdb885a 100644 --- a/pipeline/doif/logical_op.go +++ b/pipeline/do_if/logical_op.go @@ -1,4 +1,4 @@ -package doif +package do_if import ( "errors" diff --git a/pipeline/doif/ts_cmp_op.go b/pipeline/do_if/ts_cmp_op.go similarity index 99% rename from pipeline/doif/ts_cmp_op.go rename to pipeline/do_if/ts_cmp_op.go index acf40ecf9..192e468e0 100644 --- a/pipeline/doif/ts_cmp_op.go +++ b/pipeline/do_if/ts_cmp_op.go @@ -1,4 +1,4 @@ -package doif +package do_if import ( "errors" diff --git a/pipeline/plugin.go b/pipeline/plugin.go index 3e0a7c749..13eb087ac 100644 --- a/pipeline/plugin.go +++ b/pipeline/plugin.go @@ -6,7 +6,7 @@ import ( "strings" "github.com/ozontech/file.d/metric" - "github.com/ozontech/file.d/pipeline/doif" + "github.com/ozontech/file.d/pipeline/do_if" "go.uber.org/zap" ) @@ -99,7 +99,7 @@ type ActionPluginStaticInfo struct { MatchMode MatchMode MatchInvert bool - DoIfChecker *doif.Checker + DoIfChecker *do_if.Checker } type ActionPluginInfo struct { diff --git a/plugin/action/mask/mask.go b/plugin/action/mask/mask.go index af134ebdd..2d628b322 100644 --- a/plugin/action/mask/mask.go +++ b/plugin/action/mask/mask.go @@ -11,7 +11,7 @@ import ( "github.com/ozontech/file.d/fd" "github.com/ozontech/file.d/metric" "github.com/ozontech/file.d/pipeline" - "github.com/ozontech/file.d/pipeline/doif" + "github.com/ozontech/file.d/pipeline/do_if" insaneJSON "github.com/ozontech/insane-json" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" @@ -160,7 +160,7 @@ type Mask struct { mode mode DoIfCheckerMap map[string]any `json:"do_if"` - DoIfChecker *doif.Checker + DoIfChecker *do_if.Checker use bool @@ -249,7 +249,7 @@ func compileMask(m *Mask, logger *zap.Logger) { if m.DoIfCheckerMap != nil { var err error - m.DoIfChecker, err = doif.NewFromMap(m.DoIfCheckerMap) + m.DoIfChecker, err = do_if.NewFromMap(m.DoIfCheckerMap) if err != nil { logger.Fatal("can't init do_if for mask", zap.Error(err)) } From 1ae7a486ce47b1ea474aaabbc2448563fc086d19 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Fri, 4 Jul 2025 17:35:01 +0300 Subject: [PATCH 06/75] Add checker draft --- pipeline/checker/checker.go | 207 ++++++++++++++++++++++++++++++++++++ pipeline/do_if/field_op.go | 11 +- 2 files changed, 213 insertions(+), 5 deletions(-) create mode 100644 pipeline/checker/checker.go diff --git a/pipeline/checker/checker.go b/pipeline/checker/checker.go new file mode 100644 index 000000000..b075c0d00 --- /dev/null +++ b/pipeline/checker/checker.go @@ -0,0 +1,207 @@ +package checker + +import ( + "bytes" + "errors" + "fmt" + "regexp" +) + +type op int + +const ( + opEqual op = iota + opContains + opPrefix + opSuffix + opRegex +) + +func opToString(op op) string { + switch op { + case opEqual: + return "equal" + case opContains: + return "contains" + case opPrefix: + return "prefix" + case opSuffix: + return "suffix" + case opRegex: + return "regex" + default: + return "unknown" + } +} + +const ( + OpEqualTag = "equal" + OpContainsTag = "contains" + OpPrefixTag = "prefix" + OpSuffixTag = "suffix" + OpRegexTag = "regex" +) + +func stringToOp(s string) (op, error) { + switch s { + case OpEqualTag: + return opEqual, nil + case OpContainsTag: + return opContains, nil + case 
OpPrefixTag: + return opPrefix, nil + case OpSuffixTag: + return opSuffix, nil + case OpRegexTag: + return opRegex, nil + default: + return -1, fmt.Errorf("unknown field op %q", s) + } +} + +type Checker struct { + op op + caseSensitive bool + values [][]byte + valuesBySize map[int][][]byte + reValues []*regexp.Regexp + + minValLen int + maxValLen int +} + +func New(opTag string, caseSensitive bool, values [][]byte) (*Checker, error) { + if len(values) == 0 { + return nil, errors.New("values are not provided") + } + + var vals [][]byte + var valsBySize map[int][][]byte + var reValues []*regexp.Regexp + var minValLen, maxValLen int + + curOp, err := stringToOp(opTag) + if err != nil { + return nil, err + } + + if curOp == opRegex { + reValues = make([]*regexp.Regexp, 0, len(values)) + for _, v := range values { + re, err := regexp.Compile(string(v)) + if err != nil { + return nil, fmt.Errorf("failed to compile regex %q: %w", v, err) + } + reValues = append(reValues, re) + } + } else { + minValLen = len(values[0]) + maxValLen = len(values[0]) + if curOp == opEqual { + valsBySize = make(map[int][][]byte) + } else { + vals = make([][]byte, len(values)) + } + for i := range values { + var curVal []byte + if values[i] != nil { + curVal = make([]byte, len(values[i])) + copy(curVal, values[i]) + } + if !caseSensitive && curVal != nil { + curVal = bytes.ToLower(curVal) + } + if len(values[i]) < minValLen { + minValLen = len(values[i]) + } + if len(values[i]) > maxValLen { + maxValLen = len(values[i]) + } + if curOp == opEqual { + valsBySize[len(curVal)] = append(valsBySize[len(curVal)], curVal) + } else { + vals[i] = curVal + } + } + } + + return &Checker{ + op: curOp, + caseSensitive: caseSensitive, + values: vals, + valuesBySize: valsBySize, + reValues: reValues, + minValLen: minValLen, + maxValLen: maxValLen, + }, nil +} + +func (n *Checker) Check(data []byte) bool { + // fast check for data + if n.op != opRegex && len(data) < n.minValLen { + return false + } + + switch n.op { + case opEqual: + vals, ok := n.valuesBySize[len(data)] + if !ok { + return false + } + if !n.caseSensitive && data != nil { + data = bytes.ToLower(data) + } + for _, val := range vals { + // null and empty strings are considered as different values + // null can also come if field value is absent + if (data == nil && val != nil) || (data != nil && val == nil) { + continue + } + if bytes.Equal(data, val) { + return true + } + } + case opContains: + if !n.caseSensitive { + data = bytes.ToLower(data) + } + for _, val := range n.values { + if bytes.Contains(data, val) { + return true + } + } + case opPrefix: + // check only necessary amount of bytes + if len(data) > n.maxValLen { + data = data[:n.maxValLen] + } + if !n.caseSensitive { + data = bytes.ToLower(data) + } + for _, val := range n.values { + if bytes.HasPrefix(data, val) { + return true + } + } + case opSuffix: + // check only necessary amount of bytes + if len(data) > n.maxValLen { + data = data[len(data)-n.maxValLen:] + } + if !n.caseSensitive { + data = bytes.ToLower(data) + } + for _, val := range n.values { + if bytes.HasSuffix(data, val) { + return true + } + } + case opRegex: + for _, re := range n.reValues { + if re.Match(data) { + return true + } + } + } + return false +} diff --git a/pipeline/do_if/field_op.go b/pipeline/do_if/field_op.go index 636a82f79..a604686aa 100644 --- a/pipeline/do_if/field_op.go +++ b/pipeline/do_if/field_op.go @@ -8,6 +8,7 @@ import ( "slices" "github.com/ozontech/file.d/cfg" + "github.com/ozontech/file.d/pipeline/checker" 
insaneJSON "github.com/ozontech/insane-json" ) @@ -64,7 +65,7 @@ const ( // > {"pod":"test-pod","service":"test-service"} # not discarded // > {"pod":"test-pod","service":"test-service-1"} # not discarded // > ``` - fieldEqualOpTag = "equal" // * + fieldEqualOpTag = checker.OpEqualTag // * // > checks whether the field value contains one of the elements the in values list. // > @@ -87,7 +88,7 @@ const ( // > {"pod":"my-test-pod","service":"test-service"} # discarded // > {"pod":"test-pod","service":"test-service-1"} # not discarded // > ``` - fieldContainsOpTag = "contains" // * + fieldContainsOpTag = checker.OpContainsTag // * // > checks whether the field value has prefix equal to one of the elements in the values list. // > @@ -110,7 +111,7 @@ const ( // > {"pod":"test-pod","service":"test-service"} # not discarded // > {"pod":"test-pod","service":"test-service-1"} # not discarded // > ``` - fieldPrefixOpTag = "prefix" // * + fieldPrefixOpTag = checker.OpPrefixTag // * // > checks whether the field value has suffix equal to one of the elements in the values list. // > @@ -133,7 +134,7 @@ const ( // > {"pod":"test-pod","service":"test-service"} # not discarded // > {"pod":"test-pod","service":"test-service-1"} # not discarded // > ``` - fieldSuffixOpTag = "suffix" // * + fieldSuffixOpTag = checker.OpSuffixTag // * // > checks whether the field matches any regex from the values list. // > @@ -158,7 +159,7 @@ const ( // > {"pod":"my-test-instance","service":"test-service-1"} # discarded // > {"pod":"service123","service":"test-service-1"} # not discarded // > ``` - fieldRegexOpTag = "regex" // * + fieldRegexOpTag = checker.OpRegexTag // * ) /*{ do-if-field-op-node From 96537b45b47047b94ebf1ee8de14aaa84783844d Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Fri, 4 Jul 2025 23:37:47 +0300 Subject: [PATCH 07/75] Add checkers cmp --- pipeline/checker/checker.go | 49 +++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/pipeline/checker/checker.go b/pipeline/checker/checker.go index b075c0d00..3fa55f67e 100644 --- a/pipeline/checker/checker.go +++ b/pipeline/checker/checker.go @@ -205,3 +205,52 @@ func (n *Checker) Check(data []byte) bool { } return false } + +func Assert(b bool, msg string) { + if !b { + panic(msg) + } +} + +func AssertEqual[T comparable](a, b T, msg string) { + Assert(a == b, fmt.Sprintf("%s: %v != %v", msg, a, b)) +} + +func AssertEqualValues(a, b [][]byte, msg string) { + AssertEqual(len(a), len(b), fmt.Sprintf("%s: different values count", msg)) + for i := range a { + Assert( + bytes.Equal(a[i], b[i]), + fmt.Sprintf("%s: different values at pos %d: %s != %s", + msg, i, a[i], b[i], + ), + ) + } +} + +func Equal(a, b *Checker) (err error) { + defer func() { + if r := recover(); r != nil { + err = errors.New(r.(string)) + } + }() + + AssertEqual(a.op, b.op, "different op") + AssertEqual(a.caseSensitive, b.caseSensitive, "different case_sensitive") + AssertEqualValues(a.values, b.values, "different values") + + AssertEqual(len(a.valuesBySize), len(b.valuesBySize), "different valuesBySize len") + for size := range a.valuesBySize { + _, found := b.valuesBySize[size] + Assert(found, fmt.Sprintf("not found values by size %d", size)) + AssertEqualValues( + a.valuesBySize[size], b.valuesBySize[size], + fmt.Sprintf("different values by size %d", size), + ) + } + + AssertEqual(a.minValLen, b.minValLen, "different min value len") + AssertEqual(a.maxValLen, b.maxValLen, "different max value len") + + return +} From 
fa46937db5ca7eaeb43b2de7497c7cb1c0d89d71 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Sat, 5 Jul 2025 00:07:40 +0300 Subject: [PATCH 08/75] Fix --- pipeline/checker/checker.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pipeline/checker/checker.go b/pipeline/checker/checker.go index 3fa55f67e..5ca81f21d 100644 --- a/pipeline/checker/checker.go +++ b/pipeline/checker/checker.go @@ -17,18 +17,18 @@ const ( opRegex ) -func opToString(op op) string { +func (op op) String() string { switch op { case opEqual: - return "equal" + return OpEqualTag case opContains: - return "contains" + return OpContainsTag case opPrefix: - return "prefix" + return OpPrefixTag case opSuffix: - return "suffix" + return OpSuffixTag case opRegex: - return "regex" + return OpRegexTag default: return "unknown" } From 953178b342efbda1458331333bf9aa6d4c4cd20d Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Sat, 5 Jul 2025 00:59:55 +0300 Subject: [PATCH 09/75] Use new string checker. Fix cmp func --- pipeline/checker/checker.go | 8 ++ pipeline/do_if/do_if_test.go | 134 ++++++-------------- pipeline/do_if/field_op.go | 239 +++-------------------------------- 3 files changed, 63 insertions(+), 318 deletions(-) diff --git a/pipeline/checker/checker.go b/pipeline/checker/checker.go index 5ca81f21d..d9bab76f4 100644 --- a/pipeline/checker/checker.go +++ b/pipeline/checker/checker.go @@ -249,6 +249,14 @@ func Equal(a, b *Checker) (err error) { ) } + AssertEqual(len(a.reValues), len(b.reValues), "different regex values count") + for i := range a.reValues { + AssertEqual( + a.reValues[i].String(), b.reValues[i].String(), + fmt.Sprintf("different regex values at pos %d", i), + ) + } + AssertEqual(a.minValLen, b.minValLen, "different min value len") AssertEqual(a.maxValLen, b.maxValLen, "different max value len") diff --git a/pipeline/do_if/do_if_test.go b/pipeline/do_if/do_if_test.go index 8cfeb897e..cc2835302 100644 --- a/pipeline/do_if/do_if_test.go +++ b/pipeline/do_if/do_if_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/ozontech/file.d/pipeline/checker" insaneJSON "github.com/ozontech/insane-json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -80,37 +81,9 @@ func checkNode(t *testing.T, want, got Node) { case NodeFieldOp: wantNode := want.(*fieldOpNode) gotNode := got.(*fieldOpNode) - assert.Equal(t, wantNode.op, gotNode.op) assert.Equal(t, 0, slices.Compare[[]string](wantNode.fieldPath, gotNode.fieldPath)) assert.Equal(t, wantNode.fieldPathStr, gotNode.fieldPathStr) - assert.Equal(t, wantNode.caseSensitive, gotNode.caseSensitive) - if wantNode.values == nil { - assert.Equal(t, wantNode.values, gotNode.values) - } else { - require.Equal(t, len(wantNode.values), len(gotNode.values)) - for i := 0; i < len(wantNode.values); i++ { - wantValues := wantNode.values[i] - gotValues := gotNode.values[i] - assert.Equal(t, 0, slices.Compare[[]byte](wantValues, gotValues)) - } - } - if wantNode.valuesBySize == nil { - assert.Equal(t, wantNode.valuesBySize, gotNode.valuesBySize) - } else { - require.Equal(t, len(wantNode.valuesBySize), len(gotNode.valuesBySize)) - for k, wantVals := range wantNode.valuesBySize { - gotVals, ok := gotNode.valuesBySize[k] - assert.True(t, ok, "values by key %d not present in got node", k) - if ok { - require.Equal(t, len(wantVals), len(gotVals)) - for i := 0; i < len(wantVals); i++ { - assert.Equal(t, 0, slices.Compare[[]byte](wantVals[i], gotVals[i])) - } - } - } - } - assert.Equal(t, wantNode.minValLen, 
gotNode.minValLen) - assert.Equal(t, wantNode.maxValLen, gotNode.maxValLen) + assert.NoError(t, checker.Equal(wantNode.checker, gotNode.checker)) case NodeLogicalOp: wantNode := want.(*logicalNode) gotNode := got.(*logicalNode) @@ -144,6 +117,14 @@ func checkNode(t *testing.T, want, got Node) { func TestBuildNodes(t *testing.T) { timestamp := time.Now() + mustNewChecker := func(op string, caseSensitive bool, values [][]byte) *checker.Checker { + c, err := checker.New(op, caseSensitive, values) + if err != nil { + panic(err) + } + return c + } + tests := []struct { name string tree treeNode @@ -159,25 +140,13 @@ func TestBuildNodes(t *testing.T) { values: [][]byte{[]byte(`test-111`), []byte(`test-2`), []byte(`test-3`), []byte(`test-12345`)}, }, want: &fieldOpNode{ - op: fieldEqualOp, - fieldPath: []string{"log", "pod"}, - fieldPathStr: "log.pod", - caseSensitive: true, - values: nil, - valuesBySize: map[int][][]byte{ - 6: [][]byte{ - []byte(`test-2`), - []byte(`test-3`), - }, - 8: [][]byte{ - []byte(`test-111`), - }, - 10: [][]byte{ - []byte(`test-12345`), - }, - }, - minValLen: 6, - maxValLen: 10, + fieldPath: []string{"log", "pod"}, + fieldPathStr: "log.pod", + checker: mustNewChecker( + "equal", + true, + [][]byte{[]byte(`test-111`), []byte(`test-2`), []byte(`test-3`), []byte(`test-12345`)}, + ), }, }, { @@ -189,25 +158,13 @@ func TestBuildNodes(t *testing.T) { values: [][]byte{[]byte(`TEST-111`), []byte(`Test-2`), []byte(`tesT-3`), []byte(`TeSt-12345`)}, }, want: &fieldOpNode{ - op: fieldEqualOp, - fieldPath: []string{"log", "pod"}, - fieldPathStr: "log.pod", - caseSensitive: false, - values: nil, - valuesBySize: map[int][][]byte{ - 6: [][]byte{ - []byte(`test-2`), - []byte(`test-3`), - }, - 8: [][]byte{ - []byte(`test-111`), - }, - 10: [][]byte{ - []byte(`test-12345`), - }, - }, - minValLen: 6, - maxValLen: 10, + fieldPath: []string{"log", "pod"}, + fieldPathStr: "log.pod", + checker: mustNewChecker( + "equal", + false, + [][]byte{[]byte(`TEST-111`), []byte(`Test-2`), []byte(`tesT-3`), []byte(`TeSt-12345`)}, + ), }, }, { @@ -233,37 +190,22 @@ func TestBuildNodes(t *testing.T) { op: logicalOr, operands: []Node{ &fieldOpNode{ - op: fieldEqualOp, - fieldPath: []string{"log", "pod"}, - fieldPathStr: "log.pod", - caseSensitive: true, - values: nil, - valuesBySize: map[int][][]byte{ - 6: [][]byte{ - []byte(`test-2`), - []byte(`test-3`), - }, - 8: [][]byte{ - []byte(`test-111`), - }, - 10: [][]byte{ - []byte(`test-12345`), - }, - }, - minValLen: 6, - maxValLen: 10, + fieldPath: []string{"log", "pod"}, + fieldPathStr: "log.pod", + checker: mustNewChecker( + "equal", + true, + [][]byte{[]byte(`test-111`), []byte(`test-2`), []byte(`test-3`), []byte(`test-12345`)}, + ), }, &fieldOpNode{ - op: fieldContainsOp, - fieldPath: []string{"service", "msg"}, - fieldPathStr: "service.msg", - caseSensitive: true, - values: [][]byte{ - []byte(`test-0987`), - []byte(`test-11`), - }, - minValLen: 7, - maxValLen: 9, + fieldPath: []string{"service", "msg"}, + fieldPathStr: "service.msg", + checker: mustNewChecker( + "contains", + true, + [][]byte{[]byte(`test-0987`), []byte(`test-11`)}, + ), }, }, }, diff --git a/pipeline/do_if/field_op.go b/pipeline/do_if/field_op.go index a604686aa..74567750b 100644 --- a/pipeline/do_if/field_op.go +++ b/pipeline/do_if/field_op.go @@ -1,10 +1,8 @@ package do_if import ( - "bytes" "errors" "fmt" - "regexp" "slices" "github.com/ozontech/file.d/cfg" @@ -15,34 +13,6 @@ import ( // ! 
do-if-field-op // ^ do-if-field-op -type fieldOpType int - -const ( - fieldUnknownOp fieldOpType = iota - fieldEqualOp - fieldContainsOp - fieldPrefixOp - fieldSuffixOp - fieldRegexOp -) - -func (t fieldOpType) String() string { - switch t { - case fieldEqualOp: - return "equal" - case fieldContainsOp: - return "contains" - case fieldPrefixOp: - return "prefix" - case fieldSuffixOp: - return "suffix" - case fieldRegexOp: - return "regex" - default: - return "unknown" - } -} - const ( // > checks whether the field value is equal to one of the elements in the values list. // > @@ -190,94 +160,25 @@ pipelines: }*/ type fieldOpNode struct { - op fieldOpType - fieldPath []string - fieldPathStr string - caseSensitive bool - values [][]byte - valuesBySize map[int][][]byte - reValues []*regexp.Regexp - - minValLen int - maxValLen int + fieldPath []string + fieldPathStr string + checker *checker.Checker } func newFieldOpNode(op string, field string, caseSensitive bool, values [][]byte) (Node, error) { if len(values) == 0 { return nil, errors.New("values are not provided") } - var vals [][]byte - var valsBySize map[int][][]byte - var reValues []*regexp.Regexp - var minValLen, maxValLen int - var fop fieldOpType - - fieldPath := cfg.ParseFieldSelector(field) - - switch op { - case fieldEqualOpTag: - fop = fieldEqualOp - case fieldContainsOpTag: - fop = fieldContainsOp - case fieldPrefixOpTag: - fop = fieldPrefixOp - case fieldSuffixOpTag: - fop = fieldSuffixOp - case fieldRegexOpTag: - fop = fieldRegexOp - reValues = make([]*regexp.Regexp, 0, len(values)) - for _, v := range values { - re, err := regexp.Compile(string(v)) - if err != nil { - return nil, fmt.Errorf("failed to compile regex %q: %w", v, err) - } - reValues = append(reValues, re) - } - default: - return nil, fmt.Errorf("unknown field op %q", op) - } - if fop != fieldRegexOp { - minValLen = len(values[0]) - maxValLen = len(values[0]) - if fop == fieldEqualOp { - valsBySize = make(map[int][][]byte) - } else { - vals = make([][]byte, len(values)) - } - for i := range values { - var curVal []byte - if values[i] != nil { - curVal = make([]byte, len(values[i])) - copy(curVal, values[i]) - } - if !caseSensitive && curVal != nil { - curVal = bytes.ToLower(curVal) - } - if len(values[i]) < minValLen { - minValLen = len(values[i]) - } - if len(values[i]) > maxValLen { - maxValLen = len(values[i]) - } - if fop == fieldEqualOp { - valsBySize[len(curVal)] = append(valsBySize[len(curVal)], curVal) - } else { - vals[i] = curVal - } - } + c, err := checker.New(op, caseSensitive, values) + if err != nil { + return nil, err } return &fieldOpNode{ - op: fop, - fieldPath: fieldPath, - fieldPathStr: field, - caseSensitive: caseSensitive, - values: vals, - valuesBySize: valsBySize, - reValues: reValues, - minValLen: minValLen, - maxValLen: maxValLen, + fieldPath: cfg.ParseFieldSelector(field), + fieldPathStr: field, + checker: c, }, nil } @@ -286,80 +187,16 @@ func (n *fieldOpNode) Type() nodeType { } func (n *fieldOpNode) check(eventRoot *insaneJSON.Root) bool { - var data []byte node := eventRoot.Dig(n.fieldPath...) 
if node.IsArray() || node.IsObject() { return false } - if !node.IsNull() { - data = node.AsBytes() - } - // fast check for data - if n.op != fieldRegexOp && len(data) < n.minValLen { - return false - } - switch n.op { - case fieldEqualOp: - vals, ok := n.valuesBySize[len(data)] - if !ok { - return false - } - if !n.caseSensitive && data != nil { - data = bytes.ToLower(data) - } - for _, val := range vals { - // null and empty strings are considered as different values - // null can also come if field value is absent - if (data == nil && val != nil) || (data != nil && val == nil) { - continue - } - if bytes.Equal(data, val) { - return true - } - } - case fieldContainsOp: - if !n.caseSensitive { - data = bytes.ToLower(data) - } - for _, val := range n.values { - if bytes.Contains(data, val) { - return true - } - } - case fieldPrefixOp: - // check only necessary amount of bytes - if len(data) > n.maxValLen { - data = data[:n.maxValLen] - } - if !n.caseSensitive { - data = bytes.ToLower(data) - } - for _, val := range n.values { - if bytes.HasPrefix(data, val) { - return true - } - } - case fieldSuffixOp: - // check only necessary amount of bytes - if len(data) > n.maxValLen { - data = data[len(data)-n.maxValLen:] - } - if !n.caseSensitive { - data = bytes.ToLower(data) - } - for _, val := range n.values { - if bytes.HasSuffix(data, val) { - return true - } - } - case fieldRegexOp: - for _, re := range n.reValues { - if re.Match(data) { - return true - } - } + + if node.IsNull() { + return n.checker.Check(nil) } - return false + + return n.checker.Check(node.AsBytes()) } func (n *fieldOpNode) isEqualTo(n2 Node, _ int) error { @@ -367,54 +204,12 @@ func (n *fieldOpNode) isEqualTo(n2 Node, _ int) error { if !ok { return errors.New("nodes have different types expected: fieldOpNode") } - if n.op != n2f.op { - return fmt.Errorf("nodes have different op expected: %q", n.op) - } - if n.caseSensitive != n2f.caseSensitive { - return fmt.Errorf("nodes have different caseSensitive expected: %v", n.caseSensitive) - } + if n.fieldPathStr != n2f.fieldPathStr || slices.Compare[[]string](n.fieldPath, n2f.fieldPath) != 0 { return fmt.Errorf("nodes have different fieldPathStr expected: fieldPathStr=%q fieldPath=%v", n.fieldPathStr, n.fieldPath, ) } - if len(n.values) != len(n2f.values) { - return fmt.Errorf("nodes have different values slices len expected: %d", len(n.values)) - } - for i := 0; i < len(n.values); i++ { - if !bytes.Equal(n.values[i], n2f.values[i]) { - return fmt.Errorf("nodes have different data in values expected: %v on position", n.values) - } - } - if len(n.valuesBySize) != len(n2f.valuesBySize) { - return fmt.Errorf("nodes have different valuesBySize len expected: %d", len(n.valuesBySize)) - } - for k, v := range n.valuesBySize { - if v2, has := n2f.valuesBySize[k]; !has { - return fmt.Errorf("nodes have different valuesBySize keys expected key: %d", k) - } else if len(v) != len(v2) { - return fmt.Errorf("nodes have different valuesBySize values len under key %d expected: %d", k, len(v)) - } else { - for i := 0; i < len(v); i++ { - if !bytes.Equal(v[i], v2[i]) { - return fmt.Errorf("nodes have different valuesBySize data under key %d: %v", k, v) - } - } - } - } - if len(n.reValues) != len(n2f.reValues) { - return fmt.Errorf("nodes have different reValues len expected: %d", len(n.reValues)) - } - for i := 0; i < len(n.reValues); i++ { - if n.reValues[i].String() != n2f.reValues[i].String() { - return fmt.Errorf("nodes have different reValues data expected: %v", n.reValues) - } - } - if 
n.minValLen != n2f.minValLen { - return fmt.Errorf("nodes have different minValLem expected: %d", n.minValLen) - } - if n.maxValLen != n2f.maxValLen { - return fmt.Errorf("nodes have different maxValLem expected: %d", n.maxValLen) - } - return nil + + return checker.Equal(n.checker, n2f.checker) } From e34dcf0bb113b808ead51562f93410c288c55b05 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 7 Jul 2025 14:46:54 +0300 Subject: [PATCH 10/75] Fix linter errors and doc --- Insanedocfile | 4 +- pipeline/checker/checker.go | 2 +- pipeline/do_if/README.idoc.md | 2 +- pipeline/do_if/README.md | 30 ++--- pipeline/do_if/field_op.go | 235 +++++++++++++++++----------------- 5 files changed, 127 insertions(+), 146 deletions(-) diff --git a/Insanedocfile b/Insanedocfile index 65db2b5f8..b6f18e50c 100644 --- a/Insanedocfile +++ b/Insanedocfile @@ -3,21 +3,19 @@ extractors: fn-list: '"fn-list" #4 /Plugin\)\s(.+)\s{/' match-modes: '"match-modes" /MatchMode(.*),/ /\"(.*)\"/' do-if-node: '"do-if-node" /Node(\w+)\s/' - do-if-field-op: '"do-if-field-op" /field(\w+)OpTag\s/' do-if-logical-op: '"do-if-logical-op" /logical(\w+)Tag\s/' decorators: config-params: '_ _ /*`%s`* / /*`default=%s`* / /*`%s`* / /*`options=%s`* /' fn-list: '_ _ /`%s`/' match-modes: '_ /%s/ /`match_mode: %s`/' do-if-node: '_ /%s/' - do-if-field-op: '_ /%s/' do-if-logical-op: '_ /%s/' templates: - template: docs/*.idoc.md files: ["../pipeline/*.go"] - template: pipeline/*.idoc.md files: ["*.go"] - - template: pipeline/doif/*.idoc.md + - template: pipeline/do_if/*.idoc.md files: ["*.go"] - template: plugin/*/*/README.idoc.md files: ["*.go"] diff --git a/pipeline/checker/checker.go b/pipeline/checker/checker.go index d9bab76f4..124b4208f 100644 --- a/pipeline/checker/checker.go +++ b/pipeline/checker/checker.go @@ -260,5 +260,5 @@ func Equal(a, b *Checker) (err error) { AssertEqual(a.minValLen, b.minValLen, "different min value len") AssertEqual(a.maxValLen, b.maxValLen, "different max value len") - return + return nil } diff --git a/pipeline/do_if/README.idoc.md b/pipeline/do_if/README.idoc.md index aea515be0..0468988de 100644 --- a/pipeline/do_if/README.idoc.md +++ b/pipeline/do_if/README.idoc.md @@ -12,7 +12,7 @@ the chain of Match func calls are performed across the whole tree. @do-if-field-op-node ## Field operations -@do-if-field-op|description +@do-if-field-op ## Logical op node @do-if-logical-op-node diff --git a/pipeline/do_if/README.md b/pipeline/do_if/README.md index 759545365..5ac2dd96e 100755 --- a/pipeline/do_if/README.md +++ b/pipeline/do_if/README.md @@ -54,7 +54,7 @@ pipelines: ## Field operations -**`Equal`** checks whether the field value is equal to one of the elements in the values list. +`equal` checks whether the field value is equal to one of the elements in the values list. Example: ```yaml @@ -68,7 +68,7 @@ pipelines: values: [test-pod-1, test-pod-2] ``` -result: +Result: ``` {"pod":"test-pod-1","service":"test-service"} # discarded {"pod":"test-pod-2","service":"test-service-2"} # discarded @@ -76,9 +76,7 @@ result: {"pod":"test-pod","service":"test-service-1"} # not discarded ``` -
- -**`Contains`** checks whether the field value contains one of the elements the in values list. +`contains` checks whether the field value contains one of the elements the in values list. Example: ```yaml @@ -92,7 +90,7 @@ pipelines: values: [my-pod, my-test] ``` -result: +Result: ``` {"pod":"test-my-pod-1","service":"test-service"} # discarded {"pod":"test-not-my-pod","service":"test-service-2"} # discarded @@ -100,9 +98,7 @@ result: {"pod":"test-pod","service":"test-service-1"} # not discarded ``` -
- -**`Prefix`** checks whether the field value has prefix equal to one of the elements in the values list. +`prefix` checks whether the field value has prefix equal to one of the elements in the values list. Example: ```yaml @@ -116,7 +112,7 @@ pipelines: values: [test-1, test-2] ``` -result: +Result: ``` {"pod":"test-1-pod-1","service":"test-service"} # discarded {"pod":"test-2-pod-2","service":"test-service-2"} # discarded @@ -124,9 +120,7 @@ result: {"pod":"test-pod","service":"test-service-1"} # not discarded ``` -
- -**`Suffix`** checks whether the field value has suffix equal to one of the elements in the values list. +`suffix` checks whether the field value has suffix equal to one of the elements in the values list. Example: ```yaml @@ -140,7 +134,7 @@ pipelines: values: [pod-1, pod-2] ``` -result: +Result: ``` {"pod":"test-1-pod-1","service":"test-service"} # discarded {"pod":"test-2-pod-2","service":"test-service-2"} # discarded @@ -148,9 +142,7 @@ result: {"pod":"test-pod","service":"test-service-1"} # not discarded ``` -
- -**`Regex`** checks whether the field matches any regex from the values list. +`regex` checks whether the field matches any regex from the values list. Example: ```yaml @@ -164,7 +156,7 @@ pipelines: values: [pod-\d, my-test.*] ``` -result: +Result: ``` {"pod":"test-1-pod-1","service":"test-service"} # discarded {"pod":"test-2-pod-2","service":"test-service-2"} # discarded @@ -174,8 +166,6 @@ result: {"pod":"service123","service":"test-service-1"} # not discarded ``` -
- ## Logical op node DoIf logical op node is a node considered to be the root or an edge between nodes. diff --git a/pipeline/do_if/field_op.go b/pipeline/do_if/field_op.go index 74567750b..5a45c4c18 100644 --- a/pipeline/do_if/field_op.go +++ b/pipeline/do_if/field_op.go @@ -10,127 +10,120 @@ import ( insaneJSON "github.com/ozontech/insane-json" ) -// ! do-if-field-op -// ^ do-if-field-op - -const ( - // > checks whether the field value is equal to one of the elements in the values list. - // > - // > Example: - // > ```yaml - // > pipelines: - // > test: - // > actions: - // > - type: discard - // > do_if: - // > op: equal - // > field: pod - // > values: [test-pod-1, test-pod-2] - // > ``` - // > - // > result: - // > ``` - // > {"pod":"test-pod-1","service":"test-service"} # discarded - // > {"pod":"test-pod-2","service":"test-service-2"} # discarded - // > {"pod":"test-pod","service":"test-service"} # not discarded - // > {"pod":"test-pod","service":"test-service-1"} # not discarded - // > ``` - fieldEqualOpTag = checker.OpEqualTag // * - - // > checks whether the field value contains one of the elements the in values list. - // > - // > Example: - // > ```yaml - // > pipelines: - // > test: - // > actions: - // > - type: discard - // > do_if: - // > op: contains - // > field: pod - // > values: [my-pod, my-test] - // > ``` - // > - // > result: - // > ``` - // > {"pod":"test-my-pod-1","service":"test-service"} # discarded - // > {"pod":"test-not-my-pod","service":"test-service-2"} # discarded - // > {"pod":"my-test-pod","service":"test-service"} # discarded - // > {"pod":"test-pod","service":"test-service-1"} # not discarded - // > ``` - fieldContainsOpTag = checker.OpContainsTag // * - - // > checks whether the field value has prefix equal to one of the elements in the values list. - // > - // > Example: - // > ```yaml - // > pipelines: - // > test: - // > actions: - // > - type: discard - // > do_if: - // > op: prefix - // > field: pod - // > values: [test-1, test-2] - // > ``` - // > - // > result: - // > ``` - // > {"pod":"test-1-pod-1","service":"test-service"} # discarded - // > {"pod":"test-2-pod-2","service":"test-service-2"} # discarded - // > {"pod":"test-pod","service":"test-service"} # not discarded - // > {"pod":"test-pod","service":"test-service-1"} # not discarded - // > ``` - fieldPrefixOpTag = checker.OpPrefixTag // * - - // > checks whether the field value has suffix equal to one of the elements in the values list. - // > - // > Example: - // > ```yaml - // > pipelines: - // > test: - // > actions: - // > - type: discard - // > do_if: - // > op: suffix - // > field: pod - // > values: [pod-1, pod-2] - // > ``` - // > - // > result: - // > ``` - // > {"pod":"test-1-pod-1","service":"test-service"} # discarded - // > {"pod":"test-2-pod-2","service":"test-service-2"} # discarded - // > {"pod":"test-pod","service":"test-service"} # not discarded - // > {"pod":"test-pod","service":"test-service-1"} # not discarded - // > ``` - fieldSuffixOpTag = checker.OpSuffixTag // * - - // > checks whether the field matches any regex from the values list. 
- // > - // > Example: - // > ```yaml - // > pipelines: - // > test: - // > actions: - // > - type: discard - // > do_if: - // > op: regex - // > field: pod - // > values: [pod-\d, my-test.*] - // > ``` - // > - // > result: - // > ``` - // > {"pod":"test-1-pod-1","service":"test-service"} # discarded - // > {"pod":"test-2-pod-2","service":"test-service-2"} # discarded - // > {"pod":"test-pod","service":"test-service"} # not discarded - // > {"pod":"my-test-pod","service":"test-service-1"} # discarded - // > {"pod":"my-test-instance","service":"test-service-1"} # discarded - // > {"pod":"service123","service":"test-service-1"} # not discarded - // > ``` - fieldRegexOpTag = checker.OpRegexTag // * -) +/*{ do-if-field-op +`equal` checks whether the field value is equal to one of the elements in the values list. + +Example: +```yaml +pipelines: + test: + actions: + - type: discard + do_if: + op: equal + field: pod + values: [test-pod-1, test-pod-2] +``` + +Result: +``` +{"pod":"test-pod-1","service":"test-service"} # discarded +{"pod":"test-pod-2","service":"test-service-2"} # discarded +{"pod":"test-pod","service":"test-service"} # not discarded +{"pod":"test-pod","service":"test-service-1"} # not discarded +``` + +`contains` checks whether the field value contains one of the elements the in values list. + +Example: +```yaml +pipelines: + test: + actions: + - type: discard + do_if: + op: contains + field: pod + values: [my-pod, my-test] +``` + +Result: +``` +{"pod":"test-my-pod-1","service":"test-service"} # discarded +{"pod":"test-not-my-pod","service":"test-service-2"} # discarded +{"pod":"my-test-pod","service":"test-service"} # discarded +{"pod":"test-pod","service":"test-service-1"} # not discarded +``` + +`prefix` checks whether the field value has prefix equal to one of the elements in the values list. + +Example: +```yaml +pipelines: + test: + actions: + - type: discard + do_if: + op: prefix + field: pod + values: [test-1, test-2] +``` + +Result: +``` +{"pod":"test-1-pod-1","service":"test-service"} # discarded +{"pod":"test-2-pod-2","service":"test-service-2"} # discarded +{"pod":"test-pod","service":"test-service"} # not discarded +{"pod":"test-pod","service":"test-service-1"} # not discarded +``` + +`suffix` checks whether the field value has suffix equal to one of the elements in the values list. + +Example: +```yaml +pipelines: + test: + actions: + - type: discard + do_if: + op: suffix + field: pod + values: [pod-1, pod-2] +``` + +Result: +``` +{"pod":"test-1-pod-1","service":"test-service"} # discarded +{"pod":"test-2-pod-2","service":"test-service-2"} # discarded +{"pod":"test-pod","service":"test-service"} # not discarded +{"pod":"test-pod","service":"test-service-1"} # not discarded +``` + +`regex` checks whether the field matches any regex from the values list. + +Example: +```yaml +pipelines: + test: + actions: + - type: discard + do_if: + op: regex + field: pod + values: [pod-\d, my-test.*] +``` + +Result: +``` +{"pod":"test-1-pod-1","service":"test-service"} # discarded +{"pod":"test-2-pod-2","service":"test-service-2"} # discarded +{"pod":"test-pod","service":"test-service"} # not discarded +{"pod":"my-test-pod","service":"test-service-1"} # discarded +{"pod":"my-test-instance","service":"test-service-1"} # discarded +{"pod":"service123","service":"test-service-1"} # not discarded +``` + +}*/ /*{ do-if-field-op-node DoIf field op node is considered to always be a leaf in the DoIf tree. It checks byte representation of the value by the given field path. 
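A minimal, self-contained sketch of how the shared `checker` package is used once `fieldOpNode` delegates its matching to `n.checker.Check`, as the commit above does. The `OpContainsTag` constant and the `New`/`Check` signatures are the ones exercised by the test added in the next commit; the concrete values and the `main` scaffolding are illustrative only.

```go
package main

import (
	"fmt"

	"github.com/ozontech/file.d/pipeline/checker"
)

func main() {
	// Build a case-insensitive "contains" checker, the same way the do_if
	// field op node (and, later in this series, the antispam nodes) build theirs.
	c, err := checker.New(checker.OpContainsTag, false, [][]byte{
		[]byte("my-pod"),
		[]byte("my-test"),
	})
	if err != nil {
		panic(err)
	}

	// Check raw byte values; nil models an absent or null field.
	fmt.Println(c.Check([]byte("TEST-MY-POD-1"))) // true: case-insensitive substring match
	fmt.Println(c.Check(nil))                     // false: nothing to match against
}
```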
From e1a93a5eed6d59174380bd83f8500ef8e5e64fd9 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 7 Jul 2025 15:36:36 +0300 Subject: [PATCH 11/75] Add checker test --- pipeline/checker/checker_test.go | 71 ++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 pipeline/checker/checker_test.go diff --git a/pipeline/checker/checker_test.go b/pipeline/checker/checker_test.go new file mode 100644 index 000000000..7b671c8a6 --- /dev/null +++ b/pipeline/checker/checker_test.go @@ -0,0 +1,71 @@ +package checker + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestCheckerCtor(t *testing.T) { + type testCase struct { + opTag string + caseSensitive bool + values [][]byte + + expected *Checker + } + + for _, tt := range []testCase{ + { + opTag: OpEqualTag, + caseSensitive: true, + values: [][]byte{[]byte(`test-111`), []byte(`test-2`), []byte(`test-3`), []byte(`test-12345`)}, + + expected: &Checker{ + op: opEqual, + caseSensitive: true, + values: nil, + valuesBySize: map[int][][]byte{ + 6: { + []byte(`test-2`), + []byte(`test-3`), + }, + 8: { + []byte(`test-111`), + }, + 10: { + []byte(`test-12345`), + }, + }, + reValues: nil, + minValLen: 6, + maxValLen: 10, + }, + }, + { + opTag: OpContainsTag, + caseSensitive: false, + values: [][]byte{ + []byte(`test-0987`), + []byte(`test-11`), + }, + + expected: &Checker{ + op: opContains, + caseSensitive: false, + values: [][]byte{ + []byte(`test-0987`), + []byte(`test-11`), + }, + valuesBySize: nil, + reValues: nil, + minValLen: 7, + maxValLen: 9, + }, + }, + } { + got, err := New(tt.opTag, tt.caseSensitive, tt.values) + require.NoErrorf(t, err, "failed to init checker") + require.NoError(t, Equal(got, tt.expected), "checkers are not equal") + } +} From 75f834a39cda219fb880942d29517eff55ea0086 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 8 Jul 2025 01:52:20 +0300 Subject: [PATCH 12/75] Add new antispam ctor draft --- pipeline/antispam/ctor.go | 57 +++++++++++++++++ pipeline/antispam/logical_node.go | 91 ++++++++++++++++++++++++++ pipeline/antispam/node.go | 13 ++++ pipeline/antispam/value_node.go | 103 ++++++++++++++++++++++++++++++ 4 files changed, 264 insertions(+) create mode 100644 pipeline/antispam/ctor.go create mode 100644 pipeline/antispam/logical_node.go create mode 100644 pipeline/antispam/node.go create mode 100644 pipeline/antispam/value_node.go diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go new file mode 100644 index 000000000..b6790e570 --- /dev/null +++ b/pipeline/antispam/ctor.go @@ -0,0 +1,57 @@ +package antispam + +import ( + "fmt" + + "github.com/bitly/go-simplejson" +) + +func extractNode(jsonNode *simplejson.Json) (Node, error) { + switch op := jsonNode.Get("op").MustString(); op { + case "and", "or", "not": + return extractLogicalNode(op, jsonNode) + case + "equal", + "contains", + "prefix", + "suffix", + "regex": + return extractValueNode(op, jsonNode) + default: + return nil, fmt.Errorf("unknown op: %s", op) + } +} + +func extractLogicalNode(op string, jsonNode *simplejson.Json) (Node, error) { + rawOperands := jsonNode.Get("operands") + + var operands []Node + for i := range rawOperands.MustArray() { + opNode := rawOperands.GetIndex(i) + operand, err := extractNode(opNode) + if err != nil { + return nil, fmt.Errorf("extract operand for logical op %q: %w", op, err) + } + operands = append(operands, operand) + } + + result, err := newLogicalNode(op, operands) + if err != nil { + return nil, fmt.Errorf("init logical node: %w", err) + } + + 
return result, nil +} + +func extractValueNode(op string, jsonNode *simplejson.Json) (Node, error) { + caseSensitive := jsonNode.Get("case_sensitive").MustBool(true) + checkDataTag := jsonNode.Get("data").MustString() + metaKey := jsonNode.Get("meta_key").MustString() + + result, err := newValueNode(op, caseSensitive, nil, checkDataTag, metaKey) + if err != nil { + return nil, fmt.Errorf("init value node: %w", err) + } + + return result, nil +} diff --git a/pipeline/antispam/logical_node.go b/pipeline/antispam/logical_node.go new file mode 100644 index 000000000..0aacfe782 --- /dev/null +++ b/pipeline/antispam/logical_node.go @@ -0,0 +1,91 @@ +package antispam + +import ( + "errors" + "fmt" +) + +type logicalOpType int + +const ( + logicalAnd logicalOpType = iota + logicalOr + logicalNot +) + +func (t logicalOpType) String() string { + switch t { + case logicalAnd: + return "and" + case logicalOr: + return "or" + case logicalNot: + return "not" + default: + return "unknown" + } +} + +const ( + logicalAndTag = "and" + logicalOrTag = "or" + logicalNotTag = "not" +) + +type logicalNode struct { + op logicalOpType + operands []Node +} + +func newLogicalNode(op string, operands []Node) (Node, error) { + if len(operands) == 0 { + return nil, errors.New("logical op must have at least one operand") + } + + var lop logicalOpType + switch op { + case logicalOrTag: + lop = logicalOr + case logicalAndTag: + lop = logicalAnd + case logicalNotTag: + lop = logicalNot + if len(operands) > 1 { + return nil, fmt.Errorf("logical not must have exactly one operand, got %d", len(operands)) + } + default: + return nil, fmt.Errorf("unknown logical op %q", op) + } + return &logicalNode{ + op: lop, + operands: operands, + }, nil +} + +func (n *logicalNode) Type() nodeType { + return nodeTypeLogical +} + +func (n *logicalNode) check(event []byte, sourceName []byte, metadata map[string]string) bool { + switch n.op { + case logicalAnd: + for _, op := range n.operands { + if !op.check(event, sourceName, metadata) { + return false + } + } + return true + case logicalOr: + for _, op := range n.operands { + if op.check(event, sourceName, metadata) { + return true + } + } + return false + + case logicalNot: + return !n.operands[0].check(event, sourceName, metadata) + default: + panic(fmt.Sprintf("unknown logical op: %v", n.op)) + } +} diff --git a/pipeline/antispam/node.go b/pipeline/antispam/node.go new file mode 100644 index 000000000..1b3673aa4 --- /dev/null +++ b/pipeline/antispam/node.go @@ -0,0 +1,13 @@ +package antispam + +type nodeType int + +const ( + nodeTypeUsual nodeType = iota + nodeTypeLogical +) + +type Node interface { + Type() nodeType + check(event []byte, sourceName []byte, metadata map[string]string) bool +} diff --git a/pipeline/antispam/value_node.go b/pipeline/antispam/value_node.go new file mode 100644 index 000000000..645313a9b --- /dev/null +++ b/pipeline/antispam/value_node.go @@ -0,0 +1,103 @@ +package antispam + +import ( + "errors" + "fmt" + + "github.com/ozontech/file.d/pipeline/checker" +) + +type checkData int + +const ( + checkDataEvent checkData = iota + checkDataSourceName + checkDataMeta +) + +func (c checkData) String() string { + switch c { + case checkDataEvent: + return checkDataEventTag + case checkDataSourceName: + return checkDataSourceNameTag + case checkDataMeta: + return checkDataMetaTag + default: + panic(fmt.Sprintf("unknown checked data type: %d", c)) + } +} + +const ( + checkDataEventTag = "event" + checkDataSourceNameTag = "source_name" + checkDataMetaTag = "meta" +) + 
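As a rough orientation for how the pieces introduced in this commit are meant to compose, here is a hypothetical package-internal sketch, not part of the patch: only the `newValueNode`, `newLogicalNode` and `check` signatures come from the code above, while the op names, the source path and the `namespace` meta key are made-up examples.

```go
package antispam

// valueNodeSketch is a hypothetical example (not part of the patch): it wires a
// source_name prefix check and a meta check together with a logical "and".
func valueNodeSketch() (bool, error) {
	// Matches events whose source name starts with the given path prefix.
	bySource, err := newValueNode("prefix", true, [][]byte{[]byte("/var/log/pods/")}, "source_name", "")
	if err != nil {
		return false, err
	}

	// Matches events whose "namespace" metadata value equals "kube-system".
	byMeta, err := newValueNode("equal", true, [][]byte{[]byte("kube-system")}, "meta", "namespace")
	if err != nil {
		return false, err
	}

	// Both operands must match for the whole condition to match.
	cond, err := newLogicalNode("and", []Node{bySource, byMeta})
	if err != nil {
		return false, err
	}

	meta := map[string]string{"namespace": "kube-system"}
	return cond.check([]byte(`{"level":"info"}`), []byte("/var/log/pods/nginx_0.log"), meta), nil
}
```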
+func stringToCheckData(s string) (checkData, error) { + switch s { + case checkDataEventTag: + return checkDataEvent, nil + case checkDataSourceNameTag: + return checkDataSourceName, nil + case checkDataMetaTag: + return checkDataMeta, nil + default: + return -1, fmt.Errorf("unknown checked type data: %s", s) + } +} + +type valueNode struct { + checkData checkData + metaKey string + checker *checker.Checker +} + +func newValueNode( + opTag string, + caseSensitive bool, + values [][]byte, + checkDataTag string, + metaKey string, +) (*valueNode, error) { + c, err := checker.New(opTag, caseSensitive, values) + if err != nil { + return nil, fmt.Errorf("init checker: %w", err) + } + + var checkDataType checkData + checkDataType, err = stringToCheckData(checkDataTag) + if err != nil { + return nil, err + } + + if checkDataType == checkDataMeta { + if metaKey == "" { + return nil, errors.New("empty meta key") + } + } + + return &valueNode{ + checkData: checkDataType, + metaKey: metaKey, + checker: c, + }, nil +} + +func (n *valueNode) Type() nodeType { + return nodeTypeUsual +} + +func (n *valueNode) check(event []byte, sourceName []byte, metadata map[string]string) bool { + switch n.checkData { + case checkDataEvent: + return n.checker.Check(event) + case checkDataSourceName: + return n.checker.Check(sourceName) + case checkDataMeta: + data, ok := metadata[n.metaKey] + return ok && n.checker.Check([]byte(data)) + default: + panic(fmt.Sprintf("inknown type of checked data: %d", n.checkData)) + } +} From f2081ad16645cfd63d6254799256e10d3b41adbc Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 8 Jul 2025 02:09:13 +0300 Subject: [PATCH 13/75] Extract values --- pipeline/antispam/ctor.go | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index b6790e570..f4fd98603 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -1,6 +1,7 @@ package antispam import ( + "errors" "fmt" "github.com/bitly/go-simplejson" @@ -48,10 +49,34 @@ func extractValueNode(op string, jsonNode *simplejson.Json) (Node, error) { checkDataTag := jsonNode.Get("data").MustString() metaKey := jsonNode.Get("meta_key").MustString() - result, err := newValueNode(op, caseSensitive, nil, checkDataTag, metaKey) + values, err := extractFieldOpVals(jsonNode) + if err != nil { + return nil, fmt.Errorf("extract values: %w", err) + } + + result, err := newValueNode(op, caseSensitive, values, checkDataTag, metaKey) if err != nil { return nil, fmt.Errorf("init value node: %w", err) } return result, nil } + +func extractFieldOpVals(jsonNode *simplejson.Json) ([][]byte, error) { + values, has := jsonNode.CheckGet("values") + if !has { + return nil, errors.New(`field "values" not found'`) + } + + var result [][]byte + for i := range values.MustArray() { + curValue, err := values.GetIndex(i).String() + if err != nil { + return nil, err + } + + result = append(result, []byte(curValue)) + } + + return result, nil +} From 1e09f0edbfbe5089640094b56e86fd67ca5766b7 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 8 Jul 2025 12:41:58 +0300 Subject: [PATCH 14/75] Rename --- pipeline/antispam/node.go | 2 +- pipeline/antispam/value_node.go | 72 ++++++++++++++++----------------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/pipeline/antispam/node.go b/pipeline/antispam/node.go index 1b3673aa4..ef9c95c15 100644 --- a/pipeline/antispam/node.go +++ b/pipeline/antispam/node.go @@ -3,7 +3,7 @@ package antispam type 
nodeType int const ( - nodeTypeUsual nodeType = iota + nodeTypeValue nodeType = iota nodeTypeLogical ) diff --git a/pipeline/antispam/value_node.go b/pipeline/antispam/value_node.go index 645313a9b..a12ece761 100644 --- a/pipeline/antispam/value_node.go +++ b/pipeline/antispam/value_node.go @@ -7,50 +7,50 @@ import ( "github.com/ozontech/file.d/pipeline/checker" ) -type checkData int +type dataType int const ( - checkDataEvent checkData = iota - checkDataSourceName - checkDataMeta + dataTypeEvent dataType = iota + dataTypeSourceName + dataTypeMeta ) -func (c checkData) String() string { +func (c dataType) String() string { switch c { - case checkDataEvent: - return checkDataEventTag - case checkDataSourceName: - return checkDataSourceNameTag - case checkDataMeta: - return checkDataMetaTag + case dataTypeEvent: + return dataTypeEventTag + case dataTypeSourceName: + return dataTypeSourceNameTag + case dataTypeMeta: + return dataTypeMetaTag default: panic(fmt.Sprintf("unknown checked data type: %d", c)) } } const ( - checkDataEventTag = "event" - checkDataSourceNameTag = "source_name" - checkDataMetaTag = "meta" + dataTypeEventTag = "event" + dataTypeSourceNameTag = "source_name" + dataTypeMetaTag = "meta" ) -func stringToCheckData(s string) (checkData, error) { +func stringToDataType(s string) (dataType, error) { switch s { - case checkDataEventTag: - return checkDataEvent, nil - case checkDataSourceNameTag: - return checkDataSourceName, nil - case checkDataMetaTag: - return checkDataMeta, nil + case dataTypeEventTag: + return dataTypeEvent, nil + case dataTypeSourceNameTag: + return dataTypeSourceName, nil + case dataTypeMetaTag: + return dataTypeMeta, nil default: return -1, fmt.Errorf("unknown checked type data: %s", s) } } type valueNode struct { - checkData checkData - metaKey string - checker *checker.Checker + dataType dataType + metaKey string + checker *checker.Checker } func newValueNode( @@ -65,39 +65,39 @@ func newValueNode( return nil, fmt.Errorf("init checker: %w", err) } - var checkDataType checkData - checkDataType, err = stringToCheckData(checkDataTag) + var dType dataType + dType, err = stringToDataType(checkDataTag) if err != nil { return nil, err } - if checkDataType == checkDataMeta { + if dType == dataTypeMeta { if metaKey == "" { return nil, errors.New("empty meta key") } } return &valueNode{ - checkData: checkDataType, - metaKey: metaKey, - checker: c, + dataType: dType, + metaKey: metaKey, + checker: c, }, nil } func (n *valueNode) Type() nodeType { - return nodeTypeUsual + return nodeTypeValue } func (n *valueNode) check(event []byte, sourceName []byte, metadata map[string]string) bool { - switch n.checkData { - case checkDataEvent: + switch n.dataType { + case dataTypeEvent: return n.checker.Check(event) - case checkDataSourceName: + case dataTypeSourceName: return n.checker.Check(sourceName) - case checkDataMeta: + case dataTypeMeta: data, ok := metadata[n.metaKey] return ok && n.checker.Check([]byte(data)) default: - panic(fmt.Sprintf("inknown type of checked data: %d", n.checkData)) + panic(fmt.Sprintf("inknown type of checked data: %d", n.dataType)) } } From 89811b47589d3dd8061bcd931cd1127909f62515 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 8 Jul 2025 14:47:54 +0300 Subject: [PATCH 15/75] Change method signature --- pipeline/antispam/antispammer.go | 9 ++++++++- pipeline/antispam/antispammer_test.go | 6 +++--- pipeline/pipeline.go | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pipeline/antispam/antispammer.go 
b/pipeline/antispam/antispammer.go index 4a5cbff16..639294d36 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -83,7 +83,14 @@ func NewAntispammer(o *Options) *Antispammer { return a } -func (a *Antispammer) IsSpam(id string, name string, isNewSource bool, event []byte, timeEvent time.Time) bool { +func (a *Antispammer) IsSpam( + id string, + name string, + isNewSource bool, + event []byte, + timeEvent time.Time, + _ map[string]string, +) bool { if a.threshold <= 0 { return false } diff --git a/pipeline/antispam/antispammer_test.go b/pipeline/antispam/antispammer_test.go index eb1a1a81e..7ba6cddf4 100644 --- a/pipeline/antispam/antispammer_test.go +++ b/pipeline/antispam/antispammer_test.go @@ -36,7 +36,7 @@ func TestAntispam(t *testing.T) { startTime := time.Now() checkSpam := func(i int) bool { eventTime := startTime.Add(time.Duration(i) * maintenanceInterval / 2) - return antispamer.IsSpam("1", "test", false, []byte(`{}`), eventTime) + return antispamer.IsSpam("1", "test", false, []byte(`{}`), eventTime, nil) } for i := 1; i < threshold; i++ { @@ -66,7 +66,7 @@ func TestAntispamAfterRestart(t *testing.T) { startTime := time.Now() checkSpam := func(i int) bool { eventTime := startTime.Add(time.Duration(i) * maintenanceInterval) - return antispamer.IsSpam("1", "test", false, []byte(`{}`), eventTime) + return antispamer.IsSpam("1", "test", false, []byte(`{}`), eventTime, nil) } for i := 1; i < threshold; i++ { @@ -128,7 +128,7 @@ func TestAntispamExceptions(t *testing.T) { antispamer.exceptions.Prepare() checkSpam := func(source, event string, wantMetric map[string]float64) { - antispamer.IsSpam("1", source, true, []byte(event), now) + antispamer.IsSpam("1", source, true, []byte(event), now, nil) for k, v := range wantMetric { r.Equal(v, testutil.ToFloat64(antispamer.exceptionMetric.WithLabelValues(k))) } diff --git a/pipeline/pipeline.go b/pipeline/pipeline.go index f1bb39104..9780ca200 100644 --- a/pipeline/pipeline.go +++ b/pipeline/pipeline.go @@ -491,7 +491,7 @@ func (p *Pipeline) In(sourceID SourceID, sourceName string, offsets Offsets, byt p.Error(fmt.Sprintf("cannot parse raw time %s: %v", row.Time, err)) } } - isSpam := p.antispamer.IsSpam(checkSourceID, checkSourceName, isNewSource, bytes, eventTime) + isSpam := p.antispamer.IsSpam(checkSourceID, checkSourceName, isNewSource, bytes, eventTime, meta) if isSpam { return EventSeqIDError } From dd2eaeb00e068802ca4df0718f3ac09f09da0781 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 8 Jul 2025 15:12:58 +0300 Subject: [PATCH 16/75] Add ctors --- pipeline/antispam/antispammer.go | 34 ++++++++++++++++++++++++++++++++ pipeline/antispam/ctor.go | 31 +++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index 639294d36..375a7266e 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -229,3 +229,37 @@ func (e Exceptions) Prepare() { e[i].Prepare() } } + +type Antispam struct { + rules []Rule + defaultLimit int + enabled bool +} + +func NewAntispam(defaultLimit int, rules []Rule) *Antispam { + if defaultLimit == -1 && len(rules) == 0 { + return &Antispam{enabled: false} + } + + return &Antispam{ + rules: rules, + defaultLimit: defaultLimit, + enabled: true, + } +} + +type Rule struct { + Condition Node + Limit int +} + +func newRule(condition Node, limit int) (Rule, error) { + if limit < -1 { + return Rule{}, fmt.Errorf("invalid limit: %d", limit) + } + + return Rule{ + 
Condition: condition, + Limit: limit, + }, nil +} diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index f4fd98603..63c6a50c2 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -7,6 +7,37 @@ import ( "github.com/bitly/go-simplejson" ) +func extractRules(jsonNode *simplejson.Json) ([]Rule, error) { + rules := jsonNode.Get("rules") + + var result []Rule + for i := range rules.MustArray() { + ruleRaw := rules.GetIndex(i) + rule, err := extractRule(ruleRaw) + if err != nil { + return nil, fmt.Errorf("extract rule: %w", err) + } + + result = append(result, rule) + } + + return result, nil +} + +func extractRule(jsonNode *simplejson.Json) (Rule, error) { + condition, err := extractNode(jsonNode.Get("cond")) + if err != nil { + return Rule{}, fmt.Errorf("extract cond: %w", err) + } + + limit, err := jsonNode.Get("limit").Int() + if err != nil { + return Rule{}, fmt.Errorf("limit is not int: %w", err) + } + + return newRule(condition, limit) +} + func extractNode(jsonNode *simplejson.Json) (Node, error) { switch op := jsonNode.Get("op").MustString(); op { case "and", "or", "not": From f3987a992ad42c364769665d0fd75584ac8f8dc4 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 8 Jul 2025 16:25:20 +0300 Subject: [PATCH 17/75] Refactor --- fd/util.go | 31 +++++++++++++++++++++---------- pipeline/antispam/antispammer.go | 28 +++++++++++++++++++++++----- pipeline/antispam/ctor.go | 24 ++++++++++++++++++++++++ pipeline/pipeline.go | 2 ++ 4 files changed, 70 insertions(+), 15 deletions(-) diff --git a/fd/util.go b/fd/util.go index d138a98ea..5b9346879 100644 --- a/fd/util.go +++ b/fd/util.go @@ -18,6 +18,7 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings { capacity := pipeline.DefaultCapacity antispamThreshold := pipeline.DefaultAntispamThreshold var antispamExceptions antispam.Exceptions + var antispamV2 *antispam.Antispam sourceNameMetaField := pipeline.DefaultSourceNameMetaField avgInputEventSize := pipeline.DefaultAvgInputEventSize maxInputEventSize := pipeline.DefaultMaxInputEventSize @@ -87,19 +88,28 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings { eventTimeout = i } - antispamThreshold = settings.Get("antispam_threshold").MustInt() - antispamThreshold *= int(maintenanceInterval / time.Second) - if antispamThreshold < 0 { - logger.Warn("negative antispam_threshold value, antispam disabled") - antispamThreshold = 0 - } - var err error - antispamExceptions, err = extractAntispamExceptions(settings) + antispamV2, err = antispam.ExtractV2(settings) if err != nil { - logger.Fatalf("extract exceptions: %s", err) + logger.Warnf( + "try to fallback to legacy antispam; can't get new antispam config: %s", + err.Error(), + ) + + antispamThreshold = settings.Get("antispam_threshold").MustInt() + antispamThreshold *= int(maintenanceInterval / time.Second) + if antispamThreshold < 0 { + logger.Warn("negative antispam_threshold value, antispam disabled") + antispamThreshold = 0 + } + + var err error + antispamExceptions, err = extractAntispamExceptions(settings) + if err != nil { + logger.Fatalf("extract exceptions: %s", err) + } + antispamExceptions.Prepare() } - antispamExceptions.Prepare() sourceNameMetaField = settings.Get("source_name_meta_field").MustString() isStrict = settings.Get("is_strict").MustBool() @@ -129,6 +139,7 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings { CutOffEventByLimitField: cutOffEventByLimitField, AntispamThreshold: antispamThreshold, AntispamExceptions: 
antispamExceptions, + Antispam: antispamV2, SourceNameMetaField: sourceNameMetaField, MaintenanceInterval: maintenanceInterval, EventTimeout: eventTimeout, diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index 375a7266e..b90ced6fe 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -25,6 +25,8 @@ type Antispammer struct { sources map[string]source exceptions Exceptions + antispam *Antispam + logger *zap.Logger // antispammer metrics @@ -44,6 +46,7 @@ type Options struct { Threshold int UnbanIterations int Exceptions Exceptions + Antispam *Antispam Logger *zap.Logger MetricsController *metric.Ctl @@ -91,6 +94,10 @@ func (a *Antispammer) IsSpam( timeEvent time.Time, _ map[string]string, ) bool { + if a.antispam != nil { + panic("proc new antispam") + } + if a.threshold <= 0 { return false } @@ -236,16 +243,20 @@ type Antispam struct { enabled bool } -func NewAntispam(defaultLimit int, rules []Rule) *Antispam { +func NewAntispam(defaultLimit int, rules []Rule) (*Antispam, error) { + if err := checkLimit(defaultLimit); err != nil { + return nil, err + } + if defaultLimit == -1 && len(rules) == 0 { - return &Antispam{enabled: false} + return &Antispam{enabled: false}, nil } return &Antispam{ rules: rules, defaultLimit: defaultLimit, enabled: true, - } + }, nil } type Rule struct { @@ -253,9 +264,16 @@ type Rule struct { Limit int } -func newRule(condition Node, limit int) (Rule, error) { +func checkLimit(limit int) error { if limit < -1 { - return Rule{}, fmt.Errorf("invalid limit: %d", limit) + return fmt.Errorf("invalid limit: %d", limit) + } + return nil +} + +func newRule(condition Node, limit int) (Rule, error) { + if err := checkLimit(limit); err != nil { + return Rule{}, err } return Rule{ diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index 63c6a50c2..049805408 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -7,6 +7,30 @@ import ( "github.com/bitly/go-simplejson" ) +func ExtractV2(jsonNode *simplejson.Json) (*Antispam, error) { + rules, err := extractRules(jsonNode) + if err != nil { + return nil, err + } + + defLimiter, ok := jsonNode.CheckGet("default") + if !ok { + return nil, fmt.Errorf(`field "default" not found`) + } + + defLimit, ok := defLimiter.CheckGet("limit") + if !ok { + return nil, fmt.Errorf(`field "limit" not found`) + } + + limit, err := defLimit.Int() + if err != nil { + return nil, err + } + + return NewAntispam(limit, rules) +} + func extractRules(jsonNode *simplejson.Json) ([]Rule, error) { rules := jsonNode.Get("rules") diff --git a/pipeline/pipeline.go b/pipeline/pipeline.go index 9780ca200..a2cb56f45 100644 --- a/pipeline/pipeline.go +++ b/pipeline/pipeline.go @@ -154,6 +154,7 @@ type Settings struct { EventTimeout time.Duration AntispamThreshold int AntispamExceptions antispam.Exceptions + Antispam *antispam.Antispam SourceNameMetaField string AvgEventSize int MaxEventSize int @@ -209,6 +210,7 @@ func New(name string, settings *Settings, registry *prometheus.Registry, lg *zap antispamer: antispam.NewAntispammer(&antispam.Options{ MaintenanceInterval: settings.MaintenanceInterval, Threshold: settings.AntispamThreshold, + Antispam: settings.Antispam, UnbanIterations: antispamUnbanIterations, Logger: lg.Named("antispam"), MetricsController: metricCtl, From c7b8d32c76812bda3343b86932cbe31b3d059a73 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 8 Jul 2025 18:27:25 +0300 Subject: [PATCH 18/75] Use new antispam rules --- 
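Before the diff itself, a brief sketch of the configuration shape that the constructors from the preceding commits accept at this point in the series. The key names (`default.limit`, `rules`, `cond`, `op`, `operands`, `data`, `values`) are taken from `ExtractV2`, `extractRule` and the node ctors above; the concrete rule, paths and numbers are invented for illustration, and later commits in this series rename parts of this schema.

```go
package main

import (
	"fmt"

	"github.com/bitly/go-simplejson"

	"github.com/ozontech/file.d/pipeline/antispam"
)

func main() {
	// Hypothetical settings fragment; only the field names mirror the ctor code.
	raw := []byte(`{
		"default": {"limit": 3000},
		"rules": [
			{
				"limit": 100,
				"cond": {
					"op": "and",
					"operands": [
						{"op": "prefix", "data": "source_name", "values": ["/var/log/pods/"]},
						{"op": "contains", "data": "event", "values": ["readiness probe failed"]}
					]
				}
			}
		]
	}`)

	settings, err := simplejson.NewJson(raw)
	if err != nil {
		panic(err)
	}

	// ExtractV2 walks the JSON above and builds an *antispam.Antispam with one rule.
	a, err := antispam.ExtractV2(settings)
	fmt.Println(a, err)
}
```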
pipeline/antispam/antispammer.go | 92 ++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index b90ced6fe..07b0ec8cf 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -163,6 +163,89 @@ func (a *Antispammer) IsSpam( return x >= int32(a.threshold) } +func (a *Antispammer) isSpamNew( + id string, + name string, + isNewSource bool, + event []byte, + timeEvent time.Time, + meta map[string]string, +) bool { + if !a.antispam.enabled { + return false + } + + key := id + ruleIndex := -1 + + for i := range a.antispam.rules { + rule := a.antispam.rules[i] + if rule.Condition.check(event, []byte(name), meta) { + if a.antispam.defaultLimit == -1 { + return false + } else { + key = rule.MapKey + ruleIndex = i + break + } + } + } + + if a.antispam.defaultLimit == -1 { + return false + } + + a.mu.RLock() + src, has := a.sources[key] + a.mu.RUnlock() + + timeEventSeconds := timeEvent.UnixNano() + + if !has { + a.mu.Lock() + if newSrc, has := a.sources[key]; has { + src = newSrc + } else { + src = source{ + counter: &atomic.Int32{}, + name: name, + timestamp: &atomic.Int64{}, + } + src.timestamp.Add(timeEventSeconds) + a.sources[key] = src + } + a.mu.Unlock() + } + + if isNewSource { + src.counter.Swap(0) + return false + } + + x := src.counter.Load() + diff := timeEventSeconds - src.timestamp.Swap(timeEventSeconds) + if diff < a.maintenanceInterval.Nanoseconds() { + x = src.counter.Inc() + } + if x == int32(a.threshold) { + src.counter.Swap(int32(a.unbanIterations * a.threshold)) + a.activeMetric.Set(1) + a.banMetric.WithLabelValues(name).Inc() + a.logger.Warn("source has been banned", + zap.Any("id", id), zap.String("name", name), + zap.Time("time_event", timeEvent), zap.Int64("diff_nsec", diff), + zap.Int64("maintenance_nsec", a.maintenanceInterval.Nanoseconds()), + zap.Int32("counter", src.counter.Load()), + ) + } + + if ruleIndex == -1 { + return x >= int32(a.antispam.defaultLimit) + } + + return x >= int32(a.antispam.rules[ruleIndex].Limit) +} + func (a *Antispammer) Maintenance() { a.mu.Lock() @@ -252,6 +335,10 @@ func NewAntispam(defaultLimit int, rules []Rule) (*Antispam, error) { return &Antispam{enabled: false}, nil } + for i := range rules { + rules[i].Prepare(i) + } + return &Antispam{ rules: rules, defaultLimit: defaultLimit, @@ -262,6 +349,11 @@ func NewAntispam(defaultLimit int, rules []Rule) (*Antispam, error) { type Rule struct { Condition Node Limit int + MapKey string +} + +func (r *Rule) Prepare(id int) { + r.MapKey = fmt.Sprintf("#=%d=#", id) } func checkLimit(limit int) error { From be731a2e978136d83a64b7cf357bd1574e95276e Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 8 Jul 2025 18:39:02 +0300 Subject: [PATCH 19/75] Fix linter errors --- pipeline/antispam/antispammer.go | 4 ++-- pipeline/antispam/ctor.go | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index 07b0ec8cf..d54faf475 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -92,10 +92,10 @@ func (a *Antispammer) IsSpam( isNewSource bool, event []byte, timeEvent time.Time, - _ map[string]string, + meta map[string]string, ) bool { if a.antispam != nil { - panic("proc new antispam") + return a.isSpamNew(id, name, isNewSource, event, timeEvent, meta) } if a.threshold <= 0 { diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index 
049805408..02c520313 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -34,7 +34,7 @@ func ExtractV2(jsonNode *simplejson.Json) (*Antispam, error) { func extractRules(jsonNode *simplejson.Json) ([]Rule, error) { rules := jsonNode.Get("rules") - var result []Rule + result := make([]Rule, 0) for i := range rules.MustArray() { ruleRaw := rules.GetIndex(i) rule, err := extractRule(ruleRaw) @@ -81,7 +81,7 @@ func extractNode(jsonNode *simplejson.Json) (Node, error) { func extractLogicalNode(op string, jsonNode *simplejson.Json) (Node, error) { rawOperands := jsonNode.Get("operands") - var operands []Node + operands := make([]Node, 0) for i := range rawOperands.MustArray() { opNode := rawOperands.GetIndex(i) operand, err := extractNode(opNode) @@ -123,7 +123,7 @@ func extractFieldOpVals(jsonNode *simplejson.Json) ([][]byte, error) { return nil, errors.New(`field "values" not found'`) } - var result [][]byte + result := make([][]byte, 0) for i := range values.MustArray() { curValue, err := values.GetIndex(i).String() if err != nil { From e586edf3040a93518e78ff19b23f999dfd1bfce0 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 8 Jul 2025 21:14:11 +0300 Subject: [PATCH 20/75] Edit doc --- pipeline/do_if/README.md | 18 +++++++--- pipeline/do_if/field_op.go | 72 +++++++++++++++++++++----------------- 2 files changed, 53 insertions(+), 37 deletions(-) diff --git a/pipeline/do_if/README.md b/pipeline/do_if/README.md index 5ac2dd96e..c6f9cdf92 100755 --- a/pipeline/do_if/README.md +++ b/pipeline/do_if/README.md @@ -54,7 +54,7 @@ pipelines: ## Field operations -`equal` checks whether the field value is equal to one of the elements in the values list. +Operation `equal` checks whether the field value is equal to one of the elements in the values list. Example: ```yaml @@ -76,7 +76,9 @@ Result: {"pod":"test-pod","service":"test-service-1"} # not discarded ``` -`contains` checks whether the field value contains one of the elements the in values list. +
+
+Operation `contains` checks whether the field value contains one of the elements in the values list.
 
 Example:
 ```yaml
@@ -98,7 +100,9 @@ Result:
 {"pod":"test-pod","service":"test-service-1"} # not discarded
 ```
 
-`prefix` checks whether the field value has prefix equal to one of the elements in the values list.
+
+ +Operation `prefix` checks whether the field value has prefix equal to one of the elements in the values list. Example: ```yaml @@ -120,7 +124,9 @@ Result: {"pod":"test-pod","service":"test-service-1"} # not discarded ``` -`suffix` checks whether the field value has suffix equal to one of the elements in the values list. +
+ +Operation `suffix` checks whether the field value has suffix equal to one of the elements in the values list. Example: ```yaml @@ -142,7 +148,9 @@ Result: {"pod":"test-pod","service":"test-service-1"} # not discarded ``` -`regex` checks whether the field matches any regex from the values list. +
+ +Operation `regex` checks whether the field matches any regex from the values list. Example: ```yaml diff --git a/pipeline/do_if/field_op.go b/pipeline/do_if/field_op.go index 5a45c4c18..ae67b4d0a 100644 --- a/pipeline/do_if/field_op.go +++ b/pipeline/do_if/field_op.go @@ -10,8 +10,35 @@ import ( insaneJSON "github.com/ozontech/insane-json" ) +/*{ do-if-field-op-node +DoIf field op node is considered to always be a leaf in the DoIf tree. It checks byte representation of the value by the given field path. +Array and object values are considered as not matched since encoding them to bytes leads towards large CPU and memory consumption. + +Params: + - `op` - value from field operations list. Required. + - `field` - path to field in JSON tree. If empty, root value is checked. Path to nested fields is delimited by dots `"."`, e.g. `"field.subfield"` for `{"field": {"subfield": "val"}}`. + If the field name contains dots in it they should be shielded with `"\"`, e.g. `"exception\.type"` for `{"exception.type": "example"}`. Default empty. + - `values` - list of values to check field. Required non-empty. + - `case_sensitive` - flag indicating whether checks are performed in case sensitive way. Default `true`. + Note: case insensitive checks can cause CPU and memory overhead since every field value will be converted to lower letters. + +Example: +```yaml +pipelines: + tests: + actions: + - type: discard + do_if: + op: suffix + field: pod + values: [pod-1, pod-2] + case_sensitive: true +``` + +}*/ + /*{ do-if-field-op -`equal` checks whether the field value is equal to one of the elements in the values list. +Operation `equal` checks whether the field value is equal to one of the elements in the values list. Example: ```yaml @@ -33,7 +60,9 @@ Result: {"pod":"test-pod","service":"test-service-1"} # not discarded ``` -`contains` checks whether the field value contains one of the elements the in values list. +
+ +Operation `contains` checks whether the field value contains one of the elements the in values list. Example: ```yaml @@ -55,7 +84,9 @@ Result: {"pod":"test-pod","service":"test-service-1"} # not discarded ``` -`prefix` checks whether the field value has prefix equal to one of the elements in the values list. +
+ +Operation `prefix` checks whether the field value has prefix equal to one of the elements in the values list. Example: ```yaml @@ -77,7 +108,9 @@ Result: {"pod":"test-pod","service":"test-service-1"} # not discarded ``` -`suffix` checks whether the field value has suffix equal to one of the elements in the values list. +
+ +Operation `suffix` checks whether the field value has suffix equal to one of the elements in the values list. Example: ```yaml @@ -99,7 +132,9 @@ Result: {"pod":"test-pod","service":"test-service-1"} # not discarded ``` -`regex` checks whether the field matches any regex from the values list. +
+ +Operation `regex` checks whether the field matches any regex from the values list. Example: ```yaml @@ -125,33 +160,6 @@ Result: }*/ -/*{ do-if-field-op-node -DoIf field op node is considered to always be a leaf in the DoIf tree. It checks byte representation of the value by the given field path. -Array and object values are considered as not matched since encoding them to bytes leads towards large CPU and memory consumption. - -Params: - - `op` - value from field operations list. Required. - - `field` - path to field in JSON tree. If empty, root value is checked. Path to nested fields is delimited by dots `"."`, e.g. `"field.subfield"` for `{"field": {"subfield": "val"}}`. - If the field name contains dots in it they should be shielded with `"\"`, e.g. `"exception\.type"` for `{"exception.type": "example"}`. Default empty. - - `values` - list of values to check field. Required non-empty. - - `case_sensitive` - flag indicating whether checks are performed in case sensitive way. Default `true`. - Note: case insensitive checks can cause CPU and memory overhead since every field value will be converted to lower letters. - -Example: -```yaml -pipelines: - tests: - actions: - - type: discard - do_if: - op: suffix - field: pod - values: [pod-1, pod-2] - case_sensitive: true -``` - -}*/ - type fieldOpNode struct { fieldPath []string fieldPathStr string From 979a92f43323b23c92b9faffc9e626d8c64ba009 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 8 Jul 2025 21:18:15 +0300 Subject: [PATCH 21/75] Fix doc --- pipeline/do_if/README.md | 2 ++ pipeline/do_if/field_op.go | 2 ++ 2 files changed, 4 insertions(+) diff --git a/pipeline/do_if/README.md b/pipeline/do_if/README.md index c6f9cdf92..4d06da72c 100755 --- a/pipeline/do_if/README.md +++ b/pipeline/do_if/README.md @@ -174,6 +174,8 @@ Result: {"pod":"service123","service":"test-service-1"} # not discarded ``` +
+ ## Logical op node DoIf logical op node is a node considered to be the root or an edge between nodes. diff --git a/pipeline/do_if/field_op.go b/pipeline/do_if/field_op.go index ae67b4d0a..f965cb3d3 100644 --- a/pipeline/do_if/field_op.go +++ b/pipeline/do_if/field_op.go @@ -158,6 +158,8 @@ Result: {"pod":"service123","service":"test-service-1"} # not discarded ``` +
+ }*/ type fieldOpNode struct { From 94479e0b7caea4eb6a33643d5a1b03154efd42b8 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Fri, 11 Jul 2025 17:46:37 +0300 Subject: [PATCH 22/75] Make funcs private --- pipeline/checker/checker.go | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/pipeline/checker/checker.go b/pipeline/checker/checker.go index 124b4208f..de163ab56 100644 --- a/pipeline/checker/checker.go +++ b/pipeline/checker/checker.go @@ -206,20 +206,20 @@ func (n *Checker) Check(data []byte) bool { return false } -func Assert(b bool, msg string) { +func assert(b bool, msg string) { if !b { panic(msg) } } -func AssertEqual[T comparable](a, b T, msg string) { - Assert(a == b, fmt.Sprintf("%s: %v != %v", msg, a, b)) +func assertEqual[T comparable](a, b T, msg string) { + assert(a == b, fmt.Sprintf("%s: %v != %v", msg, a, b)) } -func AssertEqualValues(a, b [][]byte, msg string) { - AssertEqual(len(a), len(b), fmt.Sprintf("%s: different values count", msg)) +func assertEqualValues(a, b [][]byte, msg string) { + assertEqual(len(a), len(b), fmt.Sprintf("%s: different values count", msg)) for i := range a { - Assert( + assert( bytes.Equal(a[i], b[i]), fmt.Sprintf("%s: different values at pos %d: %s != %s", msg, i, a[i], b[i], @@ -235,30 +235,30 @@ func Equal(a, b *Checker) (err error) { } }() - AssertEqual(a.op, b.op, "different op") - AssertEqual(a.caseSensitive, b.caseSensitive, "different case_sensitive") - AssertEqualValues(a.values, b.values, "different values") + assertEqual(a.op, b.op, "different op") + assertEqual(a.caseSensitive, b.caseSensitive, "different case_sensitive") + assertEqualValues(a.values, b.values, "different values") - AssertEqual(len(a.valuesBySize), len(b.valuesBySize), "different valuesBySize len") + assertEqual(len(a.valuesBySize), len(b.valuesBySize), "different valuesBySize len") for size := range a.valuesBySize { _, found := b.valuesBySize[size] - Assert(found, fmt.Sprintf("not found values by size %d", size)) - AssertEqualValues( + assert(found, fmt.Sprintf("not found values by size %d", size)) + assertEqualValues( a.valuesBySize[size], b.valuesBySize[size], fmt.Sprintf("different values by size %d", size), ) } - AssertEqual(len(a.reValues), len(b.reValues), "different regex values count") + assertEqual(len(a.reValues), len(b.reValues), "different regex values count") for i := range a.reValues { - AssertEqual( + assertEqual( a.reValues[i].String(), b.reValues[i].String(), fmt.Sprintf("different regex values at pos %d", i), ) } - AssertEqual(a.minValLen, b.minValLen, "different min value len") - AssertEqual(a.maxValLen, b.maxValLen, "different max value len") + assertEqual(a.minValLen, b.minValLen, "different min value len") + assertEqual(a.maxValLen, b.maxValLen, "different max value len") return nil } From af8ce3388ef82d4aa7bc17bb7028f386eacb9385 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Fri, 11 Jul 2025 18:49:40 +0300 Subject: [PATCH 23/75] Return result instantly if blocked --- pipeline/antispam/antispammer.go | 45 ++++++++++---------------------- pipeline/antispam/rule.go | 31 ++++++++++++++++++++++ 2 files changed, 45 insertions(+), 31 deletions(-) create mode 100644 pipeline/antispam/rule.go diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index d54faf475..b41cbfddb 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -13,6 +13,11 @@ import ( "go.uber.org/zap" ) +const ( + limitValueUnlimited = -1 + limitValueBlocked = 0 +) 
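// The sentinel values above mirror how isSpamNew treats a matched rule's limit:
//   -1 (limitValueUnlimited): events matching the rule are never treated as spam;
//    0 (limitValueBlocked):   matching events are treated as spam immediately;
//   >0:                       matching events are counted and become spam once the limit is reached.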
+ // Antispammer makes a decision on the need to parse the input log. // It can be useful when any application writes logs at speed faster than File.d can read it. // @@ -181,9 +186,12 @@ func (a *Antispammer) isSpamNew( for i := range a.antispam.rules { rule := a.antispam.rules[i] if rule.Condition.check(event, []byte(name), meta) { - if a.antispam.defaultLimit == -1 { + switch rule.Limit { + case limitValueUnlimited: return false - } else { + case limitValueBlocked: + return true + default: key = rule.MapKey ruleIndex = i break @@ -191,8 +199,11 @@ func (a *Antispammer) isSpamNew( } } - if a.antispam.defaultLimit == -1 { + switch a.antispam.defaultLimit { + case limitValueUnlimited: return false + case limitValueBlocked: + return true } a.mu.RLock() @@ -345,31 +356,3 @@ func NewAntispam(defaultLimit int, rules []Rule) (*Antispam, error) { enabled: true, }, nil } - -type Rule struct { - Condition Node - Limit int - MapKey string -} - -func (r *Rule) Prepare(id int) { - r.MapKey = fmt.Sprintf("#=%d=#", id) -} - -func checkLimit(limit int) error { - if limit < -1 { - return fmt.Errorf("invalid limit: %d", limit) - } - return nil -} - -func newRule(condition Node, limit int) (Rule, error) { - if err := checkLimit(limit); err != nil { - return Rule{}, err - } - - return Rule{ - Condition: condition, - Limit: limit, - }, nil -} diff --git a/pipeline/antispam/rule.go b/pipeline/antispam/rule.go new file mode 100644 index 000000000..61bc50832 --- /dev/null +++ b/pipeline/antispam/rule.go @@ -0,0 +1,31 @@ +package antispam + +import "fmt" + +type Rule struct { + Condition Node + Limit int + MapKey string +} + +func (r *Rule) Prepare(id int) { + r.MapKey = fmt.Sprintf("#=%d=#", id) +} + +func checkLimit(limit int) error { + if limit < -1 { + return fmt.Errorf("invalid limit: %d", limit) + } + return nil +} + +func newRule(condition Node, limit int) (Rule, error) { + if err := checkLimit(limit); err != nil { + return Rule{}, err + } + + return Rule{ + Condition: condition, + Limit: limit, + }, nil +} From 24865b70f3d73c418bc5be63252eda09475de92d Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 14 Jul 2025 13:47:41 +0300 Subject: [PATCH 24/75] Rename 'limit' => 'threshold' --- fd/util.go | 33 +++++++++++-------------------- pipeline/antispam/antispammer.go | 32 +++++++++++++++--------------- pipeline/antispam/ctor.go | 22 +-------------------- pipeline/antispam/logical_node.go | 2 +- pipeline/antispam/node.go | 2 +- pipeline/antispam/rule.go | 19 +++++++++--------- pipeline/antispam/value_node.go | 2 +- 7 files changed, 42 insertions(+), 70 deletions(-) diff --git a/fd/util.go b/fd/util.go index 5b9346879..c988bce1c 100644 --- a/fd/util.go +++ b/fd/util.go @@ -18,7 +18,7 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings { capacity := pipeline.DefaultCapacity antispamThreshold := pipeline.DefaultAntispamThreshold var antispamExceptions antispam.Exceptions - var antispamV2 *antispam.Antispam + var antispammer *antispam.Antispam sourceNameMetaField := pipeline.DefaultSourceNameMetaField avgInputEventSize := pipeline.DefaultAvgInputEventSize maxInputEventSize := pipeline.DefaultMaxInputEventSize @@ -88,28 +88,19 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings { eventTimeout = i } + antispamThreshold = settings.Get("antispam_threshold").MustInt() + antispamThreshold *= int(maintenanceInterval / time.Second) + if antispamThreshold < 0 { + logger.Warn("negative antispam_threshold value, antispam disabled") + antispamThreshold = 0 + } + var err 
error - antispamV2, err = antispam.ExtractV2(settings) + antispamExceptions, err = extractAntispamExceptions(settings) if err != nil { - logger.Warnf( - "try to fallback to legacy antispam; can't get new antispam config: %s", - err.Error(), - ) - - antispamThreshold = settings.Get("antispam_threshold").MustInt() - antispamThreshold *= int(maintenanceInterval / time.Second) - if antispamThreshold < 0 { - logger.Warn("negative antispam_threshold value, antispam disabled") - antispamThreshold = 0 - } - - var err error - antispamExceptions, err = extractAntispamExceptions(settings) - if err != nil { - logger.Fatalf("extract exceptions: %s", err) - } - antispamExceptions.Prepare() + logger.Fatalf("extract exceptions: %s", err) } + antispamExceptions.Prepare() sourceNameMetaField = settings.Get("source_name_meta_field").MustString() isStrict = settings.Get("is_strict").MustBool() @@ -139,7 +130,7 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings { CutOffEventByLimitField: cutOffEventByLimitField, AntispamThreshold: antispamThreshold, AntispamExceptions: antispamExceptions, - Antispam: antispamV2, + Antispam: antispammer, SourceNameMetaField: sourceNameMetaField, MaintenanceInterval: maintenanceInterval, EventTimeout: eventTimeout, diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index b41cbfddb..0cd76504b 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -14,8 +14,8 @@ import ( ) const ( - limitValueUnlimited = -1 - limitValueBlocked = 0 + thresholdUnlimited = -1 + thresholdBlocked = 0 ) // Antispammer makes a decision on the need to parse the input log. @@ -186,23 +186,23 @@ func (a *Antispammer) isSpamNew( for i := range a.antispam.rules { rule := a.antispam.rules[i] if rule.Condition.check(event, []byte(name), meta) { - switch rule.Limit { - case limitValueUnlimited: + switch rule.Threshold { + case thresholdUnlimited: return false - case limitValueBlocked: + case thresholdBlocked: return true default: - key = rule.MapKey + key = rule.MetaKey ruleIndex = i break } } } - switch a.antispam.defaultLimit { - case limitValueUnlimited: + switch a.antispam.defThreshold { + case thresholdUnlimited: return false - case limitValueBlocked: + case thresholdBlocked: return true } @@ -251,10 +251,10 @@ func (a *Antispammer) isSpamNew( } if ruleIndex == -1 { - return x >= int32(a.antispam.defaultLimit) + return x >= int32(a.antispam.defThreshold) } - return x >= int32(a.antispam.rules[ruleIndex].Limit) + return x >= int32(a.antispam.rules[ruleIndex].Threshold) } func (a *Antispammer) Maintenance() { @@ -333,16 +333,16 @@ func (e Exceptions) Prepare() { type Antispam struct { rules []Rule - defaultLimit int + defThreshold int enabled bool } -func NewAntispam(defaultLimit int, rules []Rule) (*Antispam, error) { - if err := checkLimit(defaultLimit); err != nil { +func NewAntispam(defThreshold int, rules []Rule) (*Antispam, error) { + if err := checkThreshold(defThreshold); err != nil { return nil, err } - if defaultLimit == -1 && len(rules) == 0 { + if defThreshold == -1 && len(rules) == 0 { return &Antispam{enabled: false}, nil } @@ -352,7 +352,7 @@ func NewAntispam(defaultLimit int, rules []Rule) (*Antispam, error) { return &Antispam{ rules: rules, - defaultLimit: defaultLimit, + defThreshold: defThreshold, enabled: true, }, nil } diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index 02c520313..4e6a2784c 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -8,27 +8,7 @@ 
import ( ) func ExtractV2(jsonNode *simplejson.Json) (*Antispam, error) { - rules, err := extractRules(jsonNode) - if err != nil { - return nil, err - } - - defLimiter, ok := jsonNode.CheckGet("default") - if !ok { - return nil, fmt.Errorf(`field "default" not found`) - } - - defLimit, ok := defLimiter.CheckGet("limit") - if !ok { - return nil, fmt.Errorf(`field "limit" not found`) - } - - limit, err := defLimit.Int() - if err != nil { - return nil, err - } - - return NewAntispam(limit, rules) + panic("not impl") } func extractRules(jsonNode *simplejson.Json) ([]Rule, error) { diff --git a/pipeline/antispam/logical_node.go b/pipeline/antispam/logical_node.go index 0aacfe782..87c4601d2 100644 --- a/pipeline/antispam/logical_node.go +++ b/pipeline/antispam/logical_node.go @@ -62,7 +62,7 @@ func newLogicalNode(op string, operands []Node) (Node, error) { }, nil } -func (n *logicalNode) Type() nodeType { +func (n *logicalNode) getType() nodeType { return nodeTypeLogical } diff --git a/pipeline/antispam/node.go b/pipeline/antispam/node.go index ef9c95c15..fe10f0a0c 100644 --- a/pipeline/antispam/node.go +++ b/pipeline/antispam/node.go @@ -8,6 +8,6 @@ const ( ) type Node interface { - Type() nodeType + getType() nodeType check(event []byte, sourceName []byte, metadata map[string]string) bool } diff --git a/pipeline/antispam/rule.go b/pipeline/antispam/rule.go index 61bc50832..94d457440 100644 --- a/pipeline/antispam/rule.go +++ b/pipeline/antispam/rule.go @@ -4,28 +4,29 @@ import "fmt" type Rule struct { Condition Node - Limit int - MapKey string + Threshold int + MetaKey string } func (r *Rule) Prepare(id int) { - r.MapKey = fmt.Sprintf("#=%d=#", id) + r.MetaKey = fmt.Sprintf("#=%d=#", id) } -func checkLimit(limit int) error { - if limit < -1 { - return fmt.Errorf("invalid limit: %d", limit) +func checkThreshold(threshold int) error { + if threshold < -1 { + return fmt.Errorf("invalid threshold: %d", threshold) } + return nil } -func newRule(condition Node, limit int) (Rule, error) { - if err := checkLimit(limit); err != nil { +func newRule(condition Node, threshold int) (Rule, error) { + if err := checkThreshold(threshold); err != nil { return Rule{}, err } return Rule{ Condition: condition, - Limit: limit, + Threshold: threshold, }, nil } diff --git a/pipeline/antispam/value_node.go b/pipeline/antispam/value_node.go index a12ece761..4ed8b99d9 100644 --- a/pipeline/antispam/value_node.go +++ b/pipeline/antispam/value_node.go @@ -84,7 +84,7 @@ func newValueNode( }, nil } -func (n *valueNode) Type() nodeType { +func (n *valueNode) getType() nodeType { return nodeTypeValue } From 5baea95d50118716d314972317270295a3014338 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 14 Jul 2025 15:18:39 +0300 Subject: [PATCH 25/75] Add test utils. 
Change meta tag parsing --- pipeline/antispam/ctor_utils.go | 43 +++++++++++++++++++++++++++++++++ pipeline/antispam/value_node.go | 30 ++++++++++------------- 2 files changed, 56 insertions(+), 17 deletions(-) create mode 100644 pipeline/antispam/ctor_utils.go diff --git a/pipeline/antispam/ctor_utils.go b/pipeline/antispam/ctor_utils.go new file mode 100644 index 000000000..7fc037538 --- /dev/null +++ b/pipeline/antispam/ctor_utils.go @@ -0,0 +1,43 @@ +package antispam + +import ( + "encoding/json" + "fmt" +) + +func getAny(node map[string]any, field string) any { + res, has := node[field] + if !has { + panic(fmt.Sprintf("field %q not found", field)) + } + + return res +} + +func get[T any](node map[string]any, field string) T { + fieldNode := getAny(node, field) + + result, ok := fieldNode.(T) + if !ok { + panic(fmt.Sprintf("field %q type mismatch: expected=%T got=%T", field, *new(T), fieldNode)) + } + + return result +} + +func anyToInt(v any) int { + switch vNum := v.(type) { + case int: + return vNum + case float64: + return int(vNum) + case json.Number: + vInt64, err := vNum.Int64() + if err != nil { + panic(err.Error()) + } + return int(vInt64) + default: + panic(fmt.Sprintf("type=%T not convertable to int", v)) + } +} diff --git a/pipeline/antispam/value_node.go b/pipeline/antispam/value_node.go index 4ed8b99d9..bef9b0500 100644 --- a/pipeline/antispam/value_node.go +++ b/pipeline/antispam/value_node.go @@ -1,8 +1,8 @@ package antispam import ( - "errors" "fmt" + "strings" "github.com/ozontech/file.d/pipeline/checker" ) @@ -32,18 +32,20 @@ const ( dataTypeEventTag = "event" dataTypeSourceNameTag = "source_name" dataTypeMetaTag = "meta" + + metaTagPrefix = "meta:" ) -func stringToDataType(s string) (dataType, error) { - switch s { - case dataTypeEventTag: - return dataTypeEvent, nil - case dataTypeSourceNameTag: - return dataTypeSourceName, nil - case dataTypeMetaTag: - return dataTypeMeta, nil +func stringToDataType(s string) (dataType, string, error) { + switch { + case s == dataTypeEventTag: + return dataTypeEvent, "", nil + case s == dataTypeSourceNameTag: + return dataTypeSourceName, "", nil + case strings.HasPrefix(s, metaTagPrefix): + return dataTypeMeta, strings.TrimPrefix(s, metaTagPrefix), nil default: - return -1, fmt.Errorf("unknown checked type data: %s", s) + return -1, "", fmt.Errorf("unparsable check data tag: %s", s) } } @@ -66,17 +68,11 @@ func newValueNode( } var dType dataType - dType, err = stringToDataType(checkDataTag) + dType, metaKey, err = stringToDataType(checkDataTag) if err != nil { return nil, err } - if dType == dataTypeMeta { - if metaKey == "" { - return nil, errors.New("empty meta key") - } - } - return &valueNode{ dataType: dType, metaKey: metaKey, From 90210f4322f81a410ffa18a9c8995b667048acaa Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 14 Jul 2025 16:15:32 +0300 Subject: [PATCH 26/75] Refactor --- pipeline/antispam/ctor.go | 116 ------------------------------ pipeline/antispam/ctor_utils.go | 10 +++ pipeline/antispam/logical_node.go | 17 ++--- pipeline/antispam/value_node.go | 22 +++--- 4 files changed, 25 insertions(+), 140 deletions(-) diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index 4e6a2784c..258ec67fd 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -1,117 +1 @@ package antispam - -import ( - "errors" - "fmt" - - "github.com/bitly/go-simplejson" -) - -func ExtractV2(jsonNode *simplejson.Json) (*Antispam, error) { - panic("not impl") -} - -func extractRules(jsonNode 
*simplejson.Json) ([]Rule, error) { - rules := jsonNode.Get("rules") - - result := make([]Rule, 0) - for i := range rules.MustArray() { - ruleRaw := rules.GetIndex(i) - rule, err := extractRule(ruleRaw) - if err != nil { - return nil, fmt.Errorf("extract rule: %w", err) - } - - result = append(result, rule) - } - - return result, nil -} - -func extractRule(jsonNode *simplejson.Json) (Rule, error) { - condition, err := extractNode(jsonNode.Get("cond")) - if err != nil { - return Rule{}, fmt.Errorf("extract cond: %w", err) - } - - limit, err := jsonNode.Get("limit").Int() - if err != nil { - return Rule{}, fmt.Errorf("limit is not int: %w", err) - } - - return newRule(condition, limit) -} - -func extractNode(jsonNode *simplejson.Json) (Node, error) { - switch op := jsonNode.Get("op").MustString(); op { - case "and", "or", "not": - return extractLogicalNode(op, jsonNode) - case - "equal", - "contains", - "prefix", - "suffix", - "regex": - return extractValueNode(op, jsonNode) - default: - return nil, fmt.Errorf("unknown op: %s", op) - } -} - -func extractLogicalNode(op string, jsonNode *simplejson.Json) (Node, error) { - rawOperands := jsonNode.Get("operands") - - operands := make([]Node, 0) - for i := range rawOperands.MustArray() { - opNode := rawOperands.GetIndex(i) - operand, err := extractNode(opNode) - if err != nil { - return nil, fmt.Errorf("extract operand for logical op %q: %w", op, err) - } - operands = append(operands, operand) - } - - result, err := newLogicalNode(op, operands) - if err != nil { - return nil, fmt.Errorf("init logical node: %w", err) - } - - return result, nil -} - -func extractValueNode(op string, jsonNode *simplejson.Json) (Node, error) { - caseSensitive := jsonNode.Get("case_sensitive").MustBool(true) - checkDataTag := jsonNode.Get("data").MustString() - metaKey := jsonNode.Get("meta_key").MustString() - - values, err := extractFieldOpVals(jsonNode) - if err != nil { - return nil, fmt.Errorf("extract values: %w", err) - } - - result, err := newValueNode(op, caseSensitive, values, checkDataTag, metaKey) - if err != nil { - return nil, fmt.Errorf("init value node: %w", err) - } - - return result, nil -} - -func extractFieldOpVals(jsonNode *simplejson.Json) ([][]byte, error) { - values, has := jsonNode.CheckGet("values") - if !has { - return nil, errors.New(`field "values" not found'`) - } - - result := make([][]byte, 0) - for i := range values.MustArray() { - curValue, err := values.GetIndex(i).String() - if err != nil { - return nil, err - } - - result = append(result, []byte(curValue)) - } - - return result, nil -} diff --git a/pipeline/antispam/ctor_utils.go b/pipeline/antispam/ctor_utils.go index 7fc037538..a310bbf32 100644 --- a/pipeline/antispam/ctor_utils.go +++ b/pipeline/antispam/ctor_utils.go @@ -5,6 +5,16 @@ import ( "fmt" ) +func assert(b bool, msg string) { + if !b { + panic(msg) + } +} + +func assertEqual[T comparable](a, b T, msg string) { + assert(a == b, fmt.Sprintf("%s: %v != %v", msg, a, b)) +} + func getAny(node map[string]any, field string) any { res, has := node[field] if !has { diff --git a/pipeline/antispam/logical_node.go b/pipeline/antispam/logical_node.go index 87c4601d2..749feb60f 100644 --- a/pipeline/antispam/logical_node.go +++ b/pipeline/antispam/logical_node.go @@ -1,7 +1,6 @@ package antispam import ( - "errors" "fmt" ) @@ -37,10 +36,8 @@ type logicalNode struct { operands []Node } -func newLogicalNode(op string, operands []Node) (Node, error) { - if len(operands) == 0 { - return nil, errors.New("logical op must have at least one 
operand") - } +func newLogicalNode(op string, operands []Node) *logicalNode { + assert(len(operands) > 0, "logical op must have at least one operand") var lop logicalOpType switch op { @@ -50,16 +47,15 @@ func newLogicalNode(op string, operands []Node) (Node, error) { lop = logicalAnd case logicalNotTag: lop = logicalNot - if len(operands) > 1 { - return nil, fmt.Errorf("logical not must have exactly one operand, got %d", len(operands)) - } + assert(len(operands) == 1, fmt.Sprintf("logical not must have exactly one operand, got %d", len(operands))) default: - return nil, fmt.Errorf("unknown logical op %q", op) + panic(fmt.Sprintf("unknown logical op %q", op)) } + return &logicalNode{ op: lop, operands: operands, - }, nil + } } func (n *logicalNode) getType() nodeType { @@ -82,7 +78,6 @@ func (n *logicalNode) check(event []byte, sourceName []byte, metadata map[string } } return false - case logicalNot: return !n.operands[0].check(event, sourceName, metadata) default: diff --git a/pipeline/antispam/value_node.go b/pipeline/antispam/value_node.go index bef9b0500..92656f178 100644 --- a/pipeline/antispam/value_node.go +++ b/pipeline/antispam/value_node.go @@ -36,16 +36,16 @@ const ( metaTagPrefix = "meta:" ) -func stringToDataType(s string) (dataType, string, error) { +func stringToDataType(s string) (dataType, string) { switch { case s == dataTypeEventTag: - return dataTypeEvent, "", nil + return dataTypeEvent, "" case s == dataTypeSourceNameTag: - return dataTypeSourceName, "", nil + return dataTypeSourceName, "" case strings.HasPrefix(s, metaTagPrefix): - return dataTypeMeta, strings.TrimPrefix(s, metaTagPrefix), nil + return dataTypeMeta, strings.TrimPrefix(s, metaTagPrefix) default: - return -1, "", fmt.Errorf("unparsable check data tag: %s", s) + panic(fmt.Sprintf("unparsable check data tag: %s", s)) } } @@ -61,23 +61,19 @@ func newValueNode( values [][]byte, checkDataTag string, metaKey string, -) (*valueNode, error) { +) *valueNode { c, err := checker.New(opTag, caseSensitive, values) if err != nil { - return nil, fmt.Errorf("init checker: %w", err) + panic(fmt.Sprintf("init checker: %s", err.Error())) } - var dType dataType - dType, metaKey, err = stringToDataType(checkDataTag) - if err != nil { - return nil, err - } + dType, metaKey := stringToDataType(checkDataTag) return &valueNode{ dataType: dType, metaKey: metaKey, checker: c, - }, nil + } } func (n *valueNode) getType() nodeType { From d9b80eabc61a879af7b368231a3116f11731cdfa Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 14 Jul 2025 19:54:13 +0300 Subject: [PATCH 27/75] Separate logic op. Refactor --- pipeline/do_if/README.idoc.md | 2 +- pipeline/do_if/README.md | 12 +- pipeline/do_if/do_if_test.go | 3 +- pipeline/do_if/logical_op.go | 231 ++++++++++++++++------------------ pipeline/logic/logic.go | 43 +++++++ 5 files changed, 157 insertions(+), 134 deletions(-) create mode 100644 pipeline/logic/logic.go diff --git a/pipeline/do_if/README.idoc.md b/pipeline/do_if/README.idoc.md index 0468988de..d7f2028b1 100644 --- a/pipeline/do_if/README.idoc.md +++ b/pipeline/do_if/README.idoc.md @@ -18,7 +18,7 @@ the chain of Match func calls are performed across the whole tree. 
@do-if-logical-op-node ## Logical operations -@do-if-logical-op|description +@do-if-logical-op ## Length comparison op node @do-if-len-cmp-op-node diff --git a/pipeline/do_if/README.md b/pipeline/do_if/README.md index 4d06da72c..c7108e829 100755 --- a/pipeline/do_if/README.md +++ b/pipeline/do_if/README.md @@ -207,7 +207,7 @@ pipelines: ## Logical operations -**`Or`** accepts at least one operand and returns true on the first returned true from its operands. +Operation `or` accepts at least one operand and returns true on the first returned true from its operands. Example: ```yaml @@ -226,7 +226,7 @@ pipelines: values: [test-service] ``` -result: +Result: ``` {"pod":"test-pod-1","service":"test-service"} # discarded {"pod":"test-pod-2","service":"test-service-2"} # discarded @@ -236,7 +236,7 @@ result:
-**`And`** accepts at least one operand and returns true if all operands return true +Operation `and` accepts at least one operand and returns true if all operands return true (in other words returns false on the first returned false from its operands). Example: @@ -256,7 +256,7 @@ pipelines: values: [test-service] ``` -result: +Result: ``` {"pod":"test-pod-1","service":"test-service"} # discarded {"pod":"test-pod-2","service":"test-service-2"} # not discarded @@ -266,7 +266,7 @@ result:
-**`Not`** accepts exactly one operand and returns inverted result of its operand. +Operation `not` accepts exactly one operand and returns inverted result of its operand. Example: ```yaml @@ -282,7 +282,7 @@ pipelines: values: [test-service] ``` -result: +Result: ``` {"pod":"test-pod-1","service":"test-service"} # not discarded {"pod":"test-pod-2","service":"test-service-2"} # discarded diff --git a/pipeline/do_if/do_if_test.go b/pipeline/do_if/do_if_test.go index cc2835302..957bdd292 100644 --- a/pipeline/do_if/do_if_test.go +++ b/pipeline/do_if/do_if_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/ozontech/file.d/pipeline/checker" + "github.com/ozontech/file.d/pipeline/logic" insaneJSON "github.com/ozontech/insane-json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -187,7 +188,7 @@ func TestBuildNodes(t *testing.T) { }, }, want: &logicalNode{ - op: logicalOr, + op: logic.Or, operands: []Node{ &fieldOpNode{ fieldPath: []string{"log", "pod"}, diff --git a/pipeline/do_if/logical_op.go b/pipeline/do_if/logical_op.go index 2fbdb885a..6fb05b7a5 100644 --- a/pipeline/do_if/logical_op.go +++ b/pipeline/do_if/logical_op.go @@ -4,118 +4,10 @@ import ( "errors" "fmt" + "github.com/ozontech/file.d/pipeline/logic" insaneJSON "github.com/ozontech/insane-json" ) -// ! do-if-logical-op -// ^ do-if-logical-op - -type logicalOpType int - -const ( - logicalOpUnknown logicalOpType = iota - logicalOr - logicalAnd - logicalNot -) - -func (t logicalOpType) String() string { - switch t { - case logicalOr: - return "or" - case logicalAnd: - return "and" - case logicalNot: - return "not" - default: - return "unknown" - } -} - -const ( - // > accepts at least one operand and returns true on the first returned true from its operands. - // > - // > Example: - // > ```yaml - // > pipelines: - // > test: - // > actions: - // > - type: discard - // > do_if: - // > op: or - // > operands: - // > - op: equal - // > field: pod - // > values: [test-pod-1, test-pod-2] - // > - op: equal - // > field: service - // > values: [test-service] - // > ``` - // > - // > result: - // > ``` - // > {"pod":"test-pod-1","service":"test-service"} # discarded - // > {"pod":"test-pod-2","service":"test-service-2"} # discarded - // > {"pod":"test-pod","service":"test-service"} # discarded - // > {"pod":"test-pod","service":"test-service-1"} # not discarded - // > ``` - logicalOrTag = "or" // * - - // > accepts at least one operand and returns true if all operands return true - // > (in other words returns false on the first returned false from its operands). - // > - // > Example: - // > ```yaml - // > pipelines: - // > test: - // > actions: - // > - type: discard - // > do_if: - // > op: and - // > operands: - // > - op: equal - // > field: pod - // > values: [test-pod-1, test-pod-2] - // > - op: equal - // > field: service - // > values: [test-service] - // > ``` - // > - // > result: - // > ``` - // > {"pod":"test-pod-1","service":"test-service"} # discarded - // > {"pod":"test-pod-2","service":"test-service-2"} # not discarded - // > {"pod":"test-pod","service":"test-service"} # not discarded - // > {"pod":"test-pod","service":"test-service-1"} # not discarded - // > ``` - logicalAndTag = "and" // * - - // > accepts exactly one operand and returns inverted result of its operand. 
- // > - // > Example: - // > ```yaml - // > pipelines: - // > test: - // > actions: - // > - type: discard - // > do_if: - // > op: not - // > operands: - // > - op: equal - // > field: service - // > values: [test-service] - // > ``` - // > - // > result: - // > ``` - // > {"pod":"test-pod-1","service":"test-service"} # not discarded - // > {"pod":"test-pod-2","service":"test-service-2"} # discarded - // > {"pod":"test-pod","service":"test-service"} # not discarded - // > {"pod":"test-pod","service":"test-service-1"} # discarded - // > ``` - logicalNotTag = "not" // * -) - /*{ do-if-logical-op-node DoIf logical op node is a node considered to be the root or an edge between nodes. It always has at least one operand which are other nodes and calls their checks @@ -146,8 +38,96 @@ pipelines: }*/ +/*{ do-if-logical-op +Operation `or` accepts at least one operand and returns true on the first returned true from its operands. + +Example: +```yaml +pipelines: + test: + actions: + - type: discard + do_if: + op: or + operands: + - op: equal + field: pod + values: [test-pod-1, test-pod-2] + - op: equal + field: service + values: [test-service] +``` + +Result: +``` +{"pod":"test-pod-1","service":"test-service"} # discarded +{"pod":"test-pod-2","service":"test-service-2"} # discarded +{"pod":"test-pod","service":"test-service"} # discarded +{"pod":"test-pod","service":"test-service-1"} # not discarded +``` + +
+ +Operation `and` accepts at least one operand and returns true if all operands return true +(in other words returns false on the first returned false from its operands). + +Example: +```yaml +pipelines: + test: + actions: + - type: discard + do_if: + op: and + operands: + - op: equal + field: pod + values: [test-pod-1, test-pod-2] + - op: equal + field: service + values: [test-service] +``` + +Result: +``` +{"pod":"test-pod-1","service":"test-service"} # discarded +{"pod":"test-pod-2","service":"test-service-2"} # not discarded +{"pod":"test-pod","service":"test-service"} # not discarded +{"pod":"test-pod","service":"test-service-1"} # not discarded +``` + +
+ +Operation `not` accepts exactly one operand and returns inverted result of its operand. + +Example: +```yaml +pipelines: + test: + actions: + - type: discard + do_if: + op: not + operands: + - op: equal + field: service + values: [test-service] +``` + +Result: +``` +{"pod":"test-pod-1","service":"test-service"} # not discarded +{"pod":"test-pod-2","service":"test-service-2"} # discarded +{"pod":"test-pod","service":"test-service"} # not discarded +{"pod":"test-pod","service":"test-service-1"} # discarded +``` + +
+ +}*/ + type logicalNode struct { - op logicalOpType + op logic.Op operands []Node } @@ -155,22 +135,20 @@ func newLogicalNode(op string, operands []Node) (Node, error) { if len(operands) == 0 { return nil, errors.New("logical op must have at least one operand") } - var lop logicalOpType - switch op { - case logicalOrTag: - lop = logicalOr - case logicalAndTag: - lop = logicalAnd - case logicalNotTag: - lop = logicalNot - if len(operands) > 1 { + + logicOp, err := logic.StringToOp(op) + if err != nil { + return nil, err + } + + if logicOp == logic.Not { + if len(operands) != 1 { return nil, fmt.Errorf("logical not must have exactly one operand, got %d", len(operands)) } - default: - return nil, fmt.Errorf("unknown logical op %q", op) } + return &logicalNode{ - op: lop, + op: logicOp, operands: operands, }, nil } @@ -181,24 +159,25 @@ func (n *logicalNode) Type() nodeType { func (n *logicalNode) check(eventRoot *insaneJSON.Root) bool { switch n.op { - case logicalOr: + case logic.Or: for _, op := range n.operands { if op.check(eventRoot) { return true } } return false - case logicalAnd: + case logic.And: for _, op := range n.operands { if !op.check(eventRoot) { return false } } return true - case logicalNot: + case logic.Not: return !n.operands[0].check(eventRoot) + default: + panic("unknown logical op") } - return false } func (n *logicalNode) isEqualTo(n2 Node, level int) error { diff --git a/pipeline/logic/logic.go b/pipeline/logic/logic.go new file mode 100644 index 000000000..fe5e9af5a --- /dev/null +++ b/pipeline/logic/logic.go @@ -0,0 +1,43 @@ +package logic + +import "fmt" + +type Op int + +const ( + And Op = iota + Or + Not +) + +const ( + AndTag = "and" + OrTag = "or" + NotTag = "not" +) + +func (op Op) String() string { + switch op { + case And: + return AndTag + case Or: + return OrTag + case Not: + return NotTag + default: + return "unknown" + } +} + +func StringToOp(s string) (Op, error) { + switch s { + case AndTag: + return And, nil + case OrTag: + return Or, nil + case NotTag: + return Not, nil + default: + return -1, fmt.Errorf("unknown logic op tag: %s", s) + } +} From bdf694275cc2eb0313d612c12a645dfb6f5b2399 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 14 Jul 2025 20:25:34 +0300 Subject: [PATCH 28/75] Refactor --- pipeline/antispam/logical_node.go | 68 ++++++++++--------------------- 1 file changed, 22 insertions(+), 46 deletions(-) diff --git a/pipeline/antispam/logical_node.go b/pipeline/antispam/logical_node.go index 749feb60f..79a9be901 100644 --- a/pipeline/antispam/logical_node.go +++ b/pipeline/antispam/logical_node.go @@ -1,61 +1,37 @@ package antispam import ( + "errors" "fmt" -) - -type logicalOpType int - -const ( - logicalAnd logicalOpType = iota - logicalOr - logicalNot -) - -func (t logicalOpType) String() string { - switch t { - case logicalAnd: - return "and" - case logicalOr: - return "or" - case logicalNot: - return "not" - default: - return "unknown" - } -} -const ( - logicalAndTag = "and" - logicalOrTag = "or" - logicalNotTag = "not" + "github.com/ozontech/file.d/pipeline/logic" ) type logicalNode struct { - op logicalOpType + op logic.Op operands []Node } -func newLogicalNode(op string, operands []Node) *logicalNode { - assert(len(operands) > 0, "logical op must have at least one operand") +func newLogicalNode(op string, operands []Node) (*logicalNode, error) { + if len(operands) == 0 { + return nil, errors.New("logical op must have at least one operand") + } - var lop logicalOpType - switch op { - case logicalOrTag: - lop = logicalOr - 
case logicalAndTag: - lop = logicalAnd - case logicalNotTag: - lop = logicalNot - assert(len(operands) == 1, fmt.Sprintf("logical not must have exactly one operand, got %d", len(operands))) - default: - panic(fmt.Sprintf("unknown logical op %q", op)) + logicOp, err := logic.StringToOp(op) + if err != nil { + return nil, err + } + + if logicOp == logic.Not { + if len(operands) != 1 { + return nil, fmt.Errorf("logical not must have exactly one operand, got %d", len(operands)) + } } return &logicalNode{ - op: lop, + op: logicOp, operands: operands, - } + }, nil } func (n *logicalNode) getType() nodeType { @@ -64,23 +40,23 @@ func (n *logicalNode) getType() nodeType { func (n *logicalNode) check(event []byte, sourceName []byte, metadata map[string]string) bool { switch n.op { - case logicalAnd: + case logic.And: for _, op := range n.operands { if !op.check(event, sourceName, metadata) { return false } } return true - case logicalOr: + case logic.Or: for _, op := range n.operands { if op.check(event, sourceName, metadata) { return true } } return false - case logicalNot: + case logic.Not: return !n.operands[0].check(event, sourceName, metadata) default: - panic(fmt.Sprintf("unknown logical op: %v", n.op)) + panic("unknown logical op: %v") } } From 7ba838ed13db2cf131d93dd0358f2e36f0106bee Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 14 Jul 2025 20:36:55 +0300 Subject: [PATCH 29/75] Refactor --- pipeline/antispam/ctor_utils.go | 10 ---------- pipeline/do_if/ctor.go | 18 ++++++++++++------ 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/pipeline/antispam/ctor_utils.go b/pipeline/antispam/ctor_utils.go index a310bbf32..7fc037538 100644 --- a/pipeline/antispam/ctor_utils.go +++ b/pipeline/antispam/ctor_utils.go @@ -5,16 +5,6 @@ import ( "fmt" ) -func assert(b bool, msg string) { - if !b { - panic(msg) - } -} - -func assertEqual[T comparable](a, b T, msg string) { - assert(a == b, fmt.Sprintf("%s: %v != %v", msg, a, b)) -} - func getAny(node map[string]any, field string) any { res, has := node[field] if !has { diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go index 290d999fb..7f399a106 100644 --- a/pipeline/do_if/ctor.go +++ b/pipeline/do_if/ctor.go @@ -4,6 +4,9 @@ import ( "errors" "fmt" "time" + + "github.com/ozontech/file.d/pipeline/checker" + "github.com/ozontech/file.d/pipeline/logic" ) const ( @@ -51,14 +54,17 @@ func extractDoIfNode(node map[string]any) (Node, error) { } switch opName { - case "and", "or", "not": + case + logic.AndTag, + logic.OrTag, + logic.NotTag: return extractLogicalOpNode(opName, node) case - "equal", - "contains", - "prefix", - "suffix", - "regex": + checker.OpEqualTag, + checker.OpContainsTag, + checker.OpPrefixTag, + checker.OpSuffixTag, + checker.OpRegexTag: return extractFieldOpNode(opName, node) case "byte_len_cmp", From a85e7ba604b5b91c63bafc616f0d16bc9bc633db Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 14 Jul 2025 20:41:49 +0300 Subject: [PATCH 30/75] Edit doc meta file --- Insanedocfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Insanedocfile b/Insanedocfile index b6f18e50c..45cc77686 100644 --- a/Insanedocfile +++ b/Insanedocfile @@ -3,13 +3,11 @@ extractors: fn-list: '"fn-list" #4 /Plugin\)\s(.+)\s{/' match-modes: '"match-modes" /MatchMode(.*),/ /\"(.*)\"/' do-if-node: '"do-if-node" /Node(\w+)\s/' - do-if-logical-op: '"do-if-logical-op" /logical(\w+)Tag\s/' decorators: config-params: '_ _ /*`%s`* / /*`default=%s`* / /*`%s`* / /*`options=%s`* /' fn-list: '_ _ /`%s`/' match-modes: '_ /%s/ /`match_mode: 
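
With the shared `logic` package above, both the `do_if` and `antispam` logical nodes dispatch on the same `logic.Op` values and fold their operands' results the same way. A compact, standalone sketch of parsing an op tag and evaluating it over boolean operands; the `eval` helper is illustration only (the real nodes call `check` on child nodes), while the tag names and `and`/`or`/`not` semantics are taken from the patch:

```go
package main

import "fmt"

type Op int

const (
	And Op = iota
	Or
	Not
)

func StringToOp(s string) (Op, error) {
	switch s {
	case "and":
		return And, nil
	case "or":
		return Or, nil
	case "not":
		return Not, nil
	default:
		return -1, fmt.Errorf("unknown logic op tag: %s", s)
	}
}

// eval applies an op to already-computed operand results, the same way the
// logical nodes fold the results of their children's check calls.
func eval(op Op, operands []bool) bool {
	switch op {
	case And:
		for _, v := range operands {
			if !v {
				return false
			}
		}
		return true
	case Or:
		for _, v := range operands {
			if v {
				return true
			}
		}
		return false
	case Not:
		return !operands[0] // constructors guarantee exactly one operand
	default:
		panic("unknown logical op")
	}
}

func main() {
	op, err := StringToOp("and")
	if err != nil {
		panic(err)
	}
	fmt.Println(eval(op, []bool{true, true, false})) // false
}
```
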
%s`/' do-if-node: '_ /%s/' - do-if-logical-op: '_ /%s/' templates: - template: docs/*.idoc.md files: ["../pipeline/*.go"] From 4aa922ae7c42706c9549a1c3a2d0af3034ec4892 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 14 Jul 2025 21:28:37 +0300 Subject: [PATCH 31/75] Extract value node --- pipeline/antispam/ctor.go | 85 +++++++++++++++++++++++++++++++++ pipeline/antispam/ctor_utils.go | 40 +++++++++++----- pipeline/antispam/value_node.go | 22 +++++---- 3 files changed, 124 insertions(+), 23 deletions(-) diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index 258ec67fd..b28a3552c 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -1 +1,86 @@ package antispam + +import ( + "errors" + "fmt" + "strings" + + "github.com/ozontech/file.d/pipeline/checker" + "github.com/ozontech/file.d/pipeline/logic" +) + +const ( + fieldNameOp = "op" + + fieldNameData = "data" + fieldNameCaseSensitive = "case_sensitive" +) + +func extractNode(node map[string]any) (Node, error) { + opName, err := get[string](node, fieldNameOp) + if err != nil { + return nil, err + } + + switch opName { + case + logic.AndTag, + logic.OrTag, + logic.NotTag: + return extractLogicalNode(opName, node) + case + checker.OpEqualTag, + checker.OpContainsTag, + checker.OpPrefixTag, + checker.OpSuffixTag, + checker.OpRegexTag: + return extractValueNode(opName, node) + default: + return nil, fmt.Errorf("unknown op: %s", opName) + } +} + +func extractLogicalNode(op string, node map[string]any) (Node, error) { + panic("not impl") +} + +func extractValueNode(op string, node map[string]any) (Node, error) { + dataTag, err := get[string](node, fieldNameData) + if err != nil { + return nil, err + } + + caseSensitive := true + caseSensitiveNode, err := get[bool](node, fieldNameCaseSensitive) + if err == nil { + caseSensitive = caseSensitiveNode + } else if errors.Is(err, errTypeMismatch) { + return nil, err + } + + values, err := extractValues(node) + if err != nil { + return nil, fmt.Errorf("extract values: %w", err) + } + + return newValueNode(op, caseSensitive, values, dataTag) +} + +func extractValues(node map[string]any) ([][]byte, error) { + rawValues, err := get[[]any](node, "values") + if err != nil { + return nil, err + } + + values := make([][]byte, 0, len(rawValues)) + for _, rawValue := range rawValues { + value, ok := rawValue.(string) + if !ok { + return nil, fmt.Errorf("type=%T is not string", rawValue) + } + + values = append(values, []byte(strings.Clone(value))) + } + + return values, nil +} diff --git a/pipeline/antispam/ctor_utils.go b/pipeline/antispam/ctor_utils.go index 7fc037538..f949971ef 100644 --- a/pipeline/antispam/ctor_utils.go +++ b/pipeline/antispam/ctor_utils.go @@ -2,42 +2,56 @@ package antispam import ( "encoding/json" + "errors" "fmt" ) -func getAny(node map[string]any, field string) any { +var ( + errFieldNotFound = errors.New("field not found") + errTypeMismatch = errors.New("type mismatch") +) + +func getAny(node map[string]any, field string) (any, error) { res, has := node[field] if !has { - panic(fmt.Sprintf("field %q not found", field)) + return nil, fmt.Errorf("field=%q: %w", field, errFieldNotFound) } - return res + return res, nil } -func get[T any](node map[string]any, field string) T { - fieldNode := getAny(node, field) +func get[T any](node map[string]any, field string) (T, error) { + var def T + + fieldNode, err := getAny(node, field) + if err != nil { + return def, err + } result, ok := fieldNode.(T) if !ok { - panic(fmt.Sprintf("field %q type mismatch: 
expected=%T got=%T", field, *new(T), fieldNode)) + return def, fmt.Errorf( + "field=%q expected=%T got=%T: %w", + field, def, fieldNode, errTypeMismatch, + ) } - return result + return result, nil } -func anyToInt(v any) int { +func anyToInt(v any) (int, error) { switch vNum := v.(type) { case int: - return vNum + return vNum, nil case float64: - return int(vNum) + return int(vNum), nil case json.Number: vInt64, err := vNum.Int64() if err != nil { - panic(err.Error()) + return 0, err } - return int(vInt64) + return int(vInt64), nil default: - panic(fmt.Sprintf("type=%T not convertable to int", v)) + return 0, fmt.Errorf("type=%T not convertable to int", v) } } diff --git a/pipeline/antispam/value_node.go b/pipeline/antispam/value_node.go index 92656f178..fc39ff238 100644 --- a/pipeline/antispam/value_node.go +++ b/pipeline/antispam/value_node.go @@ -36,16 +36,16 @@ const ( metaTagPrefix = "meta:" ) -func stringToDataType(s string) (dataType, string) { +func stringToDataType(s string) (dataType, string, error) { switch { case s == dataTypeEventTag: - return dataTypeEvent, "" + return dataTypeEvent, "", nil case s == dataTypeSourceNameTag: - return dataTypeSourceName, "" + return dataTypeSourceName, "", nil case strings.HasPrefix(s, metaTagPrefix): - return dataTypeMeta, strings.TrimPrefix(s, metaTagPrefix) + return dataTypeMeta, strings.TrimPrefix(s, metaTagPrefix), nil default: - panic(fmt.Sprintf("unparsable check data tag: %s", s)) + return -1, "", fmt.Errorf("unparsable check data tag: %s", s) } } @@ -60,20 +60,22 @@ func newValueNode( caseSensitive bool, values [][]byte, checkDataTag string, - metaKey string, -) *valueNode { +) (*valueNode, error) { c, err := checker.New(opTag, caseSensitive, values) if err != nil { - panic(fmt.Sprintf("init checker: %s", err.Error())) + return nil, fmt.Errorf("init checker: %w", err) } - dType, metaKey := stringToDataType(checkDataTag) + dType, metaKey, err := stringToDataType(checkDataTag) + if err != nil { + return nil, err + } return &valueNode{ dataType: dType, metaKey: metaKey, checker: c, - } + }, nil } func (n *valueNode) getType() nodeType { From 932b8b9600c1937fd63bd9eda0ac4262b2c0b99a Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 14 Jul 2025 23:06:38 +0300 Subject: [PATCH 32/75] Impl logical node ctor --- pipeline/antispam/ctor.go | 37 +++++++++++++++++++++++++++++++--- pipeline/antispam/ctor_test.go | 1 + 2 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 pipeline/antispam/ctor_test.go diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index b28a3552c..b6a77bfcd 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -12,7 +12,10 @@ import ( const ( fieldNameOp = "op" + fieldNameOperands = "operands" + fieldNameData = "data" + fieldNameValues = "values" fieldNameCaseSensitive = "case_sensitive" ) @@ -41,7 +44,35 @@ func extractNode(node map[string]any) (Node, error) { } func extractLogicalNode(op string, node map[string]any) (Node, error) { - panic("not impl") + rawOperands, err := get[[]any](node, fieldNameOperands) + if err != nil { + return nil, err + } + + operands := make([]Node, 0) + + for _, rawOperand := range rawOperands { + operandMap, ok := rawOperand.(map[string]any) + if !ok { + return nil, fmt.Errorf( + "logical node operand type mismatch: expected=map[string]any got=%T", + rawOperand) + } + + operand, err := extractNode(operandMap) + if err != nil { + return nil, fmt.Errorf("extract operand for logical op %q: %w", op, err) + } + + operands = append(operands, 
operand) + } + + result, err := newLogicalNode(op, operands) + if err != nil { + return nil, fmt.Errorf("init logical node: %w", err) + } + + return result, nil } func extractValueNode(op string, node map[string]any) (Node, error) { @@ -67,7 +98,7 @@ func extractValueNode(op string, node map[string]any) (Node, error) { } func extractValues(node map[string]any) ([][]byte, error) { - rawValues, err := get[[]any](node, "values") + rawValues, err := get[[]any](node, fieldNameValues) if err != nil { return nil, err } @@ -76,7 +107,7 @@ func extractValues(node map[string]any) ([][]byte, error) { for _, rawValue := range rawValues { value, ok := rawValue.(string) if !ok { - return nil, fmt.Errorf("type=%T is not string", rawValue) + return nil, fmt.Errorf("type of value is not string: %T", rawValue) } values = append(values, []byte(strings.Clone(value))) diff --git a/pipeline/antispam/ctor_test.go b/pipeline/antispam/ctor_test.go new file mode 100644 index 000000000..258ec67fd --- /dev/null +++ b/pipeline/antispam/ctor_test.go @@ -0,0 +1 @@ +package antispam From 6c95ba9ddb1e9cc97bbf621830e0bf78c2c04006 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 14 Jul 2025 23:17:03 +0300 Subject: [PATCH 33/75] Rename files --- pipeline/antispam/ctor_test.go | 1 - pipeline/antispam/{logical_node.go => node_logical.go} | 0 pipeline/antispam/{value_node.go => node_value.go} | 0 3 files changed, 1 deletion(-) delete mode 100644 pipeline/antispam/ctor_test.go rename pipeline/antispam/{logical_node.go => node_logical.go} (100%) rename pipeline/antispam/{value_node.go => node_value.go} (100%) diff --git a/pipeline/antispam/ctor_test.go b/pipeline/antispam/ctor_test.go deleted file mode 100644 index 258ec67fd..000000000 --- a/pipeline/antispam/ctor_test.go +++ /dev/null @@ -1 +0,0 @@ -package antispam diff --git a/pipeline/antispam/logical_node.go b/pipeline/antispam/node_logical.go similarity index 100% rename from pipeline/antispam/logical_node.go rename to pipeline/antispam/node_logical.go diff --git a/pipeline/antispam/value_node.go b/pipeline/antispam/node_value.go similarity index 100% rename from pipeline/antispam/value_node.go rename to pipeline/antispam/node_value.go From 49d08cb829a5eb83194f142e184601587025f87b Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 15 Jul 2025 11:07:33 +0300 Subject: [PATCH 34/75] Add rules ctor --- pipeline/antispam/antispammer.go | 2 +- pipeline/antispam/ctor.go | 94 +++++++++++++++++++++++++++++--- pipeline/antispam/ctor_utils.go | 15 ++++- pipeline/antispam/rule.go | 4 +- 4 files changed, 101 insertions(+), 14 deletions(-) diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index 0cd76504b..31df5b34c 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -192,7 +192,7 @@ func (a *Antispammer) isSpamNew( case thresholdBlocked: return true default: - key = rule.MetaKey + key = rule.RLMapKey ruleIndex = i break } diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index b6a77bfcd..1aafae2e9 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -10,6 +10,10 @@ import ( ) const ( + fieldNameRules = "rules" + fieldNameIf = "if" + fieldNameThreshold = "threshold" + fieldNameOp = "op" fieldNameOperands = "operands" @@ -19,6 +23,79 @@ const ( fieldNameCaseSensitive = "case_sensitive" ) +func extractAntispam(node map[string]any) ([]Rule, int, error) { + thresholdNode, err := getAny(node, fieldNameThreshold) + if err != nil { + return nil, 0, err + } + + threshold, err 
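
The `getAny`/`get[T]` helpers give the constructors typed access to a decoded `map[string]any` config while letting callers tell a missing field apart from a present field of the wrong type via `errors.Is`. A runnable sketch of that usage; the helper bodies are copied in spirit from `ctor_utils.go`, and the config literal is made up for the example:

```go
package main

import (
	"errors"
	"fmt"
)

var (
	errFieldNotFound = errors.New("field not found")
	errTypeMismatch  = errors.New("type mismatch")
)

func getAny(node map[string]any, field string) (any, error) {
	res, has := node[field]
	if !has {
		return nil, fmt.Errorf("field=%q: %w", field, errFieldNotFound)
	}
	return res, nil
}

func get[T any](node map[string]any, field string) (T, error) {
	var def T
	raw, err := getAny(node, field)
	if err != nil {
		return def, err
	}
	v, ok := raw.(T)
	if !ok {
		return def, fmt.Errorf("%w: field=%q expected=%T got=%T", errTypeMismatch, field, def, raw)
	}
	return v, nil
}

func main() {
	cfg := map[string]any{"op": "equal", "case_sensitive": true}

	op, _ := get[string](cfg, "op")
	fmt.Println(op) // equal

	// An absent optional field is distinguishable from a wrongly-typed one.
	_, err := get[bool](cfg, "missing")
	fmt.Println(errors.Is(err, errFieldNotFound), errors.Is(err, errTypeMismatch)) // true false
}
```
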
:= anyToInt(thresholdNode) + if err != nil { + return nil, 0, err + } + + var rules []Rule + + rawRules, err := get[[]any](node, fieldNameRules) + if err == nil { + rules, err = extractRules(rawRules) + if err == nil { + return nil, 0, err + } + } else if errors.Is(err, errTypeMismatch) { + return nil, 0, err + } + + return rules, threshold, nil +} + +func extractRules(rawRules []any) ([]Rule, error) { + rules := make([]Rule, 0, len(rawRules)) + + for _, rawRule := range rawRules { + ruleNode, err := must[map[string]any](rawRule) + if err != nil { + return nil, fmt.Errorf("rule type mismatch: %w", err) + } + + rule, err := extractRule(ruleNode) + if err != nil { + return nil, err + } + + rules = append(rules, rule) + } + + return rules, nil +} + +func extractRule(node map[string]any) (Rule, error) { + condNode, err := get[map[string]any](node, fieldNameIf) + if err != nil { + return Rule{}, err + } + + cond, err := extractNode(condNode) + if err != nil { + return Rule{}, err + } + + thresholdRaw, err := getAny(node, fieldNameThreshold) + if err != nil { + return Rule{}, err + } + + threshold, err := anyToInt(thresholdRaw) + if err != nil { + return Rule{}, err + } + + return Rule{ + Condition: cond, + Threshold: threshold, + }, nil +} + func extractNode(node map[string]any) (Node, error) { opName, err := get[string](node, fieldNameOp) if err != nil { @@ -52,14 +129,12 @@ func extractLogicalNode(op string, node map[string]any) (Node, error) { operands := make([]Node, 0) for _, rawOperand := range rawOperands { - operandMap, ok := rawOperand.(map[string]any) - if !ok { - return nil, fmt.Errorf( - "logical node operand type mismatch: expected=map[string]any got=%T", - rawOperand) + operandNode, err := must[map[string]any](rawOperand) + if err != nil { + return nil, fmt.Errorf("logical node operand type mismatch: %w", err) } - operand, err := extractNode(operandMap) + operand, err := extractNode(operandNode) if err != nil { return nil, fmt.Errorf("extract operand for logical op %q: %w", op, err) } @@ -105,9 +180,10 @@ func extractValues(node map[string]any) ([][]byte, error) { values := make([][]byte, 0, len(rawValues)) for _, rawValue := range rawValues { - value, ok := rawValue.(string) - if !ok { - return nil, fmt.Errorf("type of value is not string: %T", rawValue) + var value string + value, err = must[string](rawValue) + if err != nil { + return nil, fmt.Errorf("value type mismatch: %w", err) } values = append(values, []byte(strings.Clone(value))) diff --git a/pipeline/antispam/ctor_utils.go b/pipeline/antispam/ctor_utils.go index f949971ef..80e3596c0 100644 --- a/pipeline/antispam/ctor_utils.go +++ b/pipeline/antispam/ctor_utils.go @@ -20,6 +20,17 @@ func getAny(node map[string]any, field string) (any, error) { return res, nil } +func must[T any](v any) (T, error) { + var def T + + result, ok := v.(T) + if !ok { + return def, fmt.Errorf("%w: expected=%T got=%T", errTypeMismatch, def, v) + } + + return result, nil +} + func get[T any](node map[string]any, field string) (T, error) { var def T @@ -31,8 +42,8 @@ func get[T any](node map[string]any, field string) (T, error) { result, ok := fieldNode.(T) if !ok { return def, fmt.Errorf( - "field=%q expected=%T got=%T: %w", - field, def, fieldNode, errTypeMismatch, + "%w: field=%q expected=%T got=%T", + errTypeMismatch, field, def, fieldNode, ) } diff --git a/pipeline/antispam/rule.go b/pipeline/antispam/rule.go index 94d457440..4624ce783 100644 --- a/pipeline/antispam/rule.go +++ b/pipeline/antispam/rule.go @@ -5,11 +5,11 @@ import "fmt" type 
Rule struct { Condition Node Threshold int - MetaKey string + RLMapKey string } func (r *Rule) Prepare(id int) { - r.MetaKey = fmt.Sprintf("#=%d=#", id) + r.RLMapKey = fmt.Sprintf("#=%d=#", id) } func checkThreshold(threshold int) error { From 2e9156e8fc6c0b151fbca3d05b7f16f05d249911 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 15 Jul 2025 14:16:07 +0300 Subject: [PATCH 35/75] Remove antispam v2 --- fd/util.go | 2 - pipeline/antispam/antispammer.go | 124 +------------------------------ pipeline/pipeline.go | 2 - 3 files changed, 1 insertion(+), 127 deletions(-) diff --git a/fd/util.go b/fd/util.go index c988bce1c..d138a98ea 100644 --- a/fd/util.go +++ b/fd/util.go @@ -18,7 +18,6 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings { capacity := pipeline.DefaultCapacity antispamThreshold := pipeline.DefaultAntispamThreshold var antispamExceptions antispam.Exceptions - var antispammer *antispam.Antispam sourceNameMetaField := pipeline.DefaultSourceNameMetaField avgInputEventSize := pipeline.DefaultAvgInputEventSize maxInputEventSize := pipeline.DefaultMaxInputEventSize @@ -130,7 +129,6 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings { CutOffEventByLimitField: cutOffEventByLimitField, AntispamThreshold: antispamThreshold, AntispamExceptions: antispamExceptions, - Antispam: antispammer, SourceNameMetaField: sourceNameMetaField, MaintenanceInterval: maintenanceInterval, EventTimeout: eventTimeout, diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index 31df5b34c..f8d677a25 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -30,8 +30,6 @@ type Antispammer struct { sources map[string]source exceptions Exceptions - antispam *Antispam - logger *zap.Logger // antispammer metrics @@ -51,7 +49,6 @@ type Options struct { Threshold int UnbanIterations int Exceptions Exceptions - Antispam *Antispam Logger *zap.Logger MetricsController *metric.Ctl @@ -97,12 +94,8 @@ func (a *Antispammer) IsSpam( isNewSource bool, event []byte, timeEvent time.Time, - meta map[string]string, + _ map[string]string, ) bool { - if a.antispam != nil { - return a.isSpamNew(id, name, isNewSource, event, timeEvent, meta) - } - if a.threshold <= 0 { return false } @@ -168,95 +161,6 @@ func (a *Antispammer) IsSpam( return x >= int32(a.threshold) } -func (a *Antispammer) isSpamNew( - id string, - name string, - isNewSource bool, - event []byte, - timeEvent time.Time, - meta map[string]string, -) bool { - if !a.antispam.enabled { - return false - } - - key := id - ruleIndex := -1 - - for i := range a.antispam.rules { - rule := a.antispam.rules[i] - if rule.Condition.check(event, []byte(name), meta) { - switch rule.Threshold { - case thresholdUnlimited: - return false - case thresholdBlocked: - return true - default: - key = rule.RLMapKey - ruleIndex = i - break - } - } - } - - switch a.antispam.defThreshold { - case thresholdUnlimited: - return false - case thresholdBlocked: - return true - } - - a.mu.RLock() - src, has := a.sources[key] - a.mu.RUnlock() - - timeEventSeconds := timeEvent.UnixNano() - - if !has { - a.mu.Lock() - if newSrc, has := a.sources[key]; has { - src = newSrc - } else { - src = source{ - counter: &atomic.Int32{}, - name: name, - timestamp: &atomic.Int64{}, - } - src.timestamp.Add(timeEventSeconds) - a.sources[key] = src - } - a.mu.Unlock() - } - - if isNewSource { - src.counter.Swap(0) - return false - } - - x := src.counter.Load() - diff := timeEventSeconds - 
src.timestamp.Swap(timeEventSeconds) - if diff < a.maintenanceInterval.Nanoseconds() { - x = src.counter.Inc() - } - if x == int32(a.threshold) { - src.counter.Swap(int32(a.unbanIterations * a.threshold)) - a.activeMetric.Set(1) - a.banMetric.WithLabelValues(name).Inc() - a.logger.Warn("source has been banned", - zap.Any("id", id), zap.String("name", name), - zap.Time("time_event", timeEvent), zap.Int64("diff_nsec", diff), - zap.Int64("maintenance_nsec", a.maintenanceInterval.Nanoseconds()), - zap.Int32("counter", src.counter.Load()), - ) - } - - if ruleIndex == -1 { - return x >= int32(a.antispam.defThreshold) - } - - return x >= int32(a.antispam.rules[ruleIndex].Threshold) -} - func (a *Antispammer) Maintenance() { a.mu.Lock() @@ -330,29 +234,3 @@ func (e Exceptions) Prepare() { e[i].Prepare() } } - -type Antispam struct { - rules []Rule - defThreshold int - enabled bool -} - -func NewAntispam(defThreshold int, rules []Rule) (*Antispam, error) { - if err := checkThreshold(defThreshold); err != nil { - return nil, err - } - - if defThreshold == -1 && len(rules) == 0 { - return &Antispam{enabled: false}, nil - } - - for i := range rules { - rules[i].Prepare(i) - } - - return &Antispam{ - rules: rules, - defThreshold: defThreshold, - enabled: true, - }, nil -} diff --git a/pipeline/pipeline.go b/pipeline/pipeline.go index a2cb56f45..9780ca200 100644 --- a/pipeline/pipeline.go +++ b/pipeline/pipeline.go @@ -154,7 +154,6 @@ type Settings struct { EventTimeout time.Duration AntispamThreshold int AntispamExceptions antispam.Exceptions - Antispam *antispam.Antispam SourceNameMetaField string AvgEventSize int MaxEventSize int @@ -210,7 +209,6 @@ func New(name string, settings *Settings, registry *prometheus.Registry, lg *zap antispamer: antispam.NewAntispammer(&antispam.Options{ MaintenanceInterval: settings.MaintenanceInterval, Threshold: settings.AntispamThreshold, - Antispam: settings.Antispam, UnbanIterations: antispamUnbanIterations, Logger: lg.Named("antispam"), MetricsController: metricCtl, From 351dbe6da9eacd26a14c175752749a09a419376d Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Wed, 16 Jul 2025 15:03:56 +0300 Subject: [PATCH 36/75] Impl IsSpam with rules. 
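
The ban arithmetic itself survives these refactors: once a key's counter reaches its threshold, the counter is bumped to `unbanIterations * threshold`, so the source stays banned until several maintenance passes drain it back below the threshold. A toy sketch of that life cycle; the numbers are made up, and the drain step is an assumption (the real logic lives in `Maintenance`, which this diff does not show):

```go
package main

import "fmt"

func main() {
	const (
		threshold       = 3 // made-up numbers for the example
		unbanIterations = 2
	)

	counter := 0

	// Events arriving within one maintenance interval.
	for i := 1; i <= 5; i++ {
		counter++
		if counter == threshold {
			// Ban: pre-load the counter so it takes several passes to drain.
			counter = unbanIterations * threshold
		}
		fmt.Printf("event %d: counter=%d spam=%v\n", i, counter, counter >= threshold)
	}

	// Assumed drain step: each maintenance pass subtracts the threshold.
	for pass := 1; counter > 0; pass++ {
		counter -= threshold
		if counter < 0 {
			counter = 0
		}
		fmt.Printf("maintenance %d: counter=%d banned=%v\n", pass, counter, counter >= threshold)
	}
}
```
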
Cast exceptions to rules --- cfg/matchrule/matchrule.go | 24 +++++++++ fd/util.go | 4 ++ pipeline/antispam/antispammer.go | 78 +++++++++++++++++---------- pipeline/antispam/antispammer_test.go | 22 ++++---- pipeline/antispam/rule.go | 75 ++++++++++++++++++++++++-- pipeline/pipeline.go | 2 + 6 files changed, 163 insertions(+), 42 deletions(-) diff --git a/cfg/matchrule/matchrule.go b/cfg/matchrule/matchrule.go index b49f3dc4e..b7dae1643 100644 --- a/cfg/matchrule/matchrule.go +++ b/cfg/matchrule/matchrule.go @@ -27,6 +27,19 @@ func (m *Mode) UnmarshalJSON(i []byte) error { return nil } +func ModeToString(m Mode) string { + switch m { + case ModeContains: + return "contains" + case ModePrefix: + return "prefix" + case ModeSuffix: + return "suffix" + default: + panic("unreachable") + } +} + const ( ModePrefix Mode = iota ModeContains @@ -186,6 +199,17 @@ var ( condOrBytes = []byte(`"or"`) ) +func CondToString(c Cond) string { + switch c { + case CondAnd: + return "and" + case CondOr: + return "or" + default: + panic("unreachable") + } +} + type RuleSet struct { // > @3@4@5@6 // > diff --git a/fd/util.go b/fd/util.go index d138a98ea..928219678 100644 --- a/fd/util.go +++ b/fd/util.go @@ -18,6 +18,7 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings { capacity := pipeline.DefaultCapacity antispamThreshold := pipeline.DefaultAntispamThreshold var antispamExceptions antispam.Exceptions + var antispamCfg map[string]any sourceNameMetaField := pipeline.DefaultSourceNameMetaField avgInputEventSize := pipeline.DefaultAvgInputEventSize maxInputEventSize := pipeline.DefaultMaxInputEventSize @@ -101,6 +102,8 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings { } antispamExceptions.Prepare() + antispamCfg = settings.Get("antispam").MustMap() + sourceNameMetaField = settings.Get("source_name_meta_field").MustString() isStrict = settings.Get("is_strict").MustBool() @@ -129,6 +132,7 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings { CutOffEventByLimitField: cutOffEventByLimitField, AntispamThreshold: antispamThreshold, AntispamExceptions: antispamExceptions, + Antispam: antispamCfg, SourceNameMetaField: sourceNameMetaField, MaintenanceInterval: maintenanceInterval, EventTimeout: eventTimeout, diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index f8d677a25..f460be86f 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -28,7 +28,7 @@ type Antispammer struct { maintenanceInterval time.Duration mu sync.RWMutex sources map[string]source - exceptions Exceptions + rules Rules logger *zap.Logger @@ -49,6 +49,7 @@ type Options struct { Threshold int UnbanIterations int Exceptions Exceptions + ConfigV2 map[string]any Logger *zap.Logger MetricsController *metric.Ctl @@ -56,18 +57,10 @@ type Options struct { } func NewAntispammer(o *Options) *Antispammer { - if o.Threshold > 0 { - o.Logger.Info("antispam enabled", - zap.Int("threshold", o.Threshold), - zap.Duration("maintenance", o.MaintenanceInterval)) - } - a := &Antispammer{ unbanIterations: o.UnbanIterations, - threshold: o.Threshold, maintenanceInterval: o.MaintenanceInterval, sources: make(map[string]source), - exceptions: o.Exceptions, logger: o.Logger, activeMetric: o.MetricsController.RegisterGauge("antispam_active", "Gauge indicates whether the antispam is enabled", @@ -82,6 +75,30 @@ func NewAntispammer(o *Options) *Antispammer { ), } + var err error + + if o.ConfigV2 != nil { + a.rules, a.threshold, err = 
extractAntispam(o.ConfigV2) + if err != nil { + o.Logger.Fatal("can't extract antispam", zap.Error(err)) + } + } else { + a.rules, err = exceptionToRules(o.Exceptions) + if err != nil { + o.Logger.Fatal("can't convert exceptions to rules") + } + + if o.Threshold > 0 { + a.threshold = o.Threshold + } else { + a.threshold = thresholdUnlimited + } + } + + for i := range a.rules { + a.rules[i].Prepare(i) + } + // not enabled by default a.activeMetric.Set(0) @@ -94,28 +111,33 @@ func (a *Antispammer) IsSpam( isNewSource bool, event []byte, timeEvent time.Time, - _ map[string]string, + meta map[string]string, ) bool { - if a.threshold <= 0 { - return false - } - - for i := 0; i < len(a.exceptions); i++ { - e := &a.exceptions[i] - checkData := event - if e.CheckSourceName { - checkData = []byte(name) - } - if e.Match(checkData) { - if e.Name != "" { - a.exceptionMetric.WithLabelValues(e.Name).Inc() + rlMapKey := id + threshold := a.threshold + +loop: + for i := range a.rules { + rule := &a.rules[i] + if rule.Condition.check(event, []byte(name), meta) { + switch rule.Threshold { + case thresholdUnlimited: + if rule.Name != "" { + a.exceptionMetric.WithLabelValues(rule.Name).Inc() + } + return false + case thresholdBlocked: + return true + default: + rlMapKey = rule.RLMapKey + threshold = rule.Threshold + break loop } - return false } } a.mu.RLock() - src, has := a.sources[id] + src, has := a.sources[rlMapKey] a.mu.RUnlock() timeEventSeconds := timeEvent.UnixNano() @@ -146,8 +168,8 @@ func (a *Antispammer) IsSpam( if diff < a.maintenanceInterval.Nanoseconds() { x = src.counter.Inc() } - if x == int32(a.threshold) { - src.counter.Swap(int32(a.unbanIterations * a.threshold)) + if x == int32(threshold) { + src.counter.Swap(int32(a.unbanIterations * threshold)) a.activeMetric.Set(1) a.banMetric.WithLabelValues(name).Inc() a.logger.Warn("source has been banned", @@ -158,7 +180,7 @@ func (a *Antispammer) IsSpam( ) } - return x >= int32(a.threshold) + return x >= int32(threshold) } func (a *Antispammer) Maintenance() { diff --git a/pipeline/antispam/antispammer_test.go b/pipeline/antispam/antispammer_test.go index 7ba6cddf4..6dfef5284 100644 --- a/pipeline/antispam/antispammer_test.go +++ b/pipeline/antispam/antispammer_test.go @@ -12,11 +12,12 @@ import ( "github.com/stretchr/testify/require" ) -func newAntispammer(threshold, unbanIterations int, maintenanceInterval time.Duration) *Antispammer { +func newAntispammer(threshold, unbanIterations int, maintenanceInterval time.Duration, exceptions Exceptions) *Antispammer { holder := metric.NewHolder(time.Minute) return NewAntispammer(&Options{ MaintenanceInterval: maintenanceInterval, Threshold: threshold, + Exceptions: exceptions, UnbanIterations: unbanIterations, Logger: logger.Instance.Named("antispam").Desugar(), MetricsController: metric.NewCtl("test", prometheus.NewRegistry()), @@ -31,12 +32,12 @@ func TestAntispam(t *testing.T) { unbanIterations := 2 maintenanceInterval := time.Second * 1 - antispamer := newAntispammer(threshold, unbanIterations, maintenanceInterval) + antispammer := newAntispammer(threshold, unbanIterations, maintenanceInterval, nil) startTime := time.Now() checkSpam := func(i int) bool { eventTime := startTime.Add(time.Duration(i) * maintenanceInterval / 2) - return antispamer.IsSpam("1", "test", false, []byte(`{}`), eventTime, nil) + return antispammer.IsSpam("1", "test", false, []byte(`{}`), eventTime, nil) } for i := 1; i < threshold; i++ { @@ -47,7 +48,7 @@ func TestAntispam(t *testing.T) { for i := 0; i <= unbanIterations-1; 
i++ { result := checkSpam(threshold + i) r.True(result) - antispamer.Maintenance() + antispammer.Maintenance() } result := checkSpam(threshold + 1) @@ -61,7 +62,7 @@ func TestAntispamAfterRestart(t *testing.T) { unbanIterations := 2 maintenanceInterval := time.Second * 1 - antispamer := newAntispammer(threshold, unbanIterations, maintenanceInterval) + antispamer := newAntispammer(threshold, unbanIterations, maintenanceInterval, nil) startTime := time.Now() checkSpam := func(i int) bool { @@ -86,12 +87,10 @@ func TestAntispamExceptions(t *testing.T) { unbanIterations := 2 maintenanceInterval := time.Second * 1 - antispamer := newAntispammer(threshold, unbanIterations, maintenanceInterval) - eventRulesetName := "test_event" sourceRulesetName := "test_sourcename" - antispamer.exceptions = Exceptions{ + exceptions := Exceptions{ { RuleSet: matchrule.RuleSet{ Name: eventRulesetName, @@ -125,12 +124,13 @@ func TestAntispamExceptions(t *testing.T) { }, }, } - antispamer.exceptions.Prepare() + + antispammer := newAntispammer(threshold, unbanIterations, maintenanceInterval, exceptions) checkSpam := func(source, event string, wantMetric map[string]float64) { - antispamer.IsSpam("1", source, true, []byte(event), now, nil) + antispammer.IsSpam("1", source, true, []byte(event), now, nil) for k, v := range wantMetric { - r.Equal(v, testutil.ToFloat64(antispamer.exceptionMetric.WithLabelValues(k))) + r.Equal(v, testutil.ToFloat64(antispammer.exceptionMetric.WithLabelValues(k))) } } diff --git a/pipeline/antispam/rule.go b/pipeline/antispam/rule.go index 4624ce783..bfbe29f52 100644 --- a/pipeline/antispam/rule.go +++ b/pipeline/antispam/rule.go @@ -1,32 +1,101 @@ package antispam -import "fmt" +import ( + "fmt" + "strings" + + "github.com/ozontech/file.d/cfg/matchrule" + "github.com/ozontech/file.d/pipeline/logic" +) type Rule struct { + Name string Condition Node Threshold int RLMapKey string } +type Rules []Rule + func (r *Rule) Prepare(id int) { r.RLMapKey = fmt.Sprintf("#=%d=#", id) } func checkThreshold(threshold int) error { if threshold < -1 { - return fmt.Errorf("invalid threshold: %d", threshold) + return fmt.Errorf("invalid threshold: expected non-negative or -1 got=%d", threshold) } return nil } -func newRule(condition Node, threshold int) (Rule, error) { +func newRule(name string, condition Node, threshold int) (Rule, error) { if err := checkThreshold(threshold); err != nil { return Rule{}, err } return Rule{ + Name: name, Condition: condition, Threshold: threshold, }, nil } + +func matchRuleToNode(rule matchrule.Rule, dataTypeTag string) (Node, error) { + values := make([][]byte, 0, len(rule.Values)) + for _, s := range rule.Values { + values = append(values, []byte(strings.Clone(s))) + } + + node, err := newValueNode(matchrule.ModeToString(rule.Mode), !rule.CaseInsensitive, values, dataTypeTag) + if err != nil { + return nil, err + } + + if !rule.Invert { + return node, nil + } + + return newLogicalNode(logic.NotTag, []Node{node}) +} + +func matchRuleSetToNode(ruleSet matchrule.RuleSet, dataTypeTag string) (Node, error) { + operands := make([]Node, 0, len(ruleSet.Rules)) + for _, r := range ruleSet.Rules { + operand, err := matchRuleToNode(r, dataTypeTag) + if err != nil { + return nil, err + } + operands = append(operands, operand) + } + + return newLogicalNode(matchrule.CondToString(ruleSet.Cond), operands) +} + +func exceptionToNode(exception Exception) (Node, error) { + dataTypeTag := dataTypeEventTag + if exception.CheckSourceName { + dataTypeTag = dataTypeSourceNameTag + } + + return 
matchRuleSetToNode(exception.RuleSet, dataTypeTag) +} + +func exceptionToRules(exceptions Exceptions) (Rules, error) { + rules := make(Rules, 0, len(exceptions)) + for _, e := range exceptions { + node, err := exceptionToNode(e) + if err != nil { + return nil, err + } + + rule, err := newRule(e.RuleSet.Name, node, -1) + if err != nil { + return nil, err + } + + rules = append(rules, rule) + } + + return rules, nil +} diff --git a/pipeline/pipeline.go b/pipeline/pipeline.go index 9780ca200..57f67704d 100644 --- a/pipeline/pipeline.go +++ b/pipeline/pipeline.go @@ -154,6 +154,7 @@ type Settings struct { EventTimeout time.Duration AntispamThreshold int AntispamExceptions antispam.Exceptions + Antispam map[string]any SourceNameMetaField string AvgEventSize int MaxEventSize int @@ -214,6 +215,7 @@ func New(name string, settings *Settings, registry *prometheus.Registry, lg *zap MetricsController: metricCtl, MetricHolder: metricHolder, Exceptions: settings.AntispamExceptions, + ConfigV2: settings.Antispam, }), eventLog: make([]string, 0, 128), From 840f16ac60c747074999a7f56f332de9cba727b3 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Wed, 16 Jul 2025 15:20:34 +0300 Subject: [PATCH 37/75] Add 'enabled' flag --- pipeline/antispam/antispammer.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index f460be86f..d92eb75f2 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -29,6 +29,7 @@ type Antispammer struct { mu sync.RWMutex sources map[string]source rules Rules + enabled bool logger *zap.Logger @@ -95,8 +96,11 @@ func NewAntispammer(o *Options) *Antispammer { } } + a.enabled = a.threshold != thresholdUnlimited + for i := range a.rules { a.rules[i].Prepare(i) + a.enabled = a.enabled || a.rules[i].Threshold != thresholdUnlimited } // not enabled by default @@ -113,6 +117,10 @@ func (a *Antispammer) IsSpam( timeEvent time.Time, meta map[string]string, ) bool { + if !a.enabled { + return false + } + rlMapKey := id threshold := a.threshold @@ -136,6 +144,13 @@ loop: } } + switch threshold { + case thresholdUnlimited: + return false + case thresholdBlocked: + return true + } + a.mu.RLock() src, has := a.sources[rlMapKey] a.mu.RUnlock() From 80d891e86ca3fdabf1b0d933c2f515ab5b45672f Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Wed, 16 Jul 2025 21:06:55 +0300 Subject: [PATCH 38/75] Change meta key data tag separator --- pipeline/antispam/node_value.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline/antispam/node_value.go b/pipeline/antispam/node_value.go index fc39ff238..2f9cfb8df 100644 --- a/pipeline/antispam/node_value.go +++ b/pipeline/antispam/node_value.go @@ -33,7 +33,7 @@ const ( dataTypeSourceNameTag = "source_name" dataTypeMetaTag = "meta" - metaTagPrefix = "meta:" + metaTagPrefix = "meta." 
 )
 
 func stringToDataType(s string) (dataType, string, error) {

From e0021eb4d04eb0856fa6f4553b3e021c39877adf Mon Sep 17 00:00:00 2001
From: george pogosyan
Date: Wed, 16 Jul 2025 22:46:11 +0300
Subject: [PATCH 39/75] Deduplicate AnyToInt
---
 decoder/common.go | 6 +++---
 decoder/json.go | 2 +-
 pipeline/antispam/ctor.go | 5 +++--
 pipeline/antispam/ctor_utils.go | 18 ------------------
 pipeline/do_if/ctor.go | 3 ++-
 pipeline/do_if/ctor_utils.go | 18 ------------------
 6 files changed, 9 insertions(+), 43 deletions(-)

diff --git a/decoder/common.go b/decoder/common.go
index 01867661a..f352014ef 100644
--- a/decoder/common.go
+++ b/decoder/common.go
@@ -2,10 +2,10 @@ package decoder
 
 import (
 	"encoding/json"
-	"errors"
+	"fmt"
 )
 
-func anyToInt(v any) (int, error) {
+func AnyToInt(v any) (int, error) {
 	switch vNum := v.(type) {
 	case int:
 		return vNum, nil
@@ -18,7 +18,7 @@ func anyToInt(v any) (int, error) {
 		}
 		return int(vInt64), nil
 	default:
-		return 0, errors.New("value is not convertable to int")
+		return 0, fmt.Errorf("not convertable to int: value=%v type=%T", v, v)
 	}
 }
 
diff --git a/decoder/json.go b/decoder/json.go
index 492cd2169..400bbd8ba 100644
--- a/decoder/json.go
+++ b/decoder/json.go
@@ -140,7 +140,7 @@ func extractJsonParams(params map[string]any) (jsonParams, error) {
 		return jsonParams{}, fmt.Errorf("%q must be map", jsonMaxFieldsSizeParam)
 	}
 	for k, v := range maxFieldsSizeMap {
-		vInt, err := anyToInt(v)
+		vInt, err := AnyToInt(v)
 		if err != nil {
 			return jsonParams{}, fmt.Errorf("each value in %q must be int", jsonMaxFieldsSizeParam)
 		}
diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go
index 1aafae2e9..3e45f25dd 100644
--- a/pipeline/antispam/ctor.go
+++ b/pipeline/antispam/ctor.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"strings"
 
+	"github.com/ozontech/file.d/decoder"
 	"github.com/ozontech/file.d/pipeline/checker"
 	"github.com/ozontech/file.d/pipeline/logic"
 )
@@ -29,7 +30,7 @@ func extractAntispam(node map[string]any) ([]Rule, int, error) {
 		return nil, 0, err
 	}
 
-	threshold, err := anyToInt(thresholdNode)
+	threshold, err := decoder.AnyToInt(thresholdNode)
 	if err != nil {
 		return nil, 0, err
 	}
@@ -85,7 +86,7 @@ func extractRule(node map[string]any) (Rule, error) {
 		return Rule{}, err
 	}
 
-	threshold, err := anyToInt(thresholdRaw)
+	threshold, err := decoder.AnyToInt(thresholdRaw)
 	if err != nil {
 		return Rule{}, err
 	}
diff --git a/pipeline/antispam/ctor_utils.go b/pipeline/antispam/ctor_utils.go
index 80e3596c0..05cd7dceb 100644
--- a/pipeline/antispam/ctor_utils.go
+++ b/pipeline/antispam/ctor_utils.go
@@ -1,7 +1,6 @@
 package antispam
 
 import (
-	"encoding/json"
 	"errors"
 	"fmt"
 )
@@ -49,20 +48,3 @@ func get[T any](node map[string]any, field string) (T, error) {
 
 	return result, nil
 }
-
-func anyToInt(v any) (int, error) {
-	switch vNum := v.(type) {
-	case int:
-		return vNum, nil
-	case float64:
-		return int(vNum), nil
-	case json.Number:
-		vInt64, err := vNum.Int64()
-		if err != nil {
-			return 0, err
-		}
-		return int(vInt64), nil
-	default:
-		return 0, fmt.Errorf("type=%T not convertable to int", v)
-	}
-}
diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go
index 7f399a106..1b936002e 100644
--- a/pipeline/do_if/ctor.go
+++ b/pipeline/do_if/ctor.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"time"
 
+	"github.com/ozontech/file.d/decoder"
 	"github.com/ozontech/file.d/pipeline/checker"
 	"github.com/ozontech/file.d/pipeline/logic"
 )
@@ -160,7 +161,7 @@ func extractLengthCmpOpNode(opName string, node map[string]any) (Node, error) {
 		return nil, err
 	}
 
-	cmpValue, err := anyToInt(cmpValueRaw)
+	
cmpValue, err := decoder.AnyToInt(cmpValueRaw) if err != nil { return nil, err } diff --git a/pipeline/do_if/ctor_utils.go b/pipeline/do_if/ctor_utils.go index 6d7772e86..9b9a8a966 100644 --- a/pipeline/do_if/ctor_utils.go +++ b/pipeline/do_if/ctor_utils.go @@ -1,7 +1,6 @@ package do_if import ( - "encoding/json" "errors" "fmt" ) @@ -38,20 +37,3 @@ func get[T any](node map[string]any, field string) (T, error) { return result, nil } - -func anyToInt(v any) (int, error) { - switch vNum := v.(type) { - case int: - return vNum, nil - case float64: - return int(vNum), nil - case json.Number: - vInt64, err := vNum.Int64() - if err != nil { - return 0, err - } - return int(vInt64), nil - default: - return 0, fmt.Errorf("type=%T not convertable to int", v) - } -} From 64c5180619ae41c96dbd0ca035d85ad188c38524 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Wed, 16 Jul 2025 23:15:36 +0300 Subject: [PATCH 40/75] Extract field name. Use rule ctor --- pipeline/antispam/ctor.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index 3e45f25dd..95586a0ba 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -12,6 +12,7 @@ import ( const ( fieldNameRules = "rules" + fieldNameName = "name" fieldNameIf = "if" fieldNameThreshold = "threshold" @@ -71,6 +72,14 @@ func extractRules(rawRules []any) ([]Rule, error) { } func extractRule(node map[string]any) (Rule, error) { + name := "" + nameNode, err := get[string](node, fieldNameName) + if err == nil { + name = nameNode + } else if errors.Is(err, errTypeMismatch) { + return Rule{}, err + } + condNode, err := get[map[string]any](node, fieldNameIf) if err != nil { return Rule{}, err @@ -91,10 +100,7 @@ func extractRule(node map[string]any) (Rule, error) { return Rule{}, err } - return Rule{ - Condition: cond, - Threshold: threshold, - }, nil + return newRule(name, cond, threshold) } func extractNode(node map[string]any) (Node, error) { From ba5937b70534135c297b8509ef1ae085d71085ce Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Wed, 16 Jul 2025 23:56:40 +0300 Subject: [PATCH 41/75] Deduplicate ctor utils --- pipeline/antispam/ctor.go | 33 +++++++++++----------- pipeline/antispam/ctor_utils.go | 50 --------------------------------- pipeline/ctor/utils.go | 50 +++++++++++++++++++++++++++++++++ pipeline/do_if/ctor.go | 39 ++++++++++++------------- pipeline/do_if/ctor_utils.go | 39 ------------------------- 5 files changed, 87 insertions(+), 124 deletions(-) delete mode 100644 pipeline/antispam/ctor_utils.go create mode 100644 pipeline/ctor/utils.go delete mode 100644 pipeline/do_if/ctor_utils.go diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index 95586a0ba..9c550bbfa 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -7,6 +7,7 @@ import ( "github.com/ozontech/file.d/decoder" "github.com/ozontech/file.d/pipeline/checker" + "github.com/ozontech/file.d/pipeline/ctor" "github.com/ozontech/file.d/pipeline/logic" ) @@ -26,7 +27,7 @@ const ( ) func extractAntispam(node map[string]any) ([]Rule, int, error) { - thresholdNode, err := getAny(node, fieldNameThreshold) + thresholdNode, err := ctor.GetAny(node, fieldNameThreshold) if err != nil { return nil, 0, err } @@ -38,13 +39,13 @@ func extractAntispam(node map[string]any) ([]Rule, int, error) { var rules []Rule - rawRules, err := get[[]any](node, fieldNameRules) + rawRules, err := ctor.Get[[]any](node, fieldNameRules) if err == nil { rules, err = 
extractRules(rawRules) if err == nil { return nil, 0, err } - } else if errors.Is(err, errTypeMismatch) { + } else if errors.Is(err, ctor.ErrTypeMismatch) { return nil, 0, err } @@ -55,7 +56,7 @@ func extractRules(rawRules []any) ([]Rule, error) { rules := make([]Rule, 0, len(rawRules)) for _, rawRule := range rawRules { - ruleNode, err := must[map[string]any](rawRule) + ruleNode, err := ctor.Must[map[string]any](rawRule) if err != nil { return nil, fmt.Errorf("rule type mismatch: %w", err) } @@ -73,14 +74,14 @@ func extractRules(rawRules []any) ([]Rule, error) { func extractRule(node map[string]any) (Rule, error) { name := "" - nameNode, err := get[string](node, fieldNameName) + nameNode, err := ctor.Get[string](node, fieldNameName) if err == nil { name = nameNode - } else if errors.Is(err, errTypeMismatch) { + } else if errors.Is(err, ctor.ErrTypeMismatch) { return Rule{}, err } - condNode, err := get[map[string]any](node, fieldNameIf) + condNode, err := ctor.Get[map[string]any](node, fieldNameIf) if err != nil { return Rule{}, err } @@ -90,7 +91,7 @@ func extractRule(node map[string]any) (Rule, error) { return Rule{}, err } - thresholdRaw, err := getAny(node, fieldNameThreshold) + thresholdRaw, err := ctor.GetAny(node, fieldNameThreshold) if err != nil { return Rule{}, err } @@ -104,7 +105,7 @@ func extractRule(node map[string]any) (Rule, error) { } func extractNode(node map[string]any) (Node, error) { - opName, err := get[string](node, fieldNameOp) + opName, err := ctor.Get[string](node, fieldNameOp) if err != nil { return nil, err } @@ -128,7 +129,7 @@ func extractNode(node map[string]any) (Node, error) { } func extractLogicalNode(op string, node map[string]any) (Node, error) { - rawOperands, err := get[[]any](node, fieldNameOperands) + rawOperands, err := ctor.Get[[]any](node, fieldNameOperands) if err != nil { return nil, err } @@ -136,7 +137,7 @@ func extractLogicalNode(op string, node map[string]any) (Node, error) { operands := make([]Node, 0) for _, rawOperand := range rawOperands { - operandNode, err := must[map[string]any](rawOperand) + operandNode, err := ctor.Must[map[string]any](rawOperand) if err != nil { return nil, fmt.Errorf("logical node operand type mismatch: %w", err) } @@ -158,16 +159,16 @@ func extractLogicalNode(op string, node map[string]any) (Node, error) { } func extractValueNode(op string, node map[string]any) (Node, error) { - dataTag, err := get[string](node, fieldNameData) + dataTag, err := ctor.Get[string](node, fieldNameData) if err != nil { return nil, err } caseSensitive := true - caseSensitiveNode, err := get[bool](node, fieldNameCaseSensitive) + caseSensitiveNode, err := ctor.Get[bool](node, fieldNameCaseSensitive) if err == nil { caseSensitive = caseSensitiveNode - } else if errors.Is(err, errTypeMismatch) { + } else if errors.Is(err, ctor.ErrTypeMismatch) { return nil, err } @@ -180,7 +181,7 @@ func extractValueNode(op string, node map[string]any) (Node, error) { } func extractValues(node map[string]any) ([][]byte, error) { - rawValues, err := get[[]any](node, fieldNameValues) + rawValues, err := ctor.Get[[]any](node, fieldNameValues) if err != nil { return nil, err } @@ -188,7 +189,7 @@ func extractValues(node map[string]any) ([][]byte, error) { values := make([][]byte, 0, len(rawValues)) for _, rawValue := range rawValues { var value string - value, err = must[string](rawValue) + value, err = ctor.Must[string](rawValue) if err != nil { return nil, fmt.Errorf("value type mismatch: %w", err) } diff --git a/pipeline/antispam/ctor_utils.go 
b/pipeline/antispam/ctor_utils.go deleted file mode 100644 index 05cd7dceb..000000000 --- a/pipeline/antispam/ctor_utils.go +++ /dev/null @@ -1,50 +0,0 @@ -package antispam - -import ( - "errors" - "fmt" -) - -var ( - errFieldNotFound = errors.New("field not found") - errTypeMismatch = errors.New("type mismatch") -) - -func getAny(node map[string]any, field string) (any, error) { - res, has := node[field] - if !has { - return nil, fmt.Errorf("field=%q: %w", field, errFieldNotFound) - } - - return res, nil -} - -func must[T any](v any) (T, error) { - var def T - - result, ok := v.(T) - if !ok { - return def, fmt.Errorf("%w: expected=%T got=%T", errTypeMismatch, def, v) - } - - return result, nil -} - -func get[T any](node map[string]any, field string) (T, error) { - var def T - - fieldNode, err := getAny(node, field) - if err != nil { - return def, err - } - - result, ok := fieldNode.(T) - if !ok { - return def, fmt.Errorf( - "%w: field=%q expected=%T got=%T", - errTypeMismatch, field, def, fieldNode, - ) - } - - return result, nil -} diff --git a/pipeline/ctor/utils.go b/pipeline/ctor/utils.go new file mode 100644 index 000000000..9d6126993 --- /dev/null +++ b/pipeline/ctor/utils.go @@ -0,0 +1,50 @@ +package ctor + +import ( + "errors" + "fmt" +) + +var ( + ErrFieldNotFound = errors.New("field not found") + ErrTypeMismatch = errors.New("type mismatch") +) + +func GetAny(node map[string]any, field string) (any, error) { + res, has := node[field] + if !has { + return nil, fmt.Errorf("field=%q: %w", field, ErrFieldNotFound) + } + + return res, nil +} + +func Must[T any](v any) (T, error) { + var def T + + result, ok := v.(T) + if !ok { + return def, fmt.Errorf("%w: expected=%T got=%T", ErrTypeMismatch, def, v) + } + + return result, nil +} + +func Get[T any](node map[string]any, field string) (T, error) { + var def T + + fieldNode, err := GetAny(node, field) + if err != nil { + return def, err + } + + result, ok := fieldNode.(T) + if !ok { + return def, fmt.Errorf( + "%w: field=%q expected=%T got=%T", + ErrTypeMismatch, field, def, fieldNode, + ) + } + + return result, nil +} diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go index 1b936002e..ff0e8d7df 100644 --- a/pipeline/do_if/ctor.go +++ b/pipeline/do_if/ctor.go @@ -7,6 +7,7 @@ import ( "github.com/ozontech/file.d/decoder" "github.com/ozontech/file.d/pipeline/checker" + "github.com/ozontech/file.d/pipeline/ctor" "github.com/ozontech/file.d/pipeline/logic" ) @@ -49,7 +50,7 @@ func NewFromMap(m map[string]any) (*Checker, error) { } func extractDoIfNode(node map[string]any) (Node, error) { - opName, err := get[string](node, fieldNameOp) + opName, err := ctor.Get[string](node, fieldNameOp) if err != nil { return nil, err } @@ -84,16 +85,16 @@ func extractFieldOpNode(opName string, node map[string]any) (Node, error) { var result Node var err error - fieldPath, err := get[string](node, fieldNameField) + fieldPath, err := ctor.Get[string](node, fieldNameField) if err != nil { return nil, err } caseSensitive := true - caseSensitiveNode, err := get[bool](node, fieldNameCaseSensitive) + caseSensitiveNode, err := ctor.Get[bool](node, fieldNameCaseSensitive) if err == nil { caseSensitive = caseSensitiveNode - } else if errors.Is(err, errTypeMismatch) { + } else if errors.Is(err, ctor.ErrTypeMismatch) { return nil, err } @@ -111,7 +112,7 @@ func extractFieldOpNode(opName string, node map[string]any) (Node, error) { } func extractOpValues(node map[string]any) ([][]byte, error) { - valuesRaw, err := getAny(node, fieldNameValues) + valuesRaw, 
err := ctor.GetAny(node, fieldNameValues) if err != nil { return nil, err } @@ -146,17 +147,17 @@ func extractOpValuesFromArr(values []any) ([][]byte, error) { } func extractLengthCmpOpNode(opName string, node map[string]any) (Node, error) { - fieldPath, err := get[string](node, fieldNameField) + fieldPath, err := ctor.Get[string](node, fieldNameField) if err != nil { return nil, err } - cmpOp, err := get[string](node, fieldNameCmpOp) + cmpOp, err := ctor.Get[string](node, fieldNameCmpOp) if err != nil { return nil, err } - cmpValueRaw, err := getAny(node, fieldNameCmpValue) + cmpValueRaw, err := ctor.GetAny(node, fieldNameCmpValue) if err != nil { return nil, err } @@ -170,17 +171,17 @@ func extractLengthCmpOpNode(opName string, node map[string]any) (Node, error) { } func extractTsCmpOpNode(_ string, node map[string]any) (Node, error) { - fieldPath, err := get[string](node, fieldNameField) + fieldPath, err := ctor.Get[string](node, fieldNameField) if err != nil { return nil, err } - cmpOp, err := get[string](node, fieldNameCmpOp) + cmpOp, err := ctor.Get[string](node, fieldNameCmpOp) if err != nil { return nil, err } - rawCmpValue, err := get[string](node, fieldNameCmpValue) + rawCmpValue, err := ctor.Get[string](node, fieldNameCmpValue) if err != nil { return nil, err } @@ -203,32 +204,32 @@ func extractTsCmpOpNode(_ string, node map[string]any) (Node, error) { } format := defaultTsFormat - str, err := get[string](node, fieldNameFormat) + str, err := ctor.Get[string](node, fieldNameFormat) if err == nil { format = str - } else if errors.Is(err, errTypeMismatch) { + } else if errors.Is(err, ctor.ErrTypeMismatch) { return nil, err } cmpValueShift := time.Duration(0) - str, err = get[string](node, fieldNameCmpValueShift) + str, err = ctor.Get[string](node, fieldNameCmpValueShift) if err == nil { cmpValueShift, err = time.ParseDuration(str) if err != nil { return nil, fmt.Errorf("parse cmp value shift: %w", err) } - } else if errors.Is(err, errTypeMismatch) { + } else if errors.Is(err, ctor.ErrTypeMismatch) { return nil, err } updateInterval := defaultTsCmpValUpdateInterval - str, err = get[string](node, fieldNameUpdateInterval) + str, err = ctor.Get[string](node, fieldNameUpdateInterval) if err == nil { updateInterval, err = time.ParseDuration(str) if err != nil { return nil, fmt.Errorf("parse update interval: %w", err) } - } else if errors.Is(err, errTypeMismatch) { + } else if errors.Is(err, ctor.ErrTypeMismatch) { return nil, err } @@ -236,7 +237,7 @@ func extractTsCmpOpNode(_ string, node map[string]any) (Node, error) { } func extractCheckTypeOpNode(_ string, node map[string]any) (Node, error) { - fieldPath, err := get[string](node, fieldNameField) + fieldPath, err := ctor.Get[string](node, fieldNameField) if err != nil { return nil, err } @@ -255,7 +256,7 @@ func extractCheckTypeOpNode(_ string, node map[string]any) (Node, error) { } func extractLogicalOpNode(opName string, node map[string]any) (Node, error) { - rawOperands, err := get[[]any](node, fieldNameOperands) + rawOperands, err := ctor.Get[[]any](node, fieldNameOperands) if err != nil { return nil, err } diff --git a/pipeline/do_if/ctor_utils.go b/pipeline/do_if/ctor_utils.go deleted file mode 100644 index 9b9a8a966..000000000 --- a/pipeline/do_if/ctor_utils.go +++ /dev/null @@ -1,39 +0,0 @@ -package do_if - -import ( - "errors" - "fmt" -) - -var ( - errFieldNotFound = errors.New("field not found") - errTypeMismatch = errors.New("type mismatch") -) - -func getAny(node map[string]any, field string) (any, error) { - res, has := 
node[field] - if !has { - return nil, fmt.Errorf("field=%q: %w", field, errFieldNotFound) - } - - return res, nil -} - -func get[T any](node map[string]any, field string) (T, error) { - var def T - - fieldNode, err := getAny(node, field) - if err != nil { - return def, err - } - - result, ok := fieldNode.(T) - if !ok { - return def, fmt.Errorf( - "field=%q expected=%T got=%T: %w", - field, def, fieldNode, errTypeMismatch, - ) - } - - return result, nil -} From 092a7be2087a7e8ceac953feef0033ef259a449f Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 17 Jul 2025 01:51:44 +0300 Subject: [PATCH 42/75] Deduplicate logical node extraction --- pipeline/antispam/ctor.go | 49 ++------------ pipeline/antispam/node_logical.go | 24 +------ pipeline/antispam/rule.go | 5 +- pipeline/ctor/ctor.go | 105 ++++++++++++++++++++++++++++++ pipeline/ctor/utils.go | 4 +- pipeline/do_if/ctor.go | 59 ++--------------- pipeline/do_if/ctor_test.go | 4 +- pipeline/do_if/do_if_test.go | 4 +- pipeline/do_if/logical_op.go | 21 +----- 9 files changed, 133 insertions(+), 142 deletions(-) create mode 100644 pipeline/ctor/ctor.go diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index 9c550bbfa..51bbdaa7d 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -8,7 +8,6 @@ import ( "github.com/ozontech/file.d/decoder" "github.com/ozontech/file.d/pipeline/checker" "github.com/ozontech/file.d/pipeline/ctor" - "github.com/ozontech/file.d/pipeline/logic" ) const ( @@ -86,7 +85,7 @@ func extractRule(node map[string]any) (Rule, error) { return Rule{}, err } - cond, err := extractNode(condNode) + cond, err := ctor.Extract(condNode, opToNonLogicalCtor, newLogicalNode) if err != nil { return Rule{}, err } @@ -104,58 +103,18 @@ func extractRule(node map[string]any) (Rule, error) { return newRule(name, cond, threshold) } -func extractNode(node map[string]any) (Node, error) { - opName, err := ctor.Get[string](node, fieldNameOp) - if err != nil { - return nil, err - } - +func opToNonLogicalCtor(opName string) func(string, map[string]any) (Node, error) { switch opName { - case - logic.AndTag, - logic.OrTag, - logic.NotTag: - return extractLogicalNode(opName, node) case checker.OpEqualTag, checker.OpContainsTag, checker.OpPrefixTag, checker.OpSuffixTag, checker.OpRegexTag: - return extractValueNode(opName, node) + return extractValueNode default: - return nil, fmt.Errorf("unknown op: %s", opName) - } -} - -func extractLogicalNode(op string, node map[string]any) (Node, error) { - rawOperands, err := ctor.Get[[]any](node, fieldNameOperands) - if err != nil { - return nil, err + return nil } - - operands := make([]Node, 0) - - for _, rawOperand := range rawOperands { - operandNode, err := ctor.Must[map[string]any](rawOperand) - if err != nil { - return nil, fmt.Errorf("logical node operand type mismatch: %w", err) - } - - operand, err := extractNode(operandNode) - if err != nil { - return nil, fmt.Errorf("extract operand for logical op %q: %w", op, err) - } - - operands = append(operands, operand) - } - - result, err := newLogicalNode(op, operands) - if err != nil { - return nil, fmt.Errorf("init logical node: %w", err) - } - - return result, nil } func extractValueNode(op string, node map[string]any) (Node, error) { diff --git a/pipeline/antispam/node_logical.go b/pipeline/antispam/node_logical.go index 79a9be901..66ac415fe 100644 --- a/pipeline/antispam/node_logical.go +++ b/pipeline/antispam/node_logical.go @@ -1,9 +1,6 @@ package antispam import ( - "errors" - "fmt" - 
"github.com/ozontech/file.d/pipeline/logic" ) @@ -12,26 +9,11 @@ type logicalNode struct { operands []Node } -func newLogicalNode(op string, operands []Node) (*logicalNode, error) { - if len(operands) == 0 { - return nil, errors.New("logical op must have at least one operand") - } - - logicOp, err := logic.StringToOp(op) - if err != nil { - return nil, err - } - - if logicOp == logic.Not { - if len(operands) != 1 { - return nil, fmt.Errorf("logical not must have exactly one operand, got %d", len(operands)) - } - } - +func newLogicalNode(op logic.Op, operands []Node) Node { return &logicalNode{ - op: logicOp, + op: op, operands: operands, - }, nil + } } func (n *logicalNode) getType() nodeType { diff --git a/pipeline/antispam/rule.go b/pipeline/antispam/rule.go index bfbe29f52..de5290a93 100644 --- a/pipeline/antispam/rule.go +++ b/pipeline/antispam/rule.go @@ -5,6 +5,7 @@ import ( "strings" "github.com/ozontech/file.d/cfg/matchrule" + "github.com/ozontech/file.d/pipeline/ctor" "github.com/ozontech/file.d/pipeline/logic" ) @@ -56,7 +57,7 @@ func matchRuleToNode(rule matchrule.Rule, dataTypeTag string) (Node, error) { return node, nil } - return newLogicalNode(logic.NotTag, []Node{node}) + return ctor.NewLogicalNode(logic.NotTag, []Node{node}, newLogicalNode) } func matchRuleSetToNode(ruleSet matchrule.RuleSet, dataTypeTag string) (Node, error) { @@ -69,7 +70,7 @@ func matchRuleSetToNode(ruleSet matchrule.RuleSet, dataTypeTag string) (Node, er operands = append(operands, operand) } - return newLogicalNode(matchrule.CondToString(ruleSet.Cond), operands) + return ctor.NewLogicalNode(matchrule.CondToString(ruleSet.Cond), operands, newLogicalNode) } func exceptionToNode(exception Exception) (Node, error) { diff --git a/pipeline/ctor/ctor.go b/pipeline/ctor/ctor.go new file mode 100644 index 000000000..85fccdb87 --- /dev/null +++ b/pipeline/ctor/ctor.go @@ -0,0 +1,105 @@ +package ctor + +import ( + "errors" + "fmt" + + "github.com/ozontech/file.d/pipeline/logic" +) + +const ( + fieldNameOp = "op" + fieldNameOperands = "operands" +) + +type Node = map[string]any + +func Extract[T any]( + root Node, + opToNonLogicalCtor func(string) func(string, Node) (T, error), + simpleLogicCtor func(op logic.Op, operands []T) T, +) (T, error) { + var ( + extract func(Node) (T, error) + extractLogical func(string, Node) (T, error) + ) + + extract = func(node Node) (T, error) { + var def T + + opName, err := Get[string](node, fieldNameOp) + if err != nil { + return def, err + } + + switch opName { + case logic.AndTag, logic.OrTag, logic.NotTag: + return extractLogical(opName, node) + } + + if curCtor := opToNonLogicalCtor(opName); curCtor != nil { + return curCtor(opName, node) + } + + return def, fmt.Errorf("unknown op: %s", opName) + } + + extractLogical = func(op string, node Node) (T, error) { + var def T + + rawOperands, err := Get[[]any](node, fieldNameOperands) + if err != nil { + return def, err + } + + operands := make([]T, 0) + + for _, rawOperand := range rawOperands { + var operandNode Node + operandNode, err = Must[Node](rawOperand) + if err != nil { + return def, fmt.Errorf("logical node operand type mismatch: %w", err) + } + + var operand T + operand, err = Extract(operandNode, opToNonLogicalCtor, simpleLogicCtor) + if err != nil { + return def, fmt.Errorf("extract operand for logical op %q: %w", op, err) + } + + operands = append(operands, operand) + } + + result, err := NewLogicalNode(op, operands, simpleLogicCtor) + if err != nil { + return def, fmt.Errorf("init logical node: %w", err) + } + + 
return result, nil + } + + return extract(root) +} + +func NewLogicalNode[T any]( + op string, + operands []T, + simpleLogicCtor func(op logic.Op, operands []T) T, +) (T, error) { + var def T + + if len(operands) == 0 { + return def, errors.New("logical op must have at least one operand") + } + + logicOp, err := logic.StringToOp(op) + if err != nil { + return def, err + } + + if logicOp == logic.Not && len(operands) != 1 { + return def, fmt.Errorf("logical not must have exactly one operand, got %d", len(operands)) + } + + return simpleLogicCtor(logicOp, operands), nil +} diff --git a/pipeline/ctor/utils.go b/pipeline/ctor/utils.go index 9d6126993..e427995c7 100644 --- a/pipeline/ctor/utils.go +++ b/pipeline/ctor/utils.go @@ -10,7 +10,7 @@ var ( ErrTypeMismatch = errors.New("type mismatch") ) -func GetAny(node map[string]any, field string) (any, error) { +func GetAny(node Node, field string) (any, error) { res, has := node[field] if !has { return nil, fmt.Errorf("field=%q: %w", field, ErrFieldNotFound) @@ -30,7 +30,7 @@ func Must[T any](v any) (T, error) { return result, nil } -func Get[T any](node map[string]any, field string) (T, error) { +func Get[T any](node Node, field string) (T, error) { var def T fieldNode, err := GetAny(node, field) diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go index ff0e8d7df..8d37a89d8 100644 --- a/pipeline/do_if/ctor.go +++ b/pipeline/do_if/ctor.go @@ -8,11 +8,9 @@ import ( "github.com/ozontech/file.d/decoder" "github.com/ozontech/file.d/pipeline/checker" "github.com/ozontech/file.d/pipeline/ctor" - "github.com/ozontech/file.d/pipeline/logic" ) const ( - fieldNameOp = "op" fieldNameField = "field" fieldNameCaseSensitive = "case_sensitive" @@ -34,12 +32,10 @@ const ( defaultTsCmpValUpdateInterval = 10 * time.Second defaultTsFormat = "rfc3339nano" - - fieldNameOperands = "operands" ) func NewFromMap(m map[string]any) (*Checker, error) { - root, err := extractDoIfNode(m) + root, err := ctor.Extract(m, opToNonLogicalCtor, newLogicalOpNode) if err != nil { return nil, fmt.Errorf("extract nodes: %w", err) } @@ -49,35 +45,25 @@ func NewFromMap(m map[string]any) (*Checker, error) { }, nil } -func extractDoIfNode(node map[string]any) (Node, error) { - opName, err := ctor.Get[string](node, fieldNameOp) - if err != nil { - return nil, err - } - +func opToNonLogicalCtor(opName string) func(string, map[string]any) (Node, error) { switch opName { - case - logic.AndTag, - logic.OrTag, - logic.NotTag: - return extractLogicalOpNode(opName, node) case checker.OpEqualTag, checker.OpContainsTag, checker.OpPrefixTag, checker.OpSuffixTag, checker.OpRegexTag: - return extractFieldOpNode(opName, node) + return extractFieldOpNode case "byte_len_cmp", "array_len_cmp": - return extractLengthCmpOpNode(opName, node) + return extractLengthCmpOpNode case "ts_cmp": - return extractTsCmpOpNode(opName, node) + return extractTsCmpOpNode case "check_type": - return extractCheckTypeOpNode(opName, node) + return extractCheckTypeOpNode default: - return nil, fmt.Errorf("unknown op: %s", opName) + return nil } } @@ -254,34 +240,3 @@ func extractCheckTypeOpNode(_ string, node map[string]any) (Node, error) { return result, nil } - -func extractLogicalOpNode(opName string, node map[string]any) (Node, error) { - rawOperands, err := ctor.Get[[]any](node, fieldNameOperands) - if err != nil { - return nil, err - } - - operands := make([]Node, 0) - - for _, rawOperand := range rawOperands { - operandMap, ok := rawOperand.(map[string]any) - if !ok { - return nil, fmt.Errorf( - "logical node 
operand type mismatch: expected=map[string]any got=%T", - rawOperand) - } - - operand, err := extractDoIfNode(operandMap) - if err != nil { - return nil, fmt.Errorf("extract operand for logical op %q: %w", opName, err) - } - operands = append(operands, operand) - } - - result, err := newLogicalNode(opName, operands) - if err != nil { - return nil, fmt.Errorf("init logical node: %w", err) - } - - return result, nil -} diff --git a/pipeline/do_if/ctor_test.go b/pipeline/do_if/ctor_test.go index a69d4713d..61af3b81e 100644 --- a/pipeline/do_if/ctor_test.go +++ b/pipeline/do_if/ctor_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/bitly/go-simplejson" + "github.com/ozontech/file.d/pipeline/ctor" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -54,9 +55,10 @@ func buildDoIfTree(node *doIfTreeNode) (Node, error) { } operands = append(operands, operand) } - return newLogicalNode( + return ctor.NewLogicalNode( node.logicalOp, operands, + newLogicalOpNode, ) case node.lenCmpOp != "": return newLenCmpOpNode(node.lenCmpOp, node.fieldName, node.cmpOp, node.cmpValue) diff --git a/pipeline/do_if/do_if_test.go b/pipeline/do_if/do_if_test.go index 957bdd292..97c28a6e1 100644 --- a/pipeline/do_if/do_if_test.go +++ b/pipeline/do_if/do_if_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/ozontech/file.d/pipeline/checker" + "github.com/ozontech/file.d/pipeline/ctor" "github.com/ozontech/file.d/pipeline/logic" insaneJSON "github.com/ozontech/insane-json" "github.com/stretchr/testify/assert" @@ -55,9 +56,10 @@ func buildTree(node treeNode) (Node, error) { } operands = append(operands, operand) } - return newLogicalNode( + return ctor.NewLogicalNode( node.logicalOp, operands, + newLogicalOpNode, ) case node.lenCmpOp != "": return newLenCmpOpNode(node.lenCmpOp, node.fieldName, node.cmpOp, node.cmpValue) diff --git a/pipeline/do_if/logical_op.go b/pipeline/do_if/logical_op.go index 6fb05b7a5..7ea6963da 100644 --- a/pipeline/do_if/logical_op.go +++ b/pipeline/do_if/logical_op.go @@ -131,26 +131,11 @@ type logicalNode struct { operands []Node } -func newLogicalNode(op string, operands []Node) (Node, error) { - if len(operands) == 0 { - return nil, errors.New("logical op must have at least one operand") - } - - logicOp, err := logic.StringToOp(op) - if err != nil { - return nil, err - } - - if logicOp == logic.Not { - if len(operands) != 1 { - return nil, fmt.Errorf("logical not must have exactly one operand, got %d", len(operands)) - } - } - +func newLogicalOpNode(op logic.Op, operands []Node) Node { return &logicalNode{ - op: logicOp, + op: op, operands: operands, - }, nil + } } func (n *logicalNode) Type() nodeType { From df1277df321398ebb4e6cdc2a20b1099c16b1319 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Wed, 23 Jul 2025 12:36:39 +0300 Subject: [PATCH 43/75] Refactor --- cfg/config.go | 17 +++++++++++++++++ decoder/common.go | 22 ---------------------- decoder/json.go | 3 ++- pipeline/antispam/ctor.go | 6 +++--- pipeline/do_if/ctor.go | 4 ++-- 5 files changed, 24 insertions(+), 28 deletions(-) diff --git a/cfg/config.go b/cfg/config.go index 4c106b928..b6ca25250 100644 --- a/cfg/config.go +++ b/cfg/config.go @@ -731,3 +731,20 @@ func mergeYAMLs(a, b map[interface{}]interface{}) map[interface{}]interface{} { } return merged } + +func AnyToInt(v any) (int, error) { + switch vNum := v.(type) { + case int: + return vNum, nil + case float64: + return int(vNum), nil + case json.Number: + vInt64, err := vNum.Int64() + if err != nil { + return 0, err + } + return int(vInt64), 
nil + default: + return 0, fmt.Errorf("not convertable to int: value=%v type=%T", v, v) + } +} diff --git a/decoder/common.go b/decoder/common.go index f352014ef..16f6ef1e9 100644 --- a/decoder/common.go +++ b/decoder/common.go @@ -1,27 +1,5 @@ package decoder -import ( - "encoding/json" - "fmt" -) - -func AnyToInt(v any) (int, error) { - switch vNum := v.(type) { - case int: - return vNum, nil - case float64: - return int(vNum), nil - case json.Number: - vInt64, err := vNum.Int64() - if err != nil { - return 0, err - } - return int(vInt64), nil - default: - return 0, fmt.Errorf("not convertable to int: value=%v type=%T", v, v) - } -} - // atoi is allocation free ASCII number to integer conversion func atoi(b []byte) (int, bool) { if len(b) == 0 { diff --git a/decoder/json.go b/decoder/json.go index 400bbd8ba..8a3d945c2 100644 --- a/decoder/json.go +++ b/decoder/json.go @@ -6,6 +6,7 @@ import ( "slices" "sync" + "github.com/ozontech/file.d/cfg" insaneJSON "github.com/ozontech/insane-json" "github.com/tidwall/gjson" ) @@ -140,7 +141,7 @@ func extractJsonParams(params map[string]any) (jsonParams, error) { return jsonParams{}, fmt.Errorf("%q must be map", jsonMaxFieldsSizeParam) } for k, v := range maxFieldsSizeMap { - vInt, err := AnyToInt(v) + vInt, err := cfg.AnyToInt(v) if err != nil { return jsonParams{}, fmt.Errorf("each value in %q must be int", jsonMaxFieldsSizeParam) } diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index 51bbdaa7d..264842ac2 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -5,7 +5,7 @@ import ( "fmt" "strings" - "github.com/ozontech/file.d/decoder" + "github.com/ozontech/file.d/cfg" "github.com/ozontech/file.d/pipeline/checker" "github.com/ozontech/file.d/pipeline/ctor" ) @@ -31,7 +31,7 @@ func extractAntispam(node map[string]any) ([]Rule, int, error) { return nil, 0, err } - threshold, err := decoder.AnyToInt(thresholdNode) + threshold, err := cfg.AnyToInt(thresholdNode) if err != nil { return nil, 0, err } @@ -95,7 +95,7 @@ func extractRule(node map[string]any) (Rule, error) { return Rule{}, err } - threshold, err := decoder.AnyToInt(thresholdRaw) + threshold, err := cfg.AnyToInt(thresholdRaw) if err != nil { return Rule{}, err } diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go index 8d37a89d8..eb31e40c6 100644 --- a/pipeline/do_if/ctor.go +++ b/pipeline/do_if/ctor.go @@ -5,7 +5,7 @@ import ( "fmt" "time" - "github.com/ozontech/file.d/decoder" + "github.com/ozontech/file.d/cfg" "github.com/ozontech/file.d/pipeline/checker" "github.com/ozontech/file.d/pipeline/ctor" ) @@ -148,7 +148,7 @@ func extractLengthCmpOpNode(opName string, node map[string]any) (Node, error) { return nil, err } - cmpValue, err := decoder.AnyToInt(cmpValueRaw) + cmpValue, err := cfg.AnyToInt(cmpValueRaw) if err != nil { return nil, err } From dce0cca9dd68dc53270abf0ed789fddbcdb0eeaa Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Wed, 23 Jul 2025 14:32:59 +0300 Subject: [PATCH 44/75] Remove label --- pipeline/antispam/antispammer.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index d92eb75f2..e832976e6 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -124,7 +124,6 @@ func (a *Antispammer) IsSpam( rlMapKey := id threshold := a.threshold -loop: for i := range a.rules { rule := &a.rules[i] if rule.Condition.check(event, []byte(name), meta) { @@ -136,11 +135,11 @@ loop: return false case 
thresholdBlocked: return true - default: - rlMapKey = rule.RLMapKey - threshold = rule.Threshold - break loop } + + rlMapKey = rule.RLMapKey + threshold = rule.Threshold + break } } From 237105fabe5ad05789284209047397f0bbcf1a24 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 24 Jul 2025 14:48:54 +0300 Subject: [PATCH 45/75] Fix --- pipeline/antispam/antispammer.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index e832976e6..6997b7952 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -158,7 +158,7 @@ func (a *Antispammer) IsSpam( if !has { a.mu.Lock() - if newSrc, has := a.sources[id]; has { + if newSrc, has := a.sources[rlMapKey]; has { src = newSrc } else { src = source{ @@ -167,7 +167,7 @@ func (a *Antispammer) IsSpam( timestamp: &atomic.Int64{}, } src.timestamp.Add(timeEventSeconds) - a.sources[id] = src + a.sources[rlMapKey] = src } a.mu.Unlock() } From c86c7032aa25a3a50bedfd71be649078f899f7e2 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 24 Jul 2025 16:43:30 +0300 Subject: [PATCH 46/75] Restore ctor utils --- pipeline/do_if/ctor_utils.go | 50 ++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 pipeline/do_if/ctor_utils.go diff --git a/pipeline/do_if/ctor_utils.go b/pipeline/do_if/ctor_utils.go new file mode 100644 index 000000000..95eda5d1a --- /dev/null +++ b/pipeline/do_if/ctor_utils.go @@ -0,0 +1,50 @@ +package do_if + +import ( + "errors" + "fmt" +) + +var ( + errFieldNotFound = errors.New("field not found") + errTypeMismatch = errors.New("type mismatch") +) + +func getAny(node map[string]any, field string) (any, error) { + res, has := node[field] + if !has { + return nil, fmt.Errorf("field=%q: %w", field, errFieldNotFound) + } + + return res, nil +} + +func get[T any](node map[string]any, field string) (T, error) { + var def T + + fieldNode, err := getAny(node, field) + if err != nil { + return def, err + } + + result, ok := fieldNode.(T) + if !ok { + return def, fmt.Errorf( + "%w: field=%q expected=%T got=%T", + errTypeMismatch, field, def, fieldNode, + ) + } + + return result, nil +} + +func must[T any](v any) (T, error) { + var def T + + result, ok := v.(T) + if !ok { + return def, fmt.Errorf("%w: expected=%T got=%T", errTypeMismatch, def, v) + } + + return result, nil +} From 6197e13a3ade23202166f22900830926f63d55c7 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 24 Jul 2025 17:33:52 +0300 Subject: [PATCH 47/75] Refactor --- pipeline/antispam/ctor.go | 12 +++++------ pipeline/antispam/node_value.go | 6 +++--- pipeline/do_if/ctor.go | 12 +++++------ .../data_checker}/checker.go | 20 ++++++++++--------- .../data_checker}/checker_test.go | 10 +++++----- pipeline/do_if/do_if_test.go | 8 ++++---- pipeline/do_if/field_op.go | 8 ++++---- 7 files changed, 39 insertions(+), 37 deletions(-) rename pipeline/{checker => do_if/data_checker}/checker.go (92%) rename pipeline/{checker => do_if/data_checker}/checker_test.go (87%) diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index 264842ac2..b84c9daf7 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -6,8 +6,8 @@ import ( "strings" "github.com/ozontech/file.d/cfg" - "github.com/ozontech/file.d/pipeline/checker" "github.com/ozontech/file.d/pipeline/ctor" + "github.com/ozontech/file.d/pipeline/do_if/data_checker" ) const ( @@ -106,11 +106,11 @@ func extractRule(node map[string]any) (Rule, error) { func 
opToNonLogicalCtor(opName string) func(string, map[string]any) (Node, error) { switch opName { case - checker.OpEqualTag, - checker.OpContainsTag, - checker.OpPrefixTag, - checker.OpSuffixTag, - checker.OpRegexTag: + data_checker.OpEqualTag, + data_checker.OpContainsTag, + data_checker.OpPrefixTag, + data_checker.OpSuffixTag, + data_checker.OpRegexTag: return extractValueNode default: return nil diff --git a/pipeline/antispam/node_value.go b/pipeline/antispam/node_value.go index 2f9cfb8df..35de9a35a 100644 --- a/pipeline/antispam/node_value.go +++ b/pipeline/antispam/node_value.go @@ -4,7 +4,7 @@ import ( "fmt" "strings" - "github.com/ozontech/file.d/pipeline/checker" + "github.com/ozontech/file.d/pipeline/do_if/data_checker" ) type dataType int @@ -52,7 +52,7 @@ func stringToDataType(s string) (dataType, string, error) { type valueNode struct { dataType dataType metaKey string - checker *checker.Checker + checker data_checker.DataChecker } func newValueNode( @@ -61,7 +61,7 @@ func newValueNode( values [][]byte, checkDataTag string, ) (*valueNode, error) { - c, err := checker.New(opTag, caseSensitive, values) + c, err := data_checker.New(opTag, caseSensitive, values) if err != nil { return nil, fmt.Errorf("init checker: %w", err) } diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go index eb31e40c6..944458c25 100644 --- a/pipeline/do_if/ctor.go +++ b/pipeline/do_if/ctor.go @@ -6,8 +6,8 @@ import ( "time" "github.com/ozontech/file.d/cfg" - "github.com/ozontech/file.d/pipeline/checker" "github.com/ozontech/file.d/pipeline/ctor" + "github.com/ozontech/file.d/pipeline/do_if/data_checker" ) const ( @@ -48,11 +48,11 @@ func NewFromMap(m map[string]any) (*Checker, error) { func opToNonLogicalCtor(opName string) func(string, map[string]any) (Node, error) { switch opName { case - checker.OpEqualTag, - checker.OpContainsTag, - checker.OpPrefixTag, - checker.OpSuffixTag, - checker.OpRegexTag: + data_checker.OpEqualTag, + data_checker.OpContainsTag, + data_checker.OpPrefixTag, + data_checker.OpSuffixTag, + data_checker.OpRegexTag: return extractFieldOpNode case "byte_len_cmp", diff --git a/pipeline/checker/checker.go b/pipeline/do_if/data_checker/checker.go similarity index 92% rename from pipeline/checker/checker.go rename to pipeline/do_if/data_checker/checker.go index de163ab56..df52375c9 100644 --- a/pipeline/checker/checker.go +++ b/pipeline/do_if/data_checker/checker.go @@ -1,4 +1,4 @@ -package checker +package data_checker import ( "bytes" @@ -59,7 +59,7 @@ func stringToOp(s string) (op, error) { } } -type Checker struct { +type DataChecker struct { op op caseSensitive bool values [][]byte @@ -70,9 +70,11 @@ type Checker struct { maxValLen int } -func New(opTag string, caseSensitive bool, values [][]byte) (*Checker, error) { +func New(opTag string, caseSensitive bool, values [][]byte) (DataChecker, error) { + var def DataChecker + if len(values) == 0 { - return nil, errors.New("values are not provided") + return def, errors.New("values are not provided") } var vals [][]byte @@ -82,7 +84,7 @@ func New(opTag string, caseSensitive bool, values [][]byte) (*Checker, error) { curOp, err := stringToOp(opTag) if err != nil { - return nil, err + return def, err } if curOp == opRegex { @@ -90,7 +92,7 @@ func New(opTag string, caseSensitive bool, values [][]byte) (*Checker, error) { for _, v := range values { re, err := regexp.Compile(string(v)) if err != nil { - return nil, fmt.Errorf("failed to compile regex %q: %w", v, err) + return def, fmt.Errorf("failed to compile regex %q: %w", v, err) } 
reValues = append(reValues, re) } @@ -125,7 +127,7 @@ func New(opTag string, caseSensitive bool, values [][]byte) (*Checker, error) { } } - return &Checker{ + return DataChecker{ op: curOp, caseSensitive: caseSensitive, values: vals, @@ -136,7 +138,7 @@ func New(opTag string, caseSensitive bool, values [][]byte) (*Checker, error) { }, nil } -func (n *Checker) Check(data []byte) bool { +func (n *DataChecker) Check(data []byte) bool { // fast check for data if n.op != opRegex && len(data) < n.minValLen { return false @@ -228,7 +230,7 @@ func assertEqualValues(a, b [][]byte, msg string) { } } -func Equal(a, b *Checker) (err error) { +func Equal(a, b *DataChecker) (err error) { defer func() { if r := recover(); r != nil { err = errors.New(r.(string)) diff --git a/pipeline/checker/checker_test.go b/pipeline/do_if/data_checker/checker_test.go similarity index 87% rename from pipeline/checker/checker_test.go rename to pipeline/do_if/data_checker/checker_test.go index 7b671c8a6..79929645e 100644 --- a/pipeline/checker/checker_test.go +++ b/pipeline/do_if/data_checker/checker_test.go @@ -1,4 +1,4 @@ -package checker +package data_checker import ( "testing" @@ -12,7 +12,7 @@ func TestCheckerCtor(t *testing.T) { caseSensitive bool values [][]byte - expected *Checker + expected DataChecker } for _, tt := range []testCase{ @@ -21,7 +21,7 @@ func TestCheckerCtor(t *testing.T) { caseSensitive: true, values: [][]byte{[]byte(`test-111`), []byte(`test-2`), []byte(`test-3`), []byte(`test-12345`)}, - expected: &Checker{ + expected: DataChecker{ op: opEqual, caseSensitive: true, values: nil, @@ -50,7 +50,7 @@ func TestCheckerCtor(t *testing.T) { []byte(`test-11`), }, - expected: &Checker{ + expected: DataChecker{ op: opContains, caseSensitive: false, values: [][]byte{ @@ -66,6 +66,6 @@ func TestCheckerCtor(t *testing.T) { } { got, err := New(tt.opTag, tt.caseSensitive, tt.values) require.NoErrorf(t, err, "failed to init checker") - require.NoError(t, Equal(got, tt.expected), "checkers are not equal") + require.NoError(t, Equal(&got, &tt.expected), "checkers are not equal") } } diff --git a/pipeline/do_if/do_if_test.go b/pipeline/do_if/do_if_test.go index 97c28a6e1..bd10f1dde 100644 --- a/pipeline/do_if/do_if_test.go +++ b/pipeline/do_if/do_if_test.go @@ -8,8 +8,8 @@ import ( "testing" "time" - "github.com/ozontech/file.d/pipeline/checker" "github.com/ozontech/file.d/pipeline/ctor" + "github.com/ozontech/file.d/pipeline/do_if/data_checker" "github.com/ozontech/file.d/pipeline/logic" insaneJSON "github.com/ozontech/insane-json" "github.com/stretchr/testify/assert" @@ -86,7 +86,7 @@ func checkNode(t *testing.T, want, got Node) { gotNode := got.(*fieldOpNode) assert.Equal(t, 0, slices.Compare[[]string](wantNode.fieldPath, gotNode.fieldPath)) assert.Equal(t, wantNode.fieldPathStr, gotNode.fieldPathStr) - assert.NoError(t, checker.Equal(wantNode.checker, gotNode.checker)) + assert.NoError(t, data_checker.Equal(&wantNode.checker, &gotNode.checker)) case NodeLogicalOp: wantNode := want.(*logicalNode) gotNode := got.(*logicalNode) @@ -120,8 +120,8 @@ func checkNode(t *testing.T, want, got Node) { func TestBuildNodes(t *testing.T) { timestamp := time.Now() - mustNewChecker := func(op string, caseSensitive bool, values [][]byte) *checker.Checker { - c, err := checker.New(op, caseSensitive, values) + mustNewChecker := func(op string, caseSensitive bool, values [][]byte) data_checker.DataChecker { + c, err := data_checker.New(op, caseSensitive, values) if err != nil { panic(err) } diff --git a/pipeline/do_if/field_op.go 
b/pipeline/do_if/field_op.go index f965cb3d3..ce4f41cf9 100644 --- a/pipeline/do_if/field_op.go +++ b/pipeline/do_if/field_op.go @@ -6,7 +6,7 @@ import ( "slices" "github.com/ozontech/file.d/cfg" - "github.com/ozontech/file.d/pipeline/checker" + "github.com/ozontech/file.d/pipeline/do_if/data_checker" insaneJSON "github.com/ozontech/insane-json" ) @@ -165,7 +165,7 @@ Result: type fieldOpNode struct { fieldPath []string fieldPathStr string - checker *checker.Checker + checker data_checker.DataChecker } func newFieldOpNode(op string, field string, caseSensitive bool, values [][]byte) (Node, error) { @@ -173,7 +173,7 @@ func newFieldOpNode(op string, field string, caseSensitive bool, values [][]byte return nil, errors.New("values are not provided") } - c, err := checker.New(op, caseSensitive, values) + c, err := data_checker.New(op, caseSensitive, values) if err != nil { return nil, err } @@ -214,5 +214,5 @@ func (n *fieldOpNode) isEqualTo(n2 Node, _ int) error { ) } - return checker.Equal(n.checker, n2f.checker) + return data_checker.Equal(&n.checker, &n2f.checker) } From 6b424d59b8fea21464017edaeb5e14e06c19ea7d Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 24 Jul 2025 17:52:14 +0300 Subject: [PATCH 48/75] Refactor --- pipeline/do_if/check_type_op.go | 6 +++++- pipeline/do_if/check_type_test.go | 4 ++-- pipeline/do_if/do_if.go | 5 +++-- pipeline/do_if/field_op.go | 6 +++++- pipeline/do_if/len_cmp_op.go | 6 +++++- pipeline/do_if/logical_op.go | 12 ++++++++---- pipeline/do_if/ts_cmp_op.go | 6 +++++- 7 files changed, 33 insertions(+), 12 deletions(-) diff --git a/pipeline/do_if/check_type_op.go b/pipeline/do_if/check_type_op.go index 5b3a2607e..a6bf4358c 100644 --- a/pipeline/do_if/check_type_op.go +++ b/pipeline/do_if/check_type_op.go @@ -154,7 +154,7 @@ func (n *checkTypeOpNode) Type() nodeType { return NodeCheckTypeOp } -func (n *checkTypeOpNode) check(eventRoot *insaneJSON.Root) bool { +func (n *checkTypeOpNode) checkEvent(eventRoot *insaneJSON.Root) bool { node := eventRoot.Dig(n.fieldPath...) 
for _, checkFn := range n.checkTypeFns { if checkFn(node) { @@ -164,6 +164,10 @@ func (n *checkTypeOpNode) check(eventRoot *insaneJSON.Root) bool { return false } +func (n *checkTypeOpNode) checkRaw([]byte, []byte, map[string]string) bool { + panic("not impl") +} + func (n *checkTypeOpNode) isEqualTo(n2 Node, _ int) error { n2f, ok := n2.(*checkTypeOpNode) if !ok { diff --git a/pipeline/do_if/check_type_test.go b/pipeline/do_if/check_type_test.go index b06851db5..b9a3cb118 100644 --- a/pipeline/do_if/check_type_test.go +++ b/pipeline/do_if/check_type_test.go @@ -213,7 +213,7 @@ func TestCheckType(t *testing.T) { eventRoot, err = insaneJSON.DecodeString(d.eventStr) require.NoError(t, err) } - got := node.check(eventRoot) + got := node.checkEvent(eventRoot) assert.Equal(t, d.want, got, "invalid result for event %q", d.eventStr) } }) @@ -366,7 +366,7 @@ func TestCheckTypeDuplicateValues(t *testing.T) { eventStr := logsMap[d.checkType] eventRoot, err := insaneJSON.DecodeString(eventStr) require.NoError(t, err, "must be no error on decode checkEvent") - got := ctnode.check(eventRoot) + got := ctnode.checkEvent(eventRoot) assert.Equal(t, d.want, got, "invalid result for check %d of type %q", i, d.checkType) } }) diff --git a/pipeline/do_if/do_if.go b/pipeline/do_if/do_if.go index 95590a927..7833164fa 100644 --- a/pipeline/do_if/do_if.go +++ b/pipeline/do_if/do_if.go @@ -30,7 +30,8 @@ const ( type Node interface { Type() nodeType - check(*insaneJSON.Root) bool + checkEvent(*insaneJSON.Root) bool + checkRaw(event []byte, sourceName []byte, metadata map[string]string) bool isEqualTo(Node, int) error } @@ -52,5 +53,5 @@ func (c *Checker) Check(eventRoot *insaneJSON.Root) bool { if eventRoot == nil { return false } - return c.root.check(eventRoot) + return c.root.checkEvent(eventRoot) } diff --git a/pipeline/do_if/field_op.go b/pipeline/do_if/field_op.go index ce4f41cf9..ed54c6eb1 100644 --- a/pipeline/do_if/field_op.go +++ b/pipeline/do_if/field_op.go @@ -189,7 +189,7 @@ func (n *fieldOpNode) Type() nodeType { return NodeFieldOp } -func (n *fieldOpNode) check(eventRoot *insaneJSON.Root) bool { +func (n *fieldOpNode) checkEvent(eventRoot *insaneJSON.Root) bool { node := eventRoot.Dig(n.fieldPath...) 
if node.IsArray() || node.IsObject() { return false @@ -202,6 +202,10 @@ func (n *fieldOpNode) check(eventRoot *insaneJSON.Root) bool { return n.checker.Check(node.AsBytes()) } +func (n *fieldOpNode) checkRaw(event []byte, sourceName []byte, metadata map[string]string) bool { + panic("not impl") +} + func (n *fieldOpNode) isEqualTo(n2 Node, _ int) error { n2f, ok := n2.(*fieldOpNode) if !ok { diff --git a/pipeline/do_if/len_cmp_op.go b/pipeline/do_if/len_cmp_op.go index b0971df99..1517c9ac7 100644 --- a/pipeline/do_if/len_cmp_op.go +++ b/pipeline/do_if/len_cmp_op.go @@ -174,7 +174,7 @@ func getNodeBytesSize(node *insaneJSON.Node) int { return size } -func (n *lenCmpOpNode) check(eventRoot *insaneJSON.Root) bool { +func (n *lenCmpOpNode) checkEvent(eventRoot *insaneJSON.Root) bool { value := 0 switch n.lenCmpOp { @@ -203,6 +203,10 @@ func (n *lenCmpOpNode) check(eventRoot *insaneJSON.Root) bool { return n.cmpOp.compare(value, n.cmpValue) } +func (n *lenCmpOpNode) checkRaw(event []byte, sourceName []byte, metadata map[string]string) bool { + panic("not impl") +} + func (n *lenCmpOpNode) isEqualTo(n2 Node, _ int) error { n2Explicit, ok := n2.(*lenCmpOpNode) if !ok { diff --git a/pipeline/do_if/logical_op.go b/pipeline/do_if/logical_op.go index 7ea6963da..8bb6913a0 100644 --- a/pipeline/do_if/logical_op.go +++ b/pipeline/do_if/logical_op.go @@ -142,29 +142,33 @@ func (n *logicalNode) Type() nodeType { return NodeLogicalOp } -func (n *logicalNode) check(eventRoot *insaneJSON.Root) bool { +func (n *logicalNode) checkEvent(eventRoot *insaneJSON.Root) bool { switch n.op { case logic.Or: for _, op := range n.operands { - if op.check(eventRoot) { + if op.checkEvent(eventRoot) { return true } } return false case logic.And: for _, op := range n.operands { - if !op.check(eventRoot) { + if !op.checkEvent(eventRoot) { return false } } return true case logic.Not: - return !n.operands[0].check(eventRoot) + return !n.operands[0].checkEvent(eventRoot) default: panic("unknown logical op") } } +func (n *logicalNode) checkRaw([]byte, []byte, map[string]string) bool { + panic("not impl") +} + func (n *logicalNode) isEqualTo(n2 Node, level int) error { n2l, ok := n2.(*logicalNode) if !ok { diff --git a/pipeline/do_if/ts_cmp_op.go b/pipeline/do_if/ts_cmp_op.go index 192e468e0..6d2310e03 100644 --- a/pipeline/do_if/ts_cmp_op.go +++ b/pipeline/do_if/ts_cmp_op.go @@ -137,7 +137,7 @@ func (n *tsCmpOpNode) Type() nodeType { return NodeTimestampCmpOp } -func (n *tsCmpOpNode) check(eventRoot *insaneJSON.Root) bool { +func (n *tsCmpOpNode) checkEvent(eventRoot *insaneJSON.Root) bool { node := eventRoot.Dig(n.fieldPath...) 
if node == nil { return false @@ -169,6 +169,10 @@ func (n *tsCmpOpNode) check(eventRoot *insaneJSON.Root) bool { return n.cmpOp.compare(lhs, int(rhs)) } +func (n *tsCmpOpNode) checkRaw([]byte, []byte, map[string]string) bool { + panic("not impl") +} + func (n *tsCmpOpNode) isEqualTo(n2 Node, _ int) error { n2Explicit, ok := n2.(*tsCmpOpNode) if !ok { From 06094fd776c8b9196f357765f02633af925ff307 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 24 Jul 2025 18:34:38 +0300 Subject: [PATCH 49/75] Refactor --- pipeline/antispam/node_value.go | 6 +-- pipeline/do_if/ctor.go | 2 +- pipeline/do_if/ctor_test.go | 3 +- pipeline/do_if/do_if_test.go | 3 +- pipeline/do_if/field_op.go | 81 +++++++++++++++++++++++++++++++-- pipeline/do_if/logical_op.go | 23 +++++++++- 6 files changed, 106 insertions(+), 12 deletions(-) diff --git a/pipeline/antispam/node_value.go b/pipeline/antispam/node_value.go index 35de9a35a..fcb9e1531 100644 --- a/pipeline/antispam/node_value.go +++ b/pipeline/antispam/node_value.go @@ -33,7 +33,7 @@ const ( dataTypeSourceNameTag = "source_name" dataTypeMetaTag = "meta" - metaTagPrefix = "meta." + dataTypeMetaTagPrefix = "meta." ) func stringToDataType(s string) (dataType, string, error) { @@ -42,8 +42,8 @@ func stringToDataType(s string) (dataType, string, error) { return dataTypeEvent, "", nil case s == dataTypeSourceNameTag: return dataTypeSourceName, "", nil - case strings.HasPrefix(s, metaTagPrefix): - return dataTypeMeta, strings.TrimPrefix(s, metaTagPrefix), nil + case strings.HasPrefix(s, dataTypeMetaTagPrefix): + return dataTypeMeta, strings.TrimPrefix(s, dataTypeMetaTagPrefix), nil default: return -1, "", fmt.Errorf("unparsable check data tag: %s", s) } diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go index 944458c25..957034b1a 100644 --- a/pipeline/do_if/ctor.go +++ b/pipeline/do_if/ctor.go @@ -89,7 +89,7 @@ func extractFieldOpNode(opName string, node map[string]any) (Node, error) { return nil, fmt.Errorf("extract field op values: %w", err) } - result, err = newFieldOpNode(opName, fieldPath, caseSensitive, vals) + result, err = newFieldOpNode(opName, caseSensitive, vals, fieldPath, "") if err != nil { return nil, fmt.Errorf("init field op: %w", err) } diff --git a/pipeline/do_if/ctor_test.go b/pipeline/do_if/ctor_test.go index 61af3b81e..a432f0075 100644 --- a/pipeline/do_if/ctor_test.go +++ b/pipeline/do_if/ctor_test.go @@ -42,9 +42,10 @@ func buildDoIfTree(node *doIfTreeNode) (Node, error) { case node.fieldOp != "": return newFieldOpNode( node.fieldOp, - node.fieldName, node.caseSensitive, node.values, + node.fieldName, + "", ) case node.logicalOp != "": operands := make([]Node, 0) diff --git a/pipeline/do_if/do_if_test.go b/pipeline/do_if/do_if_test.go index bd10f1dde..ef7da7670 100644 --- a/pipeline/do_if/do_if_test.go +++ b/pipeline/do_if/do_if_test.go @@ -43,9 +43,10 @@ func buildTree(node treeNode) (Node, error) { case node.fieldOp != "": return newFieldOpNode( node.fieldOp, - node.fieldName, node.caseSensitive, node.values, + node.fieldName, + "", ) case node.logicalOp != "": operands := make([]Node, 0) diff --git a/pipeline/do_if/field_op.go b/pipeline/do_if/field_op.go index ed54c6eb1..629c12cc5 100644 --- a/pipeline/do_if/field_op.go +++ b/pipeline/do_if/field_op.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "slices" + "strings" "github.com/ozontech/file.d/cfg" "github.com/ozontech/file.d/pipeline/do_if/data_checker" @@ -19,7 +20,7 @@ Params: - `field` - path to field in JSON tree. If empty, root value is checked. 
Path to nested fields is delimited by dots `"."`, e.g. `"field.subfield"` for `{"field": {"subfield": "val"}}`.
     If the field name contains dots in it they should be shielded with `"\"`, e.g. `"exception\.type"` for `{"exception.type": "example"}`. Default empty.
   - `values` - list of values to check field. Required non-empty.
-  - `case_sensitive` - flag indicating whether checks are performed in case sensitive way. Default `true`.
+  - `case_sensitive` - flag indicating whether checks are performed in case-sensitive way. Default `true`.
     Note: case insensitive checks can cause CPU and memory overhead since every field value will be converted to lower letters.
 
 Example:
@@ -162,13 +163,64 @@ Result:
 }*/
 
+type dataType int
+
+const (
+	dataTypeEvent dataType = iota
+	dataTypeSourceName
+	dataTypeMeta
+)
+
+func (c dataType) String() string {
+	switch c {
+	case dataTypeEvent:
+		return dataTypeEventTag
+	case dataTypeSourceName:
+		return dataTypeSourceNameTag
+	case dataTypeMeta:
+		return dataTypeMetaTag
+	default:
+		panic(fmt.Sprintf("unknown checked data type: %d", c))
+	}
+}
+
+const (
+	dataTypeEventTag      = "event"
+	dataTypeSourceNameTag = "source_name"
+	dataTypeMetaTag       = "meta"
+
+	dataTypeMetaTagPrefix = "meta."
+)
+
+func stringToDataType(s string) (dataType, string, error) {
+	switch {
+	case s == dataTypeEventTag:
+		return dataTypeEvent, "", nil
+	case s == dataTypeSourceNameTag:
+		return dataTypeSourceName, "", nil
+	case strings.HasPrefix(s, dataTypeMetaTagPrefix):
+		return dataTypeMeta, strings.TrimPrefix(s, dataTypeMetaTagPrefix), nil
+	default:
+		return -1, "", fmt.Errorf("unparsable check data tag: %s", s)
+	}
+}
+
 type fieldOpNode struct {
 	fieldPath    []string
 	fieldPathStr string
-	checker      data_checker.DataChecker
+	dataType dataType
+	metaKey  string
+
+	checker data_checker.DataChecker
 }
 
-func newFieldOpNode(op string, field string, caseSensitive bool, values [][]byte) (Node, error) {
+func newFieldOpNode(
+	op string,
+	caseSensitive bool,
+	values [][]byte,
+	field string,
+	dataTypeTag string,
+) (Node, error) {
 	if len(values) == 0 {
 		return nil, errors.New("values are not provided")
 	}
@@ -178,9 +230,20 @@ func newFieldOpNode(op string, field string, caseSensitive bool, values [][]byte
 		return nil, err
 	}
 
+	var curDataType dataType
+	var curMetaKey string
+	if dataTypeTag != "" {
+		curDataType, curMetaKey, err = stringToDataType(dataTypeTag)
+		if err != nil {
+			return nil, err
+		}
+	}
+
 	return &fieldOpNode{
 		fieldPath:    cfg.ParseFieldSelector(field),
 		fieldPathStr: field,
+		dataType:     curDataType,
+		metaKey:      curMetaKey,
 		checker:      c,
 	}, nil
 }
@@ -203,7 +266,17 @@ func (n *fieldOpNode) checkEvent(eventRoot *insaneJSON.Root) bool {
 }
 
 func (n *fieldOpNode) checkRaw(event []byte, sourceName []byte, metadata map[string]string) bool {
-	panic("not impl")
+	switch n.dataType {
+	case dataTypeEvent:
+		return n.checker.Check(event)
+	case dataTypeSourceName:
+		return n.checker.Check(sourceName)
+	case dataTypeMeta:
+		data, ok := metadata[n.metaKey]
+		return ok && n.checker.Check([]byte(data))
+	default:
+		panic(fmt.Sprintf("unknown type of checked data: %d", n.dataType))
+	}
 }
 
 func (n *fieldOpNode) isEqualTo(n2 Node, _ int) error {
diff --git a/pipeline/do_if/logical_op.go b/pipeline/do_if/logical_op.go
index 8bb6913a0..1f7380df3 100644
--- a/pipeline/do_if/logical_op.go
+++ b/pipeline/do_if/logical_op.go
@@ -165,8 +165,27 @@ func (n *logicalNode) checkEvent(eventRoot *insaneJSON.Root) bool {
 	}
 }
 
-func (n *logicalNode) checkRaw([]byte, []byte, map[string]string) bool {
-	panic("not impl")
+func 
(n *logicalNode) checkRaw(event []byte, sourceName []byte, metadata map[string]string) bool { + switch n.op { + case logic.Or: + for _, op := range n.operands { + if op.checkRaw(event, sourceName, metadata) { + return true + } + } + return false + case logic.And: + for _, op := range n.operands { + if !op.checkRaw(event, sourceName, metadata) { + return false + } + } + return true + case logic.Not: + return !n.operands[0].checkRaw(event, sourceName, metadata) + default: + panic("unknown logical op") + } } func (n *logicalNode) isEqualTo(n2 Node, level int) error { From c53756e6d04c09e06d22a63f8b335fdd1c9d3958 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 24 Jul 2025 19:43:05 +0300 Subject: [PATCH 50/75] Refactor --- pipeline/ctor/ctor.go | 105 ----------------------------------- pipeline/ctor/utils.go | 2 + pipeline/do_if/ctor.go | 61 +++++++++++++++++--- pipeline/do_if/ctor_test.go | 4 +- pipeline/do_if/ctor_utils.go | 50 ----------------- pipeline/do_if/do_if_test.go | 4 +- pipeline/do_if/logical_op.go | 19 ++++++- 7 files changed, 74 insertions(+), 171 deletions(-) delete mode 100644 pipeline/ctor/ctor.go delete mode 100644 pipeline/do_if/ctor_utils.go diff --git a/pipeline/ctor/ctor.go b/pipeline/ctor/ctor.go deleted file mode 100644 index 85fccdb87..000000000 --- a/pipeline/ctor/ctor.go +++ /dev/null @@ -1,105 +0,0 @@ -package ctor - -import ( - "errors" - "fmt" - - "github.com/ozontech/file.d/pipeline/logic" -) - -const ( - fieldNameOp = "op" - fieldNameOperands = "operands" -) - -type Node = map[string]any - -func Extract[T any]( - root Node, - opToNonLogicalCtor func(string) func(string, Node) (T, error), - simpleLogicCtor func(op logic.Op, operands []T) T, -) (T, error) { - var ( - extract func(Node) (T, error) - extractLogical func(string, Node) (T, error) - ) - - extract = func(node Node) (T, error) { - var def T - - opName, err := Get[string](node, fieldNameOp) - if err != nil { - return def, err - } - - switch opName { - case logic.AndTag, logic.OrTag, logic.NotTag: - return extractLogical(opName, node) - } - - if curCtor := opToNonLogicalCtor(opName); curCtor != nil { - return curCtor(opName, node) - } - - return def, fmt.Errorf("unknown op: %s", opName) - } - - extractLogical = func(op string, node Node) (T, error) { - var def T - - rawOperands, err := Get[[]any](node, fieldNameOperands) - if err != nil { - return def, err - } - - operands := make([]T, 0) - - for _, rawOperand := range rawOperands { - var operandNode Node - operandNode, err = Must[Node](rawOperand) - if err != nil { - return def, fmt.Errorf("logical node operand type mismatch: %w", err) - } - - var operand T - operand, err = Extract(operandNode, opToNonLogicalCtor, simpleLogicCtor) - if err != nil { - return def, fmt.Errorf("extract operand for logical op %q: %w", op, err) - } - - operands = append(operands, operand) - } - - result, err := NewLogicalNode(op, operands, simpleLogicCtor) - if err != nil { - return def, fmt.Errorf("init logical node: %w", err) - } - - return result, nil - } - - return extract(root) -} - -func NewLogicalNode[T any]( - op string, - operands []T, - simpleLogicCtor func(op logic.Op, operands []T) T, -) (T, error) { - var def T - - if len(operands) == 0 { - return def, errors.New("logical op must have at least one operand") - } - - logicOp, err := logic.StringToOp(op) - if err != nil { - return def, err - } - - if logicOp == logic.Not && len(operands) != 1 { - return def, fmt.Errorf("logical not must have exactly one operand, got %d", len(operands)) - } - - return 
simpleLogicCtor(logicOp, operands), nil -} diff --git a/pipeline/ctor/utils.go b/pipeline/ctor/utils.go index e427995c7..8a53a1a7f 100644 --- a/pipeline/ctor/utils.go +++ b/pipeline/ctor/utils.go @@ -5,6 +5,8 @@ import ( "fmt" ) +type Node = map[string]any + var ( ErrFieldNotFound = errors.New("field not found") ErrTypeMismatch = errors.New("type mismatch") diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go index 957034b1a..b94301c82 100644 --- a/pipeline/do_if/ctor.go +++ b/pipeline/do_if/ctor.go @@ -8,9 +8,12 @@ import ( "github.com/ozontech/file.d/cfg" "github.com/ozontech/file.d/pipeline/ctor" "github.com/ozontech/file.d/pipeline/do_if/data_checker" + "github.com/ozontech/file.d/pipeline/logic" ) const ( + fieldNameOp = "op" + fieldNameField = "field" fieldNameCaseSensitive = "case_sensitive" @@ -32,10 +35,12 @@ const ( defaultTsCmpValUpdateInterval = 10 * time.Second defaultTsFormat = "rfc3339nano" + + fieldNameOperands = "operands" ) func NewFromMap(m map[string]any) (*Checker, error) { - root, err := ctor.Extract(m, opToNonLogicalCtor, newLogicalOpNode) + root, err := extractNode(m) if err != nil { return nil, fmt.Errorf("extract nodes: %w", err) } @@ -45,25 +50,35 @@ func NewFromMap(m map[string]any) (*Checker, error) { }, nil } -func opToNonLogicalCtor(opName string) func(string, map[string]any) (Node, error) { +func extractNode(node ctor.Node) (Node, error) { + opName, err := ctor.Get[string](node, fieldNameOp) + if err != nil { + return nil, err + } + switch opName { + case + logic.AndTag, + logic.OrTag, + logic.NotTag: + return extractLogicalOpNode(opName, node) case data_checker.OpEqualTag, data_checker.OpContainsTag, data_checker.OpPrefixTag, data_checker.OpSuffixTag, data_checker.OpRegexTag: - return extractFieldOpNode + return extractFieldOpNode(opName, node) case "byte_len_cmp", "array_len_cmp": - return extractLengthCmpOpNode + return extractLengthCmpOpNode(opName, node) case "ts_cmp": - return extractTsCmpOpNode + return extractTsCmpOpNode(opName, node) case "check_type": - return extractCheckTypeOpNode + return extractCheckTypeOpNode(opName, node) default: - return nil + return nil, fmt.Errorf("unknown op: %s", opName) } } @@ -240,3 +255,35 @@ func extractCheckTypeOpNode(_ string, node map[string]any) (Node, error) { return result, nil } + +func extractLogicalOpNode(opName string, node map[string]any) (Node, error) { + rawOperands, err := ctor.Get[[]any](node, fieldNameOperands) + if err != nil { + return nil, err + } + + operands := make([]Node, 0) + + for _, rawOperand := range rawOperands { + var operandNode map[string]any + operandNode, err = ctor.Must[map[string]any](rawOperand) + if err != nil { + return nil, fmt.Errorf("logical node operand type mismatch: %w", err) + } + + var operand Node + operand, err = extractNode(operandNode) + if err != nil { + return nil, fmt.Errorf("extract operand for logical op %q: %w", opName, err) + } + + operands = append(operands, operand) + } + + result, err := newLogicalNode(opName, operands) + if err != nil { + return nil, fmt.Errorf("init logical node: %w", err) + } + + return result, nil +} diff --git a/pipeline/do_if/ctor_test.go b/pipeline/do_if/ctor_test.go index a432f0075..52e6a747e 100644 --- a/pipeline/do_if/ctor_test.go +++ b/pipeline/do_if/ctor_test.go @@ -8,7 +8,6 @@ import ( "time" "github.com/bitly/go-simplejson" - "github.com/ozontech/file.d/pipeline/ctor" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -56,10 +55,9 @@ func buildDoIfTree(node *doIfTreeNode) (Node, error) { } 
operands = append(operands, operand) } - return ctor.NewLogicalNode( + return newLogicalNode( node.logicalOp, operands, - newLogicalOpNode, ) case node.lenCmpOp != "": return newLenCmpOpNode(node.lenCmpOp, node.fieldName, node.cmpOp, node.cmpValue) diff --git a/pipeline/do_if/ctor_utils.go b/pipeline/do_if/ctor_utils.go deleted file mode 100644 index 95eda5d1a..000000000 --- a/pipeline/do_if/ctor_utils.go +++ /dev/null @@ -1,50 +0,0 @@ -package do_if - -import ( - "errors" - "fmt" -) - -var ( - errFieldNotFound = errors.New("field not found") - errTypeMismatch = errors.New("type mismatch") -) - -func getAny(node map[string]any, field string) (any, error) { - res, has := node[field] - if !has { - return nil, fmt.Errorf("field=%q: %w", field, errFieldNotFound) - } - - return res, nil -} - -func get[T any](node map[string]any, field string) (T, error) { - var def T - - fieldNode, err := getAny(node, field) - if err != nil { - return def, err - } - - result, ok := fieldNode.(T) - if !ok { - return def, fmt.Errorf( - "%w: field=%q expected=%T got=%T", - errTypeMismatch, field, def, fieldNode, - ) - } - - return result, nil -} - -func must[T any](v any) (T, error) { - var def T - - result, ok := v.(T) - if !ok { - return def, fmt.Errorf("%w: expected=%T got=%T", errTypeMismatch, def, v) - } - - return result, nil -} diff --git a/pipeline/do_if/do_if_test.go b/pipeline/do_if/do_if_test.go index ef7da7670..90db7b170 100644 --- a/pipeline/do_if/do_if_test.go +++ b/pipeline/do_if/do_if_test.go @@ -8,7 +8,6 @@ import ( "testing" "time" - "github.com/ozontech/file.d/pipeline/ctor" "github.com/ozontech/file.d/pipeline/do_if/data_checker" "github.com/ozontech/file.d/pipeline/logic" insaneJSON "github.com/ozontech/insane-json" @@ -57,10 +56,9 @@ func buildTree(node treeNode) (Node, error) { } operands = append(operands, operand) } - return ctor.NewLogicalNode( + return newLogicalNode( node.logicalOp, operands, - newLogicalOpNode, ) case node.lenCmpOp != "": return newLenCmpOpNode(node.lenCmpOp, node.fieldName, node.cmpOp, node.cmpValue) diff --git a/pipeline/do_if/logical_op.go b/pipeline/do_if/logical_op.go index 1f7380df3..538e2ad2c 100644 --- a/pipeline/do_if/logical_op.go +++ b/pipeline/do_if/logical_op.go @@ -131,11 +131,24 @@ type logicalNode struct { operands []Node } -func newLogicalOpNode(op logic.Op, operands []Node) Node { +func newLogicalNode(op string, operands []Node) (Node, error) { + if len(operands) == 0 { + return nil, errors.New("logical op must have at least one operand") + } + + logicOp, err := logic.StringToOp(op) + if err != nil { + return nil, err + } + + if logicOp == logic.Not && len(operands) != 1 { + return nil, fmt.Errorf("logical not must have exactly one operand, got %d", len(operands)) + } + return &logicalNode{ - op: op, + op: logicOp, operands: operands, - } + }, nil } func (n *logicalNode) Type() nodeType { From db7028e167d12e90e3765eab35884c6d501ff078 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 24 Jul 2025 20:39:18 +0300 Subject: [PATCH 51/75] Refactor --- pipeline/antispam/antispammer.go | 4 +- pipeline/antispam/ctor.go | 65 +-------------------- pipeline/antispam/node.go | 13 ----- pipeline/antispam/node_logical.go | 44 -------------- pipeline/antispam/node_value.go | 97 ------------------------------- pipeline/antispam/rule.go | 50 +++------------- pipeline/do_if/check_type_op.go | 2 +- pipeline/do_if/converter.go | 45 ++++++++++++++ pipeline/do_if/converter_test.go | 1 + pipeline/do_if/ctor.go | 6 +- pipeline/do_if/do_if.go | 6 +- 
pipeline/do_if/field_op.go | 18 +++--- pipeline/do_if/len_cmp_op.go | 2 +- pipeline/do_if/logical_op.go | 8 +-- pipeline/do_if/ts_cmp_op.go | 2 +- 15 files changed, 82 insertions(+), 281 deletions(-) delete mode 100644 pipeline/antispam/node.go delete mode 100644 pipeline/antispam/node_logical.go delete mode 100644 pipeline/antispam/node_value.go create mode 100644 pipeline/do_if/converter.go create mode 100644 pipeline/do_if/converter_test.go diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index 6997b7952..8922e5819 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -84,7 +84,7 @@ func NewAntispammer(o *Options) *Antispammer { o.Logger.Fatal("can't extract antispam", zap.Error(err)) } } else { - a.rules, err = exceptionToRules(o.Exceptions) + a.rules, err = exceptionsToRules(o.Exceptions) if err != nil { o.Logger.Fatal("can't convert exceptions to rules") } @@ -126,7 +126,7 @@ func (a *Antispammer) IsSpam( for i := range a.rules { rule := &a.rules[i] - if rule.Condition.check(event, []byte(name), meta) { + if rule.Condition.CheckRaw(event, []byte(name), meta) { switch rule.Threshold { case thresholdUnlimited: if rule.Name != "" { diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index b84c9daf7..775c3a92e 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -3,11 +3,10 @@ package antispam import ( "errors" "fmt" - "strings" "github.com/ozontech/file.d/cfg" "github.com/ozontech/file.d/pipeline/ctor" - "github.com/ozontech/file.d/pipeline/do_if/data_checker" + "github.com/ozontech/file.d/pipeline/do_if" ) const ( @@ -16,10 +15,6 @@ const ( fieldNameIf = "if" fieldNameThreshold = "threshold" - fieldNameOp = "op" - - fieldNameOperands = "operands" - fieldNameData = "data" fieldNameValues = "values" fieldNameCaseSensitive = "case_sensitive" @@ -85,7 +80,7 @@ func extractRule(node map[string]any) (Rule, error) { return Rule{}, err } - cond, err := ctor.Extract(condNode, opToNonLogicalCtor, newLogicalNode) + cond, err := do_if.ExtractNode(condNode) if err != nil { return Rule{}, err } @@ -102,59 +97,3 @@ func extractRule(node map[string]any) (Rule, error) { return newRule(name, cond, threshold) } - -func opToNonLogicalCtor(opName string) func(string, map[string]any) (Node, error) { - switch opName { - case - data_checker.OpEqualTag, - data_checker.OpContainsTag, - data_checker.OpPrefixTag, - data_checker.OpSuffixTag, - data_checker.OpRegexTag: - return extractValueNode - default: - return nil - } -} - -func extractValueNode(op string, node map[string]any) (Node, error) { - dataTag, err := ctor.Get[string](node, fieldNameData) - if err != nil { - return nil, err - } - - caseSensitive := true - caseSensitiveNode, err := ctor.Get[bool](node, fieldNameCaseSensitive) - if err == nil { - caseSensitive = caseSensitiveNode - } else if errors.Is(err, ctor.ErrTypeMismatch) { - return nil, err - } - - values, err := extractValues(node) - if err != nil { - return nil, fmt.Errorf("extract values: %w", err) - } - - return newValueNode(op, caseSensitive, values, dataTag) -} - -func extractValues(node map[string]any) ([][]byte, error) { - rawValues, err := ctor.Get[[]any](node, fieldNameValues) - if err != nil { - return nil, err - } - - values := make([][]byte, 0, len(rawValues)) - for _, rawValue := range rawValues { - var value string - value, err = ctor.Must[string](rawValue) - if err != nil { - return nil, fmt.Errorf("value type mismatch: %w", err) - } - - values = append(values, 
[]byte(strings.Clone(value))) - } - - return values, nil -} diff --git a/pipeline/antispam/node.go b/pipeline/antispam/node.go deleted file mode 100644 index fe10f0a0c..000000000 --- a/pipeline/antispam/node.go +++ /dev/null @@ -1,13 +0,0 @@ -package antispam - -type nodeType int - -const ( - nodeTypeValue nodeType = iota - nodeTypeLogical -) - -type Node interface { - getType() nodeType - check(event []byte, sourceName []byte, metadata map[string]string) bool -} diff --git a/pipeline/antispam/node_logical.go b/pipeline/antispam/node_logical.go deleted file mode 100644 index 66ac415fe..000000000 --- a/pipeline/antispam/node_logical.go +++ /dev/null @@ -1,44 +0,0 @@ -package antispam - -import ( - "github.com/ozontech/file.d/pipeline/logic" -) - -type logicalNode struct { - op logic.Op - operands []Node -} - -func newLogicalNode(op logic.Op, operands []Node) Node { - return &logicalNode{ - op: op, - operands: operands, - } -} - -func (n *logicalNode) getType() nodeType { - return nodeTypeLogical -} - -func (n *logicalNode) check(event []byte, sourceName []byte, metadata map[string]string) bool { - switch n.op { - case logic.And: - for _, op := range n.operands { - if !op.check(event, sourceName, metadata) { - return false - } - } - return true - case logic.Or: - for _, op := range n.operands { - if op.check(event, sourceName, metadata) { - return true - } - } - return false - case logic.Not: - return !n.operands[0].check(event, sourceName, metadata) - default: - panic("unknown logical op: %v") - } -} diff --git a/pipeline/antispam/node_value.go b/pipeline/antispam/node_value.go deleted file mode 100644 index fcb9e1531..000000000 --- a/pipeline/antispam/node_value.go +++ /dev/null @@ -1,97 +0,0 @@ -package antispam - -import ( - "fmt" - "strings" - - "github.com/ozontech/file.d/pipeline/do_if/data_checker" -) - -type dataType int - -const ( - dataTypeEvent dataType = iota - dataTypeSourceName - dataTypeMeta -) - -func (c dataType) String() string { - switch c { - case dataTypeEvent: - return dataTypeEventTag - case dataTypeSourceName: - return dataTypeSourceNameTag - case dataTypeMeta: - return dataTypeMetaTag - default: - panic(fmt.Sprintf("unknown checked data type: %d", c)) - } -} - -const ( - dataTypeEventTag = "event" - dataTypeSourceNameTag = "source_name" - dataTypeMetaTag = "meta" - - dataTypeMetaTagPrefix = "meta." 
-) - -func stringToDataType(s string) (dataType, string, error) { - switch { - case s == dataTypeEventTag: - return dataTypeEvent, "", nil - case s == dataTypeSourceNameTag: - return dataTypeSourceName, "", nil - case strings.HasPrefix(s, dataTypeMetaTagPrefix): - return dataTypeMeta, strings.TrimPrefix(s, dataTypeMetaTagPrefix), nil - default: - return -1, "", fmt.Errorf("unparsable check data tag: %s", s) - } -} - -type valueNode struct { - dataType dataType - metaKey string - checker data_checker.DataChecker -} - -func newValueNode( - opTag string, - caseSensitive bool, - values [][]byte, - checkDataTag string, -) (*valueNode, error) { - c, err := data_checker.New(opTag, caseSensitive, values) - if err != nil { - return nil, fmt.Errorf("init checker: %w", err) - } - - dType, metaKey, err := stringToDataType(checkDataTag) - if err != nil { - return nil, err - } - - return &valueNode{ - dataType: dType, - metaKey: metaKey, - checker: c, - }, nil -} - -func (n *valueNode) getType() nodeType { - return nodeTypeValue -} - -func (n *valueNode) check(event []byte, sourceName []byte, metadata map[string]string) bool { - switch n.dataType { - case dataTypeEvent: - return n.checker.Check(event) - case dataTypeSourceName: - return n.checker.Check(sourceName) - case dataTypeMeta: - data, ok := metadata[n.metaKey] - return ok && n.checker.Check([]byte(data)) - default: - panic(fmt.Sprintf("inknown type of checked data: %d", n.dataType)) - } -} diff --git a/pipeline/antispam/rule.go b/pipeline/antispam/rule.go index de5290a93..4726b995d 100644 --- a/pipeline/antispam/rule.go +++ b/pipeline/antispam/rule.go @@ -2,16 +2,13 @@ package antispam import ( "fmt" - "strings" - "github.com/ozontech/file.d/cfg/matchrule" - "github.com/ozontech/file.d/pipeline/ctor" - "github.com/ozontech/file.d/pipeline/logic" + "github.com/ozontech/file.d/pipeline/do_if" ) type Rule struct { Name string - Condition Node + Condition do_if.Node Threshold int RLMapKey string } @@ -30,7 +27,7 @@ func checkThreshold(threshold int) error { return nil } -func newRule(name string, condition Node, threshold int) (Rule, error) { +func newRule(name string, condition do_if.Node, threshold int) (Rule, error) { if err := checkThreshold(threshold); err != nil { return Rule{}, err } @@ -42,47 +39,16 @@ func newRule(name string, condition Node, threshold int) (Rule, error) { }, nil } -func matchRuleToNode(rule matchrule.Rule, dataTypeTag string) (Node, error) { - values := make([][]byte, 0, len(rule.Values)) - for _, s := range rule.Values { - values = append(values, []byte(strings.Clone(s))) - } - - node, err := newValueNode(matchrule.ModeToString(rule.Mode), !rule.CaseInsensitive, values, dataTypeTag) - if err != nil { - return nil, err - } - - if !rule.Invert { - return node, nil - } - - return ctor.NewLogicalNode(logic.NotTag, []Node{node}, newLogicalNode) -} - -func matchRuleSetToNode(ruleSet matchrule.RuleSet, dataTypeTag string) (Node, error) { - operands := make([]Node, 0, len(ruleSet.Rules)) - for _, r := range ruleSet.Rules { - operand, err := matchRuleToNode(r, dataTypeTag) - if err != nil { - return nil, err - } - operands = append(operands, operand) - } - - return ctor.NewLogicalNode(matchrule.CondToString(ruleSet.Cond), operands, newLogicalNode) -} - -func exceptionToNode(exception Exception) (Node, error) { - dataTypeTag := dataTypeEventTag +func exceptionToNode(exception Exception) (do_if.Node, error) { + dataTypeTag := do_if.DataTypeEventTag if exception.CheckSourceName { - dataTypeTag = dataTypeSourceNameTag + dataTypeTag = 
do_if.DataTypeSourceNameTag } - return matchRuleSetToNode(exception.RuleSet, dataTypeTag) + return do_if.RuleSetToNode(exception.RuleSet, dataTypeTag) } -func exceptionToRules(exceptions Exceptions) (Rules, error) { +func exceptionsToRules(exceptions Exceptions) (Rules, error) { rules := make(Rules, 0, len(exceptions)) for _, e := range exceptions { node, err := exceptionToNode(e) diff --git a/pipeline/do_if/check_type_op.go b/pipeline/do_if/check_type_op.go index a6bf4358c..7adcce9b6 100644 --- a/pipeline/do_if/check_type_op.go +++ b/pipeline/do_if/check_type_op.go @@ -164,7 +164,7 @@ func (n *checkTypeOpNode) checkEvent(eventRoot *insaneJSON.Root) bool { return false } -func (n *checkTypeOpNode) checkRaw([]byte, []byte, map[string]string) bool { +func (n *checkTypeOpNode) CheckRaw([]byte, []byte, map[string]string) bool { panic("not impl") } diff --git a/pipeline/do_if/converter.go b/pipeline/do_if/converter.go new file mode 100644 index 000000000..9268d17c3 --- /dev/null +++ b/pipeline/do_if/converter.go @@ -0,0 +1,45 @@ +package do_if + +import ( + "strings" + + "github.com/ozontech/file.d/cfg/matchrule" + "github.com/ozontech/file.d/pipeline/logic" +) + +func RuleToNode(rule matchrule.Rule, dataTypeTag string) (Node, error) { + values := make([][]byte, 0, len(rule.Values)) + for _, s := range rule.Values { + values = append(values, []byte(strings.Clone(s))) + } + + node, err := newFieldOpNode( + matchrule.ModeToString(rule.Mode), + !rule.CaseInsensitive, + values, + "", + dataTypeTag, + ) + if err != nil { + return nil, err + } + + if !rule.Invert { + return node, nil + } + + return newLogicalNode(logic.NotTag, []Node{node}) +} + +func RuleSetToNode(ruleSet matchrule.RuleSet, dataTypeTag string) (Node, error) { + operands := make([]Node, 0, len(ruleSet.Rules)) + for _, r := range ruleSet.Rules { + operand, err := RuleToNode(r, dataTypeTag) + if err != nil { + return nil, err + } + operands = append(operands, operand) + } + + return newLogicalNode(matchrule.CondToString(ruleSet.Cond), operands) +} diff --git a/pipeline/do_if/converter_test.go b/pipeline/do_if/converter_test.go new file mode 100644 index 000000000..672094a99 --- /dev/null +++ b/pipeline/do_if/converter_test.go @@ -0,0 +1 @@ +package do_if diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go index b94301c82..b8a8de4a2 100644 --- a/pipeline/do_if/ctor.go +++ b/pipeline/do_if/ctor.go @@ -40,7 +40,7 @@ const ( ) func NewFromMap(m map[string]any) (*Checker, error) { - root, err := extractNode(m) + root, err := ExtractNode(m) if err != nil { return nil, fmt.Errorf("extract nodes: %w", err) } @@ -50,7 +50,7 @@ func NewFromMap(m map[string]any) (*Checker, error) { }, nil } -func extractNode(node ctor.Node) (Node, error) { +func ExtractNode(node ctor.Node) (Node, error) { opName, err := ctor.Get[string](node, fieldNameOp) if err != nil { return nil, err @@ -272,7 +272,7 @@ func extractLogicalOpNode(opName string, node map[string]any) (Node, error) { } var operand Node - operand, err = extractNode(operandNode) + operand, err = ExtractNode(operandNode) if err != nil { return nil, fmt.Errorf("extract operand for logical op %q: %w", opName, err) } diff --git a/pipeline/do_if/do_if.go b/pipeline/do_if/do_if.go index 7833164fa..eb6d7a07f 100644 --- a/pipeline/do_if/do_if.go +++ b/pipeline/do_if/do_if.go @@ -31,7 +31,7 @@ const ( type Node interface { Type() nodeType checkEvent(*insaneJSON.Root) bool - checkRaw(event []byte, sourceName []byte, metadata map[string]string) bool + CheckRaw(event []byte, sourceName []byte, metadata 
map[string]string) bool isEqualTo(Node, int) error } @@ -55,3 +55,7 @@ func (c *Checker) Check(eventRoot *insaneJSON.Root) bool { } return c.root.checkEvent(eventRoot) } + +func (c *Checker) CheckRaw(event []byte, sourceName []byte, metadata map[string]string) bool { + return c.root.CheckRaw(event, sourceName, metadata) +} diff --git a/pipeline/do_if/field_op.go b/pipeline/do_if/field_op.go index 629c12cc5..faff794b6 100644 --- a/pipeline/do_if/field_op.go +++ b/pipeline/do_if/field_op.go @@ -174,29 +174,29 @@ const ( func (c dataType) String() string { switch c { case dataTypeEvent: - return dataTypeEventTag + return DataTypeEventTag case dataTypeSourceName: - return dataTypeSourceNameTag + return DataTypeSourceNameTag case dataTypeMeta: - return dataTypeMetaTag + return DataTypeMetaTag default: panic(fmt.Sprintf("unknown checked data type: %d", c)) } } const ( - dataTypeEventTag = "event" - dataTypeSourceNameTag = "source_name" - dataTypeMetaTag = "meta" + DataTypeEventTag = "event" + DataTypeSourceNameTag = "source_name" + DataTypeMetaTag = "meta" dataTypeMetaTagPrefix = "meta." ) func stringToDataType(s string) (dataType, string, error) { switch { - case s == dataTypeEventTag: + case s == DataTypeEventTag: return dataTypeEvent, "", nil - case s == dataTypeSourceNameTag: + case s == DataTypeSourceNameTag: return dataTypeSourceName, "", nil case strings.HasPrefix(s, dataTypeMetaTagPrefix): return dataTypeMeta, strings.TrimPrefix(s, dataTypeMetaTagPrefix), nil @@ -265,7 +265,7 @@ func (n *fieldOpNode) checkEvent(eventRoot *insaneJSON.Root) bool { return n.checker.Check(node.AsBytes()) } -func (n *fieldOpNode) checkRaw(event []byte, sourceName []byte, metadata map[string]string) bool { +func (n *fieldOpNode) CheckRaw(event []byte, sourceName []byte, metadata map[string]string) bool { switch n.dataType { case dataTypeEvent: return n.checker.Check(event) diff --git a/pipeline/do_if/len_cmp_op.go b/pipeline/do_if/len_cmp_op.go index 1517c9ac7..3936a822c 100644 --- a/pipeline/do_if/len_cmp_op.go +++ b/pipeline/do_if/len_cmp_op.go @@ -203,7 +203,7 @@ func (n *lenCmpOpNode) checkEvent(eventRoot *insaneJSON.Root) bool { return n.cmpOp.compare(value, n.cmpValue) } -func (n *lenCmpOpNode) checkRaw(event []byte, sourceName []byte, metadata map[string]string) bool { +func (n *lenCmpOpNode) CheckRaw(event []byte, sourceName []byte, metadata map[string]string) bool { panic("not impl") } diff --git a/pipeline/do_if/logical_op.go b/pipeline/do_if/logical_op.go index 538e2ad2c..1ec648e1a 100644 --- a/pipeline/do_if/logical_op.go +++ b/pipeline/do_if/logical_op.go @@ -178,24 +178,24 @@ func (n *logicalNode) checkEvent(eventRoot *insaneJSON.Root) bool { } } -func (n *logicalNode) checkRaw(event []byte, sourceName []byte, metadata map[string]string) bool { +func (n *logicalNode) CheckRaw(event []byte, sourceName []byte, metadata map[string]string) bool { switch n.op { case logic.Or: for _, op := range n.operands { - if op.checkRaw(event, sourceName, metadata) { + if op.CheckRaw(event, sourceName, metadata) { return true } } return false case logic.And: for _, op := range n.operands { - if !op.checkRaw(event, sourceName, metadata) { + if !op.CheckRaw(event, sourceName, metadata) { return false } } return true case logic.Not: - return !n.operands[0].checkRaw(event, sourceName, metadata) + return !n.operands[0].CheckRaw(event, sourceName, metadata) default: panic("unknown logical op") } diff --git a/pipeline/do_if/ts_cmp_op.go b/pipeline/do_if/ts_cmp_op.go index 6d2310e03..fb93de625 100644 --- 
a/pipeline/do_if/ts_cmp_op.go +++ b/pipeline/do_if/ts_cmp_op.go @@ -169,7 +169,7 @@ func (n *tsCmpOpNode) checkEvent(eventRoot *insaneJSON.Root) bool { return n.cmpOp.compare(lhs, int(rhs)) } -func (n *tsCmpOpNode) checkRaw([]byte, []byte, map[string]string) bool { +func (n *tsCmpOpNode) CheckRaw([]byte, []byte, map[string]string) bool { panic("not impl") } From 1380060b405de682e59572f5520bfc9362362deb Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Fri, 25 Jul 2025 12:22:03 +0300 Subject: [PATCH 52/75] Edit check func --- pipeline/do_if/field_op.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pipeline/do_if/field_op.go b/pipeline/do_if/field_op.go index faff794b6..bdb4e1b20 100644 --- a/pipeline/do_if/field_op.go +++ b/pipeline/do_if/field_op.go @@ -291,5 +291,13 @@ func (n *fieldOpNode) isEqualTo(n2 Node, _ int) error { ) } + if n.dataType != n2f.dataType { + return fmt.Errorf("nodes have different data types expected: dataType=%q", n.dataType) + } + + if n.metaKey != n2f.metaKey { + return fmt.Errorf("nodes have different meta keys expected: mataKey=%q", n.metaKey) + } + return data_checker.Equal(&n.checker, &n2f.checker) } From bdf843f9aa08555f209846e3d01e79a669585020 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Fri, 25 Jul 2025 12:29:46 +0300 Subject: [PATCH 53/75] Move package logic --- pipeline/do_if/converter.go | 2 +- pipeline/do_if/ctor.go | 2 +- pipeline/do_if/do_if_test.go | 2 +- pipeline/{ => do_if}/logic/logic.go | 0 pipeline/do_if/logical_op.go | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename pipeline/{ => do_if}/logic/logic.go (100%) diff --git a/pipeline/do_if/converter.go b/pipeline/do_if/converter.go index 9268d17c3..6ee2e7c2e 100644 --- a/pipeline/do_if/converter.go +++ b/pipeline/do_if/converter.go @@ -4,7 +4,7 @@ import ( "strings" "github.com/ozontech/file.d/cfg/matchrule" - "github.com/ozontech/file.d/pipeline/logic" + "github.com/ozontech/file.d/pipeline/do_if/logic" ) func RuleToNode(rule matchrule.Rule, dataTypeTag string) (Node, error) { diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go index b8a8de4a2..72a2c0cc7 100644 --- a/pipeline/do_if/ctor.go +++ b/pipeline/do_if/ctor.go @@ -8,7 +8,7 @@ import ( "github.com/ozontech/file.d/cfg" "github.com/ozontech/file.d/pipeline/ctor" "github.com/ozontech/file.d/pipeline/do_if/data_checker" - "github.com/ozontech/file.d/pipeline/logic" + "github.com/ozontech/file.d/pipeline/do_if/logic" ) const ( diff --git a/pipeline/do_if/do_if_test.go b/pipeline/do_if/do_if_test.go index 90db7b170..4125b341e 100644 --- a/pipeline/do_if/do_if_test.go +++ b/pipeline/do_if/do_if_test.go @@ -9,7 +9,7 @@ import ( "time" "github.com/ozontech/file.d/pipeline/do_if/data_checker" - "github.com/ozontech/file.d/pipeline/logic" + "github.com/ozontech/file.d/pipeline/do_if/logic" insaneJSON "github.com/ozontech/insane-json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/pipeline/logic/logic.go b/pipeline/do_if/logic/logic.go similarity index 100% rename from pipeline/logic/logic.go rename to pipeline/do_if/logic/logic.go diff --git a/pipeline/do_if/logical_op.go b/pipeline/do_if/logical_op.go index 1ec648e1a..344722ee3 100644 --- a/pipeline/do_if/logical_op.go +++ b/pipeline/do_if/logical_op.go @@ -4,7 +4,7 @@ import ( "errors" "fmt" - "github.com/ozontech/file.d/pipeline/logic" + "github.com/ozontech/file.d/pipeline/do_if/logic" insaneJSON "github.com/ozontech/insane-json" ) From ce658c4de9a4dba110e78b436550bce3ed6cd892 Mon Sep 17 00:00:00 2001 From: 
george pogosyan
Date: Fri, 25 Jul 2025 15:59:26 +0300
Subject: [PATCH 54/75] Add doc

---
 pipeline/antispam/README.md | 72 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/pipeline/antispam/README.md b/pipeline/antispam/README.md
index 64d1a033f..cde2aa4ea 100644
--- a/pipeline/antispam/README.md
+++ b/pipeline/antispam/README.md
@@ -6,7 +6,77 @@
 In some systems services might explode with logs due to different circumstances.
 
 The main entity is `Antispammer`. It counts input data from the sources (e.g. if data comes from [file input plugin](/plugin/input/file/README.md), source can be filename) and decides whether to ban it or not. For each source it counts how many logs it has got, in other words the counter for the source is incremented for each incoming log. When the counter is greater or equal to the threshold value the source is banned until its counter is less than the threshold value. The counter value is decremented once in maintenance interval by the threshold value. The maintenance interval for antispam is the same as for the pipeline (see `maintenance_interval` in [pipeline settings](/pipeline/README.md#settings)).
 
-## Exceptions
+## Antispam config
+
+Example:
+
+```
+antispam:
+  threshold: 3000
+  rules:
+    - name: alert_agent
+      if:
+        op: and
+        operands:
+          - op: contains
+            data: meta.service
+            values:
+              - alerts-agent
+          - op: prefix
+            data: event
+            values:
+              - '{"level":"debug"'
+      threshold: -1
+    - name: viewer
+      if:
+        op: and
+        operands:
+          - op: contains
+            data: source_name
+            values:
+              - viewer
+      threshold: 5000
+```
+
+The antispammer iterates over the rules and applies the first rule that matches the event.
+If the event does not match any rule, it is limited by the common threshold.
+
+### Antispam fields
+
+**`threshold`** **`int`**
+
+Common threshold applied to events that don't match any rule.
+Values:
+- `-1` - no limit;
+- `0` - discard all logs;
+- `> 0` - normal threshold value.
+
+**`rules`**
+
+Array of antispam rules.
+
+### Rule fields
+
+**`name`** **`string`**
+
+Name of the rule. If set to a non-empty string, it is used as the `name` label value in the `antispam_exceptions` metric.
+
+**`threshold`** **`int`**
+
+Rule threshold. Its values have the same meaning as the common threshold values.
+
+**`if`**
+
+`do_if`-like condition tree (see [doc](../do_if/README.md)).
+The difference is that only logical and data operations are allowed.
+Use `data` instead of `field` to specify which data to check.
+Values:
+- `event`
+- `source_name`
+- `meta.name` - take the data to check from metadata by the key `name`
+
+
+## Exceptions [deprecated: use rules instead]
 
 Antispammer has some exception rules which can be applied by checking source name or log as raw bytes contents. If the log is matched by the rules it is not accounted for in the antispammer. It might be helpful for the logs from critical infrastructure services which must not be banned at all.
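A minimal, self-contained sketch of how a rule's `if` condition from the config documented above maps onto the exported `do_if` API that the antispammer uses (`NewFromMap` plus `CheckRaw`). It assumes the `data` tag parsing added later in this series ([PATCH 56/75] "Parse data type tag in do_if package") and that the field-op values extractor accepts a plain `[]any` of strings, as it does for JSON-decoded configs; the event payload, source name, and metadata below are made up.

```go
package main

import (
	"fmt"

	"github.com/ozontech/file.d/pipeline/do_if"
)

func main() {
	// Roughly the first rule's condition from the README example:
	// the "service" metadata value must contain "alerts-agent" AND
	// the raw event must start with a debug-level marker.
	cond := map[string]any{
		"op": "and",
		"operands": []any{
			map[string]any{
				"op":     "contains",
				"data":   "meta.service",
				"values": []any{"alerts-agent"},
			},
			map[string]any{
				"op":     "prefix",
				"data":   "event",
				"values": []any{`{"level":"debug"`},
			},
		},
	}

	checker, err := do_if.NewFromMap(cond)
	if err != nil {
		panic(err)
	}

	// The same arguments the antispammer passes for each incoming log:
	// raw event bytes, source name and the metadata map.
	event := []byte(`{"level":"debug","message":"ping"}`)
	sourceName := []byte("alerts-agent-7f9c")
	meta := map[string]string{"service": "alerts-agent"}

	fmt.Println(checker.CheckRaw(event, sourceName, meta)) // true
}
```

In the real pipeline this call is made by the antispammer, which then resolves the matched rule's threshold and rate-limit key; the direct call here only shows how `data: event`, `data: source_name`, and `data: meta.<key>` select what `CheckRaw` looks at.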
From 630e0362f1f23419bc325f78859216f4a5853c1c Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Fri, 25 Jul 2025 16:12:34 +0300 Subject: [PATCH 55/75] Refactor --- pipeline/antispam/antispammer.go | 6 +++--- pipeline/antispam/ctor.go | 16 +++++++--------- pipeline/pipeline.go | 2 +- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index 8922e5819..db7b07c43 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -50,7 +50,7 @@ type Options struct { Threshold int UnbanIterations int Exceptions Exceptions - ConfigV2 map[string]any + Config map[string]any Logger *zap.Logger MetricsController *metric.Ctl @@ -78,8 +78,8 @@ func NewAntispammer(o *Options) *Antispammer { var err error - if o.ConfigV2 != nil { - a.rules, a.threshold, err = extractAntispam(o.ConfigV2) + if o.Config != nil { + a.rules, a.threshold, err = extractAntispam(o.Config) if err != nil { o.Logger.Fatal("can't extract antispam", zap.Error(err)) } diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index 775c3a92e..42888c406 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -14,10 +14,6 @@ const ( fieldNameName = "name" fieldNameIf = "if" fieldNameThreshold = "threshold" - - fieldNameData = "data" - fieldNameValues = "values" - fieldNameCaseSensitive = "case_sensitive" ) func extractAntispam(node map[string]any) ([]Rule, int, error) { @@ -67,32 +63,34 @@ func extractRules(rawRules []any) ([]Rule, error) { } func extractRule(node map[string]any) (Rule, error) { + def := Rule{} + name := "" nameNode, err := ctor.Get[string](node, fieldNameName) if err == nil { name = nameNode } else if errors.Is(err, ctor.ErrTypeMismatch) { - return Rule{}, err + return def, err } condNode, err := ctor.Get[map[string]any](node, fieldNameIf) if err != nil { - return Rule{}, err + return def, err } cond, err := do_if.ExtractNode(condNode) if err != nil { - return Rule{}, err + return def, err } thresholdRaw, err := ctor.GetAny(node, fieldNameThreshold) if err != nil { - return Rule{}, err + return def, err } threshold, err := cfg.AnyToInt(thresholdRaw) if err != nil { - return Rule{}, err + return def, err } return newRule(name, cond, threshold) diff --git a/pipeline/pipeline.go b/pipeline/pipeline.go index 57f67704d..9c31b01bf 100644 --- a/pipeline/pipeline.go +++ b/pipeline/pipeline.go @@ -215,7 +215,7 @@ func New(name string, settings *Settings, registry *prometheus.Registry, lg *zap MetricsController: metricCtl, MetricHolder: metricHolder, Exceptions: settings.AntispamExceptions, - ConfigV2: settings.Antispam, + Config: settings.Antispam, }), eventLog: make([]string, 0, 128), From ae814f4376055b99fe2f69b80fd3531f9eb465d1 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Fri, 25 Jul 2025 16:34:59 +0300 Subject: [PATCH 56/75] Parse data type tag in do_if package --- pipeline/do_if/ctor.go | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go index 72a2c0cc7..9f178a4cf 100644 --- a/pipeline/do_if/ctor.go +++ b/pipeline/do_if/ctor.go @@ -15,6 +15,7 @@ const ( fieldNameOp = "op" fieldNameField = "field" + fieldNameData = "data" fieldNameCaseSensitive = "case_sensitive" @@ -87,10 +88,24 @@ func extractFieldOpNode(opName string, node map[string]any) (Node, error) { var err error fieldPath, err := ctor.Get[string](node, fieldNameField) - if err != nil { + fieldPathFound := err == nil + if errors.Is(err, 
ctor.ErrTypeMismatch) { + return nil, err + } + + dataTypeTag, err := ctor.Get[string](node, fieldNameData) + dataTypeTagFound := err == nil + if errors.Is(err, ctor.ErrTypeMismatch) { return nil, err } + switch { + case fieldPathFound && dataTypeTagFound: + return nil, errors.New("field selector and data type tag provided") + case !fieldPathFound && !dataTypeTagFound: + return nil, errors.New("field selector and data type tag are not provided") + } + caseSensitive := true caseSensitiveNode, err := ctor.Get[bool](node, fieldNameCaseSensitive) if err == nil { @@ -104,7 +119,7 @@ func extractFieldOpNode(opName string, node map[string]any) (Node, error) { return nil, fmt.Errorf("extract field op values: %w", err) } - result, err = newFieldOpNode(opName, caseSensitive, vals, fieldPath, "") + result, err = newFieldOpNode(opName, caseSensitive, vals, fieldPath, dataTypeTag) if err != nil { return nil, fmt.Errorf("init field op: %w", err) } From faa4eb8b7d081aa1e7ba9a581cbfb171c0086228 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 28 Jul 2025 13:42:29 +0300 Subject: [PATCH 57/75] Add 'unite_sources' flag --- pipeline/antispam/antispammer.go | 7 ++++++- pipeline/antispam/ctor.go | 19 +++++++++++++++---- pipeline/antispam/rule.go | 24 +++++++++++++----------- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/pipeline/antispam/antispammer.go b/pipeline/antispam/antispammer.go index db7b07c43..74d9ed442 100644 --- a/pipeline/antispam/antispammer.go +++ b/pipeline/antispam/antispammer.go @@ -137,7 +137,12 @@ func (a *Antispammer) IsSpam( return true } - rlMapKey = rule.RLMapKey + if rule.UniteSources { + rlMapKey = fmt.Sprintf("==%d==", rule.RuleID) + } else { + rlMapKey = fmt.Sprintf("==%d==%s==", rule.RuleID, id) + } + threshold = rule.Threshold break } diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index 42888c406..bc1c02310 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -10,10 +10,13 @@ import ( ) const ( - fieldNameRules = "rules" - fieldNameName = "name" - fieldNameIf = "if" + fieldNameRules = "rules" + fieldNameThreshold = "threshold" + + fieldNameName = "name" + fieldNameIf = "if" + fieldNameUniteSources = "unite_sources" ) func extractAntispam(node map[string]any) ([]Rule, int, error) { @@ -73,6 +76,14 @@ func extractRule(node map[string]any) (Rule, error) { return def, err } + uniteSources := false + uniteSourcesNode, err := ctor.Get[bool](node, fieldNameUniteSources) + if err == nil { + uniteSources = uniteSourcesNode + } else if errors.Is(err, ctor.ErrTypeMismatch) { + return def, err + } + condNode, err := ctor.Get[map[string]any](node, fieldNameIf) if err != nil { return def, err @@ -93,5 +104,5 @@ func extractRule(node map[string]any) (Rule, error) { return def, err } - return newRule(name, cond, threshold) + return newRule(name, cond, threshold, uniteSources) } diff --git a/pipeline/antispam/rule.go b/pipeline/antispam/rule.go index 4726b995d..56d288be7 100644 --- a/pipeline/antispam/rule.go +++ b/pipeline/antispam/rule.go @@ -7,35 +7,37 @@ import ( ) type Rule struct { - Name string - Condition do_if.Node - Threshold int - RLMapKey string + Name string + Condition do_if.Node + Threshold int + RuleID int + UniteSources bool } type Rules []Rule func (r *Rule) Prepare(id int) { - r.RLMapKey = fmt.Sprintf("#=%d=#", id) + r.RuleID = id } func checkThreshold(threshold int) error { if threshold < -1 { - return fmt.Errorf("invalid threshold: expected non-negative or -1 got=%d", threshold) + return fmt.Errorf("invalid 
threshold: expected=(non-negative or -1) got=%d", threshold) } return nil } -func newRule(name string, condition do_if.Node, threshold int) (Rule, error) { +func newRule(name string, condition do_if.Node, threshold int, uniteSources bool) (Rule, error) { if err := checkThreshold(threshold); err != nil { return Rule{}, err } return Rule{ - Name: name, - Condition: condition, - Threshold: threshold, + Name: name, + Condition: condition, + Threshold: threshold, + UniteSources: uniteSources, }, nil } @@ -56,7 +58,7 @@ func exceptionsToRules(exceptions Exceptions) (Rules, error) { return nil, err } - rule, err := newRule(e.RuleSet.Name, node, -1) + rule, err := newRule(e.RuleSet.Name, node, -1, false) if err != nil { return nil, err } From 9638af31b4be9d4651cc00c772161d62ce7818ca Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 28 Jul 2025 15:38:15 +0300 Subject: [PATCH 58/75] Refactor --- pipeline/antispam/ctor.go | 14 +++-------- pipeline/ctor/utils.go | 10 +++++++- pipeline/do_if/ctor.go | 53 ++++++++++++++++++++------------------- 3 files changed, 40 insertions(+), 37 deletions(-) diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index bc1c02310..f4693e863 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -68,19 +68,13 @@ func extractRules(rawRules []any) ([]Rule, error) { func extractRule(node map[string]any) (Rule, error) { def := Rule{} - name := "" - nameNode, err := ctor.Get[string](node, fieldNameName) - if err == nil { - name = nameNode - } else if errors.Is(err, ctor.ErrTypeMismatch) { + name, err := ctor.Get[string](node, fieldNameName, "") + if err != nil { return def, err } - uniteSources := false - uniteSourcesNode, err := ctor.Get[bool](node, fieldNameUniteSources) - if err == nil { - uniteSources = uniteSourcesNode - } else if errors.Is(err, ctor.ErrTypeMismatch) { + uniteSources, err := ctor.Get[bool](node, fieldNameUniteSources, false) + if err != nil { return def, err } diff --git a/pipeline/ctor/utils.go b/pipeline/ctor/utils.go index 8a53a1a7f..9329ced97 100644 --- a/pipeline/ctor/utils.go +++ b/pipeline/ctor/utils.go @@ -32,11 +32,19 @@ func Must[T any](v any) (T, error) { return result, nil } -func Get[T any](node Node, field string) (T, error) { +func Get[T any](node Node, field string, defValues ...T) (T, error) { + if len(defValues) > 1 { + panic("too many default values") + } + var def T fieldNode, err := GetAny(node, field) if err != nil { + if len(defValues) == 1 { + return defValues[0], nil + } + return def, err } diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go index 9f178a4cf..2f66f3d49 100644 --- a/pipeline/do_if/ctor.go +++ b/pipeline/do_if/ctor.go @@ -34,6 +34,7 @@ const ( tsCmpValueNowTag = "now" tsCmpValueStartTag = "file_d_start" + defaultTsCmpValueShift = 0 * time.Second defaultTsCmpValUpdateInterval = 10 * time.Second defaultTsFormat = "rfc3339nano" @@ -106,11 +107,8 @@ func extractFieldOpNode(opName string, node map[string]any) (Node, error) { return nil, errors.New("field selector and data type tag are not provided") } - caseSensitive := true - caseSensitiveNode, err := ctor.Get[bool](node, fieldNameCaseSensitive) - if err == nil { - caseSensitive = caseSensitiveNode - } else if errors.Is(err, ctor.ErrTypeMismatch) { + caseSensitive, err := ctor.Get[bool](node, fieldNameCaseSensitive, true) + if err != nil { return nil, err } @@ -219,36 +217,39 @@ func extractTsCmpOpNode(_ string, node map[string]any) (Node, error) { } } - format := defaultTsFormat - str, err := ctor.Get[string](node, 
fieldNameFormat) - if err == nil { - format = str - } else if errors.Is(err, ctor.ErrTypeMismatch) { + format, err := ctor.Get[string](node, fieldNameFormat, defaultTsFormat) + if err != nil { return nil, err } - cmpValueShift := time.Duration(0) - str, err = ctor.Get[string](node, fieldNameCmpValueShift) - if err == nil { - cmpValueShift, err = time.ParseDuration(str) - if err != nil { - return nil, fmt.Errorf("parse cmp value shift: %w", err) - } - } else if errors.Is(err, ctor.ErrTypeMismatch) { + cmpValueShiftStr, err := ctor.Get[string]( + node, + fieldNameCmpValueShift, + defaultTsCmpValueShift.String(), + ) + if err != nil { return nil, err } - updateInterval := defaultTsCmpValUpdateInterval - str, err = ctor.Get[string](node, fieldNameUpdateInterval) - if err == nil { - updateInterval, err = time.ParseDuration(str) - if err != nil { - return nil, fmt.Errorf("parse update interval: %w", err) - } - } else if errors.Is(err, ctor.ErrTypeMismatch) { + cmpValueShift, err := time.ParseDuration(cmpValueShiftStr) + if err != nil { + return nil, fmt.Errorf("parse cmp value shift: %w", err) + } + + updateIntervalStr, err := ctor.Get[string]( + node, + fieldNameUpdateInterval, + defaultTsCmpValUpdateInterval.String(), + ) + if err != nil { return nil, err } + updateInterval, err := time.ParseDuration(updateIntervalStr) + if err != nil { + return nil, fmt.Errorf("parse update interval: %w", err) + } + return newTsCmpOpNode(fieldPath, format, cmpOp, cmpMode, cmpValue, cmpValueShift, updateInterval) } From 7efa1d3d4ea2be03c8da884ee20d6f61b1d7782e Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 28 Jul 2025 23:37:23 +0300 Subject: [PATCH 59/75] Rework tests --- pipeline/do_if/ctor_test.go | 1071 +++++++++++------------- pipeline/do_if/data_checker/checker.go | 9 + pipeline/do_if/do_if_test.go | 16 +- 3 files changed, 509 insertions(+), 587 deletions(-) diff --git a/pipeline/do_if/ctor_test.go b/pipeline/do_if/ctor_test.go index 52e6a747e..ccb9c0186 100644 --- a/pipeline/do_if/ctor_test.go +++ b/pipeline/do_if/ctor_test.go @@ -2,224 +2,157 @@ package do_if import ( "bytes" - "errors" - "fmt" "testing" "time" "github.com/bitly/go-simplejson" + "github.com/ozontech/file.d/pipeline/do_if/data_checker" + "github.com/ozontech/file.d/pipeline/do_if/logic" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -type doIfTreeNode struct { - fieldOp string - fieldName string - caseSensitive bool - values [][]byte - - logicalOp string - operands []*doIfTreeNode - - lenCmpOp string - cmpOp string - cmpValue int - - tsCmpOp bool - tsFormat string - tsCmpValChangeMode string - tsCmpValue time.Time - tsCmpValueShift time.Duration - tsUpdateInterval time.Duration - - checkTypeOp bool -} - -// nolint:gocritic -func buildDoIfTree(node *doIfTreeNode) (Node, error) { - switch { - case node.fieldOp != "": - return newFieldOpNode( - node.fieldOp, - node.caseSensitive, - node.values, - node.fieldName, - "", - ) - case node.logicalOp != "": - operands := make([]Node, 0) - for _, operandNode := range node.operands { - operand, err := buildDoIfTree(operandNode) - if err != nil { - return nil, fmt.Errorf("failed to build tree: %w", err) - } - operands = append(operands, operand) - } - return newLogicalNode( - node.logicalOp, - operands, - ) - case node.lenCmpOp != "": - return newLenCmpOpNode(node.lenCmpOp, node.fieldName, node.cmpOp, node.cmpValue) - case node.tsCmpOp: - return newTsCmpOpNode( - node.fieldName, - node.tsFormat, - node.cmpOp, - node.tsCmpValChangeMode, - node.tsCmpValue, - 
node.tsCmpValueShift, - node.tsUpdateInterval, - ) - case node.checkTypeOp: - return newCheckTypeOpNode( - node.fieldName, - node.values, - ) - default: - return nil, errors.New("unknown type of node") - } -} - -func Test_extractDoIfChecker(t *testing.T) { - type args struct { - cfgStr string - } - +func TestExtractNode(t *testing.T) { tests := []struct { - name string - args args - want *doIfTreeNode - wantErr bool + name string + raw string + expected Node + wantErr bool }{ { name: "ok", - args: args{ - cfgStr: ` - { - "op": "not", - "operands": [ - { - "op": "and", - "operands": [ - { - "op": "equal", - "field": "service", - "values": [null, ""], - "case_sensitive": false - }, - { - "op": "prefix", - "field": "log.msg", - "values": ["test-1", "test-2"], - "case_sensitive": false - }, - { - "op": "byte_len_cmp", - "field": "msg", - "cmp_op": "gt", - "value": 100 - }, - { - "op": "array_len_cmp", - "field": "items", - "cmp_op": "lt", - "value": 100 - }, - { - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "format": "2006-01-02T15:04:05.999999999Z07:00", - "update_interval": "15s" - }, - { - "op": "or", - "operands": [ - { - "op": "suffix", - "field": "service", - "values": ["test-svc-1", "test-svc-2"], - "case_sensitive": true - }, - { - "op": "contains", - "field": "pod", - "values": ["test"] - }, - { - "op": "regex", - "field": "message", - "values": ["test-\\d+", "test-msg-\\d+"] - } - ] - } - ] - } - ] - } - `, - }, - want: &doIfTreeNode{ - logicalOp: "not", - operands: []*doIfTreeNode{ + raw: ` + { + "op": "not", + "operands": [ { - logicalOp: "and", - operands: []*doIfTreeNode{ + "op": "and", + "operands": [ { - fieldOp: "equal", - fieldName: "service", - values: [][]byte{nil, []byte("")}, - caseSensitive: false, + "op": "equal", + "field": "service", + "values": [null, ""], + "case_sensitive": false }, { - fieldOp: "prefix", - fieldName: "log.msg", - values: [][]byte{[]byte("test-1"), []byte("test-2")}, - caseSensitive: false, + "op": "prefix", + "field": "log.msg", + "values": ["test-1", "test-2"], + "case_sensitive": false }, { - lenCmpOp: "byte_len_cmp", - cmpOp: "gt", - fieldName: "msg", - cmpValue: 100, + "op": "byte_len_cmp", + "field": "msg", + "cmp_op": "gt", + "value": 100 }, { - lenCmpOp: "array_len_cmp", - cmpOp: "lt", - fieldName: "items", - cmpValue: 100, + "op": "array_len_cmp", + "field": "items", + "cmp_op": "lt", + "value": 100 }, { - tsCmpOp: true, - cmpOp: "lt", - fieldName: "timestamp", - tsFormat: time.RFC3339Nano, - tsCmpValue: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), - tsCmpValChangeMode: tsCmpModeConstTag, - tsUpdateInterval: 15 * time.Second, + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "format": "2006-01-02T15:04:05.999999999Z07:00", + "update_interval": "15s" }, { - logicalOp: "or", - operands: []*doIfTreeNode{ + "op": "or", + "operands": [ { - fieldOp: "suffix", - fieldName: "service", - values: [][]byte{[]byte("test-svc-1"), []byte("test-svc-2")}, - caseSensitive: true, + "op": "suffix", + "field": "service", + "values": ["test-svc-1", "test-svc-2"], + "case_sensitive": true }, { - fieldOp: "contains", - fieldName: "pod", - values: [][]byte{[]byte("test")}, - caseSensitive: true, + "op": "contains", + "field": "pod", + "values": ["test"] }, { - fieldOp: "regex", - fieldName: "message", - values: [][]byte{[]byte(`test-\d+`), []byte(`test-msg-\d+`)}, - caseSensitive: true, + "op": "regex", + "field": "message", + "values": ["test-\\d+", 
"test-msg-\\d+"] + } + ] + } + ] + } + ] + }`, + expected: &logicalNode{ + op: logic.Not, + operands: []Node{ + &logicalNode{ + op: logic.And, + operands: []Node{ + &fieldOpNode{ + fieldPath: []string{"service"}, + fieldPathStr: "service", + checker: data_checker.MustNew("equal", false, [][]byte{nil, []byte("")}), + }, + &fieldOpNode{ + fieldPath: []string{"log", "msg"}, + fieldPathStr: "log.msg", + checker: data_checker.MustNew( + "prefix", false, [][]byte{[]byte("test-1"), []byte("test-2")}), + }, + &lenCmpOpNode{ + lenCmpOp: byteLenCmpOp, + fieldPath: []string{"msg"}, + cmpOp: cmpOpGreater, + cmpValue: 100, + }, + &lenCmpOpNode{ + lenCmpOp: arrayLenCmpOp, + fieldPath: []string{"items"}, + cmpOp: cmpOpLess, + cmpValue: 100, + }, + &tsCmpOpNode{ + fieldPath: []string{"timestamp"}, + format: time.RFC3339Nano, + cmpOp: cmpOpLess, + cmpValChangeMode: cmpValChangeModeConst, + constCmpValue: time.Date( + 2009, time.November, 10, 23, 0, 0, 0, time.UTC, + ).UnixNano(), + updateInterval: 15 * time.Second, + }, + &logicalNode{ + op: logic.Or, + operands: []Node{ + &fieldOpNode{ + fieldPath: []string{"service"}, + fieldPathStr: "service", + checker: data_checker.MustNew( + "suffix", + true, + [][]byte{[]byte("test-svc-1"), []byte("test-svc-2")}, + ), + }, + &fieldOpNode{ + fieldPath: []string{"pod"}, + fieldPathStr: "pod", + checker: data_checker.MustNew( + "contains", + true, + [][]byte{[]byte("test")}, + ), + }, + &fieldOpNode{ + fieldPath: []string{"message"}, + fieldPathStr: "message", + checker: data_checker.MustNew( + "regex", + true, + [][]byte{[]byte(`test-\d+`), []byte(`test-msg-\d+`)}, + ), }, }, }, @@ -228,432 +161,420 @@ func Test_extractDoIfChecker(t *testing.T) { }, }, }, - { - name: "ok_not_map", - args: args{ - cfgStr: `[{"field":"val"}]`, - }, - wantErr: false, - }, - { - name: "ok_byte_len_cmp_op", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":10}`, - }, - want: &doIfTreeNode{ - lenCmpOp: "byte_len_cmp", - cmpOp: "lt", - fieldName: "data", - cmpValue: 10, + } + /* + { + name: "ok_not_map", + args: args{ + cfgStr: `[{"field":"val"}]`, + }, + wantErr: false, }, - }, - { - name: "ok_array_len_cmp_op", - args: args{ - cfgStr: `{"op":"array_len_cmp","field":"items","cmp_op":"lt","value":10}`, + { + name: "ok_byte_len_cmp_op", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":10}`, + }, + want: &doIfTreeNode{ + lenCmpOp: "byte_len_cmp", + cmpOp: "lt", + fieldName: "data", + cmpValue: 10, + }, }, - want: &doIfTreeNode{ - lenCmpOp: "array_len_cmp", - cmpOp: "lt", - fieldName: "items", - cmpValue: 10, + { + name: "ok_array_len_cmp_op", + args: args{ + cfgStr: `{"op":"array_len_cmp","field":"items","cmp_op":"lt","value":10}`, + }, + want: &doIfTreeNode{ + lenCmpOp: "array_len_cmp", + cmpOp: "lt", + fieldName: "items", + cmpValue: 10, + }, }, - }, - { - name: "ok_ts_cmp_op", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "value_shift": "-24h", - "format": "2006-01-02T15:04:05Z07:00", - "update_interval": "15s"}`, - }, - want: &doIfTreeNode{ - tsCmpOp: true, - cmpOp: "lt", - fieldName: "timestamp", - tsFormat: time.RFC3339, - tsCmpValue: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), - tsCmpValueShift: -24 * time.Hour, - tsCmpValChangeMode: tsCmpModeConstTag, - tsUpdateInterval: 15 * time.Second, + { + name: "ok_ts_cmp_op", + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": 
"2009-11-10T23:00:00Z", + "value_shift": "-24h", + "format": "2006-01-02T15:04:05Z07:00", + "update_interval": "15s"}`, + }, + want: &doIfTreeNode{ + tsCmpOp: true, + cmpOp: "lt", + fieldName: "timestamp", + tsFormat: time.RFC3339, + tsCmpValue: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + tsCmpValueShift: -24 * time.Hour, + tsCmpValChangeMode: tsCmpModeConstTag, + tsUpdateInterval: 15 * time.Second, + }, }, - }, - { - name: "ok_ts_cmp_op_default_settings", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "now"}`, - }, - want: &doIfTreeNode{ - tsCmpOp: true, - cmpOp: "lt", - fieldName: "timestamp", - tsCmpValChangeMode: tsCmpModeNowTag, - tsFormat: defaultTsFormat, - tsCmpValueShift: 0, - tsUpdateInterval: defaultTsCmpValUpdateInterval, + { + name: "ok_ts_cmp_op_default_settings", + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "now"}`, + }, + want: &doIfTreeNode{ + tsCmpOp: true, + cmpOp: "lt", + fieldName: "timestamp", + tsCmpValChangeMode: tsCmpModeNowTag, + tsFormat: defaultTsFormat, + tsCmpValueShift: 0, + tsUpdateInterval: defaultTsCmpValUpdateInterval, + }, }, - }, - { - name: "ok_ts_cmp_op_format_alias", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "format": "rfc3339", - "value": "now"}`, - }, - want: &doIfTreeNode{ - tsCmpOp: true, - cmpOp: "lt", - fieldName: "timestamp", - tsCmpValChangeMode: tsCmpModeNowTag, - tsFormat: time.RFC3339, - tsCmpValueShift: 0, - tsUpdateInterval: defaultTsCmpValUpdateInterval, + { + name: "ok_ts_cmp_op_format_alias", + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "format": "rfc3339", + "value": "now"}`, + }, + want: &doIfTreeNode{ + tsCmpOp: true, + cmpOp: "lt", + fieldName: "timestamp", + tsCmpValChangeMode: tsCmpModeNowTag, + tsFormat: time.RFC3339, + tsCmpValueShift: 0, + tsUpdateInterval: defaultTsCmpValUpdateInterval, + }, }, - }, - { - name: "ok_check_type", - args: args{ - cfgStr: `{ - "op": "check_type", - "field": "log", - "values": ["obj","arr"] - }`, - }, - want: &doIfTreeNode{ - checkTypeOp: true, - fieldName: "log", - values: [][]byte{ - []byte("obj"), - []byte("arr"), + { + name: "ok_check_type", + args: args{ + cfgStr: `{ + "op": "check_type", + "field": "log", + "values": ["obj","arr"] + }`, }, - }, - }, - { - name: "ok_single_val", - args: args{ - cfgStr: `{ - "op":"or", - "operands":[ - {"op":"equal","field":"service","values":null}, - {"op":"equal","field":"service","values":""}, - {"op":"equal","field":"service","values":"test"} - ] - }`, - }, - want: &doIfTreeNode{ - logicalOp: "or", - operands: []*doIfTreeNode{ - { - fieldOp: "equal", - fieldName: "service", - values: [][]byte{nil}, - caseSensitive: true, - }, - { - fieldOp: "equal", - fieldName: "service", - values: [][]byte{[]byte("")}, - caseSensitive: true, - }, - { - fieldOp: "equal", - fieldName: "service", - values: [][]byte{[]byte("test")}, - caseSensitive: true, + want: &doIfTreeNode{ + checkTypeOp: true, + fieldName: "log", + values: [][]byte{ + []byte("obj"), + []byte("arr"), }, }, }, - wantErr: false, - }, - { - name: "error_no_op_field", - args: args{ - cfgStr: `{"field": "val"}`, + { + name: "ok_single_val", + args: args{ + cfgStr: `{ + "op":"or", + "operands":[ + {"op":"equal","field":"service","values":null}, + {"op":"equal","field":"service","values":""}, + {"op":"equal","field":"service","values":"test"} + ] + }`, + }, + want: &doIfTreeNode{ + logicalOp: "or", + 
operands: []*doIfTreeNode{ + { + fieldOp: "equal", + fieldName: "service", + values: [][]byte{nil}, + caseSensitive: true, + }, + { + fieldOp: "equal", + fieldName: "service", + values: [][]byte{[]byte("")}, + caseSensitive: true, + }, + { + fieldOp: "equal", + fieldName: "service", + values: [][]byte{[]byte("test")}, + caseSensitive: true, + }, + }, + }, + wantErr: false, }, - wantErr: true, - }, - { - name: "error_invalid_op_name", - args: args{ - cfgStr: `{"op": "invalid"}`, + { + name: "error_no_op_field", + args: args{ + cfgStr: `{"field": "val"}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_invalid_field_op", - args: args{ - cfgStr: `{"op": "equal"}`, + { + name: "error_invalid_op_name", + args: args{ + cfgStr: `{"op": "invalid"}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_invalid_case_sensitive_type", - args: args{ - cfgStr: `{ - "op": "equal", - "field": "a", - "values": ["abc"], - "case_sensitive": "not bool"}`, - }, - wantErr: true, - }, - { - name: "error_invalid_logical_op", - args: args{ - cfgStr: `{"op": "or"}`, + { + name: "error_invalid_field_op", + args: args{ + cfgStr: `{"op": "equal"}`, + }, + wantErr: true, + }, + { + name: "error_invalid_case_sensitive_type", + args: args{ + cfgStr: `{ + "op": "equal", + "field": "a", + "values": ["abc"], + "case_sensitive": "not bool"}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_invalid_logical_op_operand", - args: args{ - cfgStr: `{"op": "or", "operands": [{"op": "equal"}]}`, + { + name: "error_invalid_logical_op", + args: args{ + cfgStr: `{"op": "or"}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_no_field", - args: args{ - cfgStr: `{"op":"byte_len_cmp","cmp_op":"lt","value":10}`, + { + name: "error_invalid_logical_op_operand", + args: args{ + cfgStr: `{"op": "or", "operands": [{"op": "equal"}]}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_array_len_cmp_op_no_field", - args: args{ - cfgStr: `{"op":"array_len_cmp","cmp_op":"lt","value":10}`, + { + name: "error_byte_len_cmp_op_no_field", + args: args{ + cfgStr: `{"op":"byte_len_cmp","cmp_op":"lt","value":10}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_field_is_not_string", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":123,"cmp_op":"lt","value":10}`, + { + name: "error_array_len_cmp_op_no_field", + args: args{ + cfgStr: `{"op":"array_len_cmp","cmp_op":"lt","value":10}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_no_cmp_op", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","value":10}`, + { + name: "error_byte_len_cmp_op_field_is_not_string", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":123,"cmp_op":"lt","value":10}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_cmp_op_is_not_string", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":123,"value":10}`, + { + name: "error_byte_len_cmp_op_no_cmp_op", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","value":10}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_no_cmp_value", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt"}`, + { + name: "error_byte_len_cmp_op_cmp_op_is_not_string", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":123,"value":10}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_cmp_value_is_not_integer", - 
args: args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":"abc"}`, + { + name: "error_byte_len_cmp_op_no_cmp_value", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt"}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_invalid_cmp_op", - args: args{cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"ABC","value":10}`}, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_negative_cmp_value", - args: args{cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":-1}`}, - wantErr: true, - }, - { - name: "error_ts_cmp_op_no_field", - args: args{ - cfgStr: `{"op": "ts_cmp","cmp_op": "lt"}`, + { + name: "error_byte_len_cmp_op_cmp_value_is_not_integer", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":"abc"}`, + }, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_invalid_cmp_op", + args: args{cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"ABC","value":10}`}, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_negative_cmp_value", + args: args{cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":-1}`}, + wantErr: true, + }, + { + name: "error_ts_cmp_op_no_field", + args: args{ + cfgStr: `{"op": "ts_cmp","cmp_op": "lt"}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_field_is_not_string", - args: args{ - cfgStr: `{"op":"ts_cmp","field":123}`, + { + name: "error_ts_cmp_op_field_is_not_string", + args: args{ + cfgStr: `{"op":"ts_cmp","field":123}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_no_cmp_op", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp"}`, + { + name: "error_ts_cmp_op_no_cmp_op", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp"}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_format_type", + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "format": 1000, + "value": "now"}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_value_shift_type", + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "value_shift": 1000, + "format": "2006-01-02T15:04:05Z07:00", + "update_interval": "15s"}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_update_interval_type", + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "format": "2006-01-02T15:04:05Z07:00", + "update_interval": false}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_format_type", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "format": 1000, - "value": "now"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_value_shift_type", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "value_shift": 1000, - "format": "2006-01-02T15:04:05Z07:00", - "update_interval": "15s"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_update_interval_type", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "format": "2006-01-02T15:04:05Z07:00", - "update_interval": false}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_cmp_op_is_not_string", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":123}`, + { 
+ name: "error_ts_cmp_op_cmp_op_is_not_string", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":123}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_no_cmp_value", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt"}`, + { + name: "error_ts_cmp_op_no_cmp_value", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt"}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_cmp_value_is_not_string", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":123}`, + { + name: "error_ts_cmp_op_cmp_value_is_not_string", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":123}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_cmp_value", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":"qwe"}`, + { + name: "error_ts_cmp_op_invalid_cmp_value", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":"qwe"}`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_cmp_op", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"qwe","value":"2009-11-10T23:00:00Z"}`, + { + name: "error_ts_cmp_op_invalid_cmp_op", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"qwe","value":"2009-11-10T23:00:00Z"}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_update_interval", + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "update_interval": "qwe"}`, + }, + wantErr: true, + }, + { + name: "error_check_type_op_empty_values", + args: args{ + cfgStr: `{ + "op": "check_type", + "field": "log", + "values": [] + }`, + }, + wantErr: true, + }, + { + name: "error_check_type_op_invalid_value", + args: args{ + cfgStr: `{ + "op": "check_type", + "field": "log", + "values": ["unknown_type"] + }`, + }, + wantErr: true, + }, + { + name: "error_check_type_op_no_field", + args: args{ + cfgStr: `{ + "op": "check_type", + "values": ["obj"] + }`, + }, + wantErr: true, }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_update_interval", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "update_interval": "qwe"}`, - }, - wantErr: true, - }, - { - name: "error_check_type_op_empty_values", - args: args{ - cfgStr: `{ - "op": "check_type", - "field": "log", - "values": [] - }`, - }, - wantErr: true, - }, - { - name: "error_check_type_op_invalid_value", - args: args{ - cfgStr: `{ - "op": "check_type", - "field": "log", - "values": ["unknown_type"] - }`, - }, - wantErr: true, - }, - { - name: "error_check_type_op_no_field", - args: args{ - cfgStr: `{ - "op": "check_type", - "values": ["obj"] - }`, - }, - wantErr: true, - }, - } + } + */ for _, tt := range tests { tt := tt t.Run(tt.name, func(t *testing.T) { t.Parallel() - reader := bytes.NewBufferString(tt.args.cfgStr) + + reader := bytes.NewBufferString(tt.raw) actionJSON, err := simplejson.NewFromReader(reader) require.NoError(t, err) - got, err := extractDoIfChecker(actionJSON) - if (err != nil) != tt.wantErr { - t.Errorf("extractDoIfChecker() error = %v, wantErr %v", err, tt.wantErr) - return - } + + got, err := ExtractNode(actionJSON.MustMap()) + require.Equal(t, err != nil, tt.wantErr) + if tt.wantErr { return } - if tt.want == nil { - assert.Nil(t, got) - return - } - wantTree, err := 
buildDoIfTree(tt.want) - require.NoError(t, err) - wantDoIfChecker := newChecker(wantTree) - assert.NoError(t, wantDoIfChecker.IsEqualTo(got)) - }) - } -} -func extractDoIfChecker(actionJSON *simplejson.Json) (*Checker, error) { - m := actionJSON.MustMap() - if m == nil { - return nil, nil + assert.NoError(t, got.isEqualTo(tt.expected, 1)) + }) } - - return NewFromMap(m) } diff --git a/pipeline/do_if/data_checker/checker.go b/pipeline/do_if/data_checker/checker.go index df52375c9..e8167c458 100644 --- a/pipeline/do_if/data_checker/checker.go +++ b/pipeline/do_if/data_checker/checker.go @@ -138,6 +138,15 @@ func New(opTag string, caseSensitive bool, values [][]byte) (DataChecker, error) }, nil } +func MustNew(opTag string, caseSensitive bool, values [][]byte) DataChecker { + res, err := New(opTag, caseSensitive, values) + if err != nil { + panic(err) + } + + return res +} + func (n *DataChecker) Check(data []byte) bool { // fast check for data if n.op != opRegex && len(data) < n.minValLen { diff --git a/pipeline/do_if/do_if_test.go b/pipeline/do_if/do_if_test.go index 4125b341e..592f5d62a 100644 --- a/pipeline/do_if/do_if_test.go +++ b/pipeline/do_if/do_if_test.go @@ -119,14 +119,6 @@ func checkNode(t *testing.T, want, got Node) { func TestBuildNodes(t *testing.T) { timestamp := time.Now() - mustNewChecker := func(op string, caseSensitive bool, values [][]byte) data_checker.DataChecker { - c, err := data_checker.New(op, caseSensitive, values) - if err != nil { - panic(err) - } - return c - } - tests := []struct { name string tree treeNode @@ -144,7 +136,7 @@ func TestBuildNodes(t *testing.T) { want: &fieldOpNode{ fieldPath: []string{"log", "pod"}, fieldPathStr: "log.pod", - checker: mustNewChecker( + checker: data_checker.MustNew( "equal", true, [][]byte{[]byte(`test-111`), []byte(`test-2`), []byte(`test-3`), []byte(`test-12345`)}, @@ -162,7 +154,7 @@ func TestBuildNodes(t *testing.T) { want: &fieldOpNode{ fieldPath: []string{"log", "pod"}, fieldPathStr: "log.pod", - checker: mustNewChecker( + checker: data_checker.MustNew( "equal", false, [][]byte{[]byte(`TEST-111`), []byte(`Test-2`), []byte(`tesT-3`), []byte(`TeSt-12345`)}, @@ -194,7 +186,7 @@ func TestBuildNodes(t *testing.T) { &fieldOpNode{ fieldPath: []string{"log", "pod"}, fieldPathStr: "log.pod", - checker: mustNewChecker( + checker: data_checker.MustNew( "equal", true, [][]byte{[]byte(`test-111`), []byte(`test-2`), []byte(`test-3`), []byte(`test-12345`)}, @@ -203,7 +195,7 @@ func TestBuildNodes(t *testing.T) { &fieldOpNode{ fieldPath: []string{"service", "msg"}, fieldPathStr: "service.msg", - checker: mustNewChecker( + checker: data_checker.MustNew( "contains", true, [][]byte{[]byte(`test-0987`), []byte(`test-11`)}, From 209fb913e102338b1a098a07959b8c1dee7e28a8 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 29 Jul 2025 00:29:35 +0300 Subject: [PATCH 60/75] Add tests --- pipeline/do_if/ctor_test.go | 769 ++++++++++++++++++------------------ 1 file changed, 379 insertions(+), 390 deletions(-) diff --git a/pipeline/do_if/ctor_test.go b/pipeline/do_if/ctor_test.go index ccb9c0186..67c266c43 100644 --- a/pipeline/do_if/ctor_test.go +++ b/pipeline/do_if/ctor_test.go @@ -161,403 +161,392 @@ func TestExtractNode(t *testing.T) { }, }, }, - } - /* - { - name: "ok_not_map", - args: args{ - cfgStr: `[{"field":"val"}]`, - }, - wantErr: false, - }, - { - name: "ok_byte_len_cmp_op", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":10}`, - }, - want: &doIfTreeNode{ - lenCmpOp: "byte_len_cmp", - cmpOp: 
"lt", - fieldName: "data", - cmpValue: 10, - }, - }, - { - name: "ok_array_len_cmp_op", - args: args{ - cfgStr: `{"op":"array_len_cmp","field":"items","cmp_op":"lt","value":10}`, - }, - want: &doIfTreeNode{ - lenCmpOp: "array_len_cmp", - cmpOp: "lt", - fieldName: "items", - cmpValue: 10, - }, - }, - { - name: "ok_ts_cmp_op", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "value_shift": "-24h", - "format": "2006-01-02T15:04:05Z07:00", - "update_interval": "15s"}`, - }, - want: &doIfTreeNode{ - tsCmpOp: true, - cmpOp: "lt", - fieldName: "timestamp", - tsFormat: time.RFC3339, - tsCmpValue: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), - tsCmpValueShift: -24 * time.Hour, - tsCmpValChangeMode: tsCmpModeConstTag, - tsUpdateInterval: 15 * time.Second, - }, + { + name: "ok_byte_len_cmp_op", + raw: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":10}`, + expected: &lenCmpOpNode{ + lenCmpOp: byteLenCmpOp, + fieldPath: []string{"data"}, + cmpOp: cmpOpLess, + cmpValue: 10, }, - { - name: "ok_ts_cmp_op_default_settings", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "now"}`, - }, - want: &doIfTreeNode{ - tsCmpOp: true, - cmpOp: "lt", - fieldName: "timestamp", - tsCmpValChangeMode: tsCmpModeNowTag, - tsFormat: defaultTsFormat, - tsCmpValueShift: 0, - tsUpdateInterval: defaultTsCmpValUpdateInterval, - }, + }, + { + name: "ok_array_len_cmp_op", + raw: `{"op":"array_len_cmp","field":"items","cmp_op":"lt","value":10}`, + expected: &lenCmpOpNode{ + lenCmpOp: arrayLenCmpOp, + fieldPath: []string{"items"}, + cmpOp: cmpOpLess, + cmpValue: 10, }, - { - name: "ok_ts_cmp_op_format_alias", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "format": "rfc3339", - "value": "now"}`, - }, - want: &doIfTreeNode{ - tsCmpOp: true, - cmpOp: "lt", - fieldName: "timestamp", - tsCmpValChangeMode: tsCmpModeNowTag, - tsFormat: time.RFC3339, - tsCmpValueShift: 0, - tsUpdateInterval: defaultTsCmpValUpdateInterval, - }, + }, + { + name: "ok_ts_cmp_op", + raw: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "value_shift": "-24h", + "format": "2006-01-02T15:04:05Z07:00", + "update_interval": "15s" + }`, + expected: &tsCmpOpNode{ + fieldPath: []string{"timestamp"}, + format: time.RFC3339, + cmpOp: cmpOpLess, + cmpValChangeMode: cmpValChangeModeConst, + constCmpValue: time.Date( + 2009, time.November, 10, 23, 0, 0, 0, time.UTC, + ).UnixNano(), + cmpValueShift: (-24 * time.Hour).Nanoseconds(), + updateInterval: 15 * time.Second, }, + }, + /* + { + name: "ok_ts_cmp_op_default_settings", + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "now"}`, + }, + want: &doIfTreeNode{ + tsCmpOp: true, + cmpOp: "lt", + fieldName: "timestamp", + tsCmpValChangeMode: tsCmpModeNowTag, + tsFormat: defaultTsFormat, + tsCmpValueShift: 0, + tsUpdateInterval: defaultTsCmpValUpdateInterval, + }, + }, + { + name: "ok_ts_cmp_op_format_alias", + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "format": "rfc3339", + "value": "now"}`, + }, + want: &doIfTreeNode{ + tsCmpOp: true, + cmpOp: "lt", + fieldName: "timestamp", + tsCmpValChangeMode: tsCmpModeNowTag, + tsFormat: time.RFC3339, + tsCmpValueShift: 0, + tsUpdateInterval: defaultTsCmpValUpdateInterval, + }, + }, - { - name: "ok_check_type", - args: args{ - cfgStr: `{ - "op": "check_type", - 
"field": "log", - "values": ["obj","arr"] - }`, - }, - want: &doIfTreeNode{ - checkTypeOp: true, - fieldName: "log", - values: [][]byte{ - []byte("obj"), - []byte("arr"), + { + name: "ok_check_type", + args: args{ + cfgStr: `{ + "op": "check_type", + "field": "log", + "values": ["obj","arr"] + }`, + }, + want: &doIfTreeNode{ + checkTypeOp: true, + fieldName: "log", + values: [][]byte{ + []byte("obj"), + []byte("arr"), + }, + }, }, - }, - }, - { - name: "ok_single_val", - args: args{ - cfgStr: `{ - "op":"or", - "operands":[ - {"op":"equal","field":"service","values":null}, - {"op":"equal","field":"service","values":""}, - {"op":"equal","field":"service","values":"test"} - ] - }`, - }, - want: &doIfTreeNode{ - logicalOp: "or", - operands: []*doIfTreeNode{ - { - fieldOp: "equal", - fieldName: "service", - values: [][]byte{nil}, - caseSensitive: true, - }, - { - fieldOp: "equal", - fieldName: "service", - values: [][]byte{[]byte("")}, - caseSensitive: true, - }, - { - fieldOp: "equal", - fieldName: "service", - values: [][]byte{[]byte("test")}, - caseSensitive: true, + { + name: "ok_single_val", + args: args{ + cfgStr: `{ + "op":"or", + "operands":[ + {"op":"equal","field":"service","values":null}, + {"op":"equal","field":"service","values":""}, + {"op":"equal","field":"service","values":"test"} + ] + }`, + }, + want: &doIfTreeNode{ + logicalOp: "or", + operands: []*doIfTreeNode{ + { + fieldOp: "equal", + fieldName: "service", + values: [][]byte{nil}, + caseSensitive: true, + }, + { + fieldOp: "equal", + fieldName: "service", + values: [][]byte{[]byte("")}, + caseSensitive: true, + }, + { + fieldOp: "equal", + fieldName: "service", + values: [][]byte{[]byte("test")}, + caseSensitive: true, + }, + }, }, + wantErr: false, }, - }, - wantErr: false, - }, - { - name: "error_no_op_field", - args: args{ - cfgStr: `{"field": "val"}`, - }, - wantErr: true, - }, - { - name: "error_invalid_op_name", - args: args{ - cfgStr: `{"op": "invalid"}`, - }, - wantErr: true, - }, - { - name: "error_invalid_field_op", - args: args{ - cfgStr: `{"op": "equal"}`, - }, - wantErr: true, - }, - { - name: "error_invalid_case_sensitive_type", - args: args{ - cfgStr: `{ - "op": "equal", - "field": "a", - "values": ["abc"], - "case_sensitive": "not bool"}`, - }, - wantErr: true, - }, - { - name: "error_invalid_logical_op", - args: args{ - cfgStr: `{"op": "or"}`, - }, - wantErr: true, - }, - { - name: "error_invalid_logical_op_operand", - args: args{ - cfgStr: `{"op": "or", "operands": [{"op": "equal"}]}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_no_field", - args: args{ - cfgStr: `{"op":"byte_len_cmp","cmp_op":"lt","value":10}`, - }, - wantErr: true, - }, - { - name: "error_array_len_cmp_op_no_field", - args: args{ - cfgStr: `{"op":"array_len_cmp","cmp_op":"lt","value":10}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_field_is_not_string", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":123,"cmp_op":"lt","value":10}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_no_cmp_op", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","value":10}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_cmp_op_is_not_string", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":123,"value":10}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_no_cmp_value", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt"}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_cmp_value_is_not_integer", - args: 
args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":"abc"}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_invalid_cmp_op", - args: args{cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"ABC","value":10}`}, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_negative_cmp_value", - args: args{cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":-1}`}, - wantErr: true, - }, - { - name: "error_ts_cmp_op_no_field", - args: args{ - cfgStr: `{"op": "ts_cmp","cmp_op": "lt"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_field_is_not_string", - args: args{ - cfgStr: `{"op":"ts_cmp","field":123}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_no_cmp_op", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_format_type", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "format": 1000, - "value": "now"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_value_shift_type", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "value_shift": 1000, - "format": "2006-01-02T15:04:05Z07:00", - "update_interval": "15s"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_update_interval_type", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "format": "2006-01-02T15:04:05Z07:00", - "update_interval": false}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_cmp_op_is_not_string", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":123}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_no_cmp_value", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_cmp_value_is_not_string", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":123}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_cmp_value", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":"qwe"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_cmp_op", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"qwe","value":"2009-11-10T23:00:00Z"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_update_interval", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "update_interval": "qwe"}`, - }, - wantErr: true, - }, - { - name: "error_check_type_op_empty_values", - args: args{ - cfgStr: `{ - "op": "check_type", - "field": "log", - "values": [] - }`, - }, - wantErr: true, - }, - { - name: "error_check_type_op_invalid_value", - args: args{ - cfgStr: `{ - "op": "check_type", - "field": "log", - "values": ["unknown_type"] - }`, - }, - wantErr: true, - }, - { - name: "error_check_type_op_no_field", - args: args{ - cfgStr: `{ - "op": "check_type", - "values": ["obj"] - }`, - }, - wantErr: true, - }, - } - */ + { + name: "error_no_op_field", + args: args{ + cfgStr: `{"field": "val"}`, + }, + wantErr: true, + }, + { + name: "error_invalid_op_name", + args: args{ + cfgStr: `{"op": "invalid"}`, + }, + wantErr: true, + }, + { + name: "error_invalid_field_op", + args: args{ + cfgStr: `{"op": "equal"}`, + }, + wantErr: true, + }, + { + name: "error_invalid_case_sensitive_type", + args: args{ + 
cfgStr: `{ + "op": "equal", + "field": "a", + "values": ["abc"], + "case_sensitive": "not bool"}`, + }, + wantErr: true, + }, + { + name: "error_invalid_logical_op", + args: args{ + cfgStr: `{"op": "or"}`, + }, + wantErr: true, + }, + { + name: "error_invalid_logical_op_operand", + args: args{ + cfgStr: `{"op": "or", "operands": [{"op": "equal"}]}`, + }, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_no_field", + args: args{ + cfgStr: `{"op":"byte_len_cmp","cmp_op":"lt","value":10}`, + }, + wantErr: true, + }, + { + name: "error_array_len_cmp_op_no_field", + args: args{ + cfgStr: `{"op":"array_len_cmp","cmp_op":"lt","value":10}`, + }, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_field_is_not_string", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":123,"cmp_op":"lt","value":10}`, + }, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_no_cmp_op", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","value":10}`, + }, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_cmp_op_is_not_string", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":123,"value":10}`, + }, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_no_cmp_value", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt"}`, + }, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_cmp_value_is_not_integer", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":"abc"}`, + }, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_invalid_cmp_op", + args: args{cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"ABC","value":10}`}, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_negative_cmp_value", + args: args{cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":-1}`}, + wantErr: true, + }, + { + name: "error_ts_cmp_op_no_field", + args: args{ + cfgStr: `{"op": "ts_cmp","cmp_op": "lt"}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_field_is_not_string", + args: args{ + cfgStr: `{"op":"ts_cmp","field":123}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_no_cmp_op", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp"}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_format_type", + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "format": 1000, + "value": "now"}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_value_shift_type", + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "value_shift": 1000, + "format": "2006-01-02T15:04:05Z07:00", + "update_interval": "15s"}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_update_interval_type", + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "format": "2006-01-02T15:04:05Z07:00", + "update_interval": false}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_cmp_op_is_not_string", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":123}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_no_cmp_value", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt"}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_cmp_value_is_not_string", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":123}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_cmp_value", + args: args{ + cfgStr: 
`{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":"qwe"}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_cmp_op", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"qwe","value":"2009-11-10T23:00:00Z"}`, + }, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_update_interval", + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "update_interval": "qwe"}`, + }, + wantErr: true, + }, + { + name: "error_check_type_op_empty_values", + args: args{ + cfgStr: `{ + "op": "check_type", + "field": "log", + "values": [] + }`, + }, + wantErr: true, + }, + { + name: "error_check_type_op_invalid_value", + args: args{ + cfgStr: `{ + "op": "check_type", + "field": "log", + "values": ["unknown_type"] + }`, + }, + wantErr: true, + }, + { + name: "error_check_type_op_no_field", + raw: `{ + "op": "check_type", + "values": ["obj"] + }`, + }, + wantErr: true, + }, + + */ + } + for _, tt := range tests { tt := tt t.Run(tt.name, func(t *testing.T) { From faba299896d591a16f649f5a88c96794fcdc2405 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 29 Jul 2025 10:44:46 +0300 Subject: [PATCH 61/75] Add tests --- pipeline/do_if/ctor_test.go | 612 ++++++++++++++++-------------------- 1 file changed, 276 insertions(+), 336 deletions(-) diff --git a/pipeline/do_if/ctor_test.go b/pipeline/do_if/ctor_test.go index 67c266c43..8604ccd75 100644 --- a/pipeline/do_if/ctor_test.go +++ b/pipeline/do_if/ctor_test.go @@ -8,6 +8,7 @@ import ( "github.com/bitly/go-simplejson" "github.com/ozontech/file.d/pipeline/do_if/data_checker" "github.com/ozontech/file.d/pipeline/do_if/logic" + insaneJSON "github.com/ozontech/insane-json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -204,347 +205,286 @@ func TestExtractNode(t *testing.T) { updateInterval: 15 * time.Second, }, }, - /* - { - name: "ok_ts_cmp_op_default_settings", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "now"}`, - }, - want: &doIfTreeNode{ - tsCmpOp: true, - cmpOp: "lt", - fieldName: "timestamp", - tsCmpValChangeMode: tsCmpModeNowTag, - tsFormat: defaultTsFormat, - tsCmpValueShift: 0, - tsUpdateInterval: defaultTsCmpValUpdateInterval, - }, - }, - { - name: "ok_ts_cmp_op_format_alias", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "format": "rfc3339", - "value": "now"}`, - }, - want: &doIfTreeNode{ - tsCmpOp: true, - cmpOp: "lt", - fieldName: "timestamp", - tsCmpValChangeMode: tsCmpModeNowTag, - tsFormat: time.RFC3339, - tsCmpValueShift: 0, - tsUpdateInterval: defaultTsCmpValUpdateInterval, - }, - }, - - { - name: "ok_check_type", - args: args{ - cfgStr: `{ - "op": "check_type", - "field": "log", - "values": ["obj","arr"] - }`, - }, - want: &doIfTreeNode{ - checkTypeOp: true, - fieldName: "log", - values: [][]byte{ - []byte("obj"), - []byte("arr"), - }, - }, - }, - { - name: "ok_single_val", - args: args{ - cfgStr: `{ - "op":"or", - "operands":[ - {"op":"equal","field":"service","values":null}, - {"op":"equal","field":"service","values":""}, - {"op":"equal","field":"service","values":"test"} - ] - }`, - }, - want: &doIfTreeNode{ - logicalOp: "or", - operands: []*doIfTreeNode{ - { - fieldOp: "equal", - fieldName: "service", - values: [][]byte{nil}, - caseSensitive: true, - }, - { - fieldOp: "equal", - fieldName: "service", - values: [][]byte{[]byte("")}, - caseSensitive: true, - }, - { - fieldOp: "equal", - fieldName: 
"service", - values: [][]byte{[]byte("test")}, - caseSensitive: true, - }, - }, - }, - wantErr: false, - }, - { - name: "error_no_op_field", - args: args{ - cfgStr: `{"field": "val"}`, - }, - wantErr: true, - }, - { - name: "error_invalid_op_name", - args: args{ - cfgStr: `{"op": "invalid"}`, - }, - wantErr: true, - }, - { - name: "error_invalid_field_op", - args: args{ - cfgStr: `{"op": "equal"}`, - }, - wantErr: true, - }, - { - name: "error_invalid_case_sensitive_type", - args: args{ - cfgStr: `{ - "op": "equal", - "field": "a", - "values": ["abc"], - "case_sensitive": "not bool"}`, - }, - wantErr: true, - }, - { - name: "error_invalid_logical_op", - args: args{ - cfgStr: `{"op": "or"}`, - }, - wantErr: true, - }, - { - name: "error_invalid_logical_op_operand", - args: args{ - cfgStr: `{"op": "or", "operands": [{"op": "equal"}]}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_no_field", - args: args{ - cfgStr: `{"op":"byte_len_cmp","cmp_op":"lt","value":10}`, - }, - wantErr: true, - }, - { - name: "error_array_len_cmp_op_no_field", - args: args{ - cfgStr: `{"op":"array_len_cmp","cmp_op":"lt","value":10}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_field_is_not_string", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":123,"cmp_op":"lt","value":10}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_no_cmp_op", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","value":10}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_cmp_op_is_not_string", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":123,"value":10}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_no_cmp_value", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt"}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_cmp_value_is_not_integer", - args: args{ - cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":"abc"}`, - }, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_invalid_cmp_op", - args: args{cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"ABC","value":10}`}, - wantErr: true, - }, - { - name: "error_byte_len_cmp_op_negative_cmp_value", - args: args{cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":-1}`}, - wantErr: true, - }, - { - name: "error_ts_cmp_op_no_field", - args: args{ - cfgStr: `{"op": "ts_cmp","cmp_op": "lt"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_field_is_not_string", - args: args{ - cfgStr: `{"op":"ts_cmp","field":123}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_no_cmp_op", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_format_type", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "format": 1000, - "value": "now"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_value_shift_type", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "value_shift": 1000, - "format": "2006-01-02T15:04:05Z07:00", - "update_interval": "15s"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_update_interval_type", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "format": "2006-01-02T15:04:05Z07:00", - "update_interval": false}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_cmp_op_is_not_string", - 
args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":123}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_no_cmp_value", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_cmp_value_is_not_string", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":123}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_cmp_value", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":"qwe"}`, - }, - wantErr: true, - }, - { - name: "error_ts_cmp_op_invalid_cmp_op", - args: args{ - cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"qwe","value":"2009-11-10T23:00:00Z"}`, - }, - wantErr: true, + { + name: "ok_ts_cmp_op_default_settings", + raw: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "now" + }`, + expected: &tsCmpOpNode{ + fieldPath: []string{"timestamp"}, + format: time.RFC3339Nano, + cmpOp: cmpOpLess, + cmpValChangeMode: cmpValChangeModeNow, + updateInterval: defaultTsCmpValUpdateInterval, + cmpValueShift: 0, + }, + }, + { + name: "ok_ts_cmp_op_format_alias", + raw: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "format": "rfc3339", + "value": "now" + }`, + expected: &tsCmpOpNode{ + fieldPath: []string{"timestamp"}, + format: time.RFC3339, + cmpOp: cmpOpLess, + cmpValChangeMode: cmpValChangeModeNow, + cmpValueShift: 0, + updateInterval: defaultTsCmpValUpdateInterval, + }, + }, + { + name: "ok_check_type", + raw: `{ + "op": "check_type", + "field": "log", + "values": ["obj","arr"] + }`, + expected: &checkTypeOpNode{ + fieldPath: []string{"log"}, + fieldPathStr: "log", + checkTypeFns: []checkTypeFn{ + func(n *insaneJSON.Node) bool { + return n.IsObject() }, - { - name: "error_ts_cmp_op_invalid_update_interval", - args: args{ - cfgStr: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "update_interval": "qwe"}`, - }, - wantErr: true, + func(n *insaneJSON.Node) bool { + return n.IsArray() }, - { - name: "error_check_type_op_empty_values", - args: args{ - cfgStr: `{ - "op": "check_type", - "field": "log", - "values": [] - }`, - }, - wantErr: true, + }, + }, + }, + { + name: "ok_single_val", + raw: `{ + "op":"or", + "operands":[ + {"op":"equal","field":"service","values":null}, + {"op":"equal","field":"service","values":""}, + {"op":"equal","field":"service","values":"test"} + ] + }`, + expected: &logicalNode{ + op: logic.Or, + operands: []Node{ + &fieldOpNode{ + fieldPath: []string{"service"}, + fieldPathStr: "service", + checker: data_checker.MustNew("equal", true, [][]byte{nil}), }, - { - name: "error_check_type_op_invalid_value", - args: args{ - cfgStr: `{ - "op": "check_type", - "field": "log", - "values": ["unknown_type"] - }`, - }, - wantErr: true, + &fieldOpNode{ + fieldPath: []string{"service"}, + fieldPathStr: "service", + checker: data_checker.MustNew("equal", true, [][]byte{[]byte("")}), }, - { - name: "error_check_type_op_no_field", - raw: `{ - "op": "check_type", - "values": ["obj"] - }`, - }, - wantErr: true, + &fieldOpNode{ + fieldPath: []string{"service"}, + fieldPathStr: "service", + checker: data_checker.MustNew("equal", true, [][]byte{[]byte("test")}), }, - - */ + }, + }, + wantErr: false, + }, + { + name: "error_no_op_field", + raw: `{"field": "val"}`, + wantErr: true, + }, + { + name: "error_invalid_op_name", + raw: `{"op": "invalid"}`, + wantErr: true, + }, + { + name: "error_invalid_field_op", + raw: `{"op": 
"equal"}`, + wantErr: true, + }, + { + name: "error_invalid_case_sensitive_type", + raw: `{ + "op": "equal", + "field": "a", + "values": ["abc"], + "case_sensitive": "not bool" + }`, + wantErr: true, + }, + { + name: "error_invalid_logical_op", + raw: `{"op": "or"}`, + wantErr: true, + }, + { + name: "error_invalid_logical_op_operand", + raw: `{"op": "or", "operands": [{"op": "equal"}]}`, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_no_field", + raw: `{"op":"byte_len_cmp","cmp_op":"lt","value":10}`, + wantErr: true, + }, + { + name: "error_array_len_cmp_op_no_field", + raw: `{"op":"array_len_cmp","cmp_op":"lt","value":10}`, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_field_is_not_string", + raw: `{"op":"byte_len_cmp","field":123,"cmp_op":"lt","value":10}`, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_no_cmp_op", + raw: `{"op":"byte_len_cmp","field":"data","value":10}`, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_cmp_op_is_not_string", + raw: `{"op":"byte_len_cmp","field":"data","cmp_op":123,"value":10}`, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_no_cmp_value", + raw: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt"}`, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_cmp_value_is_not_integer", + raw: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":"abc"}`, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_invalid_cmp_op", + raw: `{"op":"byte_len_cmp","field":"data","cmp_op":"ABC","value":10}`, + wantErr: true, + }, + { + name: "error_byte_len_cmp_op_negative_cmp_value", + raw: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":-1}`, + wantErr: true, + }, + { + name: "error_ts_cmp_op_no_field", + raw: `{"op": "ts_cmp","cmp_op": "lt"}`, + wantErr: true, + }, + { + name: "error_ts_cmp_op_field_is_not_string", + raw: `{"op":"ts_cmp","field":123}`, + wantErr: true, + }, + { + name: "error_ts_cmp_op_no_cmp_op", + raw: `{"op":"ts_cmp","field":"timestamp"}`, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_format_type", + raw: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "format": 1000, + "value": "now" + }`, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_value_shift_type", + raw: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "value_shift": 1000, + "format": "2006-01-02T15:04:05Z07:00", + "update_interval": "15s" + }`, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_update_interval_type", + raw: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "format": "2006-01-02T15:04:05Z07:00", + "update_interval": false + }`, + wantErr: true, + }, + { + name: "error_ts_cmp_op_cmp_op_is_not_string", + raw: `{"op":"ts_cmp","field":"timestamp","cmp_op":123}`, + wantErr: true, + }, + { + name: "error_ts_cmp_op_no_cmp_value", + raw: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt"}`, + wantErr: true, + }, + { + name: "error_ts_cmp_op_cmp_value_is_not_string", + raw: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":123}`, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_cmp_value", + raw: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":"qwe"}`, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_cmp_op", + raw: `{"op":"ts_cmp","field":"timestamp","cmp_op":"qwe","value":"2009-11-10T23:00:00Z"}`, + wantErr: true, + }, + { + name: "error_ts_cmp_op_invalid_update_interval", + raw: `{ + "op": "ts_cmp", + "field": "timestamp", + 
"cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "update_interval": "qwe" + }`, + wantErr: true, + }, + { + name: "error_check_type_op_empty_values", + raw: `{ + "op": "check_type", + "field": "log", + "values": [] + }`, + wantErr: true, + }, + { + name: "error_check_type_op_invalid_value", + raw: `{ + "op": "check_type", + "field": "log", + "values": ["unknown_type"] + }`, + wantErr: true, + }, + { + name: "error_check_type_op_no_field", + raw: `{ + "op": "check_type", + "values": ["obj"] + }`, + wantErr: true, + }, } for _, tt := range tests { From 7b22e78bcf0520380061ca21e451c5639c2bdd33 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 29 Jul 2025 10:52:49 +0300 Subject: [PATCH 62/75] Rename package --- pipeline/do_if/ctor.go | 12 +- pipeline/do_if/ctor_test.go | 136 +++++++++--------- pipeline/do_if/do_if_test.go | 12 +- pipeline/do_if/field_op.go | 8 +- .../{data_checker => str_checker}/checker.go | 2 +- .../checker_test.go | 2 +- 6 files changed, 88 insertions(+), 84 deletions(-) rename pipeline/do_if/{data_checker => str_checker}/checker.go (99%) rename pipeline/do_if/{data_checker => str_checker}/checker_test.go (98%) diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go index 2f66f3d49..0042e9b2e 100644 --- a/pipeline/do_if/ctor.go +++ b/pipeline/do_if/ctor.go @@ -7,8 +7,8 @@ import ( "github.com/ozontech/file.d/cfg" "github.com/ozontech/file.d/pipeline/ctor" - "github.com/ozontech/file.d/pipeline/do_if/data_checker" "github.com/ozontech/file.d/pipeline/do_if/logic" + "github.com/ozontech/file.d/pipeline/do_if/str_checker" ) const ( @@ -65,11 +65,11 @@ func ExtractNode(node ctor.Node) (Node, error) { logic.NotTag: return extractLogicalOpNode(opName, node) case - data_checker.OpEqualTag, - data_checker.OpContainsTag, - data_checker.OpPrefixTag, - data_checker.OpSuffixTag, - data_checker.OpRegexTag: + str_checker.OpEqualTag, + str_checker.OpContainsTag, + str_checker.OpPrefixTag, + str_checker.OpSuffixTag, + str_checker.OpRegexTag: return extractFieldOpNode(opName, node) case "byte_len_cmp", diff --git a/pipeline/do_if/ctor_test.go b/pipeline/do_if/ctor_test.go index 8604ccd75..20abf9805 100644 --- a/pipeline/do_if/ctor_test.go +++ b/pipeline/do_if/ctor_test.go @@ -6,8 +6,8 @@ import ( "time" "github.com/bitly/go-simplejson" - "github.com/ozontech/file.d/pipeline/do_if/data_checker" "github.com/ozontech/file.d/pipeline/do_if/logic" + "github.com/ozontech/file.d/pipeline/do_if/str_checker" insaneJSON "github.com/ozontech/insane-json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -95,12 +95,12 @@ func TestExtractNode(t *testing.T) { &fieldOpNode{ fieldPath: []string{"service"}, fieldPathStr: "service", - checker: data_checker.MustNew("equal", false, [][]byte{nil, []byte("")}), + checker: str_checker.MustNew("equal", false, [][]byte{nil, []byte("")}), }, &fieldOpNode{ fieldPath: []string{"log", "msg"}, fieldPathStr: "log.msg", - checker: data_checker.MustNew( + checker: str_checker.MustNew( "prefix", false, [][]byte{[]byte("test-1"), []byte("test-2")}), }, &lenCmpOpNode{ @@ -131,7 +131,7 @@ func TestExtractNode(t *testing.T) { &fieldOpNode{ fieldPath: []string{"service"}, fieldPathStr: "service", - checker: data_checker.MustNew( + checker: str_checker.MustNew( "suffix", true, [][]byte{[]byte("test-svc-1"), []byte("test-svc-2")}, @@ -140,7 +140,7 @@ func TestExtractNode(t *testing.T) { &fieldOpNode{ fieldPath: []string{"pod"}, fieldPathStr: "pod", - checker: data_checker.MustNew( + checker: str_checker.MustNew( "contains", true, 
[][]byte{[]byte("test")}, @@ -149,7 +149,7 @@ func TestExtractNode(t *testing.T) { &fieldOpNode{ fieldPath: []string{"message"}, fieldPathStr: "message", - checker: data_checker.MustNew( + checker: str_checker.MustNew( "regex", true, [][]byte{[]byte(`test-\d+`), []byte(`test-msg-\d+`)}, @@ -182,64 +182,68 @@ func TestExtractNode(t *testing.T) { cmpValue: 10, }, }, - { - name: "ok_ts_cmp_op", - raw: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "value_shift": "-24h", - "format": "2006-01-02T15:04:05Z07:00", - "update_interval": "15s" - }`, - expected: &tsCmpOpNode{ - fieldPath: []string{"timestamp"}, - format: time.RFC3339, - cmpOp: cmpOpLess, - cmpValChangeMode: cmpValChangeModeConst, - constCmpValue: time.Date( - 2009, time.November, 10, 23, 0, 0, 0, time.UTC, - ).UnixNano(), - cmpValueShift: (-24 * time.Hour).Nanoseconds(), - updateInterval: 15 * time.Second, - }, - }, - { - name: "ok_ts_cmp_op_default_settings", - raw: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "now" - }`, - expected: &tsCmpOpNode{ - fieldPath: []string{"timestamp"}, - format: time.RFC3339Nano, - cmpOp: cmpOpLess, - cmpValChangeMode: cmpValChangeModeNow, - updateInterval: defaultTsCmpValUpdateInterval, - cmpValueShift: 0, - }, - }, - { - name: "ok_ts_cmp_op_format_alias", - raw: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "format": "rfc3339", - "value": "now" - }`, - expected: &tsCmpOpNode{ - fieldPath: []string{"timestamp"}, - format: time.RFC3339, - cmpOp: cmpOpLess, - cmpValChangeMode: cmpValChangeModeNow, - cmpValueShift: 0, - updateInterval: defaultTsCmpValUpdateInterval, + /* + { + name: "ok_ts_cmp_op", + raw: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "value_shift": "-24h", + "format": "2006-01-02T15:04:05Z07:00", + "update_interval": "15s" + }`, + expected: &tsCmpOpNode{ + fieldPath: []string{"timestamp"}, + format: time.RFC3339, + cmpOp: cmpOpLess, + cmpValChangeMode: cmpValChangeModeConst, + constCmpValue: time.Date( + 2009, time.November, 10, 23, 0, 0, 0, time.UTC, + ).UnixNano(), + cmpValueShift: (-24 * time.Hour).Nanoseconds(), + updateInterval: 15 * time.Second, + }, + }, + { + name: "ok_ts_cmp_op_default_settings", + raw: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "now" + }`, + expected: &tsCmpOpNode{ + fieldPath: []string{"timestamp"}, + format: time.RFC3339Nano, + cmpOp: cmpOpLess, + cmpValChangeMode: cmpValChangeModeNow, + updateInterval: defaultTsCmpValUpdateInterval, + cmpValueShift: 0, + }, + }, + + { + name: "ok_ts_cmp_op_format_alias", + raw: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "format": "rfc3339", + "value": "now" + }`, + expected: &tsCmpOpNode{ + fieldPath: []string{"timestamp"}, + format: time.RFC3339, + cmpOp: cmpOpLess, + cmpValChangeMode: cmpValChangeModeNow, + cmpValueShift: 0, + updateInterval: defaultTsCmpValUpdateInterval, + }, }, - }, + + */ { name: "ok_check_type", raw: `{ @@ -276,17 +280,17 @@ func TestExtractNode(t *testing.T) { &fieldOpNode{ fieldPath: []string{"service"}, fieldPathStr: "service", - checker: data_checker.MustNew("equal", true, [][]byte{nil}), + checker: str_checker.MustNew("equal", true, [][]byte{nil}), }, &fieldOpNode{ fieldPath: []string{"service"}, fieldPathStr: "service", - checker: data_checker.MustNew("equal", true, [][]byte{[]byte("")}), + checker: str_checker.MustNew("equal", true, [][]byte{[]byte("")}), }, &fieldOpNode{ fieldPath: 
[]string{"service"}, fieldPathStr: "service", - checker: data_checker.MustNew("equal", true, [][]byte{[]byte("test")}), + checker: str_checker.MustNew("equal", true, [][]byte{[]byte("test")}), }, }, }, diff --git a/pipeline/do_if/do_if_test.go b/pipeline/do_if/do_if_test.go index 592f5d62a..460ac8e7a 100644 --- a/pipeline/do_if/do_if_test.go +++ b/pipeline/do_if/do_if_test.go @@ -8,8 +8,8 @@ import ( "testing" "time" - "github.com/ozontech/file.d/pipeline/do_if/data_checker" "github.com/ozontech/file.d/pipeline/do_if/logic" + "github.com/ozontech/file.d/pipeline/do_if/str_checker" insaneJSON "github.com/ozontech/insane-json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -85,7 +85,7 @@ func checkNode(t *testing.T, want, got Node) { gotNode := got.(*fieldOpNode) assert.Equal(t, 0, slices.Compare[[]string](wantNode.fieldPath, gotNode.fieldPath)) assert.Equal(t, wantNode.fieldPathStr, gotNode.fieldPathStr) - assert.NoError(t, data_checker.Equal(&wantNode.checker, &gotNode.checker)) + assert.NoError(t, str_checker.Equal(&wantNode.checker, &gotNode.checker)) case NodeLogicalOp: wantNode := want.(*logicalNode) gotNode := got.(*logicalNode) @@ -136,7 +136,7 @@ func TestBuildNodes(t *testing.T) { want: &fieldOpNode{ fieldPath: []string{"log", "pod"}, fieldPathStr: "log.pod", - checker: data_checker.MustNew( + checker: str_checker.MustNew( "equal", true, [][]byte{[]byte(`test-111`), []byte(`test-2`), []byte(`test-3`), []byte(`test-12345`)}, @@ -154,7 +154,7 @@ func TestBuildNodes(t *testing.T) { want: &fieldOpNode{ fieldPath: []string{"log", "pod"}, fieldPathStr: "log.pod", - checker: data_checker.MustNew( + checker: str_checker.MustNew( "equal", false, [][]byte{[]byte(`TEST-111`), []byte(`Test-2`), []byte(`tesT-3`), []byte(`TeSt-12345`)}, @@ -186,7 +186,7 @@ func TestBuildNodes(t *testing.T) { &fieldOpNode{ fieldPath: []string{"log", "pod"}, fieldPathStr: "log.pod", - checker: data_checker.MustNew( + checker: str_checker.MustNew( "equal", true, [][]byte{[]byte(`test-111`), []byte(`test-2`), []byte(`test-3`), []byte(`test-12345`)}, @@ -195,7 +195,7 @@ func TestBuildNodes(t *testing.T) { &fieldOpNode{ fieldPath: []string{"service", "msg"}, fieldPathStr: "service.msg", - checker: data_checker.MustNew( + checker: str_checker.MustNew( "contains", true, [][]byte{[]byte(`test-0987`), []byte(`test-11`)}, diff --git a/pipeline/do_if/field_op.go b/pipeline/do_if/field_op.go index bdb4e1b20..924ee5c99 100644 --- a/pipeline/do_if/field_op.go +++ b/pipeline/do_if/field_op.go @@ -7,7 +7,7 @@ import ( "strings" "github.com/ozontech/file.d/cfg" - "github.com/ozontech/file.d/pipeline/do_if/data_checker" + "github.com/ozontech/file.d/pipeline/do_if/str_checker" insaneJSON "github.com/ozontech/insane-json" ) @@ -211,7 +211,7 @@ type fieldOpNode struct { dataType dataType metaKey string - checker data_checker.DataChecker + checker str_checker.DataChecker } func newFieldOpNode( @@ -225,7 +225,7 @@ func newFieldOpNode( return nil, errors.New("values are not provided") } - c, err := data_checker.New(op, caseSensitive, values) + c, err := str_checker.New(op, caseSensitive, values) if err != nil { return nil, err } @@ -299,5 +299,5 @@ func (n *fieldOpNode) isEqualTo(n2 Node, _ int) error { return fmt.Errorf("nodes have different meta keys expected: mataKey=%q", n.metaKey) } - return data_checker.Equal(&n.checker, &n2f.checker) + return str_checker.Equal(&n.checker, &n2f.checker) } diff --git a/pipeline/do_if/data_checker/checker.go b/pipeline/do_if/str_checker/checker.go similarity index 99% 
rename from pipeline/do_if/data_checker/checker.go rename to pipeline/do_if/str_checker/checker.go index e8167c458..0aff2c4a6 100644 --- a/pipeline/do_if/data_checker/checker.go +++ b/pipeline/do_if/str_checker/checker.go @@ -1,4 +1,4 @@ -package data_checker +package str_checker import ( "bytes" diff --git a/pipeline/do_if/data_checker/checker_test.go b/pipeline/do_if/str_checker/checker_test.go similarity index 98% rename from pipeline/do_if/data_checker/checker_test.go rename to pipeline/do_if/str_checker/checker_test.go index 79929645e..c159271e9 100644 --- a/pipeline/do_if/data_checker/checker_test.go +++ b/pipeline/do_if/str_checker/checker_test.go @@ -1,4 +1,4 @@ -package data_checker +package str_checker import ( "testing" From c2773b5a312f05cc4d49c6808816356a29c423d8 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 29 Jul 2025 11:58:06 +0300 Subject: [PATCH 63/75] Fix tests --- pipeline/do_if/ctor_test.go | 120 ++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 61 deletions(-) diff --git a/pipeline/do_if/ctor_test.go b/pipeline/do_if/ctor_test.go index 20abf9805..7127b9514 100644 --- a/pipeline/do_if/ctor_test.go +++ b/pipeline/do_if/ctor_test.go @@ -182,68 +182,66 @@ func TestExtractNode(t *testing.T) { cmpValue: 10, }, }, - /* - { - name: "ok_ts_cmp_op", - raw: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "value_shift": "-24h", - "format": "2006-01-02T15:04:05Z07:00", - "update_interval": "15s" - }`, - expected: &tsCmpOpNode{ - fieldPath: []string{"timestamp"}, - format: time.RFC3339, - cmpOp: cmpOpLess, - cmpValChangeMode: cmpValChangeModeConst, - constCmpValue: time.Date( - 2009, time.November, 10, 23, 0, 0, 0, time.UTC, - ).UnixNano(), - cmpValueShift: (-24 * time.Hour).Nanoseconds(), - updateInterval: 15 * time.Second, - }, - }, - { - name: "ok_ts_cmp_op_default_settings", - raw: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "now" - }`, - expected: &tsCmpOpNode{ - fieldPath: []string{"timestamp"}, - format: time.RFC3339Nano, - cmpOp: cmpOpLess, - cmpValChangeMode: cmpValChangeModeNow, - updateInterval: defaultTsCmpValUpdateInterval, - cmpValueShift: 0, - }, - }, - - { - name: "ok_ts_cmp_op_format_alias", - raw: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "format": "rfc3339", - "value": "now" - }`, - expected: &tsCmpOpNode{ - fieldPath: []string{"timestamp"}, - format: time.RFC3339, - cmpOp: cmpOpLess, - cmpValChangeMode: cmpValChangeModeNow, - cmpValueShift: 0, - updateInterval: defaultTsCmpValUpdateInterval, - }, + { + name: "ok_ts_cmp_op", + raw: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "value_shift": "-24h", + "format": "2006-01-02T15:04:05Z07:00", + "update_interval": "15s" + }`, + expected: &tsCmpOpNode{ + fieldPath: []string{"timestamp"}, + format: time.RFC3339, + cmpOp: cmpOpLess, + cmpValChangeMode: cmpValChangeModeConst, + constCmpValue: time.Date( + 2009, time.November, 10, 23, 0, 0, 0, time.UTC, + ).UnixNano(), + cmpValueShift: (-24 * time.Hour).Nanoseconds(), + updateInterval: 15 * time.Second, }, - - */ + }, + { + name: "ok_ts_cmp_op_default_settings", + raw: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "now" + }`, + expected: &tsCmpOpNode{ + fieldPath: []string{"timestamp"}, + format: time.RFC3339Nano, + cmpOp: cmpOpLess, + cmpValChangeMode: cmpValChangeModeNow, + constCmpValue: time.Time{}.UnixNano(), + updateInterval: 
defaultTsCmpValUpdateInterval, + cmpValueShift: 0, + }, + }, + { + name: "ok_ts_cmp_op_format_alias", + raw: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "format": "rfc3339", + "value": "now" + }`, + expected: &tsCmpOpNode{ + fieldPath: []string{"timestamp"}, + format: time.RFC3339, + cmpOp: cmpOpLess, + cmpValChangeMode: cmpValChangeModeNow, + constCmpValue: time.Time{}.UnixNano(), + cmpValueShift: 0, + updateInterval: defaultTsCmpValUpdateInterval, + }, + }, { name: "ok_check_type", raw: `{ From 924efb791dd89ae0908e078f17ee54f67ef3fdef Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 29 Jul 2025 19:23:20 +0300 Subject: [PATCH 64/75] Add field op data type parsing test --- pipeline/do_if/ctor_test.go | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/pipeline/do_if/ctor_test.go b/pipeline/do_if/ctor_test.go index 7127b9514..d5969ee1c 100644 --- a/pipeline/do_if/ctor_test.go +++ b/pipeline/do_if/ctor_test.go @@ -162,6 +162,31 @@ func TestExtractNode(t *testing.T) { }, }, }, + { + name: "ok_field_op_node_data_type_event", + raw: `{"op":"equal", "values":["a"], "data":"event"}`, + expected: &fieldOpNode{ + dataType: dataTypeEvent, + checker: str_checker.MustNew("equal", true, [][]byte{[]byte("a")}), + }, + }, + { + name: "ok_field_op_node_data_type_source_name", + raw: `{"op":"equal", "values":["a"], "data":"source_name"}`, + expected: &fieldOpNode{ + dataType: dataTypeSourceName, + checker: str_checker.MustNew("equal", true, [][]byte{[]byte("a")}), + }, + }, + { + name: "ok_field_op_node_data_type_meta", + raw: `{"op":"equal", "values":["a"], "data":"meta.name"}`, + expected: &fieldOpNode{ + dataType: dataTypeMeta, + metaKey: "name", + checker: str_checker.MustNew("equal", true, [][]byte{[]byte("a")}), + }, + }, { name: "ok_byte_len_cmp_op", raw: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":10}`, @@ -461,6 +486,16 @@ func TestExtractNode(t *testing.T) { }`, wantErr: true, }, + { + name: "error_field_op_node_data_type_type_mismatch", + raw: `{"op":"equal", "values":["a"], "data":123}`, + wantErr: true, + }, + { + name: "error_field_op_node_data_type_unparsable", + raw: `{"op":"equal", "values":["a"], "data":"some"}`, + wantErr: true, + }, { name: "error_check_type_op_empty_values", raw: `{ From cd7b41c21cfcb58a1672fa37dd031d27651d100a Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Wed, 30 Jul 2025 15:43:08 +0300 Subject: [PATCH 65/75] Rename fieldOpNode => stringOpNode --- pipeline/do_if/README.idoc.md | 8 +- pipeline/do_if/README.md | 16 +- pipeline/do_if/converter.go | 2 +- pipeline/do_if/ctor.go | 10 +- pipeline/do_if/ctor_test.go | 22 +-- pipeline/do_if/do_if.go | 4 +- pipeline/do_if/do_if_test.go | 154 +++++++++---------- pipeline/do_if/len_cmp_op.go | 2 +- pipeline/do_if/str_checker/checker.go | 2 +- pipeline/do_if/{field_op.go => string_op.go} | 28 ++-- pipeline/do_if/ts_cmp_op.go | 2 +- 11 files changed, 125 insertions(+), 125 deletions(-) rename pipeline/do_if/{field_op.go => string_op.go} (90%) diff --git a/pipeline/do_if/README.idoc.md b/pipeline/do_if/README.idoc.md index d7f2028b1..d332934c3 100644 --- a/pipeline/do_if/README.idoc.md +++ b/pipeline/do_if/README.idoc.md @@ -8,11 +8,11 @@ the chain of Match func calls are performed across the whole tree. 
## Node types @do-if-node|description -## Field op node -@do-if-field-op-node +## String op node +@do-if-string-op-node -## Field operations -@do-if-field-op +## String operations +@do-if-string-op ## Logical op node @do-if-logical-op-node diff --git a/pipeline/do_if/README.md b/pipeline/do_if/README.md index c7108e829..acef657b7 100755 --- a/pipeline/do_if/README.md +++ b/pipeline/do_if/README.md @@ -6,7 +6,7 @@ When Do If Checker's Match func is called it calls to the root Match func and th the chain of Match func calls are performed across the whole tree. ## Node types -**`FieldOp`** Type of node where matching rules for fields are stored. +**`StringOp`** Type of node where string checks for fields are stored.
@@ -27,16 +27,16 @@ the chain of Match func calls are performed across the whole tree.
-## Field op node -DoIf field op node is considered to always be a leaf in the DoIf tree. It checks byte representation of the value by the given field path. +## String op node +DoIf string op node is considered to always be a leaf in the DoIf tree. It checks byte representation of the value by the given field path. Array and object values are considered as not matched since encoding them to bytes leads towards large CPU and memory consumption. Params: - - `op` - value from field operations list. Required. + - `op` - value from string operations list. Required. - `field` - path to field in JSON tree. If empty, root value is checked. Path to nested fields is delimited by dots `"."`, e.g. `"field.subfield"` for `{"field": {"subfield": "val"}}`. If the field name contains dots in it they should be shielded with `"\"`, e.g. `"exception\.type"` for `{"exception.type": "example"}`. Default empty. - `values` - list of values to check field. Required non-empty. - - `case_sensitive` - flag indicating whether checks are performed in case sensitive way. Default `true`. + - `case_sensitive` - flag indicating whether checks are performed in case-sensitive way. Default `true`. Note: case insensitive checks can cause CPU and memory overhead since every field value will be converted to lower letters. Example: @@ -53,7 +53,7 @@ pipelines: ``` -## Field operations +## String operations Operation `equal` checks whether the field value is equal to one of the elements in the values list. Example: @@ -294,7 +294,7 @@ Result: ## Length comparison op node -DoIf length comparison op node is considered to always be a leaf in the DoIf tree like DoIf field op node. +DoIf length comparison op node is considered to always be a leaf in the DoIf tree like DoIf string op node. It contains operation that compares field length in bytes or array length (for array fields) with certain value. Params: @@ -361,7 +361,7 @@ They denote corresponding comparison operations. | `ne` | `!=` | ## Timestamp comparison op node -DoIf timestamp comparison op node is considered to always be a leaf in the DoIf tree like DoIf field op node. +DoIf timestamp comparison op node is considered to always be a leaf in the DoIf tree like DoIf string op node. It contains operation that compares timestamps with certain value. 
Params: diff --git a/pipeline/do_if/converter.go b/pipeline/do_if/converter.go index 6ee2e7c2e..cdfa832af 100644 --- a/pipeline/do_if/converter.go +++ b/pipeline/do_if/converter.go @@ -13,7 +13,7 @@ func RuleToNode(rule matchrule.Rule, dataTypeTag string) (Node, error) { values = append(values, []byte(strings.Clone(s))) } - node, err := newFieldOpNode( + node, err := newStringOpNode( matchrule.ModeToString(rule.Mode), !rule.CaseInsensitive, values, diff --git a/pipeline/do_if/ctor.go b/pipeline/do_if/ctor.go index 0042e9b2e..e3b2fcd6c 100644 --- a/pipeline/do_if/ctor.go +++ b/pipeline/do_if/ctor.go @@ -70,7 +70,7 @@ func ExtractNode(node ctor.Node) (Node, error) { str_checker.OpPrefixTag, str_checker.OpSuffixTag, str_checker.OpRegexTag: - return extractFieldOpNode(opName, node) + return extractStringOpNode(opName, node) case "byte_len_cmp", "array_len_cmp": @@ -84,7 +84,7 @@ func ExtractNode(node ctor.Node) (Node, error) { } } -func extractFieldOpNode(opName string, node map[string]any) (Node, error) { +func extractStringOpNode(opName string, node map[string]any) (Node, error) { var result Node var err error @@ -114,12 +114,12 @@ func extractFieldOpNode(opName string, node map[string]any) (Node, error) { vals, err := extractOpValues(node) if err != nil { - return nil, fmt.Errorf("extract field op values: %w", err) + return nil, fmt.Errorf("extract string op values: %w", err) } - result, err = newFieldOpNode(opName, caseSensitive, vals, fieldPath, dataTypeTag) + result, err = newStringOpNode(opName, caseSensitive, vals, fieldPath, dataTypeTag) if err != nil { - return nil, fmt.Errorf("init field op: %w", err) + return nil, fmt.Errorf("init string op: %w", err) } return result, nil diff --git a/pipeline/do_if/ctor_test.go b/pipeline/do_if/ctor_test.go index d5969ee1c..c7244aa1a 100644 --- a/pipeline/do_if/ctor_test.go +++ b/pipeline/do_if/ctor_test.go @@ -92,12 +92,12 @@ func TestExtractNode(t *testing.T) { &logicalNode{ op: logic.And, operands: []Node{ - &fieldOpNode{ + &stringOpNode{ fieldPath: []string{"service"}, fieldPathStr: "service", checker: str_checker.MustNew("equal", false, [][]byte{nil, []byte("")}), }, - &fieldOpNode{ + &stringOpNode{ fieldPath: []string{"log", "msg"}, fieldPathStr: "log.msg", checker: str_checker.MustNew( @@ -128,7 +128,7 @@ func TestExtractNode(t *testing.T) { &logicalNode{ op: logic.Or, operands: []Node{ - &fieldOpNode{ + &stringOpNode{ fieldPath: []string{"service"}, fieldPathStr: "service", checker: str_checker.MustNew( @@ -137,7 +137,7 @@ func TestExtractNode(t *testing.T) { [][]byte{[]byte("test-svc-1"), []byte("test-svc-2")}, ), }, - &fieldOpNode{ + &stringOpNode{ fieldPath: []string{"pod"}, fieldPathStr: "pod", checker: str_checker.MustNew( @@ -146,7 +146,7 @@ func TestExtractNode(t *testing.T) { [][]byte{[]byte("test")}, ), }, - &fieldOpNode{ + &stringOpNode{ fieldPath: []string{"message"}, fieldPathStr: "message", checker: str_checker.MustNew( @@ -165,7 +165,7 @@ func TestExtractNode(t *testing.T) { { name: "ok_field_op_node_data_type_event", raw: `{"op":"equal", "values":["a"], "data":"event"}`, - expected: &fieldOpNode{ + expected: &stringOpNode{ dataType: dataTypeEvent, checker: str_checker.MustNew("equal", true, [][]byte{[]byte("a")}), }, @@ -173,7 +173,7 @@ func TestExtractNode(t *testing.T) { { name: "ok_field_op_node_data_type_source_name", raw: `{"op":"equal", "values":["a"], "data":"source_name"}`, - expected: &fieldOpNode{ + expected: &stringOpNode{ dataType: dataTypeSourceName, checker: str_checker.MustNew("equal", true, 
[][]byte{[]byte("a")}), }, @@ -181,7 +181,7 @@ func TestExtractNode(t *testing.T) { { name: "ok_field_op_node_data_type_meta", raw: `{"op":"equal", "values":["a"], "data":"meta.name"}`, - expected: &fieldOpNode{ + expected: &stringOpNode{ dataType: dataTypeMeta, metaKey: "name", checker: str_checker.MustNew("equal", true, [][]byte{[]byte("a")}), @@ -300,17 +300,17 @@ func TestExtractNode(t *testing.T) { expected: &logicalNode{ op: logic.Or, operands: []Node{ - &fieldOpNode{ + &stringOpNode{ fieldPath: []string{"service"}, fieldPathStr: "service", checker: str_checker.MustNew("equal", true, [][]byte{nil}), }, - &fieldOpNode{ + &stringOpNode{ fieldPath: []string{"service"}, fieldPathStr: "service", checker: str_checker.MustNew("equal", true, [][]byte{[]byte("")}), }, - &fieldOpNode{ + &stringOpNode{ fieldPath: []string{"service"}, fieldPathStr: "service", checker: str_checker.MustNew("equal", true, [][]byte{[]byte("test")}), diff --git a/pipeline/do_if/do_if.go b/pipeline/do_if/do_if.go index eb6d7a07f..76db4ee14 100644 --- a/pipeline/do_if/do_if.go +++ b/pipeline/do_if/do_if.go @@ -12,8 +12,8 @@ type nodeType int const ( NodeUnknownType nodeType = iota - // > Type of node where matching rules for fields are stored. - NodeFieldOp // * + // > Type of node where string checks for fields are stored. + NodeStringOp // * // > Type of node where matching rules for byte length and array length are stored. NodeLengthCmpOp // * diff --git a/pipeline/do_if/do_if_test.go b/pipeline/do_if/do_if_test.go index 460ac8e7a..8b782ad29 100644 --- a/pipeline/do_if/do_if_test.go +++ b/pipeline/do_if/do_if_test.go @@ -16,7 +16,7 @@ import ( ) type treeNode struct { - fieldOp string + stringOp string fieldName string caseSensitive bool values [][]byte @@ -39,9 +39,9 @@ type treeNode struct { // nolint:gocritic func buildTree(node treeNode) (Node, error) { switch { - case node.fieldOp != "": - return newFieldOpNode( - node.fieldOp, + case node.stringOp != "": + return newStringOpNode( + node.stringOp, node.caseSensitive, node.values, node.fieldName, @@ -80,9 +80,9 @@ func buildTree(node treeNode) (Node, error) { func checkNode(t *testing.T, want, got Node) { require.Equal(t, want.Type(), got.Type()) switch want.Type() { - case NodeFieldOp: - wantNode := want.(*fieldOpNode) - gotNode := got.(*fieldOpNode) + case NodeStringOp: + wantNode := want.(*stringOpNode) + gotNode := got.(*stringOpNode) assert.Equal(t, 0, slices.Compare[[]string](wantNode.fieldPath, gotNode.fieldPath)) assert.Equal(t, wantNode.fieldPathStr, gotNode.fieldPathStr) assert.NoError(t, str_checker.Equal(&wantNode.checker, &gotNode.checker)) @@ -128,12 +128,12 @@ func TestBuildNodes(t *testing.T) { { name: "ok_field_op_node", tree: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "log.pod", caseSensitive: true, values: [][]byte{[]byte(`test-111`), []byte(`test-2`), []byte(`test-3`), []byte(`test-12345`)}, }, - want: &fieldOpNode{ + want: &stringOpNode{ fieldPath: []string{"log", "pod"}, fieldPathStr: "log.pod", checker: str_checker.MustNew( @@ -146,12 +146,12 @@ func TestBuildNodes(t *testing.T) { { name: "ok_field_op_node_case_insensitive", tree: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "log.pod", caseSensitive: false, values: [][]byte{[]byte(`TEST-111`), []byte(`Test-2`), []byte(`tesT-3`), []byte(`TeSt-12345`)}, }, - want: &fieldOpNode{ + want: &stringOpNode{ fieldPath: []string{"log", "pod"}, fieldPathStr: "log.pod", checker: str_checker.MustNew( @@ -167,13 +167,13 @@ func TestBuildNodes(t *testing.T) { logicalOp: 
"or", operands: []treeNode{ { - fieldOp: "equal", + stringOp: "equal", fieldName: "log.pod", caseSensitive: true, values: [][]byte{[]byte(`test-111`), []byte(`test-2`), []byte(`test-3`), []byte(`test-12345`)}, }, { - fieldOp: "contains", + stringOp: "contains", fieldName: "service.msg", caseSensitive: true, values: [][]byte{[]byte(`test-0987`), []byte(`test-11`)}, @@ -183,7 +183,7 @@ func TestBuildNodes(t *testing.T) { want: &logicalNode{ op: logic.Or, operands: []Node{ - &fieldOpNode{ + &stringOpNode{ fieldPath: []string{"log", "pod"}, fieldPathStr: "log.pod", checker: str_checker.MustNew( @@ -192,7 +192,7 @@ func TestBuildNodes(t *testing.T) { [][]byte{[]byte(`test-111`), []byte(`test-2`), []byte(`test-3`), []byte(`test-12345`)}, ), }, - &fieldOpNode{ + &stringOpNode{ fieldPath: []string{"service", "msg"}, fieldPathStr: "service.msg", checker: str_checker.MustNew( @@ -307,14 +307,14 @@ func TestBuildNodes(t *testing.T) { { name: "err_field_op_node_empty_field", tree: treeNode{ - fieldOp: "equal", + stringOp: "equal", }, wantErr: true, }, { name: "err_field_op_node_empty_values", tree: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "pod", }, wantErr: true, @@ -322,7 +322,7 @@ func TestBuildNodes(t *testing.T) { { name: "err_field_op_node_invalid_regex", tree: treeNode{ - fieldOp: "regex", + stringOp: "regex", fieldName: "pod", values: [][]byte{[]byte(`\`)}, }, @@ -331,7 +331,7 @@ func TestBuildNodes(t *testing.T) { { name: "err_field_op_node_invalid_op_type", tree: treeNode{ - fieldOp: "noop", + stringOp: "noop", fieldName: "pod", values: [][]byte{[]byte(`test`)}, }, @@ -414,7 +414,7 @@ func TestBuildNodes(t *testing.T) { logicalOp: "noop", operands: []treeNode{ { - fieldOp: "contains", + stringOp: "contains", fieldName: "service.msg", caseSensitive: true, values: [][]byte{[]byte(`test-0987`), []byte(`test-11`)}, @@ -429,13 +429,13 @@ func TestBuildNodes(t *testing.T) { logicalOp: "not", operands: []treeNode{ { - fieldOp: "contains", + stringOp: "contains", fieldName: "service.msg", caseSensitive: true, values: [][]byte{[]byte(`test-0987`), []byte(`test-11`)}, }, { - fieldOp: "contains", + stringOp: "contains", fieldName: "service.msg", caseSensitive: true, values: [][]byte{[]byte(`test-0987`), []byte(`test-11`)}, @@ -482,7 +482,7 @@ func TestCheck(t *testing.T) { { name: "equal", tree: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "pod", caseSensitive: true, values: [][]byte{[]byte("test-1"), []byte("test-2"), []byte("test-pod-123"), []byte("po-32")}, @@ -505,7 +505,7 @@ func TestCheck(t *testing.T) { { name: "contains", tree: treeNode{ - fieldOp: "contains", + stringOp: "contains", fieldName: "pod", caseSensitive: true, values: [][]byte{[]byte("test-1"), []byte("test-2")}, @@ -520,7 +520,7 @@ func TestCheck(t *testing.T) { { name: "prefix", tree: treeNode{ - fieldOp: "prefix", + stringOp: "prefix", fieldName: "pod", caseSensitive: true, values: [][]byte{[]byte("test-1"), []byte("test-2")}, @@ -535,7 +535,7 @@ func TestCheck(t *testing.T) { { name: "suffix", tree: treeNode{ - fieldOp: "suffix", + stringOp: "suffix", fieldName: "pod", caseSensitive: true, values: [][]byte{[]byte("test-1"), []byte("test-2")}, @@ -550,7 +550,7 @@ func TestCheck(t *testing.T) { { name: "regex", tree: treeNode{ - fieldOp: "regex", + stringOp: "regex", fieldName: "pod", values: [][]byte{[]byte(`test-\d`)}, }, @@ -569,13 +569,13 @@ func TestCheck(t *testing.T) { logicalOp: "or", operands: []treeNode{ { - fieldOp: "equal", + stringOp: "equal", fieldName: "pod", caseSensitive: true, 
values: [][]byte{[]byte("test-1"), []byte("test-2")}, }, { - fieldOp: "equal", + stringOp: "equal", fieldName: "pod", caseSensitive: true, values: [][]byte{[]byte("test-3"), []byte("test-4")}, @@ -598,13 +598,13 @@ func TestCheck(t *testing.T) { logicalOp: "and", operands: []treeNode{ { - fieldOp: "prefix", + stringOp: "prefix", fieldName: "pod", caseSensitive: true, values: [][]byte{[]byte("test")}, }, { - fieldOp: "suffix", + stringOp: "suffix", fieldName: "pod", caseSensitive: true, values: [][]byte{[]byte("pod")}, @@ -627,7 +627,7 @@ func TestCheck(t *testing.T) { logicalOp: "not", operands: []treeNode{ { - fieldOp: "equal", + stringOp: "equal", fieldName: "pod", caseSensitive: true, values: [][]byte{[]byte("test-1"), []byte("test-2")}, @@ -645,7 +645,7 @@ func TestCheck(t *testing.T) { { name: "equal_case_insensitive", tree: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "pod", caseSensitive: false, values: [][]byte{[]byte("Test-1"), []byte("tesT-2")}, @@ -660,7 +660,7 @@ func TestCheck(t *testing.T) { { name: "contains_case_insensitive", tree: treeNode{ - fieldOp: "contains", + stringOp: "contains", fieldName: "pod", caseSensitive: false, values: [][]byte{[]byte("Test-1"), []byte("tesT-2")}, @@ -675,7 +675,7 @@ func TestCheck(t *testing.T) { { name: "prefix_case_insensitive", tree: treeNode{ - fieldOp: "prefix", + stringOp: "prefix", fieldName: "pod", caseSensitive: false, values: [][]byte{[]byte("Test-1"), []byte("tesT-2")}, @@ -690,7 +690,7 @@ func TestCheck(t *testing.T) { { name: "suffix_case_insensitive", tree: treeNode{ - fieldOp: "suffix", + stringOp: "suffix", fieldName: "pod", caseSensitive: false, values: [][]byte{[]byte("Test-1"), []byte("tesT-2")}, @@ -705,7 +705,7 @@ func TestCheck(t *testing.T) { { name: "equal_nil_or_empty_string", tree: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "test-field", caseSensitive: false, values: [][]byte{nil, []byte("")}, @@ -1155,7 +1155,7 @@ func TestNodeIsEqual(t *testing.T) { ts := time.Now() fieldNode := treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1"), []byte("test-2")}, @@ -1184,7 +1184,7 @@ func TestNodeIsEqual(t *testing.T) { logicalOp: "not", operands: []treeNode{ { - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1"), []byte("test-2")}, @@ -1198,13 +1198,13 @@ func TestNodeIsEqual(t *testing.T) { logicalOp: "or", operands: []treeNode{ { - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: true, values: [][]byte{nil, []byte(""), []byte("null")}, }, { - fieldOp: "contains", + stringOp: "contains", fieldName: "pod", caseSensitive: false, values: [][]byte{[]byte("pod-1"), []byte("pod-2")}, @@ -1219,19 +1219,19 @@ func TestNodeIsEqual(t *testing.T) { logicalOp: "and", operands: []treeNode{ { - fieldOp: "prefix", + stringOp: "prefix", fieldName: "message", caseSensitive: true, values: [][]byte{[]byte("test-msg-1"), []byte("test-msg-2")}, }, { - fieldOp: "suffix", + stringOp: "suffix", fieldName: "message", caseSensitive: true, values: [][]byte{[]byte("test-msg-3"), []byte("test-msg-4")}, }, { - fieldOp: "regex", + stringOp: "regex", fieldName: "msg", caseSensitive: true, values: [][]byte{[]byte("test-\\d+"), []byte("test-000-\\d+")}, @@ -1311,13 +1311,13 @@ func TestNodeIsEqual(t *testing.T) { { name: "not_equal_field_op_mismatch", t1: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: false, values: 
[][]byte{[]byte("test-1")}, }, t2: treeNode{ - fieldOp: "contains", + stringOp: "contains", fieldName: "service", caseSensitive: false, values: [][]byte{[]byte("test-1")}, @@ -1327,13 +1327,13 @@ func TestNodeIsEqual(t *testing.T) { { name: "not_equal_field_op_mismatch_2", t1: treeNode{ - fieldOp: "prefix", + stringOp: "prefix", fieldName: "service", caseSensitive: false, values: [][]byte{[]byte("test-1")}, }, t2: treeNode{ - fieldOp: "suffix", + stringOp: "suffix", fieldName: "service", caseSensitive: false, values: [][]byte{[]byte("test-1")}, @@ -1343,13 +1343,13 @@ func TestNodeIsEqual(t *testing.T) { { name: "not_equal_field_op_mismatch_3", t1: treeNode{ - fieldOp: "regex", + stringOp: "regex", fieldName: "service", caseSensitive: false, values: [][]byte{[]byte("test-1")}, }, t2: treeNode{ - fieldOp: "contains", + stringOp: "contains", fieldName: "service", caseSensitive: false, values: [][]byte{[]byte("test-1")}, @@ -1359,13 +1359,13 @@ func TestNodeIsEqual(t *testing.T) { { name: "not_equal_field_case_sensitive_mismatch", t1: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: false, values: [][]byte{[]byte("test-1")}, }, t2: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1")}, @@ -1375,13 +1375,13 @@ func TestNodeIsEqual(t *testing.T) { { name: "not_equal_field_field_path_mismatch", t1: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "log.msg", caseSensitive: true, values: [][]byte{[]byte("test-1")}, }, t2: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "log.svc", caseSensitive: true, values: [][]byte{[]byte("test-1")}, @@ -1391,13 +1391,13 @@ func TestNodeIsEqual(t *testing.T) { { name: "not_equal_field_values_slice_len_mismatch", t1: treeNode{ - fieldOp: "contains", + stringOp: "contains", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1"), []byte("test-2")}, }, t2: treeNode{ - fieldOp: "contains", + stringOp: "contains", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1")}, @@ -1407,13 +1407,13 @@ func TestNodeIsEqual(t *testing.T) { { name: "not_equal_field_values_slice_vals_mismatch", t1: treeNode{ - fieldOp: "contains", + stringOp: "contains", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-2")}, }, t2: treeNode{ - fieldOp: "contains", + stringOp: "contains", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1")}, @@ -1423,13 +1423,13 @@ func TestNodeIsEqual(t *testing.T) { { name: "not_equal_field_values_by_size_len_mismatch", t1: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1"), []byte("test-22")}, }, t2: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1")}, @@ -1439,13 +1439,13 @@ func TestNodeIsEqual(t *testing.T) { { name: "not_equal_field_values_by_size_vals_key_mismatch", t1: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-11")}, }, t2: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1")}, @@ -1455,13 +1455,13 @@ func TestNodeIsEqual(t *testing.T) { { name: "not_equal_field_values_by_size_vals_len_mismatch", t1: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: true, values: 
[][]byte{[]byte("test-1"), []byte("test-2")}, }, t2: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1")}, @@ -1471,13 +1471,13 @@ func TestNodeIsEqual(t *testing.T) { { name: "not_equal_field_values_by_size_vals_mismatch", t1: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-2")}, }, t2: treeNode{ - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1")}, @@ -1487,13 +1487,13 @@ func TestNodeIsEqual(t *testing.T) { { name: "not_equal_field_reValues_len_mismatch", t1: treeNode{ - fieldOp: "regex", + stringOp: "regex", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1"), []byte("test-2")}, }, t2: treeNode{ - fieldOp: "regex", + stringOp: "regex", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1")}, @@ -1503,13 +1503,13 @@ func TestNodeIsEqual(t *testing.T) { { name: "not_equal_field_reValues_vals_mismatch", t1: treeNode{ - fieldOp: "regex", + stringOp: "regex", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-2")}, }, t2: treeNode{ - fieldOp: "regex", + stringOp: "regex", fieldName: "service", caseSensitive: true, values: [][]byte{[]byte("test-1")}, @@ -1732,7 +1732,7 @@ func TestNodeIsEqual(t *testing.T) { logicalOp: "not", operands: []treeNode{ { - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: false, values: [][]byte{nil}, @@ -1743,7 +1743,7 @@ func TestNodeIsEqual(t *testing.T) { logicalOp: "and", operands: []treeNode{ { - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: false, values: [][]byte{nil}, @@ -1758,7 +1758,7 @@ func TestNodeIsEqual(t *testing.T) { logicalOp: "or", operands: []treeNode{ { - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: false, values: [][]byte{nil}, @@ -1769,13 +1769,13 @@ func TestNodeIsEqual(t *testing.T) { logicalOp: "or", operands: []treeNode{ { - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: false, values: [][]byte{nil}, }, { - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: false, values: [][]byte{nil}, @@ -1790,7 +1790,7 @@ func TestNodeIsEqual(t *testing.T) { logicalOp: "or", operands: []treeNode{ { - fieldOp: "equal", + stringOp: "equal", fieldName: "service", caseSensitive: false, values: [][]byte{nil}, @@ -1801,7 +1801,7 @@ func TestNodeIsEqual(t *testing.T) { logicalOp: "or", operands: []treeNode{ { - fieldOp: "equal", + stringOp: "equal", fieldName: "pod", caseSensitive: false, values: [][]byte{nil}, diff --git a/pipeline/do_if/len_cmp_op.go b/pipeline/do_if/len_cmp_op.go index 3936a822c..a268192cc 100644 --- a/pipeline/do_if/len_cmp_op.go +++ b/pipeline/do_if/len_cmp_op.go @@ -10,7 +10,7 @@ import ( ) /*{ do-if-len-cmp-op-node -DoIf length comparison op node is considered to always be a leaf in the DoIf tree like DoIf field op node. +DoIf length comparison op node is considered to always be a leaf in the DoIf tree like DoIf string op node. It contains operation that compares field length in bytes or array length (for array fields) with certain value. 
Params: diff --git a/pipeline/do_if/str_checker/checker.go b/pipeline/do_if/str_checker/checker.go index 0aff2c4a6..3c40b49ea 100644 --- a/pipeline/do_if/str_checker/checker.go +++ b/pipeline/do_if/str_checker/checker.go @@ -55,7 +55,7 @@ func stringToOp(s string) (op, error) { case OpRegexTag: return opRegex, nil default: - return -1, fmt.Errorf("unknown field op %q", s) + return -1, fmt.Errorf("unknown string op %q", s) } } diff --git a/pipeline/do_if/field_op.go b/pipeline/do_if/string_op.go similarity index 90% rename from pipeline/do_if/field_op.go rename to pipeline/do_if/string_op.go index 924ee5c99..3f9077210 100644 --- a/pipeline/do_if/field_op.go +++ b/pipeline/do_if/string_op.go @@ -11,12 +11,12 @@ import ( insaneJSON "github.com/ozontech/insane-json" ) -/*{ do-if-field-op-node -DoIf field op node is considered to always be a leaf in the DoIf tree. It checks byte representation of the value by the given field path. +/*{ do-if-string-op-node +DoIf string op node is considered to always be a leaf in the DoIf tree. It checks byte representation of the value by the given field path. Array and object values are considered as not matched since encoding them to bytes leads towards large CPU and memory consumption. Params: - - `op` - value from field operations list. Required. + - `op` - value from string operations list. Required. - `field` - path to field in JSON tree. If empty, root value is checked. Path to nested fields is delimited by dots `"."`, e.g. `"field.subfield"` for `{"field": {"subfield": "val"}}`. If the field name contains dots in it they should be shielded with `"\"`, e.g. `"exception\.type"` for `{"exception.type": "example"}`. Default empty. - `values` - list of values to check field. Required non-empty. @@ -38,7 +38,7 @@ pipelines: }*/ -/*{ do-if-field-op +/*{ do-if-string-op Operation `equal` checks whether the field value is equal to one of the elements in the values list. Example: @@ -205,7 +205,7 @@ func stringToDataType(s string) (dataType, string, error) { } } -type fieldOpNode struct { +type stringOpNode struct { fieldPath []string fieldPathStr string dataType dataType @@ -214,7 +214,7 @@ type fieldOpNode struct { checker str_checker.DataChecker } -func newFieldOpNode( +func newStringOpNode( op string, caseSensitive bool, values [][]byte, @@ -239,7 +239,7 @@ func newFieldOpNode( } } - return &fieldOpNode{ + return &stringOpNode{ fieldPath: cfg.ParseFieldSelector(field), fieldPathStr: field, dataType: curDataType, @@ -248,11 +248,11 @@ func newFieldOpNode( }, nil } -func (n *fieldOpNode) Type() nodeType { - return NodeFieldOp +func (n *stringOpNode) Type() nodeType { + return NodeStringOp } -func (n *fieldOpNode) checkEvent(eventRoot *insaneJSON.Root) bool { +func (n *stringOpNode) checkEvent(eventRoot *insaneJSON.Root) bool { node := eventRoot.Dig(n.fieldPath...) 
if node.IsArray() || node.IsObject() { return false @@ -265,7 +265,7 @@ func (n *fieldOpNode) checkEvent(eventRoot *insaneJSON.Root) bool { return n.checker.Check(node.AsBytes()) } -func (n *fieldOpNode) CheckRaw(event []byte, sourceName []byte, metadata map[string]string) bool { +func (n *stringOpNode) CheckRaw(event []byte, sourceName []byte, metadata map[string]string) bool { switch n.dataType { case dataTypeEvent: return n.checker.Check(event) @@ -279,10 +279,10 @@ func (n *fieldOpNode) CheckRaw(event []byte, sourceName []byte, metadata map[str } } -func (n *fieldOpNode) isEqualTo(n2 Node, _ int) error { - n2f, ok := n2.(*fieldOpNode) +func (n *stringOpNode) isEqualTo(n2 Node, _ int) error { + n2f, ok := n2.(*stringOpNode) if !ok { - return errors.New("nodes have different types expected: fieldOpNode") + return errors.New("nodes have different types expected: stringOpNode") } if n.fieldPathStr != n2f.fieldPathStr || slices.Compare[[]string](n.fieldPath, n2f.fieldPath) != 0 { diff --git a/pipeline/do_if/ts_cmp_op.go b/pipeline/do_if/ts_cmp_op.go index fb93de625..dd2238e47 100644 --- a/pipeline/do_if/ts_cmp_op.go +++ b/pipeline/do_if/ts_cmp_op.go @@ -13,7 +13,7 @@ import ( ) /*{ do-if-ts-cmp-op-node -DoIf timestamp comparison op node is considered to always be a leaf in the DoIf tree like DoIf field op node. +DoIf timestamp comparison op node is considered to always be a leaf in the DoIf tree like DoIf string op node. It contains operation that compares timestamps with certain value. Params: From fdb8f368ec510a70f317b004e3f4a71ad55ed55b Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Wed, 30 Jul 2025 17:19:37 +0300 Subject: [PATCH 66/75] Fix --- pipeline/antispam/ctor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index f4693e863..95e978803 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -35,7 +35,7 @@ func extractAntispam(node map[string]any) ([]Rule, int, error) { rawRules, err := ctor.Get[[]any](node, fieldNameRules) if err == nil { rules, err = extractRules(rawRules) - if err == nil { + if err != nil { return nil, 0, err } } else if errors.Is(err, ctor.ErrTypeMismatch) { From 0bca7332cd567d7f8d13950dc576598077e9a501 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 31 Jul 2025 15:20:38 +0300 Subject: [PATCH 67/75] Add draft test --- cfg/matchrule/matchrule.go | 8 ++ pipeline/do_if/converter_test.go | 38 ++++++ pipeline/do_if/str_checker/checker.go | 128 ++++++++++----------- pipeline/do_if/str_checker/checker_test.go | 28 ++--- 4 files changed, 124 insertions(+), 78 deletions(-) diff --git a/cfg/matchrule/matchrule.go b/cfg/matchrule/matchrule.go index b7dae1643..c8c21a735 100644 --- a/cfg/matchrule/matchrule.go +++ b/cfg/matchrule/matchrule.go @@ -79,6 +79,14 @@ type Rule struct { prepared bool } +func (r *Rule) GetMinValueSize() int { + return r.minValueSize +} + +func (r *Rule) GetMaxValueSize() int { + return r.maxValueSize +} + func (r *Rule) Prepare() { if len(r.Values) == 0 { return diff --git a/pipeline/do_if/converter_test.go b/pipeline/do_if/converter_test.go index 672094a99..78549310c 100644 --- a/pipeline/do_if/converter_test.go +++ b/pipeline/do_if/converter_test.go @@ -1 +1,39 @@ package do_if + +import ( + "testing" + + "github.com/ozontech/file.d/cfg/matchrule" + "github.com/ozontech/file.d/pipeline/do_if/logic" + "github.com/stretchr/testify/require" +) + +func TestRuleToNode(t *testing.T) { + rule := matchrule.Rule{ + Values: []string{"a", "bb", "ccc"}, 
+ Mode: matchrule.ModeSuffix, + CaseInsensitive: true, + Invert: true, + } + + rule.Prepare() + + nRaw, err := RuleToNode(rule, DataTypeEventTag) + require.NoError(t, err) + + nLogic := nRaw.(*logicalNode) + require.Equal(t, nLogic.op, logic.Not) + require.Equal(t, len(nLogic.operands), 1) + + nOperand := nLogic.operands[0].(*stringOpNode) + require.Equal(t, nOperand.dataType, dataTypeEvent) + + c := nOperand.checker + require.Equal(t, c.MinValLen, rule.GetMinValueSize()) + require.Equal(t, c.MinValLen, rule.GetMinValueSize()) + require.True(t, c.ValuesBySize == nil) + require.True(t, c.Values == nil) + require.Equal(t, c.Op.String(), matchrule.ModeToString(rule.Mode)) + require.Equal(t, c.CaseSensitive, !rule.CaseInsensitive) + require.Equal(t, c.Values, [][]byte{[]byte("a"), []byte("bb"), []byte("ccc")}) +} diff --git a/pipeline/do_if/str_checker/checker.go b/pipeline/do_if/str_checker/checker.go index 3c40b49ea..7d59924a9 100644 --- a/pipeline/do_if/str_checker/checker.go +++ b/pipeline/do_if/str_checker/checker.go @@ -10,24 +10,24 @@ import ( type op int const ( - opEqual op = iota - opContains - opPrefix - opSuffix - opRegex + OpEqual op = iota + OpContains + OpPrefix + OpSuffix + OpRegex ) func (op op) String() string { switch op { - case opEqual: + case OpEqual: return OpEqualTag - case opContains: + case OpContains: return OpContainsTag - case opPrefix: + case OpPrefix: return OpPrefixTag - case opSuffix: + case OpSuffix: return OpSuffixTag - case opRegex: + case OpRegex: return OpRegexTag default: return "unknown" @@ -45,29 +45,29 @@ const ( func stringToOp(s string) (op, error) { switch s { case OpEqualTag: - return opEqual, nil + return OpEqual, nil case OpContainsTag: - return opContains, nil + return OpContains, nil case OpPrefixTag: - return opPrefix, nil + return OpPrefix, nil case OpSuffixTag: - return opSuffix, nil + return OpSuffix, nil case OpRegexTag: - return opRegex, nil + return OpRegex, nil default: return -1, fmt.Errorf("unknown string op %q", s) } } type DataChecker struct { - op op - caseSensitive bool - values [][]byte - valuesBySize map[int][][]byte - reValues []*regexp.Regexp + Op op + CaseSensitive bool + Values [][]byte + ValuesBySize map[int][][]byte + ReValues []*regexp.Regexp - minValLen int - maxValLen int + MinValLen int + MaxValLen int } func New(opTag string, caseSensitive bool, values [][]byte) (DataChecker, error) { @@ -87,7 +87,7 @@ func New(opTag string, caseSensitive bool, values [][]byte) (DataChecker, error) return def, err } - if curOp == opRegex { + if curOp == OpRegex { reValues = make([]*regexp.Regexp, 0, len(values)) for _, v := range values { re, err := regexp.Compile(string(v)) @@ -99,7 +99,7 @@ func New(opTag string, caseSensitive bool, values [][]byte) (DataChecker, error) } else { minValLen = len(values[0]) maxValLen = len(values[0]) - if curOp == opEqual { + if curOp == OpEqual { valsBySize = make(map[int][][]byte) } else { vals = make([][]byte, len(values)) @@ -119,7 +119,7 @@ func New(opTag string, caseSensitive bool, values [][]byte) (DataChecker, error) if len(values[i]) > maxValLen { maxValLen = len(values[i]) } - if curOp == opEqual { + if curOp == OpEqual { valsBySize[len(curVal)] = append(valsBySize[len(curVal)], curVal) } else { vals[i] = curVal @@ -128,13 +128,13 @@ func New(opTag string, caseSensitive bool, values [][]byte) (DataChecker, error) } return DataChecker{ - op: curOp, - caseSensitive: caseSensitive, - values: vals, - valuesBySize: valsBySize, - reValues: reValues, - minValLen: minValLen, - maxValLen: maxValLen, 
+ Op: curOp, + CaseSensitive: caseSensitive, + Values: vals, + ValuesBySize: valsBySize, + ReValues: reValues, + MinValLen: minValLen, + MaxValLen: maxValLen, }, nil } @@ -149,17 +149,17 @@ func MustNew(opTag string, caseSensitive bool, values [][]byte) DataChecker { func (n *DataChecker) Check(data []byte) bool { // fast check for data - if n.op != opRegex && len(data) < n.minValLen { + if n.Op != OpRegex && len(data) < n.MinValLen { return false } - switch n.op { - case opEqual: - vals, ok := n.valuesBySize[len(data)] + switch n.Op { + case OpEqual: + vals, ok := n.ValuesBySize[len(data)] if !ok { return false } - if !n.caseSensitive && data != nil { + if !n.CaseSensitive && data != nil { data = bytes.ToLower(data) } for _, val := range vals { @@ -172,43 +172,43 @@ func (n *DataChecker) Check(data []byte) bool { return true } } - case opContains: - if !n.caseSensitive { + case OpContains: + if !n.CaseSensitive { data = bytes.ToLower(data) } - for _, val := range n.values { + for _, val := range n.Values { if bytes.Contains(data, val) { return true } } - case opPrefix: + case OpPrefix: // check only necessary amount of bytes - if len(data) > n.maxValLen { - data = data[:n.maxValLen] + if len(data) > n.MaxValLen { + data = data[:n.MaxValLen] } - if !n.caseSensitive { + if !n.CaseSensitive { data = bytes.ToLower(data) } - for _, val := range n.values { + for _, val := range n.Values { if bytes.HasPrefix(data, val) { return true } } - case opSuffix: + case OpSuffix: // check only necessary amount of bytes - if len(data) > n.maxValLen { - data = data[len(data)-n.maxValLen:] + if len(data) > n.MaxValLen { + data = data[len(data)-n.MaxValLen:] } - if !n.caseSensitive { + if !n.CaseSensitive { data = bytes.ToLower(data) } - for _, val := range n.values { + for _, val := range n.Values { if bytes.HasSuffix(data, val) { return true } } - case opRegex: - for _, re := range n.reValues { + case OpRegex: + for _, re := range n.ReValues { if re.Match(data) { return true } @@ -246,30 +246,30 @@ func Equal(a, b *DataChecker) (err error) { } }() - assertEqual(a.op, b.op, "different op") - assertEqual(a.caseSensitive, b.caseSensitive, "different case_sensitive") - assertEqualValues(a.values, b.values, "different values") + assertEqual(a.Op, b.Op, "different op") + assertEqual(a.CaseSensitive, b.CaseSensitive, "different case_sensitive") + assertEqualValues(a.Values, b.Values, "different values") - assertEqual(len(a.valuesBySize), len(b.valuesBySize), "different valuesBySize len") - for size := range a.valuesBySize { - _, found := b.valuesBySize[size] + assertEqual(len(a.ValuesBySize), len(b.ValuesBySize), "different ValuesBySize len") + for size := range a.ValuesBySize { + _, found := b.ValuesBySize[size] assert(found, fmt.Sprintf("not found values by size %d", size)) assertEqualValues( - a.valuesBySize[size], b.valuesBySize[size], + a.ValuesBySize[size], b.ValuesBySize[size], fmt.Sprintf("different values by size %d", size), ) } - assertEqual(len(a.reValues), len(b.reValues), "different regex values count") - for i := range a.reValues { + assertEqual(len(a.ReValues), len(b.ReValues), "different regex values count") + for i := range a.ReValues { assertEqual( - a.reValues[i].String(), b.reValues[i].String(), + a.ReValues[i].String(), b.ReValues[i].String(), fmt.Sprintf("different regex values at pos %d", i), ) } - assertEqual(a.minValLen, b.minValLen, "different min value len") - assertEqual(a.maxValLen, b.maxValLen, "different max value len") + assertEqual(a.MinValLen, b.MinValLen, "different min value 
len") + assertEqual(a.MaxValLen, b.MaxValLen, "different max value len") return nil } diff --git a/pipeline/do_if/str_checker/checker_test.go b/pipeline/do_if/str_checker/checker_test.go index c159271e9..1a8c7fc0a 100644 --- a/pipeline/do_if/str_checker/checker_test.go +++ b/pipeline/do_if/str_checker/checker_test.go @@ -22,10 +22,10 @@ func TestCheckerCtor(t *testing.T) { values: [][]byte{[]byte(`test-111`), []byte(`test-2`), []byte(`test-3`), []byte(`test-12345`)}, expected: DataChecker{ - op: opEqual, - caseSensitive: true, - values: nil, - valuesBySize: map[int][][]byte{ + Op: OpEqual, + CaseSensitive: true, + Values: nil, + ValuesBySize: map[int][][]byte{ 6: { []byte(`test-2`), []byte(`test-3`), @@ -37,9 +37,9 @@ func TestCheckerCtor(t *testing.T) { []byte(`test-12345`), }, }, - reValues: nil, - minValLen: 6, - maxValLen: 10, + ReValues: nil, + MinValLen: 6, + MaxValLen: 10, }, }, { @@ -51,16 +51,16 @@ func TestCheckerCtor(t *testing.T) { }, expected: DataChecker{ - op: opContains, - caseSensitive: false, - values: [][]byte{ + Op: OpContains, + CaseSensitive: false, + Values: [][]byte{ []byte(`test-0987`), []byte(`test-11`), }, - valuesBySize: nil, - reValues: nil, - minValLen: 7, - maxValLen: 9, + ValuesBySize: nil, + ReValues: nil, + MinValLen: 7, + MaxValLen: 9, }, }, } { From 2dae4f58a5320d8d6918118e59d8da76b8a28db3 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Thu, 31 Jul 2025 19:32:14 +0300 Subject: [PATCH 68/75] Restore old test --- pipeline/do_if/ctor_test.go | 277 +++++++++++++++-------------------- pipeline/do_if/do_if_test.go | 30 ++-- 2 files changed, 134 insertions(+), 173 deletions(-) diff --git a/pipeline/do_if/ctor_test.go b/pipeline/do_if/ctor_test.go index c7244aa1a..81f0df9c9 100644 --- a/pipeline/do_if/ctor_test.go +++ b/pipeline/do_if/ctor_test.go @@ -6,19 +6,16 @@ import ( "time" "github.com/bitly/go-simplejson" - "github.com/ozontech/file.d/pipeline/do_if/logic" - "github.com/ozontech/file.d/pipeline/do_if/str_checker" - insaneJSON "github.com/ozontech/insane-json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestExtractNode(t *testing.T) { tests := []struct { - name string - raw string - expected Node - wantErr bool + name string + raw string + want *treeNode + wantErr bool }{ { name: "ok", @@ -86,74 +83,65 @@ func TestExtractNode(t *testing.T) { } ] }`, - expected: &logicalNode{ - op: logic.Not, - operands: []Node{ - &logicalNode{ - op: logic.And, - operands: []Node{ - &stringOpNode{ - fieldPath: []string{"service"}, - fieldPathStr: "service", - checker: str_checker.MustNew("equal", false, [][]byte{nil, []byte("")}), + want: &treeNode{ + logicalOp: "not", + operands: []treeNode{ + { + logicalOp: "and", + operands: []treeNode{ + { + stringOp: "equal", + fieldName: "service", + values: [][]byte{nil, []byte("")}, + caseSensitive: false, }, - &stringOpNode{ - fieldPath: []string{"log", "msg"}, - fieldPathStr: "log.msg", - checker: str_checker.MustNew( - "prefix", false, [][]byte{[]byte("test-1"), []byte("test-2")}), + { + stringOp: "prefix", + fieldName: "log.msg", + values: [][]byte{[]byte("test-1"), []byte("test-2")}, + caseSensitive: false, }, - &lenCmpOpNode{ - lenCmpOp: byteLenCmpOp, - fieldPath: []string{"msg"}, - cmpOp: cmpOpGreater, + { + lenCmpOp: "byte_len_cmp", + cmpOp: "gt", + fieldName: "msg", cmpValue: 100, }, - &lenCmpOpNode{ - lenCmpOp: arrayLenCmpOp, - fieldPath: []string{"items"}, - cmpOp: cmpOpLess, + { + lenCmpOp: "array_len_cmp", + cmpOp: "lt", + fieldName: "items", cmpValue: 100, }, - &tsCmpOpNode{ 
- fieldPath: []string{"timestamp"}, - format: time.RFC3339Nano, - cmpOp: cmpOpLess, - cmpValChangeMode: cmpValChangeModeConst, - constCmpValue: time.Date( - 2009, time.November, 10, 23, 0, 0, 0, time.UTC, - ).UnixNano(), - updateInterval: 15 * time.Second, + { + tsCmpOp: true, + cmpOp: "lt", + fieldName: "timestamp", + tsFormat: time.RFC3339Nano, + tsCmpValue: time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + tsCmpValChangeMode: tsCmpModeConstTag, + tsUpdateInterval: 15 * time.Second, }, - &logicalNode{ - op: logic.Or, - operands: []Node{ - &stringOpNode{ - fieldPath: []string{"service"}, - fieldPathStr: "service", - checker: str_checker.MustNew( - "suffix", - true, - [][]byte{[]byte("test-svc-1"), []byte("test-svc-2")}, - ), + { + logicalOp: "or", + operands: []treeNode{ + { + stringOp: "suffix", + fieldName: "service", + values: [][]byte{[]byte("test-svc-1"), []byte("test-svc-2")}, + caseSensitive: true, }, - &stringOpNode{ - fieldPath: []string{"pod"}, - fieldPathStr: "pod", - checker: str_checker.MustNew( - "contains", - true, - [][]byte{[]byte("test")}, - ), + { + stringOp: "contains", + fieldName: "pod", + values: [][]byte{[]byte("test")}, + caseSensitive: true, }, - &stringOpNode{ - fieldPath: []string{"message"}, - fieldPathStr: "message", - checker: str_checker.MustNew( - "regex", - true, - [][]byte{[]byte(`test-\d+`), []byte(`test-msg-\d+`)}, - ), + { + stringOp: "regex", + fieldName: "message", + values: [][]byte{[]byte(`test-\d+`), []byte(`test-msg-\d+`)}, + caseSensitive: true, }, }, }, @@ -162,48 +150,23 @@ func TestExtractNode(t *testing.T) { }, }, }, - { - name: "ok_field_op_node_data_type_event", - raw: `{"op":"equal", "values":["a"], "data":"event"}`, - expected: &stringOpNode{ - dataType: dataTypeEvent, - checker: str_checker.MustNew("equal", true, [][]byte{[]byte("a")}), - }, - }, - { - name: "ok_field_op_node_data_type_source_name", - raw: `{"op":"equal", "values":["a"], "data":"source_name"}`, - expected: &stringOpNode{ - dataType: dataTypeSourceName, - checker: str_checker.MustNew("equal", true, [][]byte{[]byte("a")}), - }, - }, - { - name: "ok_field_op_node_data_type_meta", - raw: `{"op":"equal", "values":["a"], "data":"meta.name"}`, - expected: &stringOpNode{ - dataType: dataTypeMeta, - metaKey: "name", - checker: str_checker.MustNew("equal", true, [][]byte{[]byte("a")}), - }, - }, { name: "ok_byte_len_cmp_op", raw: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":10}`, - expected: &lenCmpOpNode{ - lenCmpOp: byteLenCmpOp, - fieldPath: []string{"data"}, - cmpOp: cmpOpLess, + want: &treeNode{ + lenCmpOp: "byte_len_cmp", + cmpOp: "lt", + fieldName: "data", cmpValue: 10, }, }, { name: "ok_array_len_cmp_op", raw: `{"op":"array_len_cmp","field":"items","cmp_op":"lt","value":10}`, - expected: &lenCmpOpNode{ - lenCmpOp: arrayLenCmpOp, - fieldPath: []string{"items"}, - cmpOp: cmpOpLess, + want: &treeNode{ + lenCmpOp: "array_len_cmp", + cmpOp: "lt", + fieldName: "items", cmpValue: 10, }, }, @@ -218,16 +181,15 @@ func TestExtractNode(t *testing.T) { "format": "2006-01-02T15:04:05Z07:00", "update_interval": "15s" }`, - expected: &tsCmpOpNode{ - fieldPath: []string{"timestamp"}, - format: time.RFC3339, - cmpOp: cmpOpLess, - cmpValChangeMode: cmpValChangeModeConst, - constCmpValue: time.Date( - 2009, time.November, 10, 23, 0, 0, 0, time.UTC, - ).UnixNano(), - cmpValueShift: (-24 * time.Hour).Nanoseconds(), - updateInterval: 15 * time.Second, + want: &treeNode{ + tsCmpOp: true, + cmpOp: "lt", + fieldName: "timestamp", + tsFormat: time.RFC3339, + tsCmpValue: 
time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + tsCmpValueShift: -24 * time.Hour, + tsCmpValChangeMode: tsCmpModeConstTag, + tsUpdateInterval: 15 * time.Second, }, }, { @@ -238,14 +200,14 @@ func TestExtractNode(t *testing.T) { "cmp_op": "lt", "value": "now" }`, - expected: &tsCmpOpNode{ - fieldPath: []string{"timestamp"}, - format: time.RFC3339Nano, - cmpOp: cmpOpLess, - cmpValChangeMode: cmpValChangeModeNow, - constCmpValue: time.Time{}.UnixNano(), - updateInterval: defaultTsCmpValUpdateInterval, - cmpValueShift: 0, + want: &treeNode{ + tsCmpOp: true, + cmpOp: "lt", + fieldName: "timestamp", + tsCmpValChangeMode: tsCmpModeNowTag, + tsFormat: defaultTsFormat, + tsCmpValueShift: 0, + tsUpdateInterval: defaultTsCmpValUpdateInterval, }, }, { @@ -257,16 +219,17 @@ func TestExtractNode(t *testing.T) { "format": "rfc3339", "value": "now" }`, - expected: &tsCmpOpNode{ - fieldPath: []string{"timestamp"}, - format: time.RFC3339, - cmpOp: cmpOpLess, - cmpValChangeMode: cmpValChangeModeNow, - constCmpValue: time.Time{}.UnixNano(), - cmpValueShift: 0, - updateInterval: defaultTsCmpValUpdateInterval, + want: &treeNode{ + tsCmpOp: true, + cmpOp: "lt", + fieldName: "timestamp", + tsCmpValChangeMode: tsCmpModeNowTag, + tsFormat: time.RFC3339, + tsCmpValueShift: 0, + tsUpdateInterval: defaultTsCmpValUpdateInterval, }, }, + { name: "ok_check_type", raw: `{ @@ -274,16 +237,12 @@ func TestExtractNode(t *testing.T) { "field": "log", "values": ["obj","arr"] }`, - expected: &checkTypeOpNode{ - fieldPath: []string{"log"}, - fieldPathStr: "log", - checkTypeFns: []checkTypeFn{ - func(n *insaneJSON.Node) bool { - return n.IsObject() - }, - func(n *insaneJSON.Node) bool { - return n.IsArray() - }, + want: &treeNode{ + checkTypeOp: true, + fieldName: "log", + values: [][]byte{ + []byte("obj"), + []byte("arr"), }, }, }, @@ -297,23 +256,26 @@ func TestExtractNode(t *testing.T) { {"op":"equal","field":"service","values":"test"} ] }`, - expected: &logicalNode{ - op: logic.Or, - operands: []Node{ - &stringOpNode{ - fieldPath: []string{"service"}, - fieldPathStr: "service", - checker: str_checker.MustNew("equal", true, [][]byte{nil}), + want: &treeNode{ + logicalOp: "or", + operands: []treeNode{ + { + stringOp: "equal", + fieldName: "service", + values: [][]byte{nil}, + caseSensitive: true, }, - &stringOpNode{ - fieldPath: []string{"service"}, - fieldPathStr: "service", - checker: str_checker.MustNew("equal", true, [][]byte{[]byte("")}), + { + stringOp: "equal", + fieldName: "service", + values: [][]byte{[]byte("")}, + caseSensitive: true, }, - &stringOpNode{ - fieldPath: []string{"service"}, - fieldPathStr: "service", - checker: str_checker.MustNew("equal", true, [][]byte{[]byte("test")}), + { + stringOp: "equal", + fieldName: "service", + values: [][]byte{[]byte("test")}, + caseSensitive: true, }, }, }, @@ -486,16 +448,6 @@ func TestExtractNode(t *testing.T) { }`, wantErr: true, }, - { - name: "error_field_op_node_data_type_type_mismatch", - raw: `{"op":"equal", "values":["a"], "data":123}`, - wantErr: true, - }, - { - name: "error_field_op_node_data_type_unparsable", - raw: `{"op":"equal", "values":["a"], "data":"some"}`, - wantErr: true, - }, { name: "error_check_type_op_empty_values", raw: `{ @@ -523,7 +475,6 @@ func TestExtractNode(t *testing.T) { wantErr: true, }, } - for _, tt := range tests { tt := tt t.Run(tt.name, func(t *testing.T) { @@ -540,7 +491,9 @@ func TestExtractNode(t *testing.T) { return } - assert.NoError(t, got.isEqualTo(tt.expected, 1)) + want, err := buildTree(tt.want) + 
require.NoError(t, err) + assert.NoError(t, got.isEqualTo(want, 1)) }) } } diff --git a/pipeline/do_if/do_if_test.go b/pipeline/do_if/do_if_test.go index 8b782ad29..eab8167c5 100644 --- a/pipeline/do_if/do_if_test.go +++ b/pipeline/do_if/do_if_test.go @@ -16,8 +16,9 @@ import ( ) type treeNode struct { + fieldName string + stringOp string - fieldName string caseSensitive bool values [][]byte @@ -34,10 +35,12 @@ type treeNode struct { tsCmpValue time.Time tsCmpValueShift time.Duration tsUpdateInterval time.Duration + + checkTypeOp bool } // nolint:gocritic -func buildTree(node treeNode) (Node, error) { +func buildTree(node *treeNode) (Node, error) { switch { case node.stringOp != "": return newStringOpNode( @@ -49,8 +52,8 @@ func buildTree(node treeNode) (Node, error) { ) case node.logicalOp != "": operands := make([]Node, 0) - for _, operandNode := range node.operands { - operand, err := buildTree(operandNode) + for i := range node.operands { + operand, err := buildTree(&node.operands[i]) if err != nil { return nil, fmt.Errorf("failed to build tree: %w", err) } @@ -72,6 +75,11 @@ func buildTree(node treeNode) (Node, error) { node.tsCmpValueShift, node.tsUpdateInterval, ) + case node.checkTypeOp: + return newCheckTypeOpNode( + node.fieldName, + node.values, + ) default: return nil, errors.New("unknown type of node") } @@ -450,7 +458,7 @@ func TestBuildNodes(t *testing.T) { tt := tt t.Run(tt.name, func(t *testing.T) { t.Parallel() - got, err := buildTree(tt.tree) + got, err := buildTree(&tt.tree) if tt.wantErr { require.Error(t, err) return @@ -1014,7 +1022,7 @@ func TestCheck(t *testing.T) { var eventRoot *insaneJSON.Root var err error t.Parallel() - root, err = buildTree(tt.tree) + root, err = buildTree(&tt.tree) require.NoError(t, err) checker := newChecker(root) for _, d := range tt.data { @@ -1083,7 +1091,7 @@ func TestCheckLenCmpLtObject(t *testing.T) { require.NoError(t, err) for index, test := range tests { - root, err := buildTree(treeNode{ + root, err := buildTree(&treeNode{ fieldName: "user_info", lenCmpOp: byteLenCmpOpTag, cmpOp: "lt", @@ -1100,7 +1108,7 @@ func TestCheckLenCmpLtObject(t *testing.T) { require.NoError(t, err) for index, test := range tests { - root, err := buildTree(treeNode{ + root, err := buildTree(&treeNode{ fieldName: "", lenCmpOp: byteLenCmpOpTag, cmpOp: "lt", @@ -1124,7 +1132,7 @@ func TestCheckTsCmpValChangeModeNow(t *testing.T) { ts1 := begin.Add(2 * dt) ts2 := begin.Add(4 * dt) - root, err := buildTree(treeNode{ + root, err := buildTree(&treeNode{ tsCmpOp: true, fieldName: "ts", cmpOp: "lt", @@ -1815,9 +1823,9 @@ func TestNodeIsEqual(t *testing.T) { tt := tt t.Run(tt.name, func(t *testing.T) { t.Parallel() - root1, err := buildTree(tt.t1) + root1, err := buildTree(&tt.t1) require.NoError(t, err) - root2, err := buildTree(tt.t2) + root2, err := buildTree(&tt.t2) require.NoError(t, err) c1 := newChecker(root1) c2 := newChecker(root2) From 66c521adbf124a78bcb9682eaf62f27a3c87dfd3 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 4 Aug 2025 14:32:45 +0300 Subject: [PATCH 69/75] Edit convertor test --- pipeline/do_if/converter.go | 15 ++++++--- pipeline/do_if/converter_test.go | 55 ++++++++++++++++++++------------ 2 files changed, 44 insertions(+), 26 deletions(-) diff --git a/pipeline/do_if/converter.go b/pipeline/do_if/converter.go index cdfa832af..e9f667984 100644 --- a/pipeline/do_if/converter.go +++ b/pipeline/do_if/converter.go @@ -8,11 +8,7 @@ import ( ) func RuleToNode(rule matchrule.Rule, dataTypeTag string) (Node, error) { - values := 
make([][]byte, 0, len(rule.Values)) - for _, s := range rule.Values { - values = append(values, []byte(strings.Clone(s))) - } - + values := arrStringToArrBytes(rule.Values) node, err := newStringOpNode( matchrule.ModeToString(rule.Mode), !rule.CaseInsensitive, @@ -31,6 +27,15 @@ func RuleToNode(rule matchrule.Rule, dataTypeTag string) (Node, error) { return newLogicalNode(logic.NotTag, []Node{node}) } +func arrStringToArrBytes(a []string) [][]byte { + res := make([][]byte, 0, len(a)) + for _, s := range a { + res = append(res, []byte(strings.Clone(s))) + } + + return res +} + func RuleSetToNode(ruleSet matchrule.RuleSet, dataTypeTag string) (Node, error) { operands := make([]Node, 0, len(ruleSet.Rules)) for _, r := range ruleSet.Rules { diff --git a/pipeline/do_if/converter_test.go b/pipeline/do_if/converter_test.go index 78549310c..f14bb9c0a 100644 --- a/pipeline/do_if/converter_test.go +++ b/pipeline/do_if/converter_test.go @@ -9,31 +9,44 @@ import ( ) func TestRuleToNode(t *testing.T) { - rule := matchrule.Rule{ - Values: []string{"a", "bb", "ccc"}, - Mode: matchrule.ModeSuffix, - CaseInsensitive: true, - Invert: true, + rules := []matchrule.Rule{ + { + Values: []string{"a", "bb", "ccc"}, + Mode: matchrule.ModeSuffix, + CaseInsensitive: true, + Invert: true, + }, + { + Values: []string{"a", "bb", "ccc"}, + Mode: matchrule.ModeSuffix, + CaseInsensitive: true, + Invert: false, + }, } - rule.Prepare() + for _, rule := range rules { + rule.Prepare() - nRaw, err := RuleToNode(rule, DataTypeEventTag) - require.NoError(t, err) + nRaw, err := RuleToNode(rule, DataTypeEventTag) + require.NoError(t, err) - nLogic := nRaw.(*logicalNode) - require.Equal(t, nLogic.op, logic.Not) - require.Equal(t, len(nLogic.operands), 1) + if rule.Invert { + nLogic := nRaw.(*logicalNode) + require.Equal(t, nLogic.op, logic.Not) + require.Equal(t, len(nLogic.operands), 1) - nOperand := nLogic.operands[0].(*stringOpNode) - require.Equal(t, nOperand.dataType, dataTypeEvent) + nRaw = nLogic.operands[0] + } - c := nOperand.checker - require.Equal(t, c.MinValLen, rule.GetMinValueSize()) - require.Equal(t, c.MinValLen, rule.GetMinValueSize()) - require.True(t, c.ValuesBySize == nil) - require.True(t, c.Values == nil) - require.Equal(t, c.Op.String(), matchrule.ModeToString(rule.Mode)) - require.Equal(t, c.CaseSensitive, !rule.CaseInsensitive) - require.Equal(t, c.Values, [][]byte{[]byte("a"), []byte("bb"), []byte("ccc")}) + nStr := nRaw.(*stringOpNode) + require.Equal(t, nStr.dataType, dataTypeEvent) + + c := nStr.checker + require.Equal(t, c.MinValLen, rule.GetMinValueSize()) + require.Equal(t, c.MinValLen, rule.GetMinValueSize()) + require.True(t, c.ValuesBySize == nil) + require.Equal(t, c.Op.String(), matchrule.ModeToString(rule.Mode)) + require.Equal(t, c.CaseSensitive, !rule.CaseInsensitive) + require.Equal(t, c.Values, arrStringToArrBytes(rule.Values)) + } } From ff57d519097eb38b1c88499e60c8a04c42bcea8c Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 4 Aug 2025 16:36:51 +0300 Subject: [PATCH 70/75] Add test cases --- pipeline/do_if/converter_test.go | 53 +++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/pipeline/do_if/converter_test.go b/pipeline/do_if/converter_test.go index f14bb9c0a..90c9cbe34 100644 --- a/pipeline/do_if/converter_test.go +++ b/pipeline/do_if/converter_test.go @@ -9,24 +9,7 @@ import ( ) func TestRuleToNode(t *testing.T) { - rules := []matchrule.Rule{ - { - Values: []string{"a", "bb", "ccc"}, - Mode: matchrule.ModeSuffix, - CaseInsensitive: 
true, - Invert: true, - }, - { - Values: []string{"a", "bb", "ccc"}, - Mode: matchrule.ModeSuffix, - CaseInsensitive: true, - Invert: false, - }, - } - - for _, rule := range rules { - rule.Prepare() - + for _, rule := range genAllRules() { nRaw, err := RuleToNode(rule, DataTypeEventTag) require.NoError(t, err) @@ -50,3 +33,37 @@ func TestRuleToNode(t *testing.T) { require.Equal(t, c.Values, arrStringToArrBytes(rule.Values)) } } + +func genAllRules() []matchrule.Rule { + arrValues := [][]string{ + {"val1", "val2", "val3", "val4"}, + {"a", "bb", "ccc"}, + } + modes := []matchrule.Mode{ + matchrule.ModePrefix, + matchrule.ModeContains, + matchrule.ModeSuffix, + } + boolVals := []bool{false, true} + + var rules []matchrule.Rule + + for _, values := range arrValues { + for _, mode := range modes { + for _, caseSensitive := range boolVals { + for _, invert := range boolVals { + rule := matchrule.Rule{ + Values: values, + Mode: mode, + CaseInsensitive: caseSensitive, + Invert: invert, + } + rule.Prepare() + rules = append(rules, rule) + } + } + } + } + + return rules +} From 6ed13d85f133f95a85377a04d53c232ab13c0cc4 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Tue, 5 Aug 2025 13:23:47 +0300 Subject: [PATCH 71/75] Add tests --- pipeline/do_if/converter_test.go | 71 ++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/pipeline/do_if/converter_test.go b/pipeline/do_if/converter_test.go index 90c9cbe34..a202d8dd9 100644 --- a/pipeline/do_if/converter_test.go +++ b/pipeline/do_if/converter_test.go @@ -8,30 +8,66 @@ import ( "github.com/stretchr/testify/require" ) -func TestRuleToNode(t *testing.T) { - for _, rule := range genAllRules() { - nRaw, err := RuleToNode(rule, DataTypeEventTag) +func TestRuleSetToNode(t *testing.T) { + ruleSets := []matchrule.RuleSet{ + { + Name: "sample_or", + Cond: matchrule.CondOr, + Rules: genAllRules(), + }, + { + Name: "sample_and", + Cond: matchrule.CondAnd, + Rules: genAllRules(), + }, + } + + for _, ruleSet := range ruleSets { + ruleSet.Prepare() + + rawNode, err := RuleSetToNode(ruleSet, DataTypeEventTag) require.NoError(t, err) - if rule.Invert { - nLogic := nRaw.(*logicalNode) - require.Equal(t, nLogic.op, logic.Not) - require.Equal(t, len(nLogic.operands), 1) + logicNode := rawNode.(*logicalNode) + require.Equal(t, logicNode.op.String(), matchrule.CondToString(ruleSet.Cond)) + require.Equal(t, len(logicNode.operands), len(ruleSet.Rules)) - nRaw = nLogic.operands[0] + for i := range len(logicNode.operands) { + cmpRuleAndNode(t, ruleSet.Rules[i], logicNode.operands[i]) } + } +} - nStr := nRaw.(*stringOpNode) - require.Equal(t, nStr.dataType, dataTypeEvent) +func TestRuleToNode(t *testing.T) { + for _, rule := range genAllRules() { + rule.Prepare() - c := nStr.checker - require.Equal(t, c.MinValLen, rule.GetMinValueSize()) - require.Equal(t, c.MinValLen, rule.GetMinValueSize()) - require.True(t, c.ValuesBySize == nil) - require.Equal(t, c.Op.String(), matchrule.ModeToString(rule.Mode)) - require.Equal(t, c.CaseSensitive, !rule.CaseInsensitive) - require.Equal(t, c.Values, arrStringToArrBytes(rule.Values)) + node, err := RuleToNode(rule, DataTypeEventTag) + require.NoError(t, err) + + cmpRuleAndNode(t, rule, node) + } +} + +func cmpRuleAndNode(t *testing.T, rule matchrule.Rule, node Node) { + if rule.Invert { + nLogic := node.(*logicalNode) + require.Equal(t, nLogic.op, logic.Not) + require.Equal(t, len(nLogic.operands), 1) + + node = nLogic.operands[0] } + + nStr := node.(*stringOpNode) + require.Equal(t, 
nStr.dataType, dataTypeEvent) + + c := nStr.checker + require.Equal(t, c.MinValLen, rule.GetMinValueSize()) + require.Equal(t, c.MinValLen, rule.GetMinValueSize()) + require.True(t, c.ValuesBySize == nil) + require.Equal(t, c.Op.String(), matchrule.ModeToString(rule.Mode)) + require.Equal(t, c.CaseSensitive, !rule.CaseInsensitive) + require.Equal(t, c.Values, arrStringToArrBytes(rule.Values)) } func genAllRules() []matchrule.Rule { @@ -58,7 +94,6 @@ func genAllRules() []matchrule.Rule { CaseInsensitive: caseSensitive, Invert: invert, } - rule.Prepare() rules = append(rules, rule) } } From f6624a75eaab669a9af628b3a654d43b21ec2793 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Fri, 8 Aug 2025 16:02:33 +0300 Subject: [PATCH 72/75] Restore old test again --- pipeline/do_if/ctor_test.go | 499 +++++++++++++++++++++--------------- 1 file changed, 294 insertions(+), 205 deletions(-) diff --git a/pipeline/do_if/ctor_test.go b/pipeline/do_if/ctor_test.go index 81f0df9c9..6a2896d4d 100644 --- a/pipeline/do_if/ctor_test.go +++ b/pipeline/do_if/ctor_test.go @@ -10,79 +10,86 @@ import ( "github.com/stretchr/testify/require" ) -func TestExtractNode(t *testing.T) { +func Test_extractDoIfChecker(t *testing.T) { + type args struct { + cfgStr string + } + tests := []struct { name string - raw string + args args want *treeNode wantErr bool }{ { name: "ok", - raw: ` - { - "op": "not", - "operands": [ - { - "op": "and", - "operands": [ - { - "op": "equal", - "field": "service", - "values": [null, ""], - "case_sensitive": false - }, - { - "op": "prefix", - "field": "log.msg", - "values": ["test-1", "test-2"], - "case_sensitive": false - }, - { - "op": "byte_len_cmp", - "field": "msg", - "cmp_op": "gt", - "value": 100 - }, - { - "op": "array_len_cmp", - "field": "items", - "cmp_op": "lt", - "value": 100 - }, - { - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "format": "2006-01-02T15:04:05.999999999Z07:00", - "update_interval": "15s" - }, - { - "op": "or", - "operands": [ - { - "op": "suffix", - "field": "service", - "values": ["test-svc-1", "test-svc-2"], - "case_sensitive": true - }, - { - "op": "contains", - "field": "pod", - "values": ["test"] - }, - { - "op": "regex", - "field": "message", - "values": ["test-\\d+", "test-msg-\\d+"] - } - ] - } - ] - } - ] - }`, + args: args{ + cfgStr: ` + { + "op": "not", + "operands": [ + { + "op": "and", + "operands": [ + { + "op": "equal", + "field": "service", + "values": [null, ""], + "case_sensitive": false + }, + { + "op": "prefix", + "field": "log.msg", + "values": ["test-1", "test-2"], + "case_sensitive": false + }, + { + "op": "byte_len_cmp", + "field": "msg", + "cmp_op": "gt", + "value": 100 + }, + { + "op": "array_len_cmp", + "field": "items", + "cmp_op": "lt", + "value": 100 + }, + { + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "format": "2006-01-02T15:04:05.999999999Z07:00", + "update_interval": "15s" + }, + { + "op": "or", + "operands": [ + { + "op": "suffix", + "field": "service", + "values": ["test-svc-1", "test-svc-2"], + "case_sensitive": true + }, + { + "op": "contains", + "field": "pod", + "values": ["test"] + }, + { + "op": "regex", + "field": "message", + "values": ["test-\\d+", "test-msg-\\d+"] + } + ] + } + ] + } + ] + } + `, + }, want: &treeNode{ logicalOp: "not", operands: []treeNode{ @@ -150,9 +157,18 @@ func TestExtractNode(t *testing.T) { }, }, }, + { + name: "ok_not_map", + args: args{ + cfgStr: `[{"field":"val"}]`, + }, + wantErr: false, 
+ }, { name: "ok_byte_len_cmp_op", - raw: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":10}`, + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":10}`, + }, want: &treeNode{ lenCmpOp: "byte_len_cmp", cmpOp: "lt", @@ -162,7 +178,9 @@ func TestExtractNode(t *testing.T) { }, { name: "ok_array_len_cmp_op", - raw: `{"op":"array_len_cmp","field":"items","cmp_op":"lt","value":10}`, + args: args{ + cfgStr: `{"op":"array_len_cmp","field":"items","cmp_op":"lt","value":10}`, + }, want: &treeNode{ lenCmpOp: "array_len_cmp", cmpOp: "lt", @@ -172,15 +190,16 @@ func TestExtractNode(t *testing.T) { }, { name: "ok_ts_cmp_op", - raw: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "value_shift": "-24h", - "format": "2006-01-02T15:04:05Z07:00", - "update_interval": "15s" - }`, + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "value_shift": "-24h", + "format": "2006-01-02T15:04:05Z07:00", + "update_interval": "15s"}`, + }, want: &treeNode{ tsCmpOp: true, cmpOp: "lt", @@ -194,12 +213,13 @@ func TestExtractNode(t *testing.T) { }, { name: "ok_ts_cmp_op_default_settings", - raw: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "now" - }`, + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "now"}`, + }, want: &treeNode{ tsCmpOp: true, cmpOp: "lt", @@ -212,13 +232,14 @@ func TestExtractNode(t *testing.T) { }, { name: "ok_ts_cmp_op_format_alias", - raw: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "format": "rfc3339", - "value": "now" - }`, + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "format": "rfc3339", + "value": "now"}`, + }, want: &treeNode{ tsCmpOp: true, cmpOp: "lt", @@ -232,11 +253,13 @@ func TestExtractNode(t *testing.T) { { name: "ok_check_type", - raw: `{ - "op": "check_type", - "field": "log", - "values": ["obj","arr"] - }`, + args: args{ + cfgStr: `{ + "op": "check_type", + "field": "log", + "values": ["obj","arr"] + }`, + }, want: &treeNode{ checkTypeOp: true, fieldName: "log", @@ -248,14 +271,16 @@ func TestExtractNode(t *testing.T) { }, { name: "ok_single_val", - raw: `{ - "op":"or", - "operands":[ - {"op":"equal","field":"service","values":null}, - {"op":"equal","field":"service","values":""}, - {"op":"equal","field":"service","values":"test"} - ] - }`, + args: args{ + cfgStr: `{ + "op":"or", + "operands":[ + {"op":"equal","field":"service","values":null}, + {"op":"equal","field":"service","values":""}, + {"op":"equal","field":"service","values":"test"} + ] + }`, + }, want: &treeNode{ logicalOp: "or", operands: []treeNode{ @@ -282,196 +307,247 @@ func TestExtractNode(t *testing.T) { wantErr: false, }, { - name: "error_no_op_field", - raw: `{"field": "val"}`, + name: "error_no_op_field", + args: args{ + cfgStr: `{"field": "val"}`, + }, wantErr: true, }, { - name: "error_invalid_op_name", - raw: `{"op": "invalid"}`, + name: "error_invalid_op_name", + args: args{ + cfgStr: `{"op": "invalid"}`, + }, wantErr: true, }, { - name: "error_invalid_field_op", - raw: `{"op": "equal"}`, + name: "error_invalid_field_op", + args: args{ + cfgStr: `{"op": "equal"}`, + }, wantErr: true, }, { name: "error_invalid_case_sensitive_type", - raw: `{ - "op": "equal", - "field": "a", - "values": ["abc"], - "case_sensitive": "not bool" - }`, + args: args{ + cfgStr: `{ + "op": "equal", + "field": "a", + "values": ["abc"], + 
"case_sensitive": "not bool"}`, + }, wantErr: true, }, { - name: "error_invalid_logical_op", - raw: `{"op": "or"}`, + name: "error_invalid_logical_op", + args: args{ + cfgStr: `{"op": "or"}`, + }, wantErr: true, }, { - name: "error_invalid_logical_op_operand", - raw: `{"op": "or", "operands": [{"op": "equal"}]}`, + name: "error_invalid_logical_op_operand", + args: args{ + cfgStr: `{"op": "or", "operands": [{"op": "equal"}]}`, + }, wantErr: true, }, { - name: "error_byte_len_cmp_op_no_field", - raw: `{"op":"byte_len_cmp","cmp_op":"lt","value":10}`, + name: "error_byte_len_cmp_op_no_field", + args: args{ + cfgStr: `{"op":"byte_len_cmp","cmp_op":"lt","value":10}`, + }, wantErr: true, }, { - name: "error_array_len_cmp_op_no_field", - raw: `{"op":"array_len_cmp","cmp_op":"lt","value":10}`, + name: "error_array_len_cmp_op_no_field", + args: args{ + cfgStr: `{"op":"array_len_cmp","cmp_op":"lt","value":10}`, + }, wantErr: true, }, { - name: "error_byte_len_cmp_op_field_is_not_string", - raw: `{"op":"byte_len_cmp","field":123,"cmp_op":"lt","value":10}`, + name: "error_byte_len_cmp_op_field_is_not_string", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":123,"cmp_op":"lt","value":10}`, + }, wantErr: true, }, { - name: "error_byte_len_cmp_op_no_cmp_op", - raw: `{"op":"byte_len_cmp","field":"data","value":10}`, + name: "error_byte_len_cmp_op_no_cmp_op", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","value":10}`, + }, wantErr: true, }, { - name: "error_byte_len_cmp_op_cmp_op_is_not_string", - raw: `{"op":"byte_len_cmp","field":"data","cmp_op":123,"value":10}`, + name: "error_byte_len_cmp_op_cmp_op_is_not_string", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":123,"value":10}`, + }, wantErr: true, }, { - name: "error_byte_len_cmp_op_no_cmp_value", - raw: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt"}`, + name: "error_byte_len_cmp_op_no_cmp_value", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt"}`, + }, wantErr: true, }, { - name: "error_byte_len_cmp_op_cmp_value_is_not_integer", - raw: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":"abc"}`, + name: "error_byte_len_cmp_op_cmp_value_is_not_integer", + args: args{ + cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":"abc"}`, + }, wantErr: true, }, { name: "error_byte_len_cmp_op_invalid_cmp_op", - raw: `{"op":"byte_len_cmp","field":"data","cmp_op":"ABC","value":10}`, + args: args{cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"ABC","value":10}`}, wantErr: true, }, { name: "error_byte_len_cmp_op_negative_cmp_value", - raw: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":-1}`, + args: args{cfgStr: `{"op":"byte_len_cmp","field":"data","cmp_op":"lt","value":-1}`}, wantErr: true, }, { - name: "error_ts_cmp_op_no_field", - raw: `{"op": "ts_cmp","cmp_op": "lt"}`, + name: "error_ts_cmp_op_no_field", + args: args{ + cfgStr: `{"op": "ts_cmp","cmp_op": "lt"}`, + }, wantErr: true, }, { - name: "error_ts_cmp_op_field_is_not_string", - raw: `{"op":"ts_cmp","field":123}`, + name: "error_ts_cmp_op_field_is_not_string", + args: args{ + cfgStr: `{"op":"ts_cmp","field":123}`, + }, wantErr: true, }, { - name: "error_ts_cmp_op_no_cmp_op", - raw: `{"op":"ts_cmp","field":"timestamp"}`, + name: "error_ts_cmp_op_no_cmp_op", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp"}`, + }, wantErr: true, }, { name: "error_ts_cmp_op_invalid_format_type", - raw: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "format": 1000, - "value": "now" - }`, 
+ args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "format": 1000, + "value": "now"}`, + }, wantErr: true, }, { name: "error_ts_cmp_op_invalid_value_shift_type", - raw: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "value_shift": 1000, - "format": "2006-01-02T15:04:05Z07:00", - "update_interval": "15s" - }`, + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "value_shift": 1000, + "format": "2006-01-02T15:04:05Z07:00", + "update_interval": "15s"}`, + }, wantErr: true, }, { name: "error_ts_cmp_op_invalid_update_interval_type", - raw: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "format": "2006-01-02T15:04:05Z07:00", - "update_interval": false - }`, + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "format": "2006-01-02T15:04:05Z07:00", + "update_interval": false}`, + }, wantErr: true, }, { - name: "error_ts_cmp_op_cmp_op_is_not_string", - raw: `{"op":"ts_cmp","field":"timestamp","cmp_op":123}`, + name: "error_ts_cmp_op_cmp_op_is_not_string", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":123}`, + }, wantErr: true, }, { - name: "error_ts_cmp_op_no_cmp_value", - raw: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt"}`, + name: "error_ts_cmp_op_no_cmp_value", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt"}`, + }, wantErr: true, }, { - name: "error_ts_cmp_op_cmp_value_is_not_string", - raw: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":123}`, + name: "error_ts_cmp_op_cmp_value_is_not_string", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":123}`, + }, wantErr: true, }, { - name: "error_ts_cmp_op_invalid_cmp_value", - raw: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":"qwe"}`, + name: "error_ts_cmp_op_invalid_cmp_value", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"lt","value":"qwe"}`, + }, wantErr: true, }, { - name: "error_ts_cmp_op_invalid_cmp_op", - raw: `{"op":"ts_cmp","field":"timestamp","cmp_op":"qwe","value":"2009-11-10T23:00:00Z"}`, + name: "error_ts_cmp_op_invalid_cmp_op", + args: args{ + cfgStr: `{"op":"ts_cmp","field":"timestamp","cmp_op":"qwe","value":"2009-11-10T23:00:00Z"}`, + }, wantErr: true, }, { name: "error_ts_cmp_op_invalid_update_interval", - raw: `{ - "op": "ts_cmp", - "field": "timestamp", - "cmp_op": "lt", - "value": "2009-11-10T23:00:00Z", - "update_interval": "qwe" - }`, + args: args{ + cfgStr: `{ + "op": "ts_cmp", + "field": "timestamp", + "cmp_op": "lt", + "value": "2009-11-10T23:00:00Z", + "update_interval": "qwe"}`, + }, wantErr: true, }, { name: "error_check_type_op_empty_values", - raw: `{ - "op": "check_type", - "field": "log", - "values": [] - }`, + args: args{ + cfgStr: `{ + "op": "check_type", + "field": "log", + "values": [] + }`, + }, wantErr: true, }, { name: "error_check_type_op_invalid_value", - raw: `{ - "op": "check_type", - "field": "log", - "values": ["unknown_type"] - }`, + args: args{ + cfgStr: `{ + "op": "check_type", + "field": "log", + "values": ["unknown_type"] + }`, + }, wantErr: true, }, { name: "error_check_type_op_no_field", - raw: `{ - "op": "check_type", - "values": ["obj"] - }`, + args: args{ + cfgStr: `{ + "op": "check_type", + "values": ["obj"] + }`, + }, wantErr: true, }, } @@ -479,21 +555,34 @@ func 
TestExtractNode(t *testing.T) { tt := tt t.Run(tt.name, func(t *testing.T) { t.Parallel() - - reader := bytes.NewBufferString(tt.raw) + reader := bytes.NewBufferString(tt.args.cfgStr) actionJSON, err := simplejson.NewFromReader(reader) require.NoError(t, err) - - got, err := ExtractNode(actionJSON.MustMap()) - require.Equal(t, err != nil, tt.wantErr) - + got, err := extractDoIfChecker(actionJSON) + if (err != nil) != tt.wantErr { + t.Errorf("extractDoIfChecker() error = %v, wantErr %v", err, tt.wantErr) + return + } if tt.wantErr { return } - - want, err := buildTree(tt.want) + if tt.want == nil { + assert.Nil(t, got) + return + } + wantTree, err := buildTree(tt.want) require.NoError(t, err) - assert.NoError(t, got.isEqualTo(want, 1)) + wantDoIfChecker := newChecker(wantTree) + assert.NoError(t, wantDoIfChecker.IsEqualTo(got)) }) } } + +func extractDoIfChecker(actionJSON *simplejson.Json) (*Checker, error) { + m := actionJSON.MustMap() + if m == nil { + return nil, nil + } + + return NewFromMap(m) +} From c617d857c406ad2cdc5b9431174cd939d3d8f648 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Fri, 8 Aug 2025 16:16:05 +0300 Subject: [PATCH 73/75] Rename tests --- pipeline/do_if/do_if_test.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pipeline/do_if/do_if_test.go b/pipeline/do_if/do_if_test.go index eab8167c5..f36a0391e 100644 --- a/pipeline/do_if/do_if_test.go +++ b/pipeline/do_if/do_if_test.go @@ -134,7 +134,7 @@ func TestBuildNodes(t *testing.T) { wantErr bool }{ { - name: "ok_field_op_node", + name: "ok_string_op_node", tree: treeNode{ stringOp: "equal", fieldName: "log.pod", @@ -152,7 +152,7 @@ func TestBuildNodes(t *testing.T) { }, }, { - name: "ok_field_op_node_case_insensitive", + name: "ok_string_op_node_case_insensitive", tree: treeNode{ stringOp: "equal", fieldName: "log.pod", @@ -313,14 +313,14 @@ func TestBuildNodes(t *testing.T) { }, }, { - name: "err_field_op_node_empty_field", + name: "err_string_op_node_empty_field", tree: treeNode{ stringOp: "equal", }, wantErr: true, }, { - name: "err_field_op_node_empty_values", + name: "err_string_op_node_empty_values", tree: treeNode{ stringOp: "equal", fieldName: "pod", @@ -328,7 +328,7 @@ func TestBuildNodes(t *testing.T) { wantErr: true, }, { - name: "err_field_op_node_invalid_regex", + name: "err_string_op_node_invalid_regex", tree: treeNode{ stringOp: "regex", fieldName: "pod", @@ -337,7 +337,7 @@ func TestBuildNodes(t *testing.T) { wantErr: true, }, { - name: "err_field_op_node_invalid_op_type", + name: "err_string_op_node_invalid_op_type", tree: treeNode{ stringOp: "noop", fieldName: "pod", @@ -1317,7 +1317,7 @@ func TestNodeIsEqual(t *testing.T) { wantErr: true, }, { - name: "not_equal_field_op_mismatch", + name: "not_equal_string_op_mismatch", t1: treeNode{ stringOp: "equal", fieldName: "service", @@ -1333,7 +1333,7 @@ func TestNodeIsEqual(t *testing.T) { wantErr: true, }, { - name: "not_equal_field_op_mismatch_2", + name: "not_equal_string_op_mismatch_2", t1: treeNode{ stringOp: "prefix", fieldName: "service", @@ -1349,7 +1349,7 @@ func TestNodeIsEqual(t *testing.T) { wantErr: true, }, { - name: "not_equal_field_op_mismatch_3", + name: "not_equal_string_op_mismatch_3", t1: treeNode{ stringOp: "regex", fieldName: "service", From 5cf374a841478b4c939dfa0e88d4a5990d1537d6 Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Fri, 8 Aug 2025 16:40:49 +0300 Subject: [PATCH 74/75] Refactor --- pipeline/antispam/ctor.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) 
diff --git a/pipeline/antispam/ctor.go b/pipeline/antispam/ctor.go index 95e978803..aa23afdbe 100644 --- a/pipeline/antispam/ctor.go +++ b/pipeline/antispam/ctor.go @@ -1,7 +1,6 @@ package antispam import ( - "errors" "fmt" "github.com/ozontech/file.d/cfg" @@ -32,13 +31,13 @@ func extractAntispam(node map[string]any) ([]Rule, int, error) { var rules []Rule - rawRules, err := ctor.Get[[]any](node, fieldNameRules) - if err == nil { - rules, err = extractRules(rawRules) - if err != nil { - return nil, 0, err - } - } else if errors.Is(err, ctor.ErrTypeMismatch) { + rawRules, err := ctor.Get[[]any](node, fieldNameRules, nil) + if err != nil { + return nil, 0, err + } + + rules, err = extractRules(rawRules) + if err != nil { return nil, 0, err } From ab9dc6a53bcc9aac9cb83f74bcdec95704f6f0ef Mon Sep 17 00:00:00 2001 From: george pogosyan Date: Mon, 11 Aug 2025 13:10:08 +0300 Subject: [PATCH 75/75] Refactor --- cfg/matchrule/matchrule.go | 8 ++++---- pipeline/do_if/converter.go | 4 ++-- pipeline/do_if/converter_test.go | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cfg/matchrule/matchrule.go b/cfg/matchrule/matchrule.go index c8c21a735..4e123fb4b 100644 --- a/cfg/matchrule/matchrule.go +++ b/cfg/matchrule/matchrule.go @@ -27,8 +27,8 @@ func (m *Mode) UnmarshalJSON(i []byte) error { return nil } -func ModeToString(m Mode) string { - switch m { +func (m *Mode) ToString() string { + switch *m { case ModeContains: return "contains" case ModePrefix: @@ -207,8 +207,8 @@ var ( condOrBytes = []byte(`"or"`) ) -func CondToString(c Cond) string { - switch c { +func (c *Cond) ToString() string { + switch *c { case CondAnd: return "and" case CondOr: diff --git a/pipeline/do_if/converter.go b/pipeline/do_if/converter.go index e9f667984..b79d6128b 100644 --- a/pipeline/do_if/converter.go +++ b/pipeline/do_if/converter.go @@ -10,7 +10,7 @@ import ( func RuleToNode(rule matchrule.Rule, dataTypeTag string) (Node, error) { values := arrStringToArrBytes(rule.Values) node, err := newStringOpNode( - matchrule.ModeToString(rule.Mode), + rule.Mode.ToString(), !rule.CaseInsensitive, values, "", @@ -46,5 +46,5 @@ func RuleSetToNode(ruleSet matchrule.RuleSet, dataTypeTag string) (Node, error) operands = append(operands, operand) } - return newLogicalNode(matchrule.CondToString(ruleSet.Cond), operands) + return newLogicalNode(ruleSet.Cond.ToString(), operands) } diff --git a/pipeline/do_if/converter_test.go b/pipeline/do_if/converter_test.go index a202d8dd9..0f36cdb09 100644 --- a/pipeline/do_if/converter_test.go +++ b/pipeline/do_if/converter_test.go @@ -29,7 +29,7 @@ func TestRuleSetToNode(t *testing.T) { require.NoError(t, err) logicNode := rawNode.(*logicalNode) - require.Equal(t, logicNode.op.String(), matchrule.CondToString(ruleSet.Cond)) + require.Equal(t, logicNode.op.String(), ruleSet.Cond.ToString()) require.Equal(t, len(logicNode.operands), len(ruleSet.Rules)) for i := range len(logicNode.operands) { @@ -65,7 +65,7 @@ func cmpRuleAndNode(t *testing.T, rule matchrule.Rule, node Node) { require.Equal(t, c.MinValLen, rule.GetMinValueSize()) require.Equal(t, c.MinValLen, rule.GetMinValueSize()) require.True(t, c.ValuesBySize == nil) - require.Equal(t, c.Op.String(), matchrule.ModeToString(rule.Mode)) + require.Equal(t, c.Op.String(), rule.Mode.ToString()) require.Equal(t, c.CaseSensitive, !rule.CaseInsensitive) require.Equal(t, c.Values, arrStringToArrBytes(rule.Values)) }