diff --git a/src/Substrait.Core/Expression/FieldReference.cs b/src/Substrait.Core/Expression/FieldReference.cs new file mode 100644 index 0000000..e706972 --- /dev/null +++ b/src/Substrait.Core/Expression/FieldReference.cs @@ -0,0 +1,9 @@ +namespace Substrait.Expression +{ + /// + /// The FIELD_REFERENCE relational expression, + /// + public class FieldReference : RelExpression + { + } +} diff --git a/src/Substrait.Core/Expression/RelExpression.cs b/src/Substrait.Core/Expression/RelExpression.cs new file mode 100644 index 0000000..b579d78 --- /dev/null +++ b/src/Substrait.Core/Expression/RelExpression.cs @@ -0,0 +1,14 @@ +using Substrait.Relation; + +namespace Substrait.Expression +{ + /// + /// Base type for all relational expressions, + /// + abstract public class RelExpression + { + public virtual void Accept(SubstraitRelVisitor visitor) + { + } + } +} diff --git a/src/Substrait.Core/ProtoConverter.cs b/src/Substrait.Core/ProtoConverter.cs new file mode 100644 index 0000000..24f782d --- /dev/null +++ b/src/Substrait.Core/ProtoConverter.cs @@ -0,0 +1,154 @@ +using Substrait.Expression; +using Substrait.Protobuf; +using Substrait.Relation; +using System.Collections.Immutable; +using ProtoRel = Substrait.Protobuf.Rel; +using ProtoRex = Substrait.Protobuf.Expression; +using Rel = Substrait.Relation.Rel; + +namespace Substrait.Core +{ + /// + /// Utility to convert Substrait protobuf messages to/from charp domain-objects. + /// + public class ProtoConverter + { + /// + /// Create domain object from a protobuf encoded message + /// + /// Relation encoded as protobuf message + /// corresponding to message's relation + public Rel ToRel(ProtoRel protoRel) + { + var relType = protoRel.RelTypeCase; + + return relType switch + { + ProtoRel.RelTypeOneofCase.Aggregate => CreateAgg(protoRel.Aggregate), + ProtoRel.RelTypeOneofCase.Cross => CreateCross(protoRel.Cross), + ProtoRel.RelTypeOneofCase.Fetch => CreateFetch(protoRel.Fetch), + ProtoRel.RelTypeOneofCase.Filter => CreateFilter(protoRel.Filter), + ProtoRel.RelTypeOneofCase.Join => CreateJoin(protoRel.Join), + ProtoRel.RelTypeOneofCase.Project => CreateProject(protoRel.Project), + ProtoRel.RelTypeOneofCase.Read => CreateRead(protoRel.Read), + ProtoRel.RelTypeOneofCase.Sort => CreateSort(protoRel.Sort), + _ => throw new NotImplementedException(relType.ToString()), + }; + } + + /// + /// Create domain object specific to relational expression type from protobuf message + /// + /// Relational expression encoded as protobuf message + /// corresponding to message's expression type + private RelExpression ToExpression(ProtoRex protoRex) + { + var rexType = protoRex.RexTypeCase; + + return rexType switch + { + ProtoRex.RexTypeOneofCase.Selection => CreateFieldReference(protoRex.Selection), + ProtoRex.RexTypeOneofCase.None => throw new NotImplementedException(), + ProtoRex.RexTypeOneofCase.Literal => throw new NotImplementedException(), + ProtoRex.RexTypeOneofCase.ScalarFunction => throw new NotImplementedException(), + ProtoRex.RexTypeOneofCase.WindowFunction => throw new NotImplementedException(), + ProtoRex.RexTypeOneofCase.IfThen => throw new NotImplementedException(), + ProtoRex.RexTypeOneofCase.SwitchExpression => throw new NotImplementedException(), + ProtoRex.RexTypeOneofCase.SingularOrList => throw new NotImplementedException(), + ProtoRex.RexTypeOneofCase.MultiOrList => throw new NotImplementedException(), + ProtoRex.RexTypeOneofCase.Cast => throw new NotImplementedException(), + ProtoRex.RexTypeOneofCase.Subquery => throw new NotImplementedException(), + ProtoRex.RexTypeOneofCase.Nested => throw new NotImplementedException(), + ProtoRex.RexTypeOneofCase.Enum => throw new NotImplementedException(), + _ => throw new NotImplementedException(rexType.ToString()), + }; + } + + private Rel CreateAgg(AggregateRel agg) + { + var input = ToRel(agg.Input); + var inputs = ImmutableList.Create(input); + return new Aggregate() + { + Inputs = inputs, + }; + } + + private Rel CreateCross(CrossRel cross) + { + var left = ToRel(cross.Left); + var right = ToRel(cross.Right); + var inputs = ImmutableList.Create(left, right); + return new Cross() + { + Inputs = inputs, + }; + } + + private Rel CreateFetch(FetchRel fetch) + { + var input = ToRel(fetch.Input); + var inputs = ImmutableList.Create(input); + return new Fetch() + { + Count = fetch.Count, + Inputs = inputs, + }; + } + + private Rel CreateFilter(FilterRel filter) + { + var input = ToRel(filter.Input); + var inputs = ImmutableList.Create(input); + return new Filter() + { + Inputs = inputs, + }; + } + + private Rel CreateJoin(JoinRel join) + { + var left = ToRel(join.Left); + var right = ToRel(join.Right); + var _ = ToExpression(join.Expression); + var inputs = ImmutableList.Create(left, right); + return new Join() + { + Inputs = inputs, + }; + } + + private Rel CreateProject(ProjectRel project) + { + var input = ToRel(project.Input); + var inputs = ImmutableList.Create(input); + return new Project() + { + Inputs = inputs, + }; + } + + private Rel CreateRead(ReadRel read) + { + return new Read() + { + Inputs = ImmutableList.Empty, + }; + } + + private Rel CreateSort(SortRel sort) + { + var input = ToRel(sort.Input); + var inputs = ImmutableList.Create(input); + return new Sort() + { + Inputs = inputs, + }; + } + + private RelExpression CreateFieldReference(ProtoRex.Types.FieldReference field) + { + return new FieldReference(); + } + } +} diff --git a/src/Substrait.Core/Relation/Cross.cs b/src/Substrait.Core/Relation/Cross.cs new file mode 100644 index 0000000..0cad61d --- /dev/null +++ b/src/Substrait.Core/Relation/Cross.cs @@ -0,0 +1,9 @@ +namespace Substrait.Relation +{ + /// + /// The CROSS relational operator representing cartesian product, + /// + public class Cross : Rel + { + } +} diff --git a/src/Substrait.Core/Relation/Fetch.cs b/src/Substrait.Core/Relation/Fetch.cs index 4ce5e36..84b1b18 100644 --- a/src/Substrait.Core/Relation/Fetch.cs +++ b/src/Substrait.Core/Relation/Fetch.cs @@ -5,5 +5,6 @@ /// public class Fetch : Rel { + public long Count { get; init; } } } diff --git a/src/Substrait.Core/Relation/Join.cs b/src/Substrait.Core/Relation/Join.cs index 0289d8a..72dc802 100644 --- a/src/Substrait.Core/Relation/Join.cs +++ b/src/Substrait.Core/Relation/Join.cs @@ -1,9 +1,65 @@ -namespace Substrait.Relation +using Substrait.Expression; +using Substrait.Protobuf; + +namespace Substrait.Relation { /// /// The binary JOIN relational operator, /// public class Join : Rel { + public RelExpression? Condition { get; init; } + public RelExpression? PostJoinFilter { get; init; } + public JoinType JoinType { get; init; } = JoinType.UNKNOWN; + + public override void Accept(SubstraitRelVisitor visitor) + { + base.Accept(visitor); + Condition?.Accept(visitor); + PostJoinFilter?.Accept(visitor); + } + } + + public enum JoinType + { + /// + /// + /// + ANTI, + + /// + /// + /// + INNER, + + /// + /// + /// + LEFT, + + /// + /// + /// + OUTER, + + /// + /// + /// + RIGHT, + + /// + /// + /// + SEMI, + + /// + /// + /// + SINGLE, + + /// + /// + /// + UNKNOWN } } diff --git a/src/Substrait.Core/Relation/Rel.cs b/src/Substrait.Core/Relation/Rel.cs index 8730c30..13fbfe5 100644 --- a/src/Substrait.Core/Relation/Rel.cs +++ b/src/Substrait.Core/Relation/Rel.cs @@ -5,5 +5,17 @@ /// abstract public class Rel { + /// + /// Input relations to this relation + /// + public IList Inputs { get; init; } = new List(); + + public virtual void Accept(SubstraitRelVisitor visitor) + { + foreach (var input in Inputs) + { + input.Accept(visitor); + } + } } } diff --git a/src/Substrait.Core/SubstraitRelVisitor.cs b/src/Substrait.Core/SubstraitRelVisitor.cs index 3a48f12..361fd69 100644 --- a/src/Substrait.Core/SubstraitRelVisitor.cs +++ b/src/Substrait.Core/SubstraitRelVisitor.cs @@ -1,6 +1,4 @@ -using Substrait.Relation; - -namespace Substrait.Core +namespace Substrait.Relation { /// /// Visitor to transform, compile, and/or process SQL logical operators represented using Substrait. The visitor has @@ -99,7 +97,7 @@ public void Visit(Sort sort) Fallback(sort); } - public void Fallback(Rel _) + protected void Fallback(object _) { throw new InvalidOperationException(); } diff --git a/src/Substrait.Lineage/LineageExtractorRelVisitor.cs b/src/Substrait.Lineage/LineageExtractorRelVisitor.cs new file mode 100644 index 0000000..8af2dc4 --- /dev/null +++ b/src/Substrait.Lineage/LineageExtractorRelVisitor.cs @@ -0,0 +1,20 @@ +using Substrait.Core; +using Substrait.Relation; +using ProtoRel = Substrait.Protobuf.Rel; + +namespace Substrait.Lineage +{ + /// + /// A concrete implementation of for processing Substrait relations and extracting + /// lineage information. + /// + public class LineageExtractorRelVisitor : SubstraitRelVisitor + { + public void Execute(ProtoRel protoRel) + { + ProtoConverter converter = new(); + var rel = converter.ToRel(protoRel); + rel.Accept(this); + } + } +} diff --git a/src/Substrait.Lineage/Substrait.Lineage.csproj b/src/Substrait.Lineage/Substrait.Lineage.csproj new file mode 100644 index 0000000..c93dd2c --- /dev/null +++ b/src/Substrait.Lineage/Substrait.Lineage.csproj @@ -0,0 +1,13 @@ + + + + net6.0 + enable + enable + + + + + + + diff --git a/substrait-csharp.sln b/substrait-csharp.sln index 7efe320..42989cf 100644 --- a/substrait-csharp.sln +++ b/substrait-csharp.sln @@ -5,6 +5,12 @@ VisualStudioVersion = 17.3.33027.108 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Substrait.Core", "src\Substrait.Core\Substrait.Core.csproj", "{C33A4305-4A80-4A92-AEF5-FB304D8BA6C7}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Substrait.Lineage", "src\Substrait.Lineage\Substrait.Lineage.csproj", "{365DAF8C-5336-4845-A297-38584F4DB328}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Substrait.Lineage.Tests", "tests\Substrait.Lineage.Tests\Substrait.Lineage.Tests.csproj", "{060D3D24-9395-4C8D-8C0C-95EDF2A86931}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Substrait.Core.Tests", "tests\Substrait.Core.Tests\Substrait.Core.Tests.csproj", "{590A26FC-0099-4A5E-8833-8D6CF5814C51}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -15,6 +21,18 @@ Global {C33A4305-4A80-4A92-AEF5-FB304D8BA6C7}.Debug|Any CPU.Build.0 = Debug|Any CPU {C33A4305-4A80-4A92-AEF5-FB304D8BA6C7}.Release|Any CPU.ActiveCfg = Release|Any CPU {C33A4305-4A80-4A92-AEF5-FB304D8BA6C7}.Release|Any CPU.Build.0 = Release|Any CPU + {365DAF8C-5336-4845-A297-38584F4DB328}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {365DAF8C-5336-4845-A297-38584F4DB328}.Debug|Any CPU.Build.0 = Debug|Any CPU + {365DAF8C-5336-4845-A297-38584F4DB328}.Release|Any CPU.ActiveCfg = Release|Any CPU + {365DAF8C-5336-4845-A297-38584F4DB328}.Release|Any CPU.Build.0 = Release|Any CPU + {060D3D24-9395-4C8D-8C0C-95EDF2A86931}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {060D3D24-9395-4C8D-8C0C-95EDF2A86931}.Debug|Any CPU.Build.0 = Debug|Any CPU + {060D3D24-9395-4C8D-8C0C-95EDF2A86931}.Release|Any CPU.ActiveCfg = Release|Any CPU + {060D3D24-9395-4C8D-8C0C-95EDF2A86931}.Release|Any CPU.Build.0 = Release|Any CPU + {590A26FC-0099-4A5E-8833-8D6CF5814C51}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {590A26FC-0099-4A5E-8833-8D6CF5814C51}.Debug|Any CPU.Build.0 = Debug|Any CPU + {590A26FC-0099-4A5E-8833-8D6CF5814C51}.Release|Any CPU.ActiveCfg = Release|Any CPU + {590A26FC-0099-4A5E-8833-8D6CF5814C51}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/tests/Substrait.Core.Tests/Relation/ProtoConverterTests.cs b/tests/Substrait.Core.Tests/Relation/ProtoConverterTests.cs new file mode 100644 index 0000000..44f8910 --- /dev/null +++ b/tests/Substrait.Core.Tests/Relation/ProtoConverterTests.cs @@ -0,0 +1,27 @@ +using Substrait.Core; +using Substrait.Protobuf; + +namespace Substrait.Relation.Tests +{ + [TestClass()] + public class ProtoConverterTests + { + [TestMethod()] + public void ToRel_FetchRel_Test() + { + FetchRel fetchRel = new FetchRel(); + fetchRel.Count = 1; // limit 1 + var rel = new Protobuf.Rel() + { + Fetch = fetchRel + }; + + fetchRel.Input = rel; + + ProtoConverter converter = new(); + var result = converter.ToRel(rel) as Fetch; + Assert.IsNotNull(result); + Assert.AreEqual(1, result.Count); + } + } +} \ No newline at end of file diff --git a/tests/Substrait.Core.Tests/Substrait.Core.Tests.csproj b/tests/Substrait.Core.Tests/Substrait.Core.Tests.csproj new file mode 100644 index 0000000..29b0b9a --- /dev/null +++ b/tests/Substrait.Core.Tests/Substrait.Core.Tests.csproj @@ -0,0 +1,22 @@ + + + + net6.0 + enable + enable + + false + + + + + + + + + + + + + + diff --git a/tests/Substrait.Core.Tests/Usings.cs b/tests/Substrait.Core.Tests/Usings.cs new file mode 100644 index 0000000..ab67c7e --- /dev/null +++ b/tests/Substrait.Core.Tests/Usings.cs @@ -0,0 +1 @@ +global using Microsoft.VisualStudio.TestTools.UnitTesting; \ No newline at end of file diff --git a/tests/Substrait.Lineage.Tests/LineageExtractorVisitorTest.cs b/tests/Substrait.Lineage.Tests/LineageExtractorVisitorTest.cs new file mode 100644 index 0000000..04927d1 --- /dev/null +++ b/tests/Substrait.Lineage.Tests/LineageExtractorVisitorTest.cs @@ -0,0 +1,19 @@ +using Substrait.Protobuf; + +namespace Substrait.Lineage.Tests +{ + [TestClass] + public class LineageExtractorVisitorTest + { + [TestMethod] + public void TestMethod1() + { + Plan plan; + using var planPb = File.OpenRead(@"TestData\tpcds\Q02.pb"); + plan = Plan.Parser.ParseFrom(planPb); + + var lineageVisitor = new LineageExtractorRelVisitor(); + lineageVisitor.Execute(plan.Relations[0].Root.Input); + } + } +} \ No newline at end of file diff --git a/tests/Substrait.Lineage.Tests/Substrait.Lineage.Tests.csproj b/tests/Substrait.Lineage.Tests/Substrait.Lineage.Tests.csproj new file mode 100644 index 0000000..5ab0847 --- /dev/null +++ b/tests/Substrait.Lineage.Tests/Substrait.Lineage.Tests.csproj @@ -0,0 +1,27 @@ + + + + net6.0 + enable + enable + + false + + + + + + + + + + + + + + + + + + + diff --git a/tests/Substrait.Lineage.Tests/TestData/tpcds/Q02.pb b/tests/Substrait.Lineage.Tests/TestData/tpcds/Q02.pb new file mode 100644 index 0000000..721a3a4 Binary files /dev/null and b/tests/Substrait.Lineage.Tests/TestData/tpcds/Q02.pb differ diff --git a/tests/Substrait.Lineage.Tests/Usings.cs b/tests/Substrait.Lineage.Tests/Usings.cs new file mode 100644 index 0000000..ab67c7e --- /dev/null +++ b/tests/Substrait.Lineage.Tests/Usings.cs @@ -0,0 +1 @@ +global using Microsoft.VisualStudio.TestTools.UnitTesting; \ No newline at end of file