diff --git a/src/Substrait.Core/Expression/FieldReference.cs b/src/Substrait.Core/Expression/FieldReference.cs
new file mode 100644
index 0000000..e706972
--- /dev/null
+++ b/src/Substrait.Core/Expression/FieldReference.cs
@@ -0,0 +1,9 @@
+namespace Substrait.Expression
+{
+ ///
+ /// The FIELD_REFERENCE relational expression,
+ ///
+ public class FieldReference : RelExpression
+ {
+ }
+}
diff --git a/src/Substrait.Core/Expression/RelExpression.cs b/src/Substrait.Core/Expression/RelExpression.cs
new file mode 100644
index 0000000..b579d78
--- /dev/null
+++ b/src/Substrait.Core/Expression/RelExpression.cs
@@ -0,0 +1,14 @@
+using Substrait.Relation;
+
+namespace Substrait.Expression
+{
+ ///
+ /// Base type for all relational expressions,
+ ///
+ abstract public class RelExpression
+ {
+ public virtual void Accept(SubstraitRelVisitor visitor)
+ {
+ }
+ }
+}
diff --git a/src/Substrait.Core/ProtoConverter.cs b/src/Substrait.Core/ProtoConverter.cs
new file mode 100644
index 0000000..24f782d
--- /dev/null
+++ b/src/Substrait.Core/ProtoConverter.cs
@@ -0,0 +1,154 @@
+using Substrait.Expression;
+using Substrait.Protobuf;
+using Substrait.Relation;
+using System.Collections.Immutable;
+using ProtoRel = Substrait.Protobuf.Rel;
+using ProtoRex = Substrait.Protobuf.Expression;
+using Rel = Substrait.Relation.Rel;
+
+namespace Substrait.Core
+{
+ ///
+ /// Utility to convert Substrait protobuf messages to/from charp domain-objects.
+ ///
+ public class ProtoConverter
+ {
+ ///
+ /// Create domain object from a protobuf encoded message
+ ///
+ /// Relation encoded as protobuf message
+ /// corresponding to message's relation
+ public Rel ToRel(ProtoRel protoRel)
+ {
+ var relType = protoRel.RelTypeCase;
+
+ return relType switch
+ {
+ ProtoRel.RelTypeOneofCase.Aggregate => CreateAgg(protoRel.Aggregate),
+ ProtoRel.RelTypeOneofCase.Cross => CreateCross(protoRel.Cross),
+ ProtoRel.RelTypeOneofCase.Fetch => CreateFetch(protoRel.Fetch),
+ ProtoRel.RelTypeOneofCase.Filter => CreateFilter(protoRel.Filter),
+ ProtoRel.RelTypeOneofCase.Join => CreateJoin(protoRel.Join),
+ ProtoRel.RelTypeOneofCase.Project => CreateProject(protoRel.Project),
+ ProtoRel.RelTypeOneofCase.Read => CreateRead(protoRel.Read),
+ ProtoRel.RelTypeOneofCase.Sort => CreateSort(protoRel.Sort),
+ _ => throw new NotImplementedException(relType.ToString()),
+ };
+ }
+
+ ///
+ /// Create domain object specific to relational expression type from protobuf message
+ ///
+ /// Relational expression encoded as protobuf message
+ /// corresponding to message's expression type
+ private RelExpression ToExpression(ProtoRex protoRex)
+ {
+ var rexType = protoRex.RexTypeCase;
+
+ return rexType switch
+ {
+ ProtoRex.RexTypeOneofCase.Selection => CreateFieldReference(protoRex.Selection),
+ ProtoRex.RexTypeOneofCase.None => throw new NotImplementedException(),
+ ProtoRex.RexTypeOneofCase.Literal => throw new NotImplementedException(),
+ ProtoRex.RexTypeOneofCase.ScalarFunction => throw new NotImplementedException(),
+ ProtoRex.RexTypeOneofCase.WindowFunction => throw new NotImplementedException(),
+ ProtoRex.RexTypeOneofCase.IfThen => throw new NotImplementedException(),
+ ProtoRex.RexTypeOneofCase.SwitchExpression => throw new NotImplementedException(),
+ ProtoRex.RexTypeOneofCase.SingularOrList => throw new NotImplementedException(),
+ ProtoRex.RexTypeOneofCase.MultiOrList => throw new NotImplementedException(),
+ ProtoRex.RexTypeOneofCase.Cast => throw new NotImplementedException(),
+ ProtoRex.RexTypeOneofCase.Subquery => throw new NotImplementedException(),
+ ProtoRex.RexTypeOneofCase.Nested => throw new NotImplementedException(),
+ ProtoRex.RexTypeOneofCase.Enum => throw new NotImplementedException(),
+ _ => throw new NotImplementedException(rexType.ToString()),
+ };
+ }
+
+ private Rel CreateAgg(AggregateRel agg)
+ {
+ var input = ToRel(agg.Input);
+ var inputs = ImmutableList.Create(input);
+ return new Aggregate()
+ {
+ Inputs = inputs,
+ };
+ }
+
+ private Rel CreateCross(CrossRel cross)
+ {
+ var left = ToRel(cross.Left);
+ var right = ToRel(cross.Right);
+ var inputs = ImmutableList.Create(left, right);
+ return new Cross()
+ {
+ Inputs = inputs,
+ };
+ }
+
+ private Rel CreateFetch(FetchRel fetch)
+ {
+ var input = ToRel(fetch.Input);
+ var inputs = ImmutableList.Create(input);
+ return new Fetch()
+ {
+ Count = fetch.Count,
+ Inputs = inputs,
+ };
+ }
+
+ private Rel CreateFilter(FilterRel filter)
+ {
+ var input = ToRel(filter.Input);
+ var inputs = ImmutableList.Create(input);
+ return new Filter()
+ {
+ Inputs = inputs,
+ };
+ }
+
+ private Rel CreateJoin(JoinRel join)
+ {
+ var left = ToRel(join.Left);
+ var right = ToRel(join.Right);
+ var _ = ToExpression(join.Expression);
+ var inputs = ImmutableList.Create(left, right);
+ return new Join()
+ {
+ Inputs = inputs,
+ };
+ }
+
+ private Rel CreateProject(ProjectRel project)
+ {
+ var input = ToRel(project.Input);
+ var inputs = ImmutableList.Create(input);
+ return new Project()
+ {
+ Inputs = inputs,
+ };
+ }
+
+ private Rel CreateRead(ReadRel read)
+ {
+ return new Read()
+ {
+ Inputs = ImmutableList.Empty,
+ };
+ }
+
+ private Rel CreateSort(SortRel sort)
+ {
+ var input = ToRel(sort.Input);
+ var inputs = ImmutableList.Create(input);
+ return new Sort()
+ {
+ Inputs = inputs,
+ };
+ }
+
+ private RelExpression CreateFieldReference(ProtoRex.Types.FieldReference field)
+ {
+ return new FieldReference();
+ }
+ }
+}
diff --git a/src/Substrait.Core/Relation/Cross.cs b/src/Substrait.Core/Relation/Cross.cs
new file mode 100644
index 0000000..0cad61d
--- /dev/null
+++ b/src/Substrait.Core/Relation/Cross.cs
@@ -0,0 +1,9 @@
+namespace Substrait.Relation
+{
+ ///
+ /// The CROSS relational operator representing cartesian product,
+ ///
+ public class Cross : Rel
+ {
+ }
+}
diff --git a/src/Substrait.Core/Relation/Fetch.cs b/src/Substrait.Core/Relation/Fetch.cs
index 4ce5e36..84b1b18 100644
--- a/src/Substrait.Core/Relation/Fetch.cs
+++ b/src/Substrait.Core/Relation/Fetch.cs
@@ -5,5 +5,6 @@
///
public class Fetch : Rel
{
+ public long Count { get; init; }
}
}
diff --git a/src/Substrait.Core/Relation/Join.cs b/src/Substrait.Core/Relation/Join.cs
index 0289d8a..72dc802 100644
--- a/src/Substrait.Core/Relation/Join.cs
+++ b/src/Substrait.Core/Relation/Join.cs
@@ -1,9 +1,65 @@
-namespace Substrait.Relation
+using Substrait.Expression;
+using Substrait.Protobuf;
+
+namespace Substrait.Relation
{
///
/// The binary JOIN relational operator,
///
public class Join : Rel
{
+ public RelExpression? Condition { get; init; }
+ public RelExpression? PostJoinFilter { get; init; }
+ public JoinType JoinType { get; init; } = JoinType.UNKNOWN;
+
+ public override void Accept(SubstraitRelVisitor visitor)
+ {
+ base.Accept(visitor);
+ Condition?.Accept(visitor);
+ PostJoinFilter?.Accept(visitor);
+ }
+ }
+
+ public enum JoinType
+ {
+ ///
+ ///
+ ///
+ ANTI,
+
+ ///
+ ///
+ ///
+ INNER,
+
+ ///
+ ///
+ ///
+ LEFT,
+
+ ///
+ ///
+ ///
+ OUTER,
+
+ ///
+ ///
+ ///
+ RIGHT,
+
+ ///
+ ///
+ ///
+ SEMI,
+
+ ///
+ ///
+ ///
+ SINGLE,
+
+ ///
+ ///
+ ///
+ UNKNOWN
}
}
diff --git a/src/Substrait.Core/Relation/Rel.cs b/src/Substrait.Core/Relation/Rel.cs
index 8730c30..13fbfe5 100644
--- a/src/Substrait.Core/Relation/Rel.cs
+++ b/src/Substrait.Core/Relation/Rel.cs
@@ -5,5 +5,17 @@
///
abstract public class Rel
{
+ ///
+ /// Input relations to this relation
+ ///
+ public IList Inputs { get; init; } = new List();
+
+ public virtual void Accept(SubstraitRelVisitor visitor)
+ {
+ foreach (var input in Inputs)
+ {
+ input.Accept(visitor);
+ }
+ }
}
}
diff --git a/src/Substrait.Core/SubstraitRelVisitor.cs b/src/Substrait.Core/SubstraitRelVisitor.cs
index 3a48f12..361fd69 100644
--- a/src/Substrait.Core/SubstraitRelVisitor.cs
+++ b/src/Substrait.Core/SubstraitRelVisitor.cs
@@ -1,6 +1,4 @@
-using Substrait.Relation;
-
-namespace Substrait.Core
+namespace Substrait.Relation
{
///
/// Visitor to transform, compile, and/or process SQL logical operators represented using Substrait. The visitor has
@@ -99,7 +97,7 @@ public void Visit(Sort sort)
Fallback(sort);
}
- public void Fallback(Rel _)
+ protected void Fallback(object _)
{
throw new InvalidOperationException();
}
diff --git a/src/Substrait.Lineage/LineageExtractorRelVisitor.cs b/src/Substrait.Lineage/LineageExtractorRelVisitor.cs
new file mode 100644
index 0000000..8af2dc4
--- /dev/null
+++ b/src/Substrait.Lineage/LineageExtractorRelVisitor.cs
@@ -0,0 +1,20 @@
+using Substrait.Core;
+using Substrait.Relation;
+using ProtoRel = Substrait.Protobuf.Rel;
+
+namespace Substrait.Lineage
+{
+ ///
+ /// A concrete implementation of for processing Substrait relations and extracting
+ /// lineage information.
+ ///
+ public class LineageExtractorRelVisitor : SubstraitRelVisitor
+ {
+ public void Execute(ProtoRel protoRel)
+ {
+ ProtoConverter converter = new();
+ var rel = converter.ToRel(protoRel);
+ rel.Accept(this);
+ }
+ }
+}
diff --git a/src/Substrait.Lineage/Substrait.Lineage.csproj b/src/Substrait.Lineage/Substrait.Lineage.csproj
new file mode 100644
index 0000000..c93dd2c
--- /dev/null
+++ b/src/Substrait.Lineage/Substrait.Lineage.csproj
@@ -0,0 +1,13 @@
+
+
+
+ net6.0
+ enable
+ enable
+
+
+
+
+
+
+
diff --git a/substrait-csharp.sln b/substrait-csharp.sln
index 7efe320..42989cf 100644
--- a/substrait-csharp.sln
+++ b/substrait-csharp.sln
@@ -5,6 +5,12 @@ VisualStudioVersion = 17.3.33027.108
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Substrait.Core", "src\Substrait.Core\Substrait.Core.csproj", "{C33A4305-4A80-4A92-AEF5-FB304D8BA6C7}"
EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Substrait.Lineage", "src\Substrait.Lineage\Substrait.Lineage.csproj", "{365DAF8C-5336-4845-A297-38584F4DB328}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Substrait.Lineage.Tests", "tests\Substrait.Lineage.Tests\Substrait.Lineage.Tests.csproj", "{060D3D24-9395-4C8D-8C0C-95EDF2A86931}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Substrait.Core.Tests", "tests\Substrait.Core.Tests\Substrait.Core.Tests.csproj", "{590A26FC-0099-4A5E-8833-8D6CF5814C51}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -15,6 +21,18 @@ Global
{C33A4305-4A80-4A92-AEF5-FB304D8BA6C7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C33A4305-4A80-4A92-AEF5-FB304D8BA6C7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C33A4305-4A80-4A92-AEF5-FB304D8BA6C7}.Release|Any CPU.Build.0 = Release|Any CPU
+ {365DAF8C-5336-4845-A297-38584F4DB328}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {365DAF8C-5336-4845-A297-38584F4DB328}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {365DAF8C-5336-4845-A297-38584F4DB328}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {365DAF8C-5336-4845-A297-38584F4DB328}.Release|Any CPU.Build.0 = Release|Any CPU
+ {060D3D24-9395-4C8D-8C0C-95EDF2A86931}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {060D3D24-9395-4C8D-8C0C-95EDF2A86931}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {060D3D24-9395-4C8D-8C0C-95EDF2A86931}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {060D3D24-9395-4C8D-8C0C-95EDF2A86931}.Release|Any CPU.Build.0 = Release|Any CPU
+ {590A26FC-0099-4A5E-8833-8D6CF5814C51}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {590A26FC-0099-4A5E-8833-8D6CF5814C51}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {590A26FC-0099-4A5E-8833-8D6CF5814C51}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {590A26FC-0099-4A5E-8833-8D6CF5814C51}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/tests/Substrait.Core.Tests/Relation/ProtoConverterTests.cs b/tests/Substrait.Core.Tests/Relation/ProtoConverterTests.cs
new file mode 100644
index 0000000..44f8910
--- /dev/null
+++ b/tests/Substrait.Core.Tests/Relation/ProtoConverterTests.cs
@@ -0,0 +1,27 @@
+using Substrait.Core;
+using Substrait.Protobuf;
+
+namespace Substrait.Relation.Tests
+{
+ [TestClass()]
+ public class ProtoConverterTests
+ {
+ [TestMethod()]
+ public void ToRel_FetchRel_Test()
+ {
+ FetchRel fetchRel = new FetchRel();
+ fetchRel.Count = 1; // limit 1
+ var rel = new Protobuf.Rel()
+ {
+ Fetch = fetchRel
+ };
+
+ fetchRel.Input = rel;
+
+ ProtoConverter converter = new();
+ var result = converter.ToRel(rel) as Fetch;
+ Assert.IsNotNull(result);
+ Assert.AreEqual(1, result.Count);
+ }
+ }
+}
\ No newline at end of file
diff --git a/tests/Substrait.Core.Tests/Substrait.Core.Tests.csproj b/tests/Substrait.Core.Tests/Substrait.Core.Tests.csproj
new file mode 100644
index 0000000..29b0b9a
--- /dev/null
+++ b/tests/Substrait.Core.Tests/Substrait.Core.Tests.csproj
@@ -0,0 +1,22 @@
+
+
+
+ net6.0
+ enable
+ enable
+
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/Substrait.Core.Tests/Usings.cs b/tests/Substrait.Core.Tests/Usings.cs
new file mode 100644
index 0000000..ab67c7e
--- /dev/null
+++ b/tests/Substrait.Core.Tests/Usings.cs
@@ -0,0 +1 @@
+global using Microsoft.VisualStudio.TestTools.UnitTesting;
\ No newline at end of file
diff --git a/tests/Substrait.Lineage.Tests/LineageExtractorVisitorTest.cs b/tests/Substrait.Lineage.Tests/LineageExtractorVisitorTest.cs
new file mode 100644
index 0000000..04927d1
--- /dev/null
+++ b/tests/Substrait.Lineage.Tests/LineageExtractorVisitorTest.cs
@@ -0,0 +1,19 @@
+using Substrait.Protobuf;
+
+namespace Substrait.Lineage.Tests
+{
+ [TestClass]
+ public class LineageExtractorVisitorTest
+ {
+ [TestMethod]
+ public void TestMethod1()
+ {
+ Plan plan;
+ using var planPb = File.OpenRead(@"TestData\tpcds\Q02.pb");
+ plan = Plan.Parser.ParseFrom(planPb);
+
+ var lineageVisitor = new LineageExtractorRelVisitor();
+ lineageVisitor.Execute(plan.Relations[0].Root.Input);
+ }
+ }
+}
\ No newline at end of file
diff --git a/tests/Substrait.Lineage.Tests/Substrait.Lineage.Tests.csproj b/tests/Substrait.Lineage.Tests/Substrait.Lineage.Tests.csproj
new file mode 100644
index 0000000..5ab0847
--- /dev/null
+++ b/tests/Substrait.Lineage.Tests/Substrait.Lineage.Tests.csproj
@@ -0,0 +1,27 @@
+
+
+
+ net6.0
+ enable
+ enable
+
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/Substrait.Lineage.Tests/TestData/tpcds/Q02.pb b/tests/Substrait.Lineage.Tests/TestData/tpcds/Q02.pb
new file mode 100644
index 0000000..721a3a4
Binary files /dev/null and b/tests/Substrait.Lineage.Tests/TestData/tpcds/Q02.pb differ
diff --git a/tests/Substrait.Lineage.Tests/Usings.cs b/tests/Substrait.Lineage.Tests/Usings.cs
new file mode 100644
index 0000000..ab67c7e
--- /dev/null
+++ b/tests/Substrait.Lineage.Tests/Usings.cs
@@ -0,0 +1 @@
+global using Microsoft.VisualStudio.TestTools.UnitTesting;
\ No newline at end of file