Skip to content

Commit

Permalink
Join DValue equality bug fix (#2799)
Browse files Browse the repository at this point in the history
Issue #2794.
  • Loading branch information
anderson-joyle authored Jan 2, 2025
1 parent ccaf7c6 commit 96d55b0
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -546,16 +546,16 @@ public static async ValueTask<FormulaValue> JoinTables(EvalVisitor runner, EvalV
switch (joinType.Option)
{
case "Full":
rows = await LazyJoinAsync(runner, context, leftTable, rightTable, predicate, outerLeft: true, outerRight: true, scopeNameResolver, leftRenaming, rightRenaming).ConfigureAwait(false);
rows = await LazyJoinAsync(runner, context, irContext, leftTable, rightTable, predicate, outerLeft: true, outerRight: true, scopeNameResolver, leftRenaming, rightRenaming).ConfigureAwait(false);
break;
case "Inner":
rows = await LazyJoinAsync(runner, context, leftTable, rightTable, predicate, outerLeft: false, outerRight: false, scopeNameResolver, leftRenaming, rightRenaming).ConfigureAwait(false);
rows = await LazyJoinAsync(runner, context, irContext, leftTable, rightTable, predicate, outerLeft: false, outerRight: false, scopeNameResolver, leftRenaming, rightRenaming).ConfigureAwait(false);
break;
case "Left":
rows = await LazyJoinAsync(runner, context, leftTable, rightTable, predicate, outerLeft: true, outerRight: false, scopeNameResolver, leftRenaming, rightRenaming).ConfigureAwait(false);
rows = await LazyJoinAsync(runner, context, irContext, leftTable, rightTable, predicate, outerLeft: true, outerRight: false, scopeNameResolver, leftRenaming, rightRenaming).ConfigureAwait(false);
break;
case "Right":
rows = await LazyJoinAsync(runner, context, leftTable, rightTable, predicate, outerLeft: false, outerRight: true, scopeNameResolver, leftRenaming, rightRenaming).ConfigureAwait(false);
rows = await LazyJoinAsync(runner, context, irContext, leftTable, rightTable, predicate, outerLeft: false, outerRight: true, scopeNameResolver, leftRenaming, rightRenaming).ConfigureAwait(false);
break;
default:
throw new InvalidOperationException();
Expand All @@ -566,7 +566,8 @@ public static async ValueTask<FormulaValue> JoinTables(EvalVisitor runner, EvalV

private static async Task<DValue<RecordValue>[]> LazyJoinAsync(
EvalVisitor runner,
EvalVisitorContext context,
EvalVisitorContext context,
IRContext irContext,
TableValue leftSource,
TableValue rightSource,
LambdaFormulaValue predicate,
Expand All @@ -576,15 +577,13 @@ private static async Task<DValue<RecordValue>[]> LazyJoinAsync(
RecordValue leftRenaming,
RecordValue rightRenaming)
{
var innerRows = new List<DValue<RecordValue>>();
var outerDict = new Dictionary<DValue<RecordValue>, bool>();

// Keep track of which rows have been included in the inner join.
// Using a dictionary to avoid duplicates and to make it easier to check if a row is in the inner join.
var innerDict = new Dictionary<DValue<RecordValue>, bool>();
var resultRows = new List<DValue<RecordValue>>();
var recordContext = IRContext.NotInSource(((TableType)irContext.ResultType).ToRecord());

// Inner and Left loops
foreach (var leftRow in leftSource.Rows)
{
var hasMatch = false;
foreach (var rightRow in rightSource.Rows)
{
runner.CheckCancel();
Expand All @@ -596,68 +595,60 @@ private static async Task<DValue<RecordValue>[]> LazyJoinAsync(
if (result.IsValue)
{
var fields = new List<NamedValue>();
var leftRenamed = (RecordValue)await RenameColumns(runner, context, IRContext.NotInSource(leftRow.Value.Type), BuildRenamingArgs(leftRow.Value, leftRenaming)).ConfigureAwait(false);

foreach (var field in leftRenamed.OriginalFields)
{
fields.Add(new NamedValue(field.Name, field.Value));
}
var leftRecordValueRenamed = (RecordValue)await RenameColumns(runner, context, recordContext, BuildRenamingArgs(leftRow.Value, leftRenaming)).ConfigureAwait(false);

foreach (var field in rightRenaming.OriginalFields)
{
var fieldName = (StringValue)field.Value;
fields.Add(new NamedValue(fieldName.Value, rightRow.Value.GetField(field.Name)));
}
fields.AddRange(leftRecordValueRenamed.OriginalFields);
fields.AddRange(rightRenaming.OriginalFields.Select(field => new NamedValue(((StringValue)field.Value).Value, rightRow.Value.GetField(field.Name))));

innerRows.Add(DValue<RecordValue>.Of(FormulaValue.NewRecordFromFields(fields)));
resultRows.Add(DValue<RecordValue>.Of(FormulaValue.NewRecordFromFields(fields)));
}
else if (result.IsError)
{
// Lambda evaluation resulted in an error. Include the error.
innerRows.Add(result);
resultRows.Add(result);
}

innerDict[leftRow] = true;
innerDict[rightRow] = true;
hasMatch = true;
}
}

if (outerLeft)
{
if (outerDict.ContainsKey(leftRow))
{
outerDict.Remove(leftRow);
}
}
if (!hasMatch && outerLeft)
{
// leftRow is value since it would have been added to the resultRows in the inner loop.
var recordValueRenamed = (RecordValue)await RenameColumns(runner, context, recordContext, BuildRenamingArgs(leftRow.Value, leftRenaming)).ConfigureAwait(false);
resultRows.Add(DValue<RecordValue>.Of(recordValueRenamed));
}
}

if (outerRight)
{
if (outerDict.ContainsKey(rightRow))
{
outerDict.Remove(rightRow);
}
}
// Right loop
if (outerRight)
{
foreach (var rightRow in rightSource.Rows)
{
var hasMatch = false;
foreach (var leftRow in leftSource.Rows)
{
runner.CheckCancel();

continue;
}
var result = await LazyCheckPredicateAsync(runner, context, scopeNameResolver, leftRow, rightRow, predicate).ConfigureAwait(false);

if (outerLeft)
{
if (!innerDict.ContainsKey(leftRow))
if (result != null)
{
outerDict[leftRow] = true;
hasMatch = true;
break;
}
}

if (outerRight)
if (!hasMatch)
{
if (!innerDict.ContainsKey(rightRow))
{
outerDict[rightRow] = true;
}
// rightRow is value since it would have been added to the resultRows in the inner loop.
var recorValueRenamed = (RecordValue)await RenameColumns(runner, context, recordContext, BuildRenamingArgs(rightRow.Value, rightRenaming)).ConfigureAwait(false);
resultRows.Add(DValue<RecordValue>.Of(recorValueRenamed));
}
}
}
}

return innerRows.Concat(outerDict.Keys).ToArray();
return resultRows.ToArray();
}

private static FormulaValue[] BuildRenamingArgs(RecordValue row, RecordValue renaming)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ Errors: Error 61-63: The '{ a:1 }' value is invalid in this context. It should b
Errors: Error 49-51: The '{ a:1 }' value is invalid in this context. It should be a reference to either 'LeftRecord' or 'RightRecord' scope name.|Error 5-12: At least one 'RightRecord' column must be declared to compose the join result.|Error 0-4: The function 'Join' has some invalid arguments.

>> Join([10,20], Table({Value:20}, Blank()), LeftRecord.Value=RightRecord.Value, JoinType.Left, RightRecord.Value As R2)
Table({R2:20,Value:20},{R2:Blank(),Value:10})
Table({R2:Blank(),Value:10},{R2:20,Value:20})

>> Join([10,20], Table({Value:20}, If(1/0, {Value:25})), LeftRecord.Value=RightRecord.Value, JoinType.Left, RightRecord.Value As R2)
Table(Error({Kind:ErrorKind.Div0}),{R2:20,Value:20},Error({Kind:ErrorKind.Div0}))
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ Table({Country:"USA",Id:"AAA111",Name:"Contoso",RowId:1},{Country:"USA",Id:"AAA1

// Full
>> Join(t1, t2, LeftRecord.Id = RightRecord.SupplierId, JoinType.Full, RightRecord.RowId As RowId)
Table({Country:"USA",Id:"AAA111",Name:"Contoso",RowId:1},{Country:"USA",Id:"AAA111",Name:"Contoso",RowId:3},{Country:"USA",Id:"AAA111",Name:"Contoso",RowId:4},{Country:"USA",Id:"AAA111",Name:"Contoso",RowId:6},{Country:"CAN",Id:"BBB222",Name:"Fabrikam",RowId:2},{Country:"CAN",Id:"BBB222",Name:"Fabrikam",RowId:5},{Country:"USA",Id:"FFF666",Name:"Dunder Mifflin",RowId:Blank()},{Country:"MEX",Id:"EEE555",Name:"ACME INC.",RowId:Blank()},{Country:Blank(),Id:Blank(),Name:Blank(),RowId:7},{Country:Blank(),Id:Blank(),Name:Blank(),RowId:8})
Table({Country:"USA",Id:"AAA111",Name:"Contoso",RowId:1},{Country:"USA",Id:"AAA111",Name:"Contoso",RowId:3},{Country:"USA",Id:"AAA111",Name:"Contoso",RowId:4},{Country:"USA",Id:"AAA111",Name:"Contoso",RowId:6},{Country:"CAN",Id:"BBB222",Name:"Fabrikam",RowId:2},{Country:"CAN",Id:"BBB222",Name:"Fabrikam",RowId:5},{Country:"MEX",Id:"EEE555",Name:"ACME INC.",RowId:Blank()},{Country:"USA",Id:"FFF666",Name:"Dunder Mifflin",RowId:Blank()},{Country:Blank(),Id:Blank(),Name:Blank(),RowId:7},{Country:Blank(),Id:Blank(),Name:Blank(),RowId:8})

>> Join(t1 As x1, t2 As x2, x1.Id = x2.SupplierId, JoinType.Full, x2.RowId As RowId)
Table({Country:"USA",Id:"AAA111",Name:"Contoso",RowId:1},{Country:"USA",Id:"AAA111",Name:"Contoso",RowId:3},{Country:"USA",Id:"AAA111",Name:"Contoso",RowId:4},{Country:"USA",Id:"AAA111",Name:"Contoso",RowId:6},{Country:"CAN",Id:"BBB222",Name:"Fabrikam",RowId:2},{Country:"CAN",Id:"BBB222",Name:"Fabrikam",RowId:5},{Country:"USA",Id:"FFF666",Name:"Dunder Mifflin",RowId:Blank()},{Country:"MEX",Id:"EEE555",Name:"ACME INC.",RowId:Blank()},{Country:Blank(),Id:Blank(),Name:Blank(),RowId:7},{Country:Blank(),Id:Blank(),Name:Blank(),RowId:8})
Table({Country:"USA",Id:"AAA111",Name:"Contoso",RowId:1},{Country:"USA",Id:"AAA111",Name:"Contoso",RowId:3},{Country:"USA",Id:"AAA111",Name:"Contoso",RowId:4},{Country:"USA",Id:"AAA111",Name:"Contoso",RowId:6},{Country:"CAN",Id:"BBB222",Name:"Fabrikam",RowId:2},{Country:"CAN",Id:"BBB222",Name:"Fabrikam",RowId:5},{Country:"MEX",Id:"EEE555",Name:"ACME INC.",RowId:Blank()},{Country:"USA",Id:"FFF666",Name:"Dunder Mifflin",RowId:Blank()},{Country:Blank(),Id:Blank(),Name:Blank(),RowId:7},{Country:Blank(),Id:Blank(),Name:Blank(),RowId:8})

// Renaming columns with 'As' keyword
>> Join(t1, t2, LeftRecord.Id = RightRecord.SupplierId, JoinType.Inner, LeftRecord.Id As SupId, LeftRecord.Name As SupName, RightRecord.Fruit As FruitName)
Expand Down

0 comments on commit 96d55b0

Please sign in to comment.