Skip to content

Commit

Permalink
Refactor Builders for the In-Memory Table (#12046)
Browse files Browse the repository at this point in the history
- Convert `Builder` to be an interface rather than an abstract base class.
- Add methods from `TypedBuilder` into the base interface: `accepts`, `canRetypeTo`, `retypeTo`.
- Add `BuilderForType` generic interface.
- New interfaces for `BuilderForBoolean`, `BuilderForDouble` and `BuilderForLong` avoiding boxing.
- `BuilderForDouble` also has `appendLong` which avoids boxing and notes lossy conversion.
- `BuilderWithRetyping` - New interface for `InferredBuilder` underlyings. Should be revisited in follow up.
- Reduced methods in `java_exports` so generally use the value type based method.
- Remove `fillUpToSize` and replace with a method in the Enso code (also throws Enso panic not a Java exception).
- Rename `retypeToMixed` to `copyDataTo`, allowing the data to be copied to an Object array. All builders now support this.
- Rename `TypedBuilderImpl` to `TypedBuilder` (having removed abstract base class).
- `retypeTo` always throws `UnsupportedOperationException` if not supported.
- Removed `AnyObjectType` conversion from `BigIntegerBuilder` and `TypedBuilder` as conversion to mixed is done differently.
- Removed `appendRawNoGrow` as this was exposing internals.
- Removed `appendBigInteger` and `appendBigDecimal` - use `append`.
- Adjusted `DoubleBuilder` and `DoubleStorage` to be backed by a `double[]`.
- Refactored the `cast` operations and based off the `Builder` interfaces.

Follow up:
- remove `appendNoGrow`.
- restructuring the InferredBuilder and associated methods.
  • Loading branch information
jdunkerley authored Jan 17, 2025
1 parent 1d4a1a6 commit 7cde786
Show file tree
Hide file tree
Showing 89 changed files with 1,451 additions and 1,720 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ boolean_fetcher =
fetch_value rs i =
b = rs.getBoolean i
if rs.wasNull then Nothing else b
make_builder _ _ =
java_builder = Java_Exports.make_bool_builder
make_builder initial_size _ =
java_builder = Java_Exports.make_bool_builder initial_size
append v =
if v.is_nothing then java_builder.appendNulls 1 else
java_builder.appendBoolean v
Expand Down Expand Up @@ -87,7 +87,7 @@ long_fetcher bits =
big_integer_fetcher : Column_Fetcher
big_integer_fetcher =
make_builder initial_size java_problem_aggregator =
java_builder = Java_Exports.make_biginteger_builder initial_size java_problem_aggregator
java_builder = Java_Exports.make_builder_for_type (Value_Type.Decimal scale=0) initial_size java_problem_aggregator
make_builder_from_java_object_builder java_builder
Column_Fetcher.Value fetch_big_integer make_builder

Expand All @@ -105,8 +105,7 @@ big_decimal_fetcher =
if rs.wasNull then Nothing else
big_decimal
make_builder initial_size java_problem_aggregator =
_ = java_problem_aggregator
java_builder = Java_Exports.make_bigdecimal_builder initial_size
java_builder = Java_Exports.make_builder_for_type Value_Type.Decimal initial_size java_problem_aggregator
make_builder_from_java_object_builder java_builder
Column_Fetcher.Value fetch_value make_builder

Expand All @@ -116,8 +115,8 @@ text_fetcher value_type =
fetch_value rs i =
t = rs.getString i
if rs.wasNull then Nothing else t
make_builder initial_size _ =
java_builder = Java_Exports.make_string_builder initial_size value_type=value_type
make_builder initial_size java_problem_aggregator =
java_builder = Java_Exports.make_builder_for_type value_type initial_size java_problem_aggregator
make_builder_from_java_object_builder java_builder
Column_Fetcher.Value fetch_value make_builder

Expand All @@ -141,24 +140,24 @@ fallback_fetcher =
## PRIVATE
date_fetcher =
fetch_value rs i = JDBCUtils.getLocalDate rs i
make_builder initial_size _ =
java_builder = Java_Exports.make_date_builder initial_size
make_builder initial_size java_problem_aggregator =
java_builder = Java_Exports.make_builder_for_type Value_Type.Date initial_size java_problem_aggregator
make_builder_from_java_object_builder java_builder
Column_Fetcher.Value fetch_value make_builder

## PRIVATE
time_fetcher =
fetch_value rs i = JDBCUtils.getLocalTime rs i
make_builder initial_size _ =
java_builder = Java_Exports.make_time_of_day_builder initial_size
make_builder initial_size java_problem_aggregator =
java_builder = Java_Exports.make_builder_for_type Value_Type.Time initial_size java_problem_aggregator
make_builder_from_java_object_builder java_builder
Column_Fetcher.Value fetch_value make_builder

## PRIVATE
date_time_fetcher =
fetch_value rs i = JDBCUtils.getZonedDateTime rs i
make_builder initial_size _ =
java_builder = Java_Exports.make_date_time_builder initial_size
make_builder initial_size java_problem_aggregator =
java_builder = Java_Exports.make_builder_for_type Value_Type.Date_Time initial_size java_problem_aggregator
make_builder_from_java_object_builder java_builder
Column_Fetcher.Value fetch_value make_builder

Expand All @@ -168,8 +167,8 @@ date_time_fetcher =
adding the default system timezone.
local_date_time_fetcher =
fetch_value rs i = JDBCUtils.getLocalDateTimeAsZoned rs i
make_builder initial_size _ =
java_builder = Java_Exports.make_date_time_builder initial_size
make_builder initial_size java_problem_aggregator =
java_builder = Java_Exports.make_builder_for_type Value_Type.Date_Time initial_size java_problem_aggregator
make_builder_from_java_object_builder java_builder
Column_Fetcher.Value fetch_value make_builder

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,8 @@ time_fetcher =
## Read the time as a string to get the nanosecond precision.
sf_string = rs.getString i
if sf_string == Nothing then Nothing else Time_Of_Day.parse sf_string
make_builder initial_size _ =
java_builder = Java_Exports.make_time_of_day_builder initial_size
make_builder initial_size java_problem_aggregator =
java_builder = Java_Exports.make_builder_for_type Value_Type.Time initial_size java_problem_aggregator
Column_Fetcher_Module.make_builder_from_java_object_builder java_builder
Column_Fetcher.Value fetch_value make_builder

Expand All @@ -226,8 +226,8 @@ date_time_fetcher =
# The offset is optional - if we were fetching TIMESTAMP_NTZ it could be missing.
# The two variants of offset are needed - to handle both `+0200` and `+02:00` formats.
Date_Time.parse normalized "yyyy-MM-dd HH:mm:ss.f[ ZZ][ ZZZZZ]"
make_builder initial_size _ =
java_builder = Java_Exports.make_date_time_builder initial_size
make_builder initial_size java_problem_aggregator =
java_builder = Java_Exports.make_builder_for_type Value_Type.Date_Time initial_size java_problem_aggregator
Column_Fetcher_Module.make_builder_from_java_object_builder java_builder
Column_Fetcher.Value fetch_value make_builder

Expand Down
6 changes: 3 additions & 3 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,15 @@ import project.Value_Type.Auto
import project.Value_Type.Value_Type
from project.Errors import Conversion_Failure, Inexact_Type_Coercion, Invalid_Column_Names, Invalid_Value_Type, No_Index_Set_Error
from project.Internal.Column_Format import all
from project.Internal.Java_Exports import make_date_builder_adapter, make_string_builder
from project.Internal.Java_Exports import make_string_builder
from project.Internal.Storage import enso_to_java, java_to_enso

polyglot java import org.enso.base.Time_Utils
polyglot java import org.enso.table.data.column.operation.cast.CastProblemAggregator
polyglot java import org.enso.table.data.column.operation.CountNothing
polyglot java import org.enso.table.data.column.operation.CountUntrimmed
polyglot java import org.enso.table.data.column.operation.unary.DatePartOperation
polyglot java import org.enso.table.data.column.operation.unary.DateTruncateOperation
polyglot java import org.enso.table.data.column.operation.unary.IsEmptyOperation
polyglot java import org.enso.table.data.column.operation.unary.IsFiniteOperation
polyglot java import org.enso.table.data.column.operation.unary.IsInfiniteOperation
Expand Down Expand Up @@ -937,8 +938,7 @@ type Column
apply_unary_operation self UnaryRoundOperation.TRUNCATE_INSTANCE
False -> case precise_value_type == Value_Type.Date_Time of
True ->
fun = _.date
Column_Ops.map_over_storage self fun make_date_builder_adapter skip_nothing=True . rename new_name
apply_unary_operation self DateTruncateOperation.TRUNCATE_INSTANCE
False -> Error.throw <| Invalid_Value_Type.Column "Numeric or Date_Time" self.value_type self.name

## GROUP Standard.Base.Rounding
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import project.Column.Column
import project.Data_Formatter.Data_Formatter
import project.Internal.Storage
import project.Value_Type.Value_Type
from project.Internal.Java_Exports import make_string_builder

polyglot java import java.lang.IllegalArgumentException
polyglot java import java.time.temporal.UnsupportedTemporalTypeException
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import Standard.Base.Data.Vector.Builder
import Standard.Base.Errors.Common.Index_Out_Of_Bounds
import Standard.Base.Errors.Common.Type_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State

import project.Column.Column
import project.Conversions.Convertible_To_Columns.Convertible_To_Columns
Expand All @@ -16,7 +17,6 @@ import project.Internal.Java_Problems
import project.Internal.Widget_Helpers
import project.Prefix_Name.Prefix_Name
import project.Table.Table
from project.Internal.Java_Exports import make_inferred_builder

## PRIVATE
expand_column : Table -> Text | Integer -> (Vector Text) | Nothing -> Prefix_Name -> Table ! Type_Error | No_Such_Column | Index_Out_Of_Bounds
Expand Down Expand Up @@ -96,7 +96,7 @@ expand_to_rows table column:(Text | Integer) at_least_one_row=False sequences_on
. map name-> if name=="Value" then column else column+" "+name

Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
builder size = make_inferred_builder size java_problem_aggregator
builder size = Java_Exports.make_inferred_builder size java_problem_aggregator
Fan_Out.fan_out_to_rows table column row_expander column_names at_least_one_row column_builder=builder

create_table_from_objects (base_value : Convertible_To_Rows) (fields : Vector | Nothing) (treat_dictionary_as_sequence : Boolean = False) -> Table = if fields.is_nothing.not && fields.is_empty then Error.throw (Illegal_Argument.Error "The fields parameter cannot be empty.") else
Expand Down Expand Up @@ -131,14 +131,14 @@ create_table_from_objects (base_value : Convertible_To_Rows) (fields : Vector |
builder = existing_builder.if_nothing <|
discovered_field_names.append f
Java_Exports.make_inferred_builder len java_problem_aggregator
builder.fillUpToSize idx
_fill_builder builder idx
builder.append (v.getter f)
if existing_builder.is_nothing.not then inner_current_dict else
inner_current_dict.insert f builder

# Seal all builders and create columns
column_dict = builder_dict.map_with_key name-> builder->
builder.fillUpToSize len
_fill_builder builder len
Column.from_storage name builder.seal

column_dict.if_not_error <|
Expand All @@ -149,3 +149,8 @@ create_table_from_objects (base_value : Convertible_To_Rows) (fields : Vector |
if discovered_field_names.is_empty then Error.throw (Illegal_Argument.Error "Unable to generate column names as all inputs had no fields.") else
discovered_field_names.to_vector.map column_dict.get
Table.new columns

# Pads `builder` with nulls until it holds `size` entries.
# The number of nulls to append is `size` minus the builder's current size.
# If the builder already holds more than `size` entries, this panics with an
# `Illegal_State` error instead of appending anything.
private _fill_builder builder size:Integer =
to_fill = size - builder.getCurrentSize
if to_fill > 0 then builder.appendNulls to_fill else
if to_fill < 0 then Panic.throw (Illegal_State.Error "Internal error: builder overfilled ("+builder.getCurrentSize.to_text+").")
Original file line number Diff line number Diff line change
Expand Up @@ -5,84 +5,43 @@ import project.Internal.Storage
import project.Value_Type.Bits
import project.Value_Type.Value_Type

polyglot java import org.enso.table.data.column.builder.BigDecimalBuilder
polyglot java import org.enso.table.data.column.builder.BigIntegerBuilder
polyglot java import org.enso.table.data.column.builder.BoolBuilder
polyglot java import org.enso.table.data.column.builder.DateBuilder
polyglot java import org.enso.table.data.column.builder.DateTimeBuilder
polyglot java import org.enso.table.data.column.builder.InferredBuilder
polyglot java import org.enso.table.data.column.builder.NumericBuilder
polyglot java import org.enso.table.data.column.builder.StringBuilder
polyglot java import org.enso.table.data.column.builder.TimeOfDayBuilder
polyglot java import org.enso.table.data.column.builder.Builder
polyglot java import org.enso.table.data.column.builder.BuilderForBoolean
polyglot java import org.enso.table.data.column.builder.BuilderForDouble
polyglot java import org.enso.table.data.column.builder.BuilderForLong
polyglot java import org.enso.table.data.column.storage.Storage as Java_Storage
polyglot java import org.enso.table.problems.ProblemAggregator

## PRIVATE
make_bool_builder : BoolBuilder
make_bool_builder = BoolBuilder.new
Note: Value_Type must have an exact representation in Java.
make_builder_for_type : Value_Type -> Integer -> ProblemAggregator -> Builder
make_builder_for_type value_type initial_size java_problem_aggregator=(Missing_Argument.ensure_present "java_problem_aggregator") =
storage_type = Storage.from_value_type_strict value_type
Builder.getForType storage_type initial_size java_problem_aggregator

## PRIVATE
make_bool_builder : Integer -> BuilderForBoolean
make_bool_builder initial_size = Builder.getForBoolean initial_size

## PRIVATE
make_double_builder : Integer -> ProblemAggregator -> NumericBuilder
make_double_builder : Integer -> ProblemAggregator -> BuilderForDouble
make_double_builder initial_size java_problem_aggregator=(Missing_Argument.ensure_present "java_problem_aggregator") =
NumericBuilder.createDoubleBuilder initial_size java_problem_aggregator
float_type = Storage.from_value_type_strict Value_Type.Float
Builder.getForDouble float_type initial_size java_problem_aggregator

## PRIVATE
make_long_builder : Integer -> Bits -> ProblemAggregator -> NumericBuilder
make_long_builder : Integer -> Bits -> ProblemAggregator -> BuilderForLong
make_long_builder initial_size bits java_problem_aggregator=(Missing_Argument.ensure_present "java_problem_aggregator") =
integer_type = Storage.from_value_type_strict (Value_Type.Integer bits)
NumericBuilder.createLongBuilder initial_size integer_type java_problem_aggregator

## PRIVATE
make_biginteger_builder : Integer -> ProblemAggregator -> BigIntegerBuilder
make_biginteger_builder initial_size java_problem_aggregator=(Missing_Argument.ensure_present "java_problem_aggregator") =
BigIntegerBuilder.new initial_size java_problem_aggregator
Builder.getForLong integer_type initial_size java_problem_aggregator

## PRIVATE
make_bigdecimal_builder : Integer -> BigDecimalBuilder
make_bigdecimal_builder initial_size =
BigDecimalBuilder.new initial_size

## PRIVATE
make_string_builder : Integer -> Value_Type -> StringBuilder
make_string_builder : Integer -> Value_Type -> Builder
make_string_builder initial_size value_type=Value_Type.Char =
storage_type = Storage.from_value_type_strict value_type
StringBuilder.new initial_size storage_type
Builder.getForType storage_type initial_size Nothing

## PRIVATE
make_time_of_day_builder : Integer -> TimeOfDayBuilder
make_time_of_day_builder initial_size = TimeOfDayBuilder.new initial_size

## PRIVATE
make_date_time_builder : Integer -> DateTimeBuilder
make_date_time_builder initial_size = DateTimeBuilder.new initial_size

## PRIVATE
make_date_builder : Integer -> DateBuilder
make_date_builder initial_size = DateBuilder.new initial_size

## PRIVATE
make_inferred_builder : Integer -> ProblemAggregator -> InferredBuilder
make_inferred_builder : Integer -> ProblemAggregator -> Builder
make_inferred_builder initial_size java_problem_aggregator=(Missing_Argument.ensure_present "java_problem_aggregator") =
InferredBuilder.new initial_size java_problem_aggregator

## PRIVATE
Wrapper around a DateBuilder that uses DateBuilder.appendDate() to append a
value (instead of builder.append())
type Date_Builder_Adapter
## PRIVATE
Value (date_builder : DateBuilder)

## PRIVATE
append : Date -> Nothing
append self date =
self.date_builder.appendDate date

## PRIVATE
seal : Java_Storage
seal self = self.date_builder.seal

## PRIVATE
Date_Builder_Adapter constructor that matches the interface of the other
make_*_builder functions.
make_date_builder_adapter : Integer -> Date_Builder_Adapter
make_date_builder_adapter n = Date_Builder_Adapter.Value (make_date_builder n)
Builder.getInferredBuilder initial_size java_problem_aggregator
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import java.util.TimeZone;
import java.util.stream.IntStream;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.builder.StringBuilder;
import org.enso.table.data.column.storage.type.TextType;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
Expand Down Expand Up @@ -278,7 +277,7 @@ public static Table runReport(

var builders = new Builder[dimensions.size() + metrics.size()];
for (int i = 0; i < dimensions.size() + metrics.size(); i++) {
builders[i] = new StringBuilder(rowCount, TextType.VARIABLE_LENGTH);
builders[i] = Builder.getForType(TextType.VARIABLE_LENGTH, rowCount, null);
}

// Load the data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import org.enso.table.error.ValueTypeMismatchException;
import org.graalvm.polyglot.Context;

public class SnowflakeIntegerColumnMaterializer extends Builder {
public class SnowflakeIntegerColumnMaterializer implements Builder {
private static final BigInteger LONG_MIN = BigInteger.valueOf(Long.MIN_VALUE);
private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE);
// We start in integer mode and will switch to BigInteger mode if we encounter a value that
Expand Down Expand Up @@ -119,10 +119,26 @@ public Storage<?> seal() {
@Override
public StorageType getType() {
// The type of the builder can change over time, so we do not report any stable type here.
// Same as in InferredBuilder.
return null;
}

/**
 * Copies the values appended so far (the first {@code currentSize} entries) into {@code items},
 * starting at index 0.
 *
 * <p>In {@code LONG} mode each entry is taken from {@code ints}, except that positions flagged in
 * {@code intsMissing} are written as {@code null}. In the other mode the entries are copied
 * straight from {@code bigInts}.
 *
 * <p>NOTE(review): no length check is made here, so {@code items} is presumably expected to hold
 * at least {@code currentSize} elements - confirm against callers.
 *
 * @param items the destination array that receives the copied values
 */
@Override
public void copyDataTo(Object[] items) {
// Nothing to copy for an empty builder.
if (currentSize > 0) {
if (mode == Mode.LONG) {
for (int i = 0; i < currentSize; i++) {
// A set bit in intsMissing marks a missing value, which is emitted as null.
if (intsMissing.get(i)) {
items[i] = null;
} else {
items[i] = ints[i];
}
}
} else {
// Not in LONG mode: values live in bigInts and can be bulk-copied as objects.
System.arraycopy(bigInts, 0, items, 0, currentSize);
}
}
}

private int capacity() {
return mode == Mode.LONG ? ints.length : bigInts.length;
}
Expand Down
20 changes: 7 additions & 13 deletions std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
import java.math.BigInteger;
import java.util.List;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.builder.BigIntegerBuilder;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.builder.DoubleBuilder;
import org.enso.table.data.column.builder.InferredIntegerBuilder;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.Storage;
Expand Down Expand Up @@ -35,8 +33,9 @@ public Sum(String name, Column column) {
public Builder makeBuilder(int size, ProblemAggregator problemAggregator) {
return switch (inputType) {
case IntegerType integerType -> new InferredIntegerBuilder(size, problemAggregator);
case BigIntegerType bigIntegerType -> new BigIntegerBuilder(size, problemAggregator);
case FloatType floatType -> DoubleBuilder.createDoubleBuilder(size, problemAggregator);
case BigIntegerType bigIntegerType -> Builder.getForType(
bigIntegerType, size, problemAggregator);
case FloatType floatType -> Builder.getForDouble(floatType, size, problemAggregator);
default -> throw new IllegalStateException(
"Unexpected input type for Sum aggregate: " + inputType);
};
Expand Down Expand Up @@ -134,15 +133,10 @@ private void addLong(long value) {
private void addBigInteger(BigInteger value) {
assert value != null;
switch (accumulator) {
case Long accumulatorAsLong -> {
accumulator = BigInteger.valueOf(accumulatorAsLong).add(value);
}
case BigInteger accumulatorAsBigInteger -> {
accumulator = accumulatorAsBigInteger.add(value);
}
case null -> {
accumulator = value;
}
case Long accumulatorAsLong -> accumulator =
BigInteger.valueOf(accumulatorAsLong).add(value);
case BigInteger accumulatorAsBigInteger -> accumulator = accumulatorAsBigInteger.add(value);
case null -> accumulator = value;
default -> throw new IllegalStateException(
"Unexpected accumulator type: " + accumulator.getClass());
}
Expand Down
Loading

0 comments on commit 7cde786

Please sign in to comment.