Skip to content

Commit

Permalink
HIVE-25043: Support custom UDFs in Vectorized mode (Ryu Kobayashi, re…
Browse files Browse the repository at this point in the history
…viewed by Denys Kuzmenko)

Closes #5631
  • Loading branch information
ryukobayashi authored Feb 4, 2025
1 parent 26f72a9 commit da272b4
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 3 deletions.
5 changes: 5 additions & 0 deletions common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
Original file line number Diff line number Diff line change
Expand Up @@ -4704,6 +4704,11 @@ public static enum ConfVars {
"This flag should be set to true to enable vectorized mode of the PTF of query execution.\n" +
"The default value is true."),

HIVE_VECTOR_ADAPTOR_CUSTOM_UDF_WHITELIST("hive.vectorized.adaptor.custom.udf.whitelist", "",
"Custom UDF allowed when hive.vectorized.adaptor.usage.mode is chosen.\n" +
"Specify classes separated by commas:\n" +
"package.FooClass,package.BarClass"),

HIVE_VECTORIZATION_PTF_MAX_MEMORY_BUFFERING_BATCH_COUNT("hive.vectorized.ptf.max.memory.buffering.batch.count", 25,
"Maximum number of vectorized row batches to buffer in memory for PTF\n" +
"The default value is 25"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,18 @@ public static HiveVectorIfStmtMode getHiveConfValue(HiveConf hiveConf) {

private HiveVectorIfStmtMode hiveVectorIfStmtMode;

private Set<String> allowedCustomUDFs;

/**
 * Builds the set of custom UDF class names listed in
 * {@code hive.vectorized.adaptor.custom.udf.whitelist}.
 *
 * <p>The property value is a comma-separated list of class names. Each entry is
 * trimmed and blank entries are dropped, so a value written with spaces after the
 * commas (e.g. {@code "package.FooClass, package.BarClass"}) or with a trailing
 * comma still yields usable names. Without trimming, an entry such as
 * {@code " package.BarClass"} would be stored with its leading space and could never
 * equal a class name returned by {@code Class.getName()}.
 *
 * @param hiveConf configuration to read the whitelist from
 * @return a new mutable set of the configured class names; empty (never null) when
 *         the property is unset or empty
 */
private Set<String> getAllowedCustomUDFs(HiveConf hiveConf) {
  Set<String> allowed = new HashSet<>();
  String udfs = HiveConf.getVar(hiveConf,
      HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_CUSTOM_UDF_WHITELIST);
  if (udfs == null || udfs.isEmpty()) {
    return allowed;
  }

  for (String udf : udfs.split(",")) {
    // Strip surrounding whitespace so "a.Foo, b.Bar" matches the exact class name.
    String className = udf.trim();
    if (!className.isEmpty()) {
      allowed.add(className);
    }
  }
  return allowed;
}

// When set to true, use the overflow-checked vector expressions.
private boolean useCheckedVectorExpressions;

Expand All @@ -298,6 +310,7 @@ private void setHiveConfVars(HiveConf hiveConf) {
adaptorSuppressEvaluateExceptions =
HiveConf.getBoolVar(
hiveConf, HiveConf.ConfVars.HIVE_VECTORIZED_ADAPTOR_SUPPRESS_EVALUATE_EXCEPTIONS);
this.allowedCustomUDFs = getAllowedCustomUDFs(hiveConf);
}

private void copyHiveConfVars(VectorizationContext vContextEnvironment) {
Expand Down Expand Up @@ -1037,7 +1050,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpress
"Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()
+ " because hive.vectorized.adaptor.usage.mode=none");
case CHOSEN:
if (isNonVectorizedPathUDF(expr, mode)) {
if (isNonVectorizedPathUDF(expr, mode, allowedCustomUDFs)) {
ve = getCustomUDFExpression(expr, mode);
} else {
throw new HiveException(
Expand Down Expand Up @@ -1446,8 +1459,8 @@ public static GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveExce
* Depending on performance requirements and frequency of use, these
* may be implemented in the future with an optimized VectorExpression.
*/
public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
VectorExpressionDescriptor.Mode mode) {
private static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
VectorExpressionDescriptor.Mode mode, Set<String> allowCustomUDFs) {
GenericUDF gudf = expr.getGenericUDF();
if (gudf instanceof GenericUDFBridge) {
GenericUDFBridge bridge = (GenericUDFBridge) gudf;
Expand Down Expand Up @@ -1486,6 +1499,8 @@ public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
return true;
} else if (gudf instanceof GenericUDFConcat && (mode == VectorExpressionDescriptor.Mode.PROJECTION)) {
return true;
} else if (allowCustomUDFs.contains(gudf.getClass().getName())) {
return true;
}
return false;
}
Expand Down

0 comments on commit da272b4

Please sign in to comment.