Skip to content

Commit

Permalink
Enable end to end DPS testing
Browse files Browse the repository at this point in the history
Implement Python binding changes to allow the execute function to return
multiple results. Update tests to use the non-DPS style calling convention.

Also, enable end-to-end lowering by enabling conversion of the closed alloc
group op to the TensorRT dialect.

Miscellaneous fixes:
1. Add missing handling of `CallAllocOp` in EliminateShapeOps pass.
2. Skip non ranked tensor type function arguments while collecting host
   tensor arguments.
3. Temporarily add a pass to remove clone operation in MemRefToExecutor
   dialect conversion.
4. Relax memref creation for empty shape tensors.
5. Fix the lifetime of memrefs returned from Lua function results. This required
   the session allocator to track returned memrefs.

Also:
- Fix incorrect indexing into output memref results.
- Return an error status instead of silently erroring out during TensorRT weight conversion.
- Address review comments.
  • Loading branch information
jhalakpatel committed Feb 5, 2025
1 parent 87e5869 commit b25f807
Show file tree
Hide file tree
Showing 20 changed files with 734 additions and 105 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,8 @@ def StablehloClusteringPass : Pass<"stablehlo-clustering", "::mlir::ModuleOp"> {
Option<"entrypoint", "entrypoint", "std::string", "\"\"",
"the name of the entrypoint function; if empty then the clustering runs"
" on all functions">,
Option<"enableNonDPSReturns",
"enable-non-dps-returns", "bool", "false",
Option<"forceEntrypointsReturnAllocs",
"force-entrypoints-return-allocs", "bool", "false",
"allow backend clusters to directly allocate outputs">,
Option<"disableCreateShapeFuncPass", "disable-create-shape-func-pass", "bool", "false",
"don't apply create shape to func pass in TensorRT clusters">
Expand Down Expand Up @@ -331,7 +331,7 @@ def CreateClosedRegionsPass : Pass<"plan-create-closed-regions", "::mlir::Module
"(used only in testing) specifies to outline regions by walking in "
" pre-order; used for verifying results are not sensitive "
"to traversal order">,
Option<"enableNonDPSReturns", "enable-non-dps-returns", "bool",
Option<"forceEntrypointsReturnAllocs", "force-entrypoints-return-allocs", "bool",
/*default=*/"false",
"Allow backend clusters to directly allocate outputs">
];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@ void StablehloToExecutableTask::buildPostClusteringPipeline(

// Perform bufferization.
pm.addPass(createMemRefCastEliminationPass());
pm.addPass(plan::createPlanAllocTensorsPass());
plan::PlanAllocTensorsPassOptions allocTensorOpts{};
allocTensorOpts.forceEntrypointsReturnAllocs = opts.forceEntrypointsReturnAllocs;
pm.addPass(plan::createPlanAllocTensorsPass(allocTensorOpts));
pm.addPass(plan::createPlanBufferizePass());
pm.addPass(createMemRefCastEliminationPass());
pm.addPass(createCanonicalizerPass());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ add_mlir_tensorrt_library(MLIRTensorRTPlanTransforms
MLIRTensorRTStablehloScalarToArith
MLIRTensorRTStablehloToTensorRT
MLIRTensorRTTensorRTRuntimeDialect
MLIRBufferizationToMemRef
MLIRTransforms
StablehloOps
)
Original file line number Diff line number Diff line change
Expand Up @@ -561,12 +561,12 @@ createInlineClosedAllocGroupOp(RewriterBase &rewriter, plan::InlineGroupOp op,
static LogicalResult createClosedGroupOp(RewriterBase &rewriter,
plan::InlineGroupOp op,
DataFlowSolver &solver,
bool enableNonDPSReturns) {
bool forceEntrypointsReturnAllocs) {
OpBuilder::InsertionGuard g(rewriter);

// Materialize destination operands if not using non-DPS call convention.
SmallVector<DestinationOperandMaterializationResult> destinationOperands;
if (!enableNonDPSReturns)
if (!forceEntrypointsReturnAllocs)
if (failed(materializeDestinationOperands(rewriter, op, solver,
destinationOperands)))
return failure();
Expand All @@ -581,7 +581,7 @@ static LogicalResult createClosedGroupOp(RewriterBase &rewriter,

// Create and populate the appropriate closed group op based on call
// convention.
if (!enableNonDPSReturns)
if (!forceEntrypointsReturnAllocs)
return createInlineClosedGroupOp(rewriter, op, solver, inputs,
destinationOperands);
return createInlineClosedAllocGroupOp(rewriter, op, solver, inputs);
Expand Down Expand Up @@ -629,7 +629,7 @@ class CreateClosedRegionsPass
IRRewriter rewriter(ctx);
for (InlineGroupOp groupOp : llvm::make_early_inc_range(groupOps)) {
if (failed(createClosedGroupOp(rewriter, groupOp, solver,
enableNonDPSReturns)))
forceEntrypointsReturnAllocs)))
return signalPassFailure();
}
}
Expand Down
28 changes: 14 additions & 14 deletions mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/Passes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
//===----------------------------------------------------------------------===//
#include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h"
#include "mlir-tensorrt/Transforms/Passes.h"
#include "mlir/Conversion/BufferizationToMemRef/BufferizationToMemRef.h"
#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
#include "mlir/Dialect/Bufferization/Pipelines/Passes.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
Expand All @@ -48,7 +49,7 @@ void plan::buildPlanSegmentationPipeline(
plan::createPlanPopulateFunctionBoundsAttributesPass());
pm.addPass(plan::createStablehloClusteringPass(opts));
plan::CreateClosedRegionsPassOptions closedRegionOptions{};
closedRegionOptions.enableNonDPSReturns = opts.enableNonDPSReturns;
closedRegionOptions.forceEntrypointsReturnAllocs = opts.forceEntrypointsReturnAllocs;
pm.addPass(plan::createCreateClosedRegionsPass(closedRegionOptions));
pm.addPass(plan::createOutlineClustersPass());
pm.addPass(mlir::createFuncExtDuplicateFunctionEliminationPass());
Expand Down Expand Up @@ -80,6 +81,7 @@ void plan::buildPlanBufferDeallocationPipeline(
pm.addPass(createCanonicalizerPass());
pm.addPass(bufferization::createBufferDeallocationSimplificationPass());
pm.addPass(bufferization::createLowerDeallocationsPass());
pm.addPass(mlir::createBufferizationToMemRefPass());
pm.addPass(createCSEPass());
pm.addPass(createCanonicalizerPass());
}
Expand Down Expand Up @@ -112,19 +114,17 @@ struct PlanBufferizationPipelineCliOpts
// Register pipelines.

void plan::registerPlanDialectPipelines() {
PassPipelineRegistration<PlanBufferizationPipelineCliOpts>
executorBufferizationPipeline(
"plan-bufferize-pipeline",
"perform bufferization and standard pre/post processing passes",
[](OpPassManager &pm, const PlanBufferizationPipelineCliOpts &opts) {
PlanAllocTensorsPassOptions allocTensorOpts{};
allocTensorOpts.forceEntrypointsReturnAllocs =
opts.forceEntrypointsReturnAllocs;
buildPlanBufferizationPipeline(pm, allocTensorOpts);
buildPlanBufferOptimizationPipeline(pm);
buildPlanBufferDeallocationPipeline(
pm, bufferization::DeallocationOptions{false});
});
PassPipelineRegistration<PlanBufferizationPipelineCliOpts> executorBufferizationPipeline(
"plan-bufferize-pipeline",
"perform bufferization and standard pre/post processing passes",
[](OpPassManager &pm, const PlanBufferizationPipelineCliOpts &opts) {
PlanAllocTensorsPassOptions allocTensorOpts{};
allocTensorOpts.forceEntrypointsReturnAllocs = opts.forceEntrypointsReturnAllocs;
buildPlanBufferizationPipeline(pm, allocTensorOpts);
buildPlanBufferOptimizationPipeline(pm);
buildPlanBufferDeallocationPipeline(
pm, bufferization::DeallocationOptions{false});
});

PassPipelineRegistration<> bufferOptPipeline(
"plan-buffer-opt-pipeline", "perform post-bufferization optimizations",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ class StablehloClusteringPass
if (failed(
applyClusteringToFunc(rewriter, func, solver, schedule,
StablehloClusteringPassOptions{
entrypoint, enableNonDPSReturns,
entrypoint, forceEntrypointsReturnAllocs,
/*disableCreateShapeFuncPass=*/false})))
return signalPassFailure();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// RUN: mlir-tensorrt-opt %s -plan-create-closed-regions -split-input-file | FileCheck %s
// RUN: mlir-tensorrt-opt %s -plan-create-closed-regions=test-pre-walk-order=true -split-input-file | FileCheck %s
// RUN: mlir-tensorrt-opt %s -plan-create-closed-regions=enable-non-dps-returns=true -split-input-file | FileCheck %s --check-prefix=CHECK-ALLOC
// RUN: mlir-tensorrt-opt %s -plan-create-closed-regions=force-entrypoints-return-allocs=true -split-input-file | FileCheck %s --check-prefix=CHECK-ALLOC

func.func @test_simple_static(%arg0: tensor<10xf32>, %arg1: tensor<10xf32>) -> tensor<10xf32> {
%0 = plan.inline_group target(#plan.tensorrt_cluster<disallow_shape_tensor_calculations = false, benefit = 1>) -> tensor<10xf32> {
Expand Down
Loading

0 comments on commit b25f807

Please sign in to comment.