Skip to content

Commit

Permalink
Optimised copy for downsample (#328)
Browse files Browse the repository at this point in the history
Addresses part of #306
  • Loading branch information
Aelphy authored May 17, 2024
1 parent cdba098 commit fe4be7a
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 7 deletions.
30 changes: 23 additions & 7 deletions builder/optimizations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,24 +53,40 @@ bool is_copy(var src, expr src_x, int src_d, var dst, var dst_x, int dst_d, expr
src_dim.fold_factor = dim::unfolded;
return true;
} else {
expr offset = simplify(src_x - dst_x);
// check for f(x) = g(x * C)
expr scale = 1;
if (const class mul* s = src_x.as<class mul>()) {
if (!depends_on(s->a, dst_x).any()) {
scale = s->a;
src_x = s->b;
} else if (!depends_on(s->b, dst_x).any()) {
scale = s->b;
src_x = s->a;
} else {
return false;
}
}

src_dim.stride *= scale;

expr offset = simplify((src_x - dst_x) * scale);
if (!depends_on(offset, dst_x).any()) {
// The difference of src_x and dst_x does not depend on dst_x, it's a simple copy.
if (is_zero(offset)) {
// If the offset is zero, the index we want for the buffer_at call is buffer_min(src, src_d), which is
// definitely in bounds, so we don't need to clamp it.
src_dim.bounds = buffer_bounds(src, src_d);
at = src_dim.bounds.min;
src_dim.bounds = buffer_bounds(src, src_d) / scale;
at = src_dim.bounds.min * scale;
} else {
// The offset is non-zero, we might go out of bounds with our buffer_at call. To avoid this, we need to
// clamp to the intersection of the src and dst buffers, like copy would have done.
src_dim.bounds &= (buffer_bounds(src, src_d) - offset);
at = src_dim.bounds.min + offset;
src_dim.bounds &= (buffer_bounds(src, src_d) - offset) / scale;
at = (src_dim.bounds.min + offset) * scale;
}
return true;
} else {
return false;
}

return false;
}
}

Expand Down
48 changes: 48 additions & 0 deletions builder/test/copy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,54 @@ TEST_P(upsample_y, copy) {
ASSERT_EQ(eval_ctx.copy_elements, W * H);
}


// Value-parameterized fixture for the y-downsampling copy test. The int parameter is the split
// value handed to loops() for y; the test only calls loops() when the parameter is greater than 0.
class downsample_y : public testing::TestWithParam<int> {};

// Instantiate with parameters 0, 1, 2, 3, 4 (testing::Range excludes its end value).
INSTANTIATE_TEST_SUITE_P(split, downsample_y, testing::Range(0, 5));

// Builds a copy where out(x, y) = in(x, y * 2), evaluates it, and verifies both the copied values
// and the copy call/element counts recorded by the test context.
TEST_P(downsample_y, copy) {
  int split_factor = GetParam();

  // Make the pipeline
  node_context ctx;

  auto in = buffer_expr::make(ctx, "in", 2, sizeof(int));
  auto out = buffer_expr::make(ctx, "out", 2, sizeof(int));

  var x(ctx, "x");
  var y(ctx, "y");

  func downsample = func::make_copy({in, {point(x), point(y * 2)}}, {out, {x, y}});

  // A parameter of 0 means loops() is never called for y.
  if (split_factor > 0) {
    downsample.loops({{y, split_factor}});
  }

  pipeline p = build_pipeline(ctx, {in}, {out});

  // Run the pipeline. The input has twice as many rows as the output.
  const int width = 10;
  const int height = 20;
  buffer<int, 2> in_buf({width, height * 2});
  init_random(in_buf);

  buffer<int, 2> out_buf({width, height});
  out_buf.allocate();

  const raw_buffer* inputs[] = {&in_buf};
  const raw_buffer* outputs[] = {&out_buf};
  test_context eval_ctx;
  p.evaluate(inputs, outputs, eval_ctx);

  // Every output row must match the input row at twice its index.
  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      ASSERT_EQ(out_buf(col, row), in_buf(col, row * 2));
    }
  }

  // Expect a single copy call when loops() was not used, otherwise ceil(height / split_factor) calls.
  const auto expected_copy_calls = split_factor == 0 ? 1 : ceil_div(height, split_factor);
  ASSERT_EQ(eval_ctx.copy_calls, expected_copy_calls);
  ASSERT_EQ(eval_ctx.copy_elements, width * height);
}

class transpose_test : public testing::TestWithParam<std::vector<int>> {};

INSTANTIATE_TEST_SUITE_P(schedule, transpose_test,
Expand Down

0 comments on commit fe4be7a

Please sign in to comment.