Skip to content

Commit

Permalink
loop_interchange_2
Browse files Browse the repository at this point in the history
Remove transpose
Swap width & height in filterVertically
  • Loading branch information
jwest591 authored and Jon committed Jan 18, 2025
1 parent adcd017 commit 5635c84
Showing 1 changed file with 31 additions and 38 deletions.
69 changes: 31 additions & 38 deletions labs/memory_bound/loop_interchange_2/solution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,6 @@
#include <fstream>
#include <ios>

// Writes the transpose of a row-major `width` x `height` byte image.
//
// src    - input pixels, read at index row * width + col
// dest   - output pixels, written at index col * height + row
// width  - number of columns in src
// height - number of rows in src
//
// The source is walked row by row, so reads are sequential while writes are
// strided by `height`.
void transpose(const uint8_t* src, uint8_t* dest, const size_t width, const size_t height)
{
    for (size_t row = 0; row < height; ++row) {
        // Start of the current source row.
        const uint8_t* const line = src + row * width;
        for (size_t col = 0; col < width; ++col) {
            const size_t target = col * height + row;
            dest[target] = line[col];
        }
    }
}


// Applies Gaussian blur in independent vertical lines
static void filterVertically(uint8_t *output, const uint8_t *input,
Expand All @@ -21,37 +12,41 @@ static void filterVertically(uint8_t *output, const uint8_t *input,
const int shift) {
const int rounding = 1 << (shift - 1);

for (int c = 0; c < width; c++) {
// Top part of line, partial kernel
for (int r = 0; r < std::min(radius, height); r++) {
// Accumulation
int dot = 0;
int sum = 0;
auto p = &kernel[radius - r];
for (int y = 0; y <= std::min(r + radius, height - 1); y++) {
int weight = *p++;
dot += input[y * width + c] * weight;
sum += weight;
}
// Top part of line, partial kernel
for (int r = 0; r < std::min(radius, height); r++) {
for (int c = 0; c < width; c++) {
// Accumulation
int dot = 0;
int sum = 0;
auto p = &kernel[radius - r];
for (int y = 0; y <= std::min(r + radius, height - 1); y++) {
int weight = *p++;
dot += input[y * width + c] * weight;
sum += weight;
}

// Normalization
int value = static_cast<int>(dot / static_cast<float>(sum) + 0.5f);
output[r * width + c] = static_cast<uint8_t>(value);
}
// Normalization
int value = static_cast<int>(dot / static_cast<float>(sum) + 0.5f);
output[r * width + c] = static_cast<uint8_t>(value);
}
}

// Middle part of computations with full kernel
for (int r = radius; r < height - radius; r++) {
// Accumulation
int dot = 0;
for (int i = 0; i < radius + 1 + radius; i++) {
dot += input[(r - radius + i) * width + c] * kernel[i];
}
for (int r = radius; r < height - radius; r++) {
for (int c = 0; c < width; c++) {
// Accumulation
int dot = 0;
for (int i = 0; i < radius + 1 + radius; i++) {
dot += input[(r - radius + i) * width + c] * kernel[i];
}

// Fast shift instead of division
int value = (dot + rounding) >> shift;
output[r * width + c] = static_cast<uint8_t>(value);
}
// Fast shift instead of division
int value = (dot + rounding) >> shift;
output[r * width + c] = static_cast<uint8_t>(value);
}
}

for (int c = 0; c < width; c++) {
// Bottom part of line, partial kernel
for (int r = std::max(radius, height - radius); r < height; r++) {
// Accumulation
Expand Down Expand Up @@ -139,9 +134,7 @@ void blur(uint8_t *output, const uint8_t *input, const int width,
constexpr int shift = 4;

// A pair of 1-dimensional passes to achieve 2-dimensional transform
transpose(input, temp, width, height);
filterHorizontally(output, temp, height, width, kernel, radius, shift);
transpose(output, temp, height, width);
filterVertically(temp, input, width, height, kernel, radius, shift);
filterHorizontally(output, temp, width, height, kernel, radius, shift);
}

Expand Down

0 comments on commit 5635c84

Please sign in to comment.