Skip to content

Commit

Permalink
replaced integer vector division with multiplication
Browse files Browse the repository at this point in the history
Overall it is only about 1% faster (because this part doesn't take much of the total run time), so this change is more of an example of the technique.
  • Loading branch information
ilyakurdyukov authored Dec 25, 2023
1 parent 470eab1 commit b450599
Show file tree
Hide file tree
Showing 3 changed files with 194 additions and 142 deletions.
7 changes: 4 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ SIMD := native
MFLAGS :=
SIMDFLG :=
SIMDOBJ :=
SIMD_AVX512 := -mavx512f -mavx512dq -mavx512bw -mfma
ifeq ($(SIMD),select)
SIMDOBJ := jpegqs_base.o jpegqs_sse2.o jpegqs_avx2.o jpegqs_avx512.o
else ifeq ($(SIMD),none)
Expand All @@ -30,7 +31,7 @@ else
SIMDFLG := -march=native
endif
else ifeq ($(SIMD),avx512)
SIMDFLG := -mavx512f -mfma
SIMDFLG := $(SIMD_AVX512)
else ifeq ($(SIMD),avx2)
SIMDFLG := -mavx2 -mfma
else ifeq ($(SIMD),sse2)
Expand Down Expand Up @@ -152,7 +153,7 @@ SIMDSEL_FLAGS ?= -DTRANSCODE_ONLY -DWITH_LOG
endif

jpegqs_avx512.o: libjpegqs.c $(SRCDEPS)
$(CC) $(SIMDSEL_FLAGS) -DSIMD_NAME=avx512 -mavx512f -mfma $(CFLAGS_APP) -DSIMD_AVX512 -c -o $@ $<
$(CC) $(SIMDSEL_FLAGS) -DSIMD_NAME=avx512 $(SIMD_AVX512) $(CFLAGS_APP) -DSIMD_AVX512 -c -o $@ $<
jpegqs_avx2.o: libjpegqs.c $(SRCDEPS)
$(CC) $(SIMDSEL_FLAGS) -DSIMD_NAME=avx2 -mavx2 -mfma $(CFLAGS_APP) -DSIMD_AVX2 -c -o $@ $<
jpegqs_sse2.o: libjpegqs.c $(SRCDEPS)
Expand All @@ -169,7 +170,7 @@ lib$(APPNAME).a: libjpegqs.o
libjpegqs.o: libjpegqs.c $(SRCDEPS)
$(CC) $(CFLAGS_APP) -c -o $@ $<
libjpegqs_avx512.o: libjpegqs.c $(SRCDEPS)
$(CC) -DSIMD_NAME=avx512 -mavx512f -mfma $(CFLAGS_APP) -DSIMD_AVX512 -c -o $@ $<
$(CC) -DSIMD_NAME=avx512 $(SIMD_AVX512) $(CFLAGS_APP) -DSIMD_AVX512 -c -o $@ $<
libjpegqs_avx2.o: libjpegqs.c $(SRCDEPS)
$(CC) -DSIMD_NAME=avx2 -mavx2 -mfma $(CFLAGS_APP) -DSIMD_AVX2 -c -o $@ $<
libjpegqs_sse2.o: libjpegqs.c $(SRCDEPS)
Expand Down
2 changes: 2 additions & 0 deletions libjpegqs.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ JPEGQS_ATTR int do_quantsmooth QS_ARGS {
if (!(cpuid[1] & (1 << 5)) || xcr0 & 6) break; // AVX2
type = 3;
if (!(cpuid[1] & (1 << 16)) || xcr0 & 0xe6) break; // AVX512F
if (!(cpuid[1] & (1 << 17))) break; // AVX512DQ
if (!(cpuid[1] & (1 << 30))) break; // AVX512BW
type = 4;
} while (0);

Expand Down
Loading

0 comments on commit b450599

Please sign in to comment.