Skip to content

Commit

Permalink
Adds SSE2 optimized slide_hash.
Browse files Browse the repository at this point in the history
Edit: Removed glue code in deflate.c, since we want
to implement this differently in zlib-ng.
  • Loading branch information
jtkukunas authored and Dead2 committed Sep 4, 2019
1 parent ce00766 commit 11f2e8f
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 2 deletions.
8 changes: 7 additions & 1 deletion arch/x86/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ SRCDIR=.
SRCTOP=../..
TOPDIR=$(SRCTOP)

all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo
# Every x86 source is built twice: a plain object (.o) and a -DPIC object (.lo).
# slide_sse.lo is listed alongside slide_sse.o so the PIC variant is built by default too.
all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo slide_sse.o slide_sse.lo

x86.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
Expand Down Expand Up @@ -48,6 +48,12 @@ crc_folding.o:
crc_folding.lo:
$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c

# Plain object for the SSE2 hash-slide routine; compiled with $(CFLAGS) plus
# $(SSE2FLAG) so the SSE2 intrinsics in slide_sse.c are available.
slide_sse.o:
$(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c

# Same source compiled with $(SFLAGS) and -DPIC (presumably the object used
# for the shared library -- confirm against the top-level Makefile).
slide_sse.lo:
$(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c

mostlyclean: clean
clean:
rm -f *.o *.lo *~
Expand Down
52 changes: 52 additions & 0 deletions arch/x86/slide_sse.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* SSE optimized hash slide
*
* Copyright (C) 2017 Intel Corporation
* Authors:
* Arjan van de Ven <[email protected]>
* Jim Kukunas <[email protected]>
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "deflate.h"

#ifdef USE_SSE_SLIDE
#include <immintrin.h>

void slide_hash_sse(deflate_state *s)
{
unsigned n;
Posf *p;
uInt wsize = s->w_size;
z_const __m128i xmm_wsize = _mm_set1_epi16(s->w_size);

n = s->hash_size;
p = &s->head[n] - 8;
do {
__m128i value, result;

value = _mm_loadu_si128((__m128i *)p);
result= _mm_subs_epu16(value, xmm_wsize);
_mm_storeu_si128((__m128i *)p, result);
p -= 8;
n -= 8;
} while (n > 0);

#ifndef FASTEST
n = wsize;
p = &s->prev[n] - 8;
do {
__m128i value, result;

value = _mm_loadu_si128((__m128i *)p);
result= _mm_subs_epu16(value, xmm_wsize);
_mm_storeu_si128((__m128i *)p, result);

p -= 8;
n -= 8;
} while (n > 0);
#endif
}

#endif

3 changes: 2 additions & 1 deletion win32/Makefile.msc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ SUFFIX =

OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_quick.obj deflate_slow.obj \
deflate_medium.obj \
functable.obj infback.obj inflate.obj inftrees.obj inffast.obj trees.obj uncompr.obj zutil.obj \
functable.obj infback.obj inflate.obj inftrees.obj inffast.obj slide_sse.obj trees.obj uncompr.obj zutil.obj \
x86.obj fill_window_sse.obj insert_string_sse.obj crc_folding.obj
!if "$(ZLIB_COMPAT)" != ""
WITH_GZFILEOP = yes
Expand Down Expand Up @@ -126,6 +126,7 @@ infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/
inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h
inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h $(SRCDIR)/functable.h
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
slide_sse.obj: $(SRCDIR)/arch/x86/slide_sse.c $(SRCDIR)/deflate.h
trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees.h
zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h

Expand Down

0 comments on commit 11f2e8f

Please sign in to comment.