author     Kent Overstreet <kent.overstreet@gmail.com>  2017-05-05 00:35:39 -0800
committer  Kent Overstreet <kent.overstreet@gmail.com>  2017-05-05 00:35:39 -0800
commit     5b24f18a0474487919bab04c1b26b0f368899b2e (patch)
tree       8693b0dc905485914be0f6ff6e8b4e5bc27dbf04
parent     c470abd4fde40ea6a0846a2beab642a578c0b8cd (diff)
lz4 improvements (lz4-refactoring)
v4.11 upgraded the lz4 code so these changes probably aren't needed
anymore - but I like my simplifications and refactoring, might want to
keep this code around.
-rw-r--r--  lib/lz4/lz4_compress.c   | 505
-rw-r--r--  lib/lz4/lz4_decompress.c | 295
-rw-r--r--  lib/lz4/lz4defs.h        | 273
-rw-r--r--  lib/lz4/lz4hc_compress.c | 129
4 files changed, 449 insertions, 753 deletions
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c index 28321d8f75ef..dbd36e250d74 100644 --- a/lib/lz4/lz4_compress.c +++ b/lib/lz4/lz4_compress.c @@ -40,402 +40,187 @@ #include <asm/unaligned.h> #include "lz4defs.h" -/* - * LZ4_compressCtx : - * ----------------- - * Compress 'isize' bytes from 'source' into an output buffer 'dest' of - * maximum size 'maxOutputSize'. * If it cannot achieve it, compression - * will stop, and result of the function will be zero. - * return : the number of bytes written in buffer 'dest', or 0 if the - * compression fails - */ -static inline int lz4_compressctx(void *ctx, - const char *source, - char *dest, - int isize, - int maxoutputsize) +#define LZ4_HASH_VALUE(p, _table) \ + __HASH_VALUE(p, MEMORY_USAGE - ilog2(sizeof(_table[0]))) + +struct lz4_hash_table { + const u8 *(*add)(const struct lz4_hash_table, const u8 *); + void *ctx; + const u8 *base; +}; + +#if __SIZEOF_POINTER__ == 4 +static inline const u8 *hash_table_add32(const struct lz4_hash_table hash, + const u8 *ip) { - HTYPE *hashtable = (HTYPE *)ctx; - const u8 *ip = (u8 *)source; -#if LZ4_ARCH64 - const BYTE * const base = ip; -#else - const int base = 0; -#endif - const u8 *anchor = ip; - const u8 *const iend = ip + isize; - const u8 *const mflimit = iend - MFLIMIT; - #define MATCHLIMIT (iend - LASTLITERALS) + const u8 **table = hash.ctx; - u8 *op = (u8 *) dest; - u8 *const oend = op + maxoutputsize; - int length; - const int skipstrength = SKIPSTRENGTH; - u32 forwardh; - int lastrun; + swap(table[LZ4_HASH_VALUE(ip, table)], ip); + return ip; +} +#else +static inline const u8 *hash_table_add32(const struct lz4_hash_table hash, + const u8 *ip) +{ + u32 *table = hash.ctx; + size_t offset = ip - hash.base; - /* Init */ - if (isize < MINLENGTH) - goto _last_literals; + swap(table[LZ4_HASH_VALUE(ip, table)], offset); + return hash.base + offset; +} +#endif - memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); +static inline const u8 *hash_table_add16(const struct lz4_hash_table hash, + const u8 *ip) +{ + u16 *table = hash.ctx; + size_t offset = ip - hash.base; - /* First Byte */ - hashtable[LZ4_HASH_VALUE(ip)] = ip - base; - ip++; - forwardh = LZ4_HASH_VALUE(ip); - - /* Main Loop */ - for (;;) { - int findmatchattempts = (1U << skipstrength) + 3; - const u8 *forwardip = ip; - const u8 *ref; - u8 *token; - - /* Find a match */ - do { - u32 h = forwardh; - int step = findmatchattempts++ >> skipstrength; - ip = forwardip; - forwardip = ip + step; - - if (unlikely(forwardip > mflimit)) - goto _last_literals; - - forwardh = LZ4_HASH_VALUE(forwardip); - ref = base + hashtable[h]; - hashtable[h] = ip - base; - } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); - - /* Catch up */ - while ((ip > anchor) && (ref > (u8 *)source) && - unlikely(ip[-1] == ref[-1])) { - ip--; - ref--; - } + swap(table[LZ4_HASH_VALUE(ip, table)], offset); + return hash.base + offset; +} - /* Encode Literal length */ - length = (int)(ip - anchor); - token = op++; - /* check output limit */ - if (unlikely(op + length + (2 + 1 + LASTLITERALS) + - (length >> 8) > oend)) - return 0; - - if (length >= (int)RUN_MASK) { - int len; - *token = (RUN_MASK << ML_BITS); - len = length - RUN_MASK; - for (; len > 254 ; len -= 255) - *op++ = 255; - *op++ = (u8)len; - } else - *token = (length << ML_BITS); - - /* Copy Literals */ - LZ4_BLINDCOPY(anchor, op, length); -_next_match: - /* Encode Offset */ - LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); - - /* Start Counting */ - ip += MINMATCH; - /* MinMatch verified */ - ref += 
MINMATCH; - anchor = ip; - while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) { - #if LZ4_ARCH64 - u64 diff = A64(ref) ^ A64(ip); - #else - u32 diff = A32(ref) ^ A32(ip); - #endif - if (!diff) { - ip += STEPSIZE; - ref += STEPSIZE; - continue; - } - ip += LZ4_NBCOMMONBYTES(diff); - goto _endcount; - } - #if LZ4_ARCH64 - if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { - ip += 4; - ref += 4; - } - #endif - if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { - ip += 2; - ref += 2; - } - if ((ip < MATCHLIMIT) && (*ref == *ip)) - ip++; -_endcount: - /* Encode MatchLength */ - length = (int)(ip - anchor); - /* Check output limit */ - if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend)) - return 0; - if (length >= (int)ML_MASK) { - *token += ML_MASK; - length -= ML_MASK; - for (; length > 509 ; length -= 510) { - *op++ = 255; - *op++ = 255; - } - if (length > 254) { - length -= 255; - *op++ = 255; +static inline const u8 *find_match(const struct lz4_hash_table hash, + const u8 **ip, const u8 *anchor, + const u8 *start, const u8 *mflimit) +{ + int findmatchattempts = (1U << SKIPSTRENGTH) + 3; + + while (*ip <= mflimit) { + const u8 *ref = hash.add(hash, *ip); + + if (ref >= *ip - MAX_DISTANCE && A32(ref) == A32(*ip)) { + /* found match: */ + while (*ip > anchor && + ref > start && + unlikely((*ip)[-1] == ref[-1])) { + (*ip)--; + ref--; } - *op++ = (u8)length; - } else - *token += length; - /* Test end of chunk */ - if (ip > mflimit) { - anchor = ip; - break; + return ref; } - /* Fill table */ - hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; - - /* Test next position */ - ref = base + hashtable[LZ4_HASH_VALUE(ip)]; - hashtable[LZ4_HASH_VALUE(ip)] = ip - base; - if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { - token = op++; - *token = 0; - goto _next_match; - } - - /* Prepare next loop */ - anchor = ip++; - forwardh = LZ4_HASH_VALUE(ip); + *ip += findmatchattempts++ >> SKIPSTRENGTH; } -_last_literals: - /* Encode Last Literals */ - lastrun = (int)(iend - anchor); - if (((char *)op - dest) + lastrun + 1 - + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize) - return 0; - - if (lastrun >= (int)RUN_MASK) { - *op++ = (RUN_MASK << ML_BITS); - lastrun -= RUN_MASK; - for (; lastrun > 254 ; lastrun -= 255) - *op++ = 255; - *op++ = (u8)lastrun; - } else - *op++ = (lastrun << ML_BITS); - memcpy(op, anchor, iend - anchor); - op += iend - anchor; + return NULL; +} - /* End */ - return (int)(((char *)op) - dest); +static inline int length_len(unsigned length) +{ + return length / 255 + 1; } -static inline int lz4_compress64kctx(void *ctx, - const char *source, - char *dest, - int isize, - int maxoutputsize) +/* + * LZ4_compressCtx : + * ----------------- + * Compress 'isize' bytes from 'source' into an output buffer 'dest' of + * maximum size 'maxOutputSize'. * If it cannot achieve it, compression + * will stop, and result of the function will be zero. 
+ * return : the number of bytes written in buffer 'dest', or 0 if the + * compression fails + */ +static inline int lz4_compressctx(const struct lz4_hash_table hash, + const u8 *src, size_t src_len, + u8 *dst, size_t *dst_len) { - u16 *hashtable = (u16 *)ctx; - const u8 *ip = (u8 *) source; - const u8 *anchor = ip; - const u8 *const base = ip; - const u8 *const iend = ip + isize; + const u8 *ip = src, *anchor = ip, *ref; + const u8 *const iend = ip + src_len; const u8 *const mflimit = iend - MFLIMIT; - #define MATCHLIMIT (iend - LASTLITERALS) - - u8 *op = (u8 *) dest; - u8 *const oend = op + maxoutputsize; - int len, length; - const int skipstrength = SKIPSTRENGTH; - u32 forwardh; - int lastrun; + const u8 *const matchlimit = iend - LASTLITERALS; + u8 *op = dst, *token; + u8 *const oend = op + *dst_len; + size_t literal_len, match_len, match_offset; /* Init */ - if (isize < MINLENGTH) - goto _last_literals; - - memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + memset(hash.ctx, 0, LZ4_MEM_COMPRESS); + hash.add(hash, ip); - /* First Byte */ + /* Always start with a literal: */ ip++; - forwardh = LZ4_HASH64K_VALUE(ip); - - /* Main Loop */ - for (;;) { - int findmatchattempts = (1U << skipstrength) + 3; - const u8 *forwardip = ip; - const u8 *ref; - u8 *token; - - /* Find a match */ - do { - u32 h = forwardh; - int step = findmatchattempts++ >> skipstrength; - ip = forwardip; - forwardip = ip + step; - - if (forwardip > mflimit) - goto _last_literals; - - forwardh = LZ4_HASH64K_VALUE(forwardip); - ref = base + hashtable[h]; - hashtable[h] = (u16)(ip - base); - } while (A32(ref) != A32(ip)); - - /* Catch up */ - while ((ip > anchor) && (ref > (u8 *)source) - && (ip[-1] == ref[-1])) { - ip--; - ref--; - } - /* Encode Literal length */ - length = (int)(ip - anchor); - token = op++; - /* Check output limit */ - if (unlikely(op + length + (2 + 1 + LASTLITERALS) - + (length >> 8) > oend)) - return 0; - if (length >= (int)RUN_MASK) { - *token = (RUN_MASK << ML_BITS); - len = length - RUN_MASK; - for (; len > 254 ; len -= 255) - *op++ = 255; - *op++ = (u8)len; - } else - *token = (length << ML_BITS); - - /* Copy Literals */ - LZ4_BLINDCOPY(anchor, op, length); - -_next_match: - /* Encode Offset */ - LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); - - /* Start Counting */ + while ((ref = find_match(hash, &ip, anchor, src, mflimit))) { + /* + * We found a match; @ip now points to the match and @ref points + * to the prior part of the input we matched with. 
Everything up + * to @anchor has been encoded; the range from @anchor to @ip + * didn't match and now has to be encoded as a literal: + */ + literal_len = ip - anchor; + match_offset = ip - ref; + + /* MINMATCH bytes already matched from find_match(): */ ip += MINMATCH; - /* MinMatch verified */ ref += MINMATCH; - anchor = ip; - - while (ip < MATCHLIMIT - (STEPSIZE - 1)) { - #if LZ4_ARCH64 - u64 diff = A64(ref) ^ A64(ip); - #else - u32 diff = A32(ref) ^ A32(ip); - #endif - - if (!diff) { - ip += STEPSIZE; - ref += STEPSIZE; - continue; - } - ip += LZ4_NBCOMMONBYTES(diff); - goto _endcount; - } - #if LZ4_ARCH64 - if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { - ip += 4; - ref += 4; - } - #endif - if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { - ip += 2; - ref += 2; - } - if ((ip < MATCHLIMIT) && (*ref == *ip)) - ip++; -_endcount: - - /* Encode MatchLength */ - len = (int)(ip - anchor); - /* Check output limit */ - if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) - return 0; - if (len >= (int)ML_MASK) { - *token += ML_MASK; - len -= ML_MASK; - for (; len > 509 ; len -= 510) { - *op++ = 255; - *op++ = 255; - } - if (len > 254) { - len -= 255; - *op++ = 255; - } - *op++ = (u8)len; - } else - *token += len; + match_len = common_length(ip, ref, matchlimit); + ip += match_len; - /* Test end of chunk */ - if (ip > mflimit) { - anchor = ip; + /* check output limit */ + if (unlikely(op + + 1 + /* token */ + 2 + /* match ofset */ + literal_len + + length_len(literal_len) + + length_len(match_len) + + LASTLITERALS > oend)) break; - } - /* Fill table */ - hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base); + token = op++; + *token = encode_length(&op, literal_len) << ML_BITS; + MEMCPY_ADVANCE_CHUNKED(op, anchor, literal_len); + PUT_LE16_ADVANCE(op, match_offset); + *token += encode_length(&op, match_len); - /* Test next position */ - ref = base + hashtable[LZ4_HASH64K_VALUE(ip)]; - hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base); - if (A32(ref) == A32(ip)) { - token = op++; - *token = 0; - goto _next_match; - } + anchor = ip; + } + + /* Encode remaining input as literal: */ + literal_len = iend - anchor; + if (unlikely(op + + 1 + + literal_len + + length_len(literal_len) > oend)) { + /* Return how much would be able to fit: */ + ssize_t remaining = oend - op; + ssize_t encoded = anchor - src; - /* Prepare next loop */ - anchor = ip++; - forwardh = LZ4_HASH64K_VALUE(ip); + remaining -= length_len(remaining) + 1; + + return -max(encoded + remaining, 1L); } -_last_literals: - /* Encode Last Literals */ - lastrun = (int)(iend - anchor); - if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend) - return 0; - if (lastrun >= (int)RUN_MASK) { - *op++ = (RUN_MASK << ML_BITS); - lastrun -= RUN_MASK; - for (; lastrun > 254 ; lastrun -= 255) - *op++ = 255; - *op++ = (u8)lastrun; - } else - *op++ = (lastrun << ML_BITS); - memcpy(op, anchor, iend - anchor); - op += iend - anchor; + token = op++; + *token = encode_length(&op, literal_len) << ML_BITS; + MEMCPY_ADVANCE(op, anchor, literal_len); + /* End */ - return (int)(((char *)op) - dest); + BUG_ON(op > oend); + *dst_len = op - dst; + return 0; } +__attribute__((flatten)) int lz4_compress(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len, void *wrkmem) + unsigned char *dst, size_t *dst_len, void *wrkmem) { - int ret = -1; - int out_len = 0; - - if (src_len < LZ4_64KLIMIT) - out_len = lz4_compress64kctx(wrkmem, src, dst, src_len, - lz4_compressbound(src_len)); - else - out_len = 
lz4_compressctx(wrkmem, src, dst, src_len, - lz4_compressbound(src_len)); - - if (out_len < 0) - goto exit; - - *dst_len = out_len; - - return 0; -exit: - return ret; + if (src_len < LZ4_64KLIMIT) { + const struct lz4_hash_table hash = { + .add = hash_table_add16, + .ctx = wrkmem, + .base = src, + }; + + return lz4_compressctx(hash, src, src_len, dst, dst_len); + } else { + const struct lz4_hash_table hash = { + .add = hash_table_add32, + .ctx = wrkmem, + .base = src, + }; + + return lz4_compressctx(hash, src, src_len, dst, dst_len); + } } EXPORT_SYMBOL(lz4_compress); diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 6d940c72b5fc..0f3e42dd01b0 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -43,111 +43,99 @@ #endif #include <linux/lz4.h> -#include <asm/unaligned.h> - #include "lz4defs.h" -static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +static const int dec32table[8] = {0, 3, 2, 3, 0, 0, 0, 0}; #if LZ4_ARCH64 -static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +static const int dec64table[8] = {0, 0, 0, -1, 0, 1, 2, 3}; +#else +static const int dec64table[8] = {0, 0, 0, 0, 0, 0, 0, 0}; #endif -static int lz4_uncompress(const char *source, char *dest, int osize) +static inline size_t get_length(const u8 **ip, size_t length) { - const BYTE *ip = (const BYTE *) source; - const BYTE *ref; - BYTE *op = (BYTE *) dest; - BYTE * const oend = op + osize; - BYTE *cpy; - unsigned token; - size_t length; + if (length == LENGTH_LONG) { + size_t len; - while (1) { + do { + length += (len = *(*ip)++); + } while (len == 255); + } + + return length; +} + +static int lz4_uncompress(const u8 *source, u8 *dest, int osize) +{ + const u8 *ip = source; + const u8 *ref; + u8 *op = dest; + u8 * const oend = op + osize; + u8 *cpy; + unsigned token, offset; + ssize_t length; + while (1) { /* get runlength */ token = *ip++; - length = (token >> ML_BITS); - if (length == RUN_MASK) { - size_t len; - - len = *ip++; - for (; len == 255; length += 255) - len = *ip++; - if (unlikely(length > (size_t)(length + len))) - goto _output_error; - length += len; - } + length = get_length(&ip, token >> ML_BITS); /* copy literals */ - cpy = op + length; - if (unlikely(cpy > oend - COPYLENGTH)) { + if (unlikely(op + length > oend - COPYLENGTH)) { /* * Error: not enough place for another match * (min 4) + 5 literals */ - if (cpy != oend) + if (op + length != oend) goto _output_error; - memcpy(op, ip, length); - ip += length; + MEMCPY_ADVANCE(op, ip, length); break; /* EOF */ } - LZ4_WILDCOPY(ip, op, cpy); - ip -= (op - cpy); - op = cpy; + MEMCPY_ADVANCE_CHUNKED(op, ip, length); - /* get offset */ - LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); - ip += 2; + /* get match offset */ + offset = GET_LE16_ADVANCE(ip); + ref = op - offset; /* Error: offset create reference outside destination buffer */ - if (unlikely(ref < (BYTE *const) dest)) + if (unlikely(ref < (u8 *const) dest)) goto _output_error; - /* get matchlength */ - length = token & ML_MASK; - if (length == ML_MASK) { - for (; *ip == 255; length += 255) - ip++; - if (unlikely(length > (size_t)(length + *ip))) - goto _output_error; - length += *ip++; - } + /* get match length */ + length = get_length(&ip, token & ML_MASK); + length += MINMATCH; - /* copy repeated sequence */ - if (unlikely((op - ref) < STEPSIZE)) { -#if LZ4_ARCH64 - int dec64 = dec64table[op - ref]; -#else - const int dec64 = 0; -#endif - op[0] = ref[0]; - op[1] = ref[1]; - op[2] = ref[2]; - op[3] = ref[3]; - op += 4; - ref += 4; - ref -= dec32table[op-ref]; 
- PUT4(ref, op); + /* copy first STEPSIZE bytes of match: */ + if (unlikely(offset < STEPSIZE)) { + MEMCPY_ADVANCE_BYTES(op, ref, 4); + ref -= dec32table[offset]; + + memcpy(op, ref, 4); op += STEPSIZE - 4; - ref -= dec64; + ref -= dec64table[offset]; } else { - LZ4_COPYSTEP(ref, op); + MEMCPY_ADVANCE(op, ref, STEPSIZE); } - cpy = op + length - (STEPSIZE - 4); - if (cpy > (oend - COPYLENGTH)) { + length -= STEPSIZE; + /* + * Note - length could have been < STEPSIZE; that's ok, length + * will now be negative and we'll just end up rewinding op: + */ + /* copy rest of match: */ + cpy = op + length; + if (cpy > oend - COPYLENGTH) { /* Error: request to write beyond destination buffer */ - if (cpy > oend) + if (cpy > oend || + ref + COPYLENGTH > oend) goto _output_error; -#if LZ4_ARCH64 - if ((ref + COPYLENGTH) > oend) -#else - if ((ref + COPYLENGTH) > oend || - (op + COPYLENGTH) > oend) -#endif +#if !LZ4_ARCH64 + if (op + COPYLENGTH > oend) goto _output_error; - LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); +#endif + MEMCPY_ADVANCE_CHUNKED_NOFIXUP(op, ref, oend - COPYLENGTH); + /* op could be > cpy here */ while (op < cpy) *op++ = *ref++; op = cpy; @@ -157,55 +145,60 @@ static int lz4_uncompress(const char *source, char *dest, int osize) */ if (op == oend) goto _output_error; - continue; + } else { + MEMCPY_ADVANCE_CHUNKED(op, ref, length); } - LZ4_SECURECOPY(ref, op, cpy); - op = cpy; /* correction */ } /* end of decoding */ - return (int) (((char *)ip) - source); + return ip - source; /* write overflow error detected */ _output_error: return -1; } -static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, - int isize, size_t maxoutputsize) +static inline ssize_t get_length_safe(const u8 **ip, ssize_t length) { - const BYTE *ip = (const BYTE *) source; - const BYTE *const iend = ip + isize; - const BYTE *ref; + if (length == 15) { + size_t len; + do { + length += (len = *(*ip)++); + if (unlikely((ssize_t) length < 0)) + return -1; - BYTE *op = (BYTE *) dest; - BYTE * const oend = op + maxoutputsize; - BYTE *cpy; + length += len; + } while (len == 255); + } - /* Main Loop */ - while (ip < iend) { + return length; +} - unsigned token; - size_t length; +static int lz4_uncompress_unknownoutputsize(const u8 *source, u8 *dest, + int isize, size_t maxoutputsize) +{ + const u8 *ip = source; + const u8 *const iend = ip + isize; + const u8 *ref; + u8 *op = dest; + u8 * const oend = op + maxoutputsize; + u8 *cpy; + unsigned token, offset; + size_t length; + /* Main Loop */ + while (ip < iend) { /* get runlength */ token = *ip++; - length = (token >> ML_BITS); - if (length == RUN_MASK) { - int s = 255; - while ((ip < iend) && (s == 255)) { - s = *ip++; - if (unlikely(length > (size_t)(length + s))) - goto _output_error; - length += s; - } - } + length = get_length_safe(&ip, token >> ML_BITS); + if (unlikely((ssize_t) length < 0)) + goto _output_error; + /* copy literals */ - cpy = op + length; - if ((cpy > oend - COPYLENGTH) || - (ip + length > iend - COPYLENGTH)) { + if ((op + length > oend - COPYLENGTH) || + (ip + length > iend - COPYLENGTH)) { - if (cpy > oend) + if (op + length > oend) goto _output_error;/* writes beyond buffer */ if (ip + length != iend) @@ -214,70 +207,51 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, * to consume all input * at this stage */ - memcpy(op, ip, length); - op += length; + MEMCPY_ADVANCE(op, ip, length); break;/* Necessarily EOF, due to parsing restrictions */ } - LZ4_WILDCOPY(ip, op, cpy); - ip -= (op - cpy); - op = 
cpy; - - /* get offset */ - LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); - ip += 2; - if (ref < (BYTE * const) dest) + MEMCPY_ADVANCE_CHUNKED(op, ip, length); + + /* get match offset */ + offset = GET_LE16_ADVANCE(ip); + ref = op - offset; + + /* Error: offset create reference outside destination buffer */ + if (ref < (u8 * const) dest) goto _output_error; - /* - * Error : offset creates reference - * outside of destination buffer - */ - /* get matchlength */ - length = (token & ML_MASK); - if (length == ML_MASK) { - while (ip < iend) { - int s = *ip++; - if (unlikely(length > (size_t)(length + s))) - goto _output_error; - length += s; - if (s == 255) - continue; - break; - } - } + /* get match length */ + length = get_length_safe(&ip, token & ML_MASK); + if (unlikely((ssize_t) length < 0)) + goto _output_error; - /* copy repeated sequence */ - if (unlikely((op - ref) < STEPSIZE)) { -#if LZ4_ARCH64 - int dec64 = dec64table[op - ref]; -#else - const int dec64 = 0; -#endif - op[0] = ref[0]; - op[1] = ref[1]; - op[2] = ref[2]; - op[3] = ref[3]; - op += 4; - ref += 4; - ref -= dec32table[op - ref]; - PUT4(ref, op); - op += STEPSIZE - 4; - ref -= dec64; + length += MINMATCH; + + /* copy first STEPSIZE bytes of match: */ + if (unlikely(offset < STEPSIZE)) { + MEMCPY_ADVANCE_BYTES(op, ref, 4); + ref -= dec32table[offset]; + + memcpy(op, ref, 4); + op += STEPSIZE - 4; + ref -= dec64table[offset]; } else { - LZ4_COPYSTEP(ref, op); + MEMCPY_ADVANCE(op, ref, STEPSIZE); } - cpy = op + length - (STEPSIZE-4); + length -= STEPSIZE; + + /* copy rest of match: */ + cpy = op + length; if (cpy > oend - COPYLENGTH) { - if (cpy > oend) - goto _output_error; /* write outside of buf */ -#if LZ4_ARCH64 - if ((ref + COPYLENGTH) > oend) -#else - if ((ref + COPYLENGTH) > oend || - (op + COPYLENGTH) > oend) -#endif + /* Error: request to write beyond destination buffer */ + if (cpy > oend || + ref + COPYLENGTH > oend) + goto _output_error; +#if !LZ4_ARCH64 + if (op + COPYLENGTH > oend) goto _output_error; - LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); +#endif + MEMCPY_ADVANCE_CHUNKED_NOFIXUP(op, ref, oend - COPYLENGTH); while (op < cpy) *op++ = *ref++; op = cpy; @@ -287,13 +261,12 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, */ if (op == oend) goto _output_error; - continue; + } else { + MEMCPY_ADVANCE_CHUNKED(op, ref, length); } - LZ4_SECURECOPY(ref, op, cpy); - op = cpy; /* correction */ } /* end of decoding */ - return (int) (((char *) op) - dest); + return op - dest; /* write overflow error detected */ _output_error: diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index c79d7ea8a38e..66deff6342fe 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -17,141 +17,164 @@ #define LZ4_ARCH64 0 #endif +#include <asm/unaligned.h> + +#define A32(_p) get_unaligned((u32 *) (_p)) +#define A16(_p) get_unaligned((u16 *) (_p)) + +#define GET_LE16_ADVANCE(_src) \ +({ \ + u16 _r = get_unaligned_le16(_src); \ + (_src) += 2; \ + _r; \ +}) + +#define PUT_LE16_ADVANCE(_dst, _v) \ +do { \ + put_unaligned_le16((_v), (_dst)); \ + (_dst) += 2; \ +} while (0) + +#define LENGTH_LONG 15 +#define COPYLENGTH 8 +#define ML_BITS 4 +#define ML_MASK ((1U << ML_BITS) - 1) +#define RUN_BITS (8 - ML_BITS) +#define RUN_MASK ((1U << RUN_BITS) - 1) +#define MEMORY_USAGE 14 +#define MINMATCH 4 +#define SKIPSTRENGTH 6 +#define LASTLITERALS 5 +#define MFLIMIT (COPYLENGTH + MINMATCH) +#define MINLENGTH (MFLIMIT + 1) +#define MAXD_LOG 16 +#define MAXD (1 << MAXD_LOG) +#define MAXD_MASK (u32)(MAXD - 1) +#define 
MAX_DISTANCE (MAXD - 1) +#define HASH_LOG (MAXD_LOG - 1) +#define HASHTABLESIZE (1 << HASH_LOG) +#define MAX_NB_ATTEMPTS 256 +#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) +#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1)) + +#define __HASH_VALUE(p, bits) \ + (((A32(p)) * 2654435761U) >> (32 - (bits))) + +#define HASH_VALUE(p) __HASH_VALUE(p, HASH_LOG) + +#define MEMCPY_ADVANCE(_dst, _src, length) \ +do { \ + typeof(length) _length = (length); \ + memcpy(_dst, _src, _length); \ + _src += _length; \ + _dst += _length; \ +} while (0) + +#define MEMCPY_ADVANCE_BYTES(_dst, _src, _length) \ +do { \ + const u8 *_end = (_src) + (_length); \ + while ((_src) < _end) \ + *_dst++ = *_src++; \ +} while (0) + +#define STEPSIZE __SIZEOF_LONG__ + +#define LZ4_COPYPACKET(_src, _dst) \ +do { \ + MEMCPY_ADVANCE(_dst, _src, STEPSIZE); \ + MEMCPY_ADVANCE(_dst, _src, COPYLENGTH - STEPSIZE);\ +} while (0) + /* - * Architecture-specific macros + * Equivalent to MEMCPY_ADVANCE - except may overrun @_dst and @_src by + * COPYLENGTH: + * + * Note: src and dst may overlap (with src < dst) - we must do the copy in + * STEPSIZE chunks for correctness + * + * Note also: length may be negative - we must not call memcpy if length is + * negative, but still adjust dst and src by length */ -#define BYTE u8 -typedef struct _U16_S { u16 v; } U16_S; -typedef struct _U32_S { u32 v; } U32_S; -typedef struct _U64_S { u64 v; } U64_S; -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) - -#define A16(x) (((U16_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A64(x) (((U64_S *)(x))->v) - -#define PUT4(s, d) (A32(d) = A32(s)) -#define PUT8(s, d) (A64(d) = A64(s)) - -#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ - (d = s - A16(p)) - -#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ - do { \ - A16(p) = v; \ - p += 2; \ - } while (0) -#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ - -#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v)) -#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v)) -#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v)) - -#define PUT4(s, d) \ - put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) -#define PUT8(s, d) \ - put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) - -#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ - (d = s - get_unaligned_le16(p)) - -#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ - do { \ - put_unaligned_le16(v, (u16 *)(p)); \ - p += 2; \ - } while (0) -#endif - -#define COPYLENGTH 8 -#define ML_BITS 4 -#define ML_MASK ((1U << ML_BITS) - 1) -#define RUN_BITS (8 - ML_BITS) -#define RUN_MASK ((1U << RUN_BITS) - 1) -#define MEMORY_USAGE 14 -#define MINMATCH 4 -#define SKIPSTRENGTH 6 -#define LASTLITERALS 5 -#define MFLIMIT (COPYLENGTH + MINMATCH) -#define MINLENGTH (MFLIMIT + 1) -#define MAXD_LOG 16 -#define MAXD (1 << MAXD_LOG) -#define MAXD_MASK (u32)(MAXD - 1) -#define MAX_DISTANCE (MAXD - 1) -#define HASH_LOG (MAXD_LOG - 1) -#define HASHTABLESIZE (1 << HASH_LOG) -#define MAX_NB_ATTEMPTS 256 -#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) -#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1)) -#define HASHLOG64K ((MEMORY_USAGE - 2) + 1) -#define HASH64KTABLESIZE (1U << HASHLOG64K) -#define LZ4_HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ - ((MINMATCH * 8) - (MEMORY_USAGE-2))) -#define LZ4_HASH64K_VALUE(p) (((A32(p)) * 2654435761U) >> \ - ((MINMATCH * 8) - HASHLOG64K)) -#define HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ - ((MINMATCH * 8) - HASH_LOG)) - -#if LZ4_ARCH64/* 64-bit */ -#define STEPSIZE 8 - -#define LZ4_COPYSTEP(s, d) \ - do { \ - PUT8(s, d); \ - d += 8; \ - s += 8; 
\ - } while (0) - -#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) - -#define LZ4_SECURECOPY(s, d, e) \ - do { \ - if (d < e) { \ - LZ4_WILDCOPY(s, d, e); \ - } \ - } while (0) -#define HTYPE u32 - -#ifdef __BIG_ENDIAN -#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) +#define MEMCPY_ADVANCE_CHUNKED(_dst, _src, _length) \ +do { \ + u8 *_end = (_dst) + (_length); \ + while ((_dst) < _end) \ + LZ4_COPYPACKET(_src, _dst); \ + _src -= (_dst) - _end; \ + _dst = _end; \ +} while (0) + +#define MEMCPY_ADVANCE_CHUNKED_NOFIXUP(_dst, _src, _end)\ +do { \ + while ((_dst) < (_end)) \ + LZ4_COPYPACKET((_src), (_dst)); \ +} while (0) + +struct lz4_hashtable { +#if LZ4_ARCH64 + const u8 * const base; + u32 *table; #else -#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3) + const int base; + const u8 *table; #endif +}; +#if LZ4_ARCH64 +#define HTYPE u32 #else /* 32-bit */ -#define STEPSIZE 4 - -#define LZ4_COPYSTEP(s, d) \ - do { \ - PUT4(s, d); \ - d += 4; \ - s += 4; \ - } while (0) - -#define LZ4_COPYPACKET(s, d) \ - do { \ - LZ4_COPYSTEP(s, d); \ - LZ4_COPYSTEP(s, d); \ - } while (0) - -#define LZ4_SECURECOPY LZ4_WILDCOPY #define HTYPE const u8* +#endif #ifdef __BIG_ENDIAN -#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3) +#define LZ4_NBCOMMONBYTES(val) (__builtin_clzl(val) >> 3) #else -#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3) +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzl(val) >> 3) #endif +static inline unsigned common_length(const u8 *l, const u8 *r, + const u8 *const l_end) +{ + const u8 *l_start = l; + + while (likely(l <= l_end - sizeof(long))) { + unsigned long diff = + get_unaligned((unsigned long *) l) ^ + get_unaligned((unsigned long *) r); + + if (diff) + return l + LZ4_NBCOMMONBYTES(diff) - l_start; + + l += sizeof(long); + r += sizeof(long); + } +#if LZ4_ARCH64 + if (l <= l_end - 4 && A32(r) == A32(l)) { + l += 4; + r += 4; + } #endif - -#define LZ4_WILDCOPY(s, d, e) \ - do { \ - LZ4_COPYPACKET(s, d); \ - } while (d < e) - -#define LZ4_BLINDCOPY(s, d, l) \ - do { \ - u8 *e = (d) + l; \ - LZ4_WILDCOPY(s, d, e); \ - d = e; \ - } while (0) + if (l <= l_end - 2 && A16(r) == A16(l)) { + l += 2; + r += 2; + } + if (l <= l_end - 1 && *r == *l) { + l++; + r++; + } + + return l - l_start; +} + +static inline unsigned encode_length(u8 **op, unsigned length) +{ + if (length >= LENGTH_LONG) { + length -= LENGTH_LONG; + + for (; length > 254 ; length -= 255) + *(*op)++ = 255; + *(*op)++ = length; + return LENGTH_LONG; + } else + return length; +} diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c index f344f76b6559..b64ded0d9867 100644 --- a/lib/lz4/lz4hc_compress.c +++ b/lib/lz4/lz4hc_compress.c @@ -67,7 +67,7 @@ static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip) u16 *chaintable = hc4->chaintable; HTYPE *hashtable = hc4->hashtable; #if LZ4_ARCH64 - const BYTE * const base = hc4->base; + const u8 * const base = hc4->base; #else const int base = 0; #endif @@ -83,41 +83,6 @@ static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip) } } -static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2, - const u8 *const matchlimit) -{ - const u8 *p1t = p1; - - while (p1t < matchlimit - (STEPSIZE - 1)) { -#if LZ4_ARCH64 - u64 diff = A64(p2) ^ A64(p1t); -#else - u32 diff = A32(p2) ^ A32(p1t); -#endif - if (!diff) { - p1t += STEPSIZE; - p2 += STEPSIZE; - continue; - } - p1t += LZ4_NBCOMMONBYTES(diff); - return p1t - p1; - } -#if LZ4_ARCH64 - if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) { - p1t += 4; - p2 += 4; 
- } -#endif - - if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) { - p1t += 2; - p2 += 2; - } - if ((p1t < matchlimit) && (*p2 == *p1t)) - p1t++; - return p1t - p1; -} - static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, const u8 *ip, const u8 *const matchlimit, const u8 **matchpos) { @@ -125,7 +90,7 @@ static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, HTYPE *const hashtable = hc4->hashtable; const u8 *ref; #if LZ4_ARCH64 - const BYTE * const base = hc4->base; + const u8 * const base = hc4->base; #else const int base = 0; #endif @@ -142,7 +107,7 @@ static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, /* confirmed */ if (A32(ref) == A32(ip)) { delta = (u16)(ip-ref); - repl = ml = lz4hc_commonlength(ip + MINMATCH, + repl = ml = common_length(ip + MINMATCH, ref + MINMATCH, matchlimit) + MINMATCH; *matchpos = ref; } @@ -154,7 +119,7 @@ static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, if (*(ref + ml) == *(ip + ml)) { if (A32(ref) == A32(ip)) { size_t mlt = - lz4hc_commonlength(ip + MINMATCH, + common_length(ip + MINMATCH, ref + MINMATCH, matchlimit) + MINMATCH; if (mlt > ml) { ml = mlt; @@ -167,8 +132,8 @@ static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, /* Complete table */ if (repl) { - const BYTE *ptr = ip; - const BYTE *end; + const u8 *ptr = ip; + const u8 *end; end = ip + repl - (MINMATCH-1); /* Pre-Load */ while (ptr < end - delta) { @@ -194,7 +159,7 @@ static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4, u16 *const chaintable = hc4->chaintable; HTYPE *const hashtable = hc4->hashtable; #if LZ4_ARCH64 - const BYTE * const base = hc4->base; + const u8 * const base = hc4->base; #else const int base = 0; #endif @@ -211,51 +176,23 @@ static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4, nbattempts--; if (*(startlimit + longest) == *(ref - delta + longest)) { if (A32(ref) == A32(ip)) { - const u8 *reft = ref + MINMATCH; - const u8 *ipt = ip + MINMATCH; + const u8 *reft = ref; const u8 *startt = ip; - - while (ipt < matchlimit-(STEPSIZE - 1)) { - #if LZ4_ARCH64 - u64 diff = A64(reft) ^ A64(ipt); - #else - u32 diff = A32(reft) ^ A32(ipt); - #endif - - if (!diff) { - ipt += STEPSIZE; - reft += STEPSIZE; - continue; - } - ipt += LZ4_NBCOMMONBYTES(diff); - goto _endcount; - } - #if LZ4_ARCH64 - if ((ipt < (matchlimit - 3)) - && (A32(reft) == A32(ipt))) { - ipt += 4; - reft += 4; - } - ipt += 2; - #endif - if ((ipt < (matchlimit - 1)) - && (A16(reft) == A16(ipt))) { - reft += 2; - } - if ((ipt < matchlimit) && (*reft == *ipt)) - ipt++; -_endcount: - reft = ref; + unsigned length = + common_length(ip + MINMATCH, + ref + MINMATCH, + matchlimit); while ((startt > startlimit) && (reft > hc4->base) && (startt[-1] == reft[-1])) { startt--; reft--; + length++; } - if ((ipt - startt) > longest) { - longest = (int)(ipt - startt); + if (length > longest) { + longest = length; *matchpos = reft; *startpos = startt; } @@ -269,43 +206,21 @@ _endcount: static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor, int ml, const u8 *ref) { - int length, len; + unsigned length; u8 *token; /* Encode Literal length */ - length = (int)(*ip - *anchor); + length = *ip - *anchor; token = (*op)++; - if (length >= (int)RUN_MASK) { - *token = (RUN_MASK << ML_BITS); - len = length - RUN_MASK; - for (; len > 254 ; len -= 255) - *(*op)++ = 255; - *(*op)++ = (u8)len; - } else - *token = (length << ML_BITS); + *token = encode_length(op, length) << ML_BITS; /* Copy 
Literals */ - LZ4_BLINDCOPY(*anchor, *op, length); + MEMCPY_ADVANCE_CHUNKED(*op, *anchor, length); /* Encode Offset */ - LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref)); - - /* Encode MatchLength */ - len = (int)(ml - MINMATCH); - if (len >= (int)ML_MASK) { - *token += ML_MASK; - len -= ML_MASK; - for (; len > 509 ; len -= 510) { - *(*op)++ = 255; - *(*op)++ = 255; - } - if (len > 254) { - len -= 255; - *(*op)++ = 255; - } - *(*op)++ = (u8)len; - } else - *token += len; + PUT_LE16_ADVANCE(*op, (u16)(*ip - ref)); + + *token += encode_length(op, ml - MINMATCH); /* Prepare next loop */ *ip += ml; |
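
Worth spelling out, since the patch folds several open-coded length loops into
the shared encode_length()/get_length() pair: each LZ4 sequence token carries
a 4-bit length field, and the value 15 (LENGTH_LONG) means the real length
continues in the following bytes, each adding up to 255, terminated by the
first byte below 255. Below is a minimal userspace sketch of the round trip;
encode_length() mirrors the patch, while decode_length() is an illustrative
stand-in for the kernel's get_length():

#include <stdint.h>
#include <stdio.h>

#define LENGTH_LONG 15	/* 4-bit token field saturates here */

/* Emit overflow bytes for @length; return the 4-bit token field. */
static unsigned encode_length(uint8_t **op, unsigned length)
{
	if (length < LENGTH_LONG)
		return length;

	length -= LENGTH_LONG;
	for (; length > 254; length -= 255)
		*(*op)++ = 255;
	*(*op)++ = length;
	return LENGTH_LONG;
}

/* Inverse: given the 4-bit field, consume any overflow bytes. */
static unsigned decode_length(const uint8_t **ip, unsigned field)
{
	unsigned length = field, byte;

	if (field == LENGTH_LONG) {
		do {
			byte = *(*ip)++;
			length += byte;
		} while (byte == 255);
	}
	return length;
}

int main(void)
{
	uint8_t buf[16], *op = buf;
	const uint8_t *ip = buf;
	unsigned field = encode_length(&op, 300);	/* 300 = 15 + 255 + 30 */

	printf("field=%u overflow_bytes=%td decoded=%u\n",
	       field, op - buf, decode_length(&ip, field));
	return 0;
}

The unsafe decoder in lz4_uncompress() trusts the stream to terminate; the
unknown-output-size variant calls get_length_safe() instead, which also bails
out if the accumulated length wraps negative.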
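The biggest structural change is merging lz4_compressctx() and
lz4_compress64kctx(), which differed only in hash table entry width: u16
offsets suffice for inputs under 64k, while larger inputs need u32 offsets
(or raw pointers on 32-bit). The patch hides that behind struct
lz4_hash_table -- a function pointer plus context -- and relies on
__attribute__((flatten)) on lz4_compress() to inline the whole call tree, so
each branch specializes into code as tight as the old duplicated functions.
A standalone sketch of the pattern, assuming a fixed 12-bit table purely for
brevity (the patch derives the width from MEMORY_USAGE and the entry size);
the names here are illustrative:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct hash_table {
	/* add @ip to the table, return the previous entry for its hash */
	const uint8_t *(*add)(struct hash_table, const uint8_t *);
	void *ctx;            /* the table itself: u16[] or u32[] */
	const uint8_t *base;  /* table entries are offsets from here */
};

static uint32_t hash32(const uint8_t *p, unsigned bits)
{
	uint32_t v;

	memcpy(&v, p, sizeof(v));	/* unaligned 32-bit load */
	return (v * 2654435761U) >> (32 - bits);
}

static const uint8_t *add16(struct hash_table h, const uint8_t *ip)
{
	uint16_t *table = h.ctx;
	uint32_t slot = hash32(ip, 12);
	uint16_t prev = table[slot];

	table[slot] = ip - h.base;	/* offset fits: input < 64k */
	return h.base + prev;
}

static const uint8_t *add32(struct hash_table h, const uint8_t *ip)
{
	uint32_t *table = h.ctx;
	uint32_t slot = hash32(ip, 12);
	uint32_t prev = table[slot];

	table[slot] = ip - h.base;
	return h.base + prev;
}

/*
 * Caller picks the variant once, as lz4_compress() does; @wrkmem is
 * assumed pre-zeroed, as lz4_compressctx() guarantees via memset():
 */
static const uint8_t *first_candidate(void *wrkmem, const uint8_t *src,
				      size_t src_len)
{
	struct hash_table h = {
		.add  = src_len < (1 << 16) ? add16 : add32,
		.ctx  = wrkmem,
		.base = src,
	};

	return h.add(h, src);
}

The function-pointer indirection costs nothing at runtime: because
lz4_compressctx() is inline and lz4_compress() is flattened, the compiler
sees which add() is reached on each branch and inlines it.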
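Both the plain and HC compressors previously carried their own copy of the
word-at-a-time match counter (lz4hc_commonlength() and the inline loop in
lz4_compressctx()); the patch hoists a single common_length() into
lz4defs.h. The trick: XOR one machine word from each side -- the lowest
nonzero byte of the result marks the first mismatch, and on little-endian a
count-trailing-zeros divided by 8 converts that to a byte count. A
little-endian-only sketch (the kernel version switches to clz under
__BIG_ENDIAN and finishes with 4/2/1-byte steps rather than a byte loop):

#include <stdint.h>
#include <string.h>

/* Length of the common prefix of @l and @r, reading @l at most to @l_end. */
static unsigned match_length(const uint8_t *l, const uint8_t *r,
			     const uint8_t *l_end)
{
	const uint8_t *l_start = l;

	/* compare a machine word at a time */
	while (l <= l_end - sizeof(unsigned long)) {
		unsigned long a, b, diff;

		memcpy(&a, l, sizeof(a));
		memcpy(&b, r, sizeof(b));
		diff = a ^ b;
		if (diff)	/* lowest set byte = first mismatch (LE) */
			return (unsigned)(l - l_start) +
			       (__builtin_ctzl(diff) >> 3);

		l += sizeof(unsigned long);
		r += sizeof(unsigned long);
	}

	/* byte-at-a-time tail near the buffer end */
	while (l < l_end && *l == *r) {
		l++;
		r++;
	}
	return (unsigned)(l - l_start);
}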
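Finally, the copy macros: MEMCPY_ADVANCE_CHUNKED and its NOFIXUP variant
both build on one idea -- copy in fixed-size packets, deliberately
overrunning the destination by up to a packet minus one byte, then rewind
the pointers. The format's MFLIMIT and LASTLITERALS margins are what
guarantee that slack exists at the end of both buffers. A sketch of the core
loop, assuming an 8-byte packet for simplicity (the macros actually copy
STEPSIZE and then COPYLENGTH - STEPSIZE bytes per iteration):

#include <stdint.h>
#include <string.h>

static void copy_chunked(uint8_t **dst, const uint8_t **src, size_t len)
{
	uint8_t *end = *dst + len;

	while (*dst < end) {
		memcpy(*dst, *src, 8);	/* may write up to 7 bytes past end */
		*dst += 8;
		*src += 8;
	}
	/* rewind the overshoot so both pointers advance by exactly len */
	*src -= *dst - end;
	*dst = end;
}

Match copies may read bytes the same call just wrote (src behind dst by the
match offset) -- that is how LZ4 expresses run-length-style repetition, so
the copy must proceed low-to-high, and raw packet copies are only safe once
the source trails by at least a packet. That is why the decompressor
special-cases offset < STEPSIZE: it byte-copies the first four bytes,
realigns ref via dec32table/dec64table, and only then falls back to packet
copies.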