lib/lz4: update LZ4 decompressor module

Update the LZ4 compression module based on LZ4 v1.8.3 in order for the erofs file system to use the newest LZ4_decompress_safe_partial() which can now decode exactly the nb of bytes requested [1] to take place of the open hacked code in the erofs file system itself. Currently, apart from the erofs file system, no other users use LZ4_decompress_safe_partial, so no worry about the interface. In addition, LZ4 v1.8.x boosts up decompression speed compared to the current code which is based on LZ4 v1.7.3, mainly due to shortcut optimization for the specific common LZ4-sequences [2]. lzbench testdata (tested in kirin710, 8 cores, 4 big cores at 2189Mhz, 2GB DDR RAM at 1622Mhz, with enwik8 testdata [3]): Compressor name Compress. Decompress. Compr. size Ratio Filename memcpy 5004 MB/s 4924 MB/s 100000000 100.00 enwik8 lz4hc 1.7.3 -9 12 MB/s 653 MB/s 42203253 42.20 enwik8 lz4hc 1.8.0 -9 12 MB/s 908 MB/s 42203096 42.20 enwik8 lz4hc 1.8.3 -9 11 MB/s 965 MB/s 42203094 42.20 enwik8 [1] https://github.com/lz4/lz4/issues/566 08d347b5b2 [2] v1.8.1 perf: slightly faster compression and decompression speed a31b7058cb v1.8.2 perf: slightly faster HC compression and decompression speed 45f8603aae 1a191b3f8d [3] http://mattmahoney.net/dc/textdata.html http://mattmahoney.net/dc/enwik8.zip Link: http://lkml.kernel.org/r/1537181207-21932-1-git-send-email-gaoxiang25@huawei.com Signed-off-by: Gao Xiang <gaoxiang25@huawei.com> Tested-by: Guo Xuenan <guoxuenan@huawei.com> Cc: Colin Ian King <colin.king@canonical.com> Cc: Yann Collet <yann.collet.73@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Fang Wei <fangwei1@huawei.com> Cc: Chao Yu <yuchao0@huawei.com> Cc: Miao Xie <miaoxie@huawei.com> Cc: Sven Schmidt <4sschmid@informatik.uni-hamburg.de> Cc: Kyungsik Lee <kyungsik.lee@lge.com> Cc: <weidu.du@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2024-11-10 14:11:52 +00:00 · 2018-10-30 15:07:28 -07:00 · 2018-10-30 15:07:28 -07:00 · 2209fda323
commit 2209fda323
parent 8c81ddd2ac
2 changed files with 356 additions and 148 deletions
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@ -43,30 +43,36 @@
 /*-*****************************
 *	Decompression functions
 *******************************/
-/* LZ4_decompress_generic() :
+
- * This generic decompression function cover all use cases.
+#define DEBUGLOG(l, ...) {}	/* disabled */
- * It shall be instantiated several times, using different sets of directives
+
- * Note that it is important this generic function is really inlined,
+#ifndef assert
 #define assert(condition) ((void)0)
 #endif
 /*
 * LZ4_decompress_generic() :
 * This generic decompression function covers all use cases.
 * It shall be instantiated several times, using different sets of directives.
 * Note that it is important for performance that this function really get inlined,
 * in order to remove useless branches during compilation optimization.
 */
 static FORCE_INLINE int LZ4_decompress_generic(
-	 const char * const source,
+	 const char * const src,
-	 char * const dest,
+	 char * const dst,
-	 int inputSize,
+	 int srcSize,
 		/*
 		 * If endOnInput == endOnInputSize,
-		 * this value is the max size of Output Buffer.
+		 * this value is `dstCapacity`
 		 */
 	 int outputSize,
 	 /* endOnOutputSize, endOnInputSize */
-	 int endOnInput,
+	 endCondition_directive endOnInput,
 	 /* full, partial */
-	 int partialDecoding,
+	 earlyEnd_directive partialDecoding,
 	 /* only used if partialDecoding == partial */
 	 int targetOutputSize,
 	 /* noDict, withPrefix64k, usingExtDict */
-	 int dict,
+	 dict_directive dict,
-	 /* == dest when no prefix */
+	 /* always <= dst, == dst when no prefix */
 	 const BYTE * const lowPrefix,
 	 /* only if dict == usingExtDict */
 	 const BYTE * const dictStart,
@ -74,35 +80,43 @@ static FORCE_INLINE int LZ4_decompress_generic(
 	 const size_t dictSize
 	 )
 {
-	/* Local Variables */
+	const BYTE *ip = (const BYTE *) src;
-	const BYTE *ip = (const BYTE *) source;
+	const BYTE * const iend = ip + srcSize;
 	const BYTE * const iend = ip + inputSize;
-	BYTE *op = (BYTE *) dest;
+	BYTE *op = (BYTE *) dst;
 	BYTE * const oend = op + outputSize;
 	BYTE *cpy;
 	BYTE *oexit = op + targetOutputSize;
 	const BYTE * const lowLimit = lowPrefix - dictSize;
 	const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
-	static const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };
+	static const unsigned int inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
-	static const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 };
+	static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
 	const int safeDecode = (endOnInput == endOnInputSize);
 	const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));
 	/* Set up the "end" pointers for the shortcut. */
 	const BYTE *const shortiend = iend -
 		(endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
 	const BYTE *const shortoend = oend -
 		(endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
 	DEBUGLOG(5, "%s (srcSize:%i, dstSize:%i)", __func__,
 		 srcSize, outputSize);
 	/* Special cases */
-	/* targetOutputSize too high => decode everything */
+	assert(lowPrefix <= op);
-	if ((partialDecoding) && (oexit > oend - MFLIMIT))
+	assert(src != NULL);
 		oexit = oend - MFLIMIT;
 	/* Empty output buffer */
 	if ((endOnInput) && (unlikely(outputSize == 0)))
-		return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
+		return ((srcSize == 1) && (*ip == 0)) ? 0 : -1;
 	if ((!endOnInput) && (unlikely(outputSize == 0)))
 		return (*ip == 0 ? 1 : -1);
 	if ((endOnInput) && unlikely(srcSize == 0))
 		return -1;
 	/* Main Loop : decode sequences */
 	while (1) {
 		size_t length;
@ -111,12 +125,74 @@ static FORCE_INLINE int LZ4_decompress_generic(
 		/* get literal length */
 		unsigned int const token = *ip++;
 		length = token>>ML_BITS;
 		/* ip < iend before the increment */
 		assert(!endOnInput || ip <= iend);
 		/*
 		 * A two-stage shortcut for the most common case:
 		 * 1) If the literal length is 0..14, and there is enough
 		 * space, enter the shortcut and copy 16 bytes on behalf
 		 * of the literals (in the fast mode, only 8 bytes can be
 		 * safely copied this way).
 		 * 2) Further if the match length is 4..18, copy 18 bytes
 		 * in a similar manner; but we ensure that there's enough
 		 * space in the output for those 18 bytes earlier, upon
 		 * entering the shortcut (in other words, there is a
 		 * combined check for both stages).
 		 */
 		if ((endOnInput ? length != RUN_MASK : length <= 8)
 		   /*
 		    * strictly "less than" on input, to re-enter
 		    * the loop with at least one byte
 		    */
 		   && likely((endOnInput ? ip < shortiend : 1) &
 			     (op <= shortoend))) {
 			/* Copy the literals */
 			memcpy(op, ip, endOnInput ? 16 : 8);
 			op += length; ip += length;
 			/*
 			 * The second stage:
 			 * prepare for match copying, decode full info.
 			 * If it doesn't work out, the info won't be wasted.
 			 */
 			length = token & ML_MASK; /* match length */
 			offset = LZ4_readLE16(ip);
 			ip += 2;
 			match = op - offset;
 			assert(match <= op); /* check overflow */
 			/* Do not deal with overlapping matches. */
 			if ((length != ML_MASK) &&
 			    (offset >= 8) &&
 			    (dict == withPrefix64k || match >= lowPrefix)) {
 				/* Copy the match. */
 				memcpy(op + 0, match + 0, 8);
 				memcpy(op + 8, match + 8, 8);
 				memcpy(op + 16, match + 16, 2);
 				op += length + MINMATCH;
 				/* Both stages worked, load the next token. */
 				continue;
 			}
 			/*
 			 * The second stage didn't work out, but the info
 			 * is ready. Propel it right to the point of match
 			 * copying.
 			 */
 			goto _copy_match;
 		}
 		/* decode literal length */
 		if (length == RUN_MASK) {
 			unsigned int s;
 			if (unlikely(endOnInput ? ip >= iend - RUN_MASK : 0)) {
 				/* overflow detection */
 				goto _output_error;
 			}
 			do {
 				s = *ip++;
 				length += s;
@ -125,14 +201,14 @@ static FORCE_INLINE int LZ4_decompress_generic(
 				: 1) & (s == 255));
 			if ((safeDecode)
-				&& unlikely(
+			    && unlikely((uptrval)(op) +
-					(size_t)(op + length) < (size_t)(op))) {
+					length < (uptrval)(op))) {
 				/* overflow detection */
 				goto _output_error;
 			}
 			if ((safeDecode)
-				&& unlikely(
+			    && unlikely((uptrval)(ip) +
-					(size_t)(ip + length) < (size_t)(ip))) {
+					length < (uptrval)(ip))) {
 				/* overflow detection */
 				goto _output_error;
 			}
@ -140,16 +216,19 @@ static FORCE_INLINE int LZ4_decompress_generic(
 		/* copy literals */
 		cpy = op + length;
-		if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT))
+		LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
 		if (((endOnInput) && ((cpy > oend - MFLIMIT)
 			|| (ip + length > iend - (2 + 1 + LASTLITERALS))))
 			|| ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
 			if (partialDecoding) {
 				if (cpy > oend) {
 					/*
-					 * Error :
+					 * Partial decoding :
-					 * write attempt beyond end of output buffer
+					 * stop in the middle of literal segment
 					 */
-					goto _output_error;
+					cpy = oend;
 					length = oend - op;
 				}
 				if ((endOnInput)
 					&& (ip + length > iend)) {
@ -184,29 +263,43 @@ static FORCE_INLINE int LZ4_decompress_generic(
 			memcpy(op, ip, length);
 			ip += length;
 			op += length;
 			/* Necessarily EOF, due to parsing restrictions */
 			break;
 		}
 			/* Necessarily EOF, due to parsing restrictions */
 			if (!partialDecoding || (cpy == oend))
 				break;
 		} else {
 			/* may overwrite up to WILDCOPYLENGTH beyond cpy */
 			LZ4_wildCopy(op, ip, cpy);
 			ip += length;
 			op = cpy;
 		}
 		/* get offset */
 		offset = LZ4_readLE16(ip);
 		ip += 2;
 		match = op - offset;
-		if ((checkOffset) && (unlikely(match < lowLimit))) {
+		/* get matchlength */
 		length = token & ML_MASK;
 _copy_match:
 		if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) {
 			/* Error : offset outside buffers */
 			goto _output_error;
 		}
 		/* costs ~1%; silence an msan warning when offset == 0 */
-		LZ4_write32(op, (U32)offset);
+		/*
 		 * note : when partialDecoding, there is no guarantee that
 		 * at least 4 bytes remain available in output buffer
 		 */
 		if (!partialDecoding) {
 			assert(oend > op);
 			assert(oend - op >= 4);
 			LZ4_write32(op, (U32)offset);
 		}
 		/* get matchlength */
 		length = token & ML_MASK;
 		if (length == ML_MASK) {
 			unsigned int s;
@ -221,7 +314,7 @@ static FORCE_INLINE int LZ4_decompress_generic(
 			if ((safeDecode)
 				&& unlikely(
-					(size_t)(op + length) < (size_t)op)) {
+					(uptrval)(op) + length < (uptrval)op)) {
 				/* overflow detection */
 				goto _output_error;
 			}
@ -229,24 +322,26 @@ static FORCE_INLINE int LZ4_decompress_generic(
 		length += MINMATCH;
-		/* check external dictionary */
+		/* match starting within external dictionary */
 		if ((dict == usingExtDict) && (match < lowPrefix)) {
 			if (unlikely(op + length > oend - LASTLITERALS)) {
 				/* doesn't respect parsing restriction */
 				if (!partialDecoding)
 					goto _output_error;
 				length = min(length, (size_t)(oend - op));
 			}
 			if (length <= (size_t)(lowPrefix - match)) {
 				/*
-				 * match can be copied as a single segment
+				 * match fits entirely within external
-				 * from external dictionary
+				 * dictionary : just copy
 				 */
 				memmove(op, dictEnd - (lowPrefix - match),
 					length);
 				op += length;
 			} else {
 				/*
-				 * match encompass external
+				 * match stretches into both external
 				 * dictionary and current block
 				 */
 				size_t const copySize = (size_t)(lowPrefix - match);
@ -254,7 +349,6 @@ static FORCE_INLINE int LZ4_decompress_generic(
 				memcpy(op, dictEnd - copySize, copySize);
 				op += copySize;
 				if (restSize > (size_t)(op - lowPrefix)) {
 					/* overlap copy */
 					BYTE * const endOfMatch = op + restSize;
@ -267,23 +361,44 @@ static FORCE_INLINE int LZ4_decompress_generic(
 					op += restSize;
 				}
 			}
 			continue;
 		}
 		/* copy match within block */
 		cpy = op + length;
-		if (unlikely(offset < 8)) {
+		/*
-			const int dec64 = dec64table[offset];
+		 * partialDecoding :
 		 * may not respect endBlock parsing restrictions
 		 */
 		assert(op <= oend);
 		if (partialDecoding &&
 		    (cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
 			size_t const mlen = min(length, (size_t)(oend - op));
 			const BYTE * const matchEnd = match + mlen;
 			BYTE * const copyEnd = op + mlen;
 			if (matchEnd > op) {
 				/* overlap copy */
 				while (op < copyEnd)
 					*op++ = *match++;
 			} else {
 				memcpy(op, match, mlen);
 			}
 			op = copyEnd;
 			if (op == oend)
 				break;
 			continue;
 		}
 		if (unlikely(offset < 8)) {
 			op[0] = match[0];
 			op[1] = match[1];
 			op[2] = match[2];
 			op[3] = match[3];
-			match += dec32table[offset];
+			match += inc32table[offset];
 			memcpy(op + 4, match, 4);
-			match -= dec64;
+			match -= dec64table[offset];
 		} else {
 			LZ4_copy8(op, match);
 			match += 8;
@ -291,7 +406,7 @@ static FORCE_INLINE int LZ4_decompress_generic(
 		op += 8;
-		if (unlikely(cpy > oend - 12)) {
+		if (unlikely(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
 			BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
 			if (cpy > oend - LASTLITERALS) {
@ -307,60 +422,139 @@ static FORCE_INLINE int LZ4_decompress_generic(
 				match += oCopyLimit - op;
 				op = oCopyLimit;
 			}
 			while (op < cpy)
 				*op++ = *match++;
 		} else {
 			LZ4_copy8(op, match);
 			if (length > 16)
 				LZ4_wildCopy(op + 8, match + 8, cpy);
 		}
-
+		op = cpy; /* wildcopy correction */
 		op = cpy; /* correction */
 	}
 	/* end of decoding */
 	if (endOnInput) {
 		/* Nb of output bytes decoded */
-		return (int) (((char *)op) - dest);
+		return (int) (((char *)op) - dst);
 	} else {
 		/* Nb of input bytes read */
-		return (int) (((const char *)ip) - source);
+		return (int) (((const char *)ip) - src);
 	}
 	/* Overflow error detected */
 _output_error:
-	return -1;
+	return (int) (-(((const char *)ip) - src)) - 1;
 }
 int LZ4_decompress_safe(const char *source, char *dest,
 	int compressedSize, int maxDecompressedSize)
 {
-	return LZ4_decompress_generic(source, dest, compressedSize,
+	return LZ4_decompress_generic(source, dest,
-		maxDecompressedSize, endOnInputSize, full, 0,
+				      compressedSize, maxDecompressedSize,
 				      endOnInputSize, decode_full_block,
 				      noDict, (BYTE *)dest, NULL, 0);
 }
-int LZ4_decompress_safe_partial(const char *source, char *dest,
+int LZ4_decompress_safe_partial(const char *src, char *dst,
-	int compressedSize, int targetOutputSize, int maxDecompressedSize)
+	int compressedSize, int targetOutputSize, int dstCapacity)
 {
-	return LZ4_decompress_generic(source, dest, compressedSize,
+	dstCapacity = min(targetOutputSize, dstCapacity);
-		maxDecompressedSize, endOnInputSize, partial,
+	return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
-		targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
+				      endOnInputSize, partial_decode,
 				      noDict, (BYTE *)dst, NULL, 0);
 }
 int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
 {
 	return LZ4_decompress_generic(source, dest, 0, originalSize,
-		endOnOutputSize, full, 0, withPrefix64k,
+				      endOnOutputSize, decode_full_block,
-		(BYTE *)(dest - 64 * KB), NULL, 64 * KB);
+				      withPrefix64k,
 				      (BYTE *)dest - 64 * KB, NULL, 0);
 }
 /* ===== Instantiate a few more decoding cases, used more than once. ===== */
 int LZ4_decompress_safe_withPrefix64k(const char *source, char *dest,
 				      int compressedSize, int maxOutputSize)
 {
 	return LZ4_decompress_generic(source, dest,
 				      compressedSize, maxOutputSize,
 				      endOnInputSize, decode_full_block,
 				      withPrefix64k,
 				      (BYTE *)dest - 64 * KB, NULL, 0);
 }
 static int LZ4_decompress_safe_withSmallPrefix(const char *source, char *dest,
 					       int compressedSize,
 					       int maxOutputSize,
 					       size_t prefixSize)
 {
 	return LZ4_decompress_generic(source, dest,
 				      compressedSize, maxOutputSize,
 				      endOnInputSize, decode_full_block,
 				      noDict,
 				      (BYTE *)dest - prefixSize, NULL, 0);
 }
 int LZ4_decompress_safe_forceExtDict(const char *source, char *dest,
 				     int compressedSize, int maxOutputSize,
 				     const void *dictStart, size_t dictSize)
 {
 	return LZ4_decompress_generic(source, dest,
 				      compressedSize, maxOutputSize,
 				      endOnInputSize, decode_full_block,
 				      usingExtDict, (BYTE *)dest,
 				      (const BYTE *)dictStart, dictSize);
 }
 static int LZ4_decompress_fast_extDict(const char *source, char *dest,
 				       int originalSize,
 				       const void *dictStart, size_t dictSize)
 {
 	return LZ4_decompress_generic(source, dest,
 				      0, originalSize,
 				      endOnOutputSize, decode_full_block,
 				      usingExtDict, (BYTE *)dest,
 				      (const BYTE *)dictStart, dictSize);
 }
 /*
 * The "double dictionary" mode, for use with e.g. ring buffers: the first part
 * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
 * These routines are used only once, in LZ4_decompress_*_continue().
 */
 static FORCE_INLINE
 int LZ4_decompress_safe_doubleDict(const char *source, char *dest,
 				   int compressedSize, int maxOutputSize,
 				   size_t prefixSize,
 				   const void *dictStart, size_t dictSize)
 {
 	return LZ4_decompress_generic(source, dest,
 				      compressedSize, maxOutputSize,
 				      endOnInputSize, decode_full_block,
 				      usingExtDict, (BYTE *)dest - prefixSize,
 				      (const BYTE *)dictStart, dictSize);
 }
 static FORCE_INLINE
 int LZ4_decompress_fast_doubleDict(const char *source, char *dest,
 				   int originalSize, size_t prefixSize,
 				   const void *dictStart, size_t dictSize)
 {
 	return LZ4_decompress_generic(source, dest,
 				      0, originalSize,
 				      endOnOutputSize, decode_full_block,
 				      usingExtDict, (BYTE *)dest - prefixSize,
 				      (const BYTE *)dictStart, dictSize);
 }
 /* ===== streaming decompression functions ===== */
 int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
 	const char *dictionary, int dictSize)
 {
-	LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode;
+	LZ4_streamDecode_t_internal *lz4sd =
 		&LZ4_streamDecode->internal_donotuse;
 	lz4sd->prefixSize = (size_t) dictSize;
 	lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize;
@ -382,30 +576,46 @@ int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
 int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
 	const char *source, char *dest, int compressedSize, int maxOutputSize)
 {
-	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	LZ4_streamDecode_t_internal *lz4sd =
 		&LZ4_streamDecode->internal_donotuse;
 	int result;
-	if (lz4sd->prefixEnd == (BYTE *)dest) {
+	if (lz4sd->prefixSize == 0) {
-		result = LZ4_decompress_generic(source, dest,
+		/* The first call, no dictionary yet. */
-			compressedSize,
+		assert(lz4sd->extDictSize == 0);
-			maxOutputSize,
+		result = LZ4_decompress_safe(source, dest,
-			endOnInputSize, full, 0,
+			compressedSize, maxOutputSize);
-			usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize,
+		if (result <= 0)
-			lz4sd->externalDict,
+			return result;
-			lz4sd->extDictSize);
+		lz4sd->prefixSize = result;
-
+		lz4sd->prefixEnd = (BYTE *)dest + result;
 	} else if (lz4sd->prefixEnd == (BYTE *)dest) {
 		/* They're rolling the current segment. */
 		if (lz4sd->prefixSize >= 64 * KB - 1)
 			result = LZ4_decompress_safe_withPrefix64k(source, dest,
 				compressedSize, maxOutputSize);
 		else if (lz4sd->extDictSize == 0)
 			result = LZ4_decompress_safe_withSmallPrefix(source,
 				dest, compressedSize, maxOutputSize,
 				lz4sd->prefixSize);
 		else
 			result = LZ4_decompress_safe_doubleDict(source, dest,
 				compressedSize, maxOutputSize,
 				lz4sd->prefixSize,
 				lz4sd->externalDict, lz4sd->extDictSize);
 		if (result <= 0)
 			return result;
 		lz4sd->prefixSize += result;
 		lz4sd->prefixEnd  += result;
 	} else {
 		/*
 		 * The buffer wraps around, or they're
 		 * switching to another buffer.
 		 */
 		lz4sd->extDictSize = lz4sd->prefixSize;
 		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
-		result = LZ4_decompress_generic(source, dest,
+		result = LZ4_decompress_safe_forceExtDict(source, dest,
 			compressedSize, maxOutputSize,
 			endOnInputSize, full, 0,
 			usingExtDict, (BYTE *)dest,
 			lz4sd->externalDict, lz4sd->extDictSize);
 		if (result <= 0)
 			return result;
@ -422,75 +632,66 @@ int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
 	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
 	int result;
-	if (lz4sd->prefixEnd == (BYTE *)dest) {
+	if (lz4sd->prefixSize == 0) {
-		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+		assert(lz4sd->extDictSize == 0);
-			endOnOutputSize, full, 0,
+		result = LZ4_decompress_fast(source, dest, originalSize);
-			usingExtDict,
+		if (result <= 0)
-			lz4sd->prefixEnd - lz4sd->prefixSize,
+			return result;
-			lz4sd->externalDict, lz4sd->extDictSize);
+		lz4sd->prefixSize = originalSize;
-
+		lz4sd->prefixEnd = (BYTE *)dest + originalSize;
 	} else if (lz4sd->prefixEnd == (BYTE *)dest) {
 		if (lz4sd->prefixSize >= 64 * KB - 1 ||
 		    lz4sd->extDictSize == 0)
 			result = LZ4_decompress_fast(source, dest,
 						     originalSize);
 		else
 			result = LZ4_decompress_fast_doubleDict(source, dest,
 				originalSize, lz4sd->prefixSize,
 				lz4sd->externalDict, lz4sd->extDictSize);
 		if (result <= 0)
 			return result;
 		lz4sd->prefixSize += originalSize;
 		lz4sd->prefixEnd  += originalSize;
 	} else {
 		lz4sd->extDictSize = lz4sd->prefixSize;
 		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
-		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+		result = LZ4_decompress_fast_extDict(source, dest,
-			endOnOutputSize, full, 0,
+			originalSize, lz4sd->externalDict, lz4sd->extDictSize);
 			usingExtDict, (BYTE *)dest,
 			lz4sd->externalDict, lz4sd->extDictSize);
 		if (result <= 0)
 			return result;
 		lz4sd->prefixSize = originalSize;
 		lz4sd->prefixEnd = (BYTE *)dest + originalSize;
 	}
 	return result;
 }
 /*
 * Advanced decoding functions :
 * *_usingDict() :
 * These decoding functions work the same as "_continue" ones,
 * the dictionary must be explicitly provided within parameters
 */
 static FORCE_INLINE int LZ4_decompress_usingDict_generic(const char *source,
 	char *dest, int compressedSize, int maxOutputSize, int safe,
 	const char *dictStart, int dictSize)
 {
 	if (dictSize == 0)
 		return LZ4_decompress_generic(source, dest,
 			compressedSize, maxOutputSize, safe, full, 0,
 			noDict, (BYTE *)dest, NULL, 0);
 	if (dictStart + dictSize == dest) {
 		if (dictSize >= (int)(64 * KB - 1))
 			return LZ4_decompress_generic(source, dest,
 				compressedSize, maxOutputSize, safe, full, 0,
 				withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0);
 		return LZ4_decompress_generic(source, dest, compressedSize,
 			maxOutputSize, safe, full, 0, noDict,
 			(BYTE *)dest - dictSize, NULL, 0);
 	}
 	return LZ4_decompress_generic(source, dest, compressedSize,
 		maxOutputSize, safe, full, 0, usingExtDict,
 		(BYTE *)dest, (const BYTE *)dictStart, dictSize);
 }
 int LZ4_decompress_safe_usingDict(const char *source, char *dest,
 				  int compressedSize, int maxOutputSize,
 				  const char *dictStart, int dictSize)
 {
-	return LZ4_decompress_usingDict_generic(source, dest,
+	if (dictSize == 0)
-		compressedSize, maxOutputSize, 1, dictStart, dictSize);
+		return LZ4_decompress_safe(source, dest,
 					   compressedSize, maxOutputSize);
 	if (dictStart+dictSize == dest) {
 		if (dictSize >= 64 * KB - 1)
 			return LZ4_decompress_safe_withPrefix64k(source, dest,
 				compressedSize, maxOutputSize);
 		return LZ4_decompress_safe_withSmallPrefix(source, dest,
 			compressedSize, maxOutputSize, dictSize);
 	}
 	return LZ4_decompress_safe_forceExtDict(source, dest,
 		compressedSize, maxOutputSize, dictStart, dictSize);
 }
 int LZ4_decompress_fast_usingDict(const char *source, char *dest,
-	int originalSize, const char *dictStart, int dictSize)
+				  int originalSize,
 				  const char *dictStart, int dictSize)
 {
-	return LZ4_decompress_usingDict_generic(source, dest, 0,
+	if (dictSize == 0 || dictStart + dictSize == dest)
-		originalSize, 0, dictStart, dictSize);
+		return LZ4_decompress_fast(source, dest, originalSize);
 	return LZ4_decompress_fast_extDict(source, dest, originalSize,
 		dictStart, dictSize);
 }
 #ifndef STATIC
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@ -75,6 +75,11 @@ typedef uintptr_t uptrval;
 #define WILDCOPYLENGTH 8
 #define LASTLITERALS 5
 #define MFLIMIT (WILDCOPYLENGTH + MINMATCH)
 /*
 * ensure it's possible to write 2 x wildcopyLength
 * without overflowing output buffer
 */
 #define MATCH_SAFEGUARD_DISTANCE  ((2 * WILDCOPYLENGTH) - MINMATCH)
 /* Increase this value ==> compression run slower on incompressible data */
 #define LZ4_SKIPTRIGGER 6
@ -222,6 +227,8 @@ typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
 typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
 typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
-typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
 #define LZ4_STATIC_ASSERT(c)	BUILD_BUG_ON(!(c))
 #endif