From 027b728d4aafb5cf97abc804944a77b837b0f07f Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Tue, 6 Oct 2015 20:03:53 -0700 Subject: Add support for LZ4 decompression algorithm This patch adds support for LZ4-compressed FIT image contents. This algorithm has a slightly worse compression ration than LZO while being nearly twice as fast to decompress. When loading images from a fast storage medium this usually results in a boot time win. Sandbox-tested only since I don't have a U-Boot development system set up right now. The code was imported unchanged from coreboot where it's proven to work, though. I'm mostly interested in getting this recognized by mkImage for use in a downstream project. Signed-off-by: Julius Werner Acked-by: Simon Glass diff --git a/common/bootm.c b/common/bootm.c index c0d0d09..58936ca 100644 --- a/common/bootm.c +++ b/common/bootm.c @@ -389,6 +389,15 @@ int bootm_decomp_image(int comp, ulong load, ulong image_start, int type, break; } #endif /* CONFIG_LZO */ +#ifdef CONFIG_LZ4 + case IH_COMP_LZ4: { + size_t size = unc_len; + + ret = ulz4fn(image_buf, image_len, load_buf, &size); + image_len = size; + break; + } +#endif /* CONFIG_LZ4 */ default: printf("Unimplemented compression type %d\n", comp); return BOOTM_ERR_UNIMPLEMENTED; diff --git a/common/image.c b/common/image.c index 1325e07..c33749d 100644 --- a/common/image.c +++ b/common/image.c @@ -167,6 +167,7 @@ static const table_entry_t uimage_comp[] = { { IH_COMP_GZIP, "gzip", "gzip compressed", }, { IH_COMP_LZMA, "lzma", "lzma compressed", }, { IH_COMP_LZO, "lzo", "lzo compressed", }, + { IH_COMP_LZ4, "lz4", "lz4 compressed", }, { -1, "", "", }, }; diff --git a/configs/sandbox_defconfig b/configs/sandbox_defconfig index ae96b63..b2675c7 100644 --- a/configs/sandbox_defconfig +++ b/configs/sandbox_defconfig @@ -56,6 +56,7 @@ CONFIG_USB_STORAGE=y CONFIG_SYS_VSNPRINTF=y CONFIG_CMD_DHRYSTONE=y CONFIG_TPM=y +CONFIG_LZ4=y CONFIG_ERRNO_STR=y CONFIG_UNIT_TEST=y CONFIG_UT_TIME=y diff --git a/include/common.h b/include/common.h index 68b24d0..ecb1f06 100644 --- a/include/common.h +++ b/include/common.h @@ -826,6 +826,9 @@ int gzwrite(unsigned char *src, int len, u64 startoffs, u64 szexpected); +/* lib/lz4_wrapper.c */ +int ulz4fn(const void *src, size_t srcn, void *dst, size_t *dstn); + /* lib/qsort.c */ void qsort(void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *)); diff --git a/include/image.h b/include/image.h index 8a864ae..08ae24a 100644 --- a/include/image.h +++ b/include/image.h @@ -259,6 +259,7 @@ struct lmb; #define IH_COMP_BZIP2 2 /* bzip2 Compression Used */ #define IH_COMP_LZMA 3 /* lzma Compression Used */ #define IH_COMP_LZO 4 /* lzo Compression Used */ +#define IH_COMP_LZ4 5 /* lz4 Compression Used */ #define IH_MAGIC 0x27051956 /* Image Magic Number */ #define IH_NMLEN 32 /* Image Name Length */ diff --git a/lib/Kconfig b/lib/Kconfig index 0673072..a8f8460 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -100,6 +100,24 @@ config SHA_PROG_HW_ACCEL is performed in hardware. endmenu +menu "Compression Support" + +config LZ4 + bool "Enable LZ4 decompression support" + help + If this option is set, support for LZ4 compressed images + is included. The LZ4 algorithm can run in-place as long as the + compressed image is loaded to the end of the output buffer, and + trades lower compression ratios for much faster decompression. + + NOTE: This implements the release version of the LZ4 frame + format as generated by default by the 'lz4' command line tool. + This is not the same as the outdated, less efficient legacy + frame format currently (2015) implemented in the Linux kernel + (generated by 'lz4 -l'). The two formats are incompatible. + +endmenu + config ERRNO_STR bool "Enable function for getting errno-related string message" help diff --git a/lib/Makefile b/lib/Makefile index 96f832e..3eecefa 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_GZIP_COMPRESSED) += gzip.o obj-y += initcall.o obj-$(CONFIG_LMB) += lmb.o obj-y += ldiv.o +obj-$(CONFIG_LZ4) += lz4_wrapper.o obj-$(CONFIG_MD5) += md5.o obj-y += net_utils.o obj-$(CONFIG_PHYSMEM) += physmem.o diff --git a/lib/lz4.c b/lib/lz4.c new file mode 100644 index 0000000..f518341 --- /dev/null +++ b/lib/lz4.c @@ -0,0 +1,243 @@ +/* + LZ4 - Fast LZ compression algorithm + Copyright (C) 2011-2015, Yann Collet. + + SPDX-License-Identifier: BSD-2-Clause + + You can contact the author at : + - LZ4 source repository : https://github.com/Cyan4973/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + + +/************************************** +* Reading and writing into memory +**************************************/ + +/* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */ +static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* e = (BYTE*)dstEnd; + do { LZ4_copy8(d,s); d+=8; s+=8; } while (d oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ + if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */ + if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); + + + /* Main Loop */ + while (1) + { + unsigned token; + size_t length; + const BYTE* match; + + /* get literal length */ + token = *ip++; + if ((length=(token>>ML_BITS)) == RUN_MASK) + { + unsigned s; + do + { + s = *ip++; + length += s; + } + while (likely((endOnInput)?ip(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-COPYLENGTH))) + { + if (partialDecoding) + { + if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */ + if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */ + } + else + { + if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */ + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */ + } + memcpy(op, ip, length); + ip += length; + op += length; + break; /* Necessarily EOF, due to parsing restrictions */ + } + LZ4_wildCopy(op, ip, cpy); + ip += length; op = cpy; + + /* get offset */ + match = cpy - LZ4_readLE16(ip); ip+=2; + if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error; /* Error : offset outside destination buffer */ + + /* get matchlength */ + length = token & ML_MASK; + if (length == ML_MASK) + { + unsigned s; + do + { + if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error; + s = *ip++; + length += s; + } while (s==255); + if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error; /* overflow detection */ + } + length += MINMATCH; + + /* check external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) + { + if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error; /* doesn't respect parsing restriction */ + + if (length <= (size_t)(lowPrefix-match)) + { + /* match can be copied as a single segment from external dictionary */ + match = dictEnd - (lowPrefix-match); + memmove(op, match, length); op += length; + } + else + { + /* match encompass external dictionary and current segment */ + size_t copySize = (size_t)(lowPrefix-match); + memcpy(op, dictEnd - copySize, copySize); + op += copySize; + copySize = length - copySize; + if (copySize > (size_t)(op-lowPrefix)) /* overlap within current segment */ + { + BYTE* const endOfMatch = op + copySize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) *op++ = *copyFrom++; + } + else + { + memcpy(op, lowPrefix, copySize); + op += copySize; + } + } + continue; + } + + /* copy repeated sequence */ + cpy = op + length; + if (unlikely((op-match)<8)) + { + const size_t dec64 = dec64table[op-match]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[op-match]; + LZ4_copy4(op+4, match); + op += 8; match -= dec64; + } else { LZ4_copy8(op, match); op+=8; match+=8; } + + if (unlikely(cpy>oend-12)) + { + if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals */ + if (op < oend-8) + { + LZ4_wildCopy(op, match, oend-8); + match += (oend-8) - op; + op = oend-8; + } + while (op +#include +#include +#include + +static u16 LZ4_readLE16(const void *src) { return le16_to_cpu(*(u16 *)src); } +static void LZ4_copy4(void *dst, const void *src) { *(u32 *)dst = *(u32 *)src; } +static void LZ4_copy8(void *dst, const void *src) { *(u64 *)dst = *(u64 *)src; } + +typedef uint8_t BYTE; +typedef uint16_t U16; +typedef uint32_t U32; +typedef int32_t S32; +typedef uint64_t U64; + +#define FORCE_INLINE static inline __attribute__((always_inline)) + +/* Unaltered (except removing unrelated code) from github.com/Cyan4973/lz4. */ +#include "lz4.c" /* #include for inlining, do not link! */ + +#define LZ4F_MAGIC 0x184D2204 + +struct lz4_frame_header { + u32 magic; + union { + u8 flags; + struct { + u8 reserved0:2; + u8 has_content_checksum:1; + u8 has_content_size:1; + u8 has_block_checksum:1; + u8 independent_blocks:1; + u8 version:2; + }; + }; + union { + u8 block_descriptor; + struct { + u8 reserved1:4; + u8 max_block_size:3; + u8 reserved2:1; + }; + }; + /* + u64 content_size iff has_content_size is set */ + /* + u8 header_checksum */ +} __packed; + +struct lz4_block_header { + union { + u32 raw; + struct { + u32 size:31; + u32 not_compressed:1; + }; + }; + /* + size bytes of data */ + /* + u32 block_checksum iff has_block_checksum is set */ +} __packed; + +int ulz4fn(const void *src, size_t srcn, void *dst, size_t *dstn) +{ + const void *end = dst + *dstn; + const void *in = src; + void *out = dst; + int has_block_checksum; + int ret; + *dstn = 0; + + { /* With in-place decompression the header may become invalid later. */ + const struct lz4_frame_header *h = in; + + if (srcn < sizeof(*h) + sizeof(u64) + sizeof(u8)) + return -EINVAL; /* input overrun */ + + /* We assume there's always only a single, standard frame. */ + if (le32_to_cpu(h->magic) != LZ4F_MAGIC || h->version != 1) + return -EPROTONOSUPPORT; /* unknown format */ + if (h->reserved0 || h->reserved1 || h->reserved2) + return -EINVAL; /* reserved must be zero */ + if (!h->independent_blocks) + return -EPROTONOSUPPORT; /* we can't support this yet */ + has_block_checksum = h->has_block_checksum; + + in += sizeof(*h); + if (h->has_content_size) + in += sizeof(u64); + in += sizeof(u8); + } + + while (1) { + struct lz4_block_header b = { .raw = le32_to_cpu(*(u32 *)in) }; + in += sizeof(struct lz4_block_header); + + if (in - src + b.size > srcn) { + ret = -EINVAL; /* input overrun */ + break; + } + + if (!b.size) { + ret = 0; /* decompression successful */ + break; + } + + if (b.not_compressed) { + size_t size = min((ptrdiff_t)b.size, end - out); + memcpy(out, in, size); + out += size; + if (size < b.size) { + ret = -ENOBUFS; /* output overrun */ + break; + } + } else { + /* constant folding essential, do not touch params! */ + ret = LZ4_decompress_generic(in, out, b.size, + end - out, endOnInputSize, + full, 0, noDict, out, NULL, 0); + if (ret < 0) { + ret = -EPROTO; /* decompression error */ + break; + } + out += ret; + } + + in += b.size; + if (has_block_checksum) + in += sizeof(u32); + } + + *dstn = out - dst; + return ret; +} diff --git a/test/compression.c b/test/compression.c index 7ef3a8c..be4e04e 100644 --- a/test/compression.c +++ b/test/compression.c @@ -95,6 +95,28 @@ static const char lzo_compressed[] = "\x73\x61\x67\x65\x73\x2e\x0a\x11\x00\x00\x00\x00\x00\x00"; static const unsigned long lzo_compressed_size = 334; +/* lz4 -z /tmp/plain.txt > /tmp/plain.lz4 */ +static const char lz4_compressed[] = + "\x04\x22\x4d\x18\x64\x70\xb9\x01\x01\x00\x00\xff\x19\x49\x20\x61" + "\x6d\x20\x61\x20\x68\x69\x67\x68\x6c\x79\x20\x63\x6f\x6d\x70\x72" + "\x65\x73\x73\x61\x62\x6c\x65\x20\x62\x69\x74\x20\x6f\x66\x20\x74" + "\x65\x78\x74\x2e\x0a\x28\x00\x3d\xf1\x25\x54\x68\x65\x72\x65\x20" + "\x61\x72\x65\x20\x6d\x61\x6e\x79\x20\x6c\x69\x6b\x65\x20\x6d\x65" + "\x2c\x20\x62\x75\x74\x20\x74\x68\x69\x73\x20\x6f\x6e\x65\x20\x69" + "\x73\x20\x6d\x69\x6e\x65\x2e\x0a\x49\x66\x20\x49\x20\x77\x32\x00" + "\xd1\x6e\x79\x20\x73\x68\x6f\x72\x74\x65\x72\x2c\x20\x74\x45\x00" + "\xf4\x0b\x77\x6f\x75\x6c\x64\x6e\x27\x74\x20\x62\x65\x20\x6d\x75" + "\x63\x68\x20\x73\x65\x6e\x73\x65\x20\x69\x6e\x0a\xcf\x00\x50\x69" + "\x6e\x67\x20\x6d\x12\x00\x00\x32\x00\xf0\x11\x20\x66\x69\x72\x73" + "\x74\x20\x70\x6c\x61\x63\x65\x2e\x20\x41\x74\x20\x6c\x65\x61\x73" + "\x74\x20\x77\x69\x74\x68\x20\x6c\x7a\x6f\x2c\x63\x00\xf5\x14\x77" + "\x61\x79\x2c\x0a\x77\x68\x69\x63\x68\x20\x61\x70\x70\x65\x61\x72" + "\x73\x20\x74\x6f\x20\x62\x65\x68\x61\x76\x65\x20\x70\x6f\x6f\x72" + "\x6c\x79\x4e\x00\x30\x61\x63\x65\x27\x01\x01\x95\x00\x01\x2d\x01" + "\xb0\x0a\x6d\x65\x73\x73\x61\x67\x65\x73\x2e\x0a\x00\x00\x00\x00" + "\x9d\x12\x8c\x9d"; +static const unsigned long lz4_compressed_size = 276; + #define TEST_BUFFER_SIZE 512 @@ -227,6 +249,39 @@ static int uncompress_using_lzo(void *in, unsigned long in_size, return (ret != LZO_E_OK); } +static int compress_using_lz4(void *in, unsigned long in_size, + void *out, unsigned long out_max, + unsigned long *out_size) +{ + /* There is no lz4 compression in u-boot, so fake it. */ + assert(in_size == strlen(plain)); + assert(memcmp(plain, in, in_size) == 0); + + if (lz4_compressed_size > out_max) + return -1; + + memcpy(out, lz4_compressed, lz4_compressed_size); + if (out_size) + *out_size = lz4_compressed_size; + + return 0; +} + +static int uncompress_using_lz4(void *in, unsigned long in_size, + void *out, unsigned long out_max, + unsigned long *out_size) +{ + int ret; + size_t input_size = in_size; + size_t output_size = out_max; + + ret = ulz4fn(in, input_size, out, &output_size); + if (out_size) + *out_size = output_size; + + return (ret != 0); +} + #define errcheck(statement) if (!(statement)) { \ fprintf(stderr, "\tFailed: %s\n", #statement); \ ret = 1; \ @@ -325,6 +380,7 @@ static int do_ut_compression(cmd_tbl_t *cmdtp, int flag, int argc, err += run_test("bzip2", compress_using_bzip2, uncompress_using_bzip2); err += run_test("lzma", compress_using_lzma, uncompress_using_lzma); err += run_test("lzo", compress_using_lzo, uncompress_using_lzo); + err += run_test("lz4", compress_using_lz4, uncompress_using_lz4); printf("ut_compression %s\n", err == 0 ? "ok" : "FAILED"); @@ -401,6 +457,7 @@ static int do_ut_image_decomp(cmd_tbl_t *cmdtp, int flag, int argc, err |= run_bootm_test(IH_COMP_BZIP2, compress_using_bzip2); err |= run_bootm_test(IH_COMP_LZMA, compress_using_lzma); err |= run_bootm_test(IH_COMP_LZO, compress_using_lzo); + err |= run_bootm_test(IH_COMP_LZ4, compress_using_lz4); err |= run_bootm_test(IH_COMP_NONE, compress_using_none); printf("ut_image_decomp %s\n", err == 0 ? "ok" : "FAILED"); -- cgit v0.10.2