Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Minor compression ratio improvements for high compression modes on small data #2781

Draft
wants to merge 7 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -723,36 +723,36 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_windowLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_windowLog, value);
CCtxParams->cParams.windowLog = (U32)value;
CCtxParams->cParams.windowLog = (unsigned)value;
return CCtxParams->cParams.windowLog;

case ZSTD_c_hashLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_hashLog, value);
CCtxParams->cParams.hashLog = (U32)value;
CCtxParams->cParams.hashLog = (unsigned)value;
return CCtxParams->cParams.hashLog;

case ZSTD_c_chainLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_chainLog, value);
CCtxParams->cParams.chainLog = (U32)value;
CCtxParams->cParams.chainLog = (unsigned)value;
return CCtxParams->cParams.chainLog;

case ZSTD_c_searchLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_searchLog, value);
CCtxParams->cParams.searchLog = (U32)value;
CCtxParams->cParams.searchLog = (unsigned)value;
return (size_t)value;

case ZSTD_c_minMatch :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_minMatch, value);
CCtxParams->cParams.minMatch = value;
CCtxParams->cParams.minMatch = (unsigned)value;
return CCtxParams->cParams.minMatch;

case ZSTD_c_targetLength :
BOUNDCHECK(ZSTD_c_targetLength, value);
CCtxParams->cParams.targetLength = value;
CCtxParams->cParams.targetLength = (unsigned)value;
return CCtxParams->cParams.targetLength;

case ZSTD_c_strategy :
Expand All @@ -765,12 +765,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
/* Content size written in frame header _when known_ (default:1) */
DEBUGLOG(4, "set content size flag = %u", (value!=0));
CCtxParams->fParams.contentSizeFlag = value != 0;
return CCtxParams->fParams.contentSizeFlag;
return (size_t)CCtxParams->fParams.contentSizeFlag;

case ZSTD_c_checksumFlag :
/* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
CCtxParams->fParams.checksumFlag = value != 0;
return CCtxParams->fParams.checksumFlag;
return (size_t)CCtxParams->fParams.checksumFlag;

case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
Expand All @@ -779,11 +779,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,

case ZSTD_c_forceMaxWindow :
CCtxParams->forceWindow = (value != 0);
return CCtxParams->forceWindow;
return (size_t)CCtxParams->forceWindow;

case ZSTD_c_forceAttachDict : {
const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref);
CCtxParams->attachDictPref = pref;
return CCtxParams->attachDictPref;
}
Expand Down
8 changes: 4 additions & 4 deletions lib/compress/zstd_compress_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (__builtin_ctzll((U64)val) >> 3);
return (unsigned)(__builtin_ctzll((U64)val) >> 3);
# else
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
0, 3, 1, 3, 1, 4, 2, 7,
Expand All @@ -693,7 +693,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
unsigned long r=0;
return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_ctz((U32)val) >> 3);
return (unsigned)(__builtin_ctz((U32)val) >> 3);
# else
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
3, 2, 2, 1, 3, 2, 0, 1,
Expand All @@ -712,7 +712,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (__builtin_clzll(val) >> 3);
return (unsigned)(__builtin_clzll(val) >> 3);
# else
unsigned r;
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
Expand All @@ -726,7 +726,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
unsigned long r = 0;
return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_clz((U32)val) >> 3);
return (unsigned)(__builtin_clz((U32)val) >> 3);
# else
unsigned r;
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
Expand Down
3 changes: 1 addition & 2 deletions lib/compress/zstd_compress_literals.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);

/* small ? don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
Expand Down Expand Up @@ -136,7 +135,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
{ U32 const lhc = hType + (((U32)!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
Expand Down
5 changes: 5 additions & 0 deletions lib/compress/zstd_compress_literals.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */


/* Below this limit, the heuristic does not even attempt compression of literals,
* as the cost of the headers is expected to outweight the benefits.
* This limit is not applicable when re-using statistics from dictionary or previous block */
#define COMPRESS_LITERALS_SIZE_MIN 63

size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);

size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
Expand Down
Loading