Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dictionary-in-stream format in contrib #2349

Open
wants to merge 6 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ man:
contrib: lib
$(MAKE) -C contrib/pzstd all
$(MAKE) -C contrib/seekable_format/examples all
$(MAKE) -C contrib/dict_in_stream/examples all
$(MAKE) -C contrib/largeNbDicts all
cd contrib/single_file_libs/ ; ./build_decoder_test.sh
cd contrib/single_file_libs/ ; ./build_library_test.sh
Expand Down
42 changes: 42 additions & 0 deletions contrib/dict_in_stream/examples/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# ################################################################
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# ################################################################

# This Makefile presumes libzstd is built, using `make` in / or /lib/

# Location and name of the static zstd library the examples link against.
ZSTDLIB_PATH = ../../../lib
ZSTDLIB_NAME = libzstd.a
ZSTDLIB = $(ZSTDLIB_PATH)/$(ZSTDLIB_NAME)

# Header search paths: dict_in_stream, seekable_format and the zstd library.
CPPFLAGS += -I.. -I../../seekable_format -I../../../lib -I../../../lib/common

CFLAGS ?= -O3
CFLAGS += -g

# Despite the _OBJS names these are C sources; they are compiled directly
# into each example binary together with the example's own source file.
DIS_OBJS = ../zstddis_compress.c ../zstddis_decompress.c
SEEKABLE_OBJS = ../../seekable_format/zstdseek_compress.c ../../seekable_format/zstdseek_decompress.c

.PHONY: default all clean test

default: all

all: seekable_compression seekable_decompression

# Use $(MAKE) rather than a bare `make` so that the sub-make inherits the
# command-line flags and the jobserver of the invoking make.
$(ZSTDLIB):
	$(MAKE) -C $(ZSTDLIB_PATH) $(ZSTDLIB_NAME)

seekable_compression : seekable_compression.c $(DIS_OBJS) $(SEEKABLE_OBJS) $(ZSTDLIB)
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@

seekable_decompression : seekable_decompression.c $(DIS_OBJS) $(SEEKABLE_OBJS) $(ZSTDLIB)
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@

clean:
	@rm -f core *.o tmp* result* *.zst \
        seekable_compression seekable_decompression
	@echo Cleaning completed
173 changes: 173 additions & 0 deletions contrib/dict_in_stream/examples/seekable_compression.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
/*
* Copyright (c) 2017-present, Facebook, Inc.
* Copyright (c) 2020 Sean Bartell
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/

#include <stdlib.h> // malloc, free, exit, atoi
#include <stdio.h> // fprintf, perror, feof, fopen, etc.
#include <string.h> // strlen, memset, strcat
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h> // presumes zstd library is installed

#include "zstd_dict_in_stream.h"
#include "zstd_seekable.h"

/*
 * Allocates `size` bytes with malloc() and returns the buffer.
 * On allocation failure the error is reported with perror() and the
 * process exits with code 1, so a returned pointer is never NULL.
 */
static void* malloc_orDie(size_t size)
{
    void* const buff = malloc(size);
    if (buff == NULL) {
        /* perror() appends ": <reason>" itself, so the prefix carries no colon */
        perror("malloc");
        exit(1);
    }
    return buff;
}

/*
 * Opens `filename` with fopen(), using `instruction` as the mode string.
 * Returns the opened stream; if fopen() fails, reports the failure with
 * perror(filename) and exits with code 3.
 */
static FILE* fopen_orDie(const char *filename, const char *instruction)
{
    FILE* const stream = fopen(filename, instruction);
    if (stream == NULL) {
        perror(filename);
        exit(3);
    }
    return stream;
}

/*
 * Reads up to `sizeToRead` bytes from `file` into `buffer`.
 * Returns the number of bytes read; a short count is accepted only when
 * the stream is at end-of-file. Any other short read is reported with
 * perror() and terminates the process with exit code 4.
 */
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
{
    size_t const got = fread(buffer, 1, sizeToRead, file);
    if (got != sizeToRead && !feof(file)) {
        perror("fread");
        exit(4);
    }
    return got;
}

/*
 * Writes `sizeToWrite` bytes from `buffer` to `file`.
 * Returns `sizeToWrite` when everything was written; otherwise reports
 * the failure with perror() and exits with code 5.
 */
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
{
    if (fwrite(buffer, 1, sizeToWrite, file) != sizeToWrite) {
        perror("fwrite");
        exit(5);
    }
    return sizeToWrite;
}

/*
 * Closes `file` with fclose().
 * Returns 0 on success; on failure reports the error with perror() and
 * exits with code 6.
 */
static size_t fclose_orDie(FILE* file)
{
    if (fclose(file) != 0) {
        perror("fclose");
        exit(6);
    }
    return 0;
}

/*
 * Returns the size of `file`, measured by seeking to its end and reading
 * the position with ftell(). The stream is rewound to the start before
 * returning. Failures are reported with perror() and end the process:
 * exit code 7 (seek to end), 8 (ftell) or 9 (seek back to start).
 */
static size_t fsize_orDie(FILE* file)
{
    long endPos;

    if (fseek(file, 0, SEEK_END) != 0) {
        perror("fseek");
        exit(7);
    }

    endPos = ftell(file);
    if (endPos < 0) {
        perror("ftell");
        exit(8);
    }

    if (fseek(file, 0, SEEK_SET) != 0) {
        perror("fseek");
        exit(9);
    }

    return (size_t)endPos;
}

/*
 * Compresses the file `fname` into `outName` with the seekable-format
 * streaming compressor, using the dictionary stored in `dictName`.
 *
 * The output starts with a dictionary frame produced by
 * ZSTD_dict_in_stream_createFrame(); that frame is also recorded in the
 * seekable frame log, followed by the compressed frames of the input.
 *
 *   dictName  : path of the raw dictionary file
 *   fname     : path of the file to compress
 *   outName   : path of the output file (created/truncated)
 *   cLevel    : compression level passed to the stream and to the CDict
 *   frameSize : passed to ZSTD_seekable_initCStream() as the frame size
 *
 * Never returns on failure: every error prints a diagnostic and exits.
 * Exit codes 10-15 are the original ones; 16 (short dictionary read),
 * 17 (CDict creation) and 18 (frame logging) cover results that were
 * previously ignored.
 */
static void compressFile_orDie(const char* dictName, const char* fname, const char* outName, int cLevel, unsigned frameSize)
{
    FILE* const fdict = fopen_orDie(dictName, "rb");
    FILE* const fin  = fopen_orDie(fname, "rb");
    FILE* const fout = fopen_orDie(outName, "wb");
    size_t const dictSize = fsize_orDie(fdict);
    void*  const dict = malloc_orDie(dictSize);
    size_t const buffInSize = ZSTD_CStreamInSize();    /* can always read one full block */
    void*  const buffIn  = malloc_orDie(buffInSize);
    size_t const buffOutSize = ZSTD_CStreamOutSize();  /* can always flush a full block */
    void*  const buffOut = malloc_orDie(buffOutSize);

    ZSTD_seekable_CStream* const cstream = ZSTD_seekable_createCStream();
    if (cstream==NULL) { fprintf(stderr, "ZSTD_seekable_createCStream() error \n"); exit(10); }
    size_t const initResult = ZSTD_seekable_initCStream(cstream, cLevel, 1, frameSize);
    if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_initCStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }

    /* Load the whole dictionary; a short read would silently feed
     * uninitialized bytes to the dictionary builder, so treat it as fatal. */
    size_t const dictRead = fread_orDie(dict, dictSize, fdict);
    if (dictRead != dictSize) { fprintf(stderr, "%s: short read of dictionary (%zu of %zu bytes) \n", dictName, dictRead, dictSize); exit(16); }

    /* ZSTD_createCDict() returns NULL on failure. */
    ZSTD_CDict* const cdict = ZSTD_createCDict(dict, dictSize, cLevel);
    if (cdict==NULL) { fprintf(stderr, "ZSTD_createCDict() error \n"); exit(17); }
    /* NOTE(review): the return type of ZSTD_seekable_refCDict() is not visible
     * here; if it reports errors, its result should be checked as well. */
    ZSTD_seekable_refCDict(cstream, cdict);

    /* Build the dictionary frame and emit it ahead of any compressed data. */
    size_t dictFrameSize = ZSTD_dict_in_stream_maxFrameSize(dict, dictSize);
    if (ZSTD_isError(dictFrameSize)) { fprintf(stderr, "ZSTD_dict_in_stream_maxFrameSize() error : %s \n", ZSTD_getErrorName(dictFrameSize)); exit(14); }
    void* const dictFrame = malloc_orDie(dictFrameSize);
    /* NOTE(review): the trailing 5 is presumably a compression level for the
     * dictionary frame - confirm against zstd_dict_in_stream.h. */
    dictFrameSize = ZSTD_dict_in_stream_createFrame(dictFrame, dictFrameSize, dict, dictSize, 5);
    if (ZSTD_isError(dictFrameSize)) { fprintf(stderr, "ZSTD_dict_in_stream_createFrame() error : %s \n", ZSTD_getErrorName(dictFrameSize)); exit(15); }
    fwrite_orDie(dictFrame, dictFrameSize, fout);

    /* Record the dictionary frame in the frame log (remaining arguments 0). */
    size_t const logResult = ZSTD_seekable_logFrame(ZSTD_seekable_getFrameLog(cstream), dictFrameSize, 0, 0);
    if (ZSTD_isError(logResult)) { fprintf(stderr, "ZSTD_seekable_logFrame() error : %s \n", ZSTD_getErrorName(logResult)); exit(18); }

    /* Stream the input through the compressor, one input buffer at a time. */
    size_t read, toRead = buffInSize;
    while( (read = fread_orDie(buffIn, toRead, fin)) ) {
        ZSTD_inBuffer input = { buffIn, read, 0 };
        while (input.pos < input.size) {
            ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
            toRead = ZSTD_seekable_compressStream(cstream, &output , &input);   /* toRead is guaranteed to be <= ZSTD_CStreamInSize() */
            if (ZSTD_isError(toRead)) { fprintf(stderr, "ZSTD_seekable_compressStream() error : %s \n", ZSTD_getErrorName(toRead)); exit(12); }
            if (toRead > buffInSize) toRead = buffInSize;   /* Safely handle case when `buffInSize` is manually changed to a value < ZSTD_CStreamInSize()*/
            fwrite_orDie(buffOut, output.pos, fout);
        }
    }

    /* Finish the stream; keep flushing until nothing remains to be written. */
    while (1) {
        ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
        size_t const remainingToFlush = ZSTD_seekable_endStream(cstream, &output);   /* close stream */
        if (ZSTD_isError(remainingToFlush)) { fprintf(stderr, "ZSTD_seekable_endStream() error : %s \n", ZSTD_getErrorName(remainingToFlush)); exit(13); }
        fwrite_orDie(buffOut, output.pos, fout);
        if (!remainingToFlush) break;
    }

    ZSTD_seekable_freeCStream(cstream);
    ZSTD_freeCDict(cdict);
    fclose_orDie(fout);
    fclose_orDie(fin);
    fclose_orDie(fdict);
    free(dict);
    free(dictFrame);
    free(buffIn);
    free(buffOut);
}

/*
 * Builds the output file name for `filename` by appending ".zst".
 * The result is a freshly allocated, NUL-terminated string obtained from
 * malloc_orDie(); the caller releases it with free().
 */
static char* createOutFilename_orDie(const char* filename)
{
    static const char suffix[] = ".zst";   /* sizeof includes the terminating NUL */
    size_t const nameLen = strlen(filename);
    char* const outName = (char*)malloc_orDie(nameLen + sizeof suffix);

    memcpy(outName, filename, nameLen);
    memcpy(outName + nameLen, suffix, sizeof suffix);
    return outName;
}

/*
 * Usage: <exe> DICT_FILE FILE FRAME_SIZE
 *
 * Compresses FILE into "FILE.zst" at compression level 10, using the
 * dictionary in DICT_FILE and FRAME_SIZE as the seekable frame size.
 * FRAME_SIZE must be a plain decimal number that fits in `unsigned`.
 * Returns 0 on success and 1 on a usage or argument error; I/O and
 * compression failures exit from inside the helpers.
 */
int main(int argc, const char** argv) {
    const char* const exeName = argv[0];
    if (argc!=4) {
        /* diagnostics go to stderr, matching seekable_decompression */
        fprintf(stderr, "wrong arguments\n");
        fprintf(stderr, "usage:\n");
        fprintf(stderr, "%s DICT_FILE FILE FRAME_SIZE\n", exeName);
        return 1;
    }

    {   const char* const dictFileName = argv[1];
        const char* const inFileName = argv[2];
        const char* const frameSizeArg = argv[3];
        char* end = NULL;
        unsigned long long parsed = 0;
        int valid = (frameSizeArg[0] >= '0' && frameSizeArg[0] <= '9');   /* rejects empty input, signs and leading blanks */

        if (valid) {
            /* On overflow strtoull() saturates at ULLONG_MAX, which the range
             * check below rejects, so errno does not need to be consulted. */
            parsed = strtoull(frameSizeArg, &end, 10);
            valid = (*end == '\0') && (parsed <= (unsigned)-1);
        }
        if (!valid) {
            fprintf(stderr, "invalid FRAME_SIZE: %s\n", frameSizeArg);
            return 1;
        }

        {   unsigned const frameSize = (unsigned)parsed;
            char* const outFileName = createOutFilename_orDie(inFileName);
            compressFile_orDie(dictFileName, inFileName, outFileName, 10, frameSize);
            free(outFileName);
        }
    }

    return 0;
}
153 changes: 153 additions & 0 deletions contrib/dict_in_stream/examples/seekable_decompression.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
* Copyright (c) 2017-present, Facebook, Inc.
* Copyright (c) 2020 Sean Bartell
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/


#include <stdlib.h> // malloc, exit
#include <stdio.h> // fprintf, perror, feof
#include <string.h> // strerror
#include <errno.h> // errno
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h> // presumes zstd library is installed
#include <zstd_errors.h>

#include "zstd_dict_in_stream.h"
#include "zstd_seekable.h"

/* Smaller of `a` and `b`. Either argument may be evaluated twice, so do not
 * pass expressions with side effects. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/*
 * malloc() wrapper that never returns NULL: when the allocation of
 * `size` bytes fails, the error is printed with perror() and the
 * process exits with code 1.
 */
static void* malloc_orDie(size_t size)
{
    void* const mem = malloc(size);
    if (mem == NULL) {
        perror("malloc");
        exit(1);
    }
    return mem;
}

/*
 * realloc() wrapper: resizes the allocation `ptr` to `size` bytes and
 * returns the (possibly moved) buffer. If realloc() returns NULL, the
 * error is printed with perror() and the process exits with code 1.
 */
static void* realloc_orDie(void* ptr, size_t size)
{
    void* const resized = realloc(ptr, size);
    if (resized == NULL) {
        perror("realloc");
        exit(1);
    }
    return resized;
}

/*
 * fopen() wrapper: opens `filename` in mode `instruction` and returns
 * the stream. A failed open is reported with perror(filename) and the
 * process exits with code 3.
 */
static FILE* fopen_orDie(const char *filename, const char *instruction)
{
    FILE* const handle = fopen(filename, instruction);
    if (!handle) {
        perror(filename);
        exit(3);
    }
    return handle;
}

/*
 * fread() wrapper: reads up to `sizeToRead` bytes from `file` into
 * `buffer` and returns the byte count. A short count is tolerated only
 * at end-of-file; otherwise perror() is called and the process exits
 * with code 4.
 */
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
{
    size_t const nbRead = fread(buffer, 1, sizeToRead, file);
    int const complete = (nbRead == sizeToRead);

    if (!complete && !feof(file)) {
        perror("fread");
        exit(4);
    }
    return nbRead;
}

/*
 * fwrite() wrapper: writes exactly `sizeToWrite` bytes from `buffer` to
 * `file` and returns that count. A partial write is reported with
 * perror() and the process exits with code 5.
 */
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
{
    size_t const nbWritten = fwrite(buffer, 1, sizeToWrite, file);
    if (nbWritten != sizeToWrite) {
        perror("fwrite");
        exit(5);
    }
    return sizeToWrite;
}

/*
 * fclose() wrapper: closes `file` and returns 0. If fclose() reports a
 * failure, perror() is called and the process exits with code 6.
 */
static size_t fclose_orDie(FILE* file)
{
    int const failed = (fclose(file) != 0);
    if (failed) {
        perror("fclose");
        exit(6);
    }
    return 0;
}

/*
 * Repositions `file` with fseek(file, offset, origin) and then flushes
 * the stream with fflush(). If either call fails, the error is reported
 * with perror("fseek") and the process exits with code 7.
 */
static void fseek_orDie(FILE* file, long int offset, int origin) {
    /* fflush() is only attempted once the seek itself has succeeded */
    if (fseek(file, offset, origin) != 0 || fflush(file) != 0) {
        perror("fseek");
        exit(7);
    }
}


/*
 * Decompresses the byte range [startOffset, endOffset) of the
 * decompressed content of `fname` and writes it to stdout.
 *
 * The file is expected to begin with a dictionary-in-stream frame: a
 * header of ZSTD_DICT_IN_STREAM_HEADER_SIZE bytes followed by the number
 * of data bytes reported by ZSTD_dict_in_stream_getDataSize(). The
 * dictionary built from that data is attached to the seekable
 * decompressor before any data is decoded.
 *
 * Never returns on failure: every error prints a diagnostic and exits
 * (codes 10-15, plus the codes of the *_orDie helpers). stdout is closed
 * before returning.
 */
static void decompressFile_orDie(const char* fname, off_t startOffset, off_t endOffset)
{
    FILE* const fin  = fopen_orDie(fname, "rb");
    FILE* const fout = stdout;
    size_t const buffOutSize = ZSTD_DStreamOutSize();  /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */
    void*  const buffOut = malloc_orDie(buffOutSize);

    ZSTD_seekable* const seekable = ZSTD_seekable_create();
    if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }

    /* Read the dictionary frame header, then the dictionary data it announces. */
    char header[ZSTD_DICT_IN_STREAM_HEADER_SIZE];
    size_t const headerSize = fread_orDie(header, ZSTD_DICT_IN_STREAM_HEADER_SIZE, fin);
    size_t const dictDataSize = ZSTD_dict_in_stream_getDataSize(header, headerSize);
    if (ZSTD_isError(dictDataSize)) { fprintf(stderr, "ZSTD_dict_in_stream_getDataSize() error : %s \n", ZSTD_getErrorName(dictDataSize)); exit(13); }
    void* const dictData = malloc_orDie(dictDataSize);
    size_t const actualDictDataSize = fread_orDie(dictData, dictDataSize, fin);
    ZSTD_DDict *ddict = ZSTD_dict_in_stream_createDDict(dictData, actualDictDataSize);
    if (!ddict) { fprintf(stderr, "ZSTD_dict_in_stream_createDDict() error\n"); exit(14); }
    /* NOTE(review): freeing here assumes the DDict does not keep referencing
     * `dictData` - confirm against zstd_dict_in_stream.h. */
    free(dictData);

    size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
    if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
    size_t const refResult = ZSTD_seekable_refDDict(seekable, ddict);
    if (ZSTD_isError(refResult)) { fprintf(stderr, "ZSTD_seekable_refDDict() error : %s \n", ZSTD_getErrorName(refResult)); exit(15); }

    /* Decode the requested range in chunks of at most buffOutSize bytes. */
    while (startOffset < endOffset) {
        size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);

        if (ZSTD_isError(result)) {
            fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
                    ZSTD_getErrorName(result));
            exit(12);
        }
        if (result == 0) {
            /* No progress: without this check the loop would spin forever,
             * since startOffset would never advance (e.g. END past the data). */
            fprintf(stderr, "no more data to decompress at offset %lld \n", (long long)startOffset);
            break;
        }
        fwrite_orDie(buffOut, result, fout);
        startOffset += result;
    }

    ZSTD_freeDDict(ddict);
    ZSTD_seekable_free(seekable);
    fclose_orDie(fin);
    fclose_orDie(fout);
    free(buffOut);
}


/*
 * Parses `arg` as a non-negative base-10 offset and stores it in `*out`.
 * Returns 1 on success; returns 0 (leaving `*out` untouched) when `arg`
 * has no digits, has trailing characters, is negative, or does not fit
 * in `off_t`.
 */
static int parseOffset(const char* arg, off_t* out)
{
    char* end = NULL;
    long long value;

    errno = 0;
    value = strtoll(arg, &end, 10);
    if (end == arg || *end != '\0' || errno == ERANGE) return 0;
    if (value < 0) return 0;
    if ((long long)(off_t)value != value) return 0;   /* would not survive the conversion to off_t */

    *out = (off_t)value;
    return 1;
}

/*
 * Usage: <exe> FILE START END
 *
 * Writes the decompressed bytes [START, END) of FILE to stdout.
 * Returns 0 on success and 1 on a usage or argument error; I/O and
 * decompression failures exit from inside the helpers.
 */
int main(int argc, const char** argv)
{
    const char* const exeName = argv[0];

    if (argc!=4) {
        fprintf(stderr, "wrong arguments\n");
        fprintf(stderr, "usage:\n");
        fprintf(stderr, "%s FILE START END\n", exeName);
        return 1;
    }

    {
        const char* const inFilename = argv[1];
        off_t startOffset = 0;
        off_t endOffset = 0;

        if (!parseOffset(argv[2], &startOffset) || !parseOffset(argv[3], &endOffset)) {
            fprintf(stderr, "START and END must be non-negative decimal integers\n");
            return 1;
        }
        if (startOffset > endOffset) {
            fprintf(stderr, "START must not be greater than END\n");
            return 1;
        }
        decompressFile_orDie(inFilename, startOffset, endOffset);
    }

    return 0;
}
Loading