Skip to content

Commit 9eb56a3

Browse files
committed
Fixed bug passing the estimated number of samples instead of the loaded number of samples.
Changed unit conversion not to use bit-shifts.
1 parent a463506 commit 9eb56a3

File tree

1 file changed

+15
-13
lines changed

1 file changed

+15
-13
lines changed

programs/dibio.c

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,8 @@ static int DiB_loadFiles(
172172
fclose(f);
173173

174174
DISPLAYLEVEL(2, "\r%79s\r", "");
175-
DISPLAYLEVEL(4, "loaded %zuKB total data, %d nb samples \n", totalDataLoaded >> 10, nbSamplesLoaded );
175+
DISPLAYLEVEL(4, "Loaded %d KB total training data, %d nb samples \n",
176+
(int)(totalDataLoaded / (1 KB)), nbSamplesLoaded );
176177
*bufferSizePtr = totalDataLoaded;
177178
return nbSamplesLoaded;
178179
}
@@ -298,14 +299,14 @@ static fileStats DiB_fileStats(const char** fileNamesTable, int nbFiles, int chu
298299
fs.oneSampleTooLarge |= (fileSize > 2*SAMPLESIZE_MAX);
299300

300301
/* Limit to the first SAMPLESIZE_MAX (128kB) of the file */
301-
DISPLAYLEVEL(3, "Sample file '%s' is too large, limiting to %ukB",
302-
fileNamesTable[n], SAMPLESIZE_MAX >> 10);
302+
DISPLAYLEVEL(3, "Sample file '%s' is too large, limiting to %d KB",
303+
fileNamesTable[n], SAMPLESIZE_MAX / (1 KB));
303304
}
304305
fs.nbSamples += 1;
305306
fs.totalSizeToLoad += MIN(fileSize, SAMPLESIZE_MAX);
306307
}
307308
}
308-
DISPLAYLEVEL(4, "Training files are %lldKB, %d samples\n", fs.totalSizeToLoad >> 10, fs.nbSamples);
309+
DISPLAYLEVEL(4, "Found training data %d files, %d KB, %d samples\n", nbFiles, (int)(fs.totalSizeToLoad / (1 KB)), fs.nbSamples);
309310
return fs;
310311
}
311312

@@ -369,23 +370,24 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
369370
/* init */
370371
if ((S64)loadedSize < fs.totalSizeToLoad)
371372
DISPLAYLEVEL(1, "Training samples set too large (%u MB); training on %u MB only...\n",
372-
(unsigned)(fs.totalSizeToLoad >> 20),
373-
(unsigned)(loadedSize >> 20));
373+
(unsigned)(fs.totalSizeToLoad / (1 MB)),
374+
(unsigned)(loadedSize / (1 MB)));
374375

375376
/* Load input buffer */
376-
DiB_loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable,
377-
nbFiles, chunkSize, displayLevel);
377+
int const nbSamplesLoaded = DiB_loadFiles(
378+
srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable,
379+
nbFiles, chunkSize, displayLevel);
378380

379381
{ size_t dictSize;
380382
if (params) {
381383
DiB_fillNoise((char*)srcBuffer + loadedSize, NOISELENGTH); /* guard band, for end of buffer condition */
382384
dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize,
383-
srcBuffer, sampleSizes, fs.nbSamples,
385+
srcBuffer, sampleSizes, nbSamplesLoaded,
384386
*params);
385387
} else if (coverParams) {
386388
if (optimize) {
387389
dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize,
388-
srcBuffer, sampleSizes, fs.nbSamples,
390+
srcBuffer, sampleSizes, nbSamplesLoaded,
389391
coverParams);
390392
if (!ZDICT_isError(dictSize)) {
391393
unsigned splitPercentage = (unsigned)(coverParams->splitPoint * 100);
@@ -394,13 +396,13 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
394396
}
395397
} else {
396398
dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer,
397-
sampleSizes, fs.nbSamples, *coverParams);
399+
sampleSizes, nbSamplesLoaded, *coverParams);
398400
}
399401
} else {
400402
assert(fastCoverParams != NULL);
401403
if (optimize) {
402404
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize,
403-
srcBuffer, sampleSizes, fs.nbSamples,
405+
srcBuffer, sampleSizes, nbSamplesLoaded,
404406
fastCoverParams);
405407
if (!ZDICT_isError(dictSize)) {
406408
unsigned splitPercentage = (unsigned)(fastCoverParams->splitPoint * 100);
@@ -410,7 +412,7 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
410412
}
411413
} else {
412414
dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, srcBuffer,
413-
sampleSizes, fs.nbSamples, *fastCoverParams);
415+
sampleSizes, nbSamplesLoaded, *fastCoverParams);
414416
}
415417
}
416418
if (ZDICT_isError(dictSize)) {

0 commit comments

Comments
 (0)