@@ -309,7 +309,7 @@ static fileStats DiB_fileStats(const char** fileNamesTable, int nbFiles, size_t
309309int DiB_trainFromFiles (const char * dictFileName , size_t maxDictSize ,
310310 const char * * fileNamesTable , int nbFiles , size_t chunkSize ,
311311 ZDICT_legacy_params_t * params , ZDICT_cover_params_t * coverParams ,
312- ZDICT_fastCover_params_t * fastCoverParams , int optimize )
312+ ZDICT_fastCover_params_t * fastCoverParams , int optimize , unsigned memLimit )
313313{
314314 fileStats fs ;
315315 size_t * sampleSizes ; /* vector of sample sizes. Each sample can be up to SAMPLESIZE_MAX */
@@ -341,6 +341,11 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize,
341341 /* Limit the size of the training data to 2GB */
342342 /* TODO: there is opportunity to stop DiB_fileStats() early when the data limit is reached */
343343 loadedSize = (size_t )MIN ( MIN ((S64 )maxMem , fs .totalSizeToLoad ), MAX_SAMPLES_SIZE );
344+ if (memLimit != 0 ) {
345+ DISPLAYLEVEL (2 , "! Warning : setting manual memory limit for dictionary training data at %u MB \n" ,
346+ (unsigned )(memLimit / (1 MB )));
347+ loadedSize = (size_t )MIN (loadedSize , memLimit );
348+ }
344349 srcBuffer = malloc (loadedSize + NOISELENGTH );
345350 sampleSizes = (size_t * )malloc (fs .nbSamples * sizeof (size_t ));
346351 }
0 commit comments