Skip to content

Commit 2c4aac3

Browse files
Disaggregate skip cases during the import process. Fix a bug when changing the log level (#51)
1 parent d63a8c2 commit 2c4aac3

3 files changed

Lines changed: 23 additions & 23 deletions

File tree

src/import.c

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,7 @@ int ldb_import_csv(ldb_importation_config_t * job)
601601
/* Counters */
602602
uint32_t imported = 0;
603603
uint32_t skipped = 0;
604+
uint32_t skipped_invalid = 0;
604605

605606
uint64_t totalbytes = ldb_file_size(job->csv_path);
606607
size_t bytecounter = 0;
@@ -684,7 +685,7 @@ int ldb_import_csv(ldb_importation_config_t * job)
684685
return LDB_ERROR_THREAD_ABORT;
685686
}
686687

687-
if (skipped > (line_number * 4) / 5)
688+
if (skipped_invalid > line_number / 2)
688689
{
689690
log_info("Aborting %s import at line %d due to excessive number of skipped lines\n", job->csv_path,line_number);
690691
fclose(fp);
@@ -704,36 +705,36 @@ int ldb_import_csv(ldb_importation_config_t * job)
704705
if (lineln < job->opt.params.csv_fields)
705706
{
706707
log_debug("%s: Line %d -- Skipped, the line %s is too short (size %d)\n", job->csv_path, line_number, line, lineln);
707-
skipped++;
708+
skipped_invalid++;
708709
continue;
709710
}
710711
//skip lines starting with non alphanumeric char
711712
if (!isalnum(line[0]))
712713
{
713714
log_debug("%s: Line %d -- Skipped, Non alphanumeric char %d on line %s\n", job->csv_path, line_number, line[0], line);
714-
skipped++;
715+
skipped_invalid++;
715716
continue;
716717
}
717718
//skip keys with an incorrect length.
718719
char * first_comma = strchr(line, ',');
719720
if (!first_comma)
720721
{
721722
log_debug("%s: Line %d -- Skipped, wrong csv format on line %s .\n", job->csv_path, line_number, line);
722-
skipped++;
723+
skipped_invalid++;
723724
continue;
724725
}
725726
int key_len = first_comma - line;
726727
if (key_len != MD5_LEN_HEX && key_len != MD5_LEN_HEX - 2)
727728
{
728729
log_debug("%s: Line %d -- Skipped, %d Incorrect key lenght.\n", job->csv_path, line_number, key_len);
729-
skipped++;
730+
skipped_invalid++;
730731
continue;
731732
}
732733
/* Skip records with sizes out of range */
733734
if (lineln > MAX_CSV_LINE_LEN || lineln < min_line_size)
734735
{
735736
log_debug("%s: Line %d -- Skipped, %ld exceed MAX line size %d.\n", job->csv_path, line_number, lineln, MAX_CSV_LINE_LEN);
736-
skipped++;
737+
skipped_invalid++;
737738
continue;
738739
}
739740

@@ -744,7 +745,7 @@ int ldb_import_csv(ldb_importation_config_t * job)
744745
if (first_line_byte != first_byte)
745746
{
746747
log_info("%s: Line %d -- Skipped, first byte in file name does not match key first byte %02x != %02x.\n", job->csv_path, line_number, first_byte, first_line_byte);
747-
skipped++;
748+
skipped_invalid++;
748749
continue;
749750
}
750751
}
@@ -763,12 +764,11 @@ int ldb_import_csv(ldb_importation_config_t * job)
763764

764765
/* First CSV field is the data key. Data starts with the second CSV field */
765766
char *data = field_n(2, line);
766-
bool skip = false;
767767

768768
if (!data)
769769
{
770770
log_debug("%s: Line %d -- Skipped, data is missed %d.\n", job->csv_path, line_number);
771-
skipped++;
771+
skipped_invalid++;
772772
continue;
773773
}
774774

@@ -781,7 +781,8 @@ int ldb_import_csv(ldb_importation_config_t * job)
781781
if (dup_id && *last_url_id && !memcmp(data, last_url_id, MD5_LEN * 2))
782782
{
783783
log_debug("%s: Line %d -- Skipped, repeated URL ID.\n", job->csv_path, line_number);
784-
skip = true;
784+
skipped++;
785+
continue;
785786
}
786787
else
787788
memcpy(last_url_id, data, MD5_LEN * 2);
@@ -792,7 +793,8 @@ int ldb_import_csv(ldb_importation_config_t * job)
792793
if (!data)
793794
{
794795
log_debug("%s: Error in line: %d, data is missing -- %s Skipped\n", job->csv_path, line_number, line);
795-
skipped++;
796+
skipped_invalid++;
797+
continue;
796798
}
797799
}
798800
else
@@ -812,8 +814,8 @@ int ldb_import_csv(ldb_importation_config_t * job)
812814
if (r_size <= 0)
813815
{
814816
log_debug("Error: failed to decode line %s. Skipping\n", line);
815-
skip = true;
816-
}
817+
skipped_invalid++;
818+
continue; }
817819
}
818820
else
819821
ldb_error("libscanoss_encoder.so it is not available, \".enc\" files cannot be processed\n");
@@ -827,14 +829,10 @@ int ldb_import_csv(ldb_importation_config_t * job)
827829
if (!skip_csv_check && (csv_fields(line) != job->opt.params.csv_fields))
828830
{
829831
log_debug("%s: Line %d -- Skipped, Missing CSV fields. Expected: %d.\n",job->csv_path, line_number, job->opt.params.csv_fields);
830-
skip = true;
831-
}
832-
833-
if (skip)
834-
{
835-
skipped++;
832+
skipped_invalid++;
836833
continue;
837834
}
835+
838836
}
839837

840838
if (data || (oss_bulk.keys > 1 && job->opt.params.csv_fields < 3))
@@ -843,7 +841,7 @@ int ldb_import_csv(ldb_importation_config_t * job)
843841
if (!file_id_to_bin(line, first_byte, got_1st_byte, itemid, field2, job->opt.params.keys_number > 1 ? true : false))
844842
{
845843
log_debug("%s: failed to parse key, line number: %d\n", job->csv_path, line_number);
846-
skipped++;
844+
skipped_invalid++;
847845
continue;
848846
}
849847

@@ -991,7 +989,7 @@ int ldb_import_csv(ldb_importation_config_t * job)
991989
if (item_sector)
992990
ldb_close_unlock(item_sector);
993991

994-
log_info("%s: %u records imported, %u skipped\n", job->csv_path, imported, skipped);
992+
log_info("%s: %u records imported, %u skipped\n", job->csv_path, imported, skipped+skipped_invalid);
995993

996994
int fd = fileno(fp);
997995
if (fclose(fp))
@@ -2208,6 +2206,7 @@ bool ldb_import_command(char * dbtable, char * path, char * config)
22082206
lines_to_add = max_threads;
22092207

22102208
log_table_config(jobs.job[jobs.sorted[i]]->table, &jobs.job[jobs.sorted[i]]->opt);
2209+
logger_set_level(jobs.job[jobs.sorted[i]]->opt.params.verbose);
22112210
logger_basic("%s",jobs.job[jobs.sorted[i]]->table);
22122211
if (!process_sectors(jobs.job[jobs.sorted[i]], threads_list)) {
22132212
log_info("Error processing sectors for table %s\n", jobs.job[jobs.sorted[i]]->table);
@@ -2237,6 +2236,7 @@ bool ldb_import_command(char * dbtable, char * path, char * config)
22372236
lines_to_add = max_threads;
22382237

22392238
log_table_config(jobs.job[jobs.unsorted[i]]->table, &jobs.job[jobs.unsorted[i]]->opt);
2239+
logger_set_level(jobs.job[jobs.unsorted[i]]->opt.params.verbose);
22402240
logger_basic("%s",jobs.job[jobs.unsorted[i]]->table);
22412241
if (!process_sectors(jobs.job[jobs.unsorted[i]], threads_list)) {
22422242
log_info("Error processing sectors for table %s\n", jobs.job[jobs.unsorted[i]]->table);

src/ldb.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
#include "./ldb/types.h"
2828
#include "./ldb/mz.h"
2929

30-
#define LDB_VERSION "4.1.8"
30+
#define LDB_VERSION "4.1.9"
3131

3232
#define LDB_TABLE_DEFINITION_UNDEFINED -1
3333
#define LDB_TABLE_DEFINITION_STANDARD 0

src/shell.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ int main(int argc, char **argv)
430430
{
431431
if (!first)
432432
strcat(cmd, ",");
433-
strcat(cmd, "VERBOSE=1");
433+
strcat(cmd, "VERBOSE=2");
434434
first = false;
435435
}
436436

0 commit comments

Comments
 (0)