@@ -482,16 +482,112 @@ logfile::build_content_map()
482482 static auto op = lnav_operation{" build_content_map" };
483483
484484 auto op_guard = lnav_opid_guard::internal (op);
485+
486+ log_info (" %s: trying to build content map" ,
487+ this ->lf_filename_as_string .c_str ());
488+ if (this ->lf_line_buffer .is_compressed ()) {
489+ auto skip_size = file_off_t {512 * 1024 };
490+ auto read_size = file_ssize_t {64 * 1024 };
491+ pattern_locks line_locks;
492+ scan_batch_context sbc_tmp{
493+ this ->lf_allocator ,
494+ line_locks,
495+ };
496+
497+ auto peek_range = file_range{
498+ 0 ,
499+ read_size,
500+ };
501+ while (true ) {
502+ auto last_peek = peek_range;
503+ peek_range.fr_offset += skip_size;
504+ log_debug (" content map peek %lld:%lld" ,
505+ peek_range.fr_offset ,
506+ peek_range.fr_size );
507+ auto peek_res = this ->lf_line_buffer .peek_range (
508+ peek_range,
509+ {
510+ line_buffer::peek_options::allow_short_read,
511+ });
512+ if (peek_res.isErr ()) {
513+ log_error (" content map peek failed -- %s" ,
514+ peek_res.unwrapErr ().c_str ());
515+ break ;
516+ }
517+
518+ auto buf = peek_res.unwrap ();
519+ if (buf.empty ()) {
520+ log_info (" file size %lld" ,
521+ this ->lf_line_buffer .get_file_size ());
522+ if (this ->lf_line_buffer .get_file_size () == -1 ) {
523+ log_info (" skipped past end, reversing" );
524+ skip_size = peek_range.fr_size ;
525+ peek_range = last_peek;
526+ continue ;
527+ }
528+ log_info (" reached end of file" );
529+ break ;
530+ }
531+ auto buf_sf = to_string_fragment (buf);
532+ auto split_res = buf_sf.split_pair (string_fragment::tag1{' \n ' });
533+ if (!split_res) {
534+ log_warning (" cannot find start of line at %lld" ,
535+ peek_range.fr_offset );
536+ continue ;
537+ }
538+
539+ auto [_junk, line_start_sf] = split_res.value ();
540+ while (!line_start_sf.empty ()) {
541+ auto utf8_res = is_utf8 (line_start_sf, ' \n ' );
542+ if (!utf8_res.usr_remaining ) {
543+ log_warning (" cannot find end of line at %lld" ,
544+ peek_range.fr_offset + line_start_sf.sf_begin );
545+ break ;
546+ }
547+ auto line_len = utf8_res.remaining_ptr () - line_start_sf.data ();
548+ shared_buffer tmp_sb;
549+ shared_buffer_ref tmp_sbr;
550+
551+ tmp_sbr.share (tmp_sb, line_start_sf.data (), line_len);
552+
553+ auto map_line_fr = file_range{
554+ peek_range.fr_offset + line_start_sf.sf_begin ,
555+ line_len,
556+ };
557+ map_line_fr.fr_metadata .m_has_ansi = utf8_res.usr_has_ansi ;
558+ map_line_fr.fr_metadata .m_valid_utf = utf8_res.is_valid ();
559+ auto map_li = line_info{map_line_fr};
560+ map_li.li_utf8_scan_result = utf8_res;
561+ std::vector<logline> tmp_index;
562+ auto scan_res = this ->lf_format ->scan (
563+ *this , tmp_index, map_li, tmp_sbr, sbc_tmp);
564+ if (scan_res.is <log_format::scan_match>()) {
565+ auto line_time = tmp_index.front ()
566+ .get_time <std::chrono::microseconds>();
567+ this ->lf_content_map .emplace_back (content_map_entry{
568+ map_line_fr,
569+ line_time,
570+ });
571+ log_info (" adding content map entry %lld - %s" ,
572+ map_line_fr.fr_offset ,
573+ lnav::to_rfc3339_string (line_time).c_str ());
574+ if (skip_size < 1024 * 1024 * 1024 ) {
575+ skip_size *= 2 ;
576+ }
577+ break ;
578+ }
579+ line_start_sf = utf8_res.usr_remaining .value ();
580+ }
581+ }
582+ }
583+
485584 auto retval = rebuild_result_t ::NO_NEW_LINES;
486585 auto full_size = this ->get_content_size ();
487586
488587 this ->lf_lower_bound_entry = std::nullopt ;
489588 this ->lf_upper_bound_entry = std::nullopt ;
490- this ->lf_file_size_at_map_time = full_size;
491589
492- log_info (" %s: finding content layout (full_size=%lld)" ,
493- this ->lf_filename_as_string .c_str (),
494- full_size);
590+ log_info (" finding content layout (full_size=%lld)" , full_size);
495591 if (this ->lf_options .loo_time_range .has_lower_bound ()
496592 && this ->lf_options .loo_time_range .tr_begin
497593 > this ->lf_index .front ().get_time <std::chrono::microseconds>()
@@ -506,17 +602,15 @@ logfile::build_content_map()
506602 file_range{first_line_offset, full_size - first_line_offset},
507603 ll->get_time <std::chrono::microseconds>(),
508604 };
509- log_info (" %s: lower bound is within current index, erasing %ld lines" ,
510- this ->lf_filename_as_string .c_str (),
605+ log_info (" lower bound is within current index, erasing %ld lines" ,
511606 std::distance (this ->lf_index .cbegin (), ll));
512607 this ->lf_index_size = first_line_offset;
513608 this ->lf_index .clear ();
514609 retval = rebuild_result_t ::NEW_ORDER;
515610 }
516611
517612 if (this ->lf_index_size == full_size) {
518- log_trace (" %s: file has already been scanned, no need to peek" ,
519- this ->lf_filename_as_string .c_str ());
613+ log_trace (" file has already been scanned, no need to peek" );
520614 const auto & last_line = this ->lf_index .back ();
521615 auto last_line_offset = last_line.get_offset ();
522616 this ->lf_upper_bound_entry = content_map_entry{
@@ -527,29 +621,29 @@ logfile::build_content_map()
527621 && this ->lf_options .loo_time_range .tr_begin
528622 > this ->lf_index .back ().get_time <std::chrono::microseconds>())
529623 {
530- log_info (" %s: lower bound is past content" ,
531- this ->lf_filename_as_string .c_str ());
624+ log_info (" lower bound is past content" );
532625 this ->lf_index .clear ();
533626 retval = rebuild_result_t ::NEW_ORDER;
534627 }
628+ this ->lf_file_size_at_map_time = full_size;
535629 return retval;
536630 }
537631
538632 auto end_entry_opt
539633 = this ->find_content_map_entry (full_size, map_read_upper_bound{});
540634 if (!end_entry_opt.is <map_entry_found>()) {
541635 log_warning (
542- " %s: skipping content map since the last message could not be "
543- " found" ,
544- this ->get_filename ().c_str ());
636+ " skipping content map since the last message could not be "
637+ " found" );
545638 return retval;
546639 }
547640
548641 auto end_entry = end_entry_opt.get <map_entry_found>().mef_entry ;
549- log_info (" found content end: %llu %s" ,
642+ log_info (" found content end: %llu %s" ,
550643 end_entry.cme_range .fr_offset ,
551644 lnav::to_rfc3339_string (to_timeval (end_entry.cme_time )).c_str ());
552645 this ->lf_upper_bound_entry = end_entry;
646+ this ->lf_file_size_at_map_time = full_size;
553647
554648 if (this ->lf_options .loo_time_range .has_lower_bound ()) {
555649 if (this ->lf_options .loo_time_range .tr_begin > end_entry.cme_time ) {
@@ -560,8 +654,7 @@ logfile::build_content_map()
560654 .get_time <std::chrono::microseconds>())
561655 {
562656 auto offset = full_size / 2 ;
563- log_debug (" %s: searching for lower bound %lld" ,
564- this ->lf_filename_as_string .c_str (),
657+ log_debug (" searching for lower bound %lld" ,
565658 this ->lf_options .loo_time_range .tr_begin .count ());
566659 auto low_entry_opt = this ->find_content_map_entry (
567660 offset,
@@ -570,7 +663,7 @@ logfile::build_content_map()
570663 });
571664 if (low_entry_opt.is <map_entry_found>()) {
572665 auto low_entry = low_entry_opt.get <map_entry_found>().mef_entry ;
573- log_info (" found content start: %llu %s" ,
666+ log_info (" found content start: %llu %s" ,
574667 low_entry.cme_range .fr_offset ,
575668 lnav::to_rfc3339_string (to_timeval (low_entry.cme_time ))
576669 .c_str ());
@@ -1368,8 +1461,7 @@ logfile::rebuild_index(std::optional<ui_clock::time_point> deadline)
13681461 } else {
13691462 limit = 100 ;
13701463 }
1371- } else if (!this ->lf_line_buffer .is_compressed ()
1372- && this ->lf_options .loo_detect_format
1464+ } else if (this ->lf_options .loo_detect_format
13731465 && (!has_format || !this ->lf_upper_bound_entry ))
13741466 {
13751467 limit = 1000 ;
0 commit comments