Skip to content

Commit 16773d5

Browse files
committed
[text_format] add plaintext type
Related to #1296
1 parent 6893d40 commit 16773d5

40 files changed

+342
-1622
lines changed

src/base/string_util.cc

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@
3939
#include "is_utf8.hh"
4040
#include "lnav_log.hh"
4141
#include "scn/scan.h"
42+
#include "unistr.h"
4243

4344
using namespace std::string_view_literals;
4445

@@ -47,23 +48,24 @@ scrub_to_utf8(char* buffer, size_t length)
4748
{
4849
size_t index = 0;
4950
while (index < length) {
50-
auto start_index = index;
51-
auto ch_res = ww898::utf::utf8::read([buffer, &index, length]() {
52-
if (index < length) {
53-
return buffer[index++];
54-
}
55-
return '\x00';
56-
});
57-
if (ch_res.isErr()) {
58-
buffer[start_index] = '?';
51+
if (buffer[index] > 0) {
52+
index += 1;
53+
continue;
54+
}
55+
56+
auto rc = u8_mblen((uint8_t*) &buffer[index], length - index);
57+
if (rc <= 0) {
58+
buffer[index] = '?';
59+
} else {
60+
index += rc;
5961
}
6062
}
6163
}
6264

6365
void
6466
quote_content(auto_buffer& buf, const string_fragment& sf, char quote_char)
6567
{
66-
for (char ch : sf) {
68+
for (const char ch : sf) {
6769
if (ch == quote_char) {
6870
buf.push_back('\\').push_back(ch);
6971
continue;

src/base/string_util.tests.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ TEST_CASE("scrub_to_utf8")
6565
char buffer[8]{};
6666

6767
scrub_to_utf8(buffer, sizeof(buffer));
68-
CHECK(buffer[0] == '\x00');
68+
CHECK(buffer[0] == '?');
6969
}
7070

7171
TEST_CASE("truncate_to")

src/base/text_format_enum.hh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -56,10 +56,10 @@ enum class text_format_t : uint8_t {
5656
TF_RESTRUCTURED_TEXT,
5757
TF_INI,
5858
TF_CSV,
59-
TF_UNKNOWN,
59+
TF_PLAINTEXT,
6060
};
6161

6262
constexpr auto text_format_count
63-
= lnav::enums::to_underlying(text_format_t::TF_UNKNOWN) + 1;
63+
= lnav::enums::to_underlying(text_format_t::TF_PLAINTEXT) + 1;
6464

6565
#endif

src/cmds.io.cc

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1551,9 +1551,6 @@ com_open(exec_context& ec, std::string cmdline, std::vector<std::string>& args)
15511551
}
15521552

15531553
auto tf = detect_text_format(al.get_string(), fn_str);
1554-
log_debug(":open preview text format: %s",
1555-
fmt::to_string(tf).c_str());
1556-
15571554
lnav_data.ld_preview_view[0].set_sub_source(
15581555
&lnav_data.ld_preview_source[0]);
15591556
lnav_data.ld_preview_source[0].replace_with(al).set_text_format(

src/command_executor.hh

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,7 @@ struct exec_context {
121121

122122
text_format_t get_output_format() const
123123
{
124-
auto retval = text_format_t::TF_UNKNOWN;
124+
auto retval = text_format_t::TF_PLAINTEXT;
125125

126126
if (!this->ec_output_stack.empty()) {
127127
retval = this->ec_output_stack.back().od_format;
@@ -133,7 +133,7 @@ struct exec_context {
133133
{
134134
if (!this->ec_output_stack.empty()
135135
&& this->ec_output_stack.back().od_format
136-
== text_format_t::TF_UNKNOWN)
136+
== text_format_t::TF_PLAINTEXT)
137137
{
138138
this->ec_output_stack.back().od_format = tf;
139139
}
@@ -212,7 +212,7 @@ struct exec_context {
212212
std::string name = "default",
213213
const std::optional<output_t>& file
214214
= std::nullopt,
215-
text_format_t tf = text_format_t::TF_UNKNOWN);
215+
text_format_t tf = text_format_t::TF_PLAINTEXT);
216216

217217
~output_guard();
218218

@@ -370,14 +370,14 @@ struct exec_context {
370370
struct output_desc {
371371
output_desc(std::string name,
372372
std::optional<output_t> out,
373-
text_format_t tf = text_format_t::TF_UNKNOWN)
373+
text_format_t tf = text_format_t::TF_PLAINTEXT)
374374
: od_name(std::move(name)), od_output(std::move(out)), od_format(tf)
375375
{
376376
}
377377

378378
std::string od_name;
379379
std::optional<output_t> od_output;
380-
text_format_t od_format{text_format_t::TF_UNKNOWN};
380+
text_format_t od_format{text_format_t::TF_PLAINTEXT};
381381
};
382382

383383
std::vector<output_desc> ec_output_stack;

src/data_scanner.hh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -213,7 +213,7 @@ public:
213213
};
214214

215215
std::optional<tokenize_result> tokenize2(text_format_t tf
216-
= text_format_t::TF_UNKNOWN);
216+
= text_format_t::TF_PLAINTEXT);
217217

218218
std::optional<tokenize_result> find_matching_bracket(text_format_t tf,
219219
tokenize_result tr);
@@ -236,7 +236,7 @@ private:
236236
bool is_credit_card(string_fragment frag) const;
237237

238238
std::optional<tokenize_result> tokenize_int(text_format_t tf
239-
= text_format_t::TF_UNKNOWN);
239+
= text_format_t::TF_PLAINTEXT);
240240

241241
std::string ds_line;
242242
shared_buffer_ref ds_sbr;

src/document.sections.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,7 @@ struct metadata_builder {
226226
std::vector<section_type_interval_t> mb_type_intervals;
227227
std::unique_ptr<hier_node> mb_root_node;
228228
std::set<size_t> mb_indents;
229-
text_format_t mb_text_format{text_format_t::TF_UNKNOWN};
229+
text_format_t mb_text_format{text_format_t::TF_PLAINTEXT};
230230
std::set<std::string> mb_words;
231231

232232
metadata to_metadata() &&
@@ -445,7 +445,7 @@ class structure_walker {
445445
case text_format_t::TF_TOML:
446446
case text_format_t::TF_INI:
447447
case text_format_t::TF_LOG:
448-
case text_format_t::TF_UNKNOWN:
448+
case text_format_t::TF_PLAINTEXT:
449449
return true;
450450
default:
451451
return false;

src/document.sections.hh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,7 @@ struct metadata {
126126
std::unique_ptr<hier_node> m_sections_root;
127127
section_types_tree_t m_section_types_tree;
128128
std::set<size_t> m_indents;
129-
text_format_t m_text_format{text_format_t::TF_UNKNOWN};
129+
text_format_t m_text_format{text_format_t::TF_PLAINTEXT};
130130
std::set<std::string> m_words;
131131

132132
std::vector<section_key_t> path_for_range(size_t start, size_t stop);
@@ -162,7 +162,7 @@ struct discover_builder {
162162

163163
attr_line_t& db_line;
164164
line_range db_range{0, -1};
165-
text_format_t db_text_format{text_format_t::TF_UNKNOWN};
165+
text_format_t db_text_format{text_format_t::TF_PLAINTEXT};
166166
bool db_save_words{false};
167167
};
168168

src/external_opener.cc

Lines changed: 50 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -105,54 +105,60 @@ for_href(const std::string& href)
105105
== CURLUE_OK)
106106
{
107107
auto path = std::filesystem::path{path_part.in()};
108-
109-
switch (detect_text_format(""_frag, path)) {
110-
case text_format_t::TF_UNKNOWN:
111-
case text_format_t::TF_BINARY:
112-
break;
113-
default: {
114-
auto line = 0UL;
115-
auto col = 0UL;
116-
117-
auto_mem<char> frag_part(curl_free);
118-
if (curl_url_get(cu,
119-
CURLUPART_FRAGMENT,
120-
frag_part.out(),
121-
CURLU_URLDECODE)
122-
== CURLUE_OK)
123-
{
124-
static const auto FRAG_RE
125-
= lnav::pcre2pp::code::from_const(
126-
R"(^L(\d+)(?:C(\d+))?$)");
127-
thread_local auto match_data
128-
= lnav::pcre2pp::match_data::unitialized();
129-
auto frag_sf = string_fragment::from_c_str(
130-
frag_part.in());
131-
132-
log_debug(" checking fragment for position: %s",
133-
frag_part.in());
134-
if (FRAG_RE.capture_from(frag_sf)
135-
.into(match_data)
136-
.found_p())
108+
auto tf_opt = detect_text_format(""_frag, path);
109+
110+
if (tf_opt.has_value()) {
111+
switch (tf_opt.value()) {
112+
case text_format_t::TF_PLAINTEXT:
113+
case text_format_t::TF_BINARY:
114+
break;
115+
default: {
116+
auto line = 0UL;
117+
auto col = 0UL;
118+
119+
auto_mem<char> frag_part(curl_free);
120+
if (curl_url_get(cu,
121+
CURLUPART_FRAGMENT,
122+
frag_part.out(),
123+
CURLU_URLDECODE)
124+
== CURLUE_OK)
137125
{
138-
line = scn::scan_int<uint32_t>(
139-
match_data[1]->to_string_view())
140-
->value();
141-
if (match_data[2]) {
142-
col = scn::scan_int<uint32_t>(
143-
match_data[2]
144-
->to_string_view())
145-
->value();
126+
static const auto FRAG_RE
127+
= lnav::pcre2pp::code::from_const(
128+
R"(^L(\d+)(?:C(\d+))?$)");
129+
thread_local auto match_data = lnav::
130+
pcre2pp::match_data::unitialized();
131+
auto frag_sf = string_fragment::from_c_str(
132+
frag_part.in());
133+
134+
log_debug(
135+
" checking fragment for position: %s",
136+
frag_part.in());
137+
if (FRAG_RE.capture_from(frag_sf)
138+
.into(match_data)
139+
.found_p())
140+
{
141+
line = scn::scan_int<uint32_t>(
142+
match_data[1]
143+
->to_string_view())
144+
->value();
145+
if (match_data[2]) {
146+
col = scn::scan_int<uint32_t>(
147+
match_data[2]
148+
->to_string_view())
149+
->value();
150+
}
146151
}
147152
}
153+
log_info(
154+
"Opening href with external editor: "
155+
"%s:%lu:%lu",
156+
path_part.in(),
157+
line,
158+
col);
159+
return external_editor::open(
160+
path_part.in(), line, col);
148161
}
149-
log_info(
150-
"Opening href with external editor: %s:%lu:%lu",
151-
path_part.in(),
152-
line,
153-
col);
154-
return external_editor::open(
155-
path_part.in(), line, col);
156162
}
157163
}
158164
}

src/file_format.cc

Lines changed: 17 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -119,20 +119,23 @@ detect_file_format(const std::filesystem::path& filename)
119119
auto tf = detect_text_format(header_frag, filename);
120120
auto looping = true;
121121

122-
switch (tf) {
123-
case text_format_t::TF_UNKNOWN:
124-
case text_format_t::TF_BINARY:
125-
case text_format_t::TF_LOG:
126-
case text_format_t::TF_JSON:
127-
log_info("file does not have a known text format: %s",
128-
filename.c_str());
129-
break;
130-
default:
131-
log_info("file has text format: %s -> %d",
132-
filename.c_str(),
133-
tf);
134-
looping = false;
135-
break;
122+
if (tf) {
123+
switch (tf.value()) {
124+
case text_format_t::TF_PLAINTEXT:
125+
case text_format_t::TF_BINARY:
126+
case text_format_t::TF_LOG:
127+
case text_format_t::TF_JSON:
128+
log_info(
129+
"file does not have a known text format: %s",
130+
filename.c_str());
131+
break;
132+
default:
133+
log_info("file has text format: %s -> %d",
134+
filename.c_str(),
135+
tf.value());
136+
looping = false;
137+
break;
138+
}
136139
}
137140

138141
lnav::piper::multiplex_matcher mm;

0 commit comments

Comments
 (0)