Skip to content

Commit 080a585

Browse files
authored
Merge pull request #16 from glmcdona/glmcdona/msbuild-fix
Fix msbuild with newer compilers
2 parents 6f0f188 + 2c812c2 commit 080a585

File tree

6 files changed

+611
-610
lines changed

6 files changed

+611
-610
lines changed

setup.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,11 @@
88
from pybind11.setup_helpers import Pybind11Extension, build_ext
99
from setuptools import setup, Extension, find_packages
1010

11-
__version__ = "0.1.7"
11+
__version__ = "0.1.8"
1212

1313
ext_modules = [
1414
Pybind11Extension("binary2strings",
1515
sorted(glob("src/*.cpp")), # Sort source files for reproducibility
16-
headers = sorted(glob("src/*.hpp")),
1716
define_macros = [('VERSION_INFO', __version__)],
1817
include_dirs = ["src"],
1918
),

src/binary2strings.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
using namespace std;
44

5-
size_t try_utf8_char_step(const unsigned char* buffer, size_t buffer_size, long offset)
5+
size_t try_utf8_char_step(const unsigned char* buffer, size_t buffer_size, size_t offset)
66
{
77
// Returns 0 if it's not likely a valid utf8 character. For ascii range of characters it requires
88
// the character to be a displayable character.
@@ -99,14 +99,14 @@ int get_language_group(char16_t c)
9999
// Switch the definition based on platform:
100100
#if defined(_WIN32) || defined(_WIN64)
101101
// Note: Buffer overrun security checks disabled, since they added ~50% overhead.
102-
__declspec(safebuffers) extracted_string* try_extract_string(const unsigned char* buffer, size_t buffer_size, long offset, size_t min_chars)
102+
__declspec(safebuffers) extracted_string* try_extract_string(const unsigned char* buffer, size_t buffer_size, size_t offset, size_t min_chars)
103103
#else
104-
extracted_string* try_extract_string(const unsigned char* buffer, size_t buffer_size, long offset, size_t min_chars)
104+
extracted_string* try_extract_string(const unsigned char* buffer, size_t buffer_size, size_t offset, size_t min_chars)
105105
#endif
106106
{
107107
// Try extracting the string as either utf8 or unicode wchar format. Returns None if it's not a valid string.
108-
int i;
109-
int char_count;
108+
size_t i;
109+
size_t char_count;
110110

111111
// Try to parse as utf8 first
112112
size_t utf_char_len;
@@ -192,7 +192,7 @@ extracted_string* try_extract_string(const unsigned char* buffer, size_t buffer_
192192
}
193193

194194

195-
std::tuple<string, string, std::pair<int, int>, bool> try_extract_string_tuple(const unsigned char* buffer, size_t buffer_size, long offset, size_t min_chars, bool only_interesting)
195+
std::tuple<string, string, std::pair<size_t, size_t>, bool> try_extract_string_tuple(const unsigned char* buffer, size_t buffer_size, size_t offset, size_t min_chars, bool only_interesting)
196196
{
197197
// Simple wrapper to return a tuple instead
198198
extracted_string* s = try_extract_string(buffer, buffer_size, offset, min_chars);
@@ -206,7 +206,7 @@ std::tuple<string, string, std::pair<int, int>, bool> try_extract_string_tuple(c
206206
auto result = std::make_tuple(
207207
s->get_string(),
208208
s->get_type_string(),
209-
std::pair<int, int>(s->get_offset_start(), s->get_offset_end()),
209+
std::pair<size_t, size_t>(s->get_offset_start(), s->get_offset_end()),
210210
is_interesting
211211
);
212212

@@ -220,11 +220,11 @@ std::tuple<string, string, std::pair<int, int>, bool> try_extract_string_tuple(c
220220
}
221221

222222

223-
vector<std::tuple<string, string, std::pair<int, int>, bool>> extract_all_strings(const unsigned char buffer[], size_t buffer_size, size_t min_chars, bool only_interesting)
223+
vector<std::tuple<string, string, std::pair<size_t, size_t>, bool>> extract_all_strings(const unsigned char buffer[], size_t buffer_size, size_t min_chars, bool only_interesting)
224224
{
225225
// Process the specified binary buffer and extract all strings
226-
long offset = 0;
227-
vector<std::tuple<string, string, std::pair<int, int>, bool>> r_vect;
226+
size_t offset = 0;
227+
vector<std::tuple<string, string, std::pair<size_t, size_t>, bool>> r_vect;
228228
vector<float> proba_interesting_vect;
229229
vector<float> proba_interesting_avg_vect;
230230
extracted_string* s;
@@ -242,10 +242,10 @@ vector<std::tuple<string, string, std::pair<int, int>, bool>> extract_all_string
242242

243243
// Add the new string
244244
r_vect.push_back(
245-
tuple<string, string, std::pair<int, int>, bool>(
245+
tuple<string, string, std::pair<size_t, size_t>, bool>(
246246
s->get_string(),
247247
s->get_type_string(),
248-
std::pair<int, int>(s->get_offset_start(), s->get_offset_end()),
248+
std::pair<size_t, size_t>(s->get_offset_start(), s->get_offset_end()),
249249
proba_interesting > 0.5
250250
)
251251
);
@@ -268,7 +268,7 @@ vector<std::tuple<string, string, std::pair<int, int>, bool>> extract_all_string
268268
proba_interesting_vect.push_back(proba_interesting);
269269

270270
// Advance by the byte-length of the string
271-
offset += (long)s->get_size_in_bytes();
271+
offset += s->get_size_in_bytes();
272272

273273
// Cleanup
274274
delete s;
@@ -280,7 +280,7 @@ vector<std::tuple<string, string, std::pair<int, int>, bool>> extract_all_string
280280
}
281281

282282
// Have a pass through the strings averaging the interestingness and filtering
283-
vector<std::tuple<string, string, std::pair<int, int>, bool>> r_vect_filt;
283+
vector<std::tuple<string, string, std::pair<size_t, size_t>, bool>> r_vect_filt;
284284
for (int i = 0; i < r_vect.size(); i++)
285285
{
286286
// Get the interestingness
@@ -298,7 +298,7 @@ vector<std::tuple<string, string, std::pair<int, int>, bool>> extract_all_string
298298
if (!only_interesting || proba_interesting_avg >= 0.2 || proba_interesting_vect[i] >= 0.5)
299299
{
300300
r_vect_filt.push_back(
301-
tuple<string, string, std::pair<int, int>, bool>(
301+
tuple<string, string, std::pair<size_t, size_t>, bool>(
302302
std::get<0>(r_vect[i]),
303303
std::get<1>(r_vect[i]),
304304
std::get<2>(r_vect[i]),

src/binary2strings.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,12 @@ static std::unordered_set<char16_t> is_seen_commoncrawl({
6363
}
6464
);
6565

66-
size_t try_utf8_char_step(const unsigned char* buffer, size_t buffer_size, long offset);
66+
size_t try_utf8_char_step(const unsigned char* buffer, size_t buffer_size, size_t offset);
6767

6868
int get_language_group(char16_t c);
6969

70-
extracted_string* try_extract_string(const unsigned char* buffer, size_t buffer_size, long offset, size_t min_chars);
70+
extracted_string* try_extract_string(const unsigned char* buffer, size_t buffer_size, size_t offset, size_t min_chars);
7171

72-
std::tuple<string, string, std::pair<int, int>, bool> try_extract_string_tuple(const unsigned char* buffer, size_t buffer_size, long offset, size_t min_chars, bool only_interesting);
72+
std::tuple<string, string, std::pair<size_t, size_t>, bool> try_extract_string_tuple(const unsigned char* buffer, size_t buffer_size, size_t offset, size_t min_chars, bool only_interesting);
7373

74-
vector<std::tuple<string, string, std::pair<int, int>, bool>> extract_all_strings(const unsigned char buffer[], size_t buffer_size, size_t min_chars, bool only_interesting);
74+
vector<std::tuple<string, string, std::pair<size_t, size_t>, bool>> extract_all_strings(const unsigned char buffer[], size_t buffer_size, size_t min_chars, bool only_interesting);

src/extracted_string.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,19 @@
33

44
using namespace std;
55

6+
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
67
std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> _conv16;
78

89
extracted_string::extracted_string()
910
{
1011
m_type = TYPE_UNDETERMINED;
11-
m_string = (std::string)NULL;
12+
m_string = "";
1213
m_size_in_bytes = 0;
1314
m_offset_start = 0;
1415
m_offset_end = 0;
1516
}
1617

17-
extracted_string::extracted_string(const char* string, size_t size_in_bytes, STRING_TYPE type, int offset_start, int offset_end)
18+
extracted_string::extracted_string(const char* string, size_t size_in_bytes, STRING_TYPE type, size_t offset_start, size_t offset_end)
1819
{
1920
m_type = type;
2021
m_string = std::string(string, size_in_bytes);
@@ -23,7 +24,7 @@ extracted_string::extracted_string(const char* string, size_t size_in_bytes, STR
2324
m_offset_end = offset_end;
2425
}
2526

26-
extracted_string::extracted_string(const char16_t* string, size_t size_in_bytes, STRING_TYPE type, int offset_start, int offset_end)
27+
extracted_string::extracted_string(const char16_t* string, size_t size_in_bytes, STRING_TYPE type, size_t offset_start, size_t offset_end)
2728
{
2829
m_type = type;
2930

@@ -126,12 +127,12 @@ string extracted_string::get_type_string()
126127
}
127128
}
128129

129-
int extracted_string::get_offset_start()
130+
size_t extracted_string::get_offset_start()
130131
{
131132
return m_offset_start;
132133
}
133134

134-
int extracted_string::get_offset_end()
135+
size_t extracted_string::get_offset_end()
135136
{
136137
return m_offset_end;
137138
}

src/extracted_string.hpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Class for extracted strings
22
#pragma once
33
#include <string>
4+
#define _SILENCE_ALL_CXX17_DEPRECATION_WARNINGS
45
#include <codecvt>
56
#include "string_model.hpp"
67
#include <unordered_set>
@@ -24,21 +25,21 @@ class extracted_string
2425
STRING_TYPE m_type;
2526
std::string m_string; // Supports Utf8
2627
size_t m_size_in_bytes;
27-
int m_offset_start;
28-
int m_offset_end;
28+
size_t m_offset_start;
29+
size_t m_offset_end;
2930

3031
public:
3132
extracted_string();
32-
extracted_string(const char* string, size_t size_in_bytes, STRING_TYPE type, int offset_start, int offset_end);
33-
extracted_string(const char16_t* string, size_t size_in_bytes, STRING_TYPE type, int offset_start, int offset_end);
33+
extracted_string(const char* string, size_t size_in_bytes, STRING_TYPE type, size_t offset_start, size_t offset_end);
34+
extracted_string(const char16_t* string, size_t size_in_bytes, STRING_TYPE type, size_t offset_start, size_t offset_end);
3435

3536
float get_proba_interesting();
3637
size_t get_size_in_bytes();
3738
string get_string();
3839
STRING_TYPE get_type();
3940
string get_type_string();
40-
int get_offset_start();
41-
int get_offset_end();
41+
size_t get_offset_start();
42+
size_t get_offset_end();
4243

4344
bool is_interesting();
4445

0 commit comments

Comments
 (0)