Skip to content

Commit 7aa08aa

Browse files
authored
Merge pull request #2432 from wangkuiyi/stringpiece
Add StringPiece
2 parents d338976 + 44dc970 commit 7aa08aa

File tree

5 files changed

+541
-0
lines changed

5 files changed

+541
-0
lines changed

paddle/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ add_subdirectory(gserver)
88
add_subdirectory(pserver)
99
add_subdirectory(trainer)
1010
add_subdirectory(scripts)
11+
add_subdirectory(strings)
1112

1213
# Do not build go directory until go cmake is working smoothly.
1314
# if(CMAKE_Go_COMPILER)

paddle/strings/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Declares the `stringpiece` library target, built from stringpiece.cc.
cc_library(stringpiece SRCS stringpiece.cc)
2+
# Declares the unit test for stringpiece; it depends on the library above plus
# glog and gflags.
cc_test(stringpiece_test SRCS stringpiece_test.cc DEPS stringpiece glog gflags)

paddle/strings/stringpiece.cc

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
/*
2+
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
#include "paddle/strings/stringpiece.h"
18+
19+
// #include <stddef.h>
20+
#include <string.h>
21+
22+
#include <algorithm>
23+
#include <iosfwd>
24+
25+
namespace paddle {
26+
27+
// Builds an empty piece: no backing data and a length of zero.
StringPiece::StringPiece() : data_(nullptr), size_(0) {}
28+
29+
// Wraps the n bytes starting at d; only the pointer and length are stored.
// Throws std::invalid_argument when d is NULL but n is non-zero, so a piece
// with NULL data always has length 0.
StringPiece::StringPiece(const char* d, size_t n) : data_(d), size_(n) {
  const bool null_with_length = (d == nullptr) && (n != 0);
  if (null_with_length) {
    throw std::invalid_argument(
        "StringPiece requires len to be 0 for NULL data");
  }
}
34+
35+
// Wraps a NUL-terminated C string, measuring it with strlen. A NULL pointer
// produces an empty piece (NULL data, length 0).
StringPiece::StringPiece(const char* s)
    : data_(s), size_(s != nullptr ? strlen(s) : 0) {}
38+
39+
// Points the piece at the bytes held by s (s.data(), s.size()); nothing is
// copied.
StringPiece::StringPiece(const std::string& s) {
  data_ = s.data();
  size_ = s.size();
}
41+
42+
// Three-way, byte-wise lexicographic comparison of a and b.
// Returns a negative value when a sorts before b, zero when both hold the
// same bytes, and a positive value when a sorts after b. When one piece is a
// prefix of the other, the shorter piece sorts first.
int Compare(StringPiece a, StringPiece b) {
  const size_t min_len = (a.len() < b.len()) ? a.len() : b.len();
  // An empty piece may carry a NULL data pointer, and memcmp must not be
  // handed NULL even for a zero length, so only call it when there are bytes
  // to compare.
  if (min_len > 0) {
    const int r = memcmp(a.data(), b.data(), min_len);
    if (r != 0) return r;
  }
  // The common prefix matches; the lengths break the tie.
  if (a.len() < b.len()) return -1;
  if (a.len() > b.len()) return 1;
  return 0;
}
53+
54+
// Two pieces are equal when they have the same length and the same bytes.
bool operator==(StringPiece x, StringPiece y) {
  if (x.len() != y.len()) return false;
  // Two empty pieces are equal whatever their pointers are, and pieces that
  // share a pointer (and length) are trivially equal. Returning here also
  // keeps a NULL data pointer away from memcmp, which must not receive NULL
  // even for a zero length.
  if (x.len() == 0 || x.data() == y.data()) return true;
  return memcmp(x.data(), y.data(), x.len()) == 0;
}
58+
59+
// Negation of operator==.
bool operator!=(StringPiece x, StringPiece y) {
  const bool same = (x == y);
  return !same;
}
60+
61+
// True when Compare() orders x strictly before y.
bool operator<(StringPiece x, StringPiece y) {
  const int order = Compare(x, y);
  return order < 0;
}
62+
// True when Compare() orders x strictly after y.
bool operator>(StringPiece x, StringPiece y) {
  const int order = Compare(x, y);
  return order > 0;
}
63+
64+
// True when Compare() orders x before y or finds them equal.
bool operator<=(StringPiece x, StringPiece y) {
  const int order = Compare(x, y);
  return order <= 0;
}
65+
// True when Compare() orders x after y or finds them equal.
bool operator>=(StringPiece x, StringPiece y) {
  const int order = Compare(x, y);
  return order >= 0;
}
66+
67+
// Returns true when the first x.len() bytes of s equal x. An empty x is a
// prefix of every s, including an empty one.
bool HasPrefix(StringPiece s, StringPiece x) {
  if (x.len() > s.len()) return false;
  // Nothing to compare for an empty prefix. Answering here also avoids
  // passing a possibly-NULL data pointer to memcmp, which is not allowed even
  // when the length is zero.
  if (x.len() == 0) return true;
  return memcmp(s.data(), x.data(), x.len()) == 0;
}
70+
71+
// Returns true when the last x.len() bytes of s equal x. An empty x is a
// suffix of every s, including an empty one.
bool HasSuffix(StringPiece s, StringPiece x) {
  if (x.len() > s.len()) return false;
  // Nothing to compare for an empty suffix. Answering here also avoids
  // passing a possibly-NULL data pointer to memcmp, which is not allowed even
  // when the length is zero.
  if (x.len() == 0) return true;
  const char* tail = s.data() + (s.len() - x.len());
  return memcmp(tail, x.data(), x.len()) == 0;
}
75+
76+
// Returns s with its first n bytes dropped.
// Throws std::invalid_argument when n exceeds the length of s.
StringPiece SkipPrefix(StringPiece s, size_t n) {
  const size_t total = s.len();
  if (total < n) {
    throw std::invalid_argument("Skip distance larger than StringPiece length");
  }
  const char* rest = s.data() + n;
  return StringPiece(rest, total - n);
}
81+
82+
// Returns s with its last n bytes dropped.
// Throws std::invalid_argument when n exceeds the length of s.
StringPiece SkipSuffix(StringPiece s, size_t n) {
  const size_t total = s.len();
  if (total < n) {
    throw std::invalid_argument("Skip distance larger than StringPiece length");
  }
  const size_t kept = total - n;
  return StringPiece(s.data(), kept);
}
87+
88+
// Drops x from the front of s when s starts with x; otherwise returns s
// unchanged.
StringPiece TrimPrefix(StringPiece s, StringPiece x) {
  if (!HasPrefix(s, x)) {
    return s;
  }
  return SkipPrefix(s, x.len());
}
91+
92+
// Drops x from the back of s when s ends with x; otherwise returns s
// unchanged.
StringPiece TrimSuffix(StringPiece s, StringPiece x) {
  if (!HasSuffix(s, x)) {
    return s;
  }
  return SkipSuffix(s, x.len());
}
95+
96+
// Reports whether sub occurs somewhere in s, using std::search. An empty sub
// is found at the start of any non-empty s; an empty s contains nothing, not
// even an empty sub.
bool Contains(StringPiece s, StringPiece sub) {
  const char* const haystack_end = s.end();
  const char* const hit =
      std::search(s.begin(), haystack_end, sub.begin(), sub.end());
  return hit != haystack_end;
}
99+
100+
// Returns the offset of the first occurrence of sub in s, or
// StringPiece::npos when there is none. An empty sub is found at offset 0 of
// a non-empty s; searching an empty s always yields npos.
size_t Index(StringPiece s, StringPiece sub) {
  const char* const haystack_end = s.end();
  const char* const hit =
      std::search(s.begin(), haystack_end, sub.begin(), sub.end());
  if (hit == haystack_end) {
    return StringPiece::npos;
  }
  return static_cast<size_t>(hit - s.data());
}
104+
105+
// Returns the offset (relative to the start of s) of the first byte equal to
// c at or after pos, or StringPiece::npos when there is none or when pos is
// at or past the end of s.
size_t Find(StringPiece s, char c, size_t pos) {
  const size_t total = s.len();
  if (pos >= total) {
    return StringPiece::npos;
  }
  const void* hit = memchr(s.data() + pos, c, total - pos);
  if (hit == nullptr) {
    return StringPiece::npos;
  }
  return static_cast<size_t>(static_cast<const char*>(hit) - s.data());
}
113+
114+
// Returns the offset of the last byte equal to c within s[0..pos] (pos
// inclusive), or StringPiece::npos when there is none or s is empty. A pos at
// or beyond the last index, including npos, is clamped to the last index, so
// the whole piece is searched.
size_t RFind(StringPiece s, char c, size_t pos) {
  if (s.len() == 0) return StringPiece::npos;
  const char* const base = s.data();
  // Scan with an index that is one past the candidate position. A pointer
  // that walks down to "one before base" would be undefined pointer
  // arithmetic, and a plain unsigned index could never drop below zero to end
  // the loop.
  for (size_t i = std::min(pos, s.len() - 1) + 1; i > 0; --i) {
    if (base[i - 1] == c) {
      return i - 1;
    }
  }
  return StringPiece::npos;
}
124+
125+
// Returns the piece covering at most n bytes of s starting at pos. Both
// values are clamped: a pos beyond the end becomes the end (giving an empty
// piece), and n is cut down to the bytes that remain after pos.
StringPiece SubStr(StringPiece s, size_t pos, size_t n) {
  const size_t total = s.len();
  const size_t start = std::min(pos, total);
  const size_t count = std::min(n, total - start);
  return StringPiece(s.data() + start, count);
}
130+
131+
std::ostream& operator<<(std::ostream& o, StringPiece piece) {
132+
return o << piece.ToString();
133+
}
134+
135+
} // namespace paddle

paddle/strings/stringpiece.h

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/*
2+
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <assert.h>
20+
21+
#include <stdexcept>
22+
#include <string>
23+
24+
namespace paddle {
25+
26+
// StringPiece points into a std::string object but doesn't own the
27+
// string. It is for efficient access to strings. Like Go's string
28+
// type.  Note that StringPiece doesn't mutate the underlying string,
29+
// so it is thread-safe given that the underlying string doesn't
30+
// change.  Because StringPiece contains only a few data members, and
31+
// its syntax is simple as it doesn't own/manage the string, it is
32+
// cheap to construct StringPieces and pass them around.
33+
class StringPiece {
 public:
  // Value used by the search helpers to report "no position".
  static const size_t npos = static_cast<size_t>(-1);

  // The one-argument constructors are deliberately not explicit, so that
  // a "const char*" or a "string" can be passed wherever a "StringPiece"
  // is expected.  These constructors ensure that if data_ is NULL,
  // size_ is 0.
  StringPiece();
  StringPiece(const char* d, size_t n);
  StringPiece(const char* d);
  StringPiece(const std::string& s);

  // Start of the referenced bytes and how many of them there are.
  const char* data() const { return data_; }
  size_t len() const { return size_; }

  // Returns the n-th byte; n is checked only by assert.
  char operator[](size_t n) const {
    assert(n < size_);
    return *(data_ + n);
  }

  // StringPiece doesn't own the string, so iterator and const_iterator
  // are both plain const char*.
  using const_iterator = const char*;
  using iterator = const char*;
  iterator begin() const { return data(); }
  iterator end() const { return data() + len(); }

  // Returns a std::string holding a copy of the referenced bytes.
  std::string ToString() const { return std::string(begin(), len()); }

 private:
  const char* data_;
  size_t size_;

  // Intentionally copyable
};
70+
71+
int Compare(StringPiece a, StringPiece b);
72+
73+
bool operator==(StringPiece x, StringPiece y);
74+
bool operator!=(StringPiece x, StringPiece y);
75+
bool operator<(StringPiece x, StringPiece y);
76+
bool operator>(StringPiece x, StringPiece y);
77+
bool operator<=(StringPiece x, StringPiece y);
78+
bool operator>=(StringPiece x, StringPiece y);
79+
80+
bool HasPrefix(StringPiece s, StringPiece prefix);
81+
bool HasSuffix(StringPiece s, StringPiece suffix);
82+
83+
StringPiece SkipPrefix(StringPiece s, size_t n);
84+
StringPiece SkipSuffix(StringPiece s, size_t n);
85+
86+
// Skip the prefix (or suffix) if it matches with the string.
87+
StringPiece TrimPrefix(StringPiece s, StringPiece prefix);
88+
StringPiece TrimSuffix(StringPiece s, StringPiece suffix);
89+
90+
// Returns true if s contains sub.  Any s except for empty s contains an
91+
// empty sub.
92+
bool Contains(StringPiece s, StringPiece sub);
93+
94+
// Return the first occurrence of sub in s, or npos. If both s and
95+
// sub are empty, it returns npos; otherwise, if only sub is empty, it
96+
// returns 0.
97+
size_t Index(StringPiece s, StringPiece sub);
98+
99+
// Return the first occurrence of c in s[pos:end], or npos.
100+
size_t Find(StringPiece s, char c, size_t pos);
101+
102+
// Search range is [0..pos] inclusive. If pos == npos, search everything.
103+
size_t RFind(StringPiece s, char c, size_t pos);
104+
105+
StringPiece SubStr(StringPiece s, size_t pos, size_t n);
106+
107+
// allow StringPiece to be logged
108+
std::ostream& operator<<(std::ostream& o, StringPiece piece);
109+
110+
} // namespace paddle

0 commit comments

Comments
 (0)