Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions paddle/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ add_subdirectory(gserver)
add_subdirectory(pserver)
add_subdirectory(trainer)
add_subdirectory(scripts)
add_subdirectory(strings)

# Do not build go directory until go cmake is working smoothly.
# if(CMAKE_Go_COMPILER)
Expand Down
2 changes: 2 additions & 0 deletions paddle/strings/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
cc_library(stringpiece SRCS stringpiece.cc)
cc_test(stringpiece_test SRCS stringpiece_test.cc DEPS stringpiece glog gflags)
135 changes: 135 additions & 0 deletions paddle/strings/stringpiece.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#include "paddle/strings/stringpiece.h"

// #include <stddef.h>
#include <string.h>

#include <algorithm>
#include <iosfwd>

namespace paddle {

StringPiece::StringPiece() : data_(NULL), size_(0) {}

StringPiece::StringPiece(const char* d, size_t n) : data_(d), size_(n) {
if (d == NULL && n != 0)
throw std::invalid_argument(
"StringPiece requires len to be 0 for NULL data");
}

StringPiece::StringPiece(const char* s) : data_(s) {
size_ = (s == NULL) ? 0 : strlen(s);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we use strlen here, we are limiting ourselves to c-string that ends with 0.

Perhaps StringPiece::StringPiece(const char* s, int size) could be more general (supports any char array, could have 0 in the body). Since std::string supports 0 in the body, maybe we need to support as well.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. If we have a C string const char* a, we can create a StringPiece using

StringPiece s(std::string(a));

Done, delete it.

Copy link
Collaborator Author

@wangkuiyi wangkuiyi Jun 9, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh. I tried it and it seems that we should leave it there; otherwise, we would have to write the following test assertion

EXPECT_EQ("app", TrimSuffix(StringPiece("apple"), "pl"));

in a much more complicated way:

EXPECT_EQ(std::string("app"), 
     TrimSuffix(StringPiece(std::string("apple")), StringPiece(std::string("pl")));

Copy link
Contributor

@helinwang helinwang Jun 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, I think we can keep it. LGTM!

}

StringPiece::StringPiece(const std::string& s)
: data_(s.data()), size_(s.size()) {}

int Compare(StringPiece a, StringPiece b) {
const size_t min_len = (a.len() < b.len()) ? a.len() : b.len();
int r = memcmp(a.data(), b.data(), min_len);
if (r == 0) {
if (a.len() < b.len())
return -1;
else if (a.len() > b.len())
return 1;
}
return r;
}

bool operator==(StringPiece x, StringPiece y) {
return ((x.len() == y.len()) &&
(x.data() == y.data() || memcmp(x.data(), y.data(), x.len()) == 0));
}

bool operator!=(StringPiece x, StringPiece y) { return !(x == y); }

bool operator<(StringPiece x, StringPiece y) { return Compare(x, y) < 0; }
bool operator>(StringPiece x, StringPiece y) { return Compare(x, y) > 0; }

bool operator<=(StringPiece x, StringPiece y) { return Compare(x, y) <= 0; }
bool operator>=(StringPiece x, StringPiece y) { return Compare(x, y) >= 0; }

bool HasPrefix(StringPiece s, StringPiece x) {
return ((s.len() >= x.len()) && (memcmp(s.data(), x.data(), x.len()) == 0));
}

bool HasSuffix(StringPiece s, StringPiece x) {
return ((s.len() >= x.len()) &&
(memcmp(s.data() + (s.len() - x.len()), x.data(), x.len()) == 0));
}

StringPiece SkipPrefix(StringPiece s, size_t n) {
if (n > s.len())
throw std::invalid_argument("Skip distance larger than StringPiece length");
return StringPiece(s.data() + n, s.len() - n);
}

StringPiece SkipSuffix(StringPiece s, size_t n) {
if (n > s.len())
throw std::invalid_argument("Skip distance larger than StringPiece length");
return StringPiece(s.data(), s.len() - n);
}

StringPiece TrimPrefix(StringPiece s, StringPiece x) {
return HasPrefix(s, x) ? SkipPrefix(s, x.len()) : s;
}

StringPiece TrimSuffix(StringPiece s, StringPiece x) {
return HasSuffix(s, x) ? SkipSuffix(s, x.len()) : s;
}

bool Contains(StringPiece s, StringPiece sub) {
return std::search(s.begin(), s.end(), sub.begin(), sub.end()) != s.end();
}

size_t Index(StringPiece s, StringPiece sub) {
auto e = std::search(s.begin(), s.end(), sub.begin(), sub.end());
return e != s.end() ? e - s.data() : StringPiece::npos;
}

size_t Find(StringPiece s, char c, size_t pos) {
if (pos >= s.len()) {
return StringPiece::npos;
}
const char* result =
reinterpret_cast<const char*>(memchr(s.data() + pos, c, s.len() - pos));
return result != nullptr ? result - s.data() : StringPiece::npos;
}

size_t RFind(StringPiece s, char c, size_t pos) {
if (s.len() == 0) return StringPiece::npos;
for (const char* p = s.data() + std::min(pos, s.len() - 1); p >= s.data();
p--) {
if (*p == c) {
return p - s.data();
}
}
return StringPiece::npos;
}

StringPiece SubStr(StringPiece s, size_t pos, size_t n) {
if (pos > s.len()) pos = s.len();
if (n > s.len() - pos) n = s.len() - pos;
return StringPiece(s.data() + pos, n);
}

std::ostream& operator<<(std::ostream& o, StringPiece piece) {
return o << piece.ToString();
}

} // namespace paddle
110 changes: 110 additions & 0 deletions paddle/strings/stringpiece.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#pragma once

#include <assert.h>

#include <stdexcept>
#include <string>

namespace paddle {

// StringPiece points into a std::string object but doesn't own the
// string. It is for efficient access to strings. Like Go's string
// type. Not that StringPiece doesn't mutate the underlying string,
// so it is thread-safe given that the underlying string doesn't
// change. Because StringPiece contains a little data members, and
// its syntax is simple as it doesn't own/manage the string, it is
// cheap to construct StringPieces and pass them around.
class StringPiece {
public:
static const size_t npos = static_cast<size_t>(-1);

// We provide non-explicit singleton constructors so users can
// pass in a "const char*" or a "string" wherever a "StringPiece"
// is expected. These contructors ensure that if data_ is NULL,
// size_ is 0.
StringPiece();
StringPiece(const char* d, size_t n);
StringPiece(const char* d);
StringPiece(const std::string& s);

const char* data() const { return data_; }
size_t len() const { return size_; }

char operator[](size_t n) const {
assert(n < len());
return data_[n];
}

// StringPiece doesn't own the string, so both iterator and const
// iterator are const char* indeed.
typedef const char* const_iterator;
typedef const char* iterator;
iterator begin() const { return data_; }
iterator end() const { return data_ + size_; }

// Return a string that contains the copy of the referenced data.
std::string ToString() const { return std::string(data_, size_); }

private:
const char* data_;
size_t size_;

// Intentionally copyable
};

int Compare(StringPiece a, StringPiece b);

bool operator==(StringPiece x, StringPiece y);
bool operator!=(StringPiece x, StringPiece y);
bool operator<(StringPiece x, StringPiece y);
bool operator>(StringPiece x, StringPiece y);
bool operator<=(StringPiece x, StringPiece y);
bool operator>=(StringPiece x, StringPiece y);

bool HasPrefix(StringPiece s, StringPiece prefix);
bool HasSuffix(StringPiece s, StringPiece suffix);

StringPiece SkipPrefix(StringPiece s, size_t n);
StringPiece SkipSuffix(StringPiece s, size_t n);

// Skip the prefix (or suffix) if it matches with the string.
StringPiece TrimPrefix(StringPiece s, StringPiece prefix);
StringPiece TrimSuffix(StringPiece s, StringPiece suffix);

// Returns if s contains sub. Any s except for empty s contains an
// empty sub.
bool Contains(StringPiece s, StringPiece sub);

// Return the first occurrence of sub in s, or npos. If both s and
// sub is empty, it returns npos; otherwise, if only sub is empty, it
// returns 0.
size_t Index(StringPiece s, StringPiece sub);

// Return the first occurrence of c in s[pos:end], or npos.
size_t Find(StringPiece s, char c, size_t pos);

// Search range is [0..pos] inclusive. If pos == npos, search everything.
size_t RFind(StringPiece s, char c, size_t pos);

StringPiece SubStr(StringPiece s, size_t pos, size_t n);

// allow StringPiece to be logged
std::ostream& operator<<(std::ostream& o, StringPiece piece);

} // namespace paddle
Loading