From a09a001a14d852ccf501a2d07c74ffea387f6fa3 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Tue, 21 Feb 2012 21:04:29 -0800 Subject: [PATCH 01/17] (core::str) mostly rename len -> len_chars --- src/cargo/cargo.rs | 8 ++++---- src/comp/driver/diagnostic.rs | 4 ++-- src/comp/syntax/codemap.rs | 4 ++-- src/comp/syntax/ext/qquote.rs | 2 +- src/comp/syntax/print/pp.rs | 2 +- src/comp/syntax/print/pprust.rs | 2 +- src/fuzzer/fuzzer.rs | 2 +- src/libcore/extfmt.rs | 6 +++--- src/libcore/str.rs | 34 ++++++++++++++++--------------- src/libstd/fs.rs | 2 +- src/libstd/getopts.rs | 2 +- src/libstd/json.rs | 20 +++++++++--------- src/libstd/rand.rs | 6 +++--- src/libstd/rope.rs | 6 +++--- src/rustdoc/desc_to_brief_pass.rs | 2 +- src/rustdoc/unindent_pass.rs | 2 +- src/test/run-pass/utf8_chars.rs | 2 +- 17 files changed, 54 insertions(+), 52 deletions(-) diff --git a/src/cargo/cargo.rs b/src/cargo/cargo.rs index 4d67877afd8ac..8e8996551fa51 100644 --- a/src/cargo/cargo.rs +++ b/src/cargo/cargo.rs @@ -174,10 +174,10 @@ fn print(s: str) { } fn rest(s: str, start: uint) -> str { - if (start >= str::len(s)) { + if (start >= str::len_chars(s)) { "" } else { - str::slice(s, start, str::len(s)) + str::slice(s, start, str::len_chars(s)) } } @@ -689,7 +689,7 @@ fn cmd_install(c: cargo) unsafe { alt str::index(uuid, '/') { option::some(idx) { let source = str::slice(uuid, 0u, idx); - uuid = str::slice(uuid, idx + 1u, str::len(uuid)); + uuid = str::slice(uuid, idx + 1u, str::len_chars(uuid)); install_uuid_specific(c, wd, source, uuid); } option::none { @@ -701,7 +701,7 @@ fn cmd_install(c: cargo) unsafe { alt str::index(name, '/') { option::some(idx) { let source = str::slice(name, 0u, idx); - name = str::slice(name, idx + 1u, str::len(name)); + name = str::slice(name, idx + 1u, str::len_chars(name)); install_named_specific(c, wd, source, name); } option::none { diff --git a/src/comp/driver/diagnostic.rs b/src/comp/driver/diagnostic.rs index 8f0570a434959..f0c97588f5967 100644 --- 
a/src/comp/driver/diagnostic.rs +++ b/src/comp/driver/diagnostic.rs @@ -210,7 +210,7 @@ fn highlight_lines(cm: codemap::codemap, sp: span, if elided { let last_line = display_lines[vec::len(display_lines) - 1u]; let s = #fmt["%s:%u ", fm.name, last_line + 1u]; - let indent = str::len(s); + let indent = str::len_bytes(s); let out = ""; while indent > 0u { out += " "; indent -= 1u; } out += "...\n"; @@ -228,7 +228,7 @@ fn highlight_lines(cm: codemap::codemap, sp: span, while num > 0u { num /= 10u; digits += 1u; } // indent past |name:## | and the 0-offset column location - let left = str::len(fm.name) + digits + lo.col + 3u; + let left = str::len_bytes(fm.name) + digits + lo.col + 3u; let s = ""; while left > 0u { str::push_char(s, ' '); left -= 1u; } diff --git a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs index 744ea97441d0f..6c0182c341149 100644 --- a/src/comp/syntax/codemap.rs +++ b/src/comp/syntax/codemap.rs @@ -158,9 +158,9 @@ fn span_to_lines(sp: span, cm: codemap::codemap) -> @file_lines { fn get_line(fm: filemap, line: int) -> str unsafe { let begin: uint = fm.lines[line].byte - fm.start_pos.byte; let end = alt str::byte_index_from(*fm.src, '\n' as u8, begin, - str::len(*fm.src)) { + str::len_bytes(*fm.src)) { some(e) { e } - none { str::len(*fm.src) } + none { str::len_bytes(*fm.src) } }; str::unsafe::slice_bytes(*fm.src, begin, end) } diff --git a/src/comp/syntax/ext/qquote.rs b/src/comp/syntax/ext/qquote.rs index 3476584aecece..e53307b474abf 100644 --- a/src/comp/syntax/ext/qquote.rs +++ b/src/comp/syntax/ext/qquote.rs @@ -214,7 +214,7 @@ fn finish if (j < g_len && i == cx.gather[j].lo) { assert ch == '$'; let repl = #fmt("$%u ", j); - state = skip(str::len(repl)); + state = skip(str::len_chars(repl)); str2 += repl; } alt state { diff --git a/src/comp/syntax/print/pp.rs b/src/comp/syntax/print/pp.rs index 3ee3131429929..9114caea2d4e2 100644 --- a/src/comp/syntax/print/pp.rs +++ b/src/comp/syntax/print/pp.rs @@ -491,7 +491,7 @@ fn end(p: 
printer) { p.pretty_print(END); } fn eof(p: printer) { p.pretty_print(EOF); } fn word(p: printer, wrd: str) { - p.pretty_print(STRING(wrd, str::len(wrd) as int)); + p.pretty_print(STRING(wrd, str::len_bytes(wrd) as int)); } fn huge_word(p: printer, wrd: str) { diff --git a/src/comp/syntax/print/pprust.rs b/src/comp/syntax/print/pprust.rs index 2b29bf57eaa10..288d8a38dda7d 100644 --- a/src/comp/syntax/print/pprust.rs +++ b/src/comp/syntax/print/pprust.rs @@ -204,7 +204,7 @@ fn head(s: ps, w: str) { // outer-box is consistent cbox(s, indent_unit); // head-box is inconsistent - ibox(s, str::len(w) + 1u); + ibox(s, str::len_bytes(w) + 1u); // keyword that starts the head word_nbsp(s, w); } diff --git a/src/fuzzer/fuzzer.rs b/src/fuzzer/fuzzer.rs index b0a47e4051b3d..152df70e7a4f4 100644 --- a/src/fuzzer/fuzzer.rs +++ b/src/fuzzer/fuzzer.rs @@ -287,7 +287,7 @@ fn check_variants_T( fn last_part(filename: str) -> str { let ix = option::get(str::rindex(filename, '/')); - str::slice(filename, ix + 1u, str::len(filename) - 3u) + str::slice(filename, ix + 1u, str::len_chars(filename) - 3u) } enum happiness { passed, cleanly_rejected(str), known_bug(str), failed(str), } diff --git a/src/libcore/extfmt.rs b/src/libcore/extfmt.rs index 30676e34de831..5fca4d69f3af1 100644 --- a/src/libcore/extfmt.rs +++ b/src/libcore/extfmt.rs @@ -325,7 +325,7 @@ mod rt { alt cv.precision { count_implied { s } count_is(max) { - if max as uint < str::len(s) { + if max as uint < str::len_chars(s) { str::substr(s, 0u, max as uint) } else { s } } @@ -368,7 +368,7 @@ mod rt { "" } else { let s = uint::to_str(num, radix); - let len = str::len(s); + let len = str::len_chars(s); if len < prec { let diff = prec - len; let pad = str_init_elt(diff, '0'); @@ -400,7 +400,7 @@ mod rt { uwidth = width as uint; } } - let strlen = str::len(s); + let strlen = str::len_chars(s); if uwidth <= strlen { ret s; } let padchar = ' '; let diff = uwidth - strlen; diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 
d81f2d45d4080..de8ba2f28b6ba 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -86,7 +86,7 @@ export is_not_empty, is_whitespace, len_bytes, - len_chars, len, + len_chars, //len, // Misc // FIXME: perhaps some more of this section shouldn't be exported? @@ -529,7 +529,7 @@ fn split(ss: str, sepfn: fn(cc: char)->bool) -> [str] { } }); - if len(accum) >= 0u || ends_with_sep { + if len_chars(accum) >= 0u || ends_with_sep { vv += [accum]; } @@ -601,7 +601,7 @@ separated by whitespace */ fn words(ss: str) -> [str] { ret vec::filter( split(ss, {|cc| char::is_whitespace(cc)}), - {|w| 0u < str::len(w)}); + {|w| 0u < str::len_chars(w)}); } /* @@ -611,7 +611,7 @@ Create a vector of substrings of size `nn` */ fn windowed(nn: uint, ss: str) -> [str] { let ww = []; - let len = str::len(ss); + let len = str::len_chars(ss); assert 1u <= nn; @@ -883,7 +883,7 @@ fn byte_index_from(s: str, b: u8, start: uint, end: uint) -> option { // (as option some/none) fn rindex(ss: str, cc: char) -> option { let bii = len_bytes(ss); - let cii = len(ss); + let cii = len_chars(ss); while bii > 0u { let {ch, prev} = char_range_at_reverse(ss, bii); cii -= 1u; @@ -1008,8 +1008,8 @@ haystack - The string to look in needle - The string to look for */ fn ends_with(haystack: str, needle: str) -> bool { - let haystack_len: uint = len(haystack); - let needle_len: uint = len(needle); + let haystack_len: uint = len_chars(haystack); + let needle_len: uint = len_chars(needle); ret if needle_len == 0u { true } else if needle_len > haystack_len { @@ -1079,7 +1079,9 @@ fn len(s: str) -> uint { substr_len_chars(s, 0u, len_bytes(s)) } -fn len_chars(s: str) -> uint { len(s) } +fn len_chars(s: str) -> uint { + substr_len_chars(s, 0u, len_bytes(s)) +} /* Section: Misc @@ -1529,14 +1531,14 @@ mod tests { assert (len_bytes("\u2620") == 3u); assert (len_bytes("\U0001d11e") == 4u); - assert (len("") == 0u); - assert (len("hello world") == 11u); - assert (len("\x63") == 1u); - assert (len("\xa2") == 1u); - 
assert (len("\u03c0") == 1u); - assert (len("\u2620") == 1u); - assert (len("\U0001d11e") == 1u); - assert (len("ประเทศไทย中华Việt Nam") == 19u); + assert (len_chars("") == 0u); + assert (len_chars("hello world") == 11u); + assert (len_chars("\x63") == 1u); + assert (len_chars("\xa2") == 1u); + assert (len_chars("\u03c0") == 1u); + assert (len_chars("\u2620") == 1u); + assert (len_chars("\U0001d11e") == 1u); + assert (len_chars("ประเทศไทย中华Việt Nam") == 19u); } #[test] diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs index 30abbb1e70aa9..ff2188461504d 100644 --- a/src/libstd/fs.rs +++ b/src/libstd/fs.rs @@ -45,7 +45,7 @@ fn splitDirnameBasename (pp: path) -> {dirname: str, basename: str} { } ret {dirname: str::slice(pp, 0u, ii), - basename: str::slice(pp, ii + 1u, str::len(pp))}; + basename: str::slice(pp, ii + 1u, str::len_chars(pp))}; } /* diff --git a/src/libstd/getopts.rs b/src/libstd/getopts.rs index dba406c889ecd..e925cd16e335d 100644 --- a/src/libstd/getopts.rs +++ b/src/libstd/getopts.rs @@ -79,7 +79,7 @@ A description of a possible option type opt = {name: name, hasarg: hasarg, occur: occur}; fn mkname(nm: str) -> name { - ret if str::len(nm) == 1u { + ret if str::len_bytes(nm) == 1u { short(str::char_at(nm, 0u)) } else { long(nm) }; } diff --git a/src/libstd/json.rs b/src/libstd/json.rs index 127cd93952e78..23d70a78bf61a 100644 --- a/src/libstd/json.rs +++ b/src/libstd/json.rs @@ -70,8 +70,8 @@ fn to_str(j: json) -> str { } fn rest(s: str) -> str { - assert(str::len(s) >= 1u); - str::slice(s, 1u, str::len(s)) + assert(str::len_chars(s) >= 1u); + str::slice(s, 1u, str::len_chars(s)) } fn from_str_str(s: str) -> (option, str) { @@ -99,7 +99,7 @@ fn from_str_str(s: str) -> (option, str) { cont; } else if (c == '"') { ret (some(string(res)), - str::slice(s, pos, str::len(s))); + str::slice(s, pos, str::len_chars(s))); } res = res + str::from_char(c); } @@ -200,12 +200,12 @@ fn from_str_float(s: str) -> (option, str) { } '.' 
{ break; } _ { ret (some(num(neg * res)), - str::slice(s, opos, str::len(s))); } + str::slice(s, opos, str::len_chars(s))); } } } if pos == len { - ret (some(num(neg * res)), str::slice(s, pos, str::len(s))); + ret (some(num(neg * res)), str::slice(s, pos, str::len_chars(s))); } let dec = 1f; @@ -220,17 +220,17 @@ fn from_str_float(s: str) -> (option, str) { res += (((c as int) - ('0' as int)) as float) * dec; } _ { ret (some(num(neg * res)), - str::slice(s, opos, str::len(s))); } + str::slice(s, opos, str::len_chars(s))); } } } - ret (some(num(neg * res)), str::slice(s, pos, str::len(s))); + ret (some(num(neg * res)), str::slice(s, pos, str::len_chars(s))); } fn from_str_bool(s: str) -> (option, str) { if (str::starts_with(s, "true")) { - (some(boolean(true)), str::slice(s, 4u, str::len(s))) + (some(boolean(true)), str::slice(s, 4u, str::len_chars(s))) } else if (str::starts_with(s, "false")) { - (some(boolean(false)), str::slice(s, 5u, str::len(s))) + (some(boolean(false)), str::slice(s, 5u, str::len_chars(s))) } else { (none, s) } @@ -238,7 +238,7 @@ fn from_str_bool(s: str) -> (option, str) { fn from_str_null(s: str) -> (option, str) { if (str::starts_with(s, "null")) { - (some(null), str::slice(s, 4u, str::len(s))) + (some(null), str::slice(s, 4u, str::len_chars(s))) } else { (none, s) } diff --git a/src/libstd/rand.rs b/src/libstd/rand.rs index 35525b70710ac..33fac40fcccc3 100644 --- a/src/libstd/rand.rs +++ b/src/libstd/rand.rs @@ -77,7 +77,7 @@ fn mk_rng() -> rng { let i = 0u; while (i < len) { let n = rustrt::rand_next(**self) as uint % - str::len(charset); + str::len_bytes(charset); s = s + str::from_char(str::char_at(charset, n)); i += 1u; } @@ -130,8 +130,8 @@ mod tests { log(debug, r.gen_str(10u)); log(debug, r.gen_str(10u)); log(debug, r.gen_str(10u)); - assert(str::len(r.gen_str(10u)) == 10u); - assert(str::len(r.gen_str(16u)) == 16u); + assert(str::len_bytes(r.gen_str(10u)) == 10u); + assert(str::len_bytes(r.gen_str(16u)) == 16u); } } diff --git 
a/src/libstd/rope.rs b/src/libstd/rope.rs index 07ee837a9b918..3f330c3586d46 100644 --- a/src/libstd/rope.rs +++ b/src/libstd/rope.rs @@ -1373,7 +1373,7 @@ mod tests { let sample = @"0123456789ABCDE"; let r = of_str(sample); - assert char_len(r) == str::len(*sample); + assert char_len(r) == str::len_chars(*sample); assert rope_to_string(r) == *sample; } @@ -1384,7 +1384,7 @@ mod tests { while i < 10 { *buf = *buf + *buf; i+=1;} let sample = @*buf; let r = of_str(sample); - assert char_len(r) == str::len(*sample); + assert char_len(r) == str::len_chars(*sample); assert rope_to_string(r) == *sample; let string_iter = 0u; @@ -1427,7 +1427,7 @@ mod tests { } } - assert len == str::len(*sample); + assert len == str::len_chars(*sample); } #[test] diff --git a/src/rustdoc/desc_to_brief_pass.rs b/src/rustdoc/desc_to_brief_pass.rs index b506f50293375..82ae2ff8fed7b 100644 --- a/src/rustdoc/desc_to_brief_pass.rs +++ b/src/rustdoc/desc_to_brief_pass.rs @@ -175,7 +175,7 @@ fn parse_desc(desc: str) -> (option, option) { if check vec::is_not_empty(paras) { let maybe_brief = vec::head(paras); - if str::len(maybe_brief) <= max_brief_len { + if str::len_bytes(maybe_brief) <= max_brief_len { let desc_paras = vec::tail(paras); let desc = if vec::is_not_empty(desc_paras) { some(str::connect(desc_paras, "\n\n")) diff --git a/src/rustdoc/unindent_pass.rs b/src/rustdoc/unindent_pass.rs index b52316ccb15b9..d8f56e2047944 100644 --- a/src/rustdoc/unindent_pass.rs +++ b/src/rustdoc/unindent_pass.rs @@ -68,7 +68,7 @@ fn unindent(s: str) -> str { line } else { assert str::len_bytes(line) >= min_indent; - str::slice(line, min_indent, str::len(line)) + str::slice(line, min_indent, str::len_chars(line)) } }; str::connect(unindented, "\n") diff --git a/src/test/run-pass/utf8_chars.rs b/src/test/run-pass/utf8_chars.rs index cd591866c2488..fbb66b5a1ca48 100644 --- a/src/test/run-pass/utf8_chars.rs +++ b/src/test/run-pass/utf8_chars.rs @@ -8,7 +8,7 @@ fn main() { let s: str = str::from_chars(chs); 
assert (str::len_bytes(s) == 10u); - assert (str::len(s) == 4u); + assert (str::len_chars(s) == 4u); assert (vec::len::(str::chars(s)) == 4u); assert (str::eq(str::from_chars(str::chars(s)), s)); assert (str::char_at(s, 0u) == 'e'); From fa677b4b9262bd09171648a938a9758883585b62 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Tue, 21 Feb 2012 21:12:41 -0800 Subject: [PATCH 02/17] (core::str) make len an alias for len_bytes --- src/libcore/str.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index de8ba2f28b6ba..053ed94f559ed 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1074,10 +1074,8 @@ pure fn len_bytes(s: str) -> uint unsafe { // Function: len // // String length or size in characters. -// (Synonym: len_chars) -fn len(s: str) -> uint { - substr_len_chars(s, 0u, len_bytes(s)) -} +// (Synonym: len_bytes) +fn len(s: str) -> uint { len_bytes(s) } fn len_chars(s: str) -> uint { substr_len_chars(s, 0u, len_bytes(s)) From 56baeaccf73de6c969eb11cb7c81ba535285bc22 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Tue, 21 Feb 2012 21:13:18 -0800 Subject: [PATCH 03/17] (core::str) make len an alias for len_bytes ++ --- src/libcore/str.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 053ed94f559ed..c8f7706977a15 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -85,8 +85,8 @@ export is_empty, is_not_empty, is_whitespace, - len_bytes, - len_chars, //len, + len, len_bytes, + len_chars, // Misc // FIXME: perhaps some more of this section shouldn't be exported? 
From 67d8b890d4285b114f41dc1dc217001e86e589b4 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Tue, 21 Feb 2012 21:38:40 -0800 Subject: [PATCH 04/17] (core::str) rename index -> index_chars --- src/cargo/cargo.rs | 4 ++-- src/comp/back/link.rs | 2 +- src/libcore/str.rs | 16 +++++++--------- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/cargo/cargo.rs b/src/cargo/cargo.rs index 8e8996551fa51..bce9bb5f84fab 100644 --- a/src/cargo/cargo.rs +++ b/src/cargo/cargo.rs @@ -686,7 +686,7 @@ fn cmd_install(c: cargo) unsafe { if str::starts_with(target, "uuid:") { let uuid = rest(target, 5u); - alt str::index(uuid, '/') { + alt str::index_chars(uuid, '/') { option::some(idx) { let source = str::slice(uuid, 0u, idx); uuid = str::slice(uuid, idx + 1u, str::len_chars(uuid)); @@ -698,7 +698,7 @@ fn cmd_install(c: cargo) unsafe { } } else { let name = target; - alt str::index(name, '/') { + alt str::index_chars(name, '/') { option::some(idx) { let source = str::slice(name, 0u, idx); name = str::slice(name, idx + 1u, str::len_chars(name)); diff --git a/src/comp/back/link.rs b/src/comp/back/link.rs index 5b7d7a35c5371..ee450814ef961 100644 --- a/src/comp/back/link.rs +++ b/src/comp/back/link.rs @@ -109,7 +109,7 @@ mod write { // Decides what to call an intermediate file, given the name of the output // and the extension to use. 
fn mk_intermediate_name(output_path: str, extension: str) -> str unsafe { - let stem = alt str::index(output_path, '.') { + let stem = alt str::index_chars(output_path, '.') { option::some(dot_pos) { str::slice(output_path, 0u, dot_pos) } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index c8f7706977a15..3c8cd7a3b853d 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -69,7 +69,7 @@ export lines_iter, // Searching - index, + index_chars, byte_index, byte_index_from, rindex, @@ -838,7 +838,7 @@ Section: Searching // // Returns the index of the first matching char // (as option some/none) -fn index(ss: str, cc: char) -> option { +fn index_chars(ss: str, cc: char) -> option { let bii = 0u; let cii = 0u; let len = len_bytes(ss); @@ -1157,8 +1157,6 @@ Safety note: This function fails if `byte_offset` or `char_len` do not represent valid positions in `s` - -FIXME: rename to 'substr_len_bytes' */ fn substr_len_bytes(s: str, byte_offset: uint, char_len: uint) -> uint { let i = byte_offset; @@ -1540,11 +1538,11 @@ mod tests { } #[test] - fn test_index() { - assert ( index("hello", 'h') == some(0u)); - assert ( index("hello", 'e') == some(1u)); - assert ( index("hello", 'o') == some(4u)); - assert ( index("hello", 'z') == none); + fn test_index_chars() { + assert ( index_chars("hello", 'h') == some(0u)); + assert ( index_chars("hello", 'e') == some(1u)); + assert ( index_chars("hello", 'o') == some(4u)); + assert ( index_chars("hello", 'z') == none); } #[test] From fc18c9b585d67d27b516f36b08d8efcf57ca1f09 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Tue, 21 Feb 2012 21:48:03 -0800 Subject: [PATCH 05/17] (core::str) rename rindex -> rindex_chars --- src/fuzzer/fuzzer.rs | 2 +- src/libcore/str.rs | 16 ++++++++-------- src/libstd/fs.rs | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/fuzzer/fuzzer.rs b/src/fuzzer/fuzzer.rs index 152df70e7a4f4..bc316b6e89668 100644 --- a/src/fuzzer/fuzzer.rs +++ b/src/fuzzer/fuzzer.rs @@ -286,7 
+286,7 @@ fn check_variants_T( } fn last_part(filename: str) -> str { - let ix = option::get(str::rindex(filename, '/')); + let ix = option::get(str::rindex_chars(filename, '/')); str::slice(filename, ix + 1u, str::len_chars(filename) - 3u) } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 3c8cd7a3b853d..30357d8ea643d 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -72,7 +72,7 @@ export index_chars, byte_index, byte_index_from, - rindex, + rindex_chars, find, find_bytes, find_from_bytes, @@ -877,11 +877,11 @@ fn byte_index_from(s: str, b: u8, start: uint, end: uint) -> option { str::as_bytes(s) { |v| vec::position_from(v, start, end) { |x| x == b } } } -// Function: rindex +// Function: rindex_chars // // Returns the index of the first matching char // (as option some/none) -fn rindex(ss: str, cc: char) -> option { +fn rindex_chars(ss: str, cc: char) -> option { let bii = len_bytes(ss); let cii = len_chars(ss); while bii > 0u { @@ -1546,11 +1546,11 @@ mod tests { } #[test] - fn test_rindex() { - assert (rindex("hello", 'l') == some(3u)); - assert (rindex("hello", 'o') == some(4u)); - assert (rindex("hello", 'h') == some(0u)); - assert (rindex("hello", 'z') == none); + fn test_rindex_chars() { + assert (rindex_chars("hello", 'l') == some(3u)); + assert (rindex_chars("hello", 'o') == some(4u)); + assert (rindex_chars("hello", 'h') == some(0u)); + assert (rindex_chars("hello", 'z') == none); } #[test] diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs index ff2188461504d..3652eaf77f9d8 100644 --- a/src/libstd/fs.rs +++ b/src/libstd/fs.rs @@ -34,10 +34,10 @@ type path = str; fn splitDirnameBasename (pp: path) -> {dirname: str, basename: str} { let ii; - alt str::rindex(pp, os_fs::path_sep) { + alt str::rindex_chars(pp, os_fs::path_sep) { option::some(xx) { ii = xx; } option::none { - alt str::rindex(pp, os_fs::alt_path_sep) { + alt str::rindex_chars(pp, os_fs::alt_path_sep) { option::some(xx) { ii = xx; } option::none { ret {dirname: ".", basename: 
pp}; } } From c779e6733421539a017dcea359efee8fe042596c Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Tue, 21 Feb 2012 22:08:32 -0800 Subject: [PATCH 06/17] (core::char) rename slice -> slice_chars --- src/cargo/cargo.rs | 10 +++++----- src/comp/back/link.rs | 2 +- src/comp/syntax/codemap.rs | 4 ++-- src/fuzzer/fuzzer.rs | 2 +- src/libcore/str.rs | 30 +++++++++++++++--------------- src/libstd/fs.rs | 4 ++-- src/libstd/json.rs | 19 ++++++++++--------- src/rustdoc/unindent_pass.rs | 2 +- 8 files changed, 37 insertions(+), 36 deletions(-) diff --git a/src/cargo/cargo.rs b/src/cargo/cargo.rs index bce9bb5f84fab..879d4bf8a616d 100644 --- a/src/cargo/cargo.rs +++ b/src/cargo/cargo.rs @@ -177,7 +177,7 @@ fn rest(s: str, start: uint) -> str { if (start >= str::len_chars(s)) { "" } else { - str::slice(s, start, str::len_chars(s)) + str::slice_chars(s, start, str::len_chars(s)) } } @@ -688,8 +688,8 @@ fn cmd_install(c: cargo) unsafe { let uuid = rest(target, 5u); alt str::index_chars(uuid, '/') { option::some(idx) { - let source = str::slice(uuid, 0u, idx); - uuid = str::slice(uuid, idx + 1u, str::len_chars(uuid)); + let source = str::slice_chars(uuid, 0u, idx); + uuid = str::slice_chars(uuid, idx + 1u, str::len_chars(uuid)); install_uuid_specific(c, wd, source, uuid); } option::none { @@ -700,8 +700,8 @@ fn cmd_install(c: cargo) unsafe { let name = target; alt str::index_chars(name, '/') { option::some(idx) { - let source = str::slice(name, 0u, idx); - name = str::slice(name, idx + 1u, str::len_chars(name)); + let source = str::slice_chars(name, 0u, idx); + name = str::slice_chars(name, idx + 1u, str::len_chars(name)); install_named_specific(c, wd, source, name); } option::none { diff --git a/src/comp/back/link.rs b/src/comp/back/link.rs index ee450814ef961..e02aad48c5a84 100644 --- a/src/comp/back/link.rs +++ b/src/comp/back/link.rs @@ -111,7 +111,7 @@ mod write { fn mk_intermediate_name(output_path: str, extension: str) -> str unsafe { let stem = alt 
str::index_chars(output_path, '.') { option::some(dot_pos) { - str::slice(output_path, 0u, dot_pos) + str::slice_chars(output_path, 0u, dot_pos) } option::none { output_path } }; diff --git a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs index 6c0182c341149..74dbade09c108 100644 --- a/src/comp/syntax/codemap.rs +++ b/src/comp/syntax/codemap.rs @@ -180,13 +180,13 @@ fn span_to_snippet(sp: span, cm: codemap::codemap) -> str { let begin = lookup_byte_offset(cm,sp.lo); let end = lookup_byte_offset(cm,sp.hi); assert begin.fm == end.fm; - ret str::slice(*begin.fm.src, begin.pos, end.pos); + ret str::slice_chars(*begin.fm.src, begin.pos, end.pos); } fn get_snippet(cm: codemap::codemap, fidx: uint, lo: uint, hi: uint) -> str { let fm = cm.files[fidx]; - ret str::slice(*fm.src, lo, hi) + ret str::slice_chars(*fm.src, lo, hi) } fn get_filemap(cm: codemap, filename: str) -> filemap { diff --git a/src/fuzzer/fuzzer.rs b/src/fuzzer/fuzzer.rs index bc316b6e89668..3b443d68a3d0a 100644 --- a/src/fuzzer/fuzzer.rs +++ b/src/fuzzer/fuzzer.rs @@ -287,7 +287,7 @@ fn check_variants_T( fn last_part(filename: str) -> str { let ix = option::get(str::rindex_chars(filename, '/')); - str::slice(filename, ix + 1u, str::len_chars(filename) - 3u) + str::slice_chars(filename, ix + 1u, str::len_chars(filename) - 3u) } enum happiness { passed, cleanly_rejected(str), known_bug(str), failed(str), } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 30357d8ea643d..cbf6586139b7d 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -36,7 +36,7 @@ export bytes, chars, substr, - slice, + slice_chars, split, split_str, split_char, @@ -391,11 +391,11 @@ Failure: If `begin` + `len` is is greater than the char length of the string */ fn substr(s: str, begin: uint, len: uint) -> str { - ret slice(s, begin, begin + len); + ret slice_chars(s, begin, begin + len); } /* -Function: slice +Function: slice_chars Unicode-safe slice. 
Returns a slice of the given string containing the characters in the range [`begin`..`end`). `begin` and `end` are @@ -408,7 +408,7 @@ Failure: FIXME: make faster by avoiding char conversion */ -fn slice(s: str, begin: uint, end: uint) -> str { +fn slice_chars(s: str, begin: uint, end: uint) -> str { from_chars(vec::slice(chars(s), begin, end)) } @@ -617,7 +617,7 @@ fn windowed(nn: uint, ss: str) -> [str] { let ii = 0u; while ii+nn <= len { - let w = slice( ss, ii, ii+nn ); + let w = slice_chars( ss, ii, ii+nn ); vec::push(ww,w); ii += 1u; } @@ -1969,17 +1969,17 @@ mod tests { } #[test] - fn test_slice() { - assert (eq("ab", slice("abc", 0u, 2u))); - assert (eq("bc", slice("abc", 1u, 3u))); - assert (eq("", slice("abc", 1u, 1u))); - assert (eq("\u65e5", slice("\u65e5\u672c", 0u, 1u))); + fn test_slice_chars() { + assert (eq("ab", slice_chars("abc", 0u, 2u))); + assert (eq("bc", slice_chars("abc", 1u, 3u))); + assert (eq("", slice_chars("abc", 1u, 1u))); + assert (eq("\u65e5", slice_chars("\u65e5\u672c", 0u, 1u))); let data = "ประเทศไทย中华"; - assert (eq("ป", slice(data, 0u, 1u))); - assert (eq("ร", slice(data, 1u, 2u))); - assert (eq("华", slice(data, 10u, 11u))); - assert (eq("", slice(data, 1u, 1u))); + assert (eq("ป", slice_chars(data, 0u, 1u))); + assert (eq("ร", slice_chars(data, 1u, 2u))); + assert (eq("华", slice_chars(data, 10u, 11u))); + assert (eq("", slice_chars(data, 1u, 1u))); fn a_million_letter_X() -> str { let i = 0; @@ -1994,7 +1994,7 @@ mod tests { ret rs; } assert (eq(half_a_million_letter_X(), - slice(a_million_letter_X(), 0u, 500000u))); + slice_chars(a_million_letter_X(), 0u, 500000u))); } #[test] diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs index 3652eaf77f9d8..acc09fefb01ac 100644 --- a/src/libstd/fs.rs +++ b/src/libstd/fs.rs @@ -44,8 +44,8 @@ fn splitDirnameBasename (pp: path) -> {dirname: str, basename: str} { } } - ret {dirname: str::slice(pp, 0u, ii), - basename: str::slice(pp, ii + 1u, str::len_chars(pp))}; + ret {dirname: 
str::slice_chars(pp, 0u, ii), + basename: str::slice_chars(pp, ii + 1u, str::len_chars(pp))}; } /* diff --git a/src/libstd/json.rs b/src/libstd/json.rs index 23d70a78bf61a..70b16758e1d32 100644 --- a/src/libstd/json.rs +++ b/src/libstd/json.rs @@ -71,7 +71,7 @@ fn to_str(j: json) -> str { fn rest(s: str) -> str { assert(str::len_chars(s) >= 1u); - str::slice(s, 1u, str::len_chars(s)) + str::slice_chars(s, 1u, str::len_chars(s)) } fn from_str_str(s: str) -> (option, str) { @@ -99,7 +99,7 @@ fn from_str_str(s: str) -> (option, str) { cont; } else if (c == '"') { ret (some(string(res)), - str::slice(s, pos, str::len_chars(s))); + str::slice_chars(s, pos, str::len_chars(s))); } res = res + str::from_char(c); } @@ -200,12 +200,13 @@ fn from_str_float(s: str) -> (option, str) { } '.' { break; } _ { ret (some(num(neg * res)), - str::slice(s, opos, str::len_chars(s))); } + str::slice_chars(s, opos, str::len_chars(s))); } } } if pos == len { - ret (some(num(neg * res)), str::slice(s, pos, str::len_chars(s))); + ret (some(num(neg * res)), + str::slice_chars(s, pos, str::len_chars(s))); } let dec = 1f; @@ -220,17 +221,17 @@ fn from_str_float(s: str) -> (option, str) { res += (((c as int) - ('0' as int)) as float) * dec; } _ { ret (some(num(neg * res)), - str::slice(s, opos, str::len_chars(s))); } + str::slice_chars(s, opos, str::len_chars(s))); } } } - ret (some(num(neg * res)), str::slice(s, pos, str::len_chars(s))); + ret (some(num(neg * res)), str::slice_chars(s, pos, str::len_chars(s))); } fn from_str_bool(s: str) -> (option, str) { if (str::starts_with(s, "true")) { - (some(boolean(true)), str::slice(s, 4u, str::len_chars(s))) + (some(boolean(true)), str::slice_chars(s, 4u, str::len_chars(s))) } else if (str::starts_with(s, "false")) { - (some(boolean(false)), str::slice(s, 5u, str::len_chars(s))) + (some(boolean(false)), str::slice_chars(s, 5u, str::len_chars(s))) } else { (none, s) } @@ -238,7 +239,7 @@ fn from_str_bool(s: str) -> (option, str) { fn from_str_null(s: 
str) -> (option, str) { if (str::starts_with(s, "null")) { - (some(null), str::slice(s, 4u, str::len_chars(s))) + (some(null), str::slice_chars(s, 4u, str::len_chars(s))) } else { (none, s) } diff --git a/src/rustdoc/unindent_pass.rs b/src/rustdoc/unindent_pass.rs index d8f56e2047944..42a3f3fa57af8 100644 --- a/src/rustdoc/unindent_pass.rs +++ b/src/rustdoc/unindent_pass.rs @@ -68,7 +68,7 @@ fn unindent(s: str) -> str { line } else { assert str::len_bytes(line) >= min_indent; - str::slice(line, min_indent, str::len_chars(line)) + str::slice_chars(line, min_indent, str::len_chars(line)) } }; str::connect(unindented, "\n") From b0977def2a1c97fcaa4d75ef63cfa0ad5d91ea7c Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 22 Feb 2012 00:49:05 -0800 Subject: [PATCH 07/17] (core::str) add a safe byte slice and maybe_slice --- src/libcore/str.rs | 77 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index cbf6586139b7d..b16272adc5aea 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -394,6 +394,31 @@ fn substr(s: str, begin: uint, len: uint) -> str { ret slice_chars(s, begin, begin + len); } +// Function: slice +// +// Return a slice of the given string from the byte range [`begin`..`end`) +// or else fail when `begin` and `end` do not point to valid characters or +// beyond the last character of the string +fn slice(ss: str, begin: uint, end: uint) -> str { + alt maybe_slice(ss, begin, end) { + none { fail "slice requires a valid start and end"; } + some(sli) { ret sli; } + } +} + +// Function: maybe_slice +// +// Like slice, only returns an option +fn maybe_slice(ss: str, begin: uint, end: uint) -> option unsafe { + let sli = unsafe::slice_bytes(ss, begin, end); + + if is_utf8(bytes(sli)) { + ret some(sli); + } else { + ret none; + } +} + /* Function: slice_chars @@ -1968,6 +1993,58 @@ mod tests { assert (replace(data, d, repl) == data); } + #[test] + fn test_slice() { + assert 
(eq("ab", slice("abc", 0u, 2u))); + assert (eq("bc", slice("abc", 1u, 3u))); + assert (eq("", slice("abc", 1u, 1u))); + assert (eq("\u65e5", slice("\u65e5\u672c", 0u, 3u))); + + let data = "ประเทศไทย中华"; + assert (eq("ป", slice(data, 0u, 3u))); + assert (eq("ร", slice(data, 3u, 6u))); + assert (eq("", slice(data, 1u, 1u))); + assert (eq("华", slice(data, 30u, 33u))); + + fn a_million_letter_X() -> str { + let i = 0; + let rs = ""; + while i < 100000 { rs += "华华华华华华华华华华"; i += 1; } + ret rs; + } + fn half_a_million_letter_X() -> str { + let i = 0; + let rs = ""; + while i < 100000 { rs += "华华华华华"; i += 1; } + ret rs; + } + assert (eq(half_a_million_letter_X(), + slice(a_million_letter_X(), 0u, (3u * 500000u)))); + } + + #[test] + fn test_maybe_slice() { + let ss = "中华Việt Nam"; + + assert none == maybe_slice(ss, 0u, 2u); + assert none == maybe_slice(ss, 1u, 3u); + assert none == maybe_slice(ss, 1u, 2u); + assert some("华") == maybe_slice(ss, 3u, 6u); + assert some("Việt Nam") == maybe_slice(ss, 6u, 16u); + assert none == maybe_slice(ss, 4u, 16u); + + /* 0: 中 + 3: 华 + 6: V + 7: i + 8: ệ + 11: t + 12: + 13: N + 14: a + 15: m */ + } + #[test] fn test_slice_chars() { assert (eq("ab", slice_chars("abc", 0u, 2u))); From b2b23d801df55d6589553a5ab4b3aa5b9545126f Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 22 Feb 2012 01:16:47 -0800 Subject: [PATCH 08/17] (core::str) add a safe byte slice and maybe_slice ++ --- src/libcore/str.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index b16272adc5aea..024924278dc6a 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -401,8 +401,8 @@ fn substr(s: str, begin: uint, len: uint) -> str { // beyond the last character of the string fn slice(ss: str, begin: uint, end: uint) -> str { alt maybe_slice(ss, begin, end) { - none { fail "slice requires a valid start and end"; } some(sli) { ret sli; } + none { fail "slice requires a valid start and end"; } } } From 
69f405e96485f721f1a226be3464ef35d80c19e5 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 22 Feb 2012 23:03:42 -0800 Subject: [PATCH 09/17] (core::str) add index, index_from, rindex which return byte positions of chars; rename find to find_chars; add fixmes to delete byte_index, byte_index_from --- src/libcore/str.rs | 91 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 15 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 024924278dc6a..17975fe658122 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -72,8 +72,9 @@ export index_chars, byte_index, byte_index_from, + //rindex, rindex_chars, - find, + find_chars, find_bytes, find_from_bytes, contains, @@ -861,7 +862,36 @@ Section: Searching // Function: index // -// Returns the index of the first matching char +// Returns the byte index of the first matching char +// (as option some/none) +fn index(ss: str, cc: char) -> option { + index_from(ss, cc, 0u, len_bytes(ss)) +} + +// Function: index_from +// +// Returns the byte index of the first matching char +// (as option some/none), starting at `nn` +fn index_from(ss: str, cc: char, start: uint, end: uint) -> option { + let bii = start; + while bii < end { + let {ch, next} = char_range_at(ss, bii); + + // found here? + if ch == cc { + ret some(bii); + } + + bii = next; + } + + // wasn't found + ret none; +} + +// Function: index_chars +// +// Returns the char index of the first matching char // (as option some/none) fn index_chars(ss: str, cc: char) -> option { let bii = 0u; @@ -887,6 +917,7 @@ fn index_chars(ss: str, cc: char) -> option { // // Returns the index of the first matching byte // (as option some/none) +// FIXME: delete fn byte_index(s: str, b: u8) -> option { byte_index_from(s, b, 0u, len_bytes(s)) } @@ -896,15 +927,36 @@ fn byte_index(s: str, b: u8) -> option { // Returns the index of the first matching byte within the range [`start`, // `end`). 
// (as option some/none) +// FIXME: delete fn byte_index_from(s: str, b: u8, start: uint, end: uint) -> option { assert end <= len_bytes(s); str::as_bytes(s) { |v| vec::position_from(v, start, end) { |x| x == b } } } +// Function: rindex +// +// Returns the byte index of the last matching char +// (as option some/none) +fn rindex(ss: str, cc: char) -> option { + let bii = len_bytes(ss); + while bii > 0u { + let {ch, prev} = char_range_at_reverse(ss, bii); + bii = prev; + + // found here? + if ch == cc { + ret some(bii); + } + } + + // wasn't found + ret none; +} + // Function: rindex_chars // -// Returns the index of the first matching char +// Returns the char index of the last matching char // (as option some/none) fn rindex_chars(ss: str, cc: char) -> option { let bii = len_bytes(ss); @@ -926,7 +978,7 @@ fn rindex_chars(ss: str, cc: char) -> option { //Function: find_bytes // -// Find the char position of the first instance of one string +// Find the byte position of the first instance of one string // within another, or return option::none fn find_bytes(haystack: str, needle: str) -> option { find_from_bytes(haystack, needle, 0u, len_bytes(haystack)) @@ -934,7 +986,7 @@ fn find_bytes(haystack: str, needle: str) -> option { //Function: find_from_bytes // -// Find the char position of the first instance of one string +// Find the byte position of the first instance of one string // within another, or return option::none // // FIXME: Boyer-Moore should be significantly faster @@ -962,11 +1014,11 @@ fn find_from_bytes(haystack: str, needle: str, start: uint, end:uint) ret none; } -// Function: find +// Function: find_chars // // Find the char position of the first instance of one string // within another, or return option::none -fn find(haystack: str, needle: str) -> option { +fn find_chars(haystack: str, needle: str) -> option { alt find_bytes(haystack, needle) { none { ret none; } some(nn) { ret some(b2c_pos(haystack, nn)); } @@ -1570,6 +1622,15 @@ mod tests {
assert ( index_chars("hello", 'z') == none); } + #[test] + fn test_rindex() { + assert rindex("hello", 'l') == some(3u); + assert rindex("hello", 'o') == some(4u); + assert rindex("hello", 'h') == some(0u); + assert rindex("hello", 'z') == none; + assert rindex("ประเทศไทย中华Việt Nam", '华') == some(30u); + } + #[test] fn test_rindex_chars() { assert (rindex_chars("hello", 'l') == some(3u)); @@ -1820,17 +1881,17 @@ mod tests { } #[test] - fn test_find() { + fn test_find_chars() { // char positions - assert (find("banana", "apple pie") == none); - assert (find("", "") == some(0u)); + assert (find_chars("banana", "apple pie") == none); + assert (find_chars("", "") == some(0u)); let data = "ประเทศไทย中华Việt Nam"; - assert (find(data, "") == some(0u)); - assert (find(data, "ประเ") == some(0u)); - assert (find(data, "ะเ") == some(2u)); - assert (find(data, "中华") == some(9u)); - assert (find(data, "ไท华") == none); + assert (find_chars(data, "") == some(0u)); + assert (find_chars(data, "ประเ") == some(0u)); + assert (find_chars(data, "ะเ") == some(2u)); + assert (find_chars(data, "中华") == some(9u)); + assert (find_chars(data, "ไท华") == none); } #[test] From ca3c7ae1be6fc838391a7853efa7a5c60a10f05c Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 22 Feb 2012 23:26:09 -0800 Subject: [PATCH 10/17] (core::str) do some replacements --- src/fuzzer/fuzzer.rs | 4 ++-- src/libcore/str.rs | 5 +++-- src/libstd/fs.rs | 8 ++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/fuzzer/fuzzer.rs b/src/fuzzer/fuzzer.rs index 3b443d68a3d0a..d6b2cf8b0ec91 100644 --- a/src/fuzzer/fuzzer.rs +++ b/src/fuzzer/fuzzer.rs @@ -286,8 +286,8 @@ fn check_variants_T( } fn last_part(filename: str) -> str { - let ix = option::get(str::rindex_chars(filename, '/')); - str::slice_chars(filename, ix + 1u, str::len_chars(filename) - 3u) + let ix = option::get(str::rindex(filename, '/')); + str::slice(filename, ix + 1u, str::len_bytes(filename) - 3u) } enum happiness { passed, 
cleanly_rejected(str), known_bug(str), failed(str), } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 17975fe658122..4f315541da2a2 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -36,6 +36,7 @@ export bytes, chars, substr, + slice, slice_chars, split, split_str, @@ -72,8 +73,8 @@ export index_chars, byte_index, byte_index_from, - //rindex, - rindex_chars, + rindex, + //rindex_chars, find_chars, find_bytes, find_from_bytes, diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs index acc09fefb01ac..167e0f125d6a3 100644 --- a/src/libstd/fs.rs +++ b/src/libstd/fs.rs @@ -34,18 +34,18 @@ type path = str; fn splitDirnameBasename (pp: path) -> {dirname: str, basename: str} { let ii; - alt str::rindex_chars(pp, os_fs::path_sep) { + alt str::rindex(pp, os_fs::path_sep) { option::some(xx) { ii = xx; } option::none { - alt str::rindex_chars(pp, os_fs::alt_path_sep) { + alt str::rindex(pp, os_fs::alt_path_sep) { option::some(xx) { ii = xx; } option::none { ret {dirname: ".", basename: pp}; } } } } - ret {dirname: str::slice_chars(pp, 0u, ii), - basename: str::slice_chars(pp, ii + 1u, str::len_chars(pp))}; + ret {dirname: str::slice(pp, 0u, ii), + basename: str::slice(pp, ii + 1u, str::len_bytes(pp))}; } /* From 0f2dbaa54ff6a9eba33277c1eb54895b7722096d Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 22 Feb 2012 23:46:45 -0800 Subject: [PATCH 11/17] (core::str) replace byte_index[_from] with index[_from] --- src/cargo/cargo.rs | 16 ++++++++-------- src/comp/syntax/codemap.rs | 2 +- src/libcore/str.rs | 6 ++++-- src/libstd/json.rs | 20 ++++++++++---------- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/cargo/cargo.rs b/src/cargo/cargo.rs index 879d4bf8a616d..caaedab97c9a3 100644 --- a/src/cargo/cargo.rs +++ b/src/cargo/cargo.rs @@ -174,10 +174,10 @@ fn print(s: str) { } fn rest(s: str, start: uint) -> str { - if (start >= str::len_chars(s)) { + if (start >= str::len_bytes(s)) { "" } else { - str::slice_chars(s, start, str::len_chars(s)) 
+ str::slice(s, start, str::len_bytes(s)) } } @@ -686,10 +686,10 @@ fn cmd_install(c: cargo) unsafe { if str::starts_with(target, "uuid:") { let uuid = rest(target, 5u); - alt str::index_chars(uuid, '/') { + alt str::index(uuid, '/') { option::some(idx) { - let source = str::slice_chars(uuid, 0u, idx); - uuid = str::slice_chars(uuid, idx + 1u, str::len_chars(uuid)); + let source = str::slice(uuid, 0u, idx); + uuid = str::slice(uuid, idx + 1u, str::len_bytes(uuid)); install_uuid_specific(c, wd, source, uuid); } option::none { @@ -698,10 +698,10 @@ fn cmd_install(c: cargo) unsafe { } } else { let name = target; - alt str::index_chars(name, '/') { + alt str::index(name, '/') { option::some(idx) { - let source = str::slice_chars(name, 0u, idx); - name = str::slice_chars(name, idx + 1u, str::len_chars(name)); + let source = str::slice(name, 0u, idx); + name = str::slice(name, idx + 1u, str::len_bytes(name)); install_named_specific(c, wd, source, name); } option::none { diff --git a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs index 74dbade09c108..abe5a242261a2 100644 --- a/src/comp/syntax/codemap.rs +++ b/src/comp/syntax/codemap.rs @@ -157,7 +157,7 @@ fn span_to_lines(sp: span, cm: codemap::codemap) -> @file_lines { fn get_line(fm: filemap, line: int) -> str unsafe { let begin: uint = fm.lines[line].byte - fm.start_pos.byte; - let end = alt str::byte_index_from(*fm.src, '\n' as u8, begin, + let end = alt str::index_from(*fm.src, '\n', begin, str::len_bytes(*fm.src)) { some(e) { e } none { str::len_bytes(*fm.src) } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 4f315541da2a2..8760403a77a2f 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -71,8 +71,10 @@ export // Searching index_chars, - byte_index, - byte_index_from, + index, + index_from, + //byte_index, + //byte_index_from, rindex, //rindex_chars, find_chars, diff --git a/src/libstd/json.rs b/src/libstd/json.rs index 70b16758e1d32..32294f38fb94f 100644 --- a/src/libstd/json.rs +++ 
b/src/libstd/json.rs @@ -70,8 +70,8 @@ fn to_str(j: json) -> str { } fn rest(s: str) -> str { - assert(str::len_chars(s) >= 1u); - str::slice_chars(s, 1u, str::len_chars(s)) + assert(str::len_bytes(s) >= 1u); + str::slice(s, 1u, str::len_bytes(s)) } fn from_str_str(s: str) -> (option, str) { @@ -99,7 +99,7 @@ fn from_str_str(s: str) -> (option, str) { cont; } else if (c == '"') { ret (some(string(res)), - str::slice_chars(s, pos, str::len_chars(s))); + str::slice(s, pos, str::len_bytes(s))); } res = res + str::from_char(c); } @@ -200,13 +200,13 @@ fn from_str_float(s: str) -> (option, str) { } '.' { break; } _ { ret (some(num(neg * res)), - str::slice_chars(s, opos, str::len_chars(s))); } + str::slice(s, opos, str::len_bytes(s))); } } } if pos == len { ret (some(num(neg * res)), - str::slice_chars(s, pos, str::len_chars(s))); + str::slice(s, pos, str::len_bytes(s))); } let dec = 1f; @@ -221,17 +221,17 @@ fn from_str_float(s: str) -> (option, str) { res += (((c as int) - ('0' as int)) as float) * dec; } _ { ret (some(num(neg * res)), - str::slice_chars(s, opos, str::len_chars(s))); } + str::slice(s, opos, str::len_bytes(s))); } } } - ret (some(num(neg * res)), str::slice_chars(s, pos, str::len_chars(s))); + ret (some(num(neg * res)), str::slice(s, pos, str::len_bytes(s))); } fn from_str_bool(s: str) -> (option, str) { if (str::starts_with(s, "true")) { - (some(boolean(true)), str::slice_chars(s, 4u, str::len_chars(s))) + (some(boolean(true)), str::slice(s, 4u, str::len_bytes(s))) } else if (str::starts_with(s, "false")) { - (some(boolean(false)), str::slice_chars(s, 5u, str::len_chars(s))) + (some(boolean(false)), str::slice(s, 5u, str::len_bytes(s))) } else { (none, s) } @@ -239,7 +239,7 @@ fn from_str_bool(s: str) -> (option, str) { fn from_str_null(s: str) -> (option, str) { if (str::starts_with(s, "null")) { - (some(null), str::slice_chars(s, 4u, str::len_chars(s))) + (some(null), str::slice(s, 4u, str::len_bytes(s))) } else { (none, s) } From 
9cf3f7144ac25b2e9f2ae8b339b2cb465c5d4b13 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 22 Feb 2012 23:56:51 -0800 Subject: [PATCH 12/17] (core::str) stop using index_chars --- src/comp/back/link.rs | 4 ++-- src/libcore/str.rs | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/comp/back/link.rs b/src/comp/back/link.rs index e02aad48c5a84..5b7d7a35c5371 100644 --- a/src/comp/back/link.rs +++ b/src/comp/back/link.rs @@ -109,9 +109,9 @@ mod write { // Decides what to call an intermediate file, given the name of the output // and the extension to use. fn mk_intermediate_name(output_path: str, extension: str) -> str unsafe { - let stem = alt str::index_chars(output_path, '.') { + let stem = alt str::index(output_path, '.') { option::some(dot_pos) { - str::slice_chars(output_path, 0u, dot_pos) + str::slice(output_path, 0u, dot_pos) } option::none { output_path } }; diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 8760403a77a2f..b9eea353e211f 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -70,11 +70,9 @@ export lines_iter, // Searching - index_chars, + //index_chars, index, index_from, - //byte_index, - //byte_index_from, rindex, //rindex_chars, find_chars, From d0e5dc07241dbba9905299f8ff9a6966f3213f4d Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Thu, 23 Feb 2012 00:45:25 -0800 Subject: [PATCH 13/17] (core::str) replace uses of unsafe::slice_bytes; replace find[_from]_bytes with find[_from] --- src/comp/back/link.rs | 6 +-- src/comp/middle/trans/debuginfo.rs | 2 +- src/comp/syntax/codemap.rs | 2 +- src/comp/syntax/parse/lexer.rs | 4 +- src/comp/util/ppaux.rs | 2 +- src/compiletest/errors.rs | 6 +-- src/compiletest/header.rs | 4 +- src/libcore/extfmt.rs | 8 +-- src/libcore/str.rs | 80 +++++++++++++++--------------- src/libstd/getopts.rs | 2 +- src/libstd/rope.rs | 2 +- src/libstd/sha1.rs | 2 +- src/rustdoc/markdown_pass.rs | 8 +-- 13 files changed, 64 insertions(+), 64 deletions(-) diff --git a/src/comp/back/link.rs 
b/src/comp/back/link.rs index 5b7d7a35c5371..572fde7bd3f6f 100644 --- a/src/comp/back/link.rs +++ b/src/comp/back/link.rs @@ -478,7 +478,7 @@ fn build_link_meta(sess: session, c: ast::crate, output: str, } fn truncated_sha1_result(sha: sha1) -> str unsafe { - ret str::unsafe::slice_bytes(sha.result_str(), 0u, 16u); + ret str::slice(sha.result_str(), 0u, 16u); } @@ -567,12 +567,12 @@ fn link_binary(sess: session, // Converts a library file name into a cc -l argument fn unlib(config: @session::config, filename: str) -> str unsafe { let rmlib = fn@(filename: str) -> str { - let found = str::find_bytes(filename, "lib"); + let found = str::find(filename, "lib"); if config.os == session::os_macos || (config.os == session::os_linux || config.os == session::os_freebsd) && option::is_some(found) && option::get(found) == 0u { - ret str::unsafe::slice_bytes(filename, 3u, + ret str::slice(filename, 3u, str::len_bytes(filename)); } else { ret filename; } }; diff --git a/src/comp/middle/trans/debuginfo.rs b/src/comp/middle/trans/debuginfo.rs index 39da8cf2c57d8..dff4f26ae99fb 100644 --- a/src/comp/middle/trans/debuginfo.rs +++ b/src/comp/middle/trans/debuginfo.rs @@ -167,7 +167,7 @@ fn create_compile_unit(cx: crate_ctxt, full_path: str) let work_dir = cx.sess.working_dir; let file_path = if str::starts_with(full_path, work_dir) { - str::unsafe::slice_bytes(full_path, str::len_bytes(work_dir), + str::slice(full_path, str::len_bytes(work_dir), str::len_bytes(full_path)) } else { full_path diff --git a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs index abe5a242261a2..e9b4569df6ca9 100644 --- a/src/comp/syntax/codemap.rs +++ b/src/comp/syntax/codemap.rs @@ -162,7 +162,7 @@ fn get_line(fm: filemap, line: int) -> str unsafe { some(e) { e } none { str::len_bytes(*fm.src) } }; - str::unsafe::slice_bytes(*fm.src, begin, end) + str::slice(*fm.src, begin, end) } fn lookup_byte_offset(cm: codemap::codemap, chpos: uint) diff --git a/src/comp/syntax/parse/lexer.rs 
b/src/comp/syntax/parse/lexer.rs index 113ca056ed40b..0328afc9f20d7 100644 --- a/src/comp/syntax/parse/lexer.rs +++ b/src/comp/syntax/parse/lexer.rs @@ -24,7 +24,7 @@ impl reader for reader { fn get_str_from(start: uint) -> str unsafe { // I'm pretty skeptical about this subtraction. What if there's a // multi-byte character before the mark? - ret str::unsafe::slice_bytes(*self.src, start - 1u, self.pos - 1u); + ret str::slice(*self.src, start - 1u, self.pos - 1u); } fn next() -> char { if self.pos < self.len { @@ -611,7 +611,7 @@ fn trim_whitespace_prefix_and_push_line(&lines: [str], let s1; if all_whitespace(s, 0u, col) { if col < str::len_bytes(s) { - s1 = str::unsafe::slice_bytes(s, col, str::len_bytes(s)); + s1 = str::slice(s, col, str::len_bytes(s)); } else { s1 = ""; } } else { s1 = s; } log(debug, "pushing line: " + s1); diff --git a/src/comp/util/ppaux.rs b/src/comp/util/ppaux.rs index 547c2d5ee824c..18bd05f2b8d3c 100644 --- a/src/comp/util/ppaux.rs +++ b/src/comp/util/ppaux.rs @@ -131,7 +131,7 @@ fn ty_to_str(cx: ctxt, typ: t) -> str { fn ty_to_short_str(cx: ctxt, typ: t) -> str unsafe { let s = encoder::encoded_ty(cx, typ); - if str::len_bytes(s) >= 32u { s = str::unsafe::slice_bytes(s, 0u, 32u); } + if str::len_bytes(s) >= 32u { s = str::slice(s, 0u, 32u); } ret s; } diff --git a/src/compiletest/errors.rs b/src/compiletest/errors.rs index f6b32e2fd311b..1184fc5f0f25f 100644 --- a/src/compiletest/errors.rs +++ b/src/compiletest/errors.rs @@ -25,7 +25,7 @@ fn load_errors(testfile: str) -> [expected_error] { fn parse_expected(line_num: uint, line: str) -> [expected_error] unsafe { let error_tag = "//!"; let idx; - alt str::find_bytes(line, error_tag) { + alt str::find(line, error_tag) { option::none { ret []; } option::some(nn) { idx = (nn as uint) + str::len_bytes(error_tag); } } @@ -43,11 +43,11 @@ fn parse_expected(line_num: uint, line: str) -> [expected_error] unsafe { while idx < len && line[idx] == (' ' as u8) { idx += 1u; } let start_kind = idx; 
while idx < len && line[idx] != (' ' as u8) { idx += 1u; } - let kind = str::to_lower(str::unsafe::slice_bytes(line, start_kind, idx)); + let kind = str::to_lower(str::slice(line, start_kind, idx)); // Extract msg: while idx < len && line[idx] == (' ' as u8) { idx += 1u; } - let msg = str::unsafe::slice_bytes(line, idx, len); + let msg = str::slice(line, idx, len); #debug("line=%u kind=%s msg=%s", line_num - adjust_line, kind, msg); diff --git a/src/compiletest/header.rs b/src/compiletest/header.rs index a976c5fb98c7c..8071edad2fe7f 100644 --- a/src/compiletest/header.rs +++ b/src/compiletest/header.rs @@ -106,10 +106,10 @@ fn parse_name_directive(line: str, directive: str) -> bool { fn parse_name_value_directive(line: str, directive: str) -> option unsafe { let keycolon = directive + ":"; - alt str::find_bytes(line, keycolon) { + alt str::find(line, keycolon) { option::some(colon) { let value = - str::unsafe::slice_bytes(line, + str::slice(line, colon + str::len_bytes(keycolon), str::len_bytes(line)); #debug("%s: %s", directive, value); diff --git a/src/libcore/extfmt.rs b/src/libcore/extfmt.rs index 5fca4d69f3af1..079451498f1b0 100644 --- a/src/libcore/extfmt.rs +++ b/src/libcore/extfmt.rs @@ -93,13 +93,13 @@ mod ct { } let i = 0u; while i < lim { - let curr = str::unsafe::slice_bytes(s, i, i+1u); + let curr = str::slice(s, i, i+1u); if str::eq(curr, "%") { i += 1u; if i >= lim { error("unterminated conversion at end of string"); } - let curr2 = str::unsafe::slice_bytes(s, i, i+1u); + let curr2 = str::slice(s, i, i+1u); if str::eq(curr2, "%") { buf += curr2; i += 1u; @@ -225,7 +225,7 @@ mod ct { fn parse_type(s: str, i: uint, lim: uint, error: error_fn) -> {ty: ty, next: uint} unsafe { if i >= lim { error("missing type in conversion"); } - let tstr = str::unsafe::slice_bytes(s, i, i+1u); + let tstr = str::slice(s, i, i+1u); // TODO: Do we really want two signed types here? // How important is it to be printf compatible? 
let t = @@ -439,7 +439,7 @@ mod rt { let headstr = str::from_bytes([head]); // FIXME: not UTF-8 safe let bytelen = str::len_bytes(s); - let numpart = str::unsafe::slice_bytes(s, 1u, bytelen); + let numpart = str::slice(s, 1u, bytelen); ret headstr + padstr + numpart; } } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index b9eea353e211f..fb7ec4eec2e5b 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -75,9 +75,9 @@ export index_from, rindex, //rindex_chars, + find, + find_from, find_chars, - find_bytes, - find_from_bytes, contains, starts_with, ends_with, @@ -385,7 +385,7 @@ fn chars(s: str) -> [char] { /* Function: substr -Take a substring of another. Returns a string containing `len` chars +Take a substring of another. Returns a string containing `len` bytes starting at char offset `begin`. Failure: @@ -393,7 +393,7 @@ Failure: If `begin` + `len` is is greater than the char length of the string */ fn substr(s: str, begin: uint, len: uint) -> str { - ret slice_chars(s, begin, begin + len); + ret slice(s, begin, begin + len); } // Function: slice @@ -696,7 +696,7 @@ fn replace(s: str, from: str, to: str) -> str unsafe { from, to); } else { let idx; - alt find_bytes(s, from) { + alt find(s, from) { some(x) { idx = x; } none { ret s; } } @@ -977,21 +977,21 @@ fn rindex_chars(ss: str, cc: char) -> option { ret none; } -//Function: find_bytes +//Function: find // // Find the byte position of the first instance of one string // within another, or return option::none -fn find_bytes(haystack: str, needle: str) -> option { - find_from_bytes(haystack, needle, 0u, len_bytes(haystack)) +fn find(haystack: str, needle: str) -> option { + find_from(haystack, needle, 0u, len_bytes(haystack)) } -//Function: find_from_bytes +//Function: find_from // // Find the byte position of the first instance of one string // within another, or return option::none // // FIXME: Boyer-Moore should be significantly faster -fn find_from_bytes(haystack: str, needle: str, start: uint, 
end:uint) +fn find_from(haystack: str, needle: str, start: uint, end:uint) -> option { assert end <= len_bytes(haystack); @@ -1020,7 +1020,7 @@ fn find_from_bytes(haystack: str, needle: str, start: uint, end:uint) // Find the char position of the first instance of one string // within another, or return option::none fn find_chars(haystack: str, needle: str) -> option { - alt find_bytes(haystack, needle) { + alt find(haystack, needle) { none { ret none; } some(nn) { ret some(b2c_pos(haystack, nn)); } } @@ -1056,7 +1056,7 @@ haystack - The string to look in needle - The string to look for */ fn contains(haystack: str, needle: str) -> bool { - option::is_some(find_bytes(haystack, needle)) + option::is_some(find(haystack, needle)) } /* @@ -1479,8 +1479,8 @@ mod unsafe { export from_bytes, from_byte, - slice_bytes, - slice_bytes_safe_range, + slice_bytes, // FIXME: stop exporting + slice_bytes_safe_range, // FIXME: stop exporting push_byte, push_bytes, // note: wasn't exported pop_byte, @@ -1840,45 +1840,45 @@ mod tests { } #[test] - fn test_find_bytes() { + fn test_find() { // byte positions - assert (find_bytes("banana", "apple pie") == none); - assert (find_bytes("", "") == some(0u)); + assert (find("banana", "apple pie") == none); + assert (find("", "") == some(0u)); let data = "ประเทศไทย中华Việt Nam"; - assert (find_bytes(data, "") == some(0u)); - assert (find_bytes(data, "ประเ") == some( 0u)); - assert (find_bytes(data, "ะเ") == some( 6u)); - assert (find_bytes(data, "中华") == some(27u)); - assert (find_bytes(data, "ไท华") == none); + assert (find(data, "") == some(0u)); + assert (find(data, "ประเ") == some( 0u)); + assert (find(data, "ะเ") == some( 6u)); + assert (find(data, "中华") == some(27u)); + assert (find(data, "ไท华") == none); } #[test] - fn test_find_from_bytes() { + fn test_find_from() { // byte positions - assert (find_from_bytes("", "", 0u, 0u) == some(0u)); + assert (find_from("", "", 0u, 0u) == some(0u)); let data = "abcabc"; - assert 
find_from_bytes(data, "ab", 0u, 6u) == some(0u); - assert find_from_bytes(data, "ab", 2u, 6u) == some(3u); - assert find_from_bytes(data, "ab", 2u, 4u) == none; + assert find_from(data, "ab", 0u, 6u) == some(0u); + assert find_from(data, "ab", 2u, 6u) == some(3u); + assert find_from(data, "ab", 2u, 4u) == none; let data = "ประเทศไทย中华Việt Nam"; data += data; - assert find_from_bytes(data, "", 0u, 43u) == some(0u); - assert find_from_bytes(data, "", 6u, 43u) == some(6u); + assert find_from(data, "", 0u, 43u) == some(0u); + assert find_from(data, "", 6u, 43u) == some(6u); - assert find_from_bytes(data, "ประ", 0u, 43u) == some( 0u); - assert find_from_bytes(data, "ทศไ", 0u, 43u) == some(12u); - assert find_from_bytes(data, "ย中", 0u, 43u) == some(24u); - assert find_from_bytes(data, "iệt", 0u, 43u) == some(34u); - assert find_from_bytes(data, "Nam", 0u, 43u) == some(40u); + assert find_from(data, "ประ", 0u, 43u) == some( 0u); + assert find_from(data, "ทศไ", 0u, 43u) == some(12u); + assert find_from(data, "ย中", 0u, 43u) == some(24u); + assert find_from(data, "iệt", 0u, 43u) == some(34u); + assert find_from(data, "Nam", 0u, 43u) == some(40u); - assert find_from_bytes(data, "ประ", 43u, 86u) == some(43u); - assert find_from_bytes(data, "ทศไ", 43u, 86u) == some(55u); - assert find_from_bytes(data, "ย中", 43u, 86u) == some(67u); - assert find_from_bytes(data, "iệt", 43u, 86u) == some(77u); - assert find_from_bytes(data, "Nam", 43u, 86u) == some(83u); + assert find_from(data, "ประ", 43u, 86u) == some(43u); + assert find_from(data, "ทศไ", 43u, 86u) == some(55u); + assert find_from(data, "ย中", 43u, 86u) == some(67u); + assert find_from(data, "iệt", 43u, 86u) == some(77u); + assert find_from(data, "Nam", 43u, 86u) == some(83u); } #[test] @@ -1912,7 +1912,7 @@ mod tests { t("hello", "el", 1); assert "ะเทศไท" - == substr("ประเทศไทย中华Việt Nam", 2u, 6u); + == substr("ประเทศไทย中华Việt Nam", 6u, 18u); } #[test] diff --git a/src/libstd/getopts.rs b/src/libstd/getopts.rs index 
e925cd16e335d..25681185f9b58 100644 --- a/src/libstd/getopts.rs +++ b/src/libstd/getopts.rs @@ -229,7 +229,7 @@ fn getopts(args: [str], opts: [opt]) -> result unsafe { let names; let i_arg = option::none::; if cur[1] == '-' as u8 { - let tail = str::unsafe::slice_bytes(cur, 2u, curlen); + let tail = str::slice(cur, 2u, curlen); let tail_eq = str::splitn_char(tail, '=', 1u); if vec::len(tail_eq) <= 1u { names = [long(tail)]; diff --git a/src/libstd/rope.rs b/src/libstd/rope.rs index 3f330c3586d46..4351ceb2d5ab6 100644 --- a/src/libstd/rope.rs +++ b/src/libstd/rope.rs @@ -1345,7 +1345,7 @@ mod tests { fn aux(str: @mutable str, node: @node::node) unsafe { alt(*node) { node::leaf(x) { - *str += str::unsafe::slice_bytes( + *str += str::slice( *x.content, x.byte_offset, x.byte_offset + x.byte_len); } diff --git a/src/libstd/sha1.rs b/src/libstd/sha1.rs index c28d67a7526cd..1b63f87f7c62a 100644 --- a/src/libstd/sha1.rs +++ b/src/libstd/sha1.rs @@ -372,7 +372,7 @@ mod tests { let left = len; while left > 0u { let take = (left + 1u) / 2u; - sh.input_str(str::unsafe::slice_bytes(t.input, len - left, + sh.input_str(str::slice(t.input, len - left, take + len - left)); left = left - take; } diff --git a/src/rustdoc/markdown_pass.rs b/src/rustdoc/markdown_pass.rs index 1f82e892c0e51..d473f25d09093 100644 --- a/src/rustdoc/markdown_pass.rs +++ b/src/rustdoc/markdown_pass.rs @@ -56,10 +56,10 @@ fn should_write_modules_last() { fn d() { }" ); - let idx_a = option::get(str::find_bytes(markdown, "# Module `a`")); - let idx_b = option::get(str::find_bytes(markdown, "## Function `b`")); - let idx_c = option::get(str::find_bytes(markdown, "# Module `c`")); - let idx_d = option::get(str::find_bytes(markdown, "## Function `d`")); + let idx_a = option::get(str::find(markdown, "# Module `a`")); + let idx_b = option::get(str::find(markdown, "## Function `b`")); + let idx_c = option::get(str::find(markdown, "# Module `c`")); + let idx_d = option::get(str::find(markdown, "## Function `d`")); 
assert idx_b < idx_d; assert idx_d < idx_a; From 050072c41066a46e8109a7a4033337768a41618b Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Thu, 23 Feb 2012 01:44:04 -0800 Subject: [PATCH 14/17] (core::str) remove len_bytes alias --- src/cargo/cargo.rs | 8 +- src/comp/back/link.rs | 7 +- src/comp/driver/diagnostic.rs | 4 +- src/comp/driver/rustc.rs | 2 +- src/comp/middle/trans/common.rs | 4 +- src/comp/middle/trans/debuginfo.rs | 5 +- src/comp/middle/trans/tvec.rs | 2 +- src/comp/syntax/codemap.rs | 5 +- src/comp/syntax/parse/lexer.rs | 14 ++-- src/comp/syntax/print/pp.rs | 2 +- src/comp/syntax/print/pprust.rs | 6 +- src/comp/util/ppaux.rs | 4 +- src/compiletest/errors.rs | 4 +- src/compiletest/header.rs | 6 +- src/fuzzer/fuzzer.rs | 4 +- src/libcore/extfmt.rs | 8 +- src/libcore/float.rs | 2 +- src/libcore/str.rs | 99 ++++++++++++------------- src/libcore/u64.rs | 4 +- src/libcore/uint.rs | 2 +- src/libstd/fs.rs | 12 +-- src/libstd/getopts.rs | 6 +- src/libstd/json.rs | 24 +++--- src/libstd/rand.rs | 6 +- src/libstd/rope.rs | 10 +-- src/libstd/sha1.rs | 2 +- src/rustdoc/desc_to_brief_pass.rs | 2 +- src/rustdoc/unindent_pass.rs | 4 +- src/test/bench/99bob-iter.rs | 2 +- src/test/bench/99bob-simple.rs | 2 +- src/test/bench/shootout-fasta.rs | 10 +-- src/test/run-pass/bind-native-fn.rs | 2 +- src/test/run-pass/string-self-append.rs | 4 +- src/test/run-pass/utf8_chars.rs | 2 +- 34 files changed, 135 insertions(+), 145 deletions(-) diff --git a/src/cargo/cargo.rs b/src/cargo/cargo.rs index caaedab97c9a3..4d67877afd8ac 100644 --- a/src/cargo/cargo.rs +++ b/src/cargo/cargo.rs @@ -174,10 +174,10 @@ fn print(s: str) { } fn rest(s: str, start: uint) -> str { - if (start >= str::len_bytes(s)) { + if (start >= str::len(s)) { "" } else { - str::slice(s, start, str::len_bytes(s)) + str::slice(s, start, str::len(s)) } } @@ -689,7 +689,7 @@ fn cmd_install(c: cargo) unsafe { alt str::index(uuid, '/') { option::some(idx) { let source = str::slice(uuid, 0u, idx); - uuid = 
str::slice(uuid, idx + 1u, str::len_bytes(uuid)); + uuid = str::slice(uuid, idx + 1u, str::len(uuid)); install_uuid_specific(c, wd, source, uuid); } option::none { @@ -701,7 +701,7 @@ fn cmd_install(c: cargo) unsafe { alt str::index(name, '/') { option::some(idx) { let source = str::slice(name, 0u, idx); - name = str::slice(name, idx + 1u, str::len_bytes(name)); + name = str::slice(name, idx + 1u, str::len(name)); install_named_specific(c, wd, source, name); } option::none { diff --git a/src/comp/back/link.rs b/src/comp/back/link.rs index 572fde7bd3f6f..23ddb56c8f068 100644 --- a/src/comp/back/link.rs +++ b/src/comp/back/link.rs @@ -395,7 +395,7 @@ fn build_link_meta(sess: session, c: ast::crate, output: str, metas: provided_metas, dep_hashes: [str]) -> str { fn len_and_str(s: str) -> str { - ret #fmt["%u_%s", str::len_bytes(s), s]; + ret #fmt["%u_%s", str::len(s), s]; } fn len_and_str_lit(l: ast::lit) -> str { @@ -520,7 +520,7 @@ fn mangle(ss: path) -> str { for s in ss { alt s { path_name(s) | path_mod(s) { - n += #fmt["%u%s", str::len_bytes(s), s]; + n += #fmt["%u%s", str::len(s), s]; } } } n += "E"; // End name-sequence. 
@@ -572,8 +572,7 @@ fn link_binary(sess: session, (config.os == session::os_linux || config.os == session::os_freebsd) && option::is_some(found) && option::get(found) == 0u { - ret str::slice(filename, 3u, - str::len_bytes(filename)); + ret str::slice(filename, 3u, str::len(filename)); } else { ret filename; } }; fn rmext(filename: str) -> str { diff --git a/src/comp/driver/diagnostic.rs b/src/comp/driver/diagnostic.rs index f0c97588f5967..8f0570a434959 100644 --- a/src/comp/driver/diagnostic.rs +++ b/src/comp/driver/diagnostic.rs @@ -210,7 +210,7 @@ fn highlight_lines(cm: codemap::codemap, sp: span, if elided { let last_line = display_lines[vec::len(display_lines) - 1u]; let s = #fmt["%s:%u ", fm.name, last_line + 1u]; - let indent = str::len_bytes(s); + let indent = str::len(s); let out = ""; while indent > 0u { out += " "; indent -= 1u; } out += "...\n"; @@ -228,7 +228,7 @@ fn highlight_lines(cm: codemap::codemap, sp: span, while num > 0u { num /= 10u; digits += 1u; } // indent past |name:## | and the 0-offset column location - let left = str::len_bytes(fm.name) + digits + lo.col + 3u; + let left = str::len(fm.name) + digits + lo.col + 3u; let s = ""; while left > 0u { str::push_char(s, ' '); left -= 1u; } diff --git a/src/comp/driver/rustc.rs b/src/comp/driver/rustc.rs index 5186290e4a4cb..8d0b854906230 100644 --- a/src/comp/driver/rustc.rs +++ b/src/comp/driver/rustc.rs @@ -13,7 +13,7 @@ import rustc::driver::diagnostic; fn version(argv0: str) { let vers = "unknown version"; let env_vers = #env["CFG_VERSION"]; - if str::len_bytes(env_vers) != 0u { vers = env_vers; } + if str::len(env_vers) != 0u { vers = env_vers; } io::stdout().write_str(#fmt["%s %s\n", argv0, vers]); io::stdout().write_str(#fmt["host: %s\n", host_triple()]); } diff --git a/src/comp/middle/trans/common.rs b/src/comp/middle/trans/common.rs index 24af4e33b9abf..1c04bc8c4c69a 100644 --- a/src/comp/middle/trans/common.rs +++ b/src/comp/middle/trans/common.rs @@ -777,7 +777,7 @@ fn C_u8(i: uint) 
-> ValueRef { ret C_integral(T_i8(), i as u64, False); } // our boxed-and-length-annotated strings. fn C_cstr(cx: crate_ctxt, s: str) -> ValueRef { let sc = str::as_buf(s) {|buf| - llvm::LLVMConstString(buf, str::len_bytes(s) as unsigned, False) + llvm::LLVMConstString(buf, str::len(s) as unsigned, False) }; let g = str::as_buf(cx.names("str"), @@ -791,7 +791,7 @@ fn C_cstr(cx: crate_ctxt, s: str) -> ValueRef { // Returns a Plain Old LLVM String: fn C_postr(s: str) -> ValueRef { ret str::as_buf(s) {|buf| - llvm::LLVMConstString(buf, str::len_bytes(s) as unsigned, False) + llvm::LLVMConstString(buf, str::len(s) as unsigned, False) }; } diff --git a/src/comp/middle/trans/debuginfo.rs b/src/comp/middle/trans/debuginfo.rs index dff4f26ae99fb..47024432b4e12 100644 --- a/src/comp/middle/trans/debuginfo.rs +++ b/src/comp/middle/trans/debuginfo.rs @@ -48,7 +48,7 @@ const DW_ATE_unsigned_char: int = 0x08; fn llstr(s: str) -> ValueRef { str::as_buf(s, {|sbuf| - llvm::LLVMMDString(sbuf, str::len_bytes(s) as ctypes::c_uint) + llvm::LLVMMDString(sbuf, str::len(s) as ctypes::c_uint) }) } fn lltag(lltag: int) -> ValueRef { @@ -167,8 +167,7 @@ fn create_compile_unit(cx: crate_ctxt, full_path: str) let work_dir = cx.sess.working_dir; let file_path = if str::starts_with(full_path, work_dir) { - str::slice(full_path, str::len_bytes(work_dir), - str::len_bytes(full_path)) + str::slice(full_path, str::len(work_dir), str::len(full_path)) } else { full_path }; diff --git a/src/comp/middle/trans/tvec.rs b/src/comp/middle/trans/tvec.rs index ab9412263d2cf..aeaf65db7f4c0 100644 --- a/src/comp/middle/trans/tvec.rs +++ b/src/comp/middle/trans/tvec.rs @@ -126,7 +126,7 @@ fn trans_vec(bcx: block, args: [@ast::expr], id: ast::node_id, } fn trans_str(bcx: block, s: str, dest: dest) -> block { - let veclen = str::len_bytes(s) + 1u; // +1 for \0 + let veclen = str::len(s) + 1u; // +1 for \0 let {bcx: bcx, val: sptr, _} = alloc(bcx, ty::mk_str(bcx.tcx()), veclen); diff --git 
a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs index e9b4569df6ca9..c8b33687abe5e 100644 --- a/src/comp/syntax/codemap.rs +++ b/src/comp/syntax/codemap.rs @@ -157,10 +157,9 @@ fn span_to_lines(sp: span, cm: codemap::codemap) -> @file_lines { fn get_line(fm: filemap, line: int) -> str unsafe { let begin: uint = fm.lines[line].byte - fm.start_pos.byte; - let end = alt str::index_from(*fm.src, '\n', begin, - str::len_bytes(*fm.src)) { + let end = alt str::index_from(*fm.src, '\n', begin, str::len(*fm.src)) { some(e) { e } - none { str::len_bytes(*fm.src) } + none { str::len(*fm.src) } }; str::slice(*fm.src, begin, end) } diff --git a/src/comp/syntax/parse/lexer.rs b/src/comp/syntax/parse/lexer.rs index 0328afc9f20d7..86d925e87e65f 100644 --- a/src/comp/syntax/parse/lexer.rs +++ b/src/comp/syntax/parse/lexer.rs @@ -64,7 +64,7 @@ fn new_reader(cm: codemap::codemap, itr: @interner::interner) -> reader { let r = @{cm: cm, span_diagnostic: span_diagnostic, - src: filemap.src, len: str::len_bytes(*filemap.src), + src: filemap.src, len: str::len(*filemap.src), mutable col: 0u, mutable pos: 0u, mutable curr: -1 as char, mutable chpos: filemap.start_pos.ch, mutable strs: [], filemap: filemap, interner: itr}; @@ -163,7 +163,7 @@ fn scan_exponent(rdr: reader) -> option { rdr.bump(); } let exponent = scan_digits(rdr, 10u); - if str::len_bytes(exponent) > 0u { + if str::len(exponent) > 0u { ret some(rslt + exponent); } else { rdr.fatal("scan_exponent: bad fp literal"); } } else { ret none::; } @@ -226,7 +226,7 @@ fn scan_number(c: char, rdr: reader) -> token::token { tp = if signed { either::left(ast::ty_i64) } else { either::right(ast::ty_u64) }; } - if str::len_bytes(num_str) == 0u { + if str::len(num_str) == 0u { rdr.fatal("no valid digits found for number"); } let parsed = option::get(u64::from_str(num_str, base as u64)); @@ -273,7 +273,7 @@ fn scan_number(c: char, rdr: reader) -> token::token { ret token::LIT_FLOAT(interner::intern(*rdr.interner, num_str), 
ast::ty_f); } else { - if str::len_bytes(num_str) == 0u { + if str::len(num_str) == 0u { rdr.fatal("no valid digits found for number"); } let parsed = option::get(u64::from_str(num_str, base as u64)); @@ -610,8 +610,8 @@ fn trim_whitespace_prefix_and_push_line(&lines: [str], s: str, col: uint) unsafe { let s1; if all_whitespace(s, 0u, col) { - if col < str::len_bytes(s) { - s1 = str::slice(s, col, str::len_bytes(s)); + if col < str::len(s) { + s1 = str::slice(s, col, str::len(s)); } else { s1 = ""; } } else { s1 = s; } log(debug, "pushing line: " + s1); @@ -651,7 +651,7 @@ fn read_block_comment(rdr: reader, code_to_the_left: bool) -> cmnt { } } } - if str::len_bytes(curr_line) != 0u { + if str::len(curr_line) != 0u { trim_whitespace_prefix_and_push_line(lines, curr_line, col); } let style = if code_to_the_left { trailing } else { isolated }; diff --git a/src/comp/syntax/print/pp.rs b/src/comp/syntax/print/pp.rs index 9114caea2d4e2..3ee3131429929 100644 --- a/src/comp/syntax/print/pp.rs +++ b/src/comp/syntax/print/pp.rs @@ -491,7 +491,7 @@ fn end(p: printer) { p.pretty_print(END); } fn eof(p: printer) { p.pretty_print(EOF); } fn word(p: printer, wrd: str) { - p.pretty_print(STRING(wrd, str::len_bytes(wrd) as int)); + p.pretty_print(STRING(wrd, str::len(wrd) as int)); } fn huge_word(p: printer, wrd: str) { diff --git a/src/comp/syntax/print/pprust.rs b/src/comp/syntax/print/pprust.rs index 288d8a38dda7d..76ab2b4ecb542 100644 --- a/src/comp/syntax/print/pprust.rs +++ b/src/comp/syntax/print/pprust.rs @@ -204,7 +204,7 @@ fn head(s: ps, w: str) { // outer-box is consistent cbox(s, indent_unit); // head-box is inconsistent - ibox(s, str::len_bytes(w) + 1u); + ibox(s, str::len(w) + 1u); // keyword that starts the head word_nbsp(s, w); } @@ -1465,7 +1465,7 @@ fn print_ty_fn(s: ps, opt_proto: option, popen(s); fn print_arg(s: ps, input: ast::arg) { print_arg_mode(s, input.mode); - if str::len_bytes(input.ident) > 0u { + if str::len(input.ident) > 0u { word_space(s, 
input.ident + ":"); } print_type(s, input.ty); @@ -1647,7 +1647,7 @@ fn print_string(s: ps, st: str) { fn escape_str(st: str, to_escape: char) -> str { let out: str = ""; - let len = str::len_bytes(st); + let len = str::len(st); let i = 0u; while i < len { alt st[i] as char { diff --git a/src/comp/util/ppaux.rs b/src/comp/util/ppaux.rs index 18bd05f2b8d3c..0ed630629b719 100644 --- a/src/comp/util/ppaux.rs +++ b/src/comp/util/ppaux.rs @@ -129,9 +129,9 @@ fn ty_to_str(cx: ctxt, typ: t) -> str { } } -fn ty_to_short_str(cx: ctxt, typ: t) -> str unsafe { +fn ty_to_short_str(cx: ctxt, typ: t) -> str { let s = encoder::encoded_ty(cx, typ); - if str::len_bytes(s) >= 32u { s = str::slice(s, 0u, 32u); } + if str::len(s) >= 32u { s = str::slice(s, 0u, 32u); } ret s; } diff --git a/src/compiletest/errors.rs b/src/compiletest/errors.rs index 1184fc5f0f25f..90b0c7d34e5f4 100644 --- a/src/compiletest/errors.rs +++ b/src/compiletest/errors.rs @@ -27,13 +27,13 @@ fn parse_expected(line_num: uint, line: str) -> [expected_error] unsafe { let idx; alt str::find(line, error_tag) { option::none { ret []; } - option::some(nn) { idx = (nn as uint) + str::len_bytes(error_tag); } + option::some(nn) { idx = (nn as uint) + str::len(error_tag); } } // "//!^^^ kind msg" denotes a message expected // three lines above current line: let adjust_line = 0u; - let len = str::len_bytes(line); + let len = str::len(line); while idx < len && line[idx] == ('^' as u8) { adjust_line += 1u; idx += 1u; diff --git a/src/compiletest/header.rs b/src/compiletest/header.rs index 8071edad2fe7f..099598d7fdb52 100644 --- a/src/compiletest/header.rs +++ b/src/compiletest/header.rs @@ -108,10 +108,8 @@ fn parse_name_value_directive(line: str, let keycolon = directive + ":"; alt str::find(line, keycolon) { option::some(colon) { - let value = - str::slice(line, - colon + str::len_bytes(keycolon), - str::len_bytes(line)); + let value = str::slice(line, colon + str::len(keycolon), + str::len(line)); #debug("%s: %s", 
directive, value); option::some(value) } diff --git a/src/fuzzer/fuzzer.rs b/src/fuzzer/fuzzer.rs index d6b2cf8b0ec91..4b614d1f08b6a 100644 --- a/src/fuzzer/fuzzer.rs +++ b/src/fuzzer/fuzzer.rs @@ -287,7 +287,7 @@ fn check_variants_T( fn last_part(filename: str) -> str { let ix = option::get(str::rindex(filename, '/')); - str::slice(filename, ix + 1u, str::len_bytes(filename) - 3u) + str::slice(filename, ix + 1u, str::len(filename) - 3u) } enum happiness { passed, cleanly_rejected(str), known_bug(str), failed(str), } @@ -335,7 +335,7 @@ fn removeDirIfExists(filename: str) { fn check_running(exe_filename: str) -> happiness { let p = std::run::program_output("/Users/jruderman/scripts/timed_run_rust_program.py", [exe_filename]); let comb = p.out + "\n" + p.err; - if str::len_bytes(comb) > 1u { + if str::len(comb) > 1u { log(error, "comb comb comb: " + comb); } diff --git a/src/libcore/extfmt.rs b/src/libcore/extfmt.rs index 079451498f1b0..b5e2eade244cb 100644 --- a/src/libcore/extfmt.rs +++ b/src/libcore/extfmt.rs @@ -82,10 +82,10 @@ mod ct { fn parse_fmt_string(s: str, error: error_fn) -> [piece] unsafe { let pieces: [piece] = []; - let lim = str::len_bytes(s); + let lim = str::len(s); let buf = ""; fn flush_buf(buf: str, &pieces: [piece]) -> str { - if str::len_bytes(buf) > 0u { + if str::len(buf) > 0u { let piece = piece_string(buf); pieces += [piece]; } @@ -433,12 +433,12 @@ mod rt { // zeros. It may make sense to convert zero padding to a precision // instead. 
- if signed && zero_padding && str::len_bytes(s) > 0u { + if signed && zero_padding && str::len(s) > 0u { let head = s[0]; if head == '+' as u8 || head == '-' as u8 || head == ' ' as u8 { let headstr = str::from_bytes([head]); // FIXME: not UTF-8 safe - let bytelen = str::len_bytes(s); + let bytelen = str::len(s); let numpart = str::slice(s, 1u, bytelen); ret headstr + padstr + numpart; } diff --git a/src/libcore/float.rs b/src/libcore/float.rs index b21d154097faf..cea46dedf44d6 100644 --- a/src/libcore/float.rs +++ b/src/libcore/float.rs @@ -133,7 +133,7 @@ number represented by [num]. fn from_str(num: str) -> option { let pos = 0u; //Current byte position in the string. //Used to walk the string in O(n). - let len = str::len_bytes(num); //Length of the string, in bytes. + let len = str::len(num); //Length of the string, in bytes. if len == 0u { ret none; } let total = 0f; //Accumulated result diff --git a/src/libcore/str.rs b/src/libcore/str.rs index fb7ec4eec2e5b..c199ead460f56 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -87,7 +87,7 @@ export is_empty, is_not_empty, is_whitespace, - len, len_bytes, + len, len_chars, // Misc @@ -277,7 +277,7 @@ Failure: If the string does not contain any characters. */ fn pop_char(&s: str) -> char unsafe { - let end = len_bytes(s); + let end = len(s); let {ch:ch, prev:end} = char_range_at_reverse(s, end); s = unsafe::slice_bytes(s, 0u, end); ret ch; @@ -294,7 +294,7 @@ If the string does not contain any characters. 
*/ fn shift_char(&s: str) -> char unsafe { let r = char_range_at(s, 0u); - s = unsafe::slice_bytes(s, r.next, len_bytes(s)); + s = unsafe::slice_bytes(s, r.next, len(s)); ret r.ch; } @@ -373,7 +373,7 @@ Convert a string to a vector of characters fn chars(s: str) -> [char] { let buf: [char] = []; let i = 0u; - let len = len_bytes(s); + let len = len(s); while i < len { let cur = char_range_at(s, i); buf += [cur.ch]; @@ -474,7 +474,7 @@ fn splitn_byte(ss: str, sep: u8, count: uint) -> [str] unsafe { assert u8::is_ascii(sep); let vv = []; - let start = 0u, current = 0u, len = len_bytes(ss); + let start = 0u, current = 0u, len = len(ss); let splits_done = 0u; while splits_done < count && current < len { @@ -504,7 +504,7 @@ fn split_str(ss: str, sep: str) -> [str] unsafe { // unsafe is justified: we are splitting // UTF-8 with UTF-8, so the results will be OK - let sep_len = len_bytes(sep); + let sep_len = len(sep); assert sep_len > 0u; let vv = []; let start = 0u, start_match = 0u, current = 0u, matching = 0u; @@ -581,7 +581,7 @@ up to `count` times fn splitn_char(ss: str, sep: char, count: uint) -> [str] unsafe { let vv = []; - let start = 0u, current = 0u, len = len_bytes(ss); + let start = 0u, current = 0u, len = len(ss); let splits_done = 0u; while splits_done < count && current < len { @@ -688,11 +688,11 @@ The original string with all occurances of `from` replaced with `to` */ fn replace(s: str, from: str, to: str) -> str unsafe { assert is_not_empty(from); - if len_bytes(s) == 0u { + if len(s) == 0u { ret ""; } else if starts_with(s, from) { ret to + replace( - unsafe::slice_bytes(s, len_bytes(from), len_bytes(s)), + unsafe::slice_bytes(s, len(from), len(s)), from, to); } else { let idx; @@ -701,8 +701,8 @@ fn replace(s: str, from: str, to: str) -> str unsafe { none { ret s; } } let before = unsafe::slice_bytes(s, 0u, idx as uint); - let after = unsafe::slice_bytes(s, idx as uint + len_bytes(from), - len_bytes(s)); + let after = unsafe::slice_bytes(s, idx as 
uint + len(from), + len(s)); ret before + to + replace(after, from, to); } } @@ -761,7 +761,7 @@ Return true if a predicate matches all characters or if the string contains no characters */ fn all(s: str, it: fn(char) -> bool) -> bool{ - ret substr_all(s, 0u, len_bytes(s), it); + ret substr_all(s, 0u, len(s), it); } /* @@ -781,7 +781,7 @@ Apply a function to each character */ fn map(ss: str, ff: fn(char) -> char) -> str { let result = ""; - reserve(result, len_bytes(ss)); + reserve(result, len(ss)); chars_iter(ss, {|cc| str::push_char(result, ff(cc)); @@ -797,7 +797,7 @@ Iterate over the bytes in a string */ fn bytes_iter(ss: str, it: fn(u8)) { let pos = 0u; - let len = len_bytes(ss); + let len = len(ss); while (pos < len) { it(ss[pos]); @@ -811,7 +811,7 @@ Function: chars_iter Iterate over the characters in a string */ fn chars_iter(s: str, it: fn(char)) { - let pos = 0u, len = len_bytes(s); + let pos = 0u, len = len(s); while (pos < len) { let {ch, next} = char_range_at(s, pos); pos = next; @@ -866,7 +866,7 @@ Section: Searching // Returns the byte index of the first matching char // (as option some/none) fn index(ss: str, cc: char) -> option { - index_from(ss, cc, 0u, len_bytes(ss)) + index_from(ss, cc, 0u, len(ss)) } // Function: index_from @@ -897,7 +897,7 @@ fn index_from(ss: str, cc: char, start: uint, end: uint) -> option { fn index_chars(ss: str, cc: char) -> option { let bii = 0u; let cii = 0u; - let len = len_bytes(ss); + let len = len(ss); while bii < len { let {ch, next} = char_range_at(ss, bii); @@ -920,7 +920,7 @@ fn index_chars(ss: str, cc: char) -> option { // (as option some/none) // FIXME: delete fn byte_index(s: str, b: u8) -> option { - byte_index_from(s, b, 0u, len_bytes(s)) + byte_index_from(s, b, 0u, len(s)) } // Function: byte_index_from @@ -930,7 +930,7 @@ fn byte_index(s: str, b: u8) -> option { // (as option some/none) // FIXME: delete fn byte_index_from(s: str, b: u8, start: uint, end: uint) -> option { - assert end <= len_bytes(s); + 
assert end <= len(s); str::as_bytes(s) { |v| vec::position_from(v, start, end) { |x| x == b } } } @@ -940,7 +940,7 @@ fn byte_index_from(s: str, b: u8, start: uint, end: uint) -> option { // Returns the byte index of the first matching char // (as option some/none) fn rindex(ss: str, cc: char) -> option { - let bii = len_bytes(ss); + let bii = len(ss); while bii > 0u { let {ch, prev} = char_range_at_reverse(ss, bii); bii = prev; @@ -960,7 +960,7 @@ fn rindex(ss: str, cc: char) -> option { // Returns the char index of the first matching char // (as option some/none) fn rindex_chars(ss: str, cc: char) -> option { - let bii = len_bytes(ss); + let bii = len(ss); let cii = len_chars(ss); while bii > 0u { let {ch, prev} = char_range_at_reverse(ss, bii); @@ -982,7 +982,7 @@ fn rindex_chars(ss: str, cc: char) -> option { // Find the byte position of the first instance of one string // within another, or return option::none fn find(haystack: str, needle: str) -> option { - find_from(haystack, needle, 0u, len_bytes(haystack)) + find_from(haystack, needle, 0u, len(haystack)) } //Function: find_from @@ -993,9 +993,9 @@ fn find(haystack: str, needle: str) -> option { // FIXME: Boyer-Moore should be significantly faster fn find_from(haystack: str, needle: str, start: uint, end:uint) -> option { - assert end <= len_bytes(haystack); + assert end <= len(haystack); - let needle_len = len_bytes(needle); + let needle_len = len(needle); if needle_len == 0u { ret some(start); } if needle_len > end { ret none; } @@ -1031,7 +1031,7 @@ fn find_chars(haystack: str, needle: str) -> option { // Convert a byte position into a char position // within a given string fn b2c_pos(ss: str, bpos: uint) -> uint { - assert bpos == 0u || bpos < len_bytes(ss); + assert bpos == 0u || bpos < len(ss); let ii = 0u; let cpos = 0u; @@ -1070,8 +1070,8 @@ haystack - The string to look in needle - The string to look for */ fn starts_with(haystack: str, needle: str) -> bool unsafe { - let haystack_len: uint = 
len_bytes(haystack); - let needle_len: uint = len_bytes(needle); + let haystack_len: uint = len(haystack); + let needle_len: uint = len(needle); if needle_len == 0u { ret true; } if needle_len > haystack_len { ret false; } ret eq(unsafe::slice_bytes(haystack, 0u, needle_len), needle); @@ -1108,7 +1108,7 @@ Function: is_ascii Determines if a string contains only ASCII characters */ fn is_ascii(s: str) -> bool { - let i: uint = len_bytes(s); + let i: uint = len(s); while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { ret false; } } ret true; } @@ -1137,10 +1137,11 @@ fn is_whitespace(s: str) -> bool { } -// Function: len_bytes +// Function: len // -// Returns the string length in bytes -pure fn len_bytes(s: str) -> uint unsafe { +// Returns the string length/size in bytes +// not counting the null terminator +pure fn len(s: str) -> uint unsafe { as_bytes(s) { |v| let vlen = vec::len(v); // There should always be a null terminator @@ -1149,14 +1150,8 @@ pure fn len_bytes(s: str) -> uint unsafe { } } -// Function: len -// -// String length or size in characters. -// (Synonym: len_bytes) -fn len(s: str) -> uint { len_bytes(s) } - fn len_chars(s: str) -> uint { - substr_len_chars(s, 0u, len_bytes(s)) + substr_len_chars(s, 0u, len(s)) } /* @@ -1277,7 +1272,7 @@ This function can be used to iterate over the unicode characters of a string. 
Example: > let s = "中华Việt Nam"; > let i = 0u; -> while i < str::len_bytes(s) { +> while i < str::len(s) { > let {ch, next} = str::char_range_at(s, i); > std::io::println(#fmt("%u: %c",i,ch)); > i = next; @@ -1517,7 +1512,7 @@ mod unsafe { unsafe fn slice_bytes(s: str, begin: uint, end: uint) -> str unsafe { // FIXME: Typestate precondition assert (begin <= end); - assert (end <= len_bytes(s)); + assert (end <= len(s)); let v = as_bytes(s) { |v| vec::slice(v, begin, end) }; v += [0u8]; @@ -1534,7 +1529,7 @@ mod unsafe { unsafe fn slice_bytes_safe_range(s: str, begin: uint, end: uint) : uint::le(begin, end) -> str { // would need some magic to make this a precondition - assert (end <= len_bytes(s)); + assert (end <= len(s)); ret slice_bytes(s, begin, end); } @@ -1556,7 +1551,7 @@ mod unsafe { // // Removes the last byte from a string and returns it. (Not UTF-8 safe). unsafe fn pop_byte(&s: str) -> u8 unsafe { - let len = len_bytes(s); + let len = len(s); assert (len > 0u); let b = s[len - 1u]; s = unsafe::slice_bytes(s, 0u, len - 1u); @@ -1567,7 +1562,7 @@ mod unsafe { // // Removes the first byte from a string and returns it. (Not UTF-8 safe). 
unsafe fn shift_byte(&s: str) -> u8 unsafe { - let len = len_bytes(s); + let len = len(s); assert (len > 0u); let b = s[0]; s = unsafe::slice_bytes(s, 1u, len); @@ -1597,13 +1592,13 @@ mod tests { #[test] fn test_len() { - assert (len_bytes("") == 0u); - assert (len_bytes("hello world") == 11u); - assert (len_bytes("\x63") == 1u); - assert (len_bytes("\xa2") == 2u); - assert (len_bytes("\u03c0") == 2u); - assert (len_bytes("\u2620") == 3u); - assert (len_bytes("\U0001d11e") == 4u); + assert (len("") == 0u); + assert (len("hello world") == 11u); + assert (len("\x63") == 1u); + assert (len("\xa2") == 2u); + assert (len("\u03c0") == 2u); + assert (len("\u2620") == 3u); + assert (len("\U0001d11e") == 4u); assert (len_chars("") == 0u); assert (len_chars("hello world") == 11u); @@ -1906,7 +1901,7 @@ mod tests { #[test] fn test_substr() { fn t(a: str, b: str, start: int) { - assert (eq(substr(a, start as uint, len_bytes(b)), b)); + assert (eq(substr(a, start as uint, len(b)), b)); } t("hello", "llo", 2); t("hello", "el", 1); @@ -2285,7 +2280,7 @@ mod tests { let v: [u8] = bytes(s1); let s2: str = from_bytes(v); let i: uint = 0u; - let n1: uint = len_bytes(s1); + let n1: uint = len(s1); let n2: uint = vec::len::(v); assert (n1 == n2); while i < n1 { diff --git a/src/libcore/u64.rs b/src/libcore/u64.rs index ac5f83cc415a4..77b658f066234 100644 --- a/src/libcore/u64.rs +++ b/src/libcore/u64.rs @@ -118,8 +118,8 @@ Function: from_str Parse a string as an unsigned integer. 
*/ fn from_str(buf: str, radix: u64) -> option { - if str::len_bytes(buf) == 0u { ret none; } - let i = str::len_bytes(buf) - 1u; + if str::len(buf) == 0u { ret none; } + let i = str::len(buf) - 1u; let power = 1u64, n = 0u64; while true { alt char::to_digit(buf[i] as char, radix as uint) { diff --git a/src/libcore/uint.rs b/src/libcore/uint.rs index b5e425d2b1de5..230c6a2e90c55 100644 --- a/src/libcore/uint.rs +++ b/src/libcore/uint.rs @@ -248,7 +248,7 @@ fn to_str(num: uint, radix: uint) -> str { n /= radix; } let s1: str = ""; - let len: uint = str::len_bytes(s); + let len: uint = str::len(s); while len != 0u { len -= 1u; s1 += str::from_byte(s[len]); } ret s1; } diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs index 167e0f125d6a3..3ba2f8b105ad0 100644 --- a/src/libstd/fs.rs +++ b/src/libstd/fs.rs @@ -45,7 +45,7 @@ fn splitDirnameBasename (pp: path) -> {dirname: str, basename: str} { } ret {dirname: str::slice(pp, 0u, ii), - basename: str::slice(pp, ii + 1u, str::len_bytes(pp))}; + basename: str::slice(pp, ii + 1u, str::len(pp))}; } /* @@ -93,8 +93,8 @@ fn connect(pre: path, post: path) -> path unsafe { let pre_ = pre; let post_ = post; let sep = os_fs::path_sep as u8; - let pre_len = str::len_bytes(pre); - let post_len = str::len_bytes(post); + let pre_len = str::len(pre); + let post_len = str::len(post); if pre_len > 1u && pre[pre_len-1u] == sep { str::unsafe::pop_byte(pre_); } if post_len > 1u && post[0] == sep { str::unsafe::shift_byte(post_); } ret pre_ + path_sep() + post_; @@ -170,7 +170,7 @@ Lists the contents of a directory. */ fn list_dir(p: path) -> [str] { let p = p; - let pl = str::len_bytes(p); + let pl = str::len(p); if pl == 0u || p[pl - 1u] as char != os_fs::path_sep { p += path_sep(); } let full_paths: [str] = []; for filename: str in os_fs::list_dir(p) { @@ -336,7 +336,7 @@ fn normalize(p: path) -> path { let s = reabsolute(p, s); let s = reterminate(p, s); - let s = if str::len_bytes(s) == 0u { + let s = if str::len(s) == 0u { "." 
} else { s @@ -403,7 +403,7 @@ fn normalize(p: path) -> path { } fn reterminate(orig: path, new: path) -> path { - let last = orig[str::len_bytes(orig) - 1u]; + let last = orig[str::len(orig) - 1u]; if last == os_fs::path_sep as u8 || last == os_fs::path_sep as u8 { ret new + path_sep(); diff --git a/src/libstd/getopts.rs b/src/libstd/getopts.rs index 25681185f9b58..7b409053cb0b1 100644 --- a/src/libstd/getopts.rs +++ b/src/libstd/getopts.rs @@ -79,7 +79,7 @@ A description of a possible option type opt = {name: name, hasarg: hasarg, occur: occur}; fn mkname(nm: str) -> name { - ret if str::len_bytes(nm) == 1u { + ret if str::len(nm) == 1u { short(str::char_at(nm, 0u)) } else { long(nm) }; } @@ -141,7 +141,7 @@ of matches and a vector of free strings. type match = {opts: [opt], vals: [mutable [optval]], free: [str]}; fn is_arg(arg: str) -> bool { - ret str::len_bytes(arg) > 1u && arg[0] == '-' as u8; + ret str::len(arg) > 1u && arg[0] == '-' as u8; } fn name_str(nm: name) -> str { @@ -218,7 +218,7 @@ fn getopts(args: [str], opts: [opt]) -> result unsafe { let i = 0u; while i < l { let cur = args[i]; - let curlen = str::len_bytes(cur); + let curlen = str::len(cur); if !is_arg(cur) { free += [cur]; } else if str::eq(cur, "--") { diff --git a/src/libstd/json.rs b/src/libstd/json.rs index 32294f38fb94f..7a888f250eda3 100644 --- a/src/libstd/json.rs +++ b/src/libstd/json.rs @@ -70,13 +70,13 @@ fn to_str(j: json) -> str { } fn rest(s: str) -> str { - assert(str::len_bytes(s) >= 1u); - str::slice(s, 1u, str::len_bytes(s)) + assert(str::len(s) >= 1u); + str::slice(s, 1u, str::len(s)) } fn from_str_str(s: str) -> (option, str) { let pos = 0u; - let len = str::len_bytes(s); + let len = str::len(s); let escape = false; let res = ""; @@ -99,7 +99,7 @@ fn from_str_str(s: str) -> (option, str) { cont; } else if (c == '"') { ret (some(string(res)), - str::slice(s, pos, str::len_bytes(s))); + str::slice(s, pos, str::len(s))); } res = res + str::from_char(c); } @@ -172,7 +172,7 @@ 
fn from_str_dict(s: str) -> (option, str) { fn from_str_float(s: str) -> (option, str) { let pos = 0u; - let len = str::len_bytes(s); + let len = str::len(s); let res = 0f; let neg = 1f; @@ -200,13 +200,13 @@ fn from_str_float(s: str) -> (option, str) { } '.' { break; } _ { ret (some(num(neg * res)), - str::slice(s, opos, str::len_bytes(s))); } + str::slice(s, opos, str::len(s))); } } } if pos == len { ret (some(num(neg * res)), - str::slice(s, pos, str::len_bytes(s))); + str::slice(s, pos, str::len(s))); } let dec = 1f; @@ -221,17 +221,17 @@ fn from_str_float(s: str) -> (option, str) { res += (((c as int) - ('0' as int)) as float) * dec; } _ { ret (some(num(neg * res)), - str::slice(s, opos, str::len_bytes(s))); } + str::slice(s, opos, str::len(s))); } } } - ret (some(num(neg * res)), str::slice(s, pos, str::len_bytes(s))); + ret (some(num(neg * res)), str::slice(s, pos, str::len(s))); } fn from_str_bool(s: str) -> (option, str) { if (str::starts_with(s, "true")) { - (some(boolean(true)), str::slice(s, 4u, str::len_bytes(s))) + (some(boolean(true)), str::slice(s, 4u, str::len(s))) } else if (str::starts_with(s, "false")) { - (some(boolean(false)), str::slice(s, 5u, str::len_bytes(s))) + (some(boolean(false)), str::slice(s, 5u, str::len(s))) } else { (none, s) } @@ -239,7 +239,7 @@ fn from_str_bool(s: str) -> (option, str) { fn from_str_null(s: str) -> (option, str) { if (str::starts_with(s, "null")) { - (some(null), str::slice(s, 4u, str::len_bytes(s))) + (some(null), str::slice(s, 4u, str::len(s))) } else { (none, s) } diff --git a/src/libstd/rand.rs b/src/libstd/rand.rs index 33fac40fcccc3..35525b70710ac 100644 --- a/src/libstd/rand.rs +++ b/src/libstd/rand.rs @@ -77,7 +77,7 @@ fn mk_rng() -> rng { let i = 0u; while (i < len) { let n = rustrt::rand_next(**self) as uint % - str::len_bytes(charset); + str::len(charset); s = s + str::from_char(str::char_at(charset, n)); i += 1u; } @@ -130,8 +130,8 @@ mod tests { log(debug, r.gen_str(10u)); log(debug, 
r.gen_str(10u)); log(debug, r.gen_str(10u)); - assert(str::len_bytes(r.gen_str(10u)) == 10u); - assert(str::len_bytes(r.gen_str(16u)) == 16u); + assert(str::len(r.gen_str(10u)) == 10u); + assert(str::len(r.gen_str(16u)) == 16u); } } diff --git a/src/libstd/rope.rs b/src/libstd/rope.rs index 4351ceb2d5ab6..81800afffa5eb 100644 --- a/src/libstd/rope.rs +++ b/src/libstd/rope.rs @@ -65,7 +65,7 @@ Performance notes: - the function runs in linear time. */ fn of_str(str: @str) -> rope { - ret of_substr(str, 0u, str::len_bytes(*str)); + ret of_substr(str, 0u, str::len(*str)); } /* @@ -93,7 +93,7 @@ Safety notes: */ fn of_substr(str: @str, byte_offset: uint, byte_len: uint) -> rope { if byte_len == 0u { ret node::empty; } - if byte_offset + byte_len > str::len_bytes(*str) { fail; } + if byte_offset + byte_len > str::len(*str) { fail; } ret node::content(node::of_substr(str, byte_offset, byte_len)); } @@ -721,7 +721,7 @@ mod node { the length of `str`. */ fn of_str(str: @str) -> @node { - ret of_substr(str, 0u, str::len_bytes(*str)); + ret of_substr(str, 0u, str::len(*str)); } /* @@ -768,7 +768,7 @@ mod node { */ fn of_substr_unsafer(str: @str, byte_start: uint, byte_len: uint, char_len: uint) -> @node { - assert(byte_start + byte_len <= str::len_bytes(*str)); + assert(byte_start + byte_len <= str::len(*str)); let candidate = @leaf({ byte_offset: byte_start, byte_len: byte_len, @@ -1388,7 +1388,7 @@ mod tests { assert rope_to_string(r) == *sample; let string_iter = 0u; - let string_len = str::len_bytes(*sample); + let string_len = str::len(*sample); let rope_iter = iterator::char::start(r); let equal = true; let pos = 0u; diff --git a/src/libstd/sha1.rs b/src/libstd/sha1.rs index 1b63f87f7c62a..9360a84fb97af 100644 --- a/src/libstd/sha1.rs +++ b/src/libstd/sha1.rs @@ -368,7 +368,7 @@ mod tests { // Test that it works when accepting the message in pieces for t: test in tests { - let len = str::len_bytes(t.input); + let len = str::len(t.input); let left = len; while left > 0u 
{ let take = (left + 1u) / 2u; diff --git a/src/rustdoc/desc_to_brief_pass.rs b/src/rustdoc/desc_to_brief_pass.rs index 82ae2ff8fed7b..b506f50293375 100644 --- a/src/rustdoc/desc_to_brief_pass.rs +++ b/src/rustdoc/desc_to_brief_pass.rs @@ -175,7 +175,7 @@ fn parse_desc(desc: str) -> (option, option) { if check vec::is_not_empty(paras) { let maybe_brief = vec::head(paras); - if str::len_bytes(maybe_brief) <= max_brief_len { + if str::len(maybe_brief) <= max_brief_len { let desc_paras = vec::tail(paras); let desc = if vec::is_not_empty(desc_paras) { some(str::connect(desc_paras, "\n\n")) diff --git a/src/rustdoc/unindent_pass.rs b/src/rustdoc/unindent_pass.rs index 42a3f3fa57af8..59005d8266b1e 100644 --- a/src/rustdoc/unindent_pass.rs +++ b/src/rustdoc/unindent_pass.rs @@ -67,8 +67,8 @@ fn unindent(s: str) -> str { if str::is_whitespace(line) { line } else { - assert str::len_bytes(line) >= min_indent; - str::slice_chars(line, min_indent, str::len_chars(line)) + assert str::len(line) >= min_indent; + str::slice(line, min_indent, str::len(line)) } }; str::connect(unindented, "\n") diff --git a/src/test/bench/99bob-iter.rs b/src/test/bench/99bob-iter.rs index 0e338ae5c52fa..6abeb63471fd0 100644 --- a/src/test/bench/99bob-iter.rs +++ b/src/test/bench/99bob-iter.rs @@ -31,7 +31,7 @@ fn sub(t: str, n: int) -> str unsafe { 1 { ns = "1 bottle"; } _ { ns = int::to_str(n, 10u) + " bottles"; } } - while i < str::len_bytes(t) { + while i < str::len(t) { if t[i] == '#' as u8 { b += ns; } else { str::unsafe::push_byte(b, t[i]); } i += 1u; diff --git a/src/test/bench/99bob-simple.rs b/src/test/bench/99bob-simple.rs index 351f5463d5291..3fcf6abebb193 100644 --- a/src/test/bench/99bob-simple.rs +++ b/src/test/bench/99bob-simple.rs @@ -31,7 +31,7 @@ fn sub(t: str, n: int) -> str unsafe { 1 { ns = "1 bottle"; } _ { ns = int::to_str(n, 10u) + " bottles"; } } - while i < str::len_bytes(t) { + while i < str::len(t) { if t[i] == '#' as u8 { b += ns; } else { str::unsafe::push_byte(b, 
t[i]); } i += 1u; diff --git a/src/test/bench/shootout-fasta.rs b/src/test/bench/shootout-fasta.rs index ef82f0742de16..28431d3353f7e 100644 --- a/src/test/bench/shootout-fasta.rs +++ b/src/test/bench/shootout-fasta.rs @@ -49,26 +49,26 @@ fn make_random_fasta(id: str, desc: str, genelist: [aminoacids], n: int) { uint::range(0u, n as uint) {|_i| str::push_char(op, select_random(myrandom_next(rng, 100u32), genelist)); - if str::len_bytes(op) >= LINE_LENGTH() { + if str::len(op) >= LINE_LENGTH() { log(debug, op); op = ""; } } - if str::len_bytes(op) > 0u { log(debug, op); } + if str::len(op) > 0u { log(debug, op); } } fn make_repeat_fasta(id: str, desc: str, s: str, n: int) unsafe { log(debug, ">" + id + " " + desc); let op: str = ""; - let sl: uint = str::len_bytes(s); + let sl: uint = str::len(s); uint::range(0u, n as uint) {|i| str::unsafe::push_byte(op, s[i % sl]); - if str::len_bytes(op) >= LINE_LENGTH() { + if str::len(op) >= LINE_LENGTH() { log(debug, op); op = ""; } } - if str::len_bytes(op) > 0u { log(debug, op); } + if str::len(op) > 0u { log(debug, op); } } fn acid(ch: char, prob: u32) -> aminoacids { ret {ch: ch, prob: prob}; } diff --git a/src/test/run-pass/bind-native-fn.rs b/src/test/run-pass/bind-native-fn.rs index 88b0fbf4b3553..0d839d02200a7 100644 --- a/src/test/run-pass/bind-native-fn.rs +++ b/src/test/run-pass/bind-native-fn.rs @@ -13,7 +13,7 @@ native mod libc { fn main() { let s = "hello world\n"; let b = str::bytes(s); - let l = str::len_bytes(s); + let l = str::len(s); let b8 = unsafe { vec::unsafe::to_ptr(b) }; libc::write(0i32, b8, l); let a = bind libc::write(0i32, _, _); diff --git a/src/test/run-pass/string-self-append.rs b/src/test/run-pass/string-self-append.rs index 260b0194a0c94..c09d6c7cb56bc 100644 --- a/src/test/run-pass/string-self-append.rs +++ b/src/test/run-pass/string-self-append.rs @@ -7,8 +7,8 @@ fn main() { let i = 20; let expected_len = 1u; while i > 0 { - log(error, str::len_bytes(a)); - assert (str::len_bytes(a) == 
expected_len); + log(error, str::len(a)); + assert (str::len(a) == expected_len); a += a; i -= 1; expected_len *= 2u; diff --git a/src/test/run-pass/utf8_chars.rs b/src/test/run-pass/utf8_chars.rs index fbb66b5a1ca48..2d38b9dd24584 100644 --- a/src/test/run-pass/utf8_chars.rs +++ b/src/test/run-pass/utf8_chars.rs @@ -7,7 +7,7 @@ fn main() { let chs: [char] = ['e', 'é', '€', 0x10000 as char]; let s: str = str::from_chars(chs); - assert (str::len_bytes(s) == 10u); + assert (str::len(s) == 10u); assert (str::len_chars(s) == 4u); assert (vec::len::(str::chars(s)) == 4u); assert (str::eq(str::from_chars(str::chars(s)), s)); From 0920b51223bb9ba526efda5d490bac2e7206a65e Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Thu, 23 Feb 2012 01:45:59 -0800 Subject: [PATCH 15/17] (core::str) rename substr_len_bytes to substr_len, and delete unused byte_index[_from] --- src/comp/syntax/codemap.rs | 2 +- src/libcore/str.rs | 23 +---------------------- src/libstd/rope.rs | 6 +++--- 3 files changed, 5 insertions(+), 26 deletions(-) diff --git a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs index c8b33687abe5e..0d1172b3502cd 100644 --- a/src/comp/syntax/codemap.rs +++ b/src/comp/syntax/codemap.rs @@ -171,7 +171,7 @@ fn lookup_byte_offset(cm: codemap::codemap, chpos: uint) let {fm,line} = lookup_line(cm,chpos,lookup); let line_offset = fm.lines[line].byte - fm.start_pos.byte; let col = chpos - fm.lines[line].ch; - let col_offset = str::substr_len_bytes(*fm.src, line_offset, col); + let col_offset = str::substr_len(*fm.src, line_offset, col); ret {fm: fm, pos: line_offset + col_offset}; } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index c199ead460f56..e4df5bafad6e3 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -93,7 +93,7 @@ export // Misc // FIXME: perhaps some more of this section shouldn't be exported? 
is_utf8, - substr_len_bytes, + substr_len, substr_len_chars, utf8_char_width, char_range_at, @@ -914,27 +914,6 @@ fn index_chars(ss: str, cc: char) -> option { ret none; } -// Function: byte_index -// -// Returns the index of the first matching byte -// (as option some/none) -// FIXME: delete -fn byte_index(s: str, b: u8) -> option { - byte_index_from(s, b, 0u, len(s)) -} - -// Function: byte_index_from -// -// Returns the index of the first matching byte within the range [`start`, -// `end`). -// (as option some/none) -// FIXME: delete -fn byte_index_from(s: str, b: u8, start: uint, end: uint) -> option { - assert end <= len(s); - - str::as_bytes(s) { |v| vec::position_from(v, start, end) { |x| x == b } } -} - // Function: rindex // // Returns the byte index of the first matching char diff --git a/src/libstd/rope.rs b/src/libstd/rope.rs index 81800afffa5eb..10144cc973151 100644 --- a/src/libstd/rope.rs +++ b/src/libstd/rope.rs @@ -795,7 +795,7 @@ mod node { if i == 0u { first_leaf_char_len } else { hint_max_leaf_char_len }; let chunk_byte_len = - str::substr_len_bytes(*str, offset, chunk_char_len); + str::substr_len(*str, offset, chunk_char_len); nodes[i] = @leaf({ byte_offset: offset, byte_len: chunk_byte_len, @@ -1059,9 +1059,9 @@ mod node { ret node; } let byte_offset = - str::substr_len_bytes(*x.content, 0u, char_offset); + str::substr_len(*x.content, 0u, char_offset); let byte_len = - str::substr_len_bytes(*x.content, byte_offset, char_len); + str::substr_len(*x.content, byte_offset, char_len); ret @leaf({byte_offset: byte_offset, byte_len: byte_len, char_len: char_len, From 3674f574b3d8380709f49aafe72a83628a69d923 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Thu, 23 Feb 2012 01:53:32 -0800 Subject: [PATCH 16/17] (core::str) rename ++ --- src/libcore/str.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index e4df5bafad6e3..191be0f65de22 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs 
@@ -1192,7 +1192,7 @@ fn substr_len_chars(s: str, byte_start: uint, byte_len: uint) -> uint { } /* -Function: substr_len_bytes +Function: substr_len As byte_len but for a substring @@ -1210,7 +1210,7 @@ Safety note: This function fails if `byte_offset` or `char_len` do not represent valid positions in `s` */ -fn substr_len_bytes(s: str, byte_offset: uint, char_len: uint) -> uint { +fn substr_len(s: str, byte_offset: uint, char_len: uint) -> uint { let i = byte_offset; let chars = 0u; while chars < char_len { From 25d0c6a769fa68b7db13212b5e9eda523bb98e90 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Thu, 23 Feb 2012 02:03:37 -0800 Subject: [PATCH 17/17] (core::str) comments --- src/libcore/str.rs | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 191be0f65de22..f0f274d75d520 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -4,9 +4,9 @@ Module: str String manipulation Strings are a packed UTF-8 representation of text, stored as null terminated -buffers of u8 bytes. Strings should be considered by character, -for correctness, but some UTF-8 unsafe functions are also provided. -For some heavy-duty uses, we recommend trying std::rope. +buffers of u8 bytes. Strings should be indexed in bytes, for efficiency, +but UTF-8 unsafe operations should be avoided. +For some heavy-duty uses, try std::rope. */ import option::{some, none}; @@ -434,6 +434,7 @@ Failure: - If end is greater than the character length of the string FIXME: make faster by avoiding char conversion +FIXME: delete? */ fn slice_chars(s: str, begin: uint, end: uint) -> str { from_chars(vec::slice(chars(s), begin, end)) @@ -498,7 +499,7 @@ Splits a string into a vector of the substrings separated by a given string Note that this has recently been changed. 
For example: > assert ["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", ".") -FIXME: Boyer-Moore variation +FIXME: Boyer-Moore should be faster */ fn split_str(ss: str, sep: str) -> [str] unsafe { // unsafe is justified: we are splitting @@ -670,7 +671,7 @@ fn to_upper(s: str) -> str { map(s, char::to_upper) } -// FIXME: This is super-inefficient +// FIXME: This is super-inefficient: stop the extra slicing copies /* Function: replace @@ -894,6 +895,7 @@ fn index_from(ss: str, cc: char, start: uint, end: uint) -> option { // // Returns the char index of the first matching char // (as option some/none) +// FIXME: delete? fn index_chars(ss: str, cc: char) -> option { let bii = 0u; let cii = 0u; @@ -938,6 +940,7 @@ fn rindex(ss: str, cc: char) -> option { // // Returns the char index of the first matching char // (as option some/none) +// FIXME: delete? fn rindex_chars(ss: str, cc: char) -> option { let bii = len(ss); let cii = len_chars(ss); @@ -998,6 +1001,7 @@ fn find_from(haystack: str, needle: str, start: uint, end:uint) // // Find the char position of the first instance of one string // within another, or return option::none +// FIXME: delete? fn find_chars(haystack: str, needle: str) -> option { alt find(haystack, needle) { none { ret none; } @@ -1065,8 +1069,8 @@ haystack - The string to look in needle - The string to look for */ fn ends_with(haystack: str, needle: str) -> bool { - let haystack_len: uint = len_chars(haystack); - let needle_len: uint = len_chars(needle); + let haystack_len: uint = len(haystack); + let needle_len: uint = len(needle); ret if needle_len == 0u { true } else if needle_len > haystack_len { @@ -1129,6 +1133,7 @@ pure fn len(s: str) -> uint unsafe { } } +// FIXME: delete? fn len_chars(s: str) -> uint { substr_len_chars(s, 0u, len(s)) } @@ -1177,6 +1182,8 @@ Safety note: - This function does not check whether the substring is valid. 
- This function fails if `byte_offset` or `byte_len` do not represent valid positions inside `s` + +FIXME: delete? */ fn substr_len_chars(s: str, byte_start: uint, byte_len: uint) -> uint { let i = byte_start; @@ -1451,12 +1458,13 @@ fn reserve(&ss: str, nn: uint) { // These functions may create invalid UTF-8 strings and eat your baby. mod unsafe { export + // FIXME: stop exporting several of these from_bytes, from_byte, - slice_bytes, // FIXME: stop exporting - slice_bytes_safe_range, // FIXME: stop exporting + slice_bytes, + slice_bytes_safe_range, push_byte, - push_bytes, // note: wasn't exported + push_bytes, pop_byte, shift_byte; @@ -1489,7 +1497,6 @@ mod unsafe { - If end is greater than the length of the string. */ unsafe fn slice_bytes(s: str, begin: uint, end: uint) -> str unsafe { - // FIXME: Typestate precondition assert (begin <= end); assert (end <= len(s));