Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,15 @@ This crate provides Grapheme Cluster, Word and Sentence boundaries
according to Unicode Standard Annex #29 rules.
"""

exclude = [ "target/*", "Cargo.lock", "scripts/tmp", "*.txt" ]
exclude = [ "target/*", "Cargo.lock", "scripts/tmp", "benches/texts/*", "*.txt", ]

[features]
no_std = [] # This is a no-op, preserved for backward compatibility only.

[dev-dependencies]
quickcheck = "0.7"
bencher = "0.1"

[[bench]]
name = "graphemes"
harness = false
109 changes: 109 additions & 0 deletions benches/graphemes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#[macro_use]
extern crate bencher;
extern crate unicode_segmentation;

use bencher::Bencher;
use unicode_segmentation::UnicodeSegmentation;

// Sample inputs for the grapheme benchmarks, one constant per text file.
// `include_str!` embeds each file's contents into the benchmark binary at
// compile time; the paths are resolved relative to this source file.
const TEXT_ARABIC: &str = include_str!("texts/arabic.txt");
const TEXT_ENGLISH: &str = include_str!("texts/english.txt");
const TEXT_HINDI: &str = include_str!("texts/hindi.txt");
const TEXT_JAPANESE: &str = include_str!("texts/japanese.txt");
const TEXT_KOREAN: &str = include_str!("texts/korean.txt");
const TEXT_MANDARIN: &str = include_str!("texts/mandarin.txt");
const TEXT_RUSSIAN: &str = include_str!("texts/russian.txt");
const TEXT_SOURCE_CODE: &str = include_str!("texts/source_code.txt");

/// Benchmarks grapheme-cluster iteration over the embedded Arabic sample text.
///
/// The input is a compile-time constant, so it is passed through
/// `bencher::black_box` inside the timed closure; this keeps the optimizer from
/// specializing or hoisting the segmentation work based on the known contents.
/// Every yielded cluster is likewise fed to `black_box` so the loop cannot be
/// discarded as dead code.
///
/// `bench.bytes` is set to the text's length in bytes so the harness can
/// relate the measured time to the amount of input processed.
fn graphemes_arabic(bench: &mut Bencher) {
    bench.iter(|| {
        // Make the constant input opaque to the optimizer on every iteration.
        let text = bencher::black_box(TEXT_ARABIC);
        for g in UnicodeSegmentation::graphemes(text, true) {
            bencher::black_box(g);
        }
    });

    bench.bytes = TEXT_ARABIC.len() as u64;
}

fn graphemes_english(bench: &mut Bencher) {
bench.iter(|| {
for g in UnicodeSegmentation::graphemes(TEXT_ENGLISH, true) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The text itself should also pass through `black_box`. It probably doesn't matter given how large the text is, but it's worth a shot.

Alternatively, we can load the file dynamically outside of the iter() call.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pushed a fix that does this.

bencher::black_box(g);
}
});

bench.bytes = TEXT_ENGLISH.len() as u64;
}

/// Benchmarks grapheme-cluster iteration over the embedded Hindi sample text.
///
/// The input is a compile-time constant, so it is passed through
/// `bencher::black_box` inside the timed closure; this keeps the optimizer from
/// specializing or hoisting the segmentation work based on the known contents.
/// Every yielded cluster is likewise fed to `black_box` so the loop cannot be
/// discarded as dead code.
///
/// `bench.bytes` is set to the text's length in bytes so the harness can
/// relate the measured time to the amount of input processed.
fn graphemes_hindi(bench: &mut Bencher) {
    bench.iter(|| {
        // Make the constant input opaque to the optimizer on every iteration.
        let text = bencher::black_box(TEXT_HINDI);
        for g in UnicodeSegmentation::graphemes(text, true) {
            bencher::black_box(g);
        }
    });

    bench.bytes = TEXT_HINDI.len() as u64;
}

/// Benchmarks grapheme-cluster iteration over the embedded Japanese sample text.
///
/// The input is a compile-time constant, so it is passed through
/// `bencher::black_box` inside the timed closure; this keeps the optimizer from
/// specializing or hoisting the segmentation work based on the known contents.
/// Every yielded cluster is likewise fed to `black_box` so the loop cannot be
/// discarded as dead code.
///
/// `bench.bytes` is set to the text's length in bytes so the harness can
/// relate the measured time to the amount of input processed.
fn graphemes_japanese(bench: &mut Bencher) {
    bench.iter(|| {
        // Make the constant input opaque to the optimizer on every iteration.
        let text = bencher::black_box(TEXT_JAPANESE);
        for g in UnicodeSegmentation::graphemes(text, true) {
            bencher::black_box(g);
        }
    });

    bench.bytes = TEXT_JAPANESE.len() as u64;
}

/// Benchmarks grapheme-cluster iteration over the embedded Korean sample text.
///
/// The input is a compile-time constant, so it is passed through
/// `bencher::black_box` inside the timed closure; this keeps the optimizer from
/// specializing or hoisting the segmentation work based on the known contents.
/// Every yielded cluster is likewise fed to `black_box` so the loop cannot be
/// discarded as dead code.
///
/// `bench.bytes` is set to the text's length in bytes so the harness can
/// relate the measured time to the amount of input processed.
fn graphemes_korean(bench: &mut Bencher) {
    bench.iter(|| {
        // Make the constant input opaque to the optimizer on every iteration.
        let text = bencher::black_box(TEXT_KOREAN);
        for g in UnicodeSegmentation::graphemes(text, true) {
            bencher::black_box(g);
        }
    });

    bench.bytes = TEXT_KOREAN.len() as u64;
}

/// Benchmarks grapheme-cluster iteration over the embedded Mandarin sample text.
///
/// The input is a compile-time constant, so it is passed through
/// `bencher::black_box` inside the timed closure; this keeps the optimizer from
/// specializing or hoisting the segmentation work based on the known contents.
/// Every yielded cluster is likewise fed to `black_box` so the loop cannot be
/// discarded as dead code.
///
/// `bench.bytes` is set to the text's length in bytes so the harness can
/// relate the measured time to the amount of input processed.
fn graphemes_mandarin(bench: &mut Bencher) {
    bench.iter(|| {
        // Make the constant input opaque to the optimizer on every iteration.
        let text = bencher::black_box(TEXT_MANDARIN);
        for g in UnicodeSegmentation::graphemes(text, true) {
            bencher::black_box(g);
        }
    });

    bench.bytes = TEXT_MANDARIN.len() as u64;
}

/// Benchmarks grapheme-cluster iteration over the embedded Russian sample text.
///
/// The input is a compile-time constant, so it is passed through
/// `bencher::black_box` inside the timed closure; this keeps the optimizer from
/// specializing or hoisting the segmentation work based on the known contents.
/// Every yielded cluster is likewise fed to `black_box` so the loop cannot be
/// discarded as dead code.
///
/// `bench.bytes` is set to the text's length in bytes so the harness can
/// relate the measured time to the amount of input processed.
fn graphemes_russian(bench: &mut Bencher) {
    bench.iter(|| {
        // Make the constant input opaque to the optimizer on every iteration.
        let text = bencher::black_box(TEXT_RUSSIAN);
        for g in UnicodeSegmentation::graphemes(text, true) {
            bencher::black_box(g);
        }
    });

    bench.bytes = TEXT_RUSSIAN.len() as u64;
}

/// Benchmarks grapheme-cluster iteration over the embedded source-code sample text.
///
/// The input is a compile-time constant, so it is passed through
/// `bencher::black_box` inside the timed closure; this keeps the optimizer from
/// specializing or hoisting the segmentation work based on the known contents.
/// Every yielded cluster is likewise fed to `black_box` so the loop cannot be
/// discarded as dead code.
///
/// `bench.bytes` is set to the text's length in bytes so the harness can
/// relate the measured time to the amount of input processed.
fn graphemes_source_code(bench: &mut Bencher) {
    bench.iter(|| {
        // Make the constant input opaque to the optimizer on every iteration.
        let text = bencher::black_box(TEXT_SOURCE_CODE);
        for g in UnicodeSegmentation::graphemes(text, true) {
            bencher::black_box(g);
        }
    });

    bench.bytes = TEXT_SOURCE_CODE.len() as u64;
}

// Collect all eight grapheme benchmark functions into one group named
// `benches`; the first argument is the group name, the rest are its members.
benchmark_group!(
benches,
graphemes_arabic,
graphemes_english,
graphemes_hindi,
graphemes_japanese,
graphemes_korean,
graphemes_mandarin,
graphemes_russian,
graphemes_source_code,
);

// Hand the `benches` group to the runner macro.
// NOTE(review): presumably this expands to the binary's `main`, which would be
// why the bench target sets `harness = false` in Cargo.toml; confirm against
// the bencher documentation.
benchmark_main!(benches);
106 changes: 106 additions & 0 deletions benches/texts/arabic.txt

Large diffs are not rendered by default.

222 changes: 222 additions & 0 deletions benches/texts/english.txt

Large diffs are not rendered by default.

155 changes: 155 additions & 0 deletions benches/texts/hindi.txt

Large diffs are not rendered by default.

269 changes: 269 additions & 0 deletions benches/texts/japanese.txt

Large diffs are not rendered by default.

243 changes: 243 additions & 0 deletions benches/texts/korean.txt

Large diffs are not rendered by default.

356 changes: 356 additions & 0 deletions benches/texts/mandarin.txt

Large diffs are not rendered by default.

155 changes: 155 additions & 0 deletions benches/texts/russian.txt

Large diffs are not rendered by default.

Loading