From 541a0606e1d024564669df782b8a6032aca6b049 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Sat, 28 Jun 2014 15:39:48 -0700 Subject: [PATCH 1/4] Account for trailing / in URLs when determining on-disk location. Fixes #84. No tests because there's no preexisting network tests. --- src/cargo/sources/git/source.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index c126fe2b4d8..2ecd0bcf14b 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -67,7 +67,13 @@ fn ident(location: &Location) -> String { let last = path.components().last().unwrap(); str::from_utf8(last).unwrap().to_str() } - Remote(ref url) => url.path.as_slice().split('/').last().unwrap().to_str() + Remote(ref url) => { + // Remove the trailing '/' so that 'split' doesn't give us + // an empty string, making '../foo/' and '../foo' both + // result in the name 'foo' (#84) + let path = strip_trailing_slash(url.path.as_slice()); + path.split('/').last().unwrap().to_str() + } }; let ident = if ident.as_slice() == "" { @@ -79,6 +85,14 @@ fn ident(location: &Location) -> String { format!("{}-{}", ident, to_hex(hasher.hash(&location.to_str()))) } +fn strip_trailing_slash<'a>(path: &'a str) -> &'a str { + if path.as_bytes().last() != Some(&('/' as u8)) { + path.clone() + } else { + path.slice(0, path.len() - 1) + } +} + impl<'a, 'b> Show for GitSource<'a, 'b> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { try!(write!(f, "git repo at {}", self.remote.get_location())); From 73bdebc52b8cf7dfe5a00d1b0661ca0727f03eca Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Sat, 28 Jun 2014 16:18:02 -0700 Subject: [PATCH 2/4] Add a hack to make github urls case-insensitive. Fixes #84 --- src/cargo/sources/git/source.rs | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 2ecd0bcf14b..9b45762adea 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -68,11 +68,8 @@ fn ident(location: &Location) -> String { str::from_utf8(last).unwrap().to_str() } Remote(ref url) => { - // Remove the trailing '/' so that 'split' doesn't give us - // an empty string, making '../foo/' and '../foo' both - // result in the name 'foo' (#84) let path = strip_trailing_slash(url.path.as_slice()); - path.split('/').last().unwrap().to_str() + path.as_slice().split('/').last().unwrap().to_str() } }; @@ -82,10 +79,15 @@ fn ident(location: &Location) -> String { ident }; - format!("{}-{}", ident, to_hex(hasher.hash(&location.to_str()))) + let location = canonicalize_url(location.to_str().as_slice()); + + format!("{}-{}", ident, to_hex(hasher.hash(&location.as_slice()))) } fn strip_trailing_slash<'a>(path: &'a str) -> &'a str { + // Remove the trailing '/' so that 'split' doesn't give us + // an empty string, making '../foo/' and '../foo' both + // result in the name 'foo' (#84) if path.as_bytes().last() != Some(&('/' as u8)) { path.clone() } else { @@ -93,6 +95,20 @@ fn strip_trailing_slash<'a>(path: &'a str) -> &'a str { } } +fn canonicalize_url(url: &str) -> String { + // HACKHACK: For github URL's specifically just lowercase + // everything. GitHub traits both the same, but they hash + // differently, and we're gonna be hashing them. This wants a more + // general solution, and also we're almost certainly not using the + // same case conversion rules that GitHub does. (#84) + let lower_url = url.chars().map(|c|c.to_lowercase()).collect::(); + if lower_url.as_slice().contains("github.com") { + lower_url + } else { + url.to_string() + } +} + impl<'a, 'b> Show for GitSource<'a, 'b> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { try!(write!(f, "git repo at {}", self.remote.get_location())); From 7c6671c2a064576cc33ffd60bf77cf03da2bb300 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Sat, 28 Jun 2014 16:44:56 -0700 Subject: [PATCH 3/4] Add tests for URL canonicalization and canonicalize .git extensions --- src/cargo/sources/git/source.rs | 36 ++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 9b45762adea..772c3caa602 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -68,7 +68,7 @@ fn ident(location: &Location) -> String { str::from_utf8(last).unwrap().to_str() } Remote(ref url) => { - let path = strip_trailing_slash(url.path.as_slice()); + let path = canonicalize_url(url.path.as_slice()); path.as_slice().split('/').last().unwrap().to_str() } }; @@ -95,18 +95,31 @@ fn strip_trailing_slash<'a>(path: &'a str) -> &'a str { } } +// Some hacks and heuristics for making equivalent URLs hash the same fn canonicalize_url(url: &str) -> String { + let url = strip_trailing_slash(url); + // HACKHACK: For github URL's specifically just lowercase // everything. GitHub traits both the same, but they hash // differently, and we're gonna be hashing them. This wants a more // general solution, and also we're almost certainly not using the // same case conversion rules that GitHub does. (#84) + let lower_url = url.chars().map(|c|c.to_lowercase()).collect::(); - if lower_url.as_slice().contains("github.com") { + let url = if lower_url.as_slice().contains("github.com") { lower_url } else { url.to_string() - } + }; + + // Repos generally can be accessed with or w/o '.git' + let url = if !url.as_slice().ends_with(".git") { + url + } else { + url.as_slice().slice(0, url.len() - 4).to_string() + }; + + return url; } impl<'a, 'b> Show for GitSource<'a, 'b> { @@ -180,6 +193,23 @@ mod test { assert_eq!(ident.as_slice(), "_empty-fc065c9b6b16fc00"); } + #[test] + fn test_canonicalize_idents_by_stripping_trailing_url_slash() { + let ident = ident(&Remote(url("https://github.com/PistonDevelopers/piston/"))); + assert_eq!(ident.as_slice(), "piston-1ad60373965e5b42"); + } + + #[test] + fn test_canonicalize_idents_by_lowercasing_github_urls() { + let ident = ident(&Remote(url("https://github.com/PistonDevelopers/piston"))); + assert_eq!(ident.as_slice(), "piston-1ad60373965e5b42"); + } + + #[test] + fn test_canonicalize_idents_by_stripping_dot_git() { + let ident = ident(&Remote(url("https://github.com/PistonDevelopers/piston.git"))); + assert_eq!(ident.as_slice(), "piston-1ad60373965e5b42"); + } fn url(s: &str) -> Url { url::from_str(s).unwrap() From 39157bae044bac653e6452d0a68807ca654304f6 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Sat, 28 Jun 2014 17:07:49 -0700 Subject: [PATCH 4/4] Future proof URL canonicalization tests --- src/cargo/sources/git/source.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 772c3caa602..11f86c6c55c 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -195,20 +195,23 @@ mod test { #[test] fn test_canonicalize_idents_by_stripping_trailing_url_slash() { - let ident = ident(&Remote(url("https://github.com/PistonDevelopers/piston/"))); - assert_eq!(ident.as_slice(), "piston-1ad60373965e5b42"); + let ident1 = ident(&Remote(url("https://github.com/PistonDevelopers/piston/"))); + let ident2 = ident(&Remote(url("https://github.com/PistonDevelopers/piston"))); + assert_eq!(ident1, ident2); } #[test] fn test_canonicalize_idents_by_lowercasing_github_urls() { - let ident = ident(&Remote(url("https://github.com/PistonDevelopers/piston"))); - assert_eq!(ident.as_slice(), "piston-1ad60373965e5b42"); + let ident1 = ident(&Remote(url("https://github.com/PistonDevelopers/piston"))); + let ident2 = ident(&Remote(url("https://github.com/pistondevelopers/piston"))); + assert_eq!(ident1, ident2); } #[test] fn test_canonicalize_idents_by_stripping_dot_git() { - let ident = ident(&Remote(url("https://github.com/PistonDevelopers/piston.git"))); - assert_eq!(ident.as_slice(), "piston-1ad60373965e5b42"); + let ident1 = ident(&Remote(url("https://github.com/PistonDevelopers/piston"))); + let ident2 = ident(&Remote(url("https://github.com/PistonDevelopers/piston.git"))); + assert_eq!(ident1, ident2); } fn url(s: &str) -> Url {