From 926c2e533787167d073f068e88f1c2c461700011 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Wed, 11 Nov 2020 08:51:58 -0500 Subject: [PATCH 1/2] move verify_host into separate file --- src/NetworkOptions.jl | 142 +----------------------------------------- src/verify_host.jl | 141 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 141 deletions(-) create mode 100644 src/verify_host.jl diff --git a/src/NetworkOptions.jl b/src/NetworkOptions.jl index 9b6f14a..10b958c 100644 --- a/src/NetworkOptions.jl +++ b/src/NetworkOptions.jl @@ -1,145 +1,5 @@ module NetworkOptions -export verify_host - -""" -The `verify_host` function tells the caller whether the identity of a host -should be verified when communicating over secure transports like TLS or SSH. -The `url` argument may be: - -1. a proper URL staring with `proto://` -2. an `ssh`-style bare host name or host name prefixed with `user@` -3. an `scp`-style host as above, followed by `:` and a path location - -In each case the host name part is parsed out and the decision about whether to -verify or not is made based solely on the host name, not anything else about the -input URL. In particular, the protocol of the URL does not matter (more below). - -The `transport` argument indicates the kind of transport that the query is -about. The currently known values are `SSL` (alias `TLS`) and `SSH`. If the -transport is ommitted, the query will return `true` only if the host name should -not be verified regardless of transport. 
- -The host name is matched against the host patterns in the relavent environment -variables depending on whether `transport` is supplied and what its value is: - -- `JULIA_NO_VERIFY_HOSTS` — hosts that should not be verified for any transport -- `JULIA_SSL_NO_VERIFY_HOSTS` — hosts that should not be verified for SSL/TLS -- `JULIA_SSH_NO_VERIFY_HOSTS` — hosts that should not be verified for SSH - -The values of each of these variables is a comma-separated list of host name -patterns with the following syntax — each pattern is split on `.` into parts and -each part must one of: - -1. A literal domain name component consisting of one or more ASCII letter, - digit, hyphen or underscore (technically not part of a legal host name, but - sometimes used). A literal domain name component matches only itself. -2. A `**`, which matches zero or more domain name components. -3. A `*`, which match any one domain name component. - -When matching a host name against a pattern list in one of these variables, the -host name is split on `.` into components and that sequence of words is matched -against the pattern: a literal pattern matches exactly one host name component -with that value; a `*` pattern matches exactly one host name component with any -value; a `**` pattern matches any number of host name components. For example: - -- `**` matches any host name -- `**.org` matches any host name in the `.org` top-level domain -- `example.com` matches only the exact host name `example.com` -- `*.example.com` matches `api.example.com` but not `example.com` or - `v1.api.example.com` -- `**.example.com` matches any domain under `example.com`, including - `example.com` itself, `api.example.com` and `v1.api.example.com` -``` -""" -function verify_host( - url :: AbstractString, - transport :: Union{AbstractString, Nothing} = nothing, -) - host = url_host(url) - if env_host_pattern_match("JULIA_NO_VERIFY_HOSTS", host) - return false # don't verify - end - transport = transport === nothing ? 
nothing : uppercase(transport) - return if transport in ("SSL", "TLS") - !env_host_pattern_match("JULIA_SSL_NO_VERIFY_HOSTS", host) - elseif transport == "SSH" - !env_host_pattern_match("JULIA_SSH_NO_VERIFY_HOSTS", host) - else - true # do verify - end -end - -function url_host(url::AbstractString) - m = match(r"^(?:[a-z]+)://(?:[^@/]+@)?([-\w\.]+)"ai, url) - m !== nothing && return m.captures[1] - m = match(r"^(?:[-\w\.]+@)?([-\w\.]+)(?:$|:)"a, url) - m !== nothing && return m.captures[1] - return nothing # couldn't parse -end - -const MATCH_ANY_RE = r"" -const MATCH_NONE_RE = r"$.^" - -env_host_pattern_match(var::AbstractString, host::AbstractString) = - occursin(env_host_pattern_regex(var), host) -env_host_pattern_match(var::AbstractString, host::Nothing) = - env_host_pattern_regex(var) === MATCH_ANY_RE - -const HOST_PATTERN_LOCK = ReentrantLock() -const HOST_PATTERN_CACHE = Dict{String,Tuple{String,Regex}}() - -function env_host_pattern_regex(var::AbstractString) - lock(HOST_PATTERN_LOCK) do - value = get(ENV, var, nothing) - if value === nothing - delete!(HOST_PATTERN_CACHE, var) - return MATCH_NONE_RE - end - old_value, regex = get(HOST_PATTERN_CACHE, var, (nothing, nothing)) - old_value == value && return regex - regex = host_pattern_regex(value, var) - HOST_PATTERN_CACHE[var] = (value, regex) - return regex - end -end - -if !@isdefined(contains) - contains(needle) = haystack -> occursin(needle, haystack) -end - -function host_pattern_regex(value::AbstractString, var::AbstractString="") - match_any = false - patterns = Vector{String}[] - for pattern in split(value, r"\s*,\s*", keepempty=false) - match_any |= pattern == "**" - parts = split(pattern, '.') - # emit warning but ignore any pattern we don't recognize; - # this allows adding syntax without breaking old versions - if !all(contains(r"^([-\w]+|\*\*?)$"a), parts) - in = isempty(var) ? 
"" : " in ENV[$(repr(var))]" - @warn("bad host pattern$in: $(repr(pattern))") - continue - end - push!(patterns, parts) - end - match_any && return MATCH_ANY_RE - isempty(patterns) && return MATCH_NONE_RE - regex = "" - for parts in patterns - re = "" - for (i, part) in enumerate(parts) - re *= if i < length(parts) - part == "*" ? "[-\\w]+\\." : - part == "**" ? "(?:[-\\w]+\\.)*" : "$part\\." - else - part == "*" ? "[-\\w]+" : - part == "**" ? "(?:[-\\w]+\\.)*[-\\w]+" : part - end - end - regex = isempty(regex) ? re : "$regex|$re" - end - return Regex("^(?:$regex)\$", "ai") -end +include("verify_host.jl") end # module diff --git a/src/verify_host.jl b/src/verify_host.jl new file mode 100644 index 0000000..917609b --- /dev/null +++ b/src/verify_host.jl @@ -0,0 +1,141 @@ +export verify_host + +""" +The `verify_host` function tells the caller whether the identity of a host +should be verified when communicating over secure transports like TLS or SSH. +The `url` argument may be: + +1. a proper URL staring with `proto://` +2. an `ssh`-style bare host name or host name prefixed with `user@` +3. an `scp`-style host as above, followed by `:` and a path location + +In each case the host name part is parsed out and the decision about whether to +verify or not is made based solely on the host name, not anything else about the +input URL. In particular, the protocol of the URL does not matter (more below). + +The `transport` argument indicates the kind of transport that the query is +about. The currently known values are `SSL` (alias `TLS`) and `SSH`. If the +transport is ommitted, the query will return `true` only if the host name should +not be verified regardless of transport. 
+ +The host name is matched against the host patterns in the relavent environment +variables depending on whether `transport` is supplied and what its value is: + +- `JULIA_NO_VERIFY_HOSTS` — hosts that should not be verified for any transport +- `JULIA_SSL_NO_VERIFY_HOSTS` — hosts that should not be verified for SSL/TLS +- `JULIA_SSH_NO_VERIFY_HOSTS` — hosts that should not be verified for SSH + +The values of each of these variables is a comma-separated list of host name +patterns with the following syntax — each pattern is split on `.` into parts and +each part must one of: + +1. A literal domain name component consisting of one or more ASCII letter, + digit, hyphen or underscore (technically not part of a legal host name, but + sometimes used). A literal domain name component matches only itself. +2. A `**`, which matches zero or more domain name components. +3. A `*`, which match any one domain name component. + +When matching a host name against a pattern list in one of these variables, the +host name is split on `.` into components and that sequence of words is matched +against the pattern: a literal pattern matches exactly one host name component +with that value; a `*` pattern matches exactly one host name component with any +value; a `**` pattern matches any number of host name components. For example: + +- `**` matches any host name +- `**.org` matches any host name in the `.org` top-level domain +- `example.com` matches only the exact host name `example.com` +- `*.example.com` matches `api.example.com` but not `example.com` or + `v1.api.example.com` +- `**.example.com` matches any domain under `example.com`, including + `example.com` itself, `api.example.com` and `v1.api.example.com` +``` +""" +function verify_host( + url :: AbstractString, + transport :: Union{AbstractString, Nothing} = nothing, +) + host = url_host(url) + if env_host_pattern_match("JULIA_NO_VERIFY_HOSTS", host) + return false # don't verify + end + transport = transport === nothing ? 
nothing : uppercase(transport) + return if transport in ("SSL", "TLS") + !env_host_pattern_match("JULIA_SSL_NO_VERIFY_HOSTS", host) + elseif transport == "SSH" + !env_host_pattern_match("JULIA_SSH_NO_VERIFY_HOSTS", host) + else + true # do verify + end +end + +function url_host(url::AbstractString) + m = match(r"^(?:[a-z]+)://(?:[^@/]+@)?([-\w\.]+)"ai, url) + m !== nothing && return m.captures[1] + m = match(r"^(?:[-\w\.]+@)?([-\w\.]+)(?:$|:)"a, url) + m !== nothing && return m.captures[1] + return nothing # couldn't parse +end + +const MATCH_ANY_RE = r"" +const MATCH_NONE_RE = r"$.^" + +env_host_pattern_match(var::AbstractString, host::AbstractString) = + occursin(env_host_pattern_regex(var), host) +env_host_pattern_match(var::AbstractString, host::Nothing) = + env_host_pattern_regex(var) === MATCH_ANY_RE + +const HOST_PATTERN_LOCK = ReentrantLock() +const HOST_PATTERN_CACHE = Dict{String,Tuple{String,Regex}}() + +function env_host_pattern_regex(var::AbstractString) + lock(HOST_PATTERN_LOCK) do + value = get(ENV, var, nothing) + if value === nothing + delete!(HOST_PATTERN_CACHE, var) + return MATCH_NONE_RE + end + old_value, regex = get(HOST_PATTERN_CACHE, var, (nothing, nothing)) + old_value == value && return regex + regex = host_pattern_regex(value, var) + HOST_PATTERN_CACHE[var] = (value, regex) + return regex + end +end + +if !@isdefined(contains) + contains(needle) = haystack -> occursin(needle, haystack) +end + +function host_pattern_regex(value::AbstractString, var::AbstractString="") + match_any = false + patterns = Vector{String}[] + for pattern in split(value, r"\s*,\s*", keepempty=false) + match_any |= pattern == "**" + parts = split(pattern, '.') + # emit warning but ignore any pattern we don't recognize; + # this allows adding syntax without breaking old versions + if !all(contains(r"^([-\w]+|\*\*?)$"a), parts) + in = isempty(var) ? 
"" : " in ENV[$(repr(var))]" + @warn("bad host pattern$in: $(repr(pattern))") + continue + end + push!(patterns, parts) + end + match_any && return MATCH_ANY_RE + isempty(patterns) && return MATCH_NONE_RE + regex = "" + for parts in patterns + re = "" + for (i, part) in enumerate(parts) + re *= if i < length(parts) + part == "*" ? "[-\\w]+\\." : + part == "**" ? "(?:[-\\w]+\\.)*" : "$part\\." + else + part == "*" ? "[-\\w]+" : + part == "**" ? "(?:[-\\w]+\\.)*[-\\w]+" : part + end + end + regex = isempty(regex) ? re : "$regex|$re" + end + return Regex("^(?:$regex)\$", "ai") +end From 50c027a95f109ae01281d26493379fe11354fb8f Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Wed, 11 Nov 2020 10:30:35 -0500 Subject: [PATCH 2/2] ca_roots: add API for finding CA root certificates --- README.md | 42 +++++++++++++++++++++ src/NetworkOptions.jl | 1 + src/ca_roots.jl | 86 +++++++++++++++++++++++++++++++++++++++++++ src/verify_host.jl | 12 +++--- test/runtests.jl | 26 ++++++++++++- test/setup.jl | 1 + 6 files changed, 161 insertions(+), 7 deletions(-) create mode 100644 src/ca_roots.jl diff --git a/README.md b/README.md index 8471e0c..512031d 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,48 @@ how that configuration is expressed. ## API +### ca_roots + +```jl +ca_roots() :: Union{Nothing, String} +``` +The `ca_roots()` function tells the caller where, if anywhere, to find a file or +directory of PEM-encoded certificate authority roots. By default, on systems +like Windows and macOS where the built-in TLS engines know how to verify hosts +using the system's built-in certificate verification mechanism, this function +will return `nothing`. 
On classic UNIX systems (excluding macOS), root +certificates are typically stored in a file in `/etc`: the common places for the +current UNIX kernel will be searched and if one of these paths exists, it will +be returned; if none of these typical root certificate paths exist, then the +path to the set of root certificates that are bundled with Julia is returned. + +The default value returned by `ca_roots()` may be overridden by setting the +`JULIA_SSL_CA_ROOTS_PATH` environment variable to a non-empty value, in which +case this function will always return that path (whether it exists or not). + +### ca_roots_path + +```jl +ca_roots_path() :: String +``` +The `ca_roots_path()` function is similar to the `ca_roots()` function except +that it always returns a path to a file or directory of PEM-encoded certificate +authority roots. When called on a system like Windows or macOS, where system +root certificates are not stored in the file system, it will currently return +the path to the set of root certificates that are bundled with Julia. (In the +future, this function may instead extract the root certificates from the system +and save them to a file whose path would be returned.) + +If it is possible to configure a library that uses TLS to use the system +certificates, that is generally preferable: i.e. it is better to use +`ca_roots()` which returns `nothing` to indicate that the system certs should be +used. The `ca_roots_path()` function should only be used when configuring +libraries which _require_ a path to a file or directory for root certificates. + +The default value returned by `ca_roots_path()` may be overridden by setting the +`JULIA_SSL_CA_ROOTS_PATH` environment variable to a non-empty value, in which +case this function will always return that path (whether it exists or not). 
+ ### verify_host ```jl diff --git a/src/NetworkOptions.jl b/src/NetworkOptions.jl index 10b958c..bad64bc 100644 --- a/src/NetworkOptions.jl +++ b/src/NetworkOptions.jl @@ -1,5 +1,6 @@ module NetworkOptions +include("ca_roots.jl") include("verify_host.jl") end # module diff --git a/src/ca_roots.jl b/src/ca_roots.jl new file mode 100644 index 0000000..7958ecc --- /dev/null +++ b/src/ca_roots.jl @@ -0,0 +1,86 @@ +export ca_roots, ca_roots_path + +""" + ca_roots() :: Union{Nothing, String} + +The `ca_roots()` function tells the caller where, if anywhere, to find a file or +directory of PEM-encoded certificate authority roots. By default, on systems +like Windows and macOS where the built-in TLS engines know how to verify hosts +using the system's built-in certificate verification mechanism, this function +will return `nothing`. On classic UNIX systems (excluding macOS), root +certificates are typically stored in a file in `/etc`: the common places for the +current UNIX kernel will be searched and if one of these paths exists, it will +be returned; if none of these typical root certificate paths exist, then the +path to the set of root certificates that are bundled with Julia is returned. + +The default value returned by `ca_roots()` may be overridden by setting the +`JULIA_SSL_CA_ROOTS_PATH` environment variable to a non-empty value, in which +case this function will always return that path (whether it exists or not). +""" +ca_roots()::Union{Nothing,String} = _ca_roots(true) + +""" + ca_roots_path() :: String + +The `ca_roots_path()` function is similar to the `ca_roots()` function except +that it always returns a path to a file or directory of PEM-encoded certificate +authority roots. When called on a system like Windows or macOS, where system +root certificates are not stored in the file system, it will currently return +the path to the set of root certificates that are bundled with Julia. 
(In the +future, this function may instead extract the root certificates from the system +and save them to a file whose path would be returned.) + +If it is possible to configure a library that uses TLS to use the system +certificates, that is generally preferable: i.e. it is better to use +`ca_roots()` which returns `nothing` to indicate that the system certs should be +used. The `ca_roots_path()` function should only be used when configuring +libraries which _require_ a path to a file or directory for root certificates. + +The default value returned by `ca_roots_path()` may be overridden by setting the +`JULIA_SSL_CA_ROOTS_PATH` environment variable to a non-empty value, in which +case this function will always return that path (whether it exists or not). +""" +ca_roots_path()::String = _ca_roots(false) + +const BUNDLED_CA_ROOTS = normpath(Sys.BINDIR, "..", "share", "julia", "cert.pem") + +const LINUX_CA_ROOTS = [ + "/etc/ssl/cert.pem" # Alpine Linux + "/etc/ssl/ca-bundle.pem" # OpenSUSE + "/etc/ssl/certs/ca-certificates.crt" # Debian/Ubuntu/Gentoo etc. + "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem" # CentOS/RHEL 7 + "/etc/pki/tls/certs/ca-bundle.crt" # Fedora/RHEL 6 + "/etc/pki/tls/cacert.pem" # OpenELEC +] + +const BSD_CA_ROOTS = [ + "/etc/ssl/cert.pem" # OpenBSD + "/usr/local/share/certs/ca-root-nss.crt" # FreeBSD + "/usr/local/etc/ssl/cert.pem" # FreeBSD +] + +const SYSTEM_CA_ROOTS_LOCK = ReentrantLock() +const SYSTEM_CA_ROOTS = Ref{String}() + +function system_ca_roots() + lock(SYSTEM_CA_ROOTS_LOCK) do + isassigned(SYSTEM_CA_ROOTS) && return + search_path = Sys.islinux() ? LINUX_CA_ROOTS : + Sys.isbsd() && !Sys.isapple() ? 
BSD_CA_ROOTS : String[] + for path in search_path + ispath(path) || continue + SYSTEM_CA_ROOTS[] = path + return + end + # TODO: extract system certs on Windows & macOS + SYSTEM_CA_ROOTS[] = BUNDLED_CA_ROOTS + end + return SYSTEM_CA_ROOTS[] +end + +function _ca_roots(allow_nothing::Bool) + path = get(ENV, "JULIA_SSL_CA_ROOTS_PATH", nothing) + path !== nothing && !isempty(path) && return path + allow_nothing && (Sys.iswindows() || Sys.isapple()) && return nothing + return system_ca_roots() +end diff --git a/src/verify_host.jl b/src/verify_host.jl index 917609b..6ae4a58 100644 --- a/src/verify_host.jl +++ b/src/verify_host.jl @@ -84,20 +84,20 @@ env_host_pattern_match(var::AbstractString, host::AbstractString) = env_host_pattern_match(var::AbstractString, host::Nothing) = env_host_pattern_regex(var) === MATCH_ANY_RE -const HOST_PATTERN_LOCK = ReentrantLock() -const HOST_PATTERN_CACHE = Dict{String,Tuple{String,Regex}}() +const ENV_HOST_PATTERN_LOCK = ReentrantLock() +const ENV_HOST_PATTERN_CACHE = Dict{String,Tuple{String,Regex}}() function env_host_pattern_regex(var::AbstractString) - lock(HOST_PATTERN_LOCK) do + lock(ENV_HOST_PATTERN_LOCK) do value = get(ENV, var, nothing) if value === nothing - delete!(HOST_PATTERN_CACHE, var) + delete!(ENV_HOST_PATTERN_CACHE, var) return MATCH_NONE_RE end - old_value, regex = get(HOST_PATTERN_CACHE, var, (nothing, nothing)) + old_value, regex = get(ENV_HOST_PATTERN_CACHE, var, (nothing, nothing)) old_value == value && return regex regex = host_pattern_regex(value, var) - HOST_PATTERN_CACHE[var] = (value, regex) + ENV_HOST_PATTERN_CACHE[var] = (value, regex) return regex end end diff --git a/test/runtests.jl b/test/runtests.jl index 06e33b7..455ecd3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,30 @@ include("setup.jl") -@testset "NetworkOptions.jl" begin +@testset "ca_roots" begin + withenv( + "JULIA_SSL_CA_ROOTS_PATH" => nothing, + ) do + @test ca_roots_path() isa String + @test ispath(ca_roots_path()) + if 
Sys.iswindows() || Sys.isapple() + @test ca_roots_path() == BUNDLED_CA_ROOTS + @test ca_roots() === nothing + else + @test ca_roots_path() != BUNDLED_CA_ROOTS + @test ca_roots() == ca_roots_path() + end + unset = ca_roots(), ca_roots_path() + value = "Why hello!" + ENV["JULIA_SSL_CA_ROOTS_PATH"] = value + @test ca_roots() == value + @test ca_roots_path() == value + ENV["JULIA_SSL_CA_ROOTS_PATH"] = "" + @test ca_roots() == unset[1] + @test ca_roots_path() == unset[2] + end +end + +@testset "verify_host" begin withenv( "JULIA_NO_VERIFY_HOSTS" => nothing, "JULIA_SSL_NO_VERIFY_HOSTS" => nothing, diff --git a/test/setup.jl b/test/setup.jl index ab827ac..49fca78 100644 --- a/test/setup.jl +++ b/test/setup.jl @@ -1,6 +1,7 @@ using Test using Logging using NetworkOptions +using NetworkOptions: BUNDLED_CA_ROOTS const TEST_URLS = [ "" # not a valid host name