diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml index ba942b23..bb6f585a 100644 --- a/.JuliaFormatter.toml +++ b/.JuliaFormatter.toml @@ -1,2 +1,2 @@ style="blue" -format_markdown=true \ No newline at end of file +format_markdown=true diff --git a/HISTORY.md b/HISTORY.md index 1697cd36..6f9a641c 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,9 @@ +# 0.15.13 + +Exports extra functionality that should probably have been exported, namely `ordered`, `isinvertible`, and `columnwise`, from Bijectors.jl + +The docs have been thoroughly restructured. + # 0.15.12 Improved implementation of the Enzyme rule for `Bijectors.find_alpha`. diff --git a/Project.toml b/Project.toml index cb5d3a99..3eba34fa 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "Bijectors" uuid = "76274a88-744f-5084-9051-94815aaf08c4" -version = "0.15.12" +version = "0.15.13" [deps] ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197" diff --git a/README.md b/README.md index 7155a11e..f6fea731 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,35 @@ # Bijectors.jl -[![Docs - Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://turinglang.github.io/Bijectors.jl/stable) -[![Docs - Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://turinglang.github.io/Bijectors.jl/dev) -[![Interface tests](https://github.com/TuringLang/Bijectors.jl/workflows/Interface%20tests/badge.svg?branch=main)](https://github.com/TuringLang/Bijectors.jl/actions?query=workflow%3A%22Interface+tests%22+branch%3Amain) -[![AD tests](https://github.com/TuringLang/Bijectors.jl/workflows/AD%20tests/badge.svg?branch=main)](https://github.com/TuringLang/Bijectors.jl/actions?query=workflow%3A%22AD+tests%22+branch%3Amain) +[![Documentation for latest stable release](https://img.shields.io/badge/docs-stable-blue.svg)](https://turinglang.github.io/Bijectors.jl) +[![Documentation for development 
version](https://img.shields.io/badge/docs-dev-blue.svg)](https://turinglang.github.io/Bijectors.jl/dev) +[![CI](https://github.com/TuringLang/Bijectors.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/TuringLang/Bijectors.jl/actions/workflows/CI.yml) -*A package for transforming distributions, used by [Turing.jl](https://github.com/TuringLang/Turing.jl).* +Bijectors.jl implements functions for transforming random variables and probability distributions. -Bijectors.jl implements both an interface for transforming distributions from Distributions.jl and many transformations needed in this context. -This package is used heavily in the probabilistic programming language Turing.jl. +A quick overview of some of the key functionality is provided below: -See the [documentation](https://turinglang.github.io/Bijectors.jl) for more. +```julia +julia> using Bijectors; + dist = LogNormal(); +LogNormal{Float64}(μ=0.0, σ=1.0) -## Do you want to contribute? +julia> x = rand(dist) # Constrained to (0, ∞) +0.6471106974390148 -If you feel you have some relevant skills and are interested in contributing, please get in touch! -You can find us in the #turing channel on the [Julia Slack](https://julialang.org/slack/) or [Discourse](https://discourse.julialang.org). -If you're having any problems, please open a Github issue, even if the problem seems small (like help figuring out an error message). -Every issue you open helps us to improve the library! +julia> b = bijector(dist) # This maps from (0, ∞) to ℝ +(::Base.Fix1{typeof(broadcast), typeof(log)}) (generic function with 1 method) + +julia> y = b(x) # Unconstrained value in ℝ +-0.43523790570180304 + +julia> # Log-absolute determinant of the Jacobian at x. + with_logabsdet_jacobian(b, x) +(-0.43523790570180304, 0.43523790570180304) +``` + +Please see the [documentation](https://turinglang.github.io/Bijectors.jl) for more information. 
+ +## Get in touch + +If you have any questions, please feel free to [post on Julia Slack](https://julialang.slack.com/archives/CCYDC34A0) or [Discourse](https://discourse.julialang.org/). +We also very much welcome GitHub issues or pull requests! diff --git a/docs/Project.toml b/docs/Project.toml index 63604fda..3b0979c9 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,6 +1,7 @@ [deps] Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" diff --git a/docs/make.jl b/docs/make.jl index 80ad43bf..d71d1031 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -9,10 +9,13 @@ makedocs(; format=Documenter.HTML(), modules=[Bijectors], pages=[ - "Home" => "index.md", - "Transforms" => "transforms.md", - "Distributions.jl integration" => "distributions.md", - "Examples" => "examples.md", + "index.md", + "interface.md", + "defining.md", + "distributions.md", + "types.md", + "advi.md", + "flows.md", ], checkdocs=:exports, doctest=false, diff --git a/docs/src/advi.md b/docs/src/advi.md new file mode 100644 index 00000000..60e305a8 --- /dev/null +++ b/docs/src/advi.md @@ -0,0 +1,94 @@ +# Example: Variational inference + +The real utility of `TransformedDistribution` becomes more apparent when using `transformed(dist, b)` for any bijector `b`. +To get the transformed distribution corresponding to the `Beta(2, 2)`, we called `transformed(dist)` before. +This is an alias for `transformed(dist, bijector(dist))`. +Remember `bijector(dist)` returns the constrained-to-unconstrained bijector for that particular `Distribution`. +But we can of course construct a `TransformedDistribution` using different bijectors with the same `dist`. + +This is particularly useful in _Automatic Differentiation Variational Inference (ADVI)_. 
+ +## Univariate ADVI + +An important part of ADVI is to approximate a constrained distribution, e.g. `Beta`, as follows: + + 1. Sample `x` from a `Normal` with parameters `μ` and `σ`, i.e. `x ~ Normal(μ, σ)`. + 2. Transform `x` to `y` s.t. `y ∈ support(Beta)`, with the transform being a differentiable bijection with a differentiable inverse (a "bijector"). + +This then defines a probability density with the same _support_ as `Beta`! +Of course, it's unlikely that it will be the same density, but it's an _approximation_. + +Creating such a distribution can be done with `Bijector` and `TransformedDistribution`: + +```@example advi +using Bijectors +using StableRNGs: StableRNG +rng = StableRNG(42) + +dist = Beta(2, 2) +b = bijector(dist) # (0, 1) → ℝ +b⁻¹ = inverse(b) # ℝ → (0, 1) +td = transformed(Normal(), b⁻¹) # x ∼ 𝓝(0, 1) then b(x) ∈ (0, 1) +x = rand(rng, td) # ∈ (0, 1) +``` + +It's worth noting that `support(Beta)` is the _closed_ interval `[0, 1]`, while the constrained-to-unconstrained bijection, `Logit` in this case, is only well-defined as a map `(0, 1) → ℝ` for the _open_ interval `(0, 1)`. +This is of course not an implementation detail. +`ℝ` is itself open, thus no continuous bijection exists from a _closed_ interval to `ℝ`. +But since the boundaries of a closed interval have what's known as measure zero, this doesn't end up affecting the resulting density with support on the entire real line. +In practice, this means that + +```@example advi +td = transformed(Beta()) +inverse(td.transform)(rand(rng, td)) +``` + +will never result in `0` or `1` though any sample arbitrarily close to either `0` or `1` is possible. +_Disclaimer: numerical accuracy is limited, so you might still see `0` and `1` if you're 'lucky'._ + +## Multivariate ADVI example + +We can also do _multivariate_ ADVI using the `Stacked` bijector. +`Stacked` gives us a way to combine univariate and/or multivariate bijectors into a single multivariate bijector. 
+Say you have a vector `x` of length 2 and you want to transform the first entry using `Exp` and the second entry using `Log`. +`Stacked` gives you an easy and efficient way of representing such a bijector. + +```@example advi +using Bijectors: SimplexBijector + +# Original distributions +dists = (Beta(), InverseGamma(), Dirichlet(2, 3)) + +# Construct the corresponding ranges +function make_ranges(dists) + ranges = [] + idx = 1 + for i in 1:length(dists) + d = dists[i] + push!(ranges, idx:(idx + length(d) - 1)) + idx += length(d) + end + return ranges +end + +ranges = make_ranges(dists) +ranges +``` + +```@example advi +# Base distribution; mean-field normal +num_params = ranges[end][end] + +d = MvNormal(zeros(num_params), ones(num_params)); + +# Construct the transform +bs = bijector.(dists) # constrained-to-unconstrained bijectors for dists +ibs = inverse.(bs) # invert, so we get unconstrained-to-constrained +sb = Stacked(ibs, ranges) # => Stacked <: Bijector + +# Mean-field normal with unconstrained-to-constrained stacked bijector +td = transformed(d, sb) +y = rand(td) +``` + +As can be seen from this, we now have a `y` for which `0.0 ≤ y[1] ≤ 1.0`, `0.0 < y[2]`, and `sum(y[3:4]) ≈ 1.0`. diff --git a/docs/src/defining.md b/docs/src/defining.md new file mode 100644 index 00000000..cd020dff --- /dev/null +++ b/docs/src/defining.md @@ -0,0 +1,94 @@ +# Defining a bijector + +This page describes the minimum expected interface to implement a bijector. + +In general, there are two pieces of information needed to define a bijector: + + 1. The transformation itself, i.e., the map $b: \mathbb{R}^d \to \mathbb{R}^d$. + + 2. The log-absolute determinant of the Jacobian of that transformation. 
+ For a transformation $b: \mathbb{R}^d \to \mathbb{R}^d$, the Jacobian at point $x \in \mathbb{R}^d$ is defined as: + + $$J_{b}(x) = \begin{bmatrix} + \partial y_1/\partial x_1 & \partial y_1/\partial x_2 & \cdots & \partial y_1/\partial x_d \\ + \partial y_2/\partial x_1 & \partial y_2/\partial x_2 & \cdots & \partial y_2/\partial x_d \\ + \vdots & \vdots & \ddots & \vdots \\ + \partial y_d/\partial x_1 & \partial y_d/\partial x_2 & \cdots & \partial y_d/\partial x_d + \end{bmatrix}$$ + + where $y = b(x)$. + +## The transform itself + +The most efficient way to implement a bijector is to provide an implementation of: + +```@docs; canonical=false +Bijectors.with_logabsdet_jacobian +``` + +If you define `with_logabsdet_jacobian(b, x)`, then you will automatically get default implementations of both `transform(b, x)` and `logabsdetjac(b, x)`, which respectively return the first and second value of that tuple. +So, in fact, you can implement a bijector by defining only `with_logabsdet_jacobian`. + +If you prefer, you can implement `transform` and `logabsdetjac` separately, as described below. +Having manual implementations of these may also be useful if you expect either to be used heavily without the other. + +### Transformation + +```@docs; canonical=false +transform +``` + +If `transform(b, x)` is defined, then you will automatically get a default implementation of `b(x)` which calls that. + +### Log-absolute determinant of the Jacobian + +```@docs; canonical=false +Bijectors.logabsdetjac +``` + +## Inverse + +Often you will want to define an inverse bijector as well. +To do so, you will have to implement: + +```@docs; canonical=false +Bijectors.inverse +``` + +If `b` is a bijector, then `inverse(b)` should return the inverse bijector $b^{-1}$. + +If your bijector subtypes `Bijectors.Bijector`, then you will get a default implementation of `inverse` which constructs `Bijectors.Inverse(b)`. +This may be easier than creating a second type for the inverse bijector. 
+Note that you will also need to implement the methods for `with_logabsdet_jacobian` (and/or `transform` and `logabsdetjac`) for the inverse bijector type. + +If your bijector is not invertible, you can specify this here: + +```@docs; canonical=false +Bijectors.isinvertible +``` + +## Distributions + +If your bijector is intended for use with a distribution, i.e., it transforms random variables drawn from that distribution to Euclidean space, then you should also implement: + +```@docs; canonical=false +Bijectors.bijector +``` + +which should return your bijector. + +On top of that, you should also implement a method for `Bijectors.output_size(b, dist::Distribution)`: + +```@docs; canonical=false +Bijectors.output_size +``` + +## Closed-form + +If your bijector does _not_ have a closed-form expression (e.g. if it uses an iterative procedure), then this should be set to false: + +```@docs; canonical=false +Bijectors.isclosedform +``` + +The default is `true` so you only need to set this if your bijector is not closed-form. diff --git a/docs/src/distributions.md b/docs/src/distributions.md index a1135516..57a62b6a 100644 --- a/docs/src/distributions.md +++ b/docs/src/distributions.md @@ -1,55 +1,78 @@ -## Basic usage +# Usage with distributions -Other than the `logpdf_with_trans` methods, the package also provides a more composable interface through the `Bijector` types. Consider for example the one from above with `Beta(2, 2)`. +Bijectors provides many utilities for working with probability distributions. -```julia -julia> using Random; - Random.seed!(42); +```@example distributions +using Bijectors + +dist = LogNormal() +x = rand(dist) +b = bijector(dist) # bijection (0, ∞) → ℝ + +y = b(x) +``` -julia> using Bijectors; - using Bijectors: Logit; +Here, `bijector(d::Distribution)` returns the corresponding constrained-to-unconstrained bijection for `LogNormal`, which is a log function. 
+The resulting bijector can be called, just like any other function, to transform samples from the distribution to the unconstrained space. -julia> dist = Beta(2, 2) -Beta{Float64}(α=2.0, β=2.0) +The function [`link`](@ref) provides a short way of doing the above: -julia> x = rand(dist) -0.36888689965963756 +```@example distributions +link(dist, x) ≈ b(x) +``` + +See [the Turing.jl docs](https://turinglang.org/docs/developers/transforms/distributions/) for more information about how this is used in probabilistic programming. + +## Transforming distributions -julia> b = bijector(dist) # bijection (0, 1) → ℝ -Logit{Float64}(0.0, 1.0) +We can also couple a distribution together with its bijector to create a _transformed_ `Distribution`, i.e. a `Distribution` defined by sampling from a given `Distribution` and then transforming using a given transformation: -julia> y = b(x) --0.5369949942509267 +```@example distributions +dist = LogNormal() # support on (0, ∞) +tdist = transformed(dist) # support on ℝ ``` -In this case we see that `bijector(d::Distribution)` returns the corresponding constrained-to-unconstrained bijection for `Beta`, which indeed is a `Logit` with `a = 0.0` and `b = 1.0`. The resulting `Logit <: Bijector` has a method `(b::Logit)(x)` defined, allowing us to call it just like any other function. Comparing with the above example, `b(x) ≈ link(dist, x)`. Just to convince ourselves: +We can then sample from, and compute the `logpdf` for, the resulting distribution: + +```@example distributions +y = rand(tdist) +``` + +```@example distributions +logpdf(tdist, y) +``` + +We should expect here that ```julia -julia> b(x) ≈ link(dist, x) -true +logpdf(tdist, y) ≈ logpdf(dist, x) - logabsdetjac(b, x) ``` -## Transforming distributions +where `b = bijector(dist)` and `y = b(x)`. 
-```@setup transformed-dist-simple -using Bijectors +To verify this, we can calculate the value of `x` using the inverse bijector: + +```@example distributions +b = bijector(dist) +binv = inverse(b) + +x = binv(y) ``` -We can create a _transformed_ `Distribution`, i.e. a `Distribution` defined by sampling from a given `Distribution` and then transforming using a given transformation: +(Because `b` is just a log function, `binv` is an exponential function, i.e. `x = exp(y)`.) -```@repl transformed-dist-simple -dist = Beta(2, 2) # support on (0, 1) -tdist = transformed(dist) # support on ℝ +Then we can check the equality: -tdist isa UnivariateDistribution +```@example distributions +logpdf(tdist, y) ≈ logpdf(dist, x) - logabsdetjac(b, x) ``` -We can the then compute the `logpdf` for the resulting distribution: +You can also use [`Bijectors.logpdf_with_trans`](@ref) with the original distribution: -```@repl transformed-dist-simple -# Some example values -x = rand(dist) -y = tdist.transform(x) +```@example distributions +logpdf_with_trans(dist, x, false) ≈ logpdf(dist, x) +``` -logpdf(tdist, y) +```@example distributions +logpdf_with_trans(dist, x, true) ≈ logpdf(tdist, y) ``` diff --git a/docs/src/examples.md b/docs/src/examples.md deleted file mode 100644 index 368d4bed..00000000 --- a/docs/src/examples.md +++ /dev/null @@ -1,163 +0,0 @@ -```@setup advi -using Bijectors -``` - -## Univariate ADVI example - -But the real utility of `TransformedDistribution` becomes more apparent when using `transformed(dist, b)` for any bijector `b`. To get the transformed distribution corresponding to the `Beta(2, 2)`, we called `transformed(dist)` before. This is simply an alias for `transformed(dist, bijector(dist))`. Remember `bijector(dist)` returns the constrained-to-constrained bijector for that particular `Distribution`. But we can of course construct a `TransformedDistribution` using different bijectors with the same `dist`. 
This is particularly useful in something called _Automatic Differentiation Variational Inference (ADVI)_.[2] An important part of ADVI is to approximate a constrained distribution, e.g. `Beta`, as follows: - - 1. Sample `x` from a `Normal` with parameters `μ` and `σ`, i.e. `x ~ Normal(μ, σ)`. - 2. Transform `x` to `y` s.t. `y ∈ support(Beta)`, with the transform being a differentiable bijection with a differentiable inverse (a "bijector") - -This then defines a probability density with same _support_ as `Beta`! Of course, it's unlikely that it will be the same density, but it's an _approximation_. Creating such a distribution becomes trivial with `Bijector` and `TransformedDistribution`: - -```@repl advi -using StableRNGs: StableRNG -rng = StableRNG(42); -dist = Beta(2, 2) -b = bijector(dist) # (0, 1) → ℝ -b⁻¹ = inverse(b) # ℝ → (0, 1) -td = transformed(Normal(), b⁻¹) # x ∼ 𝓝(0, 1) then b(x) ∈ (0, 1) -x = rand(rng, td) # ∈ (0, 1) -``` - -It's worth noting that `support(Beta)` is the _closed_ interval `[0, 1]`, while the constrained-to-unconstrained bijection, `Logit` in this case, is only well-defined as a map `(0, 1) → ℝ` for the _open_ interval `(0, 1)`. This is of course not an implementation detail. `ℝ` is itself open, thus no continuous bijection exists from a _closed_ interval to `ℝ`. But since the boundaries of a closed interval has what's known as measure zero, this doesn't end up affecting the resulting density with support on the entire real line. In practice, this means that - -```@repl advi -td = transformed(Beta()) -inverse(td.transform)(rand(rng, td)) -``` - -will never result in `0` or `1` though any sample arbitrarily close to either `0` or `1` is possible. _Disclaimer: numerical accuracy is limited, so you might still see `0` and `1` if you're lucky._ - -## Multivariate ADVI example - -We can also do _multivariate_ ADVI using the `Stacked` bijector. 
`Stacked` gives us a way to combine univariate and/or multivariate bijectors into a singe multivariate bijector. Say you have a vector `x` of length 2 and you want to transform the first entry using `Exp` and the second entry using `Log`. `Stacked` gives you an easy and efficient way of representing such a bijector. - -```@repl advi -using Bijectors: SimplexBijector - -# Original distributions -dists = (Beta(), InverseGamma(), Dirichlet(2, 3)); - -# Construct the corresponding ranges -ranges = []; -idx = 1; - -for i in 1:length(dists) - d = dists[i] - push!(ranges, idx:(idx + length(d) - 1)) - - global idx - idx += length(d) -end; - -ranges - -# Base distribution; mean-field normal -num_params = ranges[end][end] - -d = MvNormal(zeros(num_params), ones(num_params)); - -# Construct the transform -bs = bijector.(dists); # constrained-to-unconstrained bijectors for dists -ibs = inverse.(bs); # invert, so we get unconstrained-to-constrained -sb = Stacked(ibs, ranges) # => Stacked <: Bijector - -# Mean-field normal with unconstrained-to-constrained stacked bijector -td = transformed(d, sb); -y = rand(td) -0.0 ≤ y[1] ≤ 1.0 -0.0 < y[2] -sum(y[3:4]) ≈ 1.0 -``` - -## Normalizing flows - -A very interesting application is that of _normalizing flows_.[1] Usually this is done by sampling from a multivariate normal distribution, and then transforming this to a target distribution using invertible neural networks. Currently there are two such transforms available in Bijectors.jl: `PlanarLayer` and `RadialLayer`. Let's create a flow with a single `PlanarLayer`: - -```@setup normalizing-flows -using Bijectors -using StableRNGs: StableRNG -rng = StableRNG(42); -``` - -```@repl normalizing-flows -d = MvNormal(zeros(2), ones(2)); -b = PlanarLayer(2) -flow = transformed(d, b) -flow isa MultivariateDistribution -``` - -That's it. Now we can sample from it using `rand` and compute the `logpdf`, like any other `Distribution`. 
- -```@repl normalizing-flows -y = rand(rng, flow) -logpdf(flow, y) # uses inverse of `b` -``` - -Similarily to the multivariate ADVI example, we could use `Stacked` to get a _bounded_ flow: - -```@repl normalizing-flows -d = MvNormal(zeros(2), ones(2)); -ibs = inverse.(bijector.((InverseGamma(2, 3), Beta()))); -sb = Stacked(ibs) # == Stacked(ibs, [i:i for i = 1:length(ibs)] -b = sb ∘ PlanarLayer(2) -td = transformed(d, b); -y = rand(rng, td) -0 < y[1] -0 ≤ y[2] ≤ 1 -``` - -Want to fit the flow? - -```@repl normalizing-flows -using ForwardDiff - -# Construct the flow. -b = PlanarLayer(2) - -# Convenient for extracting parameters and reconstructing the flow. -using Functors -θs, reconstruct = Functors.functor(b); - -# Make the objective a `struct` to avoid capturing global variables. -struct NLLObjective{R,D,T} - reconstruct::R - basedist::D - data::T -end - -function (obj::NLLObjective)(θs) - transformed_dist = transformed(obj.basedist, obj.reconstruct(θs)) - return -sum(Base.Fix1(logpdf, transformed_dist), eachcol(obj.data)) -end - -# Some random data to estimate the density of. -xs = randn(2, 1000); - -# Construct the objective. -f = NLLObjective(reconstruct, MvNormal(2, 1), xs); - -# Initial loss. -@info "Initial loss: $(f(θs))" - -# Train using gradient descent. -ε = 1e-3; -for i in 1:100 - ∇s = ForwardDiff.gradient(θ -> f(θ), θs) - θs = fmap(θs, ∇s) do θ, ∇ - θ - ε .* ∇ - end -end - -# Final loss -@info "Final loss: $(f(θs))" - -# Very simple check to see if we learned something useful. -samples = rand(transformed(f.basedist, f.reconstruct(θs)), 1000); -mean(eachcol(samples)) # ≈ [0, 0] -cov(samples; dims=2) # ≈ I -``` - -We can easily create more complex flows by simply doing `PlanarLayer(10) ∘ PlanarLayer(10) ∘ RadialLayer(10)` and so on. 
diff --git a/docs/src/flows.md b/docs/src/flows.md new file mode 100644 index 00000000..73d37a5e --- /dev/null +++ b/docs/src/flows.md @@ -0,0 +1,115 @@ +# Example: Normalizing flows + +A very interesting application of bijectors is in _normalizing flows_. +Usually this is done by sampling from a multivariate normal distribution, and then transforming this to a target distribution using invertible neural networks. +Currently there are two such transforms available in Bijectors.jl: `PlanarLayer` and `RadialLayer`. +Let's create a flow with a single `PlanarLayer`: + +```@example normalizing-flows +using Bijectors +using StableRNGs: StableRNG +rng = StableRNG(42) + +d = MvNormal(zeros(2), ones(2)) +b = PlanarLayer(2) +flow = transformed(d, b) +``` + +`flow` is itself a multivariate distribution, so we can sample from it using `rand` and compute the `logpdf`, like any other `Distribution`. + +```@example normalizing-flows +y = rand(rng, flow) +logpdf(flow, y) # uses inverse of `b` +``` + +Similarly to the multivariate ADVI example, we could use `Stacked` to get a _bounded_ flow: + +```@example normalizing-flows +d = MvNormal(zeros(2), ones(2)); +ibs = inverse.(bijector.((InverseGamma(2, 3), Beta()))); +sb = Stacked(ibs) # == Stacked(ibs, [i:i for i = 1:length(ibs)]) +b = sb ∘ PlanarLayer(2) +td = transformed(d, b); +y = rand(rng, td) +``` + +(As required, we have that `0 < y[1]` and `0 ≤ y[2] ≤ 1`.) + +To fit the flow, we can define an objective function that computes the negative log-likelihood of some data. +We will need to use automatic differentiation to compute gradients of the objective with respect to the parameters. +Since most AD packages require vectorised inputs, this means we also need a way to convert between the vectorised parameters and the `PlanarLayer` struct. + +```@example normalizing-flows +using ForwardDiff + +# Construct the flow. +b = PlanarLayer(2) + +# Obtain a vectorised version of the parameters. 
+xs_init = vcat(b.w, b.u, b.b) + +# Function to reconstruct the `PlanarLayer` from vectorised parameters. +function reconstruct_planarlayer(xs::AbstractVector) + dim = 2 + w = xs[1:dim] + u = xs[(dim + 1):(2 * dim)] + b = xs[end:end] + return PlanarLayer(w, u, b) +end + +# Check that the reconstruction does work... +reconstruct_planarlayer(xs_init) == b +``` + +Here is the objective function: + +```@example normalizing-flows +# Make the objective a `struct` to avoid capturing global variables. +struct NLLObjective{R,D,T} + reconstruct::R + basedist::D + data::T +end + +function (obj::NLLObjective)(xs::AbstractVector) + transformed_dist = transformed(obj.basedist, obj.reconstruct(xs)) + return -sum(Base.Fix1(logpdf, transformed_dist), eachcol(obj.data)) +end + +# Some random data to estimate the density of. +xs = randn(2, 1000) + +# Construct the objective. +f = NLLObjective(reconstruct_planarlayer, MvNormal(2, 1), xs) + +println("Initial loss = $(f(xs_init)) at xs_init = $(xs_init)") +``` + +Now we can train the flow using gradient descent: + +```@example normalizing-flows +using ForwardDiff: ForwardDiff + +function train(xs_init, niters; stepsize=1e-3) + xs = xs_init + for i in 1:niters + grad = ForwardDiff.gradient(f, xs) + @. xs = xs - (stepsize * grad) + end + return xs +end +xs_trained = train(xs_init, 1000) + +println("Final loss = $(f(xs_trained)) at xs_trained = $(xs_trained)") +``` + +Finally, we can sample from the trained flow and check that the samples have approximately zero mean and identity covariance (as expected given that our data was sampled using `randn`): + +```@example normalizing-flows +samples = rand(transformed(f.basedist, f.reconstruct(xs_trained)), 1000); + +# mean ≈ [0, 0], cov ≈ I +mean(eachcol(samples)), cov(samples; dims=2) +``` + +More complex flows can be created by composing multiple layers, e.g. `PlanarLayer(10) ∘ PlanarLayer(10) ∘ RadialLayer(10)`. 
diff --git a/docs/src/index.md b/docs/src/index.md index 78b78331..a6e8fc1b 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,42 +1,33 @@ # Bijectors.jl -This package implements a set of functions for transforming constrained random variables (e.g. simplexes, intervals) to Euclidean space. -The 3 main functions implemented in this package are the `link`, `invlink` and `logpdf_with_trans` for a number of distributions. +This package implements functionality for transforming random variables to Euclidean space (and back). -```@docs -Bijectors.link -Bijectors.invlink -Bijectors.logpdf_with_trans +For example, consider a random variable $X \sim \mathrm{Beta}(2, 2)$, which has support on $(0, 1)$: + +```@example main +using Bijectors + +x = rand(Beta(2, 2)) +``` + +In this case, the [logit function](https://en.wikipedia.org/wiki/Logit) is used as the transformation: + +$Y = \mathrm{logit}(X) = \log\left(\frac{X}{1 - X}\right).$ + +We can construct this function + +```@example main +b = bijector(Beta(2, 2)) +``` + +and apply it to `x`: + +```@example main +y = b(x) ``` -The distributions supported are: - - 1. `RealDistribution`: `Union{Cauchy, Gumbel, Laplace, Logistic, NoncentralT, Normal, NormalCanon, TDist}`, - 2. `PositiveDistribution`: `Union{BetaPrime, Chi, Chisq, Erlang, Exponential, FDist, Frechet, Gamma, InverseGamma, InverseGaussian, Kolmogorov, LogNormal, NoncentralChisq, NoncentralF, Rayleigh, Weibull}`, - 3. `UnitDistribution`: `Union{Beta, KSOneSided, NoncentralBeta}`, - 4. `SimplexDistribution`: `Union{Dirichlet}`, - 5. `PDMatDistribution`: `Union{InverseWishart, Wishart}`, and - 6. `TransformDistribution`: `Union{T, Truncated{T}} where T<:ContinuousUnivariateDistribution`. - -All exported names from the [Distributions.jl](https://juliastats.org/Distributions.jl/stable/) package are reexported from `Bijectors`. - -Bijectors.jl also provides a nice interface for working with these maps: composition, inversion, etc. 
-The following table lists mathematical operations for a bijector and the corresponding code in Bijectors.jl. - -| Operation | Method | Automatic | -|:-------------------------------------------:|:-------------------------------:|:---------:| -| `b ↦ b⁻¹` | `inverse(b)` | ✓ | -| `(b₁, b₂) ↦ (b₁ ∘ b₂)` | `b₁ ∘ b₂` | ✓ | -| `(b₁, b₂) ↦ [b₁, b₂]` | `Stacked(b₁, b₂)` | ✓ | -| `x ↦ b(x)` | `b(x)` | × | -| `y ↦ b⁻¹(y)` | `inverse(b)(y)` | × | -| `x ↦ log|det J(b, x)|` | `logabsdetjac(b, x)` | AD | -| `x ↦ b(x), log|det J(b, x)|` | `with_logabsdet_jacobian(b, x)` | ✓ | -| `p ↦ q := b_* p` | `q = transformed(p, b)` | ✓ | -| `y ∼ q` | `y = rand(q)` | ✓ | -| `p ↦ b` such that `support(b_* p) = ℝᵈ` | `bijector(p)` | ✓ | -| `(x ∼ p, b(x), log|det J(b, x)|, log q(y))` | `forward(q)` | ✓ | - -In this table, `b` denotes a `Bijector`, `J(b, x)` denotes the Jacobian of `b` evaluated at `x`, `b_*` denotes the [push-forward](https://www.wikiwand.com/en/Pushforward_measure) of `p` by `b`, and `x ∼ p` denotes `x` sampled from the distribution with density `p`. - -The "Automatic" column in the table refers to whether or not you are required to implement the feature for a custom `Bijector`. "AD" refers to the fact that it can be implemented "automatically" using automatic differentiation. +You can also obtain the log absolute determinant of the Jacobian of the transformation: + +```@example main +y, ladj = with_logabsdet_jacobian(b, x) +``` diff --git a/docs/src/interface.md b/docs/src/interface.md new file mode 100644 index 00000000..b36688ea --- /dev/null +++ b/docs/src/interface.md @@ -0,0 +1,67 @@ +# Interface + +This page describes the user-facing interface of Bijectors.jl. +You should be able to use all the functions documented here with any bijector defined in Bijectors.jl. + +## Transformation + +```@docs +transform +transform! +``` + +Bijectors are also callable objects, so `b(x)` is equivalent to `transform(b, x)`. 
+ +## Inverses + +```@docs +inverse +``` + +## Log-absolute determinant of the Jacobian + +```@docs +logabsdetjac +logabsdetjac! +logabsdetjacinv +with_logabsdet_jacobian +with_logabsdet_jacobian! +``` + +## Transform wrappers + +### Elementwise transformation + +Some transformations are well-defined for different types of inputs, e.g. `exp` can also act elementwise on an `N`-dimensional `Array{<:Real,N}`. +To specify that a transformation should act elementwise, we can wrap it in the `elementwise` wrapper: + +```@docs +Bijectors.elementwise +``` + +### Columnwise transformation + +Likewise: + +```@docs +Bijectors.columnwise +``` + +## Working with distributions + +```@docs +Bijectors.bijector +Bijectors.link +Bijectors.invlink +Bijectors.logpdf_with_trans +Bijectors.output_size +Bijectors.transformed(d::Distribution, b::Bijector) +Bijectors.ordered +``` + +## Utilities + +```@docs +Bijectors.isinvertible +Bijectors.isclosedform(t::Bijectors.Transform) +``` diff --git a/docs/src/transforms.md b/docs/src/transforms.md deleted file mode 100644 index bf09ed18..00000000 --- a/docs/src/transforms.md +++ /dev/null @@ -1,106 +0,0 @@ -## Usage - -A very simple example of a "bijector"/diffeomorphism, i.e. a differentiable transformation with a differentiable inverse, is the `exp` function: - - - The inverse of `exp` is `log`. - - The derivative of `exp` at an input `x` is simply `exp(x)`, hence `logabsdetjac` is simply `x`. - -```@repl usage -using Bijectors -transform(exp, 1.0) -logabsdetjac(exp, 1.0) -with_logabsdet_jacobian(exp, 1.0) -``` - -Some transformations are well-defined for different types of inputs, e.g. `exp` can also act elementwise on an `N`-dimensional `Array{<:Real,N}`. 
-To specify that a transformation should act elementwise, we use the [`elementwise`](@ref) method: - -```@repl usage -x = ones(2, 2) -transform(elementwise(exp), x) -logabsdetjac(elementwise(exp), x) -with_logabsdet_jacobian(elementwise(exp), x) -``` - -These methods also work nicely for compositions of transformations: - -```@repl usage -transform(elementwise(log ∘ exp), x) -``` - -Unlike `exp`, some transformations have parameters affecting the resulting transformation they represent, e.g. `Logit` has two parameters `a` and `b` representing the lower- and upper-bound, respectively, of its domain: - -```@repl usage -using Bijectors: Logit - -f = Logit(0.0, 1.0) -f(rand()) # takes us from `(0, 1)` to `(-∞, ∞)` -``` - -## User-facing methods - -Without mutation: - -```@docs -transform -logabsdetjac -logabsdetjacinv -inverse -``` - -```julia -with_logabsdet_jacobian -``` - -With mutation: - -```@docs -transform! -logabsdetjac! -with_logabsdet_jacobian! -``` - -## Implementing a transformation - -Any callable can be made into a bijector by providing an implementation of `ChangeOfVariables.with_logabsdet_jacobian(b, x)`. - -You can also optionally implement [`transform`](@ref) and [`logabsdetjac`](@ref) to avoid redundant computations. This is usually only worth it if you expect `transform` or `logabsdetjac` to be used heavily without the other. - -Similarly with the mutable versions [`with_logabsdet_jacobian!`](@ref), [`transform!`](@ref), and [`logabsdetjac!`](@ref). 
- -## Working with Distributions.jl - -```@docs -Bijectors.bijector -Bijectors.transformed(d::Distribution, b::Bijector) -``` - -## Utilities - -```@docs -Bijectors.output_size -Bijectors.elementwise -Bijectors.isinvertible -Bijectors.isclosedform(t::Bijectors.Transform) -``` - -## API - -```@docs -Bijectors.Transform -Bijectors.Bijector -Bijectors.Inverse -``` - -## Bijectors - -```@docs -Bijectors.CorrBijector -Bijectors.LeakyReLU -Bijectors.Stacked -Bijectors.RationalQuadraticSpline -Bijectors.Coupling -Bijectors.OrderedBijector -Bijectors.NamedTransform -Bijectors.NamedCoupling -``` diff --git a/docs/src/types.md b/docs/src/types.md new file mode 100644 index 00000000..eeb14108 --- /dev/null +++ b/docs/src/types.md @@ -0,0 +1,24 @@ +# Types API + +This page includes docstrings for some types defined in Bijectors. + +## General types + +```@docs +Bijectors.Transform +Bijectors.Bijector +Bijectors.Inverse +``` + +## Specific bijectors + +```@docs +Bijectors.CorrBijector +Bijectors.LeakyReLU +Bijectors.Stacked +Bijectors.RationalQuadraticSpline +Bijectors.Coupling +Bijectors.OrderedBijector +Bijectors.NamedTransform +Bijectors.NamedCoupling +``` diff --git a/src/Bijectors.jl b/src/Bijectors.jl index c98df676..e277affb 100644 --- a/src/Bijectors.jl +++ b/src/Bijectors.jl @@ -52,9 +52,11 @@ export TransformDistribution, UnitDistribution, SimplexDistribution, PDMatDistribution, + # basic interface link, invlink, logpdf_with_trans, + isinvertible, isclosedform, transform, transform!, @@ -64,19 +66,25 @@ export TransformDistribution, logabsdetjac, logabsdetjac!, logabsdetjacinv, - Bijector, - Inverse, - Stacked, bijector, transformed, + # types and specific bijectors + Bijector, UnivariateTransformed, MultivariateTransformed, + Inverse, + # specific bijectors + Stacked, PlanarLayer, RadialLayer, Coupling, InvertibleBatchNorm, + # transform wrappers elementwise, - output_size + columnwise, + # utilities + output_size, + ordered const DEBUG = Bool(parse(Int, get(ENV, 
"DEBUG_BIJECTORS", "0"))) _debug(str) = @debug str diff --git a/src/interface.jl b/src/interface.jl index 3531e0e0..620ec071 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -12,20 +12,61 @@ Alias for `Base.Fix1(broadcast, f)`. In the case where `f::ComposedFunction`, the result is `Base.Fix1(broadcast, f.outer) ∘ Base.Fix1(broadcast, f.inner)` rather than `Base.Fix1(broadcast, f)`. + +# Examples + +```jldoctest; setup = :(using Bijectors) +julia> x = [1.0, 2.0, 3.0]; + +julia> f = elementwise(exp); + +julia> f(x) +3-element Vector{Float64}: + 2.718281828459045 + 7.38905609893065 + 20.085536923187668 + +julia> with_logabsdet_jacobian(f, x) +([2.718281828459045, 7.38905609893065, 20.085536923187668], 6.0) +``` """ elementwise(f) = Base.Fix1(broadcast, f) -elementwise(f::typeof(identity)) = identity +elementwise(::typeof(identity)) = identity # TODO: This is makes dispatching quite a bit easier, but uncertain if this is really # the way to go. function elementwise(f::ComposedFunction) return ComposedFunction(elementwise(f.outer), elementwise(f.inner)) end + const Columnwise{F} = Base.Fix1{typeof(eachcolmaphcat),F} """ + columnwise(f) Alias for `Base.Fix1(eachcolmaphcat, f)`. Represents a function `f` which is applied to each column of an input. + +# Examples + +```jldoctest; setup = :(using Bijectors) +julia> x = [4.0 5.0 6.0; 1.0 2.0 3.0]; + +julia> my_reverse(v) = reverse(v); # To avoid type piracy. + +julia> f = columnwise(my_reverse); + +julia> f(x) +2×3 Matrix{Float64}: + 1.0 2.0 3.0 + 4.0 5.0 6.0 + +julia> # We can't use `with_logabsdet_jacobian` on `f` until we define it + # for `my_reverse`, since we need to sum over columns. 
+ Bijectors.with_logabsdet_jacobian(::typeof(my_reverse), xs) = my_reverse(xs), 0.0; + +julia> with_logabsdet_jacobian(f, x) +([1.0 2.0 3.0; 4.0 5.0 6.0], 0.0) +``` """ columnwise(f) = Base.Fix1(eachcolmaphcat, f) inverse(f::Columnwise) = columnwise(inverse(f.x)) @@ -39,19 +80,19 @@ with_logabsdet_jacobian(f::Columnwise, x::AbstractMatrix) = (f(x), logabsdetjac( """ output_size(f, sz) -Returns the output size of `f` given the input size `sz`. +Returns the size of `f(x)` when given an input `x` of size `sz`. """ output_size(f, sz) = sz output_size(f::ComposedFunction, sz) = output_size(f.outer, output_size(f.inner, sz)) """ output_size(f, dist::Distribution) -Returns the output size of `f` given the input distribution `dist`. This is useful when -Base.size(dist) is not defined, e.g. for `ProductNamedTupleDistribution` and in particular -is used by DynamicPPL when generating new random values for transformed distributions. +Returns the output size of `f` given an input drawn from the distribution `dist`. By default this just calls `output_size(f, size(dist))`, but this can be overloaded for -specific distributions. +specific distributions. This is useful when `Base.size(dist)` is not defined, e.g. for +`ProductNamedTupleDistribution` and in particular is used by DynamicPPL when generating new +random values for transformed distributions. """ output_size(f, dist::Distribution) = output_size(f, size(dist)) output_size(f::ComposedFunction, dist::Distribution) = output_size(f, size(dist)) @@ -59,7 +100,7 @@ output_size(f::ComposedFunction, dist::Distribution) = output_size(f, size(dist) """ output_length(f, len::Int) -Returns the output length of `f` given the input length `len`. +Returns the length of `f(x)` given a vector input `x` of length `len`. 
""" output_length(f, len::Int) = only(output_size(f, (len,))) @@ -93,12 +134,24 @@ abstract type Transform end (t::Transform)(x) = transform(t, x) +""" + with_logabsdet_jacobian(t::Transform, x) + +Semantically, this must return a tuple of `(y, logabsdetjac)`, where `y = transform(t, x)` +and `logabsdetjac = logabsdetjac(t, x)`. However, you can implement this function to exploit +shared computation between the two quantities. +""" +function with_logabsdet_jacobian end + Broadcast.broadcastable(b::Transform) = Ref(b) """ transform(b, x) -Transform `x` using `b`, treating `x` as a single input. +Transform `x` using `b`. + +If `with_logabsdet_jacobian` is already implemented for `b`, the default implementation of +`transform` will call `first(with_logabsdet_jacobian(b, x))`. """ transform(f::F, x) where {F<:Function} = f(x) function transform(t::Transform, x)