2 changes: 2 additions & 0 deletions doc/whats-new.rst
@@ -100,6 +100,8 @@ Bug fixes
(:issue:`3402`). By `Deepak Cherian <https://github.com/dcherian/>`_
- Allow appending datetime and bool data variables to zarr stores.
(:issue:`3480`). By `Akihiro Matsukawa <https://github.com/amatsukawa/>`_.
- Make :py:func:`~xarray.concat` more robust when merging variables present in some datasets but
not others (:issue:`508`). By `Scott Chamberlin <http://github.com/scottcha>`_.

Documentation
~~~~~~~~~~~~~
22 changes: 18 additions & 4 deletions xarray/core/concat.py
@@ -1,7 +1,9 @@
import pandas as pd
import numpy as np

from . import dtypes, utils
from .alignment import align
from .common import full_like
from .duck_array_ops import lazy_array_equiv
from .merge import _VALID_COMPAT, unique_variable
from .variable import IndexVariable, Variable, as_variable
@@ -370,10 +372,22 @@ def ensure_common_dims(vars):
# n.b. this loop preserves variable order, needed for groupby.
for k in datasets[0].variables:
if k in concat_over:
try:
vars = ensure_common_dims([ds.variables[k] for ds in datasets])
except KeyError:
raise ValueError("%r is not present in all datasets." % k)
variables = []
for ds in datasets:
# if one of the variables doesn't exist find one which does
# and use it to create a fill value
if k not in ds.variables:
for ds in datasets:
Member:
This nested loop through datasets concerns me here. It means that concat will run in quadratic time with respect to the number of datasets being concatenated. This probably makes xarray.concat very slow on 1,000 datasets and outrageously slow on 10,000 datasets, both of which happen with some regularity.

It would be best to write this using a separate pass to create dummy versions of each Variable, which could be reused when appropriate.
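To illustrate the suggestion, here is a minimal sketch of the separate-pass idea: compute one NaN-filled template per variable up front and reuse it for every dataset that lacks that variable. The helper name build_fill_templates is hypothetical and is not part of xarray or of this PR.

import numpy as np
import xarray as xr


def build_fill_templates(datasets, concat_over):
    """Return a NaN-filled template Variable for each variable name,
    taken from the first dataset that actually contains it."""
    templates = {}
    for name in concat_over:
        for ds in datasets:
            if name in ds.variables:
                templates[name] = xr.full_like(
                    ds.variables[name], fill_value=np.nan, dtype=np.double
                )
                break  # one template per name is enough
    return templates

With the templates computed once, the main concatenation loop only needs a dictionary lookup when a variable is missing, so the overall cost stays roughly linear in the number of datasets.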

Contributor:
> It would be best to write this using a separate pass to create dummy versions of each Variable, which could be reused when appropriate.

This could happen in calc_concat_over.

Contributor Author:
The new PR contains improved logic but still requires going through the list of datasets a few times. I think the new worst-case runtime is O(DN^2), where D is the number of datasets and N is the number of variables in the final list. If no fill values are required, it will be O(DN).
I did some perf testing with the new logic versus the old and I don't really see a significant difference, but I would love additional feedback if there is a better way.

Perf result for concatenating 720 files via open_mfdataset (parallel=False) with the PR:
58.7 s ± 143 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Original result:
58.1 s ± 251 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

For 4359 files via open_mfdataset (parallel=False) with the PR:
5min 54s ± 840 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Sorry, I don't have a good real-world dataset this large without missing values to test the original implementation against, but this dataset, roughly 6x larger, took about 6x more time even with the penalty to cache and fill the missing values.

I don't currently have good data larger than this without missing variables (hence the PR :) ).

I was also not sure whether I should overload the logic in calc_concat_over to do more, but I can revisit this if the logic in the new PR looks like it should be refactored that way.

if k in ds.variables:
# found one to use as a fill value, fill with np.nan
filled = full_like(
ds.variables[k], fill_value=np.nan, dtype=np.double
)
break
variables.append(filled)
else:
variables.append(ds.variables[k])
vars = ensure_common_dims(variables)
combined = concat_vars(vars, dim, positions)
assert isinstance(combined, Variable)
result_vars[k] = combined
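For context, a small example of the behavior this hunk is meant to enable, adapted from the tests below (variable names and values are illustrative): a variable missing from one of the inputs is filled with NaN instead of raising an error.

import xarray as xr

ds1 = xr.Dataset({"x": ("a", [0]), "y": ("a", [0])})
ds2 = xr.Dataset({"x": ("a", [1])})  # "y" is absent here

# Previously this raised "'y' is not present in all datasets."; with this
# change, the missing "y" is padded with NaN for ds2.
combined = xr.concat([ds1, ds2], dim="a")
print(combined["y"].values)  # expected: [ 0. nan]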
10 changes: 6 additions & 4 deletions xarray/tests/test_combine.py
@@ -742,10 +742,16 @@ def test_auto_combine(self):
Dataset({"x": ("a", [0]), "y": ("a", [0])}),
Dataset({"y": ("a", [1]), "x": ("a", [1])}),
]

actual = auto_combine(objs)
expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])})
assert_identical(expected, actual)

objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
actual = auto_combine(objs)
expected = Dataset({"x": [0], "y": [0, np.nan]})
assert_identical(expected, actual)

objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
with raises_regex(ValueError, "too many .* dimensions"):
auto_combine(objs)
@@ -754,10 +760,6 @@ def test_auto_combine(self):
with raises_regex(ValueError, "cannot infer dimension"):
auto_combine(objs)

objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
with raises_regex(ValueError, "'y' is not present in all datasets"):
auto_combine(objs)

def test_auto_combine_previously_failed(self):
# In the above scenario, one file is missing, containing the data for
# one year's data for one variable.
17 changes: 14 additions & 3 deletions xarray/tests/test_concat.py
@@ -35,17 +35,28 @@ def test_concat_compat():
},
coords={"x": [0, 1], "y": [1], "z": [-1, -2], "q": [0]},
)

ds_concat = Dataset(
{
"has_x_y": (
("q", "y", "x"),
[[[np.nan, np.nan], [3, 4]], [[1, 2], [np.nan, np.nan]]],
),
"has_x": (("q", "x"), [[1, 2], [1, 2]]),
"no_x_y": (("q", "z"), [[1, 2], [1, 2]]),
},
coords={"x": [0, 1], "y": [0, 1], "z": [-1, -2], "q": [0, np.nan]},
)
result = concat([ds1, ds2], dim="y", data_vars="minimal", compat="broadcast_equals")
assert_equal(ds2.no_x_y, result.no_x_y.transpose())

for var in ["has_x", "no_x_y"]:
assert "y" not in result[var]

result2 = concat([ds2, ds1], dim="q")
assert_equal(ds_concat, result2)

with raises_regex(ValueError, "coordinates in some datasets but not others"):
concat([ds1, ds2], dim="q")
with raises_regex(ValueError, "'q' is not present in all datasets"):
concat([ds2, ds1], dim="q")


class TestConcatDataset: