From 649dd15e65e26af1710e20ee3755db20bdf103cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <fkiraly@gcos.ai>
Date: Thu, 7 Aug 2025 22:32:54 +0200
Subject: [PATCH 1/6] Update _base.py

---
 skpro/distributions/base/_base.py | 93 ++++++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 2 deletions(-)

diff --git a/skpro/distributions/base/_base.py b/skpro/distributions/base/_base.py
index 16afaeddb..bb21cbced 100644
--- a/skpro/distributions/base/_base.py
+++ b/skpro/distributions/base/_base.py
@@ -236,15 +236,49 @@ def _loc(self, rowidx=None, colidx=None):
             colidx = pd.Index([colidx])
 
         if rowidx is not None:
-            row_iloc = self.index.get_indexer_for(rowidx)
+            row_iloc = self._get_indexer_like_pandas(self.index, rowidx)
         else:
             row_iloc = None
         if colidx is not None:
-            col_iloc = self.columns.get_indexer_for(colidx)
+            col_iloc = self._get_indexer_like_pandas(self.columns, colidx)
         else:
             col_iloc = None
         return self._iloc(rowidx=row_iloc, colidx=col_iloc)
 
+    def _get_indexer_like_pandas(self, index, keys):
+        """
+        A unified helper that mimics pandas' get_indexer_for but supports:
+
+        - scalar key (e.g., "a", ("a", 1))
+        - tuple key (partial or full)
+        - list of keys (partial or full)
+        - works for both Index and MultiIndex
+
+        Returns:
+            np.ndarray of positions (integers)
+        """
+        if is_scalar_notnone(keys) or isinstance(keys, tuple):
+            keys = [keys]
+
+        if isinstance(index, pd.MultiIndex):
+            # Use get_locs for each key (full or partial)
+            ilocs = []
+            for key in keys:
+                if isinstance(key, slice):
+                    ilocs.append(index.slice_indexer(key.start, key.stop, key.step))
+                else:
+                    iloc = index.get_locs([key])
+                    if isinstance(iloc, slice):
+                        iloc = np.arange(len(index))[iloc]
+                    ilocs.append(iloc)
+            return np.concatenate(ilocs) if ilocs else np.array([], dtype=int)
+        # if not isinstance(index, pd.MultiIndex):
+        # Regular Index
+        if isinstance(keys, slice):
+            return np.arange(len(index))[index.slice_indexer(keys.start, keys.stop, keys.step)]
+        return index.get_indexer(keys)
+
+
     def _at(self, rowidx=None, colidx=None):
         if rowidx is not None:
             row_iloc = self.index.get_indexer_for([rowidx])[0]
@@ -772,6 +806,61 @@ def _log_pdf(self, x):
 
         raise NotImplementedError(self._method_error_msg("log_pdf", "error"))
 
+    def pdfj(self, x):
+        r"""Probability density function.
+
+        Let :math:`X` be a random variables with the distribution of ``self``,
+        taking values in ``(N, n)`` ``DataFrame``-s
+        Let :math:`x\in \mathbb{R}^{N\times n}`.
+        By :math:`p_{X_{ij}}`, denote the marginal pdf of :math:`X` at the
+        :math:`(i,j)`-th entry.
+
+        The output of this method, for input ``x`` representing :math:`x`,
+        is a ``DataFrame`` with same columns and indices as ``self``,
+        and entries :math:`p_{X_{ij}}(x_{ij})`.
+
+        If ``self`` has a mixed or discrete distribution, this returns
+        the weighted continuous part of `self`'s distribution instead of the pdf,
+        i.e., the marginal pdf integrate to the weight of the continuous part.
+
+        Parameters
+        ----------
+        x : ``pandas.DataFrame`` or 2D ``np.ndarray``
+            representing :math:`x`, as above
+
+        Returns
+        -------
+        ``pd.DataFrame`` with same columns and index as ``self``
+            containing :math:`p_{X_{ij}}(x_{ij})`, as above
+        """
+        distr_type = self.get_tag("distr:measuretype", "mixed", raise_error=False)
+        if distr_type == "discrete":
+            return self._coerce_to_self_index_df(0, flatten=False)
+
+        return self._boilerplate("_pdf", x=x)
+
+    def _pdf(self, x):
+        """Probability density function.
+
+        Private method, to be implemented by subclasses.
+        """
+        self_has_logpdf = self._has_implementation_of("log_pdf")
+        self_has_logpdf = self_has_logpdf or self._has_implementation_of("_log_pdf")
+        if self_has_logpdf:
+            approx_method = (
+                "by exponentiating the output returned by the log_pdf method, "
+                "this may be numerically unstable"
+            )
+            warn(self._method_error_msg("pdf", fill_in=approx_method))
+
+            x = self._coerce_to_self_index_df(x, flatten=False)
+            res = self.log_pdf(x=x)
+            if isinstance(res, pd.DataFrame):
+                res = res.values
+            return np.exp(res)
+
+        raise NotImplementedError(self._method_error_msg("pdf", "error"))
+
     def pmf(self, x):
         r"""Probability mass function.
 

From f062c5a60183cd107219ca95ce3c96f81dcf4380 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <fkiraly@gcos.ai>
Date: Thu, 7 Aug 2025 22:33:11 +0200
Subject: [PATCH 2/6] Update test_all_distrs.py

---
 skpro/distributions/tests/test_all_distrs.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/skpro/distributions/tests/test_all_distrs.py b/skpro/distributions/tests/test_all_distrs.py
index 1fed1c5c9..23d3e191e 100644
--- a/skpro/distributions/tests/test_all_distrs.py
+++ b/skpro/distributions/tests/test_all_distrs.py
@@ -50,10 +50,12 @@ def _has_capability(distr, method):
 
 METHODS_SCALAR = ["mean", "var", "energy"]
 METHODS_SCALAR_POS = ["var", "energy"]  # result always non-negative?
-METHODS_X = ["energy", "pdf", "log_pdf", "pmf", "log_pmf", "cdf"]
-METHODS_X_POS = ["energy", "pdf", "pmf", "cdf", "surv", "haz"]  # result non-negative?
+METHODS_X = ["energy", "pdf", "log_pdf", "pmf", "log_pmf", "cdf", "pdfj"]
+METHODS_X_POS = [
+    "energy", "pdf", "pmf", "cdf", "surv", "haz", "pdfj"
+]  # result non-negative?
 METHODS_P = ["ppf"]
-METHODS_ROWWISE = ["energy"]  # results in one column
+METHODS_ROWWISE = ["energy", "pdfj"]  # results in one column
 
 
 class TestAllDistributions(PackageConfig, DistributionFixtureGenerator, QuickTester):

From f407d2db6593321eab4d71208a4be34a1f5ebea1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <fkiraly@gcos.ai>
Date: Thu, 7 Aug 2025 22:33:30 +0200
Subject: [PATCH 3/6] revert

---
 skpro/distributions/base/_base.py | 38 ++-----------------------------
 1 file changed, 2 insertions(+), 36 deletions(-)

diff --git a/skpro/distributions/base/_base.py b/skpro/distributions/base/_base.py
index bb21cbced..48ad5981f 100644
--- a/skpro/distributions/base/_base.py
+++ b/skpro/distributions/base/_base.py
@@ -236,49 +236,15 @@ def _loc(self, rowidx=None, colidx=None):
             colidx = pd.Index([colidx])
 
         if rowidx is not None:
-            row_iloc = self._get_indexer_like_pandas(self.index, rowidx)
+            row_iloc = self.index.get_indexer_for(rowidx)
         else:
             row_iloc = None
         if colidx is not None:
-            col_iloc = self._get_indexer_like_pandas(self.columns, colidx)
+            col_iloc = self.columns.get_indexer_for(colidx)
         else:
             col_iloc = None
         return self._iloc(rowidx=row_iloc, colidx=col_iloc)
 
-    def _get_indexer_like_pandas(self, index, keys):
-        """
-        A unified helper that mimics pandas' get_indexer_for but supports:
-
-        - scalar key (e.g., "a", ("a", 1))
-        - tuple key (partial or full)
-        - list of keys (partial or full)
-        - works for both Index and MultiIndex
-
-        Returns:
-            np.ndarray of positions (integers)
-        """
-        if is_scalar_notnone(keys) or isinstance(keys, tuple):
-            keys = [keys]
-
-        if isinstance(index, pd.MultiIndex):
-            # Use get_locs for each key (full or partial)
-            ilocs = []
-            for key in keys:
-                if isinstance(key, slice):
-                    ilocs.append(index.slice_indexer(key.start, key.stop, key.step))
-                else:
-                    iloc = index.get_locs([key])
-                    if isinstance(iloc, slice):
-                        iloc = np.arange(len(index))[iloc]
-                    ilocs.append(iloc)
-            return np.concatenate(ilocs) if ilocs else np.array([], dtype=int)
-        # if not isinstance(index, pd.MultiIndex):
-        # Regular Index
-        if isinstance(keys, slice):
-            return np.arange(len(index))[index.slice_indexer(keys.start, keys.stop, keys.step)]
-        return index.get_indexer(keys)
-
-
     def _at(self, rowidx=None, colidx=None):
         if rowidx is not None:
             row_iloc = self.index.get_indexer_for([rowidx])[0]

From 4630d831da7574535bb5b53343cc2d14bb050d38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <fkiraly@gcos.ai>
Date: Sat, 1 Nov 2025 15:37:01 +0100
Subject: [PATCH 4/6] Update _base.py

---
 skpro/distributions/base/_base.py | 75 ++++++++++++++++---------------
 1 file changed, 38 insertions(+), 37 deletions(-)

diff --git a/skpro/distributions/base/_base.py b/skpro/distributions/base/_base.py
index eac60c764..791dade9f 100644
--- a/skpro/distributions/base/_base.py
+++ b/skpro/distributions/base/_base.py
@@ -699,7 +699,7 @@ def _boilerplate(self, method, columns=None, **kwargs):
             res = res[()]
         return res
 
-    def pdf(self, x):
+    def pdf(self, x, axis=None):
         r"""Probability density function.
 
         Let :math:`X` be a random variables with the distribution of ``self``,
@@ -713,18 +713,52 @@ def pdf(self, x):
         and entries :math:`p_{X_{ij}}(x_{ij})`.
 
         If ``self`` has a mixed or discrete distribution, this returns
-        the weighted continuous part of `self`'s distribution instead of the pdf,
+        the weighted continuous part of ``self``'s distribution instead of the pdf,
         i.e., the marginal pdf integrate to the weight of the continuous part.
 
+        Joint pdfs can be obtained by specifying the ``axis`` argument:
+
+        * ``axis=0`` : joint pdf along rows.
+          Result is a single-row ``DataFrame`` corresponding to
+          :math:`p_{X_{\cdot j}}(x_{\cdot j})`, where :math:`X_{\cdot j}` is the
+          random variable corresponding to the :math:`j`-th column of :math:`X`,
+          :math:`x_{\cdot j}` is the :math:`j`-th column of :math:`x`,
+          and :math:`p_{X_{\cdot j}}` is the joint pdf of :math:`X_{\cdot j}`.
+        * ``axis=1`` : joint pdf along columns.
+          Result is a single-column ``DataFrame`` corresponding to
+          :math:`p_{X_{i \cdot}}(x_{i \cdot})`, where :math:`X_{i \cdot}` is the
+          random variable corresponding to the :math:`i`-th row of :math:`X`,
+          :math:`x_{i \cdot}` is the :math:`i`-th row of :math:`x`,
+        * ``axis=(0, 1)`` : joint pdf along rows and columns.
+          Result is a single scalar value, corresponding to
+          :math:`p_{X}(x)`, where :math:`p_{X}` is the joint pdf of :math:`X`.
+
         Parameters
         ----------
         x : ``pandas.DataFrame`` or 2D ``np.ndarray``
             representing :math:`x`, as above
+        axis : None or tuple of int, default=None
+            Axes or axis along which the pdf is joint:
+
+            * None : marginal pdfs are returned (default).
+              Result has same shape as ``self`` and same index and columns.
+            * 0 : joint pdf along rows, result has one row and same columns as ``self``.
+            * 1 : joint pdf along columns,
+              result has one column and same index as ``self``.
+            * (0, 1) : joint pdf along rows and columns,
+              result is a single scalar, a numpy float.
 
         Returns
         -------
-        ``pd.DataFrame`` with same columns and index as ``self``
-            containing :math:`p_{X_{ij}}(x_{ij})`, as above
+        ``pd.DataFrame``
+            with same columns and index as ``self`` at default (``axis=None``),
+            containing :math:`p_{X_{ij}}(x_{ij})`, as above.
+
+            * if ``axis=0``, single-row ``DataFrame`` with joint pdfs along rows,
+              columns same as ``self``, row index is ``[0]``
+            * if ``axis=1``, single-column ``DataFrame`` with joint pdfs along columns
+              index same as ``self``, column name is ``'pdf'``
+            * if ``axis=(0, 1)``, single scalar value, a numpy float
         """
         distr_type = self.get_tag("distr:measuretype", "mixed", raise_error=False)
         if distr_type == "discrete":
@@ -833,39 +867,6 @@ def _log_pdf(self, x):
 
         raise NotImplementedError(self._method_error_msg("log_pdf", "error"))
 
-    def pdfj(self, x):
-        r"""Probability density function.
-
-        Let :math:`X` be a random variables with the distribution of ``self``,
-        taking values in ``(N, n)`` ``DataFrame``-s
-        Let :math:`x\in \mathbb{R}^{N\times n}`.
-        By :math:`p_{X_{i}}`, denote the marginal pdf of :math:`X` at the
-        :math:`i)`-th row.
-
-        The output of this method, for input ``x`` representing :math:`x`,
-        is a ``DataFrame`` with same indices as ``self``, a single column ``'pdf'``,
-        and entries :math:`p_{X_{i}}(x_{i})`.
-
-        If ``self`` has a mixed or discrete distribution, this returns
-        the weighted continuous part of `self`'s distribution instead of the pdf,
-        i.e., the marginal pdf integrated to the weight of the continuous part.
-
-        Parameters
-        ----------
-        x : ``pandas.DataFrame`` or 2D ``np.ndarray``
-            representing :math:`x`, as above
-
-        Returns
-        -------
-        ``pd.DataFrame`` with same index as ``self`` and single column ``'pdf'``,
-            containing :math:`p_{X_{i}}(x_{i})`, as above
-        """
-        distr_type = self.get_tag("distr:measuretype", "mixed", raise_error=False)
-        if distr_type == "discrete":
-            return self._coerce_to_self_index_df(0, flatten=False)
-
-        return self._boilerplate("_jpdf", x=x)
-
     @staticmethod
     def _approx_derivative(x, fun, h=1e-7):
         """Approximate the derivative of the log PDF using finite differences.

From 778359afe074270bd67e15f0eacfb514e0e235fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <fkiraly@gcos.ai>
Date: Sat, 1 Nov 2025 16:10:00 +0100
Subject: [PATCH 5/6] Update _base.py

---
 skpro/distributions/base/_base.py | 54 ++++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/skpro/distributions/base/_base.py b/skpro/distributions/base/_base.py
index 791dade9f..8e0ee17af 100644
--- a/skpro/distributions/base/_base.py
+++ b/skpro/distributions/base/_base.py
@@ -26,6 +26,9 @@ class BaseDistribution(BaseObject):
         # -------------
         "distr:measuretype": "mixed",  # distribution type, mixed, continuous, discrete
         "distr:paramtype": "general",
+        "property:multivariate": False,  # whether distribution is multivariate
+        "property:indep_axes": (0, 1),  # axes along which distr is independent
+        #
         # parameterization type - parametric, nonparametric, composite
         #
         # default parameter settings for MC estimates
@@ -679,6 +682,8 @@ def _boilerplate(self, method, columns=None, **kwargs):
                 x_inner = x.values
             # else, coerce to a numpy array if needed
             # then, broadcast it to the shape of self
+            elif k == "axis":
+                x_inner = _coerce_to_tuple(x)
             else:
                 x_inner = self._coerce_to_self_index_np(x, flatten=False)
             kwargs_inner[k] = x_inner
@@ -729,7 +734,7 @@ def pdf(self, x, axis=None):
           :math:`p_{X_{i \cdot}}(x_{i \cdot})`, where :math:`X_{i \cdot}` is the
           random variable corresponding to the :math:`i`-th row of :math:`X`,
           :math:`x_{i \cdot}` is the :math:`i`-th row of :math:`x`,
-        * ``axis=(0, 1)`` : joint pdf along rows and columns.
+        * ``axis=(0, 1)`` or ``axis="all"`` : joint pdf along rows and columns.
           Result is a single scalar value, corresponding to
           :math:`p_{X}(x)`, where :math:`p_{X}` is the joint pdf of :math:`X`.
 
@@ -737,7 +742,7 @@ def pdf(self, x, axis=None):
         ----------
         x : ``pandas.DataFrame`` or 2D ``np.ndarray``
             representing :math:`x`, as above
-        axis : None or tuple of int, default=None
+        axis : None, int, ``"all"``, or tuple of int, default=None
             Axes or axis along which the pdf is joint:
 
             * None : marginal pdfs are returned (default).
@@ -764,9 +769,38 @@ def pdf(self, x, axis=None):
         if distr_type == "discrete":
             return self._coerce_to_self_index_df(0, flatten=False)
 
-        return self._boilerplate("_pdf", x=x)
+        # split joint axes: dependent ones go to _pdf, independent ones stay here
+        indep_axes = self.get_tag("property:indep_axes", (0, 1))
+        if axis is not None:
+            if axis == "all":
+                axis = (0, 1)
+            axis = _coerce_to_tuple(axis)
+
+            axes_to_pass = tuple([ax for ax in axis if ax not in indep_axes])
+            axes_to_handle_here = [ax for ax in axis if ax in indep_axes]
+
+            axs = {"axis": axes_to_pass} if len(axes_to_pass) > 0 else {}
+        else:
+            axs = {}
+            axes_to_handle_here = []
+
+        pdf_val = self._boilerplate("_pdf", x=x, **axs)
+
+        # joint pdf over independent axes is the product of the marginal pdfs
+        for ax in axes_to_handle_here:
+            pdf_val = pdf_val.prod(axis=ax)
+            if isinstance(pdf_val, pd.Series):
+                if ax == 0:
+                    pdf_val = pdf_val.to_frame().T
+                    pdf_val.index = pd.Index([0])
+                else:
+                    pdf_val = pdf_val.to_frame(name="pdf")
+        if axis is not None and len(axis) == 2:
+            pdf_val = pdf_val.values[0, 0]
+
+        return pdf_val
 
-    def _pdf(self, x):
+    def _pdf(self, x, axis=None):
         """Probability density function.
 
         Private method, to be implemented by subclasses.
@@ -2030,3 +2064,15 @@ def _coerce_to_pd_index_or_none(x):
     if isinstance(x, pd.Index):
         return x
     return pd.Index(x)
+
+
+def _coerce_to_tuple(x):
+    """Coerce x to a tuple: None -> (), non-str iterable -> tuple(x), else (x,)."""
+    if x is None:
+        return ()
+    if isinstance(x, tuple):
+        return x
+    # if iterable but not string, coerce to tuple
+    if hasattr(x, "__iter__") and not isinstance(x, str):
+        return tuple(x)
+    return (x,)  # else, make single-element tuple

From 44382432fbc618f13575ea0d1fb348279457c6ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <fkiraly@gcos.ai>
Date: Sun, 2 Nov 2025 12:46:26 +0100
Subject: [PATCH 6/6] Update test_all_distrs.py

---
 skpro/distributions/tests/test_all_distrs.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/skpro/distributions/tests/test_all_distrs.py b/skpro/distributions/tests/test_all_distrs.py
index 23d3e191e..1fed1c5c9 100644
--- a/skpro/distributions/tests/test_all_distrs.py
+++ b/skpro/distributions/tests/test_all_distrs.py
@@ -50,12 +50,10 @@ def _has_capability(distr, method):
 
 METHODS_SCALAR = ["mean", "var", "energy"]
 METHODS_SCALAR_POS = ["var", "energy"]  # result always non-negative?
-METHODS_X = ["energy", "pdf", "log_pdf", "pmf", "log_pmf", "cdf", "pdfj"]
-METHODS_X_POS = [
-    "energy", "pdf", "pmf", "cdf", "surv", "haz", "pdfj"
-]  # result non-negative?
+METHODS_X = ["energy", "pdf", "log_pdf", "pmf", "log_pmf", "cdf"]
+METHODS_X_POS = ["energy", "pdf", "pmf", "cdf", "surv", "haz"]  # result non-negative?
 METHODS_P = ["ppf"]
-METHODS_ROWWISE = ["energy", "pdfj"]  # results in one column
+METHODS_ROWWISE = ["energy"]  # results in one column
 
 
 class TestAllDistributions(PackageConfig, DistributionFixtureGenerator, QuickTester):