Made all changes except removing the 'Difference is Significant' column, because in my opinion basic users cannot understand p-values. If that column is removed, the entire output port is no longer needed. This would be a quick change if needed.

ahmed-elghazi · ahmed-elghazi · commit 1268c68d67b4 · 2026-01-16T03:19:58.000-06:00
diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,9 @@ __pycache__/
 *.py[codz]
 *$py.class
 
+# macOS system files
+.DS_Store
+
 # C extensions
 *.so
 
@@ -223,3 +226,5 @@ p2.index
 features/
 plugins/
 
+CHANGES.md
+
diff --git a/icons/post_hoc.jpg b/icons/post_hoc.jpg
diff --git a/knime.yml b/knime.yml
@@ -1,12 +1,12 @@
 # Extension metadata
-name: statistical_normality_test  # Will be concatenated with the group_id to an ID
+name: utd_statistics  # Will be concatenated with the group_id to an ID
 group_id: edu.utdal
-description: Statistical Normality Test Extension # Human readable bundle name / description
-long_description: This extension implements a statistical normality test.
-version: 0.1.0 # Version of this Python node extension
+description: UTD Statistical Analysis Extension # Human readable bundle name / description
+long_description: This extension provides statistical testing tools developed by Ahmed Elghazi and Rabih Neouchi from University of Texas at Dallas.
+version: 1.0.0 # Version of this Python node extension
 
 # legal information 
-author: Ahmed Elghazi / Saad Jamil Ahmed / Rabih Neouchi # Authors of the extension
+author: Ahmed Elghazi / Rabih Neouchi # Authors of the extension
 vendor: KNIME AG, Zurich, Switzerland
 license_file: LICENSE.TXT # Best practice: put your LICENSE.TXT next to the knime.yml; otherwise you would need to change to path/to/LICENSE.txt
 
diff --git a/src/__init__.py b/src/__init__.py
@@ -1,4 +1,17 @@
+import knime.extension as knext
+
+# Shared category for all UTD statistical nodes
+# MUST be defined before importing node modules that reference it
+utd_category = knext.category(
+    path="/community",
+    level_id="utd_development",
+    name="University of Texas at Dallas Development",
+    description="Statistical analysis tools developed by the University of Texas at Dallas",
+    icon="./icons/utd.png",
+)
+
+# Import nodes after category is defined to avoid circular import
 from .normality_node import NormalityTestsNode
 from .post_hoc_node import PostHocTestsNode
 
-__all__ = ["NormalityTestsNode", "PostHocTestsNode"]
+__all__ = ["NormalityTestsNode", "PostHocTestsNode", "utd_category"]
diff --git a/src/normality_node.py b/src/normality_node.py
@@ -9,25 +9,16 @@
 import knime.extension as knext
 import numpy as np
 import pandas as pd
+from . import utd_category
 from .normality_tests import run_ad_test, run_cramer_test
 from .normality_tests.utils import test_type_param, input_column_param, alpha_param, TestType
 
 
-# Create normality tests category
-normality_category = knext.category(
-    path="/community",
-    level_id="utd_development",
-    name="University of Texas at Dallas Development",
-    description="Statistical Normality Testing Node",
-    icon="./icons/utd.png",
-)
-
-
 @knext.node(
     name="Statistical Normality Tests",
     node_type=knext.NodeType.MANIPULATOR,
-    icon_path="./icons/curve.png",
-    category=normality_category,
+    icon_path="./icons/bell_curve.png",
+    category=utd_category,
 )
 @knext.input_table(name="Input data", description="Table containing the numeric column to test.")
 @knext.output_table(
@@ -36,7 +27,11 @@
 )
 class NormalityTestsNode:
     """
-    Simplified normality testing node supporting Anderson-Darling and Cramer-von Mises tests.
+    Tests whether your data follows a normal (bell-shaped) distribution using Anderson-Darling 
+    or Cramer-von Mises methods. Normality is a key assumption in many statistical analyses.
+    
+    This node performs statistical tests to determine if your data follows a normal distribution,
+    which is required for many parametric statistical procedures.
     """
 
     test_type = test_type_param
diff --git a/src/post_hoc/utils.py b/src/post_hoc/utils.py
@@ -7,8 +7,8 @@ def is_numeric(col: knext.Column) -> bool:
     return col.ktype in (knext.double(), knext.int32(), knext.int64())
 
 
-def is_categorical(col: knext.Column) -> bool:
-    """Helper function to filter for categorical/string columns."""
+def is_string(col: knext.Column) -> bool:
+    """Helper function to filter for string columns."""
     return col.ktype == knext.string()
 
 
@@ -41,7 +41,7 @@ class PostHocTestType(knext.EnumParameterOptions):
 group_column_param = knext.ColumnParameter(
     label="Grouping Variable",
     description="Categorical column containing the group assignments.",
-    column_filter=is_categorical,
+    column_filter=is_string,
 )
 
 alpha_param = knext.DoubleParameter(
diff --git a/src/post_hoc_node.py b/src/post_hoc_node.py
@@ -9,6 +9,7 @@
 import knime.extension as knext
 import numpy as np
 import pandas as pd
+from . import utd_category
 from .post_hoc import (
     run_one_way_anova,
     validate_anova_data,
@@ -24,32 +25,31 @@
 )
 
 
-# Create post-hoc tests category (same as normality tests)
-post_hoc_category = knext.category(
-    path="/community",
-    level_id="utd_development",
-    name="University of Texas at Dallas Development",
-    description="Statistical Post-Hoc Multiple Comparison Testing Node",
-    icon="./icons/utd.png",
-)
-
-
 @knext.node(
-    name="Post-Hoc Multiple Comparisons",
+    name="Post-Hoc Analysis",
     node_type=knext.NodeType.MANIPULATOR,
-    icon_path="./icons/post_hoc.png",
-    category=post_hoc_category,
+    icon_path="./icons/post_hoc.jpg",
+    category=utd_category,
 )
-@knext.input_table(name="Data", description="Data table with numeric dependent variable and categorical grouping variable.")
+@knext.input_table(name="Input Data", description="Data table with numeric dependent variable and categorical grouping variable.")
 @knext.output_table(
     name="ANOVA Summary",
-    description="Overall ANOVA test results.",
+    description="Output table containing overall ANOVA decision and p-value.",
 )
 @knext.output_table(
     name="Pairwise Details",
-    description="Pairwise post-hoc comparison results (conditional on ANOVA significance).",
+    description="Output table reflecting all pairwise group comparisons.",
 )
 class PostHocTestsNode:
+    """
+    Performs post-hoc multiple comparison tests following significant ANOVA results.
+    
+    This node automatically runs one-way ANOVA first, then conducts pairwise comparisons 
+    using Tukey HSD or Holm-Bonferroni methods if overall differences are significant.
+    
+    When ANOVA shows significant differences between groups, this node identifies which 
+    specific group pairs differ from each other while controlling for multiple comparisons.
+    """
     test_type = test_type_param
     data_column = data_column_param
     group_column = group_column_param
@@ -87,6 +87,31 @@ def _validate_and_prepare_data(self, df, data_col, group_col):
 
     def configure(self, cfg_ctx, input_spec):
         """Configure the node's two output table schemas."""
+        # Import the filter functions for column type checking
+        from .post_hoc.utils import is_numeric, is_string
+        
+        # Get available columns by type
+        numeric_columns = [col.name for col in input_spec if is_numeric(col)]
+        categorical_columns = [col.name for col in input_spec if is_string(col)]
+        
+        # Auto-preselect rightmost columns if not already selected
+        if self.data_column is None and numeric_columns:
+            self.data_column = numeric_columns[-1]
+        
+        if self.group_column is None and categorical_columns:
+            self.group_column = categorical_columns[-1]
+        
+        # Validate that columns are selected
+        if self.data_column is None:
+            raise knext.InvalidParametersError(
+                "No dependent variable selected. Please select a numeric data column."
+            )
+        
+        if self.group_column is None:
+            raise knext.InvalidParametersError(
+                "No grouping variable selected. Please select a categorical grouping column."
+            )
+        
         # Output Port 1: ANOVA Summary
         # Columns: Tested Variable, Grouping Variable, Significance Level, ANOVA p-Value, Overall Conclusion
         anova_summary_cols = [