|
9 | 9 | import knime.extension as knext |
10 | 10 | import numpy as np |
11 | 11 | import pandas as pd |
| 12 | +from . import utd_category |
12 | 13 | from .post_hoc import ( |
13 | 14 | run_one_way_anova, |
14 | 15 | validate_anova_data, |
|
24 | 25 | ) |
25 | 26 |
|
26 | 27 |
|
27 | | -# Create post-hoc tests category (same as normality tests) |
28 | | -post_hoc_category = knext.category( |
29 | | - path="/community", |
30 | | - level_id="utd_development", |
31 | | - name="University of Texas at Dallas Development", |
32 | | - description="Statistical Post-Hoc Multiple Comparison Testing Node", |
33 | | - icon="./icons/utd.png", |
34 | | -) |
35 | | - |
36 | | - |
37 | 28 | @knext.node( |
38 | | - name="Post-Hoc Multiple Comparisons", |
| 29 | + name="Post-Hoc Analysis", |
39 | 30 | node_type=knext.NodeType.MANIPULATOR, |
40 | | - icon_path="./icons/post_hoc.png", |
41 | | - category=post_hoc_category, |
| 31 | + icon_path="./icons/post_hoc.jpg", |
| 32 | + category=utd_category, |
42 | 33 | ) |
43 | | -@knext.input_table(name="Data", description="Data table with numeric dependent variable and categorical grouping variable.") |
| 34 | +@knext.input_table(name="Input Data", description="Data table with numeric dependent variable and categorical grouping variable.") |
44 | 35 | @knext.output_table( |
45 | 36 | name="ANOVA Summary", |
46 | | - description="Overall ANOVA test results.", |
| 37 | + description="Output table containing overall ANOVA decision and p-value.", |
47 | 38 | ) |
48 | 39 | @knext.output_table( |
49 | 40 | name="Pairwise Details", |
50 | | - description="Pairwise post-hoc comparison results (conditional on ANOVA significance).", |
| 41 | + description="Output table reflecting all pairwise group comparisons.", |
51 | 42 | ) |
52 | 43 | class PostHocTestsNode: |
| 44 | + """ |
| 45 | + Performs post-hoc multiple comparison tests following significant ANOVA results. |
| 46 | + |
| 47 | + This node automatically runs one-way ANOVA first, then conducts pairwise comparisons |
| 48 | + using Tukey HSD or Holm-Bonferroni methods if overall differences are significant. |
| 49 | + |
| 50 | + When ANOVA shows significant differences between groups, this node identifies which |
| 51 | + specific group pairs differ from each other while controlling for multiple comparisons. |
| 52 | + """ |
53 | 53 | test_type = test_type_param |
54 | 54 | data_column = data_column_param |
55 | 55 | group_column = group_column_param |
@@ -87,6 +87,31 @@ def _validate_and_prepare_data(self, df, data_col, group_col): |
87 | 87 |
|
88 | 88 | def configure(self, cfg_ctx, input_spec): |
89 | 89 | """Preselect and validate the data and grouping columns, then configure the node's two output table schemas.""" |
| 90 | + # Import the filter functions for column type checking |
| 91 | + from .post_hoc.utils import is_numeric, is_string |
| 92 | + |
| 93 | + # Get available columns by type |
| 94 | + numeric_columns = [col.name for col in input_spec if is_numeric(col)] |
| 95 | + categorical_columns = [col.name for col in input_spec if is_string(col)] |
| 96 | + |
| 97 | + # Auto-preselect rightmost columns if not already selected |
| 98 | + if self.data_column is None and numeric_columns: |
| 99 | + self.data_column = numeric_columns[-1] |
| 100 | + |
| 101 | + if self.group_column is None and categorical_columns: |
| 102 | + self.group_column = categorical_columns[-1] |
| 103 | + |
| 104 | + # Validate that columns are selected |
| 105 | + if self.data_column is None: |
| 106 | + raise knext.InvalidParametersError( |
| 107 | + "No dependent variable selected. Please select a numeric data column." |
| 108 | + ) |
| 109 | + |
| 110 | + if self.group_column is None: |
| 111 | + raise knext.InvalidParametersError( |
| 112 | + "No grouping variable selected. Please select a categorical grouping column." |
| 113 | + ) |
| 114 | + |
90 | 115 | # Output Port 1: ANOVA Summary |
91 | 116 | # Columns: Tested Variable, Grouping Variable, Significance Level, ANOVA p-Value, Overall Conclusion |
92 | 117 | anova_summary_cols = [ |
|
0 commit comments