Skip to content

Commit 1268c68

Browse files
committed
Made all requested changes except removing the 'Difference is Significant' column: in my opinion, basic users cannot interpret p-values on their own, so the column is worth keeping. If that column is removed, the entire output port is no longer needed. Removing it is a quick change if required.
1 parent a0d10e1 commit 1268c68

7 files changed

Lines changed: 76 additions & 38 deletions

File tree

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ __pycache__/
33
*.py[codz]
44
*$py.class
55

6+
# macOS system files
7+
.DS_Store
8+
69
# C extensions
710
*.so
811

@@ -223,3 +226,5 @@ p2.index
223226
features/
224227
plugins/
225228

229+
CHANGES.md
230+

icons/post_hoc.jpg

768 Bytes
Loading

knime.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# Extension metadata
2-
name: statistical_normality_test # Will be concatenated with the group_id to an ID
2+
name: utd_statistics # Will be concatenated with the group_id to an ID
33
group_id: edu.utdal
4-
description: Statistical Normality Test Extension # Human readable bundle name / description
5-
long_description: This extension implements a statistical normality test.
6-
version: 0.1.0 # Version of this Python node extension
4+
description: UTD Statistical Analysis Extension # Human readable bundle name / description
5+
long_description: This extension provides statistical testing tools developed by Ahmed Elghazi and Rabih Neouchi from the University of Texas at Dallas.
6+
version: 1.0.0 # Version of this Python node extension
77

88
# legal information
9-
author: Ahmed Elghazi / Saad Jamil Ahmed / Rabih Neouchi # Authors of the extension
9+
author: Ahmed Elghazi / Rabih Neouchi # Authors of the extension
1010
vendor: KNIME AG, Zurich, Switzerland
1111
license_file: LICENSE.TXT # Best practice: put your LICENSE.TXT next to the knime.yml; otherwise you would need to change to path/to/LICENSE.txt
1212

src/__init__.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,17 @@
1+
import knime.extension as knext
2+
3+
# Shared category for all UTD statistical nodes
4+
# MUST be defined before importing node modules that reference it
5+
utd_category = knext.category(
6+
path="/community",
7+
level_id="utd_development",
8+
name="University of Texas at Dallas Development",
9+
description="Statistical analysis tools developed by the University of Texas at Dallas",
10+
icon="./icons/utd.png",
11+
)
12+
13+
# Import nodes after category is defined to avoid circular import
114
from .normality_node import NormalityTestsNode
215
from .post_hoc_node import PostHocTestsNode
316

4-
__all__ = ["NormalityTestsNode", "PostHocTestsNode"]
17+
__all__ = ["NormalityTestsNode", "PostHocTestsNode", "utd_category"]

src/normality_node.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,16 @@
99
import knime.extension as knext
1010
import numpy as np
1111
import pandas as pd
12+
from . import utd_category
1213
from .normality_tests import run_ad_test, run_cramer_test
1314
from .normality_tests.utils import test_type_param, input_column_param, alpha_param, TestType
1415

1516

16-
# Create normality tests category
17-
normality_category = knext.category(
18-
path="/community",
19-
level_id="utd_development",
20-
name="University of Texas at Dallas Development",
21-
description="Statistical Normality Testing Node",
22-
icon="./icons/utd.png",
23-
)
24-
25-
2617
@knext.node(
2718
name="Statistical Normality Tests",
2819
node_type=knext.NodeType.MANIPULATOR,
29-
icon_path="./icons/curve.png",
30-
category=normality_category,
20+
icon_path="./icons/bell_curve.png",
21+
category=utd_category,
3122
)
3223
@knext.input_table(name="Input data", description="Table containing the numeric column to test.")
3324
@knext.output_table(
@@ -36,7 +27,11 @@
3627
)
3728
class NormalityTestsNode:
3829
"""
39-
Simplified normality testing node supporting Anderson-Darling and Cramer-von Mises tests.
30+
Tests whether your data follows a normal (bell-shaped) distribution using Anderson-Darling
31+
or Cramer-von Mises methods. Normality is a key assumption in many statistical analyses.
32+
33+
This node performs statistical tests to determine if your data follows a normal distribution,
34+
which is required for many parametric statistical procedures.
4035
"""
4136

4237
test_type = test_type_param

src/post_hoc/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ def is_numeric(col: knext.Column) -> bool:
77
return col.ktype in (knext.double(), knext.int32(), knext.int64())
88

99

10-
def is_categorical(col: knext.Column) -> bool:
11-
"""Helper function to filter for categorical/string columns."""
10+
def is_string(col: knext.Column) -> bool:
11+
"""Helper function to filter for string columns."""
1212
return col.ktype == knext.string()
1313

1414

@@ -41,7 +41,7 @@ class PostHocTestType(knext.EnumParameterOptions):
4141
group_column_param = knext.ColumnParameter(
4242
label="Grouping Variable",
4343
description="Categorical column containing the group assignments.",
44-
column_filter=is_categorical,
44+
column_filter=is_string,
4545
)
4646

4747
alpha_param = knext.DoubleParameter(

src/post_hoc_node.py

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import knime.extension as knext
1010
import numpy as np
1111
import pandas as pd
12+
from . import utd_category
1213
from .post_hoc import (
1314
run_one_way_anova,
1415
validate_anova_data,
@@ -24,32 +25,31 @@
2425
)
2526

2627

27-
# Create post-hoc tests category (same as normality tests)
28-
post_hoc_category = knext.category(
29-
path="/community",
30-
level_id="utd_development",
31-
name="University of Texas at Dallas Development",
32-
description="Statistical Post-Hoc Multiple Comparison Testing Node",
33-
icon="./icons/utd.png",
34-
)
35-
36-
3728
@knext.node(
38-
name="Post-Hoc Multiple Comparisons",
29+
name="Post-Hoc Analysis",
3930
node_type=knext.NodeType.MANIPULATOR,
40-
icon_path="./icons/post_hoc.png",
41-
category=post_hoc_category,
31+
icon_path="./icons/post_hoc.jpg",
32+
category=utd_category,
4233
)
43-
@knext.input_table(name="Data", description="Data table with numeric dependent variable and categorical grouping variable.")
34+
@knext.input_table(name="Input Data", description="Data table with numeric dependent variable and categorical grouping variable.")
4435
@knext.output_table(
4536
name="ANOVA Summary",
46-
description="Overall ANOVA test results.",
37+
description="Output table containing overall ANOVA decision and p-value.",
4738
)
4839
@knext.output_table(
4940
name="Pairwise Details",
50-
description="Pairwise post-hoc comparison results (conditional on ANOVA significance).",
41+
description="Output table reflecting all pairwise group comparisons.",
5142
)
5243
class PostHocTestsNode:
44+
"""
45+
Performs post-hoc multiple comparison tests following significant ANOVA results.
46+
47+
This node automatically runs one-way ANOVA first, then conducts pairwise comparisons
48+
using Tukey HSD or Holm-Bonferroni methods if overall differences are significant.
49+
50+
When ANOVA shows significant differences between groups, this node identifies which
51+
specific group pairs differ from each other while controlling for multiple comparisons.
52+
"""
5353
test_type = test_type_param
5454
data_column = data_column_param
5555
group_column = group_column_param
@@ -87,6 +87,31 @@ def _validate_and_prepare_data(self, df, data_col, group_col):
8787

8888
def configure(self, cfg_ctx, input_spec):
8989
"""Preselect default columns, validate the selection, and configure the node's two output table schemas."""
90+
# Import the filter functions for column type checking
91+
from .post_hoc.utils import is_numeric, is_string
92+
93+
# Get available columns by type
94+
numeric_columns = [col.name for col in input_spec if is_numeric(col)]
95+
categorical_columns = [col.name for col in input_spec if is_string(col)]
96+
97+
# Auto-preselect rightmost columns if not already selected
98+
if self.data_column is None and numeric_columns:
99+
self.data_column = numeric_columns[-1]
100+
101+
if self.group_column is None and categorical_columns:
102+
self.group_column = categorical_columns[-1]
103+
104+
# Validate that columns are selected
105+
if self.data_column is None:
106+
raise knext.InvalidParametersError(
107+
"No dependent variable selected. Please select a numeric data column."
108+
)
109+
110+
if self.group_column is None:
111+
raise knext.InvalidParametersError(
112+
"No grouping variable selected. Please select a categorical grouping column."
113+
)
114+
90115
# Output Port 1: ANOVA Summary
91116
# Columns: Tested Variable, Grouping Variable, Significance Level, ANOVA p-Value, Overall Conclusion
92117
anova_summary_cols = [

0 commit comments

Comments
 (0)