paritytech · bkchr · Oct 15, 2025 · Oct 1, 2025 · Oct 2, 2025 · Oct 2, 2025
@@ -0,0 +1,62 @@
+# Command Bot Documentation
+
+The command bot allows contributors to perform self-service actions on PRs using comment commands.
+
+## Available Commands
+
+### Label Command (Self-service)
+
+Add labels to your PR without requiring maintainer intervention:
+
+```bash
+/cmd label T1-FRAME                                    # Add single label
+/cmd label T1-FRAME R0-no-crate-publish-required      # Add multiple labels
+/cmd label T1-FRAME A2-substantial D3-involved       # Add multiple labels
+```
+
+**Available Labels:**
+The bot dynamically fetches all current labels from the repository, so the set of accepted labels is always up to date. For label meanings and descriptions, see the [official label documentation](https://paritytech.github.io/labels/doc_polkadot-sdk.html).
+
+**Features**:
+- **Auto-Correction**: Automatically fixes high-confidence typos, i.e. inputs that are at least 80% similar to an existing label (e.g., `T1-FRAM` → `T1-FRAME`)
+- **Case Fixing**: Handles case variations (e.g., `I2-Bug` → `I2-bug`)
+- **Smart Suggestions**: For ambiguous inputs, provides multiple options to choose from
+
+### Other Commands
+
+```bash
+/cmd fmt                           # Format code (cargo +nightly fmt and taplo)
+/cmd prdoc                         # Generate PR documentation
+/cmd bench                         # Run benchmarks
+/cmd update-ui                     # Update UI tests
+/cmd --help                        # Show help for all commands
+```
+
+### Common Flags
+
+- `--quiet`: Don't post start/end messages in PR
+- `--clean`: Clean up previous bot comments
+- `--image <image>`: Override docker image
+
+## How It Works
+
+1. **Command Detection**: The bot listens for comments starting with `/cmd` on PRs
+2. **Permission Check**: Verifies that the commenter is an organization member (the `label` command may also be used by the PR author)
+3. **Command Execution**: Runs the specified command in a containerized environment
+4. **Result Handling**:
+   - For label commands: Applies labels via GitHub API
+   - For other commands: Commits changes back to the PR branch
+5. **Feedback**: Posts success/failure messages in the PR
+
+## Security
+
+- Organization member check prevents unauthorized usage
+- Commands from non-members run using bot scripts from master branch
+
+## Troubleshooting
+
+If a command fails:
+1. Check the GitHub Actions logs linked in the bot's comment
+2. Verify the command syntax matches the examples
+3. Ensure you have permission to perform the action
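+4. For label commands, verify that the label names exist in the repository (see the label documentation linked above) and that the PR is still open; labels cannot be modified on merged PRs or PRs in the merge queue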
@@ -7,6 +7,9 @@
 import _help
 import importlib.util
 import re
+import urllib.request
+import urllib.parse
+import difflib
 
 _HelpAction = _help._HelpAction
 
@@ -31,6 +34,143 @@ def setup_logging():
         os.makedirs('/tmp/cmd')
     open('/tmp/cmd/command_output.log', 'w')
 
+def fetch_repo_labels():
+    """Fetch the names of all labels currently defined in the GitHub repository.
+
+    The repository is read from the ``GITHUB_REPOSITORY_OWNER`` and
+    ``GITHUB_REPOSITORY`` environment variables, defaulting to
+    ``paritytech/polkadot-sdk``. When ``GITHUB_TOKEN`` is set it is sent as an
+    ``Authorization`` header to get higher rate limits.
+
+    The labels endpoint is paginated, so pages are requested until one comes
+    back with fewer than ``per_page`` entries; a single request would only
+    ever see the first 100 labels and any later label would be rejected as
+    invalid by the callers.
+
+    Returns:
+        list[str] | None: The label names, or ``None`` if a request failed
+        (the failure is reported through ``print_and_log``).
+    """
+    per_page = 100  # largest page size the GitHub REST API accepts
+    max_pages = 50  # hard stop so a misbehaving endpoint cannot loop forever
+    timeout_seconds = 30  # urlopen blocks indefinitely without a timeout
+
+    try:
+        repo_owner = os.environ.get('GITHUB_REPOSITORY_OWNER', 'paritytech')
+        repo_name = os.environ.get('GITHUB_REPOSITORY', 'paritytech/polkadot-sdk').split('/')[-1]
+
+        # Add GitHub token if available for higher rate limits
+        headers = {'User-Agent': 'polkadot-sdk-cmd-bot'}
+        github_token = os.environ.get('GITHUB_TOKEN')
+        if github_token:
+            headers['Authorization'] = f'token {github_token}'
+
+        label_names = []
+        for page in range(1, max_pages + 1):
+            api_url = (
+                f"https://api.github.com/repos/{repo_owner}/{repo_name}/labels"
+                f"?per_page={per_page}&page={page}"
+            )
+            req = urllib.request.Request(api_url, headers=headers)
+
+            with urllib.request.urlopen(req, timeout=timeout_seconds) as response:
+                if response.getcode() != 200:
+                    print_and_log(f"Failed to fetch labels: HTTP {response.getcode()}")
+                    return None
+                labels_data = json.loads(response.read().decode())
+
+            label_names.extend(label['name'] for label in labels_data)
+
+            # A short (or empty) page means there is nothing left to fetch
+            if len(labels_data) < per_page:
+                break
+
+        print_and_log(f"Fetched {len(label_names)} labels from repository")
+        return label_names
+    except Exception as e:
+        # Best effort: any network/JSON/shape problem is reported as "no labels"
+        print_and_log(f"Error fetching labels from repository: {e}")
+        return None
+
+
+def check_pr_status(pr_number):
+    """Tell whether labels may still be changed on the given pull request.
+
+    Queries the GitHub REST API for the pull request and fails closed: every
+    situation in which the state cannot be established is treated as
+    "do not label".
+
+    Args:
+        pr_number: Number of the pull request, interpolated into the API URL.
+
+    Returns:
+        bool: ``True`` only when the PR is open, not merged and has no
+        ``auto_merge`` configuration. ``False`` when the PR is merged, closed,
+        has ``auto_merge`` set, when ``GITHUB_TOKEN`` is missing, or when the
+        request fails for any reason.
+    """
+    # Without a timeout urlopen can block the whole job on a stalled connection
+    timeout_seconds = 30
+
+    try:
+        # Get GitHub token from environment
+        github_token = os.environ.get('GITHUB_TOKEN')
+        if not github_token:
+            print_and_log("Error: GITHUB_TOKEN not set, cannot verify PR status")
+            return False  # Prevent labeling if we can't check status
+
+        repo_owner = os.environ.get('GITHUB_REPOSITORY_OWNER', 'paritytech')
+        repo_name = os.environ.get('GITHUB_REPOSITORY', 'paritytech/polkadot-sdk').split('/')[-1]
+        api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/pulls/{pr_number}"
+
+        headers = {
+            'User-Agent': 'polkadot-sdk-cmd-bot',
+            'Authorization': f'token {github_token}',
+            'Accept': 'application/vnd.github.v3+json'
+        }
+
+        req = urllib.request.Request(api_url, headers=headers)
+
+        with urllib.request.urlopen(req, timeout=timeout_seconds) as response:
+            if response.getcode() != 200:
+                print_and_log(f"Failed to fetch PR status: HTTP {response.getcode()}")
+                return False  # Prevent labeling if we can't check status
+            data = json.loads(response.read().decode())
+
+        # Check if PR is merged
+        if data.get('merged', False):
+            return False
+
+        # Check if PR is closed
+        if data.get('state') == 'closed':
+            return False
+
+        # A non-null `auto_merge` is used here as the signal that the PR is
+        # queued for merging.
+        # NOTE(review): this field reflects the auto-merge setting; confirm it
+        # is the intended proxy for "in merge queue".
+        if data.get('auto_merge') is not None:
+            return False
+
+        return True  # PR is open and not queued for merging
+    except Exception as e:
+        print_and_log(f"Error checking PR status: {e}")
+        return False  # Prevent labeling if we can't check status
+
+
+def find_closest_labels(invalid_label, valid_labels, max_suggestions=3, cutoff=0.6):
+    """Find the valid labels closest to ``invalid_label``, ignoring case.
+
+    Comparison is done on lower-cased text so that an input differing from a
+    real label only (or mostly) by case is still found; a case-sensitive
+    comparison scores e.g. ``t1-frame`` vs ``T1-FRAME`` far below the cutoff.
+
+    Args:
+        invalid_label: The label text supplied by the user.
+        valid_labels: Iterable of label names that exist in the repository.
+        max_suggestions: Maximum number of labels to return (must be > 0).
+        cutoff: Minimum ``difflib`` similarity ratio in ``[0, 1]``.
+
+    Returns:
+        list[str]: Up to ``max_suggestions`` labels from ``valid_labels`` in
+        their original spelling, best match first.
+    """
+    # Map each lower-cased spelling back to the real label name(s)
+    originals_by_lowercase = {}
+    for label in valid_labels:
+        originals_by_lowercase.setdefault(label.lower(), []).append(label)
+
+    # Get close matches using difflib on the lower-cased forms
+    lowercase_matches = difflib.get_close_matches(
+        invalid_label.lower(),
+        list(originals_by_lowercase),
+        n=max_suggestions,
+        cutoff=cutoff
+    )
+
+    close_matches = []
+    for lowercase_label in lowercase_matches:
+        close_matches.extend(originals_by_lowercase[lowercase_label])
+
+    # Labels that differ only by case may expand one match into several
+    return close_matches[:max_suggestions]
+
+def auto_correct_labels(invalid_labels, valid_labels, auto_correct_threshold=0.8):
+    """Automatically correct labels when confidence is high, otherwise suggest.
+
+    For every invalid label the closest valid labels are looked up with
+    ``find_closest_labels``:
+
+    * If the best match has a case-insensitive similarity of at least
+      ``auto_correct_threshold`` it is accepted as the correction.
+    * If there are matches below the threshold they are offered as
+      suggestions.
+    * If nothing is close, the label's ``<letter><digits>-`` prefix (matched
+      case-insensitively) is used: a single valid label with that prefix is
+      taken as the correction, several become suggestions.
+
+    Args:
+        invalid_labels: Label strings that are not valid as given.
+        valid_labels: Label names that exist in the repository.
+        auto_correct_threshold: Minimum similarity ratio for auto-correction.
+
+    Returns:
+        tuple[list[tuple[str, str]], list[str]]: ``(corrections, suggestions)``
+        where ``corrections`` holds ``(original, corrected)`` pairs and
+        ``suggestions`` holds human-readable messages.
+    """
+    corrections = []
+    suggestions = []
+
+    for invalid_label in invalid_labels:
+        # One lookup is enough: the best match is the first entry, and the
+        # same list doubles as the suggestion list when confidence is low.
+        matches = find_closest_labels(invalid_label, valid_labels, max_suggestions=3)
+
+        if matches:
+            top_match = matches[0]
+            similarity = difflib.SequenceMatcher(None, invalid_label.lower(), top_match.lower()).ratio()
+
+            if similarity >= auto_correct_threshold:
+                # High confidence - auto-correct
+                corrections.append((invalid_label, top_match))
+            else:
+                # Lower confidence - suggest alternatives
+                labels_str = ', '.join(f"'{label}'" for label in matches)
+                suggestions.append(f"'{invalid_label}' → did you mean: {labels_str}?")
+            continue
+
+        # No close matches - try prefix suggestions. The prefix is matched
+        # case-insensitively so that e.g. 't1-foo' is still recognised.
+        prefix_match = re.match(r'^([A-Z]\d+)-', invalid_label, re.IGNORECASE)
+        if not prefix_match:
+            suggestions.append(
+                f"'{invalid_label}' → invalid format (expected format: 'T1-FRAME', 'I2-bug', etc.)"
+            )
+            continue
+
+        prefix = prefix_match.group(1).upper()
+        prefix_labels = [label for label in valid_labels if label.upper().startswith(prefix + '-')]
+
+        if not prefix_labels:
+            suggestions.append(f"'{invalid_label}' → no labels found with prefix '{prefix}-'")
+        elif len(prefix_labels) == 1:
+            # If there's exactly one prefix match, auto-correct it
+            corrections.append((invalid_label, prefix_labels[0]))
+        else:
+            # Multiple prefix matches - suggest alternatives
+            suggestions.append(
+                f"'{invalid_label}' → try labels starting with '{prefix}-': {', '.join(prefix_labels[:3])}"
+            )
+
+    return corrections, suggestions
+
 parser = argparse.ArgumentParser(prog="/cmd ", description='A command runner for polkadot-sdk repo', add_help=False)
 parser.add_argument('--help', action=_HelpAction, help='help for help if you need some help')  # help for help
 for arg, config in common_args.items():
@@ -93,6 +233,93 @@ def setup_logging():
 parser_prdoc = subparsers.add_parser('prdoc', help='Generates PR documentation')
 generate_prdoc.setup_parser(parser_prdoc, pr_required=False)
 
+"""
+LABEL
+"""
+# Fetch current labels from repository
+def get_allowed_labels():
+    """Return the labels that currently exist in the repository.
+
+    Raises:
+        RuntimeError: If ``fetch_repo_labels`` could not retrieve the labels
+            (it signals that by returning ``None``).
+    """
+    fetched = fetch_repo_labels()
+
+    # Guard clause: there is no fallback list, so a failed fetch is fatal
+    if fetched is None:
+        raise RuntimeError("Failed to fetch labels from repository. Please check your connection and try again.")
+
+    return fetched
+
+def validate_and_auto_correct_labels(input_labels, valid_labels):
+    """Validate labels and auto-correct when confidence is high.
+
+    Labels that exist are accepted unchanged. Every other label is handed to
+    ``auto_correct_labels``; confident corrections are applied, everything
+    else is collected so that all problems are reported in one error.
+
+    Args:
+        input_labels: Labels as typed by the user.
+        valid_labels: Label names that exist in the repository.
+
+    Returns:
+        tuple[list[str], list[str]]: ``(final_labels, correction_messages)``.
+        ``final_labels`` contains each accepted label once, in first-seen
+        order, so a repeated label or a typo that corrects to a label that was
+        also given explicitly does not produce duplicates.
+
+    Raises:
+        ValueError: If at least one label could not be resolved; the message
+            lists every unresolved label with its suggestions.
+    """
+    known_labels = set(valid_labels)  # O(1) membership checks
+    final_labels = []
+    correction_messages = []
+    all_suggestions = []
+    no_match_labels = []
+
+    # Process all labels first to collect all issues
+    for label in input_labels:
+        if label in known_labels:
+            final_labels.append(label)
+            continue
+
+        # Invalid label - try auto-correction
+        corrections, suggestions = auto_correct_labels([label], valid_labels)
+
+        if corrections:
+            # Auto-correct with high confidence
+            original, corrected = corrections[0]
+            final_labels.append(corrected)
+            similarity = difflib.SequenceMatcher(None, original.lower(), corrected.lower()).ratio()
+            correction_messages.append(f"Auto-corrected '{original}' → '{corrected}' (similarity: {similarity:.2f})")
+        elif suggestions:
+            # Low confidence - collect for batch error
+            all_suggestions.extend(suggestions)
+        else:
+            # Defensive: kept in case the helper returns neither a correction
+            # nor a suggestion for a label
+            no_match_labels.append(label)
+
+    # If there are any labels that couldn't be auto-corrected, show all at once
+    if all_suggestions or no_match_labels:
+        error_parts = []
+
+        if all_suggestions:
+            error_parts.append("Labels requiring manual selection:")
+            error_parts.extend(f"  • {suggestion}" for suggestion in all_suggestions)
+
+        if no_match_labels:
+            if all_suggestions:
+                error_parts.append("")  # Empty line for separation
+            error_parts.append("Labels with no close matches:")
+            error_parts.extend(f"  • '{label}' → no valid suggestions available" for label in no_match_labels)
+
+        error_parts.append("")
+        error_parts.append("For all available labels, see: https://paritytech.github.io/labels/doc_polkadot-sdk.html")
+
+        raise ValueError("\n".join(error_parts))
+
+    # dict.fromkeys drops duplicates while preserving first-seen order
+    return list(dict.fromkeys(final_labels)), correction_messages
+
+# Epilog text for the `label` subcommand's help output; the raw formatter
+# selected below keeps its line layout as written.
+label_example = '''**Examples**:
+ Add single label
+ %(prog)s T1-FRAME
+
+ Add multiple labels
+ %(prog)s T1-FRAME R0-no-crate-publish-required
+
+ Add multiple labels
+ %(prog)s T1-FRAME A2-substantial D3-involved
+
+Labels are fetched dynamically from the repository.
+Typos are auto-corrected when confidence is high (>80% similarity).
+For label meanings, see: https://paritytech.github.io/labels/doc_polkadot-sdk.html
+'''
+
+parser_label = subparsers.add_parser(
+    'label',
+    help='Add labels to PR (self-service for contributors)',
+    epilog=label_example,
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+)
+
+# Give the subcommand the same flags that `common_args` defines for the parser
+for arg, config in common_args.items():
+    parser_label.add_argument(arg, **config)
+
+# One or more label names; they are validated in main() when the command runs
+parser_label.add_argument(
+    'labels',
+    nargs='+',
+    help='Labels to add to the PR (auto-corrects typos)',
+)
+
 def main():
     global args, unknown, runtimesMatrix
     args, unknown = parser.parse_known_args()
@@ -284,6 +511,54 @@ def main():
             print_and_log('❌ Failed to generate prdoc')
             sys.exit(exit_code)
 
+    elif args.command == 'label':
+        # The actual labeling is handled by the GitHub Action workflow
+        # This script validates and auto-corrects labels
+
+        try:
+            # Check if PR is still open and not merged/in merge queue
+            pr_number = os.environ.get('PR_NUM')
+            if pr_number:
+                if not check_pr_status(pr_number):
+                    raise ValueError("Cannot modify labels on merged PRs or PRs in merge queue")
+
+            # Check if user has permission to modify labels
+            is_org_member = os.environ.get('IS_ORG_MEMBER', 'false').lower() == 'true'
+            is_pr_author = os.environ.get('IS_PR_AUTHOR', 'false').lower() == 'true'
+
+            if not is_org_member and not is_pr_author:
+                raise ValueError("Only the PR author or organization members can modify labels")
+
+            # Get allowed labels dynamically
+            try:
+                allowed_labels = get_allowed_labels()
+            except RuntimeError as e:
+                raise ValueError(str(e))
+
+            # Validate and auto-correct labels
+            final_labels, correction_messages = validate_and_auto_correct_labels(args.labels, allowed_labels)
+
+            # Show auto-correction messages
+            for message in correction_messages:
+                print(message)
+
+            # Output labels as JSON for GitHub Action
+            import json
+            labels_output = {"labels": final_labels}
+            print(f"LABELS_JSON: {json.dumps(labels_output)}")
+        except ValueError as e:
+            print_and_log(f'❌ {e}')
+
+            # Output error as JSON for GitHub Action
+            import json
+            error_output = {
+                "error": "validation_failed",
+                "message": "Invalid labels found. Please check the suggestions below and try again.",
+                "details": str(e)
+            }
+            print(f"ERROR_JSON: {json.dumps(error_output)}")
+            sys.exit(1)
+
     print('🚀 Done')
 
 if __name__ == '__main__':