Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions .github/scripts/cmd/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Command Bot Documentation

The command bot allows contributors to perform self-service actions on PRs using comment commands.

## Available Commands

### Label Command (Self-service)

Add labels to your PR without requiring maintainer intervention:

```bash
/cmd label T1-FRAME # Add single label
/cmd label T1-FRAME R0-no-crate-publish-required # Add multiple labels
/cmd label T1-FRAME A2-substantial D3-involved # Add multiple labels
```

**Available Labels:**
The bot dynamically fetches all current labels from the repository, ensuring it's always up-to-date. For label meanings and descriptions, see the [official label documentation](https://paritytech.github.io/labels/doc_polkadot-sdk.html).

**Features**:
- **Auto-Correction**: Automatically fixes high-confidence typos (e.g., `T1-FRAM` → `T1-FRAME`)
- **Case Fixing**: Handles case variations (e.g., `I2-Bug` → `I2-bug`)
- **Smart Suggestions**: For ambiguous inputs, provides multiple options to choose from

### Other Commands

```bash
/cmd fmt # Format code (cargo +nightly fmt and taplo)
/cmd prdoc # Generate PR documentation
/cmd bench # Run benchmarks
/cmd update-ui # Update UI tests
/cmd --help # Show help for all commands
```

### Common Flags

- `--quiet`: Don't post start/end messages in PR
- `--clean`: Clean up previous bot comments
- `--image <image>`: Override docker image

## How It Works

1. **Command Detection**: The bot listens for comments starting with `/cmd` on PRs
2. **Permission Check**: Verifies if the user is an organization member
3. **Command Execution**: Runs the specified command in a containerized environment
4. **Result Handling**:
   - For label commands: Applies labels via GitHub API
   - For other commands: Commits changes back to the PR branch
5. **Feedback**: Posts success/failure messages in the PR

## Security

- Organization member check prevents unauthorized usage
- Commands from non-members run using bot scripts from master branch

## Troubleshooting

If a command fails:
1. Check the GitHub Actions logs linked in the bot's comment
2. Verify the command syntax matches the examples
3. Ensure you have permission to perform the action
4. For label commands, verify that the label names exist in the repository (see the label documentation linked above)
275 changes: 275 additions & 0 deletions .github/scripts/cmd/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
import _help
import importlib.util
import re
import urllib.request
import urllib.parse
import difflib

_HelpAction = _help._HelpAction

Expand All @@ -31,6 +34,143 @@ def setup_logging():
os.makedirs('/tmp/cmd')
open('/tmp/cmd/command_output.log', 'w')

def fetch_repo_labels():
    """Fetch the names of all labels defined on the GitHub repository.

    The repository is taken from the ``GITHUB_REPOSITORY_OWNER`` and
    ``GITHUB_REPOSITORY`` environment variables (defaulting to
    ``paritytech/polkadot-sdk``). When ``GITHUB_TOKEN`` is set it is sent as
    an ``Authorization`` header.

    The labels endpoint is paginated, so pages are requested one after
    another until a page shorter than the page size is returned; a single
    request would silently drop every label past the first page.

    Returns:
        A list of label name strings, or ``None`` if any request fails.
    """
    try:
        repo_owner = os.environ.get('GITHUB_REPOSITORY_OWNER', 'paritytech')
        repo_name = os.environ.get('GITHUB_REPOSITORY', 'paritytech/polkadot-sdk').split('/')[-1]

        # Add GitHub token if available for higher rate limits
        headers = {'User-Agent': 'polkadot-sdk-cmd-bot'}
        github_token = os.environ.get('GITHUB_TOKEN')
        if github_token:
            headers['Authorization'] = f'token {github_token}'

        per_page = 100
        label_names = []
        page = 1
        while True:
            api_url = (
                f"https://api.github.com/repos/{repo_owner}/{repo_name}/labels"
                f"?per_page={per_page}&page={page}"
            )
            req = urllib.request.Request(api_url, headers=headers)

            with urllib.request.urlopen(req) as response:
                if response.getcode() != 200:
                    print_and_log(f"Failed to fetch labels: HTTP {response.getcode()}")
                    return None
                labels_data = json.loads(response.read().decode())

            label_names.extend(label['name'] for label in labels_data)

            # A short (or empty) page means there is nothing left to fetch.
            if len(labels_data) < per_page:
                break
            page += 1

        print_and_log(f"Fetched {len(label_names)} labels from repository")
        return label_names
    except Exception as e:
        # Best-effort: callers treat None as "could not fetch labels".
        print_and_log(f"Error fetching labels from repository: {e}")
        return None


def check_pr_status(pr_number):
    """Tell whether labels on the given PR may still be modified.

    Queries the GitHub pulls API for ``pr_number`` using ``GITHUB_TOKEN``.

    Returns:
        ``True`` only when the PR is not merged, not closed, and has no
        ``auto_merge`` set (which this script treats as "in merge queue").
        ``False`` in every other case, including a missing token, a non-200
        response, or any exception: labeling is refused whenever the status
        cannot be verified.
    """
    try:
        token = os.environ.get('GITHUB_TOKEN')
        if not token:
            print_and_log("Error: GITHUB_TOKEN not set, cannot verify PR status")
            return False

        owner = os.environ.get('GITHUB_REPOSITORY_OWNER', 'paritytech')
        repo = os.environ.get('GITHUB_REPOSITORY', 'paritytech/polkadot-sdk').split('/')[-1]

        request = urllib.request.Request(
            f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}",
            headers={
                'User-Agent': 'polkadot-sdk-cmd-bot',
                'Authorization': f'token {token}',
                'Accept': 'application/vnd.github.v3+json'
            },
        )

        with urllib.request.urlopen(request) as response:
            status = response.getcode()
            if status != 200:
                print_and_log(f"Failed to fetch PR status: HTTP {status}")
                return False
            pr = json.loads(response.read().decode())

        # Any one of these conditions blocks label changes.
        is_merged = pr.get('merged', False)
        is_closed = pr.get('state') == 'closed'
        has_auto_merge = pr.get('auto_merge') is not None
        return not (is_merged or is_closed or has_auto_merge)
    except Exception as e:
        print_and_log(f"Error checking PR status: {e}")
        return False


def find_closest_labels(invalid_label, valid_labels, max_suggestions=3, cutoff=0.6):
    """Return up to ``max_suggestions`` entries of ``valid_labels`` resembling ``invalid_label``.

    Thin wrapper over ``difflib.get_close_matches``: candidates scoring below
    ``cutoff`` are dropped and the rest are returned best match first.
    """
    return difflib.get_close_matches(invalid_label, valid_labels, max_suggestions, cutoff)

def _closest_labels_ignoring_case(label, valid_labels, max_suggestions, cutoff=0.6):
    """Return the closest valid labels to ``label``, compared case-insensitively.

    Results keep the original spelling from ``valid_labels`` and are ordered
    best match first. If two valid labels differ only by case, the first one
    encountered is used.
    """
    canonical = {}
    for valid in valid_labels:
        canonical.setdefault(valid.lower(), valid)
    matches = difflib.get_close_matches(
        label.lower(),
        list(canonical),
        n=max_suggestions,
        cutoff=cutoff,
    )
    return [canonical[match] for match in matches]


def auto_correct_labels(invalid_labels, valid_labels, auto_correct_threshold=0.8):
    """Auto-correct labels when confidence is high, otherwise suggest alternatives.

    Candidate lookup and similarity scoring are both case-insensitive, so a
    label that differs from a valid one only by case (e.g. ``t1-frame``) is
    corrected rather than rejected.

    Args:
        invalid_labels: Labels that are not present in ``valid_labels``.
        valid_labels: All label names accepted by the repository.
        auto_correct_threshold: Minimum case-insensitive similarity ratio
            (0..1) of the best match for it to be applied automatically.

    Returns:
        A ``(corrections, suggestions)`` tuple. ``corrections`` is a list of
        ``(invalid_label, corrected_label)`` pairs; ``suggestions`` is a list
        of human-readable messages for labels that need manual selection.
    """
    corrections = []
    suggestions = []

    for invalid_label in invalid_labels:
        # One lookup serves both the top match and the suggestion list.
        candidates = _closest_labels_ignoring_case(invalid_label, valid_labels, max_suggestions=3)

        if candidates:
            top_match = candidates[0]
            similarity = difflib.SequenceMatcher(None, invalid_label.lower(), top_match.lower()).ratio()

            if similarity >= auto_correct_threshold:
                # High confidence - auto-correct
                corrections.append((invalid_label, top_match))
            else:
                # Lower confidence - suggest alternatives
                labels_str = ', '.join(f"'{label}'" for label in candidates)
                suggestions.append(f"'{invalid_label}' → did you mean: {labels_str}?")
            continue

        # No close matches - fall back to the label's category prefix (e.g. "T1").
        prefix_match = re.match(r'^([A-Z]\d+)-', invalid_label, re.IGNORECASE)
        if not prefix_match:
            suggestions.append(
                f"'{invalid_label}' → invalid format (expected format: 'T1-FRAME', 'I2-bug', etc.)"
            )
            continue

        prefix = prefix_match.group(1).upper()
        prefix_labels = [label for label in valid_labels if label.startswith(prefix + '-')]

        if not prefix_labels:
            suggestions.append(f"'{invalid_label}' → no labels found with prefix '{prefix}-'")
        elif len(prefix_labels) == 1:
            # Exactly one label carries this prefix, so the intent is unambiguous.
            corrections.append((invalid_label, prefix_labels[0]))
        else:
            # Multiple prefix matches - suggest alternatives
            suggestions.append(
                f"'{invalid_label}' → try labels starting with '{prefix}-': {', '.join(prefix_labels[:3])}"
            )

    return corrections, suggestions

parser = argparse.ArgumentParser(prog="/cmd ", description='A command runner for polkadot-sdk repo', add_help=False)
parser.add_argument('--help', action=_HelpAction, help='help for help if you need some help') # help for help
for arg, config in common_args.items():
Expand Down Expand Up @@ -93,6 +233,93 @@ def setup_logging():
parser_prdoc = subparsers.add_parser('prdoc', help='Generates PR documentation')
generate_prdoc.setup_parser(parser_prdoc, pr_required=False)

"""
LABEL
"""
# Fetch current labels from repository
def get_allowed_labels():
    """Return the repository's current label names.

    Raises:
        RuntimeError: if ``fetch_repo_labels`` returns ``None``, i.e. the
            labels could not be fetched.
    """
    labels = fetch_repo_labels()
    if labels is None:
        # No fallback list exists, so a failed fetch is fatal for the label command.
        raise RuntimeError("Failed to fetch labels from repository. Please check your connection and try again.")
    return labels

def validate_and_auto_correct_labels(input_labels, valid_labels):
    """Validate requested labels, auto-correcting high-confidence typos.

    Every input label is processed before any error is raised, so the error
    message reports all problematic labels at once.

    Args:
        input_labels: Labels requested by the user.
        valid_labels: All label names accepted by the repository.

    Returns:
        A ``(final_labels, correction_messages)`` tuple. ``final_labels``
        holds the accepted labels in first-seen order without duplicates
        (a typo and its correct spelling collapse to one entry);
        ``correction_messages`` describes each auto-correction applied.

    Raises:
        ValueError: if at least one label could not be accepted or
            auto-corrected; the message lists the suggestions for each.
    """
    final_labels = []
    correction_messages = []
    all_suggestions = []
    no_match_labels = []

    def accept(label):
        # Keep first-seen order while never emitting the same label twice.
        if label not in final_labels:
            final_labels.append(label)

    # Process all labels first to collect all issues
    for label in input_labels:
        if label in valid_labels:
            accept(label)
            continue

        # Invalid label - try auto-correction
        corrections, suggestions = auto_correct_labels([label], valid_labels)

        if corrections:
            # Auto-correct with high confidence
            original, corrected = corrections[0]
            accept(corrected)
            similarity = difflib.SequenceMatcher(None, original.lower(), corrected.lower()).ratio()
            correction_messages.append(f"Auto-corrected '{original}' → '{corrected}' (similarity: {similarity:.2f})")
        elif suggestions:
            # Low confidence - collect for batch error
            all_suggestions.extend(suggestions)
        else:
            # No suggestions at all
            no_match_labels.append(label)

    # If there are any labels that couldn't be auto-corrected, show all at once
    if all_suggestions or no_match_labels:
        error_parts = []

        if all_suggestions:
            error_parts.append("Labels requiring manual selection:")
            error_parts.extend(f" • {suggestion}" for suggestion in all_suggestions)

        if no_match_labels:
            if all_suggestions:
                error_parts.append("")  # Empty line for separation
            error_parts.append("Labels with no close matches:")
            error_parts.extend(
                f" • '{label}' → no valid suggestions available" for label in no_match_labels
            )

        error_parts.append("")
        error_parts.append("For all available labels, see: https://paritytech.github.io/labels/doc_polkadot-sdk.html")

        raise ValueError("\n".join(error_parts))

    return final_labels, correction_messages

# Epilog text for the `label` subcommand's help output; %(prog)s is expanded by argparse.
label_example = '''**Examples**:
Add single label
%(prog)s T1-FRAME

Add multiple labels
%(prog)s T1-FRAME R0-no-crate-publish-required

Add multiple labels
%(prog)s T1-FRAME A2-substantial D3-involved

Labels are fetched dynamically from the repository.
Typos are auto-corrected when confidence is high (>80% similarity).
For label meanings, see: https://paritytech.github.io/labels/doc_polkadot-sdk.html
'''

# RawDescriptionHelpFormatter keeps the epilog's line breaks as written.
parser_label = subparsers.add_parser(
    'label',
    help='Add labels to PR (self-service for contributors)',
    epilog=label_example,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)

# The label subcommand accepts the same common flags as the other subcommands.
for arg, config in common_args.items():
    parser_label.add_argument(arg, **config)

# One or more label names are required.
parser_label.add_argument(
    'labels',
    nargs='+',
    help='Labels to add to the PR (auto-corrects typos)',
)

def main():
global args, unknown, runtimesMatrix
args, unknown = parser.parse_known_args()
Expand Down Expand Up @@ -284,6 +511,54 @@ def main():
print_and_log('❌ Failed to generate prdoc')
sys.exit(exit_code)

elif args.command == 'label':
# The actual labeling is handled by the GitHub Action workflow
# This script validates and auto-corrects labels

try:
# Check if PR is still open and not merged/in merge queue
pr_number = os.environ.get('PR_NUM')
if pr_number:
if not check_pr_status(pr_number):
raise ValueError("Cannot modify labels on merged PRs or PRs in merge queue")

# Check if user has permission to modify labels
is_org_member = os.environ.get('IS_ORG_MEMBER', 'false').lower() == 'true'
is_pr_author = os.environ.get('IS_PR_AUTHOR', 'false').lower() == 'true'

if not is_org_member and not is_pr_author:
Copy link
Copy Markdown
Contributor

@EgorPopelyaev EgorPopelyaev Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this if condition be OR instead of AND as it mentioned in the error message below?
"Only the PR author or organization members can modify labels"

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When you are not an org member AND not the PR author, you are not authorized. Expressed differently, when you are an org member the condition resolves to false; if you are not an org member, it then checks whether you are the author.

raise ValueError("Only the PR author or organization members can modify labels")

# Get allowed labels dynamically
try:
allowed_labels = get_allowed_labels()
except RuntimeError as e:
raise ValueError(str(e))

# Validate and auto-correct labels
final_labels, correction_messages = validate_and_auto_correct_labels(args.labels, allowed_labels)

# Show auto-correction messages
for message in correction_messages:
print(message)

# Output labels as JSON for GitHub Action
import json
labels_output = {"labels": final_labels}
print(f"LABELS_JSON: {json.dumps(labels_output)}")
except ValueError as e:
print_and_log(f'❌ {e}')

# Output error as JSON for GitHub Action
import json
error_output = {
"error": "validation_failed",
"message": "Invalid labels found. Please check the suggestions below and try again.",
"details": str(e)
}
print(f"ERROR_JSON: {json.dumps(error_output)}")
sys.exit(1)

print('🚀 Done')

if __name__ == '__main__':
Expand Down
Loading
Loading