Skip to content

Commit f0a7dad

Browse files
authored
Merge pull request #3948 from mashehu/fix-quotes-in-meta-yml
2 parents 99b5f07 + ed0936d commit f0a7dad

5 files changed

Lines changed: 217 additions & 79 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
- Pin j178/prek-action action to 91fd7d7 ([#3931](https://github.com/nf-core/tools/pull/3931))
1212
- add pre-commit hook to keep uv.lock in sync ([#3933](https://github.com/nf-core/tools/pull/3933))
1313
- Update mcr.microsoft.com/devcontainers/miniconda Docker digest to 2be0f5a ([#3946](https://github.com/nf-core/tools/pull/3946))
14+
- Fix quote handling in meta.yml ([#3948](https://github.com/nf-core/tools/pull/3948))
1415
- Fix docker errors in test ([#3924](https://github.com/nf-core/tools/pull/3924))
1516
- Update actions/checkout digest to 8e8c483 ([#3956](https://github.com/nf-core/tools/pull/3956))
1617
- Update GitHub Actions ([#3957](https://github.com/nf-core/tools/pull/3957))

nf_core/components/nfcore_component.py

Lines changed: 62 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -205,22 +205,13 @@ def get_inputs_from_main_nf(self) -> None:
205205
log.debug(f"Could not find any inputs in {self.main_nf}")
206206
return
207207
input_data = data.split("input:")[1].split("output:")[0]
208+
regex_keyword = r"\b(val|path)\b"
208209
for line in input_data.split("\n"):
209210
channel_elements: Any = []
210211
line = line.split("//")[0] # remove any trailing comments
211-
regex = r"\b(val|path)\b\s*(\(([^)]+)\)|\s*([^)\s,]+))"
212-
matches = re.finditer(regex, line)
213-
for _, match in enumerate(matches, start=1):
214-
input_val = None
215-
if match.group(3):
216-
input_val = match.group(3).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
217-
elif match.group(4):
218-
input_val = match.group(4).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
219-
if input_val:
220-
input_val = re.split(r',(?=(?:[^\'"]*[\'"][^\'"]*[\'"])*[^\'"]*$)', input_val)[
221-
0
222-
] # Takes only first part, avoid commas in quotes
223-
input_val = input_val.strip().strip("'").strip('"') # remove quotes and whitespaces
212+
for match in re.finditer(regex_keyword, line):
213+
if input_val := self._extract_value_from_line(line, match.end()):
214+
input_val = self._split_first_param(input_val)
224215
channel_elements.append({input_val: {}})
225216
if len(channel_elements) == 1:
226217
inputs.append(channel_elements[0])
@@ -244,6 +235,56 @@ def get_inputs_from_main_nf(self) -> None:
244235
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
245236
self.inputs = inputs
246237

238+
def _split_first_param(self, value: str) -> str:
239+
"""
240+
Extract first parameter from comma-separated list, respecting quotes.
241+
242+
Args:
243+
value: String that may contain comma-separated parameters
244+
245+
Returns:
246+
First parameter with whitespace stripped
247+
"""
248+
result = re.split(r',(?=(?:[^\'"]*[\'"][^\'"]*[\'"])*[^\'"]*$)', value)[0]
249+
return result.strip()
250+
251+
def _extract_value_from_line(self, line: str, pos: int) -> str | None:
252+
"""
253+
Extract value after keyword, handling parentheses and quotes.
254+
255+
Uses a simple state machine to find matching closing parenthesis
256+
while respecting quoted strings.
257+
258+
Args:
259+
line: The line to parse
260+
pos: Position in line where keyword ends
261+
262+
Returns:
263+
Extracted value or None if not found
264+
"""
265+
rest = line[pos:].lstrip()
266+
if not rest:
267+
return None
268+
269+
if not rest.startswith("("):
270+
# No parentheses, extract until comma or newline
271+
match = re.match(r"([^,\n]*)", rest)
272+
return match.group(1).strip() if match else None
273+
274+
# Find matching closing parentheses, respecting quotes
275+
depth = 0
276+
in_quote = None
277+
for i, char in enumerate(rest):
278+
if char in ('"', "'") and (i == 0 or rest[i - 1] != "\\"):
279+
in_quote = char if in_quote is None else (None if in_quote == char else in_quote)
280+
elif char == "(" and in_quote is None:
281+
depth += 1
282+
elif char == ")" and in_quote is None:
283+
depth -= 1
284+
if depth == 0:
285+
return rest[1:i] # Return content between parentheses
286+
return None
287+
247288
def get_outputs_from_main_nf(self):
248289
with open(self.main_nf) as f:
249290
data = f.read()
@@ -256,25 +297,16 @@ def get_outputs_from_main_nf(self):
256297
output_data = data.split("output:")[1].split("when:")[0]
257298
log.debug(f"Found output_data: {output_data}")
258299
regex_emit = r"emit:\s*([^)\s,]+)"
259-
regex_elements = r"\b(val|path|env|stdout|eval)\b\s*(\(([^)]+)\)|\s*([^)\s,]+))"
300+
regex_keyword = r"\b(val|path|env|stdout|eval)\b"
260301
for line in output_data.split("\n"):
261302
match_emit = re.search(regex_emit, line)
262-
matches_elements = re.finditer(regex_elements, line)
263303
if not match_emit:
264304
continue
265305
channel_elements = []
266306
outputs[match_emit.group(1)] = []
267-
for _, match_element in enumerate(matches_elements, start=1):
268-
output_val = None
269-
if match_element.group(3):
270-
output_val = match_element.group(3)
271-
elif match_element.group(4):
272-
output_val = match_element.group(4)
273-
if output_val:
274-
output_val = re.split(r',(?=(?:[^\'"]*[\'"][^\'"]*[\'"])*[^\'"]*$)', output_val)[
275-
0
276-
] # Takes only first part, avoid commas in quotes
277-
output_val = output_val.strip().strip("'").strip('"') # remove quotes and whitespaces
307+
for match in re.finditer(regex_keyword, line):
308+
if output_val := self._extract_value_from_line(line, match.end()):
309+
output_val = self._split_first_param(output_val)
278310
channel_elements.append({output_val: {}})
279311
if len(channel_elements) == 1:
280312
outputs[match_emit.group(1)].append(channel_elements[0])
@@ -312,27 +344,18 @@ def get_topics_from_main_nf(self) -> None:
312344
output_data = data.split("output:")[1].split("when:")[0]
313345
log.debug(f"Output data: {output_data}")
314346
regex_topic = r"topic:\s*([^)\s,]+)"
315-
regex_elements = r"\b(val|path|env|stdout|eval)\b\s*(\(([^)]+)\)|\s*([^)\s,]+))"
347+
regex_keyword = r"\b(val|path|env|stdout|eval)\b"
316348
for line in output_data.split("\n"):
317349
match_topic = re.search(regex_topic, line)
318-
matches_elements = re.finditer(regex_elements, line)
319350
if not match_topic:
320351
continue
321352
channel_elements: list[dict[str, dict]] = []
322353
topic_name = match_topic.group(1)
323354
if topic_name not in topics:
324355
topics[topic_name] = []
325-
for _, match_element in enumerate(matches_elements, start=1):
326-
topic_val = None
327-
if match_element.group(3):
328-
topic_val = match_element.group(3)
329-
elif match_element.group(4):
330-
topic_val = match_element.group(4)
331-
if topic_val:
332-
topic_val = re.split(r',(?=(?:[^\'"]*[\'"][^\'"]*[\'"])*[^\'"]*$)', topic_val)[
333-
0
334-
] # Takes only first part, avoid commas in quotes
335-
topic_val = topic_val.strip().strip("'").strip('"') # remove quotes and whitespaces
356+
for match in re.finditer(regex_keyword, line):
357+
if topic_val := self._extract_value_from_line(line, match.end()):
358+
topic_val = self._split_first_param(topic_val)
336359
channel_elements.append({topic_val: {}})
337360
if len(channel_elements) == 1:
338361
topics[topic_name].append(channel_elements[0])

0 commit comments

Comments
 (0)