# CCCL CI job matrix (ci/matrix.yaml in NVIDIA/cccl).
workflows:
  # If any jobs appear here, they will be executed instead of `pull_request` for PRs.
  # This is useful for limiting resource usage when a full matrix is not needed.
  # The branch protection checks will fail when using this override workflow.
  #
  # Example:
  # override:
  #   # Full project build: slow, expensive
  #   - { jobs: ['test'], project: 'thrust', std: 17, ctk: '12.X', cxx: ['gcc12', 'clang16'] }
  #
  #   # Build / run targeted tests: faster turnaround, less runner usage.
  #   # Use project 'target'.
  #   # args are passed to ci/util/build_and_test_targets.sh. See that script for available options.
  #   - { jobs: ['run_cpu'], project: 'target', ctk: ['12.X', '13.X'], cxx: ['gcc', 'clang', 'msvc'],
  #       args: '--preset cub-cpp20 --build-targets "cub.cpp20.test.iterator"' }
  #   - { jobs: ['run_gpu'], project: 'target', ctk: ['12.X', '13.X'], cxx: ['gcc', 'clang'], gpu: 'rtxa6000',
  #       args: '--preset cub-cpp20 --build-targets "cub.cpp20.test.iterator" --ctest-targets "cub.cpp20.test.iterator"' }
  #   - { jobs: ['run_cpu'], project: 'target', ctk: ['12.X', '13.X'], cxx: ['gcc', 'clang', 'msvc'],
  #       args: '--preset libcudacxx --lit-precompile-tests "cuda/utility/basic_any.pass.cpp"' }
  #   - { jobs: ['run_gpu'], project: 'target', ctk: ['12.X', '13.X'], cxx: ['gcc', 'clang'], gpu: 'rtx2080',
  #       args: '--preset libcudacxx --lit-tests "cuda/utility/basic_any.pass.cpp"' }
  #
  override:

  pull_request:
    # Old CTK: Oldest/newest supported host compilers:
    - {jobs: ['build'], std: 'minmax', ctk: '12.0', cxx: ['gcc7',  'gcc12', 'clang14',            'msvc2019', 'msvc14.39']}
    - {jobs: ['build'], std: 'minmax', ctk: '12.X', cxx: ['gcc7',  'gcc',   'clang14', 'clang19', 'msvc2019', 'msvc'     ]}
    - {jobs: ['build'], std: 'minmax', ctk: '13.0', cxx: ['gcc11', 'gcc',   'clang15', 'clang',   'msvc2019', 'msvc'     ]}
    # Old CTK: cudax has a different support matrix:
    - {jobs: ['build'], project: 'cudax', ctk: '12.0', std: 'minmax', cxx: ['gcc9',  'gcc12', 'clang14',            'msvc14.39']}
    - {jobs: ['build'], project: 'cudax', ctk: '12.X', std: 'minmax', cxx: ['gcc9',  'gcc',   'clang14', 'clang19', 'msvc']}
    - {jobs: ['build'], project: 'cudax', ctk: '13.0', std: 'minmax', cxx: ['gcc11', 'gcc',   'clang15', 'clang',   'msvc']}
    # Current CTK build-only:
    - {jobs: ['build'], std: 'minmax', cxx: ['gcc11', 'clang15', 'msvc2019'] } # Oldest
    - {jobs: ['build'], std: 'max',    cxx: ['gcc12', 'gcc13'] }
    - {jobs: ['build'], std: 'max',    cxx: ['clang16', 'clang17', 'clang18', 'clang19'] }
    - {jobs: ['build'], std: 'all',    cxx: ['gcc', 'clang', 'msvc']} # Latest
    # Current CTK build-only: cudax has a different support matrix:
    - {jobs: ['build'], project: 'cudax', std: 'minmax', cxx: ['gcc11', 'clang15']} # Oldest
    - {jobs: ['build'], project: 'cudax', std: 'max',    cxx: ['gcc12']}
    - {jobs: ['build'], project: 'cudax', std: 'max',    cxx: ['clang16', 'clang17', 'clang18', 'clang19']}
    - {jobs: ['build'], project: 'cudax', std: 'all',    cxx: ['gcc', 'clang', 'msvc']} # Newest
    # Current CTK testing:
    - {jobs: ['test'], project: 'thrust',     std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'}
    - {jobs: ['test'], project: 'libcudacxx', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'cudax',      std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'}
    - {jobs: ['test_nolid', 'test_lid0'], project: 'cub', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'}
    - {jobs: ['test_lid1',  'test_lid2'], project: 'cub', std: 'max', cxx: ['gcc'],                  gpu: 'rtxa6000'}
    # H100 coverage:
    - {jobs: ['test_nolid', 'test_lid0'], project: 'cub',                   std: 'max', gpu: 'h100' }
    - {jobs: ['test_gpu'],                project: 'thrust',                std: 'max', gpu: 'h100' }
    - {jobs: ['test'],                    project: ['libcudacxx', 'cudax'], std: 'max', gpu: 'h100' }
    # RTX PRO 6000 coverage (limited due to small number of runners):
    - {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc', gpu: 'rtxpro6000'}
    # Misc:
    - {jobs: ['build'], cpu: 'arm64', project: ['libcudacxx', 'cub', 'thrust', 'cudax'], std: 'max', cxx: ['gcc', 'clang']}
    - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'}
    - {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all', gpu: 'rtx2080', sm: 'gpu'}
    - {jobs: ['verify_codegen'], project: 'libcudacxx'}
    # c.parallel -- pinned to gcc13 on Linux to match python
    - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '12.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080']}
    - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080', 'l4', 'h100']}
    # RTX PRO 6000 coverage (limited due to small number of runners):
    - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: ['gcc13'], gpu: ['rtxpro6000']}
    # c.experimental.stf -- pinned to gcc13 to match python
    - {jobs: ['test'], project: 'cccl_c_stf', ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']}
    - {jobs: ['test'], project: 'cccl_c_stf', ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']}
    # Python -- pinned to gcc13 on Linux for consistency across CTK images
    - {jobs: ['test'], project: 'python', ctk: ['12.0',                 '13.X'], py_version: ['3.10'], gpu: 'l4', cxx: ['gcc13', 'msvc']}
    - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.13'], gpu: 'l4', cxx: ['gcc13', 'msvc']}
    - {jobs: ['test'], project: 'python', py_version: '3.13', gpu: 'h100', cxx: 'gcc13'}
    # CCCL packaging:
    - {jobs: ['test'], project: 'packaging', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080', args: '-min-cmake'}
    - {jobs: ['test'], project: 'packaging', ctk: '12.X', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'packaging', ctk: '13.0', cxx: ['gcc',   'clang'],   gpu: 'rtx2080', args: '-min-cmake'}
    - {jobs: ['test'], project: 'packaging', ctk: '13.X', cxx: ['gcc',   'clang'],   gpu: 'rtx2080'}
    - {jobs: ['install'], project: 'packaging'}
    # NVBench Helper testing:
    - {jobs: ['test'], project: 'nvbench_helper', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'nvbench_helper', ctk: '12.X', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'nvbench_helper', ctk: '13.0', cxx: ['gcc',   'clang'],   gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'nvbench_helper', ctk: '13.X', cxx: ['gcc',   'clang'],   gpu: 'rtx2080'}
    # NVHPC build
    # - {jobs: ['build'], cxx: 'nvhpc', ctk: 'nvhpc', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']}
    # clang-cuda
    - {jobs: ['build'], cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda',  std: 'all', sm: '75;80;90;100'}
    # libc++
    # - arm64 for now as it's closest to android.
    # - {jobs: ['build'], cpu: 'arm64', project: 'libcudacxx', std: 'all', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', cmake_options: '-DCCCL_USE_LIBCXX=ON', sm: '75;80;90;100'}

  # Used when an upstream project changes to reduce time spent smoke testing dependencies.
  # Per project: a GPU test on the runner's own SM (`sm: 'gpu'`) plus a few build-only compiler checks.
  pull_request_lite:
    # libcudacxx - Specialized, testing default SM
    - {project: 'libcudacxx', jobs: ['test'], std: 'max', cxx: ['gcc', 'msvc'], gpu: 'rtx2080', sm: 'gpu'}
    - {project: 'libcudacxx', jobs: ['build'], std: 'max', cxx: 'clang'}
    # - {project: 'libcudacxx', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'}
    # clang-cuda SM list matches the other clang-cuda build entries in this file ('75;80;90;100').
    - {project: 'libcudacxx', jobs: ['build'], std: 'max', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', sm: '75;80;90;100'}
    - {project: 'libcudacxx', jobs: ['nvrtc'], std: 'max', gpu: 'rtx2080', sm: 'gpu'}
    - {project: 'libcudacxx', jobs: ['verify_codegen']}
    # CUB - Specialized, testing default SM
    - {project: 'cub', jobs: ['test_nolid', 'test_lid0'], std: 'max', cxx: ['gcc', 'msvc'], gpu: 'rtxa6000', sm: 'gpu'}
    - {project: 'cub', jobs: ['build_nolid', 'build_lid0'], std: 'max', cxx: 'clang'}
    # - {project: 'cub', jobs: ['build_nolid', 'build_lid0'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'}
    - {project: 'cub', jobs: ['build_nolid', 'build_lid0'], std: 'max', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', sm: '75;80;90;100'}
    # Thrust - Keep number of sm small. Kernel coverage is in CUB. This just tests dispatch / glue in lite mode:
    - {project: 'thrust', jobs: ['test'], std: 'max', cxx: ['gcc', 'msvc'], gpu: 'rtx4090', sm: 'gpu'}
    - {project: 'thrust', jobs: ['build'], std: 'max', cxx: 'clang', sm: '75;120'}
    # - {project: 'thrust', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc', sm: '75;120'}
    - {project: 'thrust', jobs: ['build'], std: 'max', cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', sm: '75;100'}
    # cudax
    - {project: 'cudax', jobs: ['test'], std: 'max', cxx: ['gcc', 'msvc'], gpu: 'rtx2080', sm: 'gpu'}
    - {project: 'cudax', jobs: ['build'], std: 'max', cxx: 'clang', sm: '75;120'}
    # - {project: 'cudax', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc', sm: '75;120'}
    # stdpar
    # NOTE(review): every other nvhpc entry in this workflow is commented out; confirm this one is meant to stay enabled.
    - {project: 'stdpar', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'}
    # Python + support
    - {project: 'cccl_c_parallel', jobs: ['test'], ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: 'rtx2080', sm: 'gpu'}
    - {project: 'cccl_c_stf',      jobs: ['test'], ctk: '13.X', cxx: 'gcc13',           gpu: 'rtx2080', sm: 'gpu'}
    - {project: 'python', jobs: ['test'], ctk: '13.X', py_version: '3.13', gpu: 'l4', cxx: ['gcc13', 'msvc']}
    # Packaging / install
    - {project: 'packaging', jobs: ['test'], ctk: '13.X', cxx: ['gcc', 'clang'], gpu: 'rtx2080', sm: 'gpu'}
    - {project: 'packaging', jobs: ['test'], args: '-min-cmake', gpu: 'rtx2080', sm: 'gpu'}
    - {project: 'packaging', jobs: ['install']}
    # NVBench Helper testing:
    - {project: 'nvbench_helper', jobs: ['test'], ctk: '13.X', cxx: ['gcc', 'clang'], gpu: 'rtx2080'}

  nightly:
    # CTK 12.0 full matrix build: default projects
    - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']}
    - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['clang14']}
    - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['msvc2019', 'msvc14.39']}
    # CTK 12.X full matrix build: default projects
    - {jobs: ['build'], std: 'all', ctk: '12.X', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12', 'gcc13', 'gcc14']}
    - {jobs: ['build'], std: 'all', ctk: '12.X', cxx: ['clang14', 'clang15', 'clang16', 'clang17', 'clang18', 'clang19']}
    - {jobs: ['build'], std: 'all', ctk: '12.X', cxx: ['msvc2019', 'msvc2022']}
    # CTK 13.0 full matrix build: default projects
    - {jobs: ['build'], std: 'all', ctk: '13.0', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']}
    - {jobs: ['build'], std: 'all', ctk: '13.0', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19', 'clang20']}
    - {jobs: ['build'], std: 'all', ctk: '13.0', cxx: ['msvc2019', 'msvc2022']}
    # CTK '13.X' full matrix build: default projects
    - {jobs: ['build'], std: 'all', ctk: '13.X', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']}
    - {jobs: ['build'], std: 'all', ctk: '13.X', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19', 'clang20']}
    - {jobs: ['build'], std: 'all', ctk: '13.X', cxx: ['msvc2019', 'msvc2022']}
    # CTK 12.0 full matrix build: cudax
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.0', cxx: ['gcc9', 'gcc10', 'gcc11', 'gcc12']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.0', cxx: ['clang14']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.0', cxx: ['msvc14.39']}
    # CTK 12.X full matrix build: cudax
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.X', cxx: ['gcc9', 'gcc10', 'gcc11', 'gcc12', 'gcc13', 'gcc14']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.X', cxx: ['clang14', 'clang15', 'clang16', 'clang17', 'clang18', 'clang19']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.X', cxx: ['msvc2022']}
    # CTK 13.0 full matrix build: cudax
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.0', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.0', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19', 'clang20']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.0', cxx: ['msvc2022']}
    # CTK '13.X' full matrix build: cudax
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.X', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.X', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19', 'clang20']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.X', cxx: ['msvc2022']}
    # CTK 12.X testing:
    - {jobs: ['test'], project: 'libcudacxx', ctk: '12.X', std: 'max', cxx: ['gcc', 'clang19', 'msvc'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'cub',        ctk: '12.X', std: 'max', cxx: ['gcc', 'clang19', 'msvc'], gpu: 'rtxa6000'}
    - {jobs: ['test'], project: 'thrust',     ctk: '12.X', std: 'max', cxx: ['gcc', 'clang19', 'msvc'], gpu: 'rtx4090'}
    - {jobs: ['test'], project: 'cudax',      ctk: '12.X', std: 'max', cxx: ['gcc', 'clang19', 'msvc'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: '12.X', std: 'max', gpu: 'h100' }
    # CTK '13.X' testing:
    - {jobs: ['test'], project: 'libcudacxx', ctk: '13.X', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'cub',        ctk: '13.X', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'}
    - {jobs: ['test'], project: 'thrust',     ctk: '13.X', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'}
    - {jobs: ['test'], project: 'cudax',      ctk: '13.X', std: 'max', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: '13.X', std: 'max', gpu: 'h100' }
    # RTX PRO 6000 coverage (limited due to small number of runners):
    - {jobs: ['test_nolid', 'test_lid0'], project: 'cub', std: 'max', cxx: 'gcc', gpu: 'rtxpro6000'}
    # Misc:
    - {jobs: ['build'], cpu: 'arm64', project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: '12.X', std: 'all', cxx: ['gcc', 'clang19']}
    - {jobs: ['build'], cpu: 'arm64', project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: '13.X', std: 'all', cxx: ['gcc', 'clang']}
    - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'}
    - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 'rtx4090'}
    - {jobs: ['limited'], project: 'cub', std: 17, gpu: 'rtx2080'}
    # NVRTC tests don't currently support 12.0:
    - {jobs: ['nvrtc'],          project: 'libcudacxx', ctk: [        '12.X', '13.0', '13.X'], cxx: 'gcc12', std: 'all', gpu: 'rtx2080', sm: 'gpu'}
    - {jobs: ['verify_codegen'], project: 'libcudacxx'}
    # c.parallel -- pinned to gcc13 to match python
    - {jobs: ['test'],  project: ['cccl_c_parallel'], ctk: '12.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080']}
    - {jobs: ['test'],  project: ['cccl_c_parallel'], ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080', 'l4', 'h100']}
    # RTX PRO 6000 coverage (limited due to small number of runners):
    - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: ['gcc13'], gpu: ['rtxpro6000']}
    # c.experimental.stf -- pinned to gcc13 to match python
    - {jobs: ['test'],  project: ['cccl_c_stf'], ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']}
    - {jobs: ['test'],  project: ['cccl_c_stf'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']}
    # Python -- pinned to gcc13 on Linux for consistency across CTK images
    - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: ['gcc13', 'msvc']}
    - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: 'gcc13'}
    # CCCL packaging:
    - {jobs: ['test'], project: 'packaging', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080', args: '-min-cmake'}
    - {jobs: ['test'], project: 'packaging', ctk: '12.X', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'packaging', ctk: '13.0', cxx: ['gcc',   'clang'],   gpu: 'rtx2080', args: '-min-cmake'}
    - {jobs: ['test'], project: 'packaging', ctk: '13.X', cxx: ['gcc',   'clang'],   gpu: 'rtx2080'}
    - {jobs: ['install'], project: 'packaging'}
    # NVBench Helper testing:
    - {jobs: ['test'], project: 'nvbench_helper', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'nvbench_helper', ctk: '12.X', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'nvbench_helper', ctk: '13.0', cxx: ['gcc',   'clang'],   gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'nvbench_helper', ctk: '13.X', cxx: ['gcc',   'clang'],   gpu: 'rtx2080'}
    # NVHPC build
    - {jobs: ['build'], cxx: 'nvhpc-prev', ctk: 'nvhpc-prev', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']}
    - {jobs: ['build'], cxx: 'nvhpc',      ctk: 'nvhpc',      std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']}
    # clang-cuda
    - {jobs: ['build'], cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', std: 'all', sm: '75;80;90;100'}

  weekly:
    # CTK 12.0 full matrix build: default projects
    - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']}
    - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['clang14']}
    - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['msvc2019', 'msvc14.39']}
    # CTK 12.X full matrix build: default projects
    - {jobs: ['build'], std: 'all', ctk: '12.X', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12', 'gcc13', 'gcc14']}
    - {jobs: ['build'], std: 'all', ctk: '12.X', cxx: ['clang14', 'clang15', 'clang16', 'clang17', 'clang18', 'clang19']}
    - {jobs: ['build'], std: 'all', ctk: '12.X', cxx: ['msvc2019', 'msvc2022']}
    # CTK 13.0 full matrix build: default projects
    - {jobs: ['build'], std: 'all', ctk: '13.0', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']}
    - {jobs: ['build'], std: 'all', ctk: '13.0', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19', 'clang20']}
    - {jobs: ['build'], std: 'all', ctk: '13.0', cxx: ['msvc2019', 'msvc2022']}
    # CTK '13.X' full matrix build: default projects
    - {jobs: ['build'], std: 'all', ctk: '13.X', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']}
    - {jobs: ['build'], std: 'all', ctk: '13.X', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19', 'clang20']}
    - {jobs: ['build'], std: 'all', ctk: '13.X', cxx: ['msvc2019', 'msvc2022']}
    # CTK 12.0 full matrix build: cudax
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.0', cxx: ['gcc9', 'gcc10', 'gcc11', 'gcc12']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.0', cxx: ['clang14']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.0', cxx: ['msvc14.39']}
    # CTK 12.X full matrix build: cudax
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.X', cxx: ['gcc9', 'gcc10', 'gcc11', 'gcc12', 'gcc13', 'gcc14']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.X', cxx: ['clang14', 'clang15', 'clang16', 'clang17', 'clang18', 'clang19']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '12.X', cxx: ['msvc2022']}
    # CTK 13.0 full matrix build: cudax
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.0', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.0', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19', 'clang20']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.0', cxx: ['msvc2022']}
    # CTK '13.X' full matrix build: cudax
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.X', cxx: ['gcc11', 'gcc12', 'gcc13', 'gcc14']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.X', cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19', 'clang20']}
    - {jobs: ['build'], project: 'cudax', std: 'all', ctk: '13.X', cxx: ['msvc2022']}
    # CTK 12.X testing:
    - {jobs: ['test'], project: 'libcudacxx', ctk: '12.X', std: 'minmax', cxx: ['gcc', 'clang19', 'msvc'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'cub',        ctk: '12.X', std: 'minmax', cxx: ['gcc', 'clang19', 'msvc'], gpu: 'rtxa6000'}
    - {jobs: ['test'], project: 'thrust',     ctk: '12.X', std: 'minmax', cxx: ['gcc', 'clang19', 'msvc'], gpu: 'rtx4090'}
    - {jobs: ['test'], project: 'cudax',      ctk: '12.X', std: 'minmax', cxx: ['gcc', 'clang19', 'msvc'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: '12.X', std: 'minmax', gpu: 'h100' }
    # CTK '13.X' testing:
    - {jobs: ['test'], project: 'libcudacxx', ctk: '13.X', std: 'minmax', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'cub',        ctk: '13.X', std: 'minmax', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'}
    - {jobs: ['test'], project: 'thrust',     ctk: '13.X', std: 'minmax', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'}
    - {jobs: ['test'], project: 'cudax',      ctk: '13.X', std: 'minmax', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: '13.X', std: 'minmax', gpu: 'h100' }
    # RTX PRO 6000 coverage (limited due to small number of runners):
    - {jobs: ['test'], project: 'cub', std: 'max', cxx: 'gcc', gpu: 'rtxpro6000'}
    # Misc:
    - {jobs: ['build'], cpu: 'arm64', project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: '12.X', std: 'all', cxx: ['gcc', 'clang19']}
    - {jobs: ['build'], cpu: 'arm64', project: ['libcudacxx', 'cub', 'thrust', 'cudax'], ctk: '13.X', std: 'all', cxx: ['gcc', 'clang']}
    - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'}
    - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 'rtx4090'}
    - {jobs: ['limited'], project: 'cub', std: 17, gpu: 'rtx2080'}
    # sm: all-cccl:
    - {jobs: ['build'], project: ['thrust', 'libcudacxx', 'cudax'], std: 'max', sm: 'all-cccl' }
    - {jobs: ['build_nolid', 'build_lid0'], project: ['cub'], std: 'max', sm: 'all-cccl'}
    # NVRTC tests don't currently support 12.0:
    - {jobs: ['nvrtc'],          project: 'libcudacxx', ctk: [        '12.X', '13.0', '13.X'], cxx: 'gcc12', std: 'all', gpu: 'rtx2080', sm: 'gpu'}
    - {jobs: ['verify_codegen'], project: 'libcudacxx'}
    # c.parallel -- pinned to gcc13 to match python
    - {jobs: ['test'],  project: ['cccl_c_parallel'], ctk: '12.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080']}
    - {jobs: ['test'],  project: ['cccl_c_parallel'], ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080', 'l4', 'h100']}
    # RTX PRO 6000 coverage (limited due to small number of runners):
    - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: ['gcc13'], gpu: ['rtxpro6000']}
    # c.experimental.stf -- pinned to gcc13 to match python
    - {jobs: ['test'],  project: ['cccl_c_stf'], ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']}
    - {jobs: ['test'],  project: ['cccl_c_stf'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']}
    # Python -- pinned to gcc13 for consistency across CTK images
    - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: ['gcc13', 'msvc']}
    - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: ['gcc13', 'msvc']}
    # CCCL packaging:
    - {jobs: ['test'], project: 'packaging', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080', args: '-min-cmake'}
    - {jobs: ['test'], project: 'packaging', ctk: '12.X', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'packaging', ctk: '13.0', cxx: ['gcc',   'clang'],   gpu: 'rtx2080', args: '-min-cmake'}
    - {jobs: ['test'], project: 'packaging', ctk: '13.X', cxx: ['gcc',   'clang'],   gpu: 'rtx2080'}
    - {jobs: ['install'], project: 'packaging'}
    # NVBench Helper:
    - {jobs: ['test'], project: 'nvbench_helper', ctk: '12.0', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'nvbench_helper', ctk: '12.X', cxx: ['gcc10', 'clang14'], gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'nvbench_helper', ctk: '13.0', cxx: ['gcc',   'clang'],   gpu: 'rtx2080'}
    - {jobs: ['test'], project: 'nvbench_helper', ctk: '13.X', cxx: ['gcc',   'clang'],   gpu: 'rtx2080'}
    # NVHPC build
    - {jobs: ['build'], cxx: 'nvhpc-prev', ctk: 'nvhpc-prev', std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']}
    - {jobs: ['build'], cxx: 'nvhpc',      ctk: 'nvhpc',      std: 'all', project: ['libcudacxx', 'cub', 'thrust', 'cudax', 'stdpar'], cpu: ['amd64', 'arm64']}
    # clang-cuda
    - {jobs: ['build'], cudacxx: 'clang', ctk: 'clang-cuda', cxx: 'clang-cuda', std: 'all', sm: '75;80;90;100'}
    # compute-sanitizer
    - {jobs: ['compute_sanitizer'], project: 'cub', std: 'max', gpu: 'rtxa6000', sm: 'gpu', cmake_options: '-DCMAKE_CUDA_FLAGS=-lineinfo'}

  # Test jobs for the 'python' project only:
  # - l4 GPU: all four listed CTKs and Python 3.10-3.13, with gcc13 and msvc.
  # - h100 GPU: CTK 12.X / 13.X with Python 3.13 only.
  # - arm64 CPU + l4 GPU: CTK 12.X / 13.X and Python 3.10-3.13, gcc13 only.
  python-wheels:
    - {jobs: ['test'], project: 'python', ctk: ['12.0', '12.X', '13.0', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: ['gcc13', 'msvc']}
    - {jobs: ['test'], project: 'python', ctk: ['12.X', '13.X'], py_version: '3.13', gpu: 'h100', cxx: ['gcc13', 'msvc']}
    - {jobs: ['test'], project: 'python', cpu: 'arm64', ctk: ['12.X', '13.X'], py_version: ['3.10', '3.11', '3.12', '3.13'], gpu: 'l4', cxx: 'gcc13'}


  # This is just used to ensure that we generate devcontainers for all images we build.
  # These do not map to any actual jobs.
  devcontainers:
    - {jobs: ['dc'],     ctk: ['12.0', '12.X'                ], cxx: ['clang14']}
    - {jobs: ['dc'],     ctk: ['12.0', '12.X'                ], cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10']}
    - {jobs: ['dc'],     ctk: ['12.0', '12.X', '13.0', '13.X'], cxx: ['gcc11', 'gcc12']}
    - {jobs: ['dc'],     ctk: [        '12.X', '13.0', '13.X'], cxx: ['gcc13', 'gcc14']}
    # Clang20+CTK12.9 is currently only used for clang-cuda testing. nvcc 12.9 doesn't support clang20.
    - {jobs: ['dc'],     ctk: [        '12.X', '13.0', '13.X'], cxx: ['clang15', 'clang16', 'clang17', 'clang18', 'clang19', 'clang20']}
    - {jobs: ['dc_ext'], ctk: [        '12.X', '13.0', '13.X'], cxx: ['gcc14', 'clang20']}
    # 12.0 python image, pinned at gcc13 for simplicity. CTK 12.0 doesn't really play nice with gcc13, but
    # that doesn't matter for running python tests.
    - {jobs: ['dc'], ctk: ['12.0'], cxx: 'gcc13'}
    # NVHPC
    - {jobs: ['dc'], cxx: 'nvhpc-prev', ctk: 'nvhpc-prev'}
    - {jobs: ['dc'], cxx: 'nvhpc',      ctk: 'nvhpc'}

  # Any generated jobs that match the entries in `exclude` will be removed from the final matrix for all workflows.
  exclude:
    # GPU runners are not available on Windows.
    - {jobs: ['test', 'test_gpu', 'test_nolid', 'test_lid0', 'test_lid1', 'test_lid2'], cxx: ['msvc2019', 'msvc14.39', 'msvc2022']}
    # cudax doesn't support C++17 on msvc:
    - {project: 'cudax', std: 17, cxx: ['msvc2019', 'msvc14.39', 'msvc2022']}


#############################################################################################


# The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers
devcontainer_version: '26.02'

# Compiler versions used for the cuda99.X internal builds:
cuda99_gcc_version: 14
cuda99_clang_version: 20

# All supported C++ standards:
all_stds: [17, 20]

# CUDA Toolkit versions known to the matrix. Each entry lists its `stds` and, optionally,
# the alias names that workflow entries may use in `ctk:` instead of the version number:
# - 12.X: newest CTK 12.X version.
# - 13.X: newest CTK 13.X version.
# - nvhpc: CTK shipped in the newest NVHPC.
# - nvhpc-prev: CTK shipped in the previous NVHPC.
# - pybuild: selects the image used for the outer docker instance of python wheel builds.
# - clang-cuda: CTK used by the workflow entries that set `ctk: 'clang-cuda'`.
# NOTE(review): the version keys are plain scalars, so generic YAML loaders resolve them as
# floats; a future 'x.10' release would need a quoted key -- confirm how the consumer reads keys.
ctk_versions:
  12.0:
    stds: [17, 20]
  12.9:
    stds: [17, 20]
    alias: ['12.X', 'nvhpc-prev', 'pybuild', 'clang-cuda']
  13.0:
    stds: [17, 20]
    alias: ['nvhpc']
  13.1:
    stds: [17, 20]
    alias: ['13.X']

# Device-side compilers: display name and executable for each.
device_compilers:
  # Version / stds are taken from CTK.
  nvcc:
    name: 'nvcc'
    exe: 'nvcc'
  # Requires cxx=clang. Version / stds are taken from cxx compiler.
  clang:
    name: 'ClangCUDA'
    exe: 'clang++'

# Host compilers: display name, container tag, executable, and the known versions.
# Each version lists the C++ standards (`stds`) it is used with and an optional `alias`.
# NOTE(review): aliases presumably combine with the compiler key to form names such as
# 'msvc2019' or 'nvhpc-prev' used in the workflows -- confirm against the matrix script.
host_compilers:
  gcc:
    name: 'GCC'
    container_tag: 'gcc'
    exe: 'g++'
    versions:
      7: {stds: [17]}
      8: {stds: [17]}
      9: {stds: [17]}
      10: {stds: [17, 20]}
      11: {stds: [17, 20]}
      12: {stds: [17, 20]}
      13: {stds: [17, 20]}
      14: {stds: [17, 20]}
  clang:
    name: 'Clang'
    container_tag: 'llvm'
    exe: 'clang++'
    versions:
      14: {stds: [17, 20]}
      15: {stds: [17, 20]}
      16: {stds: [17, 20]}
      17: {stds: [17, 20]}
      18: {stds: [17, 20]}
      19: {stds: [17, 20]}
      20: {stds: [17, 20], alias: 'cuda'}
  msvc:
    name: 'MSVC'
    container_tag: 'cl'
    exe: 'cl'
    versions:
      14.29: {stds: [17], alias: '2019'}
      # CTK 12.0 doesn't recognize >14.39 as MSVC 2022.
      14.39: {stds: [17, 20]}
      14.44: {stds: [17, 20], alias: '2022'}
  nvhpc:
    name: 'NVHPC'
    container_tag: 'nvhpc'
    exe: 'nvc++'
    versions:
      # !! Update the ctk_versions 'nvhpc*' aliases when updating NVHPC versions:
      25.7: {stds: [17, 20], alias: 'prev'}
      25.9: {stds: [17, 20]}

# Jobs support the following properties:
#
# - name: The human-readable name of the job. Default is the capitalized job key.
# - needs:
#   - A list of jobs that must be completed before this job can run. Default is an empty list.
#   - These jobs are automatically added if needed:
#     - Eg. "jobs: ['test']" in the workflow def will also create the required 'build' jobs.
# - gpu: Whether the job requires a GPU runner. Default is false.
# - cuda_ext: Whether the job requires a devcontainer with extra CUDA libraries. Default is false.
# - invoke:
#   - Map the job type to the script invocation spec:
#     - prefix: The script invocation prefix. Default is the job name.
#     - args: Additional arguments to pass to the script. Default is no args.
#   - The script is invoked either:
#     linux:   `ci/<spec[prefix]>_<project>.sh <spec[args]>`
#     windows: `ci/windows/<spec[prefix]>_<project>.ps1 <spec[args]>`
# - force_producer_ctk:
#   - If set, force the auto-generated producers for this job to use a specific CTK version.
#   - By default, the autogenerated job's CTK version is determined by the consumer's `ctk` tag.
#   - This is useful for testing the cross-testing major version compat.
#   - E.g. "force_producer_ctk: '12.0'" on a test step will force the generated build step to use CTK 12.0.

jobs:
  # Maps each job key to its properties (gpu, name, needs, cuda_ext, invoke, force_producer_ctk).
  # General:
  build:        { gpu: false }
  test:         { gpu: true, needs: 'build' }
  install:      { gpu: false }

  # Invokes the 'test' script prefix but declares no `needs`, so no build job is attached.
  test_nobuild: { gpu: true, name: 'Test', invoke: { prefix: 'test' } }

  # Invokes the 'test' script prefix with '-compute-sanitizer' after the 'build' job.
  compute_sanitizer: { gpu: true, name: 'ComputeSanitizer', needs: 'build', invoke: { prefix: 'test', args: '-compute-sanitizer' } }

  # libcudacxx:
  nvrtc: { gpu: true, name: 'NVRTC' }
  verify_codegen: { gpu: false, name: 'VerifyCodegen' }

  # CUB:
  # Each build_* job invokes the 'build' script prefix with a different launch-strategy argument.
  build_nolid: { name: 'BuildNoLaunch',     gpu: false, invoke: { prefix: 'build', args: '-no-lid'} }
  build_lid0:  { name: 'BuildHostLaunch',   gpu: false, invoke: { prefix: 'build', args: '-lid0'} }
  build_lid1:  { name: 'BuildDeviceLaunch', gpu: false, invoke: { prefix: 'build', args: '-lid1'} }
  build_lid2:  { name: 'BuildGraphCapture', gpu: false, invoke: { prefix: 'build', args: '-lid2'} }
  # NoLid -> The string `lid_X` doesn't appear in the test name. Mostly warp/block tests, old device tests, and examples.
  test_nolid: { name: 'TestNoLaunch',      gpu: true, needs: 'build_nolid', invoke: { prefix: 'test', args: '-no-lid'} }
  # CUB uses `lid` to indicate launch strategies: whether CUB algorithms are:
  # - launched from the host (lid0):
  test_lid0:  { name: 'HostLaunch',   gpu: true, needs: 'build_lid0', invoke: { prefix: 'test', args: '-lid0'} }
  # - launched from the device (lid1):
  test_lid1:  { name: 'DeviceLaunch', gpu: true, needs: 'build_lid1', invoke: { prefix: 'test', args: '-lid1'} }
  # - captured in a CUDA graph for deferred launch (lid2):
  test_lid2:  { name: 'GraphCapture', gpu: true, needs: 'build_lid2', invoke: { prefix: 'test', args: '-lid2'} }
  # Limited build reduces the number of runtime test cases, available device memory, etc, and may be used
  # to reduce test runtime in limited environments.
  limited:    { name: "SmallGMem",   gpu: true, needs: 'build', invoke: { prefix: 'test', args: '-limited'} }
  # Compute sanitizer jobs:
  # One job per sanitizer check (memcheck, racecheck, initcheck, synccheck), each run against
  # the 'build_nolid' and 'build_lid0' builds.
  compute_mem_nolid:  { name: 'CSMem-TestGPU',       gpu: true, needs: 'build_nolid', invoke: { prefix: 'test', args: '-compute-sanitizer-memcheck  -no-lid'} }
  compute_mem_lid0:   { name: 'CSMem-HostLaunch',    gpu: true, needs: 'build_lid0',  invoke: { prefix: 'test', args: '-compute-sanitizer-memcheck  -lid0'} }
  compute_race_nolid: { name: 'CSRace-TestGPU',      gpu: true, needs: 'build_nolid', invoke: { prefix: 'test', args: '-compute-sanitizer-racecheck -no-lid'} }
  compute_race_lid0:  { name: 'CSRace-HostLaunch',   gpu: true, needs: 'build_lid0',  invoke: { prefix: 'test', args: '-compute-sanitizer-racecheck -lid0'} }
  compute_init_nolid: { name: 'CSInit-TestGPU',      gpu: true, needs: 'build_nolid', invoke: { prefix: 'test', args: '-compute-sanitizer-initcheck -no-lid'} }
  compute_init_lid0:  { name: 'CSInit-HostLaunch',   gpu: true, needs: 'build_lid0',  invoke: { prefix: 'test', args: '-compute-sanitizer-initcheck -lid0'} }
  compute_sync_nolid: { name: 'CSSync-TestGPU',      gpu: true, needs: 'build_nolid', invoke: { prefix: 'test', args: '-compute-sanitizer-synccheck -no-lid'} }
  compute_sync_lid0:  { name: 'CSSync-HostLaunch',   gpu: true, needs: 'build_lid0',  invoke: { prefix: 'test', args: '-compute-sanitizer-synccheck -lid0'} }

  # Thrust:
  # Both jobs invoke the 'test' script prefix after 'build'; only test_gpu requires a GPU runner.
  test_cpu: { name: 'TestCPU', gpu: false, needs: 'build', invoke: { prefix: 'test', args: '-cpu-only'} }
  test_gpu: { name: 'TestGPU', gpu: true,  needs: 'build', invoke: { prefix: 'test', args: '-gpu-only'} }

  # Python:
  # Every Python test job needs 'build_py_wheel' and forces that producer onto the CTK aliased as 'pybuild'.
  build_py_wheel:   { name: "Build cuda.cccl",             gpu: false, invoke: { prefix: 'build_cuda_cccl'} }
  test_py_headers:  { name: "Test cuda.cccl.headers",      gpu: true,  needs: 'build_py_wheel', force_producer_ctk: "pybuild", invoke: { prefix: 'test_cuda_cccl_headers'} }
  test_py_coop:     { name: "Test cuda.coop",  gpu: true,  needs: 'build_py_wheel', force_producer_ctk: "pybuild", invoke: { prefix: 'test_cuda_coop'} }
  test_py_par:      { name: "Test cuda.compute",     gpu: true,  needs: 'build_py_wheel', force_producer_ctk: "pybuild", invoke: { prefix: 'test_cuda_compute'} }
  test_py_examples: { name: "Test cuda.cccl.examples",     gpu: true,  needs: 'build_py_wheel', force_producer_ctk: "pybuild", invoke: { prefix: 'test_cuda_cccl_examples'} }

  # Run jobs for 'target' project (ci/util/build_and_test_targets.sh):
  run_cpu: { gpu: false }
  run_gpu: { gpu: true  }

  # Only used for generating devcontainers. No scripts actually exist for these:
  # dc_ext additionally sets cuda_ext (devcontainer with extra CUDA libraries).
  dc:     { gpu: false }
  dc_ext: { gpu: false, cuda_ext: true }

# Projects have the following properties:
#
# Keys are project subdirectory names. These are also used in script names.
#
# - stds: A list of C++ standards to test. Required.
# - name: The human-readable name of the project. Default is the project key.
# - job_map: Map general jobs to arrays of project-specific jobs.
#            Useful for things like splitting cpu/gpu testing for a project.
#            E.g. "job_map: { test: ['test_cpu', 'test_gpu'] }" replaces
#            the "test" job with distinct "test_cpu" and "test_gpu" jobs.
projects:
  packaging:
    name: 'CCCL Packaging'
    stds: [17, 20]
    # `build` maps to no jobs; `test` maps to the 'test_nobuild' job.
    job_map: { build: [], test: ['test_nobuild'] }
  libcudacxx:
    name: 'libcu++'
    stds: [17, 20]
  cub:
    name: 'CUB'
    stds: [17, 20]
    job_map:
      # General jobs are replaced by the per-launch-strategy CUB jobs from the `jobs` map.
      build: ['build_nolid', 'build_lid0', 'build_lid1', 'build_lid2']
      test: ['test_nolid', 'test_lid0', 'test_lid1', 'test_lid2']
      compute_sanitizer:
        - 'compute_mem_nolid'
        - 'compute_mem_lid0'
        - 'compute_race_nolid'
        - 'compute_race_lid0'
        - 'compute_init_nolid'
        - 'compute_init_lid0'
        - 'compute_sync_nolid'
        - 'compute_sync_lid0'
  thrust:
    name: 'Thrust'
    stds: [17, 20]
    # `test` is replaced with distinct CPU and GPU test jobs.
    job_map:
      test: ['test_cpu', 'test_gpu']
  cudax:
    stds: [17, 20]
  stdpar:
    name: 'NVHPC stdpar'
    stds: [17, 20]
  python:
    # NOTE(review): no `stds` list here although the property list above marks
    # `stds` as required -- presumably intentional for this project; confirm.
    name: 'Python'
    job_map:
      build: ['build_py_wheel']
      test: ['test_py_headers', 'test_py_coop', 'test_py_par', 'test_py_examples']
  cccl_c_parallel:
    name: 'CCCL C Parallel'
    stds: [20]
  cccl_c_stf:
    name: 'CCCL C CUDASTF'
    stds: [20]
  nvbench_helper:
    name: 'NVBench Helper'
    stds: [17]  # Only builds on oldest arch for max compat.
    job_map: { build: [], test: ['test_nobuild'] }

  # Run specific build_and_test_targets.sh invocations across the CI matrix.
  # Use the override workflow and supply arguments via the `args` tag.
  # The `jobs` map defines 'run_cpu' and 'run_gpu' for this project.
  # Example:
  #   override:
  #     - { jobs: ['run_gpu'], project: 'target', ctk: ['12.X', '13.X'], cxx: 'gcc', gpu: 'rtx2080',
  #         args: '--preset cub-cpp20 --build-targets "cub.cpp20.test.iterator" --ctest-targets "cub.cpp20.test.iterator"' }
  target:
    name: 'Target'
    stds: [17, 20]
  bisect:
    name: 'Bisect'
    stds: [17, 20]

# GPU runner models, keyed by the value used in the `gpu` tag.
# - sm: GPU architecture of the model.
# testing -> Runner with GPU is in a nv-gh-runners testing pool
gpus:
  t4: { sm: 75 }          # 16 GB,  10 runners
  rtx2080: { sm: 75 }     #  8 GB,  12 runners
  rtxa6000: { sm: 86 }    # 48 GB,  12 runners
  l4: { sm: 89 }          # 24 GB,  48 runners
  rtx4090: { sm: 89 }     # 24 GB,  10 runners
  h100: { sm: 90 }        # 80 GB,  16 runners
  # Very small number of runners on loan from cuda-python while we wait for our order to arrive.
  # Limit jobs on these:
  rtxpro6000: { sm: 120 }

# Tags are used to define a `matrix job` in the workflow section.
#
# Tags have the following options:
#  - required: Whether the tag is required. Default is false.
#  - default: The default value for the tag. Default is null.
tags:
  # An array of jobs (e.g. 'build', 'test', 'nvrtc', 'verify_codegen', ...)
  # See the `jobs` map.
  jobs: { required: true }
  # CUDA ToolKit version (the default, '13.X', is an alias defined in that map)
  # See the `ctk_versions` map.
  ctk: { default: '13.X' }
  # CPU architecture
  cpu: { default: 'amd64' }
  # GPU model
  # See the `gpus` map.
  gpu: { default: 'rtx2080' }
  # Host compiler {name, version, exe}
  # See the `host_compilers` map.
  cxx: { default: 'gcc' }
  # Device compiler.
  # See the `device_compilers` map.
  cudacxx: { default: 'nvcc' }
  # Project name (e.g. libcudacxx, cub, thrust, cudax)
  # See the `projects` map.
  project: { default: ['libcudacxx', 'cub', 'thrust'] }
  # Python version for Python builds/tests
  py_version: { required: false }
  # C++ standard
  # If set to 'all', all stds supported by the ctk/compilers/project are used.
  # If set to 'min', 'max', or 'minmax', the minimum, maximum, or both stds are used.
  # If set, will be passed to script with `-std <std>`.
  std: { required: false }
  # GPU architecture
  # - If set, passed to script with `-arch <sm>`.
  # - Format is the same as `CMAKE_CUDA_ARCHITECTURES`:
  #   - PTX only: 70-virtual
  #   - SASS only: 70-real
  #   - Both: 70
  # - Can pass multiple architectures via "60;70-real;80-virtual"
  # - Defaults to use the settings in the CMakePresets.json file.
  # - Will be exploded if an array, e.g. `sm: ['60;70;80;90', '90a']` creates two jobs.
  # - Set to 'gpu' to only target the GPU in the `gpu` tag.
  sm: { required: false }
  # Additional CMake options to pass to the build.
  # If set, passed to script with `-cmake_options "<cmake_options>"`.
  cmake_options: { required: false }
  # Additional arguments appended to the generated command.
  # Typically used with the `target` project to forward options to
  # ci/util/build_and_test_targets.sh, but works with all CI jobs.
  args: { required: false, default: "" }