@@ -126,6 +126,7 @@ def compile_mmh(src):
126126
127127def clipped_linear (
128128 X ,
129+ * ,
129130 inplace = False ,
130131 slope = 1.0 ,
131132 offset = 0.0 ,
@@ -154,7 +155,7 @@ def clipped_linear(
154155 return out
155156
156157
157- def gelu (X , inplace = False , threshold = 6.0 , threads_per_block = 128 , num_blocks = 128 ):
158+ def gelu (X , * , inplace = False , threshold = 6.0 , threads_per_block = 128 , num_blocks = 128 ):
158159 _is_float_array (X )
159160
160161 out = X
@@ -179,32 +180,32 @@ def check_seq2col_lengths(lengths, B):
179180 return lengths
180181
181182
def seq2col(seq, nW, *, lengths=None, threads_per_block=128, num_blocks=128):
    """Launch the seq2col CUDA kernel on ``seq`` and return the result.

    seq: array accepted by ``_is_float_array``; indexed on its first two
        axes, so it presumably is 2-D (rows, features) -- TODO confirm.
    nW: window size; the output has ``nW * 2 + 1`` times as many columns
        as ``seq``.
    lengths: passed through ``check_seq2col_lengths`` together with the
        row count before being handed to the kernel.
    threads_per_block, num_blocks: launch configuration forwarded to the
        kernel call.

    Returns a newly allocated array of shape
    ``(seq.shape[0], seq.shape[1] * (nW * 2 + 1))`` with ``seq``'s dtype.
    It stays all zeros when either ``seq`` or ``lengths`` is empty.
    """
    _is_float_array(seq)

    n_rows = seq.shape[0]
    window_slots = nW * 2 + 1
    n_cols = seq.shape[1]

    lengths = check_seq2col_lengths(lengths, n_rows)
    n_lengths = lengths.shape[0]

    result = cupy.zeros((n_rows, n_cols * window_slots), dtype=seq.dtype)

    # Nothing to compute for empty input: skip the kernel launch entirely.
    if seq.size == 0 or lengths.size == 0:
        return result

    # Every dtype other than float32 is routed to the double kernel.
    kernel = seq2col_kernel_float if seq.dtype == "float32" else seq2col_kernel_double
    kernel(
        (num_blocks,),
        (threads_per_block,),
        (result, seq, lengths, nW, n_rows, n_cols, n_lengths),
    )
    return result
205206
206207
207- def maxout (X , threads_per_block = 128 , num_blocks = 128 ):
208+ def maxout (X , * , threads_per_block = 128 , num_blocks = 128 ):
208209 _is_float_array (X )
209210
210211 B , I , P = X .shape
@@ -225,7 +226,7 @@ def maxout(X, threads_per_block=128, num_blocks=128):
225226 return best , which
226227
227228
228- def mish (X , inplace = False , threshold = 5 , threads_per_block = 128 , num_blocks = 128 ):
229+ def mish (X , * , inplace = False , threshold = 5 , threads_per_block = 128 , num_blocks = 128 ):
229230 _is_float_array (X )
230231
231232 out = X
@@ -244,7 +245,7 @@ def mish(X, inplace=False, threshold=5, threads_per_block=128, num_blocks=128):
244245 return out
245246
246247
247- def reduce_sum (X , lengths , threads_per_block = 128 , num_blocks = 128 ):
248+ def reduce_sum (X , lengths , * , threads_per_block = 128 , num_blocks = 128 ):
248249 _is_float_array (X )
249250
250251 B = len (lengths )
@@ -267,7 +268,7 @@ def reduce_sum(X, lengths, threads_per_block=128, num_blocks=128):
267268 return out
268269
269270
270- def reduce_mean (X , lengths , threads_per_block = 128 , num_blocks = 128 ):
271+ def reduce_mean (X , lengths , * , threads_per_block = 128 , num_blocks = 128 ):
271272 _is_float_array (X )
272273
273274 B = len (lengths )
@@ -292,7 +293,7 @@ def reduce_mean(X, lengths, threads_per_block=128, num_blocks=128):
292293 return out
293294
294295
295- def reduce_max (X , lengths , threads_per_block = 128 , num_blocks = 128 ):
296+ def reduce_max (X , lengths , * , threads_per_block = 128 , num_blocks = 128 ):
296297 _is_float_array (X )
297298
298299 B = len (lengths )
@@ -317,7 +318,7 @@ def reduce_max(X, lengths, threads_per_block=128, num_blocks=128):
317318 return maxes , which
318319
319320
320- def swish (X , inplace = False , threshold = 17.0 , threads_per_block = 128 , num_blocks = 128 ):
321+ def swish (X , * , inplace = False , threshold = 17.0 , threads_per_block = 128 , num_blocks = 128 ):
321322 _is_float_array (X )
322323
323324 out = X
@@ -362,6 +363,7 @@ def backprop_seq2col(dY, nW, *, lengths=None, threads_per_block=128, num_blocks=
362363def backprop_clipped_linear (
363364 dY ,
364365 X ,
366+ * ,
365367 slope : float = 1.0 ,
366368 offset : float = 0.0 ,
367369 min_val : float = 0.0 ,
@@ -394,7 +396,7 @@ def backprop_clipped_linear(
394396
395397
396398def backprop_hard_swish (
397- dY , X , inplace : bool = False , threads_per_block = 128 , num_blocks = 128
399+ dY , X , * , inplace : bool = False , threads_per_block = 128 , num_blocks = 128
398400):
399401 _is_float_array (dY )
400402 _is_float_array (X , shape = dY .shape )
@@ -416,7 +418,7 @@ def backprop_hard_swish(
416418
417419
418420def backprop_hard_swish_mobilenet (
419- dY , X , inplace : bool = False , threads_per_block = 128 , num_blocks = 128
421+ dY , X , * , inplace : bool = False , threads_per_block = 128 , num_blocks = 128
420422):
421423 _is_float_array (dY )
422424 _is_float_array (X , shape = dY .shape )
@@ -438,7 +440,13 @@ def backprop_hard_swish_mobilenet(
438440
439441
440442def backprop_gelu (
441- dY , X , inplace : bool = False , threshold = 6.0 , threads_per_block = 128 , num_blocks = 128
443+ dY ,
444+ X ,
445+ * ,
446+ inplace : bool = False ,
447+ threshold = 6.0 ,
448+ threads_per_block = 128 ,
449+ num_blocks = 128 ,
442450):
443451 _is_float_array (dY )
444452 _is_float_array (X , shape = dY .shape )
@@ -459,7 +467,7 @@ def backprop_gelu(
459467 return out
460468
461469
462- def backprop_maxout (dY , which , P , threads_per_block = 128 , num_blocks = 128 ):
470+ def backprop_maxout (dY , which , P , * , threads_per_block = 128 , num_blocks = 128 ):
463471 _is_float_array (dY )
464472
465473 B = dY .shape [0 ]
@@ -482,7 +490,7 @@ def backprop_maxout(dY, which, P, threads_per_block=128, num_blocks=128):
482490
483491
484492def backprop_mish (
485- dY , X , inplace : bool = False , threshold = 5 , threads_per_block = 128 , num_blocks = 128
493+ dY , X , * , inplace : bool = False , threshold = 5 , threads_per_block = 128 , num_blocks = 128
486494):
487495 _is_float_array (dY )
488496 _is_float_array (X , shape = dY .shape )
@@ -503,51 +511,53 @@ def backprop_mish(
503511 return out
504512
505513
def backprop_reduce_sum(d_sums, lengths, *, threads_per_block=128, num_blocks=128):
    """Launch the backprop-reduce-sum CUDA kernel and return its output.

    d_sums: array accepted by ``_is_float_array``; its second axis gives
        the output width.
    lengths: array of sequence lengths; ``len(lengths)`` and
        ``lengths.sum()`` are passed to the kernel as the sequence count
        and the total number of output rows.
    threads_per_block, num_blocks: launch configuration forwarded to the
        kernel call.

    Returns a newly allocated array of shape
    ``(int(lengths.sum()), d_sums.shape[1])`` with ``d_sums``'s dtype,
    zero-initialised before the kernel writes into it.
    """
    _is_float_array(d_sums)

    n_seqs = len(lengths)
    total_rows = int(lengths.sum())
    width = d_sums.shape[1]
    _check_lengths(lengths, total_rows)

    result = cupy.zeros((total_rows, width), dtype=d_sums.dtype)

    # Every dtype other than float32 is routed to the double kernel.
    if d_sums.dtype == "float32":
        kernel = backprop_reduce_sum_kernel_float
    else:
        kernel = backprop_reduce_sum_kernel_double
    kernel(
        (num_blocks,),
        (threads_per_block,),
        (result, d_sums, lengths, n_seqs, total_rows, width),
    )

    return result
526534
527535
def backprop_reduce_mean(d_means, lengths, *, threads_per_block=128, num_blocks=128):
    """Launch the backprop-reduce-mean CUDA kernel and return its output.

    d_means: array accepted by ``_is_float_array``; its second axis gives
        the output width.
    lengths: array of sequence lengths; ``len(lengths)`` and
        ``lengths.sum()`` are passed to the kernel as the sequence count
        and the total number of output rows.
    threads_per_block, num_blocks: launch configuration forwarded to the
        kernel call.

    Returns a newly allocated array of shape
    ``(int(lengths.sum()), d_means.shape[1])`` with ``d_means``'s dtype,
    zero-initialised before the kernel writes into it.
    """
    _is_float_array(d_means)

    n_seqs = len(lengths)
    total_rows = int(lengths.sum())
    width = d_means.shape[1]
    _check_lengths(lengths, total_rows)

    result = cupy.zeros((total_rows, width), dtype=d_means.dtype)

    # Every dtype other than float32 is routed to the double kernel.
    if d_means.dtype == "float32":
        kernel = backprop_reduce_mean_kernel_float
    else:
        kernel = backprop_reduce_mean_kernel_double
    kernel(
        (num_blocks,),
        (threads_per_block,),
        (result, d_means, lengths, n_seqs, total_rows, width),
    )

    return result
548556
549557
550- def backprop_reduce_max (d_maxes , which , lengths , threads_per_block = 128 , num_blocks = 128 ):
558+ def backprop_reduce_max (
559+ d_maxes , which , lengths , * , threads_per_block = 128 , num_blocks = 128
560+ ):
551561 _is_float_array (d_maxes )
552562
553563 B = len (lengths )
@@ -572,7 +582,7 @@ def backprop_reduce_max(d_maxes, which, lengths, threads_per_block=128, num_bloc
572582
573583
574584def backprop_swish (
575- dY , X , Y , inplace = False , threshold = 17.0 , threads_per_block = 128 , num_blocks = 128
585+ dY , X , Y , * , inplace = False , threshold = 17.0 , threads_per_block = 128 , num_blocks = 128
576586):
577587 _is_float_array (dY )
578588 _is_float_array (X , shape = dY .shape )
@@ -594,7 +604,7 @@ def backprop_swish(
594604 return out
595605
596606
597- def hash (ids , seed , threads_per_block = 128 , num_blocks = 128 ):
607+ def hash (ids , seed , * , threads_per_block = 128 , num_blocks = 128 ):
598608 out = cupy .zeros ((ids .shape [0 ], 4 ), dtype = "uint32" )
599609
600610 # sizeof(uint32_t) * 4
0 commit comments