@@ -91,10 +91,13 @@ gf_4vect_dot_prod_rvv:
9191 sd s2, 16 (sp)
9292 sd s3, 24 (sp)
9393
94- vsetvli t0, x0, e8, m1 /* Set vector length to maximum */
95-
94+ # vsetvli t0, x0, e8, m1 /* Set vector length to maximum */
95+ vsetvli t0, x0, e8, m1
9696 li x_pos, 0
97- slli t_offset, x_vec, 5
97+
98+ slli x_vec, x_vec, 3
99+ slli t_offset, x_vec, 2
100+
98101 ld x_dest1, 0 (x_dest)
99102 ld x_dest2, 8 (x_dest)
100103 ld x_dest3, 16 (x_dest)
@@ -111,19 +114,20 @@ gf_4vect_dot_prod_rvv:
111114 vmv.v.i v_dest3, 0
112115 vmv.v.i v_dest4, 0
113116
117+ /* x_vec, number of source vectors (ie. data blocks) */
118+ li x_vec_i, 0
119+
120+ /* load source pointer */
121+ ld x_ptr, 0 (x_src)
122+
114123 /* Reset table pointers */
115124 mv x_tbl1, x_tbl
116125 add x_tbl2, x_tbl1, t_offset
117126 add x_tbl3, x_tbl2, t_offset
118127 add x_tbl4, x_tbl3, t_offset
119128
120- /* Loop 2: x_vec, number of source vectors (ie. data blocks) */
121- li x_vec_i, 0
122129.Lloop_rvv_vl_vects:
123130 /* Load source data */
124- slli a6, x_vec_i, 3
125- add a6,x_src,a6
126- ld x_ptr, 0 (a6)
127131 add x_ptr,x_ptr,x_pos
128132
129133 vle8.v v_src, (x_ptr)
@@ -142,6 +146,10 @@ gf_4vect_dot_prod_rvv:
142146 vle8.v v_gft2_hi, (x_tbl2)
143147 addi x_tbl2, x_tbl2, 16
144148
149+ /* Move to next source vector */
150+ addi x_vec_i, x_vec_i, 8
151+ add a6, x_src, x_vec_i
152+ ld x_ptr, 0 (a6)
145153
146154 /* Load next gf_table's */
147155 vle8.v v_gft3_lo, (x_tbl3)
@@ -178,9 +186,6 @@ gf_4vect_dot_prod_rvv:
178186 vxor.vv v_dest4, v_dest4, v26
179187 vxor.vv v_dest4, v_dest4, v27
180188
181- /* Move to next source vector */
182- addi x_vec_i, x_vec_i, 1
183-
184189 /* Check if we have processed all vectors */
185190 blt x_vec_i, x_vec, .Lloop_rvv_vl_vects
186191
@@ -198,7 +203,7 @@ gf_4vect_dot_prod_rvv:
198203 j .Lloop_rvv_vl
199204
200205.return_pass:
201- /* restore callee-saved registers */
206+ /* restore callee-saved registers */
202207 ld s0, 0 (sp)
203208 ld s1, 8 (sp)
204209 ld s2, 16 (sp)
0 commit comments