3030#include "../thread.h"
3131#include "../types.h"
3232
33- extern ALIGN64 int16_t in_weights [INSIZE * KPSIZE ];
34- extern ALIGN64 int16_t in_biases [KPSIZE ];
35-
3633
/*
 * Fold a 64-entry square index into a 32-entry one.
 *
 * For sq in 0..63, written as sq = 8 * hi + lo with lo = sq & 0x7:
 *   - (sq >> 1) & ~0x3 evaluates to 4 * hi, a row base of four slots;
 *   - Mirror[lo] maps lo = 0..7 onto 3,2,1,0,0,1,2,3, so lo and 7 - lo
 *     (left/right mirror images within a row of eight) share a slot.
 *
 * Returns a value in 0..31. The caller must pass sq in 0..63; values
 * outside that range are not validated here.
 */
static int sq64_to_sq32(int sq) {

    static const int Mirror[] = { 3, 2, 1, 0, 0, 1, 2, 3 };

    const int row_base = (sq >> 1) & ~0x3;   // 4 * (sq / 8): low three bits dropped
    const int column   = Mirror[sq & 0x7];   // mirrored position inside the row

    return row_base + column;
}
4138
42- static int nnue_index_delta (int piece , int relksq , int colour , int sq ) {
39+ static int nnue_index (int piece , int relksq , int colour , int sq ) {
4340
4441 const int ptype = pieceType (piece );
4542 const int pcolour = pieceColour (piece );
@@ -51,15 +48,10 @@ static int nnue_index_delta(int piece, int relksq, int colour, int sq) {
5148 return 640 * sq64_to_sq32 (mksq ) + (64 * (5 * (colour == pcolour ) + ptype )) + mpsq ;
5249}
5350
54- static int nnue_index (Board * board , int relksq , int colour , int sq ) {
55- return nnue_index_delta (board -> squares [sq ], relksq , colour , sq );
56- }
57-
58-
5951int nnue_can_update (NNUEAccumulator * accum , Board * board , int colour ) {
6052
6153 // Search back through the tree to find an accurate accum
62- while (accum != board -> thread -> nnueStack ) {
54+ while (accum != board -> thread -> nnue -> stack ) {
6355
6456 // A King move prevents the entire tree from being updated
6557 if ( accum -> changes
@@ -77,51 +69,6 @@ int nnue_can_update(NNUEAccumulator *accum, Board *board, int colour) {
7769 return FALSE;
7870}
7971
80- void nnue_refresh_accumulator (NNUEAccumulator * accum , Board * board , int colour , int relsq ) {
81-
82- const uint64_t white = board -> colours [WHITE ];
83- const uint64_t black = board -> colours [BLACK ];
84- const uint64_t kings = board -> pieces [KING ];
85-
86- int indices [32 ], count = 0 ;
87- uint64_t pieces = (white | black ) & ~kings ;
88- vepi16 * biases , * outputs , * weights , registers [NUM_REGS ];
89-
90- // Compute the list of indices just once, to then be used multiple
91- // times while updating the accumulator using a tiling method
92-
93- while (pieces ) {
94- const int sq = poplsb (& pieces );
95- indices [count ++ ] = nnue_index (board , relsq , colour , sq );
96- }
97-
98- // Refresh completely, using all pieces as inputs except the Kings
99- // We do this by tiling over the accumulator, to get the compiler to
100- // produce more optimal code that does not emit extra move instructions
101-
102- for (int offset = 0 ; offset < KPSIZE ; offset += NUM_REGS * vepi16_cnt ) {
103-
104- biases = (vepi16 * ) & in_biases [offset ];
105- outputs = (vepi16 * ) & accum -> values [colour ][offset ];
106-
107- for (int i = 0 ; i < NUM_REGS ; i ++ )
108- registers [i ] = biases [i ];
109-
110- for (int i = 0 ; i < count ; i ++ ) {
111-
112- weights = (vepi16 * ) & in_weights [indices [i ] * KPSIZE + offset ];
113-
114- for (int j = 0 ; j < NUM_REGS ; j ++ )
115- registers [j ] = vepi16_add (registers [j ], weights [j ]);
116- }
117-
118- for (int i = 0 ; i < NUM_REGS ; i ++ )
119- outputs [i ] = registers [i ];
120- }
121-
122- accum -> accurate [colour ] = TRUE;
123- }
124-
12572void nnue_update_accumulator (NNUEAccumulator * accum , Board * board , int colour , int relksq ) {
12673
12774 int add = 0 , remove = 0 ;
@@ -141,11 +88,11 @@ void nnue_update_accumulator(NNUEAccumulator *accum, Board *board, int colour, i
14188
14289 // Moving or placing a Piece to a Square
14390 if (x -> to != SQUARE_NB )
144- add_list [add ++ ] = nnue_index_delta (x -> piece , relksq , colour , x -> to );
91+ add_list [add ++ ] = nnue_index (x -> piece , relksq , colour , x -> to );
14592
14693 // Moving or deleting a Piece from a Square
14794 if (x -> from != SQUARE_NB )
148- remove_list [remove ++ ] = nnue_index_delta (x -> piece , relksq , colour , x -> from );
95+ remove_list [remove ++ ] = nnue_index (x -> piece , relksq , colour , x -> from );
14996 }
15097
15198 for (int offset = 0 ; offset < KPSIZE ; offset += NUM_REGS * vepi16_cnt ) {
@@ -179,3 +126,61 @@ void nnue_update_accumulator(NNUEAccumulator *accum, Board *board, int colour, i
179126 accum -> accurate [colour ] = TRUE;
180127 return ;
181128}
129+
130+ void nnue_refresh_accumulator (NNUEEvaluator * nnue , NNUEAccumulator * accum , Board * board , int colour , int relsq ) {
131+
132+ vepi16 * outputs , * weights , registers [NUM_REGS ];
133+ const int ksq = getlsb (board -> pieces [KING ] & board -> colours [colour ]);
134+ NNUEAccumulatorTableEntry * entry = & nnue -> table [ksq ];
135+
136+ int set_indexes [32 ], set_count = 0 ;
137+ int unset_indexes [32 ], unset_count = 0 ;
138+
139+ for (int c = WHITE ; c <= BLACK ; c ++ ) {
140+
141+ for (int pt = PAWN ; pt <= QUEEN ; pt ++ ) {
142+
143+ uint64_t pieces = board -> pieces [pt ] & board -> colours [c ];
144+ uint64_t to_set = pieces & ~entry -> occupancy [colour ][c ][pt ];
145+ uint64_t to_unset = entry -> occupancy [colour ][c ][pt ] & ~pieces ;
146+
147+ while (to_set )
148+ set_indexes [set_count ++ ] = nnue_index (makePiece (pt , c ), relsq , colour , poplsb (& to_set ));
149+
150+ while (to_unset )
151+ unset_indexes [unset_count ++ ] = nnue_index (makePiece (pt , c ), relsq , colour , poplsb (& to_unset ));
152+
153+ entry -> occupancy [colour ][c ][pt ] = pieces ;
154+ }
155+ }
156+
157+ for (int offset = 0 ; offset < KPSIZE ; offset += NUM_REGS * vepi16_cnt ) {
158+
159+ outputs = (vepi16 * ) & entry -> accumulator .values [colour ][offset ];
160+
161+ for (int i = 0 ; i < NUM_REGS ; i ++ )
162+ registers [i ] = outputs [i ];
163+
164+ for (int i = 0 ; i < set_count ; i ++ ) {
165+
166+ weights = (vepi16 * ) & in_weights [set_indexes [i ] * KPSIZE + offset ];
167+
168+ for (int j = 0 ; j < NUM_REGS ; j ++ )
169+ registers [j ] = vepi16_add (registers [j ], weights [j ]);
170+ }
171+
172+ for (int i = 0 ; i < unset_count ; i ++ ) {
173+
174+ weights = (vepi16 * ) & in_weights [unset_indexes [i ] * KPSIZE + offset ];
175+
176+ for (int j = 0 ; j < NUM_REGS ; j ++ )
177+ registers [j ] = vepi16_sub (registers [j ], weights [j ]);
178+ }
179+
180+ for (int i = 0 ; i < NUM_REGS ; i ++ )
181+ outputs [i ] = registers [i ];
182+ }
183+
184+ memcpy (accum -> values [colour ], entry -> accumulator .values [colour ], sizeof (int16_t ) * KPSIZE );
185+ accum -> accurate [colour ] = TRUE;
186+ }
0 commit comments