@@ -758,7 +758,6 @@ size_t ZSTD_HcFindBestMatch(
758758* (SIMD) Row-based matchfinder
759759***********************************/
760760/* Constants for row-based hash */
761- #define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
762761#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
763762#define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */
764763
@@ -801,12 +800,13 @@ U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
801800
802801/* ZSTD_row_nextIndex():
803802 * Returns the next index to insert at within a tagTable row, and updates the "head"
804- * value to reflect the update. Essentially cycles backwards from [0 , {entries per row})
803+ * value to reflect the update. Cycles backwards through [1 , {entries per row}); index 0 is skipped.
805804 */
806805FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex (BYTE * const tagRow , U32 const rowMask ) {
807- U32 const next = (* tagRow - 1 ) & rowMask ;
808- * tagRow = (BYTE )next ;
809- return next ;
806+ U32 next = (* tagRow - 1 ) & rowMask ;
807+ next += (next == 0 ) ? rowMask : 0 ; /* skip position 0: wrap to rowMask instead, since byte 0 of tagRow stores the head itself */
808+ * tagRow = (BYTE )next ;
809+ return next ;
810810}
811811
812812/* ZSTD_isAligned():
@@ -820,7 +820,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
820820/* ZSTD_row_prefetch():
821821 * Performs prefetching for the hashTable and tagTable at a given row.
822822 */
823- FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch (U32 const * hashTable , U16 const * tagTable , U32 const relRow , U32 const rowLog ) {
823+ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch (U32 const * hashTable , BYTE const * tagTable , U32 const relRow , U32 const rowLog ) {
824824 PREFETCH_L1 (hashTable + relRow );
825825 if (rowLog >= 5 ) {
826826 PREFETCH_L1 (hashTable + relRow + 16 );
@@ -844,7 +844,7 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
844844 U32 idx , const BYTE * const iLimit )
845845{
846846 U32 const * const hashTable = ms -> hashTable ;
847- U16 const * const tagTable = ms -> tagTable ;
847+ BYTE const * const tagTable = ms -> tagTable ;
848848 U32 const hashLog = ms -> rowHashLog ;
849849 U32 const maxElemsToPrefetch = (base + idx ) > iLimit ? 0 : (U32 )(iLimit - (base + idx ) + 1 );
850850 U32 const lim = idx + MIN (ZSTD_ROW_HASH_CACHE_SIZE , maxElemsToPrefetch );
@@ -866,7 +866,7 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
866866 * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
867867 */
868868FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash (U32 * cache , U32 const * hashTable ,
869- U16 const * tagTable , BYTE const * base ,
869+ BYTE const * tagTable , BYTE const * base ,
870870 U32 idx , U32 const hashLog ,
871871 U32 const rowLog , U32 const mls )
872872{
@@ -888,7 +888,7 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
888888 U32 const rowMask , U32 const useCache )
889889{
890890 U32 * const hashTable = ms -> hashTable ;
891- U16 * const tagTable = ms -> tagTable ;
891+ BYTE * const tagTable = ms -> tagTable ;
892892 U32 const hashLog = ms -> rowHashLog ;
893893 const BYTE * const base = ms -> window .base ;
894894
@@ -898,12 +898,11 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
898898 : (U32 )ZSTD_hashPtr (base + updateStartIdx , hashLog + ZSTD_ROW_HASH_TAG_BITS , mls );
899899 U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS ) << rowLog ;
900900 U32 * const row = hashTable + relRow ;
901- BYTE * tagRow = (BYTE * )(tagTable + relRow ); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
902- Explicit cast allows us to get exact desired position within each row */
901+ BYTE * tagRow = tagTable + relRow ;
903902 U32 const pos = ZSTD_row_nextIndex (tagRow , rowMask );
904903
905904 assert (hash == ZSTD_hashPtr (base + updateStartIdx , hashLog + ZSTD_ROW_HASH_TAG_BITS , mls ));
906- (( BYTE * ) tagRow ) [pos + ZSTD_ROW_HASH_TAG_OFFSET ] = hash & ZSTD_ROW_HASH_TAG_MASK ;
905+ tagRow [pos ] = hash & ZSTD_ROW_HASH_TAG_MASK ;
907906 row [pos ] = updateStartIdx ;
908907 }
909908}
@@ -1059,7 +1058,7 @@ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag
10591058FORCE_INLINE_TEMPLATE ZSTD_VecMask
10601059ZSTD_row_getMatchMask (const BYTE * const tagRow , const BYTE tag , const U32 headGrouped , const U32 rowEntries )
10611060{
1062- const BYTE * const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET ;
1061+ const BYTE * const src = tagRow ;
10631062 assert ((rowEntries == 16 ) || (rowEntries == 32 ) || rowEntries == 64 );
10641063 assert (rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES );
10651064 assert (ZSTD_row_matchMaskGroupWidth (rowEntries ) * rowEntries <= sizeof (ZSTD_VecMask ) * 8 );
@@ -1144,7 +1143,7 @@ size_t ZSTD_RowFindBestMatch(
11441143 const U32 rowLog )
11451144{
11461145 U32 * const hashTable = ms -> hashTable ;
1147- U16 * const tagTable = ms -> tagTable ;
1146+ BYTE * const tagTable = ms -> tagTable ;
11481147 U32 * const hashCache = ms -> hashCache ;
11491148 const U32 hashLog = ms -> rowHashLog ;
11501149 const ZSTD_compressionParameters * const cParams = & ms -> cParams ;
@@ -1188,7 +1187,7 @@ size_t ZSTD_RowFindBestMatch(
11881187 if (dictMode == ZSTD_dictMatchState ) {
11891188 /* Prefetch DMS rows */
11901189 U32 * const dmsHashTable = dms -> hashTable ;
1191- U16 * const dmsTagTable = dms -> tagTable ;
1190+ BYTE * const dmsTagTable = dms -> tagTable ;
11921191 U32 const dmsHash = (U32 )ZSTD_hashPtr (ip , dms -> rowHashLog + ZSTD_ROW_HASH_TAG_BITS , mls );
11931192 U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS ) << rowLog ;
11941193 dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK ;
@@ -1230,7 +1229,7 @@ size_t ZSTD_RowFindBestMatch(
12301229 in ZSTD_row_update_internal() at the next search. */
12311230 {
12321231 U32 const pos = ZSTD_row_nextIndex (tagRow , rowMask );
1233- tagRow [pos + ZSTD_ROW_HASH_TAG_OFFSET ] = (BYTE )tag ;
1232+ tagRow [pos ] = (BYTE )tag ;
12341233 row [pos ] = ms -> nextToUpdate ++ ;
12351234 }
12361235
0 commit comments