diff --git a/src/kernels/rotary_embedding_kernels.cpp b/src/kernels/rotary_embedding_kernels.cpp index 4b8b817e..52142902 100644 --- a/src/kernels/rotary_embedding_kernels.cpp +++ b/src/kernels/rotary_embedding_kernels.cpp @@ -241,7 +241,7 @@ static inline void chatglm2ApplyRotaryPosEmbeding(T *query, T *key, int qStride, for (int head = 0; head < head_num; ++head) { for (int bs = 0; bs < batch_size; ++bs) { for (int seq = 0; seq < seq_len; ++seq) { - T *pF = query + seq * qStride + head * dim; + T *pF = query + bs * seq_len * qStride + seq * qStride + head * dim; int pos = position_ids[seq]; float *pcos = emb_cos + pos * dim; diff --git a/src/utils/shm_reduction.cpp b/src/utils/shm_reduction.cpp index f1097530..8fc15c35 100644 --- a/src/utils/shm_reduction.cpp +++ b/src/utils/shm_reduction.cpp @@ -50,8 +50,9 @@ void ShmReduction::ShmResize(int rank, size_t size) { // shm_unlink(shmCtx_.name); // alloc and map new shm - total_size = total_size - shmCtx_.nbytes + size; shmCtx_.nbytes = size; + shmCtx_.nblocks = (size + SHM_BLOCK_SIZE - 1) / SHM_BLOCK_SIZE; + total_size = sizeof(int) * shmCtx_.nstates + shmCtx_.nbytes + shmCtx_.nblocks * shmCtx_.nstates; // Truncate the shared memory to the desired size if (rank == 0 && ftruncate(shmCtx_.fp, total_size) == -1) { perror("shm ftruncate failed.");