Skip to content

Commit ab2a2bb

Browse files
committed
ggml : extend support for n_seq for soft_max and fattn
ggml-ci
1 parent 8c68219 commit ab2a2bb

File tree

1 file changed

+8
-6
lines changed

1 file changed

+8
-6
lines changed

ggml/src/ggml-cpu/ops.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4851,9 +4851,7 @@ static void ggml_compute_forward_soft_max_f32(
4851 4851          const int ith = params->ith;
4852 4852          const int nth = params->nth;
4853 4853
4854      -        GGML_TENSOR_UNARY_OP_LOCALS
4855      -
4856      -        //const int64_t ne11 = src1 ? src1->ne[1] : 1;
     4854 +        GGML_TENSOR_BINARY_OP_LOCALS
4857 4855
4858 4856          // TODO: is this supposed to be ceil instead of floor?
4859 4857          //       https://huggingface.co/mosaicml/mpt-7b/blob/main/attention.py#L370
@@ -4878,6 +4876,10 @@ static void ggml_compute_forward_soft_max_f32(
4878 4876          const bool use_f16 = (src1 && src1->type == GGML_TYPE_F16);
4879 4877
4880 4878          for (int i1 = ir0; i1 < ir1; i1++) {
     4879 +            const int64_t i11 = (i1%ne01);
     4880 +            //const int64_t i12 = (i1/ne01)%ne02;
     4881 +            const int64_t i13 = (i1/ne01)/ne02;
     4882 +
4881 4883              // ALiBi
4882 4884              const uint32_t h = (i1/ne01)%ne02; // head
4883 4885              const float slope = (max_bias > 0.0f) ? h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1) : 1.0f;
@@ -4886,8 +4888,8 @@ static void ggml_compute_forward_soft_max_f32(
4886 4888              float * dp = (float *)((char *) dst->data + i1*dst->nb[1]);
4887 4889
4888 4890              // broadcast the mask across rows
4889      -            ggml_fp16_t * mp_f16 = src1 ? (ggml_fp16_t *)((char *) src1->data) + (i1%ne01)*ne00 : NULL;
4890      -            float       * mp_f32 = src1 ? (float       *)((char *) src1->data) + (i1%ne01)*ne00 : NULL;
     4891 +            ggml_fp16_t * mp_f16 = src1 ? (ggml_fp16_t *)((char *) src1->data + i11*nb11 + i13*nb12) : NULL;
     4892 +            float       * mp_f32 = src1 ? (float       *)((char *) src1->data + i11*nb11 + i13*nb12) : NULL;
4891 4893
4892 4894              ggml_vec_cpy_f32  (nc, wp, sp);
4893 4895              ggml_vec_scale_f32(nc, wp, scale);
@@ -7227,7 +7229,7 @@ static void ggml_compute_forward_flash_attn_ext_f16(
7227 7229                  memset(VKQ32, 0, DV*sizeof(float));
7228 7230              }
7229 7231
7230      -            const ggml_fp16_t * mp = mask ? (ggml_fp16_t *)((char *) mask->data + iq1*mask->nb[1]) : NULL;
     7232 +            const ggml_fp16_t * mp = mask ? (ggml_fp16_t *)((char *) mask->data + iq1*mask->nb[1] + iq3*mask->nb[2]) : NULL;
7231 7233
7232 7234              // k indices
7233 7235              const int ik3 = iq3 / rk3;

0 commit comments

Comments (0)