From: Kirill A. Korinsky Subject: Re: devel/libggml: fix for llvm22 To: ports@openbsd.org Date: Wed, 20 May 2026 13:29:48 +0200 On Wed, 20 May 2026 11:16:51 +0200, Kirill A. Korinsky wrote: > > ports@, > > here a fix for devel/libggml's compiler issue. > > I can't check it due to no access to Sapphire Rapids CPU, but it should be > safe. Can someone test it? > > Thanks. > I'd like to withdraw this diff. It is an llvm-22 bug, and a backport of the fix will be sent to ports@ shortly. > Index: Makefile > =================================================================== > RCS file: /home/cvs/ports/devel/libggml/Makefile,v > diff -u -p -r1.15 Makefile > --- Makefile 17 May 2026 20:30:51 -0000 1.15 > +++ Makefile 20 May 2026 09:10:03 -0000 > @@ -3,6 +3,7 @@ COMMENT= tensor library for machine lea > GH_ACCOUNT= ggml-org > GH_PROJECT= ggml > GH_TAGNAME = v0.12.0 > +REVISION = 0 > PKGNAME= lib${DISTNAME} > > SHARED_LIBS += ggml 3.8 > Index: patches/patch-src_ggml-cpu_amx_mmq_cpp > =================================================================== > RCS file: patches/patch-src_ggml-cpu_amx_mmq_cpp > diff -N patches/patch-src_ggml-cpu_amx_mmq_cpp > --- /dev/null 1 Jan 1970 00:00:00 -0000 > +++ patches/patch-src_ggml-cpu_amx_mmq_cpp 20 May 2026 00:22:06 -0000 > @@ -0,0 +1,61 @@ > +https://github.com/ggml-org/ggml/issues/1499 > + > +Index: src/ggml-cpu/amx/mmq.cpp > +--- src/ggml-cpu/amx/mmq.cpp.orig > ++++ src/ggml-cpu/amx/mmq.cpp > +@@ -1510,18 +1510,15 @@ struct tinygemm_kernel_vnni + const char * RESTRICT B = static_cast(_B); > + > + __m512i va[8]; > +- __m512i vb[8]; > + __m512 vc[COLS]; > + __m512 vd1; > + > + // Notes: s8s8 igemm compensation in avx512-vnni > + // change s8s8 to u8s8 with compensate > +- // a * b = (a + 128) * b - 128 * b > ++ // a * b = (b + 128) * a - 128 * a > + // s s u s u s > +- // > +- // (128 * b is pre-computed when packing B to vnni formats) > +- // > + const __m512i off = _mm512_set1_epi8(static_cast(0x80)); > ++ __m512i vcomp; > + > + auto loadc
= [&](auto col) { > + vc[col] = _mm512_setzero_ps(); > +@@ -1529,29 +1526,25 @@ struct tinygemm_kernel_vnni + Unroll{}(loadc); > + > + auto compute = [&](auto col, auto i) { > +- // load a and add offset 128 > ++ // load a and compute compensation > + if constexpr (col == 0) { > + const int32_t * a_ptr = reinterpret_cast(A[0 * KB + i].qs); > ++ vcomp = _mm512_setzero_si512(); > + for (int k = 0; k < 8; ++k) { > + va[k] = _mm512_set1_epi32(a_ptr[k]); > +- va[k] = _mm512_add_epi8(va[k], off); > ++ vcomp = _mm512_dpbusd_epi32(vcomp, off, va[k]); > + } > + vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d)); > + } > + > +- // load b > + const char * b_ptr = B + PACKED_INDEX(col, i, KB, TILE_SIZE); > +- for (int k = 0; k < 8; ++k) { > +- vb[k] = _mm512_loadu_si512((const __m512i *)(b_ptr + k * 64)); > +- } > + const int offset = TILE_N * TILE_K; > + const __m512 vd0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)(b_ptr + offset))); > +- const int offset2 = TILE_N * TILE_K + TILE_N * sizeof(ggml_half); > +- const __m512i vcomp = _mm512_loadu_si512((const __m512i *)(b_ptr + offset2)); > + > + __m512i vsum = _mm512_setzero_si512(); > + for (int k = 0; k < 8; ++k) { > +- vsum = _mm512_dpbusd_epi32(vsum, va[k], vb[k]); > ++ const __m512i vb = _mm512_add_epi8(_mm512_loadu_si512((const __m512i *)(b_ptr + k * 64)), off); > ++ vsum = _mm512_dpbusd_epi32(vsum, vb, va[k]); > + } > + vsum = _mm512_sub_epi32(vsum, vcomp); > + > > > -- > wbr, Kirill > -- wbr, Kirill