From: Kirill A. Korinsky Subject: misc/llama.cpp: update to b7360 To: OpenBSD ports Cc: Volker Schlecht Date: Sat, 13 Dec 2025 15:56:06 +0100 ports@, I'd like to update misc/llama.cpp to b7360 and devel/libggml to be synced with that version of llama.cpp as well. libggml includes small changes which allow using it with stable-diffusion.cpp. Building with audio/whisper.cpp is fixed. make test works as usual on all ports. llama.cpp is tested on my usual workflow, works fine. Thoughts? Tests? Ok? Index: devel/libggml/Makefile =================================================================== RCS file: /home/cvs/ports/devel/libggml/Makefile,v diff -u -p -r1.5 Makefile --- devel/libggml/Makefile 23 Nov 2025 14:30:13 -0000 1.5 +++ devel/libggml/Makefile 13 Dec 2025 09:54:53 -0000 @@ -2,12 +2,12 @@ COMMENT= tensor library for machine lea GH_ACCOUNT= ggml-org GH_PROJECT= ggml -GH_COMMIT= 781baf2a14d9e0aaee542b2e1bb918bfc4132199 -DISTNAME= ggml-0.9.4pl20251120 +GH_COMMIT= f5425c0ee5e582a7d64411f06139870bff3e52e0 +DISTNAME= ggml-0.9.4pl20251211 PKGNAME= lib${DISTNAME} -SHARED_LIBS += ggml 3.1 -SHARED_LIBS += ggml-base 3.1 +SHARED_LIBS += ggml 3.2 +SHARED_LIBS += ggml-base 3.2 CATEGORIES= devel Index: devel/libggml/distinfo =================================================================== RCS file: /home/cvs/ports/devel/libggml/distinfo,v diff -u -p -r1.3 distinfo --- devel/libggml/distinfo 23 Nov 2025 14:30:13 -0000 1.3 +++ devel/libggml/distinfo 13 Dec 2025 09:51:29 -0000 @@ -1,2 +1,2 @@ -SHA256 (ggml-0.9.4pl20251120-781baf2a.tar.gz) = qu5WT9HUZ12982U0x4SWNbBDMTameA7DvFXccUbldis= -SIZE (ggml-0.9.4pl20251120-781baf2a.tar.gz) = 2344462 +SHA256 (ggml-0.9.4pl20251211-f5425c0e.tar.gz) = l0I374JdNBkL1IVIYNQBgXRCSZLkc9F1rE/ZOm1Jy0g= +SIZE (ggml-0.9.4pl20251211-f5425c0e.tar.gz) = 2407321 Index: devel/libggml/patches/patch-src_ggml-backend-reg_cpp =================================================================== RCS file: 
/home/cvs/ports/devel/libggml/patches/patch-src_ggml-backend-reg_cpp,v diff -u -p -r1.2 patch-src_ggml-backend-reg_cpp --- devel/libggml/patches/patch-src_ggml-backend-reg_cpp 4 Nov 2025 15:05:00 -0000 1.2 +++ devel/libggml/patches/patch-src_ggml-backend-reg_cpp 13 Dec 2025 14:37:20 -0000 @@ -1,7 +1,7 @@ Index: src/ggml-backend-reg.cpp --- src/ggml-backend-reg.cpp.orig +++ src/ggml-backend-reg.cpp -@@ -524,7 +524,9 @@ static ggml_backend_reg_t ggml_backend_load_best(const +@@ -531,7 +531,9 @@ static ggml_backend_reg_t ggml_backend_load_best(const search_paths.push_back(fs::u8path(GGML_BACKEND_DIR)); #endif // default search paths: executable directory, current directory Index: devel/libggml/pkg/PLIST =================================================================== RCS file: /home/cvs/ports/devel/libggml/pkg/PLIST,v diff -u -p -r1.2 PLIST --- devel/libggml/pkg/PLIST 5 Nov 2025 08:59:44 -0000 1.2 +++ devel/libggml/pkg/PLIST 13 Dec 2025 14:07:45 -0000 @@ -13,6 +13,7 @@ include/ggml-rpc.h include/ggml-sycl.h include/ggml-vulkan.h include/ggml-webgpu.h +include/ggml-zendnn.h include/ggml.h include/gguf.h lib/cmake/ggml/ Index: misc/llama.cpp/Makefile =================================================================== RCS file: /home/cvs/ports/misc/llama.cpp/Makefile,v diff -u -p -r1.13 Makefile --- misc/llama.cpp/Makefile 23 Nov 2025 14:30:49 -0000 1.13 +++ misc/llama.cpp/Makefile 13 Dec 2025 10:09:53 -0000 @@ -2,7 +2,7 @@ COMMENT = LLM inference system GH_ACCOUNT = ggml-org GH_PROJECT = llama.cpp -GH_TAGNAME = b7086 +GH_TAGNAME = b7360 PKGNAME = llama.cpp-0.0.${GH_TAGNAME:S/b//} SHARED_LIBS += llama 3.0 Index: misc/llama.cpp/distinfo =================================================================== RCS file: /home/cvs/ports/misc/llama.cpp/distinfo,v diff -u -p -r1.6 distinfo --- misc/llama.cpp/distinfo 23 Nov 2025 14:30:49 -0000 1.6 +++ misc/llama.cpp/distinfo 13 Dec 2025 09:34:41 -0000 @@ -1,2 +1,2 @@ -SHA256 (llama.cpp-b7086.tar.gz) = 
FmxxNbpcxDsrZQtbfUkFl3h1f/CbXcEjWJEKpXEszwA= -SIZE (llama.cpp-b7086.tar.gz) = 27243237 +SHA256 (llama.cpp-b7360.tar.gz) = nML2VgV6sqaB7UYkpy2ltjI7nRlGxoElcMOmWqx17Gk= +SIZE (llama.cpp-b7360.tar.gz) = 28115754 Index: misc/llama.cpp/pkg/PLIST =================================================================== RCS file: /home/cvs/ports/misc/llama.cpp/pkg/PLIST,v diff -u -p -r1.6 PLIST --- misc/llama.cpp/pkg/PLIST 12 Nov 2025 09:33:37 -0000 1.6 +++ misc/llama.cpp/pkg/PLIST 13 Dec 2025 14:06:47 -0000 @@ -6,6 +6,7 @@ bin/convert_hf_to_gguf.py @bin bin/llama-batched-bench @bin bin/llama-bench @bin bin/llama-cli +@bin bin/llama-completion @bin bin/llama-diffusion-cli @bin bin/llama-embedding @bin bin/llama-eval-callback @@ -14,6 +15,7 @@ bin/convert_hf_to_gguf.py @bin bin/llama-gguf @bin bin/llama-gguf-hash @bin bin/llama-gguf-split +@bin bin/llama-idle @bin bin/llama-imatrix @bin bin/llama-logits @bin bin/llama-lookahead Index: audio/whisper.cpp/patches/patch-src_whisper_cpp =================================================================== RCS file: audio/whisper.cpp/patches/patch-src_whisper_cpp diff -N audio/whisper.cpp/patches/patch-src_whisper_cpp --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ audio/whisper.cpp/patches/patch-src_whisper_cpp 13 Dec 2025 14:34:27 -0000 @@ -0,0 +1,25 @@ +Remove support of GGML_KQ_MASK_PAD + +https://github.com/ggml-org/whisper.cpp/commit/72714d169c4803957f57b515cf4f0922159cf9cb + +Index: src/whisper.cpp +--- src/whisper.cpp.orig ++++ src/whisper.cpp +@@ -2501,7 +2501,7 @@ static struct ggml_cgraph * whisper_build_graph_decode + + const float KQscale = pow(float(n_state_head), -0.25); + +- struct ggml_tensor * KQ_mask = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_kv, GGML_PAD(n_tokens, GGML_KQ_MASK_PAD), 1); ++ struct ggml_tensor * KQ_mask = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_kv, n_tokens, 1); + ggml_set_name(KQ_mask, "KQ_mask"); + ggml_set_input(KQ_mask); + +@@ -2925,7 +2925,7 @@ static bool whisper_decode_internal( + } + } + +- for 
(int i = n_tokens; i < GGML_PAD(n_tokens, GGML_KQ_MASK_PAD); ++i) { ++ for (int i = n_tokens; i < n_tokens; ++i) { + for (int j = 0; j < n_kv; ++j) { + data[h*(n_kv*n_tokens) + i*n_kv + j] = -INFINITY; + } -- wbr, Kirill