From: Stuart Henderson
Subject: NEW: audio/whisper.cpp, devel/libggml. UPDATE: misc/llama.cpp
To: ports
Date: Wed, 1 Oct 2025 12:17:02 +0100

Currently libggml is built as part of misc/llama.cpp, but it's also
wanted by other useful software. The diffs and attached tgz do the
following:

- build libggml as a separate port, rather than as part of llama.cpp

- condition vulkan support by arch, and patch some tests, allowing ggml
  to build on smaller archs (the C++ compiler needs a lot of RAM for
  the shaders), i.e. enabling support for more archs

- update llama.cpp to a newer version and remove the ggml-related
  parts, depending on the new libggml port instead

- rename the llama-cpp package to llama.cpp, matching the common naming
  and the port directory (devel/quirks entry added)

- add a new port for whisper.cpp, a library and cli tool for doing
  speech-to-text transcription. The usual model required for English
  transcription is bundled in the package to simplify common use cases,
  and a download script is available (with info in the readme) for
  other cases; a rough sketch of the library API follows below.
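For anyone wanting the library rather than the cli, here's a rough
sketch of the whisper.cpp C API the new port exposes. Untested as
written; the model filename/path is illustrative (see the readme for
where the bundled model actually lands), and real code would decode an
audio file to 16 kHz mono float PCM rather than feeding silence:

#include <stdio.h>
#include <stdlib.h>
#include <whisper.h>

int
main(void)
{
	/* model path is illustrative - adjust to wherever the
	 * package installs the bundled English model */
	struct whisper_context *ctx = whisper_init_from_file_with_params(
	    "ggml-base.en.bin", whisper_context_default_params());
	if (ctx == NULL)
		return 1;

	/* placeholder input: 10 seconds of silence */
	int n_samples = WHISPER_SAMPLE_RATE * 10;
	float *pcm = calloc(n_samples, sizeof(float));

	struct whisper_full_params wparams =
	    whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
	if (whisper_full(ctx, wparams, pcm, n_samples) == 0) {
		int n = whisper_full_n_segments(ctx);
		for (int i = 0; i < n; i++)
			printf("%s\n", whisper_full_get_segment_text(ctx, i));
	}

	free(pcm);
	whisper_free(ctx);
	return 0;
}

Builds with something like
"cc x.c -I/usr/local/include -L/usr/local/lib -lwhisper".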
Tested on amd64, i386, aarch64 (builds, tests, basic use of whisper).

The only current port touched, llama.cpp, is an edge port, so I don't
think there's any concern for release, and I'd quite like to have
whisper available.

ok?

Index: devel/quirks/Makefile
===================================================================
RCS file: /cvs/ports/devel/quirks/Makefile,v
diff -u -p -r1.1739 Makefile
--- devel/quirks/Makefile	1 Oct 2025 11:00:35 -0000	1.1739
+++ devel/quirks/Makefile	1 Oct 2025 11:04:12 -0000
@@ -3,7 +3,7 @@ CATEGORIES = devel databases
 DISTFILES = # API.rev
 
-PKGNAME = quirks-7.146
+PKGNAME = quirks-7.147
 PKG_ARCH = *
 
 MAINTAINER = Marc Espie
Index: devel/quirks/files/Quirks.pm
===================================================================
RCS file: /cvs/ports/devel/quirks/files/Quirks.pm,v
diff -u -p -r1.1751 Quirks.pm
--- devel/quirks/files/Quirks.pm	1 Oct 2025 11:00:35 -0000	1.1751
+++ devel/quirks/files/Quirks.pm	1 Oct 2025 11:04:12 -0000
@@ -1,7 +1,7 @@
 #! /usr/bin/perl
 # ex:ts=8 sw=4:
-# $OpenBSD: Quirks.pm,v 1.1751 2025/10/01 11:00:35 sthen Exp $
+# $OpenBSD: Quirks.pm,v 1.1750 2025/09/23 12:02:56 thfr Exp $
 #
 # Copyright (c) 2009 Marc Espie
 #
@@ -637,6 +637,7 @@ my $stem_extensions = {
 	'kdsoap-qt6' => 'kdsoap-qt',
 	'spyder3' => 'spyder',
 	'p5-URI-ws' => 'p5-URI',
+	'llama-cpp' => 'llama.cpp',
 };
 
 my $obsolete_reason = {};
Index: misc/llama.cpp/Makefile
===================================================================
RCS file: /cvs/ports/misc/llama.cpp/Makefile,v
diff -u -p -r1.9 Makefile
--- misc/llama.cpp/Makefile	12 Jun 2025 00:03:18 -0000	1.9
+++ misc/llama.cpp/Makefile	1 Oct 2025 11:04:12 -0000
@@ -1,46 +1,29 @@
-# "error: non-constant-expression cannot be narrowed from type 'int64_t'
-# (aka 'long long') to 'size_t' (aka 'unsigned long') in initializer list",
-# but also this really isn't going to be of much use without large amounts
-# of RAM.
-ONLY_FOR_ARCHS = ${LP64_ARCHS}
-
 COMMENT = LLM inference system
 
-GH_ACCOUNT = ggerganov
+GH_ACCOUNT = ggml-org
 GH_PROJECT = llama.cpp
-GH_TAGNAME = b5372
-PKGNAME = llama-cpp-0.0.${GH_TAGNAME:S/b//}
+GH_TAGNAME = b6641
+PKGNAME = llama.cpp-0.0.${GH_TAGNAME:S/b//}
 
-SHARED_LIBS += ggml-base 1.0
-SHARED_LIBS += ggml-cpu 1.0
-SHARED_LIBS += ggml 1.0
 SHARED_LIBS += llama 2.0
-SHARED_LIBS += mtmd_shared 0.0
-SHARED_LIBS += ggml-vulkan 2.0
+SHARED_LIBS += mtmd 0.0
 
 CATEGORIES = misc
 
-HOMEPAGE = https://github.com/ggerganov/llama.cpp
-
 # MIT
 PERMIT_PACKAGE = Yes
 
-WANTLIB += c curl m pthread vulkan ${COMPILER_LIBCXX}
+WANTLIB += ${COMPILER_LIBCXX} c curl ggml ggml-base m
 
 MODULES = devel/cmake
 
-LIB_DEPENDS = graphics/vulkan-loader \
+# some tests need network access
+LIB_DEPENDS = devel/libggml \
 		net/curl
 
-BUILD_DEPENDS = graphics/shaderc
-
-CONFIGURE_ARGS = -DGGML_CCACHE=Off \
-		-DGGML_NATIVE=Off \
-		-DGGML_RVV=Off \
-		-DGGML_VULKAN=On
-
-# avoid requiring git during build
-CONFIGURE_ARGS += -DGGML_BUILD_NUMBER=""
+CONFIGURE_ARGS += -DLLAMA_USE_SYSTEM_GGML=on
+CFLAGS += -I${LOCALBASE}/include
+CXXFLAGS += -I${LOCALBASE}/include
 
 post-install:
 	rm ${PREFIX}/bin/test-*
Index: misc/llama.cpp/distinfo
===================================================================
RCS file: /cvs/ports/misc/llama.cpp/distinfo,v
diff -u -p -r1.3 distinfo
--- misc/llama.cpp/distinfo	15 May 2025 01:38:55 -0000	1.3
+++ misc/llama.cpp/distinfo	1 Oct 2025 11:04:12 -0000
@@ -1,2 +1,2 @@
-SHA256 (llama.cpp-b5372.tar.gz) = 28q/8fqCc/rtzo8qbUtQ6njW/Zs3mzaLH0ynCWIkuk8=
-SIZE (llama.cpp-b5372.tar.gz) = 21147804
+SHA256 (llama.cpp-b6641.tar.gz) = 0xJrrTblSgapCD4ujzjkobbpyI5bL6fT+4tYchCwwuA=
+SIZE (llama.cpp-b6641.tar.gz) = 25867942
Index: misc/llama.cpp/patches/patch-common_common_cpp
===================================================================
RCS file: misc/llama.cpp/patches/patch-common_common_cpp
diff -N misc/llama.cpp/patches/patch-common_common_cpp
--- misc/llama.cpp/patches/patch-common_common_cpp	15 May 2025 01:38:55 -0000	1.1
+++ /dev/null	1 Jan 1970 00:00:00 -0000
@@ -1,12 +0,0 @@
-Index: common/common.cpp
---- common/common.cpp.orig
-+++ common/common.cpp
-@@ -830,7 +830,7 @@ std::string fs_get_cache_directory() {
-     if (getenv("LLAMA_CACHE")) {
-         cache_directory = std::getenv("LLAMA_CACHE");
-     } else {
--#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)
-+#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) || defined(__OpenBSD__)
-     if (std::getenv("XDG_CACHE_HOME")) {
-         cache_directory = std::getenv("XDG_CACHE_HOME");
-     } else {
Index: misc/llama.cpp/patches/patch-tools_rpc_rpc-server_cpp
===================================================================
RCS file: misc/llama.cpp/patches/patch-tools_rpc_rpc-server_cpp
diff -N misc/llama.cpp/patches/patch-tools_rpc_rpc-server_cpp
--- misc/llama.cpp/patches/patch-tools_rpc_rpc-server_cpp	15 May 2025 01:38:55 -0000	1.1
+++ /dev/null	1 Jan 1970 00:00:00 -0000
@@ -1,12 +0,0 @@
-Index: tools/rpc/rpc-server.cpp
---- tools/rpc/rpc-server.cpp.orig
-+++ tools/rpc/rpc-server.cpp
-@@ -111,7 +111,7 @@ static std::string fs_get_cache_directory() {
-     if (getenv("LLAMA_CACHE")) {
-         cache_directory = std::getenv("LLAMA_CACHE");
-     } else {
--#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)
-+#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) || defined(__OpenBSD__)
-     if (std::getenv("XDG_CACHE_HOME")) {
-         cache_directory = std::getenv("XDG_CACHE_HOME");
-     } else {
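(For context on the two patches deleted above: they only extended
llama.cpp's cache-directory platform check to OpenBSD. The fallback
chain in question boils down to roughly this, rewritten here as a
standalone C sketch rather than the exact upstream code:)

#include <stdio.h>
#include <stdlib.h>

/* rough standalone version of llama.cpp's fs_get_cache_directory():
 * prefer LLAMA_CACHE, then XDG_CACHE_HOME, then ~/.cache */
static void
get_cache_dir(char *buf, size_t len)
{
	const char *env;

	if ((env = getenv("LLAMA_CACHE")) != NULL)
		snprintf(buf, len, "%s/", env);
	else if ((env = getenv("XDG_CACHE_HOME")) != NULL)
		snprintf(buf, len, "%s/llama.cpp/", env);
	else if ((env = getenv("HOME")) != NULL)
		snprintf(buf, len, "%s/.cache/llama.cpp/", env);
	else
		buf[0] = '\0';
}

int
main(void)
{
	char dir[1024];

	get_cache_dir(dir, sizeof(dir));
	printf("cache dir: %s\n", dir);
	return 0;
}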
Index: misc/llama.cpp/pkg/PLIST
===================================================================
RCS file: /cvs/ports/misc/llama.cpp/pkg/PLIST,v
diff -u -p -r1.4 PLIST
--- misc/llama.cpp/pkg/PLIST	15 May 2025 01:38:55 -0000	1.4
+++ misc/llama.cpp/pkg/PLIST	1 Oct 2025 11:04:12 -0000
@@ -3,18 +3,16 @@ bin/convert_hf_to_gguf.py
 @bin bin/llama-batched-bench
 @bin bin/llama-bench
 @bin bin/llama-cli
-@bin bin/llama-convert-llama2c-to-ggml
-@bin bin/llama-cvector-generator
+@bin bin/llama-diffusion-cli
 @bin bin/llama-embedding
 @bin bin/llama-eval-callback
-@bin bin/llama-export-lora
 @bin bin/llama-finetune
 @bin bin/llama-gen-docs
 @bin bin/llama-gguf
 @bin bin/llama-gguf-hash
 @bin bin/llama-gguf-split
-@bin bin/llama-gritlm
 @bin bin/llama-imatrix
+@bin bin/llama-logits
 @bin bin/llama-lookahead
 @bin bin/llama-lookup
 @bin bin/llama-lookup-create
@@ -35,34 +33,13 @@ bin/convert_hf_to_gguf.py
 @bin bin/llama-speculative-simple
 @bin bin/llama-tokenize
 @bin bin/llama-tts
-@bin bin/vulkan-shaders-gen
-include/ggml-alloc.h
-include/ggml-backend.h
-include/ggml-blas.h
-include/ggml-cann.h
-include/ggml-cpp.h
-include/ggml-cpu.h
-include/ggml-cuda.h
-include/ggml-kompute.h
-include/ggml-metal.h
-include/ggml-opt.h
-include/ggml-rpc.h
-include/ggml-sycl.h
-include/ggml-vulkan.h
-include/ggml.h
-include/gguf.h
 include/llama-cpp.h
 include/llama.h
-lib/cmake/ggml/
-lib/cmake/ggml/ggml-config.cmake
-lib/cmake/ggml/ggml-version.cmake
+include/mtmd-helper.h
+include/mtmd.h
 lib/cmake/llama/
 lib/cmake/llama/llama-config.cmake
 lib/cmake/llama/llama-version.cmake
-@lib lib/libggml-base.so.${LIBggml-base_VERSION}
-@lib lib/libggml-cpu.so.${LIBggml-cpu_VERSION}
-@lib lib/libggml-vulkan.so.${LIBggml-vulkan_VERSION}
-@lib lib/libggml.so.${LIBggml_VERSION}
 @lib lib/libllama.so.${LIBllama_VERSION}
-@lib lib/libmtmd_shared.so.${LIBmtmd_shared_VERSION}
+@lib lib/libmtmd.so.${LIBmtmd_VERSION}
 lib/pkgconfig/llama.pc
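And to show that third-party code can now build against the split-out
devel/libggml without pulling in llama.cpp at all, a minimal sketch.
Compile flags are illustrative, something like
"cc x.c -I/usr/local/include -L/usr/local/lib -lggml -lggml-base -lggml-cpu"
(the exact set of ggml libraries to link may differ):

#include <stdio.h>
#include <ggml.h>
#include <ggml-cpu.h>

int
main(void)
{
	/* small context, with ggml allocating tensor data itself */
	struct ggml_init_params params = {
		/* .mem_size   = */ 16 * 1024 * 1024,
		/* .mem_buffer = */ NULL,
		/* .no_alloc   = */ false,
	};
	struct ggml_context *ctx = ggml_init(params);

	struct ggml_tensor *a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
	struct ggml_tensor *b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
	for (int i = 0; i < 4; i++) {
		((float *)a->data)[i] = i;
		((float *)b->data)[i] = 10.0f * i;
	}

	/* build and run a one-op graph: sum = a + b */
	struct ggml_tensor *sum = ggml_add(ctx, a, b);
	struct ggml_cgraph *gf = ggml_new_graph(ctx);
	ggml_build_forward_expand(gf, sum);
	ggml_graph_compute_with_ctx(ctx, gf, 1);

	for (int i = 0; i < 4; i++)
		printf("%g\n", ((float *)sum->data)[i]);

	ggml_free(ctx);
	return 0;
}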