misc/llama.cpp: update to b6934 with required update of devel/libggml
ports@,
I'd like to update our misc/llama.cpp to the latest snapshot (b6934).
It allows running the https://huggingface.co/collections/Qwen/qwen3-vl models.
We don't have GPU support, but with -t 32 I was able to run the Qwen3 VL 30B
model on CPU only, on an AMD Ryzen 9 7950X3D, at around 2 tokens/second,
which is more or less usable. It does need a lot of memory, though: 120G
as :datasize is enough.
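For example, a login.conf(5) entry along these lines should do (the class
name and inherited defaults here are only illustrative, adjust to your
setup):

staff:\
        :datasize=120G:\
        :tc=default: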
Because we use libggml as a dedicated port, it has to be updated to the
latest version as well, and that version contains a bug which breaks large
models under a large number of threads:
https://github.com/ggml-org/llama.cpp/issues/16960
I have included a patch that fixes this issue.
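To illustrate the failure mode, here is a standalone sketch with made-up
sizes (simplified from the chunking loop in src/ggml-cpu/repack.cpp, not
the actual ggml code): with many threads the per-chunk row ranges drop
below NB_COLS, and rounding both boundaries up to a multiple of NB_COLS
then produces empty chunks and an end boundary past the last row. Capping
the chunk count and clamping the end, as the patch does, avoids both:

#include <cstdint>
#include <cstdio>

static int64_t align_up(int64_t v, int64_t a) {
    return (v % a) ? v + a - (v % a) : v;
}

int main() {
    const int64_t NB_COLS = 8;    // block width boundaries are aligned to
    const int64_t ne01    = 100;  // rows to distribute (illustrative)
    int64_t nchunk        = 32;   // e.g. one chunk per thread with -t 32

    // Before the fix: rounding both ends up to NB_COLS yields empty
    // chunks (start >= end, which makes a worker stop early) and a
    // last chunk that ends past ne01 (out-of-bounds access).
    for (int64_t c = 0; c < nchunk; c++) {
        int64_t start = align_up(c * ne01 / nchunk, NB_COLS);
        int64_t end   = align_up((c + 1) * ne01 / nchunk, NB_COLS);
        if (start >= end || end > ne01)
            printf("broken chunk %2lld: [%3lld, %3lld)\n",
                   (long long)c, (long long)start, (long long)end);
    }

    // With the fix: cap nchunk so each chunk spans at least NB_COLS
    // rows, and clamp the end of each chunk to ne01.
    const int64_t max_nchunk = (ne01 + NB_COLS - 1) / NB_COLS;
    if (nchunk > max_nchunk)
        nchunk = max_nchunk;
    for (int64_t c = 0; c < nchunk; c++) {
        int64_t start = align_up(c * ne01 / nchunk, NB_COLS);
        int64_t end   = align_up((c + 1) * ne01 / nchunk, NB_COLS);
        if (end > ne01)
            end = ne01;
        printf("chunk %2lld: [%3lld, %3lld)\n",
               (long long)c, (long long)start, (long long)end);
    }
    return 0;
}

With these numbers, the first loop reports a pile of empty chunks and a
final chunk ending at 104 > 100; the second prints a clean, contiguous
partition of the rows.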
I tested analysis of a .pdf document, chat, and "explain the picture" on
-current/amd64. Everything works.
Ok?
Index: misc/llama.cpp/Makefile
===================================================================
RCS file: /home/cvs/ports/misc/llama.cpp/Makefile,v
diff -u -p -r1.10 Makefile
--- misc/llama.cpp/Makefile 1 Oct 2025 19:44:07 -0000 1.10
+++ misc/llama.cpp/Makefile 3 Nov 2025 13:49:51 -0000
@@ -2,7 +2,7 @@ COMMENT = LLM inference system
GH_ACCOUNT = ggml-org
GH_PROJECT = llama.cpp
-GH_TAGNAME = b6641
+GH_TAGNAME = b6934
PKGNAME = llama.cpp-0.0.${GH_TAGNAME:S/b//}
SHARED_LIBS += llama 2.0
Index: misc/llama.cpp/distinfo
===================================================================
RCS file: /home/cvs/ports/misc/llama.cpp/distinfo,v
diff -u -p -r1.4 distinfo
--- misc/llama.cpp/distinfo 1 Oct 2025 19:44:07 -0000 1.4
+++ misc/llama.cpp/distinfo 3 Nov 2025 13:50:07 -0000
@@ -1,2 +1,2 @@
-SHA256 (llama.cpp-b6641.tar.gz) = 0xJrrTblSgapCD4ujzjkobbpyI5bL6fT+4tYchCwwuA=
-SIZE (llama.cpp-b6641.tar.gz) = 25867942
+SHA256 (llama.cpp-b6934.tar.gz) = qsr4P+8j/z/nK8k8/Iv1/QTdSFhqIshFYwCI7L5/a7k=
+SIZE (llama.cpp-b6934.tar.gz) = 26417348
Index: devel/libggml/Makefile
===================================================================
RCS file: /home/cvs/ports/devel/libggml/Makefile,v
diff -u -p -r1.2 Makefile
--- devel/libggml/Makefile 20 Oct 2025 17:25:51 -0000 1.2
+++ devel/libggml/Makefile 3 Nov 2025 01:44:00 -0000
@@ -2,7 +2,8 @@ COMMENT= tensor library for machine lea
GH_ACCOUNT= ggml-org
GH_PROJECT= ggml
-GH_TAGNAME= v0.9.4
+GH_COMMIT= 09aa758381718f7731c148238574a7e169001f13
+DISTNAME= ggml-0.9.4.20251101
PKGNAME= lib${DISTNAME}
SHARED_LIBS += ggml 2.0
Index: devel/libggml/distinfo
===================================================================
RCS file: /home/cvs/ports/devel/libggml/distinfo,v
diff -u -p -r1.1.1.1 distinfo
--- devel/libggml/distinfo 1 Oct 2025 19:42:10 -0000 1.1.1.1
+++ devel/libggml/distinfo 3 Nov 2025 01:44:20 -0000
@@ -1,2 +1,2 @@
-SHA256 (ggml-0.9.4.tar.gz) = JL0VAK7ycUe5LQI8/23P/fyNqcVnUPlB9dXRtTh1xiM=
-SIZE (ggml-0.9.4.tar.gz) = 2193279
+SHA256 (ggml-0.9.4.20251101-09aa7583.tar.gz) = fx+ZI4GhV5KlZlGM3QDWERyj1xXY8t5vh5ELtkwK15Y=
+SIZE (ggml-0.9.4.20251101-09aa7583.tar.gz) = 2330931
Index: devel/libggml/patches/patch-src_ggml-backend-reg_cpp
===================================================================
RCS file: /home/cvs/ports/devel/libggml/patches/patch-src_ggml-backend-reg_cpp,v
diff -u -p -r1.1.1.1 patch-src_ggml-backend-reg_cpp
--- devel/libggml/patches/patch-src_ggml-backend-reg_cpp 1 Oct 2025 19:42:10 -0000 1.1.1.1
+++ devel/libggml/patches/patch-src_ggml-backend-reg_cpp 3 Nov 2025 12:01:00 -0000
@@ -1,7 +1,7 @@
Index: src/ggml-backend-reg.cpp
--- src/ggml-backend-reg.cpp.orig
+++ src/ggml-backend-reg.cpp
-@@ -517,7 +517,9 @@ static ggml_backend_reg_t ggml_backend_load_best(const
+@@ -524,7 +524,9 @@ static ggml_backend_reg_t ggml_backend_load_best(const
search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
#endif
// default search paths: executable directory, current directory
Index: devel/libggml/patches/patch-src_ggml-cpu_repack_cpp
===================================================================
RCS file: devel/libggml/patches/patch-src_ggml-cpu_repack_cpp
diff -N devel/libggml/patches/patch-src_ggml-cpu_repack_cpp
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ devel/libggml/patches/patch-src_ggml-cpu_repack_cpp 3 Nov 2025 12:01:17 -0000
@@ -0,0 +1,59 @@
+https://github.com/ggml-org/llama.cpp/pull/16956
+
+Index: src/ggml-cpu/repack.cpp
+--- src/ggml-cpu/repack.cpp.orig
++++ src/ggml-cpu/repack.cpp
+@@ -1678,10 +1678,24 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int6
+ int64_t chunk_size = (nr + nth_scaled - 1) / nth_scaled;
+ int64_t nchunk = (nr + chunk_size - 1) / chunk_size;
+
++ // Ensure minimum chunk size to avoid alignment issues with high thread counts
++ // Minimum chunk size should be at least NB_COLS to prevent overlapping chunks after alignment
++ const int64_t min_chunk_size = NB_COLS;
++ if (nchunk > 0 && (nr / nchunk) < min_chunk_size && nr >= min_chunk_size) {
++ nchunk = (nr + min_chunk_size - 1) / min_chunk_size;
++ }
++
+ if (nth == 1 || nchunk < nth || disable_chunking) {
+ nchunk = nth;
+ }
+
++ // Ensure nchunk doesn't exceed the number of rows divided by minimum chunk size
++ // This prevents creating too many tiny chunks that could overlap after alignment
++ const int64_t max_nchunk = (nr + min_chunk_size - 1) / min_chunk_size;
++ if (nchunk > max_nchunk) {
++ nchunk = max_nchunk;
++ }
++
+ if (ith == 0) {
+ // Every thread starts at ith, so the first unprocessed chunk is nth. This save a bit of coordination right at the start.
+ ggml_threadpool_chunk_set(params->threadpool, nth);
+@@ -1695,8 +1709,15 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int6
+ while (current_chunk < nchunk) {
+ int64_t src0_start = (current_chunk * ne01) / nchunk;
+ int64_t src0_end = ((current_chunk + 1) * ne01) / nchunk;
++
++ // Align boundaries to NB_COLS - round up to ensure all data is included
++ // The chunk size limiting above ensures chunks are large enough to prevent overlaps
+ src0_start = (src0_start % NB_COLS) ? src0_start + NB_COLS - (src0_start % NB_COLS) : src0_start;
+ src0_end = (src0_end % NB_COLS) ? src0_end + NB_COLS - (src0_end % NB_COLS) : src0_end;
++ if (src0_end > ne01) {
++ src0_end = ne01;
++ }
++
+ if (src0_start >= src0_end) {
+ break;
+ }
+@@ -1808,8 +1829,12 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int6
+ int64_t src0_cur_start = (ith * ne01) / nth;
+ int64_t src0_cur_end = ((ith + 1) * ne01) / nth;
+
++ // Align boundaries to NB_COLS - round up to ensure all data is included
+ src0_cur_start = (src0_cur_start % NB_COLS) ? src0_cur_start + NB_COLS - (src0_cur_start % NB_COLS) : src0_cur_start;
+ src0_cur_end = (src0_cur_end % NB_COLS) ? src0_cur_end + NB_COLS - (src0_cur_end % NB_COLS) : src0_cur_end;
++ if (src0_cur_end > ne01) {
++ src0_cur_end = ne01;
++ }
+
+ if (src0_cur_start >= src0_cur_end) {
+ return;
--
wbr, Kirill