Compare commits


1 Commit

Author SHA1 Message Date
jmorganca
fba7f04ca0 ml/backend/ggml: optionally evaluate os.Executable() symlinks
For Intel macOS hosts, optionally evaluate symlinks to
os.Executable ahead of loading backends, fixing issues where
'ollama' is a symlink and is run manually from the command line
2025-02-13 22:43:39 -08:00
17 changed files with 17 additions and 245 deletions
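The commit follows a common Go pattern: take os.Executable(), resolve any symlinks with filepath.EvalSymlinks, and only then derive the directory that native backend libraries are loaded from. A minimal, self-contained sketch of that pattern (the helper name resolvedExeDir is illustrative, not taken from the repository):

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// resolvedExeDir returns the directory of the running binary with any
// symlinks resolved, falling back to the raw path when resolution fails.
func resolvedExeDir() (string, error) {
	exe, err := os.Executable()
	if err != nil {
		return "", err
	}
	// "Optionally" evaluate: an EvalSymlinks error is ignored rather than fatal.
	if eval, err := filepath.EvalSymlinks(exe); err == nil {
		exe = eval
	}
	return filepath.Dir(exe), nil
}

func main() {
	dir, err := resolvedExeDir()
	if err != nil {
		fmt.Fprintln(os.Stderr, "unable to lookup executable path:", err)
		os.Exit(1)
	}
	fmt.Println("backend libraries would be searched relative to:", dir)
}

A plain binary keeps its original path, while a symlinked 'ollama' resolves to the real install location, so libraries that sit next to the real binary are still found.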

View File

@@ -329,9 +329,7 @@ jobs:
done
working-directory: dist/${{ matrix.os }}-${{ matrix.arch }}
- run: |
-for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do
-tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE --owner 0 --group 0 | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz);
-done
+for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz); done
- uses: actions/upload-artifact@v4
with:
name: dist-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}

3 .gitignore vendored
View File

@@ -14,6 +14,3 @@ test_data
__debug_bin*
llama/build
llama/vendor
-model/testdata/models/*
-!model/testdata/models/*.md
-!model/testdata/models/*.json

View File

@@ -381,7 +381,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
-- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivent endpoint with Ollama support for running locally)
### Cloud
@@ -549,7 +548,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow)
- [TextLLaMA](https://github.com/adarshM84/TextLLaMA) A Chrome Extension that helps you write emails, correct grammar, and translate into any language
- [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)
-- [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) (telegram bot, primary for RP. Oobabooga-like buttons, [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) API integration e.t.c)
### Supported backends

View File

@@ -19,10 +19,6 @@ var LibOllamaPath string = func() string {
return ""
}
-if eval, err := filepath.EvalSymlinks(exe); err == nil {
-exe = eval
-}
var libPath string
switch runtime.GOOS {
case "windows":

View File

@@ -55,7 +55,7 @@ Here's a quick example showing API access from `powershell`
## Troubleshooting
Ollama on Windows stores files in a few different locations. You can view them in
-the explorer window by hitting `<Ctrl>+R` and type in:
+the explorer window by hitting `<cmd>+R` and type in:
- `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
- *app.log* contains most resent logs from the GUI application
- *server.log* contains the most recent server logs

View File

@@ -12,9 +12,6 @@ func TestHumanNumber(t *testing.T) {
testCases := []testCase{
{0, "0"},
-{999, "999"},
-{1000, "1K"},
-{1001, "1K"},
{1000000, "1M"},
{125000000, "125M"},
{500500000, "500.50M"},
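The table above pins down the expected formatting at the boundaries between plain digits and the K and M suffixes (999 stays numeric, 1000 and 1001 both round to "1K", and 500500000 keeps two decimals as "500.50M"). A formatter consistent with these cases could look like the sketch below; this is an illustration of the expected behaviour, with assumed thresholds and rounding, not the package's actual HumanNumber implementation:

package format

import (
	"fmt"
	"math"
)

const (
	thousand = 1_000
	million  = 1_000_000
)

// humanNumber is a sketch that reproduces the expectations in the test table.
func humanNumber(n uint64) string {
	switch {
	case n >= million:
		v := float64(n) / million
		if v == math.Floor(v) {
			return fmt.Sprintf("%.0fM", v) // 125000000 -> "125M"
		}
		return fmt.Sprintf("%.2fM", v) // 500500000 -> "500.50M"
	case n >= thousand:
		return fmt.Sprintf("%.0fK", float64(n)/thousand) // 1000, 1001 -> "1K"
	default:
		return fmt.Sprintf("%d", n) // 0 -> "0", 999 -> "999"
	}
}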

View File

@@ -305,10 +305,6 @@ func (b *testBackend) NewContext() ml.Context {
return &testContext{}
}
-func (b *testBackend) SystemInfo() string {
-return "not implemented"
-}
type testContext struct{}
func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {

View File

@@ -320,10 +320,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
return nil, fmt.Errorf("unable to lookup executable path: %w", err)
}
-if eval, err := filepath.EvalSymlinks(exe); err == nil {
-exe = eval
-}
// TODO - once fully switched to the Go runner, load the model here for tokenize/detokenize cgo access
s := &llmServer{
port: port,

View File

@@ -23,7 +23,6 @@ type Backend interface {
Config() Config
Get(name string) Tensor
NewContext() Context
-SystemInfo() string
}
var backends = make(map[string]func(*os.File) (Backend, error))

View File

@@ -1,27 +1,11 @@
package ggml
-/*
-#cgo CPPFLAGS: -I${SRCDIR}/ggml/include
-#include <stdlib.h>
-#include <stdint.h>
-#include "ggml.h"
-#include "ggml-cpu.h"
-#include "ggml-backend.h"
-static struct ggml_backend_feature * getBackendFeatures(void *fp, ggml_backend_reg_t reg) {return ((ggml_backend_get_features_t)(fp))(reg);}
-static struct ggml_backend_feature * getNextBackendFeatures(struct ggml_backend_feature * feature) { return &feature[1];}
-typedef enum {COMP_UNKNOWN,COMP_GCC,COMP_CLANG} COMPILER;
-COMPILER inline get_compiler() {
-#if defined(__clang__)
-return COMP_CLANG;
-#elif defined(__GNUC__)
-return COMP_GCC;
-#else
-return UNKNOWN_COMPILER;
-#endif
-}
-*/
+// #cgo CPPFLAGS: -I${SRCDIR}/ggml/include
+// #include <stdlib.h>
+// #include <stdint.h>
+// #include "ggml.h"
+// #include "ggml-cpu.h"
+// #include "ggml-backend.h"
import "C"
import (
@@ -642,34 +626,3 @@ func (t *Tensor) Conv2D(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int
t: C.ggml_conv_2d(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1)),
}
}
func (b *Backend) SystemInfo() string {
var compiler string
switch C.get_compiler() {
case C.COMP_UNKNOWN:
compiler = "cgo(unknown_compiler)"
case C.COMP_GCC:
compiler = "cgo(gcc)"
case C.COMP_CLANG:
compiler = "cgo(clang)"
}
var s string
for i := range C.ggml_backend_reg_count() {
reg := C.ggml_backend_reg_get(i)
fName := C.CString("ggml_backend_get_features")
defer C.free(unsafe.Pointer(fName))
get_features_fn := C.ggml_backend_reg_get_proc_address(reg, fName)
if get_features_fn != nil {
s += C.GoString(C.ggml_backend_reg_name(reg))
s += " : "
for features := C.getBackendFeatures(get_features_fn, reg); features.name != nil; features = C.getNextBackendFeatures(features) {
s += C.GoString(features.name)
s += " = "
s += C.GoString(features.value)
s += " | "
}
}
}
return s + compiler
}

View File

@@ -47,6 +47,10 @@ var OnceLoad = sync.OnceFunc(func() {
exe = "."
}
+if eval, err := filepath.EvalSymlinks(exe); err == nil {
+exe = eval
+}
// PATH, LD_LIBRARY_PATH, and DYLD_LIBRARY_PATH are often
// set by the parent process, however, use a default value
// if the environment variable is not set.
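This is the hunk the commit message refers to: the symlink evaluation now happens inside the sync.OnceFunc that runs once before any backend library is loaded. A condensed sketch of that shape is below; the environment-variable handling is a simplified assumption based on the comment above, and the names onceLoad and pathVar are illustrative:

package backend

import (
	"os"
	"path/filepath"
	"runtime"
	"sync"
)

// onceLoad runs at most once, however many callers invoke it. It resolves the
// executable path (following symlinks) and seeds a library search path if the
// parent process did not already set one.
var onceLoad = sync.OnceFunc(func() {
	exe, err := os.Executable()
	if err != nil {
		exe = "."
	}
	if eval, err := filepath.EvalSymlinks(exe); err == nil {
		exe = eval
	}

	pathVar := "LD_LIBRARY_PATH"
	switch runtime.GOOS {
	case "windows":
		pathVar = "PATH"
	case "darwin":
		pathVar = "DYLD_LIBRARY_PATH"
	}
	if os.Getenv(pathVar) == "" {
		os.Setenv(pathVar, filepath.Dir(exe))
	}
})

Because sync.OnceFunc returns a plain func(), call sites only need to invoke onceLoad() before touching a backend; the first caller does the work and the rest are no-ops.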

View File

@@ -21,7 +21,6 @@ import (
_ "github.com/ollama/ollama/ml/backend"
)
-// Options contains the inputs for a model forward pass
type Options struct {
Inputs []int32
Positions []int32
@@ -35,13 +34,11 @@ type config struct {
Cache kvcache.Cache
}
-// Base implements the common fields and methods for all models
type Base struct {
b ml.Backend
config
}
-// Backend returns the underlying backend that will run the model
func (m *Base) Backend() ml.Backend {
return m.b
}
@@ -50,7 +47,6 @@ func (m *Base) Config() config {
return m.config
}
-// Model implements a specific model architecture, defining the forward pass and any model-specific configuration
type Model interface {
Forward(ml.Context, Options) (ml.Tensor, error)
@@ -60,7 +56,6 @@ type Model interface {
var models = make(map[string]func(ml.Config) (Model, error))
-// Register registers a model constructor for the given architecture
func Register(name string, f func(ml.Config) (Model, error)) {
if _, ok := models[name]; ok {
panic("model: model already registered")
@@ -69,9 +64,8 @@ func Register(name string, f func(ml.Config) (Model, error)) {
models[name] = f
}
-// New initializes a new model instance with the provided configuration based on the metadata in the model file
-func New(modelPath string) (Model, error) {
-r, err := os.Open(modelPath)
+func New(s string) (Model, error) {
+r, err := os.Open(s)
if err != nil {
return nil, err
}
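Most of this hunk strips doc comments and shortens New's parameter name, but the structure it touches is a constructor registry: each model package calls Register from its init function (callers blank-import github.com/ollama/ollama/model/models for exactly this reason, as the deleted test further down does), and New picks the constructor that matches the model's architecture. A stripped-down sketch of that pattern, with Config, Model, and the lookup-by-name simplified as stand-ins; the real New reads the architecture from the opened model file's metadata rather than taking it as an argument:

package registry

import "fmt"

// Config stands in for ml.Config; in the real code it comes from the model file.
type Config map[string]string

// Model stands in for the package's Model interface.
type Model interface{}

var models = make(map[string]func(Config) (Model, error))

// Register panics on duplicates so a wiring mistake fails loudly at startup.
func Register(name string, f func(Config) (Model, error)) {
	if _, ok := models[name]; ok {
		panic("model: model already registered")
	}
	models[name] = f
}

// New dispatches to the constructor registered for the given architecture.
func New(arch string, c Config) (Model, error) {
	f, ok := models[arch]
	if !ok {
		return nil, fmt.Errorf("model: unknown architecture %q", arch)
	}
	return f(c)
}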

View File

@@ -1,138 +0,0 @@
// Package model_test provides external tests for the model package.
// This test file specifically tests the forward pass functionality on models.
// It is in a separate package (model_test) to avoid import cycles while still
// being able to test the public API of the model package.
package model_test
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/sample"
_ "github.com/ollama/ollama/model/models"
)
type modelTest struct {
Prompt string `json:"prompt"`
OutputContainsOne []string `json:"output_contains_one"`
}
func TestForwardSimple(t *testing.T) {
if testing.Short() {
t.Skip("skipping in short mode")
}
// Read all JSON files from testdata/models
files, err := os.ReadDir("testdata/models")
if err != nil {
t.Fatal(err)
}
for _, file := range files {
if !strings.HasSuffix(file.Name(), ".json") {
continue
}
jsonPath := filepath.Join("testdata/models", file.Name())
ggufPath := filepath.Join("testdata/models", strings.TrimSuffix(file.Name(), ".json")+".gguf")
// Skip if no corresponding .gguf file exists
if _, err := os.Stat(ggufPath); err != nil {
t.Logf("skipping %s: no corresponding GGUF file found", file.Name())
continue
}
data, err := os.ReadFile(jsonPath)
if err != nil {
t.Fatal(err)
}
var test modelTest
if err := json.Unmarshal(data, &test); err != nil {
t.Fatal(err)
}
t.Run(strings.TrimSuffix(file.Name(), ".json"), func(t *testing.T) {
m, err := model.New(ggufPath)
if err != nil {
t.Fatal(err)
}
m.Config().Cache.Init(m.Backend(), ml.DTypeF32, 2048)
inputs, err := m.(model.TextProcessor).Encode(test.Prompt)
if err != nil {
t.Fatal(err)
}
var result []string
for len(result) < 100 { // Limit to 100 tokens max
options := model.Options{
Inputs: inputs,
Positions: make([]int32, len(inputs)),
Sequences: make([]int, len(inputs)),
Outputs: []int32{int32(len(inputs) - 1)},
}
for i := range options.Positions {
options.Positions[i] = int32(i)
options.Sequences[i] = 0
}
ctx := m.Backend().NewContext()
modelOutput, err := model.Forward(ctx, m, options)
if err != nil {
ctx.Close()
t.Fatal(fmt.Errorf("forward pass failed: %v", err))
}
f32s := modelOutput.Floats()
logits := make([]float64, len(f32s))
for i, f32 := range f32s {
logits[i] = float64(f32)
}
token, err := sample.Sample(logits, sample.Greedy())
if err != nil {
ctx.Close()
t.Fatal(fmt.Errorf("sampling failed: %v", err))
}
ctx.Close()
// Greedy sampling: take the token with the highest logit
nextToken := int32(token[0])
if m.(model.TextProcessor).Is(nextToken, model.SpecialEOS) {
break
}
piece, err := m.(model.TextProcessor).Decode([]int32{nextToken})
if err != nil {
t.Fatal(err)
}
result = append(result, piece)
output := strings.Join(result, "")
for _, expectedOutput := range test.OutputContainsOne {
if strings.Contains(output, expectedOutput) {
t.Logf("Test passed with output: %q (matched expected: %q)", output, expectedOutput)
return
}
}
// Maintain full context by appending new token
inputs = append(inputs, nextToken)
}
t.Fatalf("Expected output containing one of %q but got: %q", test.OutputContainsOne, strings.Join(result, ""))
})
}
}
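The deleted loop above drives generation with sample.Sample(logits, sample.Greedy()), i.e. plain argmax over the logits at each step. For reference, greedy selection on its own reduces to the small standalone sketch below (not the sample package's code):

package main

import "fmt"

// greedy returns the index of the largest logit, which is all that
// greedy (temperature-zero) sampling does.
func greedy(logits []float64) int {
	if len(logits) == 0 {
		return -1
	}
	best := 0
	for i, v := range logits {
		if v > logits[best] {
			best = i
		}
	}
	return best
}

func main() {
	logits := []float64{-1.2, 3.4, 0.7}
	fmt.Println("next token id:", greedy(logits)) // prints 1
}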

View File

@@ -1,10 +0,0 @@
# Test Model Directory
This directory is used for storing model files (like `.gguf` files) that are required to run the tests in `model_external_test.go`.
## Usage
- Place any model files you need for testing in this directory
- The test file will look for any model files here (e.g., `llama3.gguf`)
- All non-markdown files in this directory are git-ignored to prevent large model files from being committed to the repository
- Only `.md` files (like this README) will be tracked in git

View File

@@ -1,7 +0,0 @@
{
"prompt": "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\nhi<|im_end|>\n<|im_start|>assistant\n",
"output_contains_one": [
"Hello",
"Hi"
]
}

View File

@@ -845,6 +845,8 @@ func (s *Server) loadModel(
threads int,
multiUserCache bool,
) {
+llama.BackendInit()
var err error
s.model, err = llama.LoadModelFromFile(mpath, params)
if err != nil {
@@ -930,8 +932,6 @@ func Execute(args []string) error {
})
slog.SetDefault(slog.New(handler))
slog.Info("starting go runner")
-llama.BackendInit()
slog.Info("system", "info", llama.PrintSystemInfo(), "threads", *threads)
server := &Server{
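Net effect of these two hunks: llama.BackendInit() moves out of Execute and into loadModel, so backend initialization sits immediately in front of the first LoadModelFromFile call instead of relying on Execute having run it earlier. A toy sketch of that ordering constraint, using stand-ins rather than the real llama package:

package main

import "fmt"

// Stand-ins for llama.BackendInit and llama.LoadModelFromFile; only the call
// order matters here.
func backendInit() { fmt.Println("backend initialized") }

func loadModelFromFile(path string) string {
	fmt.Println("loading", path)
	return path
}

type server struct{ model string }

// loadModel mirrors the hunk above: initialize the backend right before the
// first model load, rather than earlier in Execute.
func (s *server) loadModel(mpath string) {
	backendInit()
	s.model = loadModelFromFile(mpath)
}

func main() {
	s := &server{}
	s.loadModel("model.gguf")
}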

View File

@@ -813,8 +813,6 @@ func (s *Server) loadModel(
panic(err)
}
slog.Info("system", "info", s.model.Backend().SystemInfo() /* "threads", *threads */)
// TODO(jessegross): LoRA loading
if lpath.String() != "" {
panic("loras are not yet implemented")
@@ -883,6 +881,7 @@ func Execute(args []string) error {
})
slog.SetDefault(slog.New(handler))
slog.Info("starting ollama engine")
+// TODO(jessegross): Some system info would be useful
server := &Server{
batchSize: *batchSize,