llm: attempt to evaluate symlinks, but do not fail (#9089 )

provides a better approach to #9088 that will attempt to evaluate symlinks (important for macOS where 'ollama' is often a symlink), but use the result of os.Executable() as a fallback in scenarios where filepath.EvalSymlinks fails due to permission errors or other issues
llm: do not evaluate symlink for exe path lookup (#9088 )
2025-02-13 22:38:23 -08:00 · 2025-02-13 22:13:47 -08:00 · 2025-02-13 18:43:33 -08:00 · 2025-02-13 00:23:17 -08:00 · 2025-02-12 11:17:39 -08:00 · 2025-02-12 09:16:26 -08:00
14 changed files with 165 additions and 391 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -104,6 +104,10 @@ if(CMAKE_HIP_COMPILER)
    if(AMDGPU_TARGETS)
        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)

+        if (WIN32)
+            target_compile_definitions(ggml-hip PRIVATE GGML_CUDA_NO_PEER_COPY=1)
+        endif()
+
        set(OLLAMA_HIP_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/rocm)
        install(TARGETS ggml-hip
            RUNTIME_DEPENDENCIES
--- a/README.md
+++ b/README.md
@@ -437,9 +437,10 @@ See the [API documentation](./docs/api.md) for all endpoints.

 - [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
 - [Gentoo](https://github.com/gentoo/guru/tree/master/app-misc/ollama)
+- [Homebrew](https://formulae.brew.sh/formula/ollama)
 - [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
 - [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
- [Nix package](https://search.nixos.org/packages?channel=24.05&show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
+- [Nix package](https://search.nixos.org/packages?show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
 - [Flox](https://flox.dev/blog/ollama-part-one)

 ### Libraries
@@ -494,7 +495,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in unified API)
 - [LlmTornado](https://github.com/lofcz/llmtornado) (C# library providing a unified interface for major FOSS & Commercial inference APIs)
 - [Ollama for Zig](https://github.com/dravenk/ollama-zig)
- [Abso](https://github.com/lunary-ai/abso/blob/main/README.md#ollama) (OpenAI-compatible TypeScript SDK for any LLM provider)
+- [Abso](https://github.com/lunary-ai/abso) (OpenAI-compatible TypeScript SDK for any LLM provider)

 ### Mobile

--- a/api/client.go
+++ b/api/client.go
@@ -18,6 +18,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -29,28 +30,6 @@ import (
 	"github.com/ollama/ollama/version"
 )

-// StatusError is an error with an HTTP status code and message,
-// it is parsed on the client-side and not returned from the API
-type StatusError struct {
-	StatusCode int    // e.g. 200
-	Status     string // e.g. "200 OK"
-	ErrorResponse
-}
-
-func (e StatusError) Error() string {
-	switch {
-	case e.Status != "" && e.Err != "":
-		return fmt.Sprintf("%s: %s", e.Status, e.Err)
-	case e.Status != "":
-		return e.Status
-	case e.Err != "":
-		return e.Err
-	default:
-		// this should not happen
-		return "something went wrong, please see the ollama server logs for details"
-	}
-}
-
 // Client encapsulates client state for interacting with the ollama
 // service. Use [ClientFromEnvironment] to create new Clients.
 type Client struct {
@@ -68,7 +47,7 @@ func checkError(resp *http.Response, body []byte) error {
 	err := json.Unmarshal(body, &apiError)
 	if err != nil {
 		// Use the full body as the message if we fail to decode a response.
-		apiError.Err = string(body)
+		apiError.ErrorMessage = string(body)
 	}

 	return apiError
@@ -153,7 +132,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
 const maxBufferSize = 512 * format.KiloByte

 func (c *Client) stream(ctx context.Context, method, path string, data any, fn func([]byte) error) error {
-	var buf io.Reader
+	var buf *bytes.Buffer
 	if data != nil {
 		bts, err := json.Marshal(data)
 		if err != nil {
@@ -184,22 +163,24 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 	scanBuf := make([]byte, 0, maxBufferSize)
 	scanner.Buffer(scanBuf, maxBufferSize)
 	for scanner.Scan() {
-		bts := scanner.Bytes()
+		var errorResponse struct {
+			Error string `json:"error,omitempty"`
+		}

-		var errorResponse ErrorResponse
+		bts := scanner.Bytes()
 		if err := json.Unmarshal(bts, &errorResponse); err != nil {
 			return fmt.Errorf("unmarshal: %w", err)
 		}

-		if errorResponse.Err != "" {
-			return errorResponse
+		if errorResponse.Error != "" {
+			return errors.New(errorResponse.Error)
 		}

 		if response.StatusCode >= http.StatusBadRequest {
 			return StatusError{
-				StatusCode:    response.StatusCode,
-				Status:        response.Status,
-				ErrorResponse: errorResponse,
+				StatusCode:   response.StatusCode,
+				Status:       response.Status,
+				ErrorMessage: errorResponse.Error,
 			}
 		}

--- a/api/client_test.go
+++ b/api/client_test.go
@@ -1,12 +1,6 @@
 package api

 import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"net/http/httptest"
-	"net/url"
 	"testing"
 )

@@ -49,270 +43,3 @@ func TestClientFromEnvironment(t *testing.T) {
 		})
 	}
 }
-
-// testError represents an internal error type for testing different error formats
-type testError struct {
-	message    string         // basic error message
-	structured *ErrorResponse // structured error response, nil for basic format
-	statusCode int
-}
-
-func (e testError) Error() string {
-	return e.message
-}
-
-func TestClientStream(t *testing.T) {
-	testCases := []struct {
-		name      string
-		responses []any
-		wantErr   string
-	}{
-		{
-			name: "basic error format",
-			responses: []any{
-				testError{
-					message:    "test error message",
-					statusCode: http.StatusBadRequest,
-				},
-			},
-			wantErr: "test error message",
-		},
-		{
-			name: "structured error format",
-			responses: []any{
-				testError{
-					message: "test structured error",
-					structured: &ErrorResponse{
-						Err:  "test structured error",
-						Hint: "test hint",
-					},
-					statusCode: http.StatusBadRequest,
-				},
-			},
-			wantErr: "test structured error\ntest hint",
-		},
-		{
-			name: "error after chunks - basic format",
-			responses: []any{
-				ChatResponse{Message: Message{Content: "partial 1"}},
-				ChatResponse{Message: Message{Content: "partial 2"}},
-				testError{
-					message:    "mid-stream basic error",
-					statusCode: http.StatusOK,
-				},
-			},
-			wantErr: "mid-stream basic error",
-		},
-		{
-			name: "error after chunks - structured format",
-			responses: []any{
-				ChatResponse{Message: Message{Content: "partial 1"}},
-				ChatResponse{Message: Message{Content: "partial 2"}},
-				testError{
-					message: "mid-stream structured error",
-					structured: &ErrorResponse{
-						Err:  "mid-stream structured error",
-						Hint: "additional context",
-					},
-					statusCode: http.StatusOK,
-				},
-			},
-			wantErr: "mid-stream structured error\nadditional context",
-		},
-		{
-			name: "successful stream completion",
-			responses: []any{
-				ChatResponse{Message: Message{Content: "chunk 1"}},
-				ChatResponse{Message: Message{Content: "chunk 2"}},
-				ChatResponse{
-					Message:    Message{Content: "final chunk"},
-					Done:       true,
-					DoneReason: "stop",
-				},
-			},
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				flusher, ok := w.(http.Flusher)
-				if !ok {
-					t.Fatal("expected http.Flusher")
-				}
-
-				w.Header().Set("Content-Type", "application/x-ndjson")
-
-				for _, resp := range tc.responses {
-					if errResp, ok := resp.(testError); ok {
-						w.WriteHeader(errResp.statusCode)
-						var err error
-						if errResp.structured != nil {
-							err = json.NewEncoder(w).Encode(errResp.structured)
-						} else {
-							err = json.NewEncoder(w).Encode(map[string]string{
-								"error": errResp.message,
-							})
-						}
-						if err != nil {
-							t.Fatal("failed to encode error response:", err)
-						}
-						return
-					}
-
-					if err := json.NewEncoder(w).Encode(resp); err != nil {
-						t.Fatalf("failed to encode response: %v", err)
-					}
-					flusher.Flush()
-				}
-			}))
-			defer ts.Close()
-
-			client := NewClient(&url.URL{Scheme: "http", Host: ts.Listener.Addr().String()}, http.DefaultClient)
-
-			var receivedChunks []ChatResponse
-			err := client.stream(context.Background(), http.MethodPost, "/v1/chat", nil, func(chunk []byte) error {
-				var resp ChatResponse
-				if err := json.Unmarshal(chunk, &resp); err != nil {
-					return fmt.Errorf("failed to unmarshal chunk: %w", err)
-				}
-				receivedChunks = append(receivedChunks, resp)
-				return nil
-			})
-
-			if tc.wantErr != "" {
-				if err == nil {
-					t.Fatalf("got nil, want error %q", tc.wantErr)
-				}
-				if err.Error() != tc.wantErr {
-					t.Errorf("error message mismatch: got %q, want %q", err.Error(), tc.wantErr)
-				}
-			} else {
-				if err != nil {
-					t.Errorf("got error %q, want nil", err)
-				}
-			}
-		})
-	}
-}
-
-func TestClientDo(t *testing.T) {
-	testCases := []struct {
-		name     string
-		response any
-		wantErr  string
-	}{
-		{
-			name: "basic error format",
-			response: testError{
-				message:    "test error message",
-				statusCode: http.StatusBadRequest,
-			},
-			wantErr: "test error message",
-		},
-		{
-			name: "structured error format",
-			response: testError{
-				message: "test structured error",
-				structured: &ErrorResponse{
-					Err:  "test structured error",
-					Hint: "test hint",
-				},
-				statusCode: http.StatusBadRequest,
-			},
-			wantErr: "test structured error",
-		},
-		{
-			name: "server error - basic format",
-			response: testError{
-				message:    "internal error",
-				statusCode: http.StatusInternalServerError,
-			},
-			wantErr: "internal error",
-		},
-		{
-			name: "server error - structured format",
-			response: testError{
-				message: "internal server error",
-				structured: &ErrorResponse{
-					Err:  "internal server error",
-					Hint: "please try again later",
-				},
-				statusCode: http.StatusInternalServerError,
-			},
-			wantErr: "internal server error",
-		},
-		{
-			name: "successful response",
-			response: struct {
-				ID      string `json:"id"`
-				Success bool   `json:"success"`
-			}{
-				ID:      "msg_123",
-				Success: true,
-			},
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				if errResp, ok := tc.response.(testError); ok {
-					w.WriteHeader(errResp.statusCode)
-					var err error
-					if errResp.structured != nil {
-						err = json.NewEncoder(w).Encode(errResp.structured)
-					} else {
-						err = json.NewEncoder(w).Encode(map[string]string{
-							"error": errResp.message,
-						})
-					}
-					if err != nil {
-						t.Fatal("failed to encode error response:", err)
-					}
-					return
-				}
-
-				w.Header().Set("Content-Type", "application/json")
-				if err := json.NewEncoder(w).Encode(tc.response); err != nil {
-					t.Fatalf("failed to encode response: %v", err)
-				}
-			}))
-			defer ts.Close()
-
-			client := NewClient(&url.URL{Scheme: "http", Host: ts.Listener.Addr().String()}, http.DefaultClient)
-
-			var resp struct {
-				ID      string `json:"id"`
-				Success bool   `json:"success"`
-			}
-			err := client.do(context.Background(), http.MethodPost, "/v1/messages", nil, &resp)
-
-			if tc.wantErr != "" {
-				if err == nil {
-					t.Fatalf("got nil, want error %q", tc.wantErr)
-				}
-				if err.Error() != tc.wantErr {
-					t.Errorf("error message mismatch: got %q, want %q", err.Error(), tc.wantErr)
-				}
-				return
-			}
-
-			if err != nil {
-				t.Errorf("got error %q, want nil", err)
-			}
-
-			if expectedResp, ok := tc.response.(struct {
-				ID      string `json:"id"`
-				Success bool   `json:"success"`
-			}); ok {
-				if resp.ID != expectedResp.ID {
-					t.Errorf("response ID mismatch: got %q, want %q", resp.ID, expectedResp.ID)
-				}
-				if resp.Success != expectedResp.Success {
-					t.Errorf("response Success mismatch: got %v, want %v", resp.Success, expectedResp.Success)
-				}
-			}
-		})
-	}
-}
--- a/api/types.go
+++ b/api/types.go
@@ -12,6 +12,27 @@ import (
 	"time"
 )

+// StatusError is an error with an HTTP status code and message.
+type StatusError struct {
+	StatusCode   int
+	Status       string
+	ErrorMessage string `json:"error"`
+}
+
+func (e StatusError) Error() string {
+	switch {
+	case e.Status != "" && e.ErrorMessage != "":
+		return fmt.Sprintf("%s: %s", e.Status, e.ErrorMessage)
+	case e.Status != "":
+		return e.Status
+	case e.ErrorMessage != "":
+		return e.ErrorMessage
+	default:
+		// this should not happen
+		return "something went wrong, please see the ollama server logs for details"
+	}
+}
+
 // ImageData represents the raw binary data of an image file.
 type ImageData []byte

@@ -640,22 +661,6 @@ func (d *Duration) UnmarshalJSON(b []byte) (err error) {
 	return nil
 }

-// ErrorResponse implements a structured error interface that is returned from the Ollama server
-type ErrorResponse struct {
-	// Err is the error from the server. It helps with debugging the code-path
-	Err string `json:"error"`
-
-	// Hint is a user-friendly message about what went wrong, with suggested troubleshooting
-	Hint string `json:"hint"`
-}
-
-func (e ErrorResponse) Error() string {
-	if e.Hint == "" {
-		return e.Err
-	}
-	return fmt.Sprintf("%s\n%s", e.Err, e.Hint)
-}
-
 // FormatParams converts specified parameter options to their correct types
 func FormatParams(params map[string][]string) (map[string]interface{}, error) {
 	opts := Options{}
--- a/discover/path.go
+++ b/discover/path.go
@@ -19,9 +19,8 @@ var LibOllamaPath string = func() string {
 		return ""
 	}

-	exe, err = filepath.EvalSymlinks(exe)
-	if err != nil {
-		return ""
+	if eval, err := filepath.EvalSymlinks(exe); err == nil {
+		exe = eval
 	}

 	var libPath string
--- a/llama/patches/0014-sort-devices-by-score.patch
+++ b/llama/patches/0014-sort-devices-by-score.patch
@@ -8,7 +8,7 @@ Subject: [PATCH] sort devices by score
 1 file changed, 13 insertions(+), 8 deletions(-)

 diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
-index 899d16f2..ac5cda07 100644
+index 899d16f2..135f7df0 100644
 --- a/ggml/src/ggml-backend-reg.cpp
 +++ b/ggml/src/ggml-backend-reg.cpp
@@ -150,7 +150,7 @@ struct ggml_backend_reg_entry {
@@ -29,7 +29,7 @@ index 899d16f2..ac5cda07 100644
         if (!reg) {
             return;
         }
-@@ -206,15 +206,15 @@ struct ggml_backend_registry {
+@@ -206,15 +206,20 @@ struct ggml_backend_registry {
 #endif
         backends.push_back({ reg, std::move(handle) });
         for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
@@ -45,10 +45,15 @@ index 899d16f2..ac5cda07 100644
 #endif
 -        devices.push_back(device);
 +        devices.push_back({device, score});
+        std::stable_sort(devices.begin(), devices.end(),
+            [](const auto & a, const auto & b) {
+                return a.second > b.second;
+            }
+        );
     }
 
     ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
-@@ -257,7 +257,7 @@ struct ggml_backend_registry {
+@@ -257,7 +262,7 @@ struct ggml_backend_registry {
 
         GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
 
@@ -57,7 +62,7 @@ index 899d16f2..ac5cda07 100644
 
         return reg;
     }
-@@ -280,7 +280,7 @@ struct ggml_backend_registry {
+@@ -280,7 +285,7 @@ struct ggml_backend_registry {
         // remove devices
         devices.erase(
             std::remove_if(devices.begin(), devices.end(),
@@ -66,17 +71,12 @@ index 899d16f2..ac5cda07 100644
             devices.end());
 
         // remove backend
-@@ -338,7 +338,12 @@ size_t ggml_backend_dev_count() {
+@@ -338,7 +343,7 @@ size_t ggml_backend_dev_count() {
 
 ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
     GGML_ASSERT(index < ggml_backend_dev_count());
 -    return get_reg().devices[index];
-+    auto devices = get_reg().devices;
-+    if (!std::is_heap(devices.begin(), devices.end())) {
-+        std::make_heap(devices.begin(), devices.end(), [](const auto & a, const auto & b) { return a.second < b.second; });
-+    }
-+
-+    return devices[index].first;
+    return get_reg().devices[index].first;
 }
 
 ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
--- a/llama/patches/0017-try-catch-backend-load.patch
+++ b/llama/patches/0017-try-catch-backend-load.patch
@@ -0,0 +1,69 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Michael Yang <mxyng@pm.me>
+Date: Tue, 11 Feb 2025 14:06:36 -0800
+Subject: [PATCH] try/catch backend load
+
+---
+ ggml/src/ggml-backend-reg.cpp | 45 ++++++++++++++++++-----------------
+ 1 file changed, 23 insertions(+), 22 deletions(-)
+
+diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
+index 135f7df0..84b21dd8 100644
+--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
+@@ -512,32 +512,33 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
+         }
+         fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
+         for (const auto & entry : dir_it) {
+-            if (entry.is_regular_file()) {
+-                std::wstring filename = entry.path().filename().wstring();
+-                std::wstring ext = entry.path().extension().wstring();
+-                if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
+-                    dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
+-                    if (!handle && !silent) {
+-                        GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+-                    }
+-                    if (handle) {
+            try {
+                if (entry.is_regular_file()) {
+                    std::wstring filename = entry.path().filename().wstring();
+                    std::wstring ext = entry.path().extension().wstring();
+                    if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
+                        dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
+                        if (!handle) {
+                            GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+                            continue;
+                        }
+
+                         auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
+-                        if (score_fn) {
+-                            int s = score_fn();
+-#ifndef NDEBUG
+-                            GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
+-#endif
+-                            if (s > best_score) {
+-                                best_score = s;
+-                                best_path = entry.path().wstring();
+-                            }
+-                        } else {
+-                            if (!silent) {
+-                                GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+-                            }
+                        if (!score_fn) {
+                            GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+                            continue;
+                        }
+
+                        int s = score_fn();
+                        GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
+                        if (s > best_score) {
+                            best_score = s;
+                            best_path = entry.path().wstring();
+                         }
+                     }
+                 }
+            } catch (const std::exception & e) {
+                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
+             }
+         }
+     }
--- a/llm/server.go
+++ b/llm/server.go
@@ -320,9 +320,8 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
 			return nil, fmt.Errorf("unable to lookup executable path: %w", err)
 		}

-		exe, err = filepath.EvalSymlinks(exe)
-		if err != nil {
-			return nil, fmt.Errorf("unable to evaluate symlinks for executable path: %w", err)
+		if eval, err := filepath.EvalSymlinks(exe); err == nil {
+			exe = eval
 		}

 		// TODO - once fully switched to the Go runner, load the model here for tokenize/detokenize cgo access
--- a/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp
@@ -215,6 +215,11 @@ struct ggml_backend_registry {
        GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
 #endif
        devices.push_back({device, score});
+        std::stable_sort(devices.begin(), devices.end(),
+            [](const auto & a, const auto & b) {
+                return a.second > b.second;
+            }
+        );
    }

    ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
@@ -338,12 +343,7 @@ size_t ggml_backend_dev_count() {

 ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
    GGML_ASSERT(index < ggml_backend_dev_count());
-    auto devices = get_reg().devices;
-    if (!std::is_heap(devices.begin(), devices.end())) {
-        std::make_heap(devices.begin(), devices.end(), [](const auto & a, const auto & b) { return a.second < b.second; });
-    }
-
-    return devices[index].first;
+    return get_reg().devices[index].first;
 }

 ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
@@ -512,32 +512,33 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
        }
        fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
        for (const auto & entry : dir_it) {
-            if (entry.is_regular_file()) {
-                std::wstring filename = entry.path().filename().wstring();
-                std::wstring ext = entry.path().extension().wstring();
-                if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
-                    dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
-                    if (!handle && !silent) {
-                        GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
-                    }
-                    if (handle) {
+            try {
+                if (entry.is_regular_file()) {
+                    std::wstring filename = entry.path().filename().wstring();
+                    std::wstring ext = entry.path().extension().wstring();
+                    if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
+                        dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
+                        if (!handle) {
+                            GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+                            continue;
+                        }
+
                        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
-                        if (score_fn) {
-                            int s = score_fn();
-#ifndef NDEBUG
-                            GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
-#endif
-                            if (s > best_score) {
-                                best_score = s;
-                                best_path = entry.path().wstring();
-                            }
-                        } else {
-                            if (!silent) {
-                                GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
-                            }
+                        if (!score_fn) {
+                            GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+                            continue;
+                        }
+
+                        int s = score_fn();
+                        GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
+                        if (s > best_score) {
+                            best_score = s;
+                            best_path = entry.path().wstring();
                        }
                    }
                }
+            } catch (const std::exception & e) {
+                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
            }
        }
    }
--- a/ml/backend/ggml/ggml/src/ggml.go
+++ b/ml/backend/ggml/ggml/src/ggml.go
@@ -79,6 +79,11 @@ var OnceLoad = sync.OnceFunc(func() {
 			continue
 		}

+		if abspath != filepath.Dir(exe) && !strings.Contains(abspath, filepath.FromSlash("lib/ollama")) {
+			slog.Debug("skipping path which is not part of ollama", "path", abspath)
+			continue
+		}
+
 		if _, ok := visited[abspath]; !ok {
 			func() {
 				slog.Debug("ggml backend load all from path", "path", abspath)
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -610,14 +610,14 @@ type EmbedWriter struct {
 }

 func (w *BaseWriter) writeError(data []byte) (int, error) {
-	var er api.ErrorResponse // error response is used here to parse the error message
-	err := json.Unmarshal(data, &er)
+	var serr api.StatusError
+	err := json.Unmarshal(data, &serr)
 	if err != nil {
 		return 0, err
 	}

 	w.ResponseWriter.Header().Set("Content-Type", "application/json")
-	err = json.NewEncoder(w.ResponseWriter).Encode(NewError(http.StatusInternalServerError, er.Err))
+	err = json.NewEncoder(w.ResponseWriter).Encode(NewError(http.StatusInternalServerError, serr.Error()))
 	if err != nil {
 		return 0, err
 	}
--- a/server/images.go
+++ b/server/images.go
@@ -550,7 +550,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu

 	manifest, err = pullModelManifest(ctx, mp, regOpts)
 	if err != nil {
-		return fmt.Errorf("pull model manifest: %w", err)
+		return fmt.Errorf("pull model manifest: %s", err)
 	}

 	var layers []Layer
@@ -629,18 +629,13 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	return nil
 }

-var ErrRemoteModelNotFound = errors.New("model not found")
-
 func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *registryOptions) (*Manifest, error) {
 	requestURL := mp.BaseURL().JoinPath("v2", mp.GetNamespaceRepository(), "manifests", mp.Tag)

 	headers := make(http.Header)
 	headers.Set("Accept", "application/vnd.docker.distribution.manifest.v2+json")
 	resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, headers, nil, regOpts)
-	if errors.Is(err, os.ErrNotExist) {
-		// The model was not found on the remote registry
-		return nil, fmt.Errorf("%w: %s", ErrRemoteModelNotFound, err)
-	} else if err != nil {
+	if err != nil {
 		return nil, err
 	}
 	defer resp.Body.Close()
--- a/server/routes.go
+++ b/server/routes.go
@@ -564,8 +564,7 @@ func (s *Server) PullHandler(c *gin.Context) {
 		return
 	}

-	reqName := cmp.Or(req.Model, req.Name)
-	name := model.ParseName(reqName)
+	name := model.ParseName(cmp.Or(req.Model, req.Name))
 	if !name.IsValid() {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
 		return
@@ -592,18 +591,7 @@ func (s *Server) PullHandler(c *gin.Context) {
 		defer cancel()

 		if err := PullModel(ctx, name.DisplayShortest(), regOpts, fn); err != nil {
-			if errors.Is(err, ErrRemoteModelNotFound) {
-				hint := fmt.Sprintf("Model %q not found - please check the model name is correct and try again", reqName)
-				if name.Host == DefaultRegistry {
-					hint = fmt.Sprintf("Model %q not found - search available models at: https://ollama.com/search?q=%s", reqName, reqName)
-				}
-				ch <- api.ErrorResponse{
-					Err:  err.Error(),
-					Hint: hint,
-				}
-			} else {
-				ch <- gin.H{"error": err.Error()}
-			}
+			ch <- gin.H{"error": err.Error()}
 		}
 	}()
Author	SHA1	Message	Date
Jeffrey Morgan	f8453e9d4a	llm: attempt to evaluate symlinks, but do not fail (#9089 ) provides a better approach to #9088 that will attempt to evaluate symlinks (important for macOS where 'ollama' is often a symlink), but use the result of os.Executable() as a fallback in scenarios where filepath.EvalSymlinks fails due to permission errors or other issues	2025-02-13 22:38:23 -08:00
Jeffrey Morgan	55c0ee76b4	llm: do not evaluate symlink for exe path lookup (#9088 ) In some cases, the directories in the executable path read by filepath.EvalSymlinks are not accessible, resulting in permission errors which results in an error when running models. It also doesn't work well on long paths on windows, also resulting in errors. This change removes filepath.EvalSymlinks when accessing os.Executable() altogether	2025-02-13 22:13:47 -08:00
Jeffrey Morgan	c03e248735	ml/backend/ggml: stable sort devices by score (#9081 )	2025-02-13 18:43:33 -08:00
Jeffrey Morgan	a4f69a0191	build: add -DGGML_CUDA_NO_PEER_COPY=ON for rocm builds on windows (#9060 )	2025-02-13 00:23:17 -08:00
Clinton	82658c3eec	readme: add Homebrew to package managers section (#9052 )	2025-02-12 11:17:39 -08:00
bloominstrong	378d6e1e6a	docs: fix nix package link (#9045 ) removing the channel tag from the url so it will always go to the current stable channel.	2025-02-12 09:16:26 -08:00
Hugues Chocart	afa55bc70c	doc: fix link for Abso (#9043 )	2025-02-12 09:15:08 -08:00
Michael Yang	49df03da9a	fix: harden backend loading (#9024 ) * wrap ggml_backend_load_best in try/catch * ignore non-ollama paths	2025-02-11 15:36:53 -08:00