Compare commits

..

6 Commits

Author SHA1 Message Date
Jeffrey Morgan
c03e248735 ml/backend/ggml: stable sort devices by score (#9081) 2025-02-13 18:43:33 -08:00
Jeffrey Morgan
a4f69a0191 build: add -DGGML_CUDA_NO_PEER_COPY=ON for rocm builds on windows (#9060) 2025-02-13 00:23:17 -08:00
Clinton
82658c3eec readme: add Homebrew to package managers section (#9052) 2025-02-12 11:17:39 -08:00
bloominstrong
378d6e1e6a docs: fix nix package link (#9045)
Remove the channel tag from the URL so the link always goes to the current stable channel.
2025-02-12 09:16:26 -08:00
Hugues Chocart
afa55bc70c doc: fix link for Abso (#9043) 2025-02-12 09:15:08 -08:00
Michael Yang
49df03da9a fix: harden backend loading (#9024)
* wrap ggml_backend_load_best in try/catch
* ignore non-ollama paths
2025-02-11 15:36:53 -08:00
12 changed files with 161 additions and 385 deletions

View File

@@ -104,6 +104,10 @@ if(CMAKE_HIP_COMPILER)
if(AMDGPU_TARGETS)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)
# On Windows, compile the ggml-hip target with GGML_CUDA_NO_PEER_COPY defined.
if (WIN32)
target_compile_definitions(ggml-hip PRIVATE GGML_CUDA_NO_PEER_COPY=1)
endif()
set(OLLAMA_HIP_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/rocm)
install(TARGETS ggml-hip
RUNTIME_DEPENDENCIES

View File

@@ -437,9 +437,10 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
- [Gentoo](https://github.com/gentoo/guru/tree/master/app-misc/ollama)
- [Homebrew](https://formulae.brew.sh/formula/ollama)
- [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
- [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
- [Nix package](https://search.nixos.org/packages?channel=24.05&show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
- [Nix package](https://search.nixos.org/packages?show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
- [Flox](https://flox.dev/blog/ollama-part-one)
### Libraries
@@ -494,7 +495,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in unified API)
- [LlmTornado](https://github.com/lofcz/llmtornado) (C# library providing a unified interface for major FOSS & Commercial inference APIs)
- [Ollama for Zig](https://github.com/dravenk/ollama-zig)
- [Abso](https://github.com/lunary-ai/abso/blob/main/README.md#ollama) (OpenAI-compatible TypeScript SDK for any LLM provider)
- [Abso](https://github.com/lunary-ai/abso) (OpenAI-compatible TypeScript SDK for any LLM provider)
### Mobile

View File

@@ -18,6 +18,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
@@ -29,28 +30,6 @@ import (
"github.com/ollama/ollama/version"
)
// StatusError pairs an HTTP status with the error payload carried by the
// embedded ErrorResponse. It is built on the client side while parsing a
// response; the API itself never returns it.
type StatusError struct {
	StatusCode int    // e.g. 200
	Status     string // e.g. "200 OK"
	ErrorResponse
}

// Error renders the most specific message available: "<status>: <err>" when
// both parts are set, otherwise whichever one is non-empty.
func (e StatusError) Error() string {
	hasStatus := e.Status != ""
	hasErr := e.Err != ""

	if hasStatus && hasErr {
		return fmt.Sprintf("%s: %s", e.Status, e.Err)
	}
	if hasStatus {
		return e.Status
	}
	if hasErr {
		return e.Err
	}

	// Neither part is set; this should not happen.
	return "something went wrong, please see the ollama server logs for details"
}
// Client encapsulates client state for interacting with the ollama
// service. Use [ClientFromEnvironment] to create new Clients.
type Client struct {
@@ -68,7 +47,7 @@ func checkError(resp *http.Response, body []byte) error {
err := json.Unmarshal(body, &apiError)
if err != nil {
// Use the full body as the message if we fail to decode a response.
apiError.Err = string(body)
apiError.ErrorMessage = string(body)
}
return apiError
@@ -153,7 +132,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
const maxBufferSize = 512 * format.KiloByte
func (c *Client) stream(ctx context.Context, method, path string, data any, fn func([]byte) error) error {
var buf io.Reader
var buf *bytes.Buffer
if data != nil {
bts, err := json.Marshal(data)
if err != nil {
@@ -184,22 +163,24 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
scanBuf := make([]byte, 0, maxBufferSize)
scanner.Buffer(scanBuf, maxBufferSize)
for scanner.Scan() {
bts := scanner.Bytes()
var errorResponse struct {
Error string `json:"error,omitempty"`
}
var errorResponse ErrorResponse
bts := scanner.Bytes()
if err := json.Unmarshal(bts, &errorResponse); err != nil {
return fmt.Errorf("unmarshal: %w", err)
}
if errorResponse.Err != "" {
return errorResponse
if errorResponse.Error != "" {
return errors.New(errorResponse.Error)
}
if response.StatusCode >= http.StatusBadRequest {
return StatusError{
StatusCode: response.StatusCode,
Status: response.Status,
ErrorResponse: errorResponse,
StatusCode: response.StatusCode,
Status: response.Status,
ErrorMessage: errorResponse.Error,
}
}

View File

@@ -1,12 +1,6 @@
package api
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"testing"
)
@@ -49,270 +43,3 @@ func TestClientFromEnvironment(t *testing.T) {
})
}
}
// testError describes an error reply that the test servers in this file send
// back, in either the basic or the structured wire format.
type testError struct {
	// message is the basic error text; it is also what Error returns.
	message string
	// structured, when non-nil, is encoded as the response body in place of
	// the basic {"error": message} object.
	structured *ErrorResponse
	// statusCode is the HTTP status written before the error body.
	statusCode int
}

// Error returns the basic error message.
func (te testError) Error() string {
	return te.message
}
// TestClientStream exercises Client.stream against a local test server. It
// covers errors in the basic {"error": ...} format and in the structured
// ErrorResponse format, delivered either as the first response or after some
// partial chunks, plus a stream that completes without error.
func TestClientStream(t *testing.T) {
	testCases := []struct {
		name      string
		responses []any
		wantErr   string
	}{
		{
			name: "basic error format",
			responses: []any{
				testError{
					message:    "test error message",
					statusCode: http.StatusBadRequest,
				},
			},
			wantErr: "test error message",
		},
		{
			name: "structured error format",
			responses: []any{
				testError{
					message: "test structured error",
					structured: &ErrorResponse{
						Err:  "test structured error",
						Hint: "test hint",
					},
					statusCode: http.StatusBadRequest,
				},
			},
			wantErr: "test structured error\ntest hint",
		},
		{
			name: "error after chunks - basic format",
			responses: []any{
				ChatResponse{Message: Message{Content: "partial 1"}},
				ChatResponse{Message: Message{Content: "partial 2"}},
				testError{
					message:    "mid-stream basic error",
					statusCode: http.StatusOK,
				},
			},
			wantErr: "mid-stream basic error",
		},
		{
			name: "error after chunks - structured format",
			responses: []any{
				ChatResponse{Message: Message{Content: "partial 1"}},
				ChatResponse{Message: Message{Content: "partial 2"}},
				testError{
					message: "mid-stream structured error",
					structured: &ErrorResponse{
						Err:  "mid-stream structured error",
						Hint: "additional context",
					},
					statusCode: http.StatusOK,
				},
			},
			wantErr: "mid-stream structured error\nadditional context",
		},
		{
			name: "successful stream completion",
			responses: []any{
				ChatResponse{Message: Message{Content: "chunk 1"}},
				ChatResponse{Message: Message{Content: "chunk 2"}},
				ChatResponse{
					Message:    Message{Content: "final chunk"},
					Done:       true,
					DoneReason: "stop",
				},
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				// This handler runs on a server goroutine, not the test
				// goroutine, so failures are reported with t.Error/t.Errorf
				// followed by return. t.Fatal (FailNow) is only valid on the
				// goroutine running the test.
				flusher, ok := w.(http.Flusher)
				if !ok {
					t.Error("expected http.Flusher")
					return
				}

				w.Header().Set("Content-Type", "application/x-ndjson")

				for _, resp := range tc.responses {
					if errResp, ok := resp.(testError); ok {
						// The error entry ends the response: write its status
						// and body, then stop.
						w.WriteHeader(errResp.statusCode)
						var err error
						if errResp.structured != nil {
							err = json.NewEncoder(w).Encode(errResp.structured)
						} else {
							err = json.NewEncoder(w).Encode(map[string]string{
								"error": errResp.message,
							})
						}
						if err != nil {
							t.Error("failed to encode error response:", err)
						}
						return
					}

					if err := json.NewEncoder(w).Encode(resp); err != nil {
						t.Errorf("failed to encode response: %v", err)
						return
					}
					// Push each chunk to the client as soon as it is encoded.
					flusher.Flush()
				}
			}))
			defer ts.Close()

			client := NewClient(&url.URL{Scheme: "http", Host: ts.Listener.Addr().String()}, http.DefaultClient)

			var receivedChunks []ChatResponse
			err := client.stream(context.Background(), http.MethodPost, "/v1/chat", nil, func(chunk []byte) error {
				var resp ChatResponse
				if err := json.Unmarshal(chunk, &resp); err != nil {
					return fmt.Errorf("failed to unmarshal chunk: %w", err)
				}
				receivedChunks = append(receivedChunks, resp)
				return nil
			})

			if tc.wantErr != "" {
				if err == nil {
					t.Fatalf("got nil, want error %q", tc.wantErr)
				}
				if err.Error() != tc.wantErr {
					t.Errorf("error message mismatch: got %q, want %q", err.Error(), tc.wantErr)
				}
			} else {
				if err != nil {
					t.Errorf("got error %q, want nil", err)
				}
			}
		})
	}
}
// TestClientDo exercises Client.do against a local test server. It covers
// client (400) and server (500) errors in both the basic {"error": ...} format
// and the structured ErrorResponse format, plus a successful JSON response
// that is decoded and compared field by field.
func TestClientDo(t *testing.T) {
	testCases := []struct {
		name     string
		response any
		wantErr  string
	}{
		{
			name: "basic error format",
			response: testError{
				message:    "test error message",
				statusCode: http.StatusBadRequest,
			},
			wantErr: "test error message",
		},
		{
			name: "structured error format",
			response: testError{
				message: "test structured error",
				structured: &ErrorResponse{
					Err:  "test structured error",
					Hint: "test hint",
				},
				statusCode: http.StatusBadRequest,
			},
			wantErr: "test structured error",
		},
		{
			name: "server error - basic format",
			response: testError{
				message:    "internal error",
				statusCode: http.StatusInternalServerError,
			},
			wantErr: "internal error",
		},
		{
			name: "server error - structured format",
			response: testError{
				message: "internal server error",
				structured: &ErrorResponse{
					Err:  "internal server error",
					Hint: "please try again later",
				},
				statusCode: http.StatusInternalServerError,
			},
			wantErr: "internal server error",
		},
		{
			name: "successful response",
			response: struct {
				ID      string `json:"id"`
				Success bool   `json:"success"`
			}{
				ID:      "msg_123",
				Success: true,
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				// This handler runs on a server goroutine, not the test
				// goroutine, so failures are reported with t.Error/t.Errorf
				// followed by return. t.Fatal (FailNow) is only valid on the
				// goroutine running the test.
				if errResp, ok := tc.response.(testError); ok {
					w.WriteHeader(errResp.statusCode)
					var err error
					if errResp.structured != nil {
						err = json.NewEncoder(w).Encode(errResp.structured)
					} else {
						err = json.NewEncoder(w).Encode(map[string]string{
							"error": errResp.message,
						})
					}
					if err != nil {
						t.Error("failed to encode error response:", err)
					}
					return
				}

				w.Header().Set("Content-Type", "application/json")
				if err := json.NewEncoder(w).Encode(tc.response); err != nil {
					t.Errorf("failed to encode response: %v", err)
				}
			}))
			defer ts.Close()

			client := NewClient(&url.URL{Scheme: "http", Host: ts.Listener.Addr().String()}, http.DefaultClient)

			var resp struct {
				ID      string `json:"id"`
				Success bool   `json:"success"`
			}
			err := client.do(context.Background(), http.MethodPost, "/v1/messages", nil, &resp)

			if tc.wantErr != "" {
				if err == nil {
					t.Fatalf("got nil, want error %q", tc.wantErr)
				}
				if err.Error() != tc.wantErr {
					t.Errorf("error message mismatch: got %q, want %q", err.Error(), tc.wantErr)
				}
				return
			}

			if err != nil {
				t.Errorf("got error %q, want nil", err)
			}

			// Only the success case carries this anonymous struct type, so the
			// field comparison is skipped for every other response type.
			if expectedResp, ok := tc.response.(struct {
				ID      string `json:"id"`
				Success bool   `json:"success"`
			}); ok {
				if resp.ID != expectedResp.ID {
					t.Errorf("response ID mismatch: got %q, want %q", resp.ID, expectedResp.ID)
				}
				if resp.Success != expectedResp.Success {
					t.Errorf("response Success mismatch: got %v, want %v", resp.Success, expectedResp.Success)
				}
			}
		})
	}
}

View File

@@ -12,6 +12,27 @@ import (
"time"
)
// StatusError carries an HTTP status code, its status line, and an error
// message. The message is read from and written to the JSON key "error".
type StatusError struct {
	StatusCode   int
	Status       string
	ErrorMessage string `json:"error"`
}

// Error renders the most specific message available: "<status>: <message>"
// when both parts are set, otherwise whichever one is non-empty.
func (e StatusError) Error() string {
	hasStatus := e.Status != ""
	hasMessage := e.ErrorMessage != ""

	if hasStatus && hasMessage {
		return fmt.Sprintf("%s: %s", e.Status, e.ErrorMessage)
	}
	if hasStatus {
		return e.Status
	}
	if hasMessage {
		return e.ErrorMessage
	}

	// Neither part is set; this should not happen.
	return "something went wrong, please see the ollama server logs for details"
}
// ImageData represents the raw binary data of an image file.
type ImageData []byte
@@ -640,22 +661,6 @@ func (d *Duration) UnmarshalJSON(b []byte) (err error) {
return nil
}
// ErrorResponse is the structured error returned from the Ollama server. It
// satisfies the error interface.
type ErrorResponse struct {
	// Err is the server-side error; it helps with debugging the code path.
	Err string `json:"error"`

	// Hint is a user-friendly explanation of what went wrong, with suggested
	// troubleshooting.
	Hint string `json:"hint"`
}

// Error returns Err alone when there is no hint, and otherwise Err followed by
// the hint on its own line.
func (e ErrorResponse) Error() string {
	if e.Hint != "" {
		return fmt.Sprintf("%s\n%s", e.Err, e.Hint)
	}
	return e.Err
}
// FormatParams converts specified parameter options to their correct types
func FormatParams(params map[string][]string) (map[string]interface{}, error) {
opts := Options{}

View File

@@ -8,7 +8,7 @@ Subject: [PATCH] sort devices by score
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 899d16f2..ac5cda07 100644
index 899d16f2..135f7df0 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -150,7 +150,7 @@ struct ggml_backend_reg_entry {
@@ -29,7 +29,7 @@ index 899d16f2..ac5cda07 100644
if (!reg) {
return;
}
@@ -206,15 +206,15 @@ struct ggml_backend_registry {
@@ -206,15 +206,20 @@ struct ggml_backend_registry {
#endif
backends.push_back({ reg, std::move(handle) });
for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
@@ -45,10 +45,15 @@ index 899d16f2..ac5cda07 100644
#endif
- devices.push_back(device);
+ devices.push_back({device, score});
+ std::stable_sort(devices.begin(), devices.end(),
+ [](const auto & a, const auto & b) {
+ return a.second > b.second;
+ }
+ );
}
ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
@@ -257,7 +257,7 @@ struct ggml_backend_registry {
@@ -257,7 +262,7 @@ struct ggml_backend_registry {
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
@@ -57,7 +62,7 @@ index 899d16f2..ac5cda07 100644
return reg;
}
@@ -280,7 +280,7 @@ struct ggml_backend_registry {
@@ -280,7 +285,7 @@ struct ggml_backend_registry {
// remove devices
devices.erase(
std::remove_if(devices.begin(), devices.end(),
@@ -66,17 +71,12 @@ index 899d16f2..ac5cda07 100644
devices.end());
// remove backend
@@ -338,7 +338,12 @@ size_t ggml_backend_dev_count() {
@@ -338,7 +343,7 @@ size_t ggml_backend_dev_count() {
ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
GGML_ASSERT(index < ggml_backend_dev_count());
- return get_reg().devices[index];
+ auto devices = get_reg().devices;
+ if (!std::is_heap(devices.begin(), devices.end())) {
+ std::make_heap(devices.begin(), devices.end(), [](const auto & a, const auto & b) { return a.second < b.second; });
+ }
+
+ return devices[index].first;
+ return get_reg().devices[index].first;
}
ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {

View File

@@ -0,0 +1,69 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Tue, 11 Feb 2025 14:06:36 -0800
Subject: [PATCH] try/catch backend load
---
ggml/src/ggml-backend-reg.cpp | 45 ++++++++++++++++++-----------------
1 file changed, 23 insertions(+), 22 deletions(-)
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 135f7df0..84b21dd8 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -512,32 +512,33 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
}
fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
for (const auto & entry : dir_it) {
- if (entry.is_regular_file()) {
- std::wstring filename = entry.path().filename().wstring();
- std::wstring ext = entry.path().extension().wstring();
- if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
- dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
- if (!handle && !silent) {
- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
- }
- if (handle) {
+ try {
+ if (entry.is_regular_file()) {
+ std::wstring filename = entry.path().filename().wstring();
+ std::wstring ext = entry.path().extension().wstring();
+ if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
+ dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
+ if (!handle) {
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+ continue;
+ }
+
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
- if (score_fn) {
- int s = score_fn();
-#ifndef NDEBUG
- GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
-#endif
- if (s > best_score) {
- best_score = s;
- best_path = entry.path().wstring();
- }
- } else {
- if (!silent) {
- GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
- }
+ if (!score_fn) {
+ GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+ continue;
+ }
+
+ int s = score_fn();
+ GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
+ if (s > best_score) {
+ best_score = s;
+ best_path = entry.path().wstring();
}
}
}
+ } catch (const std::exception & e) {
+ GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
}
}
}

View File

@@ -215,6 +215,11 @@ struct ggml_backend_registry {
GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
#endif
devices.push_back({device, score});
std::stable_sort(devices.begin(), devices.end(),
[](const auto & a, const auto & b) {
return a.second > b.second;
}
);
}
ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
@@ -338,12 +343,7 @@ size_t ggml_backend_dev_count() {
ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
GGML_ASSERT(index < ggml_backend_dev_count());
auto devices = get_reg().devices;
if (!std::is_heap(devices.begin(), devices.end())) {
std::make_heap(devices.begin(), devices.end(), [](const auto & a, const auto & b) { return a.second < b.second; });
}
return devices[index].first;
return get_reg().devices[index].first;
}
ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
@@ -512,32 +512,33 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
}
fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
for (const auto & entry : dir_it) {
if (entry.is_regular_file()) {
std::wstring filename = entry.path().filename().wstring();
std::wstring ext = entry.path().extension().wstring();
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
if (!handle && !silent) {
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
}
if (handle) {
try {
if (entry.is_regular_file()) {
std::wstring filename = entry.path().filename().wstring();
std::wstring ext = entry.path().extension().wstring();
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
if (!handle) {
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
continue;
}
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
if (score_fn) {
int s = score_fn();
#ifndef NDEBUG
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
#endif
if (s > best_score) {
best_score = s;
best_path = entry.path().wstring();
}
} else {
if (!silent) {
GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
}
if (!score_fn) {
GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
continue;
}
int s = score_fn();
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
if (s > best_score) {
best_score = s;
best_path = entry.path().wstring();
}
}
}
} catch (const std::exception & e) {
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
}
}
}

View File

@@ -79,6 +79,11 @@ var OnceLoad = sync.OnceFunc(func() {
continue
}
if abspath != filepath.Dir(exe) && !strings.Contains(abspath, filepath.FromSlash("lib/ollama")) {
slog.Debug("skipping path which is not part of ollama", "path", abspath)
continue
}
if _, ok := visited[abspath]; !ok {
func() {
slog.Debug("ggml backend load all from path", "path", abspath)

View File

@@ -610,14 +610,14 @@ type EmbedWriter struct {
}
func (w *BaseWriter) writeError(data []byte) (int, error) {
var er api.ErrorResponse // error response is used here to parse the error message
err := json.Unmarshal(data, &er)
var serr api.StatusError
err := json.Unmarshal(data, &serr)
if err != nil {
return 0, err
}
w.ResponseWriter.Header().Set("Content-Type", "application/json")
err = json.NewEncoder(w.ResponseWriter).Encode(NewError(http.StatusInternalServerError, er.Err))
err = json.NewEncoder(w.ResponseWriter).Encode(NewError(http.StatusInternalServerError, serr.Error()))
if err != nil {
return 0, err
}

View File

@@ -550,7 +550,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
manifest, err = pullModelManifest(ctx, mp, regOpts)
if err != nil {
return fmt.Errorf("pull model manifest: %w", err)
return fmt.Errorf("pull model manifest: %s", err)
}
var layers []Layer
@@ -629,18 +629,13 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return nil
}
var ErrRemoteModelNotFound = errors.New("model not found")
func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *registryOptions) (*Manifest, error) {
requestURL := mp.BaseURL().JoinPath("v2", mp.GetNamespaceRepository(), "manifests", mp.Tag)
headers := make(http.Header)
headers.Set("Accept", "application/vnd.docker.distribution.manifest.v2+json")
resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, headers, nil, regOpts)
if errors.Is(err, os.ErrNotExist) {
// The model was not found on the remote registry
return nil, fmt.Errorf("%w: %s", ErrRemoteModelNotFound, err)
} else if err != nil {
if err != nil {
return nil, err
}
defer resp.Body.Close()

View File

@@ -564,8 +564,7 @@ func (s *Server) PullHandler(c *gin.Context) {
return
}
reqName := cmp.Or(req.Model, req.Name)
name := model.ParseName(reqName)
name := model.ParseName(cmp.Or(req.Model, req.Name))
if !name.IsValid() {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
return
@@ -592,18 +591,7 @@ func (s *Server) PullHandler(c *gin.Context) {
defer cancel()
if err := PullModel(ctx, name.DisplayShortest(), regOpts, fn); err != nil {
if errors.Is(err, ErrRemoteModelNotFound) {
hint := fmt.Sprintf("Model %q not found - please check the model name is correct and try again", reqName)
if name.Host == DefaultRegistry {
hint = fmt.Sprintf("Model %q not found - search available models at: https://ollama.com/search?q=%s", reqName, reqName)
}
ch <- api.ErrorResponse{
Err: err.Error(),
Hint: hint,
}
} else {
ch <- gin.H{"error": err.Error()}
}
ch <- gin.H{"error": err.Error()}
}
}()