Compare commits

..

1 Commits

Author SHA1 Message Date
Michael Yang
2fe945412a get api models 2024-08-07 14:10:18 -07:00
21 changed files with 683 additions and 577 deletions

3
.gitattributes vendored
View File

@@ -1,3 +1,2 @@
llm/ext_server/* linguist-vendored
* text=auto
*.go text eol=lf
* text eol=lf

View File

@@ -325,7 +325,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [tlm](https://github.com/yusufcanb/tlm)
- [podman-ollama](https://github.com/ericcurtin/podman-ollama)
- [gollama](https://github.com/sammcj/gollama)
- [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/)
### Database

View File

@@ -669,7 +669,7 @@ curl http://localhost:11434/api/chat -d '{
```
curl http://localhost:11434/api/chat -d '{
"model": "llama3.1",
"model": "mistral",
"messages": [
{
"role": "user",
@@ -708,7 +708,7 @@ curl http://localhost:11434/api/chat -d '{
```json
{
"model": "llama3.1",
"model": "mistral:7b-instruct-v0.3-q4_K_M",
"created_at": "2024-07-22T20:33:28.123648Z",
"message": {
"role": "assistant",
@@ -1175,10 +1175,7 @@ curl http://localhost:11434/api/embed -d '{
"embeddings": [[
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
]],
"total_duration": 14143917,
"load_duration": 1019500,
"prompt_eval_count": 8
]]
}
```

View File

@@ -49,9 +49,13 @@ func PayloadsDir() (string, error) {
}
// Track our pid so we can clean up orphaned tmpdirs
n := filepath.Join(tmpDir, "ollama.pid")
if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
return "", fmt.Errorf("failed to write pid file %s: %w", n, err)
pidFilePath := filepath.Join(tmpDir, "ollama.pid")
pidFile, err := os.OpenFile(pidFilePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm)
if err != nil {
return "", err
}
if _, err := pidFile.Write([]byte(strconv.Itoa(os.Getpid()))); err != nil {
return "", err
}
// We create a distinct subdirectory for payloads within the tmpdir
@@ -63,44 +67,37 @@ func PayloadsDir() (string, error) {
// Best effort to clean up prior tmpdirs
func cleanupTmpDirs() {
matches, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*", "ollama.pid"))
dirs, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*"))
if err != nil {
return
}
for _, match := range matches {
raw, err := os.ReadFile(match)
if errors.Is(err, os.ErrNotExist) {
slog.Debug("not a ollama runtime directory, skipping", "path", match)
for _, d := range dirs {
info, err := os.Stat(d)
if err != nil || !info.IsDir() {
continue
} else if err != nil {
slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
}
raw, err := os.ReadFile(filepath.Join(d, "ollama.pid"))
if err != nil {
slog.Warn("failed to read ollama.pid", "path", d, "error", err)
// No pid, ignore this tmpdir
continue
}
pid, err := strconv.Atoi(string(raw))
if err != nil {
slog.Warn("invalid pid, skipping", "path", match, "error", err)
slog.Warn("failed to parse pid", "path", d, "error", err)
continue
}
p, err := os.FindProcess(pid)
if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
slog.Warn("process still running, skipping", "pid", pid, "path", match)
proc, err := os.FindProcess(pid)
if err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
slog.Warn("found running ollama", "pid", pid, "path", d)
// Another running ollama, ignore this tmpdir
continue
}
if err := os.Remove(match); err != nil {
slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
}
runners := filepath.Join(filepath.Dir(match), "runners")
if err := os.RemoveAll(runners); err != nil {
slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
}
if err := os.Remove(filepath.Dir(match)); err != nil {
slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err)
if err := os.Remove(d); err != nil {
slog.Warn("unable to cleanup stale tmpdir", "path", d, "error", err)
}
}
}

View File

@@ -305,41 +305,38 @@ func GetGPUInfo() GpuInfoList {
// Intel
if envconfig.IntelGPU() {
oHandles = initOneAPIHandles()
if oHandles != nil && oHandles.oneapi != nil {
// On windows we bundle the oneapi library one level above the runner dir
depPath = ""
if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi")
}
// On windows we bundle the oneapi library one level above the runner dir
depPath = ""
if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi")
for d := range oHandles.oneapi.num_drivers {
if oHandles.oneapi == nil {
// shouldn't happen
slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
continue
}
for d := range oHandles.oneapi.num_drivers {
if oHandles.oneapi == nil {
// shouldn't happen
slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
continue
}
devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
for i := range devCount {
gpuInfo := OneapiGPUInfo{
GpuInfo: GpuInfo{
Library: "oneapi",
},
driverIndex: int(d),
gpuIndex: int(i),
}
// TODO - split bootstrapping from updating free memory
C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
// TODO - convert this to MinimumMemory based on testing...
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo.free = C.uint64_t(totalFreeMem)
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DependencyPath = depPath
oneapiGPUs = append(oneapiGPUs, gpuInfo)
devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
for i := range devCount {
gpuInfo := OneapiGPUInfo{
GpuInfo: GpuInfo{
Library: "oneapi",
},
driverIndex: int(d),
gpuIndex: int(i),
}
// TODO - split bootstrapping from updating free memory
C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
// TODO - convert this to MinimumMemory based on testing...
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo.free = C.uint64_t(totalFreeMem)
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DependencyPath = depPath
oneapiGPUs = append(oneapiGPUs, gpuInfo)
}
}
}

View File

@@ -1223,7 +1223,9 @@ struct llama_server_context
res.result_json = json
{
{"id", res.id},
{"embedding", std::vector<float>(embd, embd + n_embd)},
{"timings", slot.get_formated_timings()},
};
}
}
@@ -3192,17 +3194,41 @@ int main(int argc, char **argv) {
prompt = "";
}
if (prompt.size() == 1) {
prompt = prompt[0];
}
// create and queue the task
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, {{"prompt", prompt}}, true, -1);
json responses;
{
const int id_task = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(id_task);
llama.request_completion(id_task, {{"prompt", prompt}}, true, -1);
// get the result
task_result result = llama.queue_results.recv(task_id);
llama.queue_results.remove_waiting_task_id(task_id);
// get the result
task_result result = llama.queue_results.recv(id_task);
llama.queue_results.remove_waiting_task_id(id_task);
if (result.error) {
return res.set_content(result.result_json.dump(), "application/json; charset=utf-8");
}
// send the result
return res.set_content(result.result_json.dump(), "application/json; charset=utf-8");
responses = result.result_json.value("results", std::vector<json>{result.result_json});
std::sort(responses.begin(), responses.end(), [](const json& a, const json& b) {
return a["id"] < b["id"];
});
json embeddings = json::array();
int prompt_n = 0;
for (auto & elem : responses) {
embeddings.push_back(elem.at("embedding"));
prompt_n += elem.at("timings").at("prompt_n").get<int>();
}
// send the result
json embedding_res = json{{"embedding", embeddings}, {"prompt_n", prompt_n}};
return res.set_content(embedding_res.dump(), "application/json; charset=utf-8");
}
});
// GG: if I put the main loop inside a thread, it crashes on the first request when build in Debug!?

View File

@@ -33,7 +33,7 @@ type LlamaServer interface {
Ping(ctx context.Context) error
WaitUntilRunning(ctx context.Context) error
Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error
Embedding(ctx context.Context, input string) ([]float32, error)
Embed(ctx context.Context, input []string) (*EmbedResponse, error)
Tokenize(ctx context.Context, content string) ([]int, error)
Detokenize(ctx context.Context, tokens []int) (string, error)
Close() error
@@ -125,9 +125,8 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
}
}
// On linux and windows, over-allocating CPU memory will almost always result in an error
// Darwin has fully dynamic swap so has no direct concept of free swap space
if runtime.GOOS != "darwin" {
// On linux, over-allocating CPU memory will almost always result in an error
if runtime.GOOS == "linux" {
systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
available := systemFreeMemory + systemSwapFreeMemory
if systemMemoryRequired > available {
@@ -883,20 +882,24 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
return nil
}
type EmbeddingRequest struct {
Content string `json:"content"`
type EmbedRequest struct {
Content []string `json:"content"`
}
type EmbeddingResponse struct {
Embedding []float32 `json:"embedding"`
type EmbedResponse struct {
Embedding [][]float32 `json:"embedding"`
PromptEvalCount int `json:"prompt_n"`
}
func (s *llmServer) Embedding(ctx context.Context, input string) ([]float32, error) {
if err := s.sem.Acquire(ctx, 1); err != nil {
func (s *llmServer) Embed(ctx context.Context, input []string) (*EmbedResponse, error) {
// each input will use a slot, so we need to acquire the semaphore for
// the number of inputs up to numParallel
slots := int64(min(len(input), s.numParallel))
if err := s.sem.Acquire(ctx, slots); err != nil {
slog.Error("Failed to acquire semaphore", "error", err)
return nil, err
}
defer s.sem.Release(1)
defer s.sem.Release(slots)
// Make sure the server is ready
status, err := s.getServerStatusRetry(ctx)
@@ -906,18 +909,18 @@ func (s *llmServer) Embedding(ctx context.Context, input string) ([]float32, err
return nil, fmt.Errorf("unexpected server status: %s", status.ToString())
}
data, err := json.Marshal(EmbeddingRequest{Content: input})
data, err := json.Marshal(EmbedRequest{Content: input})
if err != nil {
return nil, fmt.Errorf("error marshaling embed data: %w", err)
}
r, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/embedding", s.port), bytes.NewBuffer(data))
req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/embedding", s.port), bytes.NewBuffer(data))
if err != nil {
return nil, fmt.Errorf("error creating embed request: %w", err)
}
r.Header.Set("Content-Type", "application/json")
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(r)
resp, err := http.DefaultClient.Do(req)
if err != nil {
return nil, fmt.Errorf("do embedding request: %w", err)
}
@@ -933,12 +936,12 @@ func (s *llmServer) Embedding(ctx context.Context, input string) ([]float32, err
return nil, fmt.Errorf("%s", body)
}
var e EmbeddingResponse
var e EmbedResponse
if err := json.Unmarshal(body, &e); err != nil {
return nil, fmt.Errorf("unmarshal tokenize response: %w", err)
}
return e.Embedding, nil
return &e, nil
}
type TokenizeRequest struct {

View File

@@ -26,7 +26,6 @@ var errorPrefixes = []string{
"cudaMalloc failed",
"\"ERR\"",
"error loading model",
"GGML_ASSERT",
}
func (w *StatusWriter) Write(b []byte) (int, error) {

View File

@@ -7,22 +7,27 @@ import (
"io"
"net/http"
"net/http/httptest"
"reflect"
"strings"
"testing"
"time"
"github.com/gin-gonic/gin"
"github.com/stretchr/testify/assert"
"github.com/ollama/ollama/api"
)
const (
prefix = `data:image/jpeg;base64,`
image = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
prefix = `data:image/jpeg;base64,`
image = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
imageURL = prefix + image
)
var False = false
func prepareRequest(req *http.Request, body any) {
bodyBytes, _ := json.Marshal(body)
req.Body = io.NopCloser(bytes.NewReader(bodyBytes))
req.Header.Set("Content-Type", "application/json")
}
func captureRequestMiddleware(capturedRequest any) gin.HandlerFunc {
return func(c *gin.Context) {
@@ -38,136 +43,134 @@ func captureRequestMiddleware(capturedRequest any) gin.HandlerFunc {
func TestChatMiddleware(t *testing.T) {
type testCase struct {
name string
body string
req api.ChatRequest
err ErrorResponse
Name string
Setup func(t *testing.T, req *http.Request)
Expected func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder)
}
var capturedRequest *api.ChatRequest
testCases := []testCase{
{
name: "chat handler",
body: `{
"model": "test-model",
"messages": [
{"role": "user", "content": "Hello"}
]
}`,
req: api.ChatRequest{
Model: "test-model",
Messages: []api.Message{
{
Role: "user",
Content: "Hello",
},
},
Options: map[string]any{
"temperature": 1.0,
"top_p": 1.0,
},
Stream: &False,
Name: "chat handler",
Setup: func(t *testing.T, req *http.Request) {
body := ChatCompletionRequest{
Model: "test-model",
Messages: []Message{{Role: "user", Content: "Hello"}},
}
prepareRequest(req, body)
},
},
{
name: "chat handler with image content",
body: `{
"model": "test-model",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Hello"
},
{
"type": "image_url",
"image_url": {
"url": "` + prefix + image + `"
}
}
]
}
]
}`,
req: api.ChatRequest{
Model: "test-model",
Messages: []api.Message{
{
Role: "user",
Content: "Hello",
},
{
Role: "user",
Images: []api.ImageData{
func() []byte {
img, _ := base64.StdEncoding.DecodeString(image)
return img
}(),
},
},
},
Options: map[string]any{
"temperature": 1.0,
"top_p": 1.0,
},
Stream: &False,
},
},
{
name: "chat handler with tools",
body: `{
"model": "test-model",
"messages": [
{"role": "user", "content": "What's the weather like in Paris Today?"},
{"role": "assistant", "tool_calls": [{"id": "id", "type": "function", "function": {"name": "get_current_weather", "arguments": "{\"location\": \"Paris, France\", \"format\": \"celsius\"}"}}]}
]
}`,
req: api.ChatRequest{
Model: "test-model",
Messages: []api.Message{
{
Role: "user",
Content: "What's the weather like in Paris Today?",
},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_current_weather",
Arguments: map[string]interface{}{
"location": "Paris, France",
"format": "celsius",
},
},
},
},
},
},
Options: map[string]any{
"temperature": 1.0,
"top_p": 1.0,
},
Stream: &False,
},
},
Expected: func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder) {
if resp.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", resp.Code)
}
if req.Messages[0].Role != "user" {
t.Fatalf("expected 'user', got %s", req.Messages[0].Role)
}
if req.Messages[0].Content != "Hello" {
t.Fatalf("expected 'Hello', got %s", req.Messages[0].Content)
}
},
},
{
name: "chat handler error forwarding",
body: `{
"model": "test-model",
"messages": [
{"role": "user", "content": 2}
]
}`,
err: ErrorResponse{
Error: Error{
Message: "invalid message content type: float64",
Type: "invalid_request_error",
},
Name: "chat handler with image content",
Setup: func(t *testing.T, req *http.Request) {
body := ChatCompletionRequest{
Model: "test-model",
Messages: []Message{
{
Role: "user", Content: []map[string]any{
{"type": "text", "text": "Hello"},
{"type": "image_url", "image_url": map[string]string{"url": imageURL}},
},
},
},
}
prepareRequest(req, body)
},
Expected: func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder) {
if resp.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", resp.Code)
}
if req.Messages[0].Role != "user" {
t.Fatalf("expected 'user', got %s", req.Messages[0].Role)
}
if req.Messages[0].Content != "Hello" {
t.Fatalf("expected 'Hello', got %s", req.Messages[0].Content)
}
img, _ := base64.StdEncoding.DecodeString(imageURL[len(prefix):])
if req.Messages[1].Role != "user" {
t.Fatalf("expected 'user', got %s", req.Messages[1].Role)
}
if !bytes.Equal(req.Messages[1].Images[0], img) {
t.Fatalf("expected image encoding, got %s", req.Messages[1].Images[0])
}
},
},
{
Name: "chat handler with tools",
Setup: func(t *testing.T, req *http.Request) {
body := ChatCompletionRequest{
Model: "test-model",
Messages: []Message{
{Role: "user", Content: "What's the weather like in Paris Today?"},
{Role: "assistant", ToolCalls: []ToolCall{{
ID: "id",
Type: "function",
Function: struct {
Name string `json:"name"`
Arguments string `json:"arguments"`
}{
Name: "get_current_weather",
Arguments: "{\"location\": \"Paris, France\", \"format\": \"celsius\"}",
},
}}},
},
}
prepareRequest(req, body)
},
Expected: func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder) {
if resp.Code != 200 {
t.Fatalf("expected 200, got %d", resp.Code)
}
if req.Messages[0].Content != "What's the weather like in Paris Today?" {
t.Fatalf("expected What's the weather like in Paris Today?, got %s", req.Messages[0].Content)
}
if req.Messages[1].ToolCalls[0].Function.Arguments["location"] != "Paris, France" {
t.Fatalf("expected 'Paris, France', got %v", req.Messages[1].ToolCalls[0].Function.Arguments["location"])
}
if req.Messages[1].ToolCalls[0].Function.Arguments["format"] != "celsius" {
t.Fatalf("expected celsius, got %v", req.Messages[1].ToolCalls[0].Function.Arguments["format"])
}
},
},
{
Name: "chat handler error forwarding",
Setup: func(t *testing.T, req *http.Request) {
body := ChatCompletionRequest{
Model: "test-model",
Messages: []Message{{Role: "user", Content: 2}},
}
prepareRequest(req, body)
},
Expected: func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder) {
if resp.Code != http.StatusBadRequest {
t.Fatalf("expected 400, got %d", resp.Code)
}
if !strings.Contains(resp.Body.String(), "invalid message content type") {
t.Fatalf("error was not forwarded")
}
},
},
}
@@ -182,26 +185,16 @@ func TestChatMiddleware(t *testing.T) {
router.Handle(http.MethodPost, "/api/chat", endpoint)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
req, _ := http.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(tc.body))
req.Header.Set("Content-Type", "application/json")
t.Run(tc.Name, func(t *testing.T) {
req, _ := http.NewRequest(http.MethodPost, "/api/chat", nil)
tc.Setup(t, req)
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
var errResp ErrorResponse
if resp.Code != http.StatusOK {
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
t.Fatal(err)
}
}
if capturedRequest != nil && !reflect.DeepEqual(tc.req, *capturedRequest) {
t.Fatal("requests did not match")
}
tc.Expected(t, capturedRequest, resp)
if !reflect.DeepEqual(tc.err, errResp) {
t.Fatal("errors did not match")
}
capturedRequest = nil
})
}
@@ -209,52 +202,71 @@ func TestChatMiddleware(t *testing.T) {
func TestCompletionsMiddleware(t *testing.T) {
type testCase struct {
name string
body string
req api.GenerateRequest
err ErrorResponse
Name string
Setup func(t *testing.T, req *http.Request)
Expected func(t *testing.T, req *api.GenerateRequest, resp *httptest.ResponseRecorder)
}
var capturedRequest *api.GenerateRequest
testCases := []testCase{
{
name: "completions handler",
body: `{
"model": "test-model",
"prompt": "Hello",
"temperature": 0.8,
"stop": ["\n", "stop"],
"suffix": "suffix"
}`,
req: api.GenerateRequest{
Model: "test-model",
Prompt: "Hello",
Options: map[string]any{
"frequency_penalty": 0.0,
"presence_penalty": 0.0,
"temperature": 1.6,
"top_p": 1.0,
"stop": []any{"\n", "stop"},
},
Suffix: "suffix",
Stream: &False,
Name: "completions handler",
Setup: func(t *testing.T, req *http.Request) {
temp := float32(0.8)
body := CompletionRequest{
Model: "test-model",
Prompt: "Hello",
Temperature: &temp,
Stop: []string{"\n", "stop"},
Suffix: "suffix",
}
prepareRequest(req, body)
},
Expected: func(t *testing.T, req *api.GenerateRequest, resp *httptest.ResponseRecorder) {
if req.Prompt != "Hello" {
t.Fatalf("expected 'Hello', got %s", req.Prompt)
}
if req.Options["temperature"] != 1.6 {
t.Fatalf("expected 1.6, got %f", req.Options["temperature"])
}
stopTokens, ok := req.Options["stop"].([]any)
if !ok {
t.Fatalf("expected stop tokens to be a list")
}
if stopTokens[0] != "\n" || stopTokens[1] != "stop" {
t.Fatalf("expected ['\\n', 'stop'], got %v", stopTokens)
}
if req.Suffix != "suffix" {
t.Fatalf("expected 'suffix', got %s", req.Suffix)
}
},
},
{
name: "completions handler error forwarding",
body: `{
"model": "test-model",
"prompt": "Hello",
"temperature": null,
"stop": [1, 2],
"suffix": "suffix"
}`,
err: ErrorResponse{
Error: Error{
Message: "invalid type for 'stop' field: float64",
Type: "invalid_request_error",
},
Name: "completions handler error forwarding",
Setup: func(t *testing.T, req *http.Request) {
body := CompletionRequest{
Model: "test-model",
Prompt: "Hello",
Temperature: nil,
Stop: []int{1, 2},
Suffix: "suffix",
}
prepareRequest(req, body)
},
Expected: func(t *testing.T, req *api.GenerateRequest, resp *httptest.ResponseRecorder) {
if resp.Code != http.StatusBadRequest {
t.Fatalf("expected 400, got %d", resp.Code)
}
if !strings.Contains(resp.Body.String(), "invalid type for 'stop' field") {
t.Fatalf("error was not forwarded")
}
},
},
}
@@ -269,27 +281,15 @@ func TestCompletionsMiddleware(t *testing.T) {
router.Handle(http.MethodPost, "/api/generate", endpoint)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
req, _ := http.NewRequest(http.MethodPost, "/api/generate", strings.NewReader(tc.body))
req.Header.Set("Content-Type", "application/json")
t.Run(tc.Name, func(t *testing.T) {
req, _ := http.NewRequest(http.MethodPost, "/api/generate", nil)
tc.Setup(t, req)
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
var errResp ErrorResponse
if resp.Code != http.StatusOK {
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
t.Fatal(err)
}
}
if capturedRequest != nil && !reflect.DeepEqual(tc.req, *capturedRequest) {
t.Fatal("requests did not match")
}
if !reflect.DeepEqual(tc.err, errResp) {
t.Fatal("errors did not match")
}
tc.Expected(t, capturedRequest, resp)
capturedRequest = nil
})
@@ -298,47 +298,78 @@ func TestCompletionsMiddleware(t *testing.T) {
func TestEmbeddingsMiddleware(t *testing.T) {
type testCase struct {
name string
body string
req api.EmbedRequest
err ErrorResponse
Name string
Setup func(t *testing.T, req *http.Request)
Expected func(t *testing.T, req *api.EmbedRequest, resp *httptest.ResponseRecorder)
}
var capturedRequest *api.EmbedRequest
testCases := []testCase{
{
name: "embed handler single input",
body: `{
"input": "Hello",
"model": "test-model"
}`,
req: api.EmbedRequest{
Input: "Hello",
Model: "test-model",
Name: "embed handler single input",
Setup: func(t *testing.T, req *http.Request) {
body := EmbedRequest{
Input: "Hello",
Model: "test-model",
}
prepareRequest(req, body)
},
Expected: func(t *testing.T, req *api.EmbedRequest, resp *httptest.ResponseRecorder) {
if req.Input != "Hello" {
t.Fatalf("expected 'Hello', got %s", req.Input)
}
if req.Model != "test-model" {
t.Fatalf("expected 'test-model', got %s", req.Model)
}
},
},
{
name: "embed handler batch input",
body: `{
"input": ["Hello", "World"],
"model": "test-model"
}`,
req: api.EmbedRequest{
Input: []any{"Hello", "World"},
Model: "test-model",
Name: "embed handler batch input",
Setup: func(t *testing.T, req *http.Request) {
body := EmbedRequest{
Input: []string{"Hello", "World"},
Model: "test-model",
}
prepareRequest(req, body)
},
Expected: func(t *testing.T, req *api.EmbedRequest, resp *httptest.ResponseRecorder) {
input, ok := req.Input.([]any)
if !ok {
t.Fatalf("expected input to be a list")
}
if input[0].(string) != "Hello" {
t.Fatalf("expected 'Hello', got %s", input[0])
}
if input[1].(string) != "World" {
t.Fatalf("expected 'World', got %s", input[1])
}
if req.Model != "test-model" {
t.Fatalf("expected 'test-model', got %s", req.Model)
}
},
},
{
name: "embed handler error forwarding",
body: `{
"model": "test-model"
}`,
err: ErrorResponse{
Error: Error{
Message: "invalid input",
Type: "invalid_request_error",
},
Name: "embed handler error forwarding",
Setup: func(t *testing.T, req *http.Request) {
body := EmbedRequest{
Model: "test-model",
}
prepareRequest(req, body)
},
Expected: func(t *testing.T, req *api.EmbedRequest, resp *httptest.ResponseRecorder) {
if resp.Code != http.StatusBadRequest {
t.Fatalf("expected 400, got %d", resp.Code)
}
if !strings.Contains(resp.Body.String(), "invalid input") {
t.Fatalf("error was not forwarded")
}
},
},
}
@@ -353,167 +384,116 @@ func TestEmbeddingsMiddleware(t *testing.T) {
router.Handle(http.MethodPost, "/api/embed", endpoint)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
req, _ := http.NewRequest(http.MethodPost, "/api/embed", strings.NewReader(tc.body))
req.Header.Set("Content-Type", "application/json")
t.Run(tc.Name, func(t *testing.T) {
req, _ := http.NewRequest(http.MethodPost, "/api/embed", nil)
tc.Setup(t, req)
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
var errResp ErrorResponse
if resp.Code != http.StatusOK {
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
t.Fatal(err)
}
}
if capturedRequest != nil && !reflect.DeepEqual(tc.req, *capturedRequest) {
t.Fatal("requests did not match")
}
if !reflect.DeepEqual(tc.err, errResp) {
t.Fatal("errors did not match")
}
tc.Expected(t, capturedRequest, resp)
capturedRequest = nil
})
}
}
func TestListMiddleware(t *testing.T) {
func TestMiddlewareResponses(t *testing.T) {
type testCase struct {
name string
endpoint func(c *gin.Context)
resp string
Name string
Method string
Path string
TestPath string
Handler func() gin.HandlerFunc
Endpoint func(c *gin.Context)
Setup func(t *testing.T, req *http.Request)
Expected func(t *testing.T, resp *httptest.ResponseRecorder)
}
testCases := []testCase{
{
name: "list handler",
endpoint: func(c *gin.Context) {
Name: "list handler",
Method: http.MethodGet,
Path: "/api/tags",
TestPath: "/api/tags",
Handler: ListMiddleware,
Endpoint: func(c *gin.Context) {
c.JSON(http.StatusOK, api.ListResponse{
Models: []api.ListModelResponse{
{
Name: "test-model",
ModifiedAt: time.Unix(int64(1686935002), 0).UTC(),
Name: "Test Model",
},
},
})
},
resp: `{
"object": "list",
"data": [
{
"id": "test-model",
"object": "model",
"created": 1686935002,
"owned_by": "library"
}
]
}`,
},
{
name: "list handler empty output",
endpoint: func(c *gin.Context) {
c.JSON(http.StatusOK, api.ListResponse{})
Expected: func(t *testing.T, resp *httptest.ResponseRecorder) {
var listResp ListCompletion
if err := json.NewDecoder(resp.Body).Decode(&listResp); err != nil {
t.Fatal(err)
}
if listResp.Object != "list" {
t.Fatalf("expected list, got %s", listResp.Object)
}
if len(listResp.Data) != 1 {
t.Fatalf("expected 1, got %d", len(listResp.Data))
}
if listResp.Data[0].Id != "Test Model" {
t.Fatalf("expected Test Model, got %s", listResp.Data[0].Id)
}
},
resp: `{
"object": "list",
"data": null
}`,
},
}
gin.SetMode(gin.TestMode)
for _, tc := range testCases {
router := gin.New()
router.Use(ListMiddleware())
router.Handle(http.MethodGet, "/api/tags", tc.endpoint)
req, _ := http.NewRequest(http.MethodGet, "/api/tags", nil)
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
var expected, actual map[string]any
err := json.Unmarshal([]byte(tc.resp), &expected)
if err != nil {
t.Fatalf("failed to unmarshal expected response: %v", err)
}
err = json.Unmarshal(resp.Body.Bytes(), &actual)
if err != nil {
t.Fatalf("failed to unmarshal actual response: %v", err)
}
if !reflect.DeepEqual(expected, actual) {
t.Errorf("responses did not match\nExpected: %+v\nActual: %+v", expected, actual)
}
}
}
func TestRetrieveMiddleware(t *testing.T) {
type testCase struct {
name string
endpoint func(c *gin.Context)
resp string
}
testCases := []testCase{
{
name: "retrieve handler",
endpoint: func(c *gin.Context) {
Name: "retrieve model",
Method: http.MethodGet,
Path: "/api/show/:model",
TestPath: "/api/show/test-model",
Handler: RetrieveMiddleware,
Endpoint: func(c *gin.Context) {
c.JSON(http.StatusOK, api.ShowResponse{
ModifiedAt: time.Unix(int64(1686935002), 0).UTC(),
ModifiedAt: time.Date(2024, 6, 17, 13, 45, 0, 0, time.UTC),
})
},
resp: `{
"id":"test-model",
"object":"model",
"created":1686935002,
"owned_by":"library"}
`,
},
{
name: "retrieve handler error forwarding",
endpoint: func(c *gin.Context) {
c.JSON(http.StatusBadRequest, gin.H{"error": "model not found"})
},
resp: `{
"error": {
"code": null,
"message": "model not found",
"param": null,
"type": "api_error"
Expected: func(t *testing.T, resp *httptest.ResponseRecorder) {
var retrieveResp Model
if err := json.NewDecoder(resp.Body).Decode(&retrieveResp); err != nil {
t.Fatal(err)
}
}`,
if retrieveResp.Object != "model" {
t.Fatalf("Expected object to be model, got %s", retrieveResp.Object)
}
if retrieveResp.Id != "test-model" {
t.Fatalf("Expected id to be test-model, got %s", retrieveResp.Id)
}
},
},
}
gin.SetMode(gin.TestMode)
router := gin.New()
for _, tc := range testCases {
router := gin.New()
router.Use(RetrieveMiddleware())
router.Handle(http.MethodGet, "/api/show/:model", tc.endpoint)
req, _ := http.NewRequest(http.MethodGet, "/api/show/test-model", nil)
t.Run(tc.Name, func(t *testing.T) {
router = gin.New()
router.Use(tc.Handler())
router.Handle(tc.Method, tc.Path, tc.Endpoint)
req, _ := http.NewRequest(tc.Method, tc.TestPath, nil)
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
if tc.Setup != nil {
tc.Setup(t, req)
}
var expected, actual map[string]any
err := json.Unmarshal([]byte(tc.resp), &expected)
if err != nil {
t.Fatalf("failed to unmarshal expected response: %v", err)
}
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
err = json.Unmarshal(resp.Body.Bytes(), &actual)
if err != nil {
t.Fatalf("failed to unmarshal actual response: %v", err)
}
assert.Equal(t, http.StatusOK, resp.Code)
if !reflect.DeepEqual(expected, actual) {
t.Errorf("responses did not match\nExpected: %+v\nActual: %+v", expected, actual)
}
tc.Expected(t, resp)
})
}
}

View File

@@ -209,15 +209,15 @@ install_cuda_driver_yum() {
case $PACKAGE_MANAGER in
yum)
$SUDO $PACKAGE_MANAGER -y install yum-utils
if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m | sed -e 's/aarch64/sbsa/')/cuda-$1$2.repo" >/dev/null ; then
$SUDO $PACKAGE_MANAGER-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m | sed -e 's/aarch64/sbsa/')/cuda-$1$2.repo
if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo" >/dev/null ; then
$SUDO $PACKAGE_MANAGER-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
else
error $CUDA_REPO_ERR_MSG
fi
;;
dnf)
if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m | sed -e 's/aarch64/sbsa/')/cuda-$1$2.repo" >/dev/null ; then
$SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m | sed -e 's/aarch64/sbsa/')/cuda-$1$2.repo
if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo" >/dev/null ; then
$SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
else
error $CUDA_REPO_ERR_MSG
fi
@@ -245,8 +245,8 @@ install_cuda_driver_yum() {
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#debian
install_cuda_driver_apt() {
status 'Installing NVIDIA repository...'
if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m | sed -e 's/aarch64/sbsa/')/cuda-keyring_1.1-1_all.deb" >/dev/null ; then
curl -fsSL -o $TEMP_DIR/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m | sed -e 's/aarch64/sbsa/')/cuda-keyring_1.1-1_all.deb
if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb" >/dev/null ; then
curl -fsSL -o $TEMP_DIR/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb
else
error $CUDA_REPO_ERR_MSG
fi

View File

@@ -216,7 +216,9 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
return err
}
defer file.Close()
setSparse(file)
if err := setSparse(file); err != nil {
return err
}
_ = file.Truncate(b.Total)
@@ -233,7 +235,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
newOpts.CheckRedirect = func(req *http.Request, via []*http.Request) error {
if len(via) > 10 {
return errors.New("maximum redirects exceeded (10) for directURL")
return errors.New("maxium redirects exceeded (10) for directURL")
}
// if the hostname is the same, allow the redirect

View File

@@ -250,21 +250,19 @@ func GetModel(name string) (*Model, error) {
Template: template.DefaultTemplate,
}
if manifest.Config.Digest != "" {
filename, err := GetBlobsPath(manifest.Config.Digest)
if err != nil {
return nil, err
}
filename, err := GetBlobsPath(manifest.Config.Digest)
if err != nil {
return nil, err
}
configFile, err := os.Open(filename)
if err != nil {
return nil, err
}
defer configFile.Close()
configFile, err := os.Open(filename)
if err != nil {
return nil, err
}
defer configFile.Close()
if err := json.NewDecoder(configFile).Decode(&model.Config); err != nil {
return nil, err
}
if err := json.NewDecoder(configFile).Decode(&model.Config); err != nil {
return nil, err
}
for _, layer := range manifest.Layers {
@@ -373,7 +371,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
var messages []*api.Message
parameters := make(map[string]any)
var layers []Layer
var layers []*Layer
for _, c := range modelfile.Commands {
mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
@@ -499,7 +497,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
if c.Name != "license" {
// replace
layers = slices.DeleteFunc(layers, func(layer Layer) bool {
layers = slices.DeleteFunc(layers, func(layer *Layer) bool {
if layer.MediaType != mediatype {
return false
}
@@ -545,7 +543,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
}
var err2 error
layers = slices.DeleteFunc(layers, func(layer Layer) bool {
layers = slices.DeleteFunc(layers, func(layer *Layer) bool {
switch layer.MediaType {
case "application/vnd.ollama.image.message":
// if there are new messages, remove the inherited ones
@@ -625,12 +623,12 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
return err
}
configLayer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
layer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
if err != nil {
return err
}
for _, layer := range append(layers, configLayer) {
for _, layer := range append(layers, layer) {
if layer.status != "" {
fn(api.ProgressResponse{Status: layer.status})
}
@@ -639,7 +637,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
old, _ := ParseNamedManifest(name)
fn(api.ProgressResponse{Status: "writing manifest"})
if err := WriteManifest(name, configLayer, layers); err != nil {
if err := WriteManifest(name, layer, layers); err != nil {
return err
}
@@ -716,7 +714,8 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{})
// save (i.e. delete from the deleteMap) any files used in other manifests
manifest, _, err := GetManifest(fmp)
if err != nil {
return err
//nolint:nilerr
return nil
}
for _, layer := range manifest.Layers {
@@ -783,8 +782,7 @@ func PruneLayers() error {
err = deleteUnusedLayers(nil, deleteMap)
if err != nil {
slog.Error(fmt.Sprintf("couldn't remove unused layers: %v", err))
return nil
return err
}
slog.Info(fmt.Sprintf("total unused blobs removed: %d", len(deleteMap)))
@@ -839,11 +837,9 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return err
}
var layers []Layer
var layers []*Layer
layers = append(layers, manifest.Layers...)
if manifest.Config.Digest != "" {
layers = append(layers, manifest.Config)
}
layers = append(layers, manifest.Config)
for _, layer := range layers {
if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil {
@@ -894,9 +890,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
for _, l := range manifest.Layers {
deleteMap[l.Digest] = struct{}{}
}
if manifest.Config.Digest != "" {
deleteMap[manifest.Config.Digest] = struct{}{}
}
deleteMap[manifest.Config.Digest] = struct{}{}
}
}
@@ -911,11 +905,9 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return fmt.Errorf("pull model manifest: %s", err)
}
var layers []Layer
var layers []*Layer
layers = append(layers, manifest.Layers...)
if manifest.Config.Digest != "" {
layers = append(layers, manifest.Config)
}
layers = append(layers, manifest.Config)
skipVerify := make(map[string]bool)
for _, layer := range layers {
@@ -979,8 +971,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
fn(api.ProgressResponse{Status: "removing any unused layers"})
err = deleteUnusedLayers(nil, deleteMap)
if err != nil {
slog.Error(fmt.Sprintf("couldn't remove unused layers: %v", err))
fn(api.ProgressResponse{Status: fmt.Sprintf("couldn't remove unused layers: %v", err)})
return err
}
}

View File

@@ -2,7 +2,6 @@ package server
import (
"crypto/sha256"
"errors"
"fmt"
"io"
"os"
@@ -16,15 +15,15 @@ type Layer struct {
status string
}
func NewLayer(r io.Reader, mediatype string) (Layer, error) {
func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
blobs, err := GetBlobsPath("")
if err != nil {
return Layer{}, err
return nil, err
}
temp, err := os.CreateTemp(blobs, "sha256-")
if err != nil {
return Layer{}, err
return nil, err
}
defer temp.Close()
defer os.Remove(temp.Name())
@@ -32,28 +31,28 @@ func NewLayer(r io.Reader, mediatype string) (Layer, error) {
sha256sum := sha256.New()
n, err := io.Copy(io.MultiWriter(temp, sha256sum), r)
if err != nil {
return Layer{}, err
return nil, err
}
if err := temp.Close(); err != nil {
return Layer{}, err
return nil, err
}
digest := fmt.Sprintf("sha256:%x", sha256sum.Sum(nil))
blob, err := GetBlobsPath(digest)
if err != nil {
return Layer{}, err
return nil, err
}
status := "using existing layer"
if _, err := os.Stat(blob); err != nil {
status = "creating new layer"
if err := os.Rename(temp.Name(), blob); err != nil {
return Layer{}, err
return nil, err
}
}
return Layer{
return &Layer{
MediaType: mediatype,
Digest: digest,
Size: n,
@@ -61,22 +60,18 @@ func NewLayer(r io.Reader, mediatype string) (Layer, error) {
}, nil
}
func NewLayerFromLayer(digest, mediatype, from string) (Layer, error) {
if digest == "" {
return Layer{}, errors.New("creating new layer from layer with empty digest")
}
func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) {
blob, err := GetBlobsPath(digest)
if err != nil {
return Layer{}, err
return nil, err
}
fi, err := os.Stat(blob)
if err != nil {
return Layer{}, err
return nil, err
}
return Layer{
return &Layer{
MediaType: mediatype,
Digest: digest,
Size: fi.Size(),
@@ -86,10 +81,6 @@ func NewLayerFromLayer(digest, mediatype, from string) (Layer, error) {
}
func (l *Layer) Open() (io.ReadSeekCloser, error) {
if l.Digest == "" {
return nil, errors.New("opening layer with empty digest")
}
blob, err := GetBlobsPath(l.Digest)
if err != nil {
return nil, err
@@ -99,10 +90,6 @@ func (l *Layer) Open() (io.ReadSeekCloser, error) {
}
func (l *Layer) Remove() error {
if l.Digest == "" {
return nil
}
ms, err := Manifests()
if err != nil {
return err

View File

@@ -14,11 +14,12 @@ import (
)
type Manifest struct {
SchemaVersion int `json:"schemaVersion"`
MediaType string `json:"mediaType"`
Config Layer `json:"config"`
Layers []Layer `json:"layers"`
SchemaVersion int `json:"schemaVersion"`
MediaType string `json:"mediaType"`
Config *Layer `json:"config"`
Layers []*Layer `json:"layers"`
name model.Name
filepath string
fi os.FileInfo
digest string
@@ -47,12 +48,10 @@ func (m *Manifest) Remove() error {
func (m *Manifest) RemoveLayers() error {
for _, layer := range append(m.Layers, m.Config) {
if layer.Digest != "" {
if err := layer.Remove(); errors.Is(err, os.ErrNotExist) {
slog.Debug("layer does not exist", "digest", layer.Digest)
} else if err != nil {
return err
}
if err := layer.Remove(); errors.Is(err, os.ErrNotExist) {
slog.Debug("layer does not exist", "digest", layer.Digest)
} else if err != nil {
return err
}
}
@@ -71,7 +70,6 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
p := filepath.Join(manifests, n.Filepath())
var m Manifest
f, err := os.Open(p)
if err != nil {
return nil, err
@@ -83,11 +81,13 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
return nil, err
}
var m Manifest
sha256sum := sha256.New()
if err := json.NewDecoder(io.TeeReader(f, sha256sum)).Decode(&m); err != nil {
return nil, err
}
m.name = n
m.filepath = p
m.fi = fi
m.digest = hex.EncodeToString(sha256sum.Sum(nil))
@@ -95,7 +95,7 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
return &m, nil
}
func WriteManifest(name model.Name, config Layer, layers []Layer) error {
func WriteManifest(name model.Name, config *Layer, layers []*Layer) error {
manifests, err := GetManifestPath()
if err != nil {
return err

View File

@@ -26,7 +26,7 @@ import (
var intermediateBlobs map[string]string = make(map[string]string)
type layerGGML struct {
Layer
*Layer
*llm.GGML
}

View File

@@ -23,7 +23,6 @@ import (
"github.com/gin-contrib/cors"
"github.com/gin-gonic/gin"
"golang.org/x/sync/errgroup"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
@@ -347,7 +346,6 @@ func (s *Server) EmbedHandler(c *gin.Context) {
return
}
var count int
for i, s := range input {
tokens, err := r.Tokenize(c.Request.Context(), s)
if err != nil {
@@ -370,36 +368,25 @@ func (s *Server) EmbedHandler(c *gin.Context) {
}
}
count += len(tokens)
input[i] = s
}
var g errgroup.Group
embeddings := make([][]float32, len(input))
for i, text := range input {
g.Go(func() error {
embedding, err := r.Embedding(c.Request.Context(), text)
if err != nil {
return err
}
embeddings[i] = normalize(embedding)
return nil
})
embeddings, err := r.Embed(c.Request.Context(), input)
if err != nil {
slog.Error("embedding generation failed", "error", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
return
}
if err := g.Wait(); err != nil {
slog.Error("embedding generation failed", "error", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Errorf("failed to generate embeddings: %v", err)})
return
for i, e := range embeddings.Embedding {
embeddings.Embedding[i] = normalize(e)
}
resp := api.EmbedResponse{
Model: req.Model,
Embeddings: embeddings,
Embeddings: embeddings.Embedding,
TotalDuration: time.Since(checkpointStart),
LoadDuration: checkpointLoaded.Sub(checkpointStart),
PromptEvalCount: count,
PromptEvalCount: embeddings.PromptEvalCount,
}
c.JSON(http.StatusOK, resp)
}
@@ -443,20 +430,21 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
return
}
embedding, err := r.Embedding(c.Request.Context(), req.Prompt)
embeddings, err := r.Embed(c.Request.Context(), []string{req.Prompt})
if err != nil {
slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
return
}
var e []float64
for _, v := range embedding {
e = append(e, float64(v))
embedding := make([]float64, len(embeddings.Embedding[0]))
for i, v := range embeddings.Embedding[0] {
embedding[i] = float64(v)
}
resp := api.EmbeddingResponse{
Embedding: e,
Embedding: embedding,
}
c.JSON(http.StatusOK, resp)
}
@@ -715,6 +703,153 @@ func (s *Server) ShowModelHandler(c *gin.Context) {
c.JSON(http.StatusOK, resp)
}
func manifestLayers(m *Manifest, exclude []string) (map[string]any, error) {
r := map[string]any{
"name": m.name.DisplayShortest(),
"digest": m.digest,
"size": m.Size(),
"modified_at": m.fi.ModTime(),
}
excludeAll := slices.Contains(exclude, "all")
excludeDetails := slices.Contains(exclude, "details")
for _, layer := range m.Layers {
var errExcludeKey = errors.New("exclude key")
key, content, err := func() (string, any, error) {
key := strings.TrimPrefix(layer.MediaType, "application/vnd.ollama.image.")
if slices.Contains(exclude, key) || excludeAll {
return "", nil, errExcludeKey
}
f, err := layer.Open()
if err != nil {
return "", nil, err
}
defer f.Close()
switch key {
case "model", "projector", "adapter":
ggml, _, err := llm.DecodeGGML(f, 0)
if err != nil {
return "", nil, err
}
content := map[string]any{
"architecture": ggml.KV().Architecture(),
"file_type": ggml.KV().FileType().String(),
"parameter_count": ggml.KV().ParameterCount(),
}
if !slices.Contains(exclude, key+".details") && !excludeAll && !excludeDetails {
// exclude any extraneous or redundant fields
delete(ggml.KV(), "general.basename")
delete(ggml.KV(), "general.description")
delete(ggml.KV(), "general.filename")
delete(ggml.KV(), "general.finetune")
delete(ggml.KV(), "general.languages")
delete(ggml.KV(), "general.license")
delete(ggml.KV(), "general.license.link")
delete(ggml.KV(), "general.name")
delete(ggml.KV(), "general.paramter_count")
delete(ggml.KV(), "general.size_label")
delete(ggml.KV(), "general.tags")
delete(ggml.KV(), "general.type")
delete(ggml.KV(), "general.quantization_version")
delete(ggml.KV(), "tokenizer.chat_template")
content["details"] = ggml.KV()
}
return key, content, nil
case "params", "messages":
var content any
if err := json.NewDecoder(f).Decode(&content); err != nil {
return "", nil, err
}
return key, content, nil
case "template", "system", "license":
bts, err := io.ReadAll(f)
if err != nil {
return "", nil, err
}
if key == "license" {
return key, []any{string(bts)}, nil
}
return key, string(bts), nil
}
return layer.MediaType, nil, nil
}()
if errors.Is(err, errExcludeKey) {
continue
} else if err != nil {
return nil, err
}
if s, ok := r[key].([]any); ok {
r[key] = append(s, content)
} else {
r[key] = content
}
}
return r, nil
}
func (s *Server) GetModelsHandler(c *gin.Context) {
ms, err := Manifests()
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
var rs []map[string]any
for _, m := range ms {
r, err := manifestLayers(m, c.QueryArray("exclude"))
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
rs = append(rs, r)
}
slices.SortStableFunc(rs, func(i, j map[string]any) int {
// most recently modified first
return cmp.Compare(
j["modified_at"].(time.Time).Unix(),
i["modified_at"].(time.Time).Unix(),
)
})
c.JSON(http.StatusOK, rs)
}
func (s *Server) GetModelHandler(c *gin.Context) {
n := model.ParseName(strings.TrimPrefix(c.Param("model"), "/"))
if !n.IsValid() {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
return
}
m, err := ParseNamedManifest(n)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
r, err := manifestLayers(m, c.QueryArray("exclude"))
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
c.JSON(http.StatusOK, r)
}
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
m, err := GetModel(req.Model)
if err != nil {
@@ -836,20 +971,17 @@ func (s *Server) ListModelsHandler(c *gin.Context) {
models := []api.ListModelResponse{}
for n, m := range ms {
f, err := m.Config.Open()
if err != nil {
slog.Warn("bad manifest filepath", "name", n, "error", err)
continue
}
defer f.Close()
var cf ConfigV2
if m.Config.Digest != "" {
f, err := m.Config.Open()
if err != nil {
slog.Warn("bad manifest filepath", "name", n, "error", err)
continue
}
defer f.Close()
if err := json.NewDecoder(f).Decode(&cf); err != nil {
slog.Warn("bad manifest config", "name", n, "error", err)
continue
}
if err := json.NewDecoder(f).Decode(&cf); err != nil {
slog.Warn("bad manifest config", "name", n, "error", err)
continue
}
// tag should never be masked
@@ -1043,11 +1175,6 @@ func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
if addr, err := netip.ParseAddr(host); err == nil {
if addr.IsLoopback() || addr.IsPrivate() || addr.IsUnspecified() || isLocalIP(addr) {
if c.Request.Method == http.MethodOptions {
c.AbortWithStatus(http.StatusNoContent)
return
}
c.Next()
return
}
@@ -1079,7 +1206,6 @@ func (s *Server) GenerateRoutes() http.Handler {
config.AllowOrigins = envconfig.Origins()
r := gin.Default()
r.HandleMethodNotAllowed = true
r.Use(
cors.New(config),
allowedHostsMiddleware(s.addr),
@@ -1111,6 +1237,9 @@ func (s *Server) GenerateRoutes() http.Handler {
c.String(http.StatusOK, "Ollama is running")
})
r.Handle(method, "/api/models", s.GetModelsHandler)
r.Handle(method, "/api/models/*model", s.GetModelHandler)
r.Handle(method, "/api/tags", s.ListModelsHandler)
r.Handle(method, "/api/version", func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"version": version.Version})

View File

@@ -98,7 +98,7 @@ func TestDeleteDuplicateLayers(t *testing.T) {
}
// create a manifest with duplicate layers
if err := WriteManifest(n, config, []Layer{config}); err != nil {
if err := WriteManifest(n, config, []*Layer{config}); err != nil {
t.Fatal(err)
}

View File

@@ -708,8 +708,8 @@ type mockLlm struct {
pingResp error
waitResp error
completionResp error
embeddingResp []float32
embeddingRespErr error
embedResp *llm.EmbedResponse
embedRespErr error
tokenizeResp []int
tokenizeRespErr error
detokenizeResp string
@@ -727,8 +727,8 @@ func (s *mockLlm) Completion(ctx context.Context, req llm.CompletionRequest, fn
return s.completionResp
}
func (s *mockLlm) Embedding(ctx context.Context, input string) ([]float32, error) {
return s.embeddingResp, s.embeddingRespErr
func (s *mockLlm) Embed(ctx context.Context, input []string) (*llm.EmbedResponse, error) {
return s.embedResp, s.embedRespErr
}
func (s *mockLlm) Tokenize(ctx context.Context, content string) ([]int, error) {

View File

@@ -4,5 +4,6 @@ package server
import "os"
func setSparse(*os.File) {
func setSparse(file *os.File) error {
return nil
}

View File

@@ -6,9 +6,8 @@ import (
"golang.org/x/sys/windows"
)
func setSparse(file *os.File) {
// exFat (and other FS types) don't support sparse files, so ignore errors
windows.DeviceIoControl( //nolint:errcheck
func setSparse(file *os.File) error {
return windows.DeviceIoControl(
windows.Handle(file.Fd()), windows.FSCTL_SET_SPARSE,
nil, 0,
nil, 0,

View File

@@ -26,7 +26,7 @@ import (
var blobUploadManager sync.Map
type blobUpload struct {
Layer
*Layer
Total int64
Completed atomic.Int64
@@ -362,7 +362,7 @@ func (p *progressWriter) Rollback() {
p.written = 0
}
func uploadBlob(ctx context.Context, mp ModelPath, layer Layer, opts *registryOptions, fn func(api.ProgressResponse)) error {
func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *registryOptions, fn func(api.ProgressResponse)) error {
requestURL := mp.BaseURL()
requestURL = requestURL.JoinPath("v2", mp.GetNamespaceRepository(), "blobs", layer.Digest)