Compare commits

..

2 Commits

Author SHA1 Message Date
Blake Mizerany
cfd4152eb6 ... 2024-04-17 17:04:13 -07:00
Blake Mizerany
0fbb379373 types/model: add FilepathNoBuild
Also, add test for DisplayLongest.

Also, plumb fill param to ParseName in MustParseName
2024-04-16 12:37:38 -07:00
60 changed files with 290 additions and 262 deletions

View File

@@ -1,60 +0,0 @@
name: Bug report
labels: [bug]
description: Something isn't working right.
body:
- type: textarea
id: description
attributes:
label: What is the issue?
description: What happened? What did you expect to happen?
validations:
required: true
- type: dropdown
id: os
attributes:
label: OS
description: Which operating system are you using?
multiple: true
options:
- Linux
- macOS
- Windows
- Docker
- WSL2
validations:
required: false
- type: dropdown
id: gpu
attributes:
label: GPU
description: Which GPU are you using?
multiple: true
options:
- Nvidia
- AMD
- Intel
- Apple
- Other
validations:
required: false
- type: dropdown
id: cpu
attributes:
label: CPU
description: Which CPU are you using?
multiple: true
options:
- Intel
- AMD
- Apple
- Other
validations:
required: false
- type: input
id: version
attributes:
label: Ollama version
description: What version of Ollama are you using? (`ollama --version`)
placeholder: e.g., 0.1.32
validations:
required: false

View File

@@ -0,0 +1,18 @@
name: Model request
description: Request a new model for the library
labels: [mr]
body:
- type: markdown
attributes:
value: |
Please check whether the model you are requesting is [already available](https://ollama.com/search) and whether you can [import it](https://github.com/ollama/ollama/blob/main/docs/import.md#import-a-model) yourself.
Tell us which model you'd like to see in the library!
- type: textarea
id: problem
attributes:
label: What model would you like?
description: Please provide a link to the model.
- type: markdown
attributes:
value: |
Thanks for filing a model request!

View File

@@ -1,6 +0,0 @@
---
name: Feature request
about: Request a new feature
labels: feature request
---

View File

@@ -0,0 +1,41 @@
name: Feature request
description: Propose a new feature
labels: [needs-triage, fr]
body:
- type: markdown
attributes:
value: |
Please check if your feature request is [already filed](https://github.com/ollama/ollama/issues).
Tell us about your idea!
- type: textarea
id: problem
attributes:
label: What are you trying to do?
description: Tell us about the problem you're trying to solve.
validations:
required: false
- type: textarea
id: solution
attributes:
label: How should we solve this?
description: If you have an idea of how you'd like to see this feature work, let us know.
validations:
required: false
- type: textarea
id: alternative
attributes:
label: What is the impact of not solving this?
description: (How) Are you currently working around the issue?
validations:
required: false
- type: textarea
id: context
attributes:
label: Anything else?
description: Any additional context to share, e.g., links
validations:
required: false
- type: markdown
attributes:
value: |
Thanks for filing a feature request!

View File

@@ -1,5 +0,0 @@
---
name: Model request
about: Request support for a new model to be added to Ollama
labels: model request
---

125
.github/ISSUE_TEMPLATE/90_bug_report.yml vendored Normal file
View File

@@ -0,0 +1,125 @@
name: Bug report
description: File a bug report. If you need help, please join our Discord server.
labels: [needs-triage, bug]
body:
- type: markdown
attributes:
value: |
Please check if your bug is [already filed](https://github.com/ollama/ollama/issues) before filing a new one.
- type: textarea
id: what-happened
attributes:
label: What is the issue?
description: What happened? What did you expect to happen?
validations:
required: true
- type: textarea
id: what-was-expected
attributes:
label: What did you expect to see?
description: What did you expect to see/happen instead?
validations:
required: false
- type: textarea
id: steps
attributes:
label: Steps to reproduce
description: What are the steps you took that hit this issue?
validations:
required: false
- type: textarea
id: changes
attributes:
label: Are there any recent changes that introduced the issue?
description: If so, what are those changes?
validations:
required: false
- type: dropdown
id: os
attributes:
label: OS
description: What OS are you using? You may select more than one.
multiple: true
options:
- Linux
- macOS
- Windows
- Other
validations:
required: false
- type: dropdown
id: architecture
attributes:
label: Architecture
description: What architecture are you using? You may select more than one.
multiple: true
options:
- arm64
- amd64
- x86
- Other
- type: dropdown
id: platform
attributes:
label: Platform
description: What platform are you using? You may select more than one.
multiple: true
options:
- Docker
- WSL
- WSL2
validations:
required: false
- type: input
id: ollama-version
attributes:
label: Ollama version
description: What Ollama version are you using? (`ollama --version`)
placeholder: e.g., 1.14.4
validations:
required: false
- type: dropdown
id: gpu
attributes:
label: GPU
description: What GPU, if any, are you using? You may select more than one.
multiple: true
options:
- Nvidia
- AMD
- Intel
- Apple
- Other
validations:
required: false
- type: textarea
id: gpu-info
attributes:
label: GPU info
description: What GPU info do you have? (`nvidia-smi`, `rocminfo`, `system_profiler SPDisplaysDataType`, etc.)
validations:
required: false
- type: dropdown
id: cpu
attributes:
label: CPU
description: What CPU are you using? You may select more than one.
multiple: true
options:
- Intel
- AMD
- Apple
- Other
validations:
required: false
- type: textarea
id: other-software
attributes:
label: Other software
description: What other software are you using that might be related to this issue?
validations:
required: false
- type: markdown
attributes:
value: |
Thanks for filing a bug report!

View File

@@ -60,6 +60,7 @@ Here are some example models that can be downloaded:
| Llama 2 13B | 13B | 7.3GB | `ollama run llama2:13b` | | Llama 2 13B | 13B | 7.3GB | `ollama run llama2:13b` |
| Llama 2 70B | 70B | 39GB | `ollama run llama2:70b` | | Llama 2 70B | 70B | 39GB | `ollama run llama2:70b` |
| Orca Mini | 3B | 1.9GB | `ollama run orca-mini` | | Orca Mini | 3B | 1.9GB | `ollama run orca-mini` |
| Vicuna | 7B | 3.8GB | `ollama run vicuna` |
| LLaVA | 7B | 4.5GB | `ollama run llava` | | LLaVA | 7B | 4.5GB | `ollama run llava` |
| Gemma | 2B | 1.4GB | `ollama run gemma:2b` | | Gemma | 2B | 1.4GB | `ollama run gemma:2b` |
| Gemma | 7B | 4.8GB | `ollama run gemma:7b` | | Gemma | 7B | 4.8GB | `ollama run gemma:7b` |
@@ -377,6 +378,3 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension) - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
- [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend) - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
- [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support) - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
### Supported backends
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.

View File

@@ -20,8 +20,8 @@ import (
"runtime" "runtime"
"strings" "strings"
"ollama.com/format" "github.com/ollama/ollama/format"
"ollama.com/version" "github.com/ollama/ollama/version"
) )
// Client encapsulates client state for interacting with the ollama // Client encapsulates client state for interacting with the ollama

View File

@@ -9,8 +9,8 @@ import (
"os/signal" "os/signal"
"syscall" "syscall"
"ollama.com/app/store" "github.com/ollama/ollama/app/store"
"ollama.com/app/tray" "github.com/ollama/ollama/app/tray"
) )
func Run() { func Run() {

View File

@@ -11,7 +11,7 @@ import (
"path/filepath" "path/filepath"
"time" "time"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func getCLIFullPath(command string) string { func getCLIFullPath(command string) string {

View File

@@ -18,8 +18,8 @@ import (
"strings" "strings"
"time" "time"
"ollama.com/auth" "github.com/ollama/ollama/auth"
"ollama.com/version" "github.com/ollama/ollama/version"
) )
var ( var (

View File

@@ -4,7 +4,7 @@ package main
// go build -ldflags="-H windowsgui" . // go build -ldflags="-H windowsgui" .
import ( import (
"ollama.com/app/lifecycle" "github.com/ollama/ollama/app/lifecycle"
) )
func main() { func main() {

View File

@@ -4,8 +4,8 @@ import (
"fmt" "fmt"
"runtime" "runtime"
"ollama.com/app/assets" "github.com/ollama/ollama/app/assets"
"ollama.com/app/tray/commontray" "github.com/ollama/ollama/app/tray/commontray"
) )
func NewTray() (commontray.OllamaTray, error) { func NewTray() (commontray.OllamaTray, error) {

View File

@@ -5,7 +5,7 @@ package tray
import ( import (
"fmt" "fmt"
"ollama.com/app/tray/commontray" "github.com/ollama/ollama/app/tray/commontray"
) )
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) { func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {

View File

@@ -1,8 +1,8 @@
package tray package tray
import ( import (
"ollama.com/app/tray/commontray" "github.com/ollama/ollama/app/tray/commontray"
"ollama.com/app/tray/wintray" "github.com/ollama/ollama/app/tray/wintray"
) )
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) { func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {

View File

@@ -13,8 +13,8 @@ import (
"sync" "sync"
"unsafe" "unsafe"
"github.com/ollama/ollama/app/tray/commontray"
"golang.org/x/sys/windows" "golang.org/x/sys/windows"
"ollama.com/app/tray/commontray"
) )
// Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32 // Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32

View File

@@ -30,12 +30,12 @@ import (
"golang.org/x/exp/slices" "golang.org/x/exp/slices"
"golang.org/x/term" "golang.org/x/term"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/format" "github.com/ollama/ollama/format"
"ollama.com/parser" "github.com/ollama/ollama/parser"
"ollama.com/progress" "github.com/ollama/ollama/progress"
"ollama.com/server" "github.com/ollama/ollama/server"
"ollama.com/version" "github.com/ollama/ollama/version"
) )
func CreateHandler(cmd *cobra.Command, args []string) error { func CreateHandler(cmd *cobra.Command, args []string) error {

View File

@@ -14,9 +14,9 @@ import (
"github.com/spf13/cobra" "github.com/spf13/cobra"
"golang.org/x/exp/slices" "golang.org/x/exp/slices"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/progress" "github.com/ollama/ollama/progress"
"ollama.com/readline" "github.com/ollama/ollama/readline"
) )
type MultilineState int type MultilineState int

View File

@@ -7,7 +7,7 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func TestExtractFilenames(t *testing.T) { func TestExtractFilenames(t *testing.T) {

View File

@@ -7,7 +7,7 @@ import (
"os/exec" "os/exec"
"strings" "strings"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func startApp(ctx context.Context, client *api.Client) error { func startApp(ctx context.Context, client *api.Client) error {

View File

@@ -6,7 +6,7 @@ import (
"context" "context"
"fmt" "fmt"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func startApp(ctx context.Context, client *api.Client) error { func startApp(ctx context.Context, client *api.Client) error {

View File

@@ -10,7 +10,7 @@ import (
"strings" "strings"
"syscall" "syscall"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func startApp(ctx context.Context, client *api.Client) error { func startApp(ctx context.Context, client *api.Client) error {

View File

@@ -13,8 +13,8 @@ import (
"google.golang.org/protobuf/proto" "google.golang.org/protobuf/proto"
"ollama.com/convert/sentencepiece" "github.com/ollama/ollama/convert/sentencepiece"
"ollama.com/llm" "github.com/ollama/ollama/llm"
) )
type Params struct { type Params struct {

View File

@@ -12,7 +12,7 @@ import (
"github.com/pdevine/tensor" "github.com/pdevine/tensor"
"github.com/pdevine/tensor/native" "github.com/pdevine/tensor/native"
"ollama.com/llm" "github.com/ollama/ollama/llm"
) )
type GemmaModel struct { type GemmaModel struct {

View File

@@ -14,7 +14,7 @@ import (
"github.com/pdevine/tensor/native" "github.com/pdevine/tensor/native"
"github.com/x448/float16" "github.com/x448/float16"
"ollama.com/llm" "github.com/ollama/ollama/llm"
) )
type LlamaModel struct { type LlamaModel struct {

View File

@@ -13,7 +13,7 @@ import (
"github.com/pdevine/tensor/native" "github.com/pdevine/tensor/native"
"github.com/x448/float16" "github.com/x448/float16"
"ollama.com/llm" "github.com/ollama/ollama/llm"
) )
type MistralModel struct { type MistralModel struct {

View File

@@ -16,7 +16,7 @@ import (
"github.com/mitchellh/mapstructure" "github.com/mitchellh/mapstructure"
"github.com/x448/float16" "github.com/x448/float16"
"ollama.com/llm" "github.com/ollama/ollama/llm"
) )
type safetensorWriterTo struct { type safetensorWriterTo struct {

View File

@@ -15,7 +15,7 @@ import (
"github.com/nlpodyssey/gopickle/types" "github.com/nlpodyssey/gopickle/types"
"github.com/x448/float16" "github.com/x448/float16"
"ollama.com/llm" "github.com/ollama/ollama/llm"
) )
type torchWriterTo struct { type torchWriterTo struct {

View File

@@ -5,7 +5,7 @@ import (
"fmt" "fmt"
"log" "log"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func main() { func main() {

View File

@@ -5,7 +5,7 @@ import (
"fmt" "fmt"
"log" "log"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func main() { func main() {

View File

@@ -5,7 +5,7 @@ import (
"fmt" "fmt"
"log" "log"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func main() { func main() {

View File

@@ -6,7 +6,7 @@ import (
"log" "log"
"os" "os"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func main() { func main() {

View File

@@ -5,7 +5,7 @@ import (
"fmt" "fmt"
"log" "log"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func main() { func main() {

2
go.mod
View File

@@ -1,4 +1,4 @@
module ollama.com module github.com/ollama/ollama
go 1.22 go 1.22

View File

@@ -21,7 +21,7 @@ import (
"sync" "sync"
"unsafe" "unsafe"
"ollama.com/format" "github.com/ollama/ollama/format"
) )
type handles struct { type handles struct {

View File

@@ -55,6 +55,6 @@ func getCPUMem() (memInfo, error) {
return memInfo{ return memInfo{
TotalMemory: uint64(C.getPhysicalMemory()), TotalMemory: uint64(C.getPhysicalMemory()),
FreeMemory: 0, FreeMemory: 0,
DeviceCount: 1, DeviceCount: 0,
}, nil }, nil
} }

View File

@@ -8,7 +8,7 @@ import (
"testing" "testing"
"time" "time"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func TestOrcaMiniBlueSky(t *testing.T) { func TestOrcaMiniBlueSky(t *testing.T) {

View File

@@ -8,7 +8,7 @@ import (
"testing" "testing"
"time" "time"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func TestContextExhaustion(t *testing.T) { func TestContextExhaustion(t *testing.T) {

View File

@@ -9,8 +9,8 @@ import (
"testing" "testing"
"time" "time"
"github.com/ollama/ollama/api"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"ollama.com/api"
) )
func TestIntegrationMultimodal(t *testing.T) { func TestIntegrationMultimodal(t *testing.T) {

View File

@@ -9,7 +9,7 @@ import (
"testing" "testing"
"time" "time"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
// TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server // TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server

View File

@@ -21,9 +21,9 @@ import (
"testing" "testing"
"time" "time"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/app/lifecycle"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"ollama.com/api"
"ollama.com/app/lifecycle"
) )
func FindPort() string { func FindPort() string {

View File

@@ -39,10 +39,6 @@
#include "httplib.h" #include "httplib.h"
#include "json.hpp" #include "json.hpp"
#if defined(_WIN32)
#include <windows.h>
#endif
#include <cstddef> #include <cstddef>
#include <thread> #include <thread>
#include <chrono> #include <chrono>
@@ -2774,28 +2770,8 @@ inline void signal_handler(int signal) {
shutdown_handler(signal); shutdown_handler(signal);
} }
#if defined(_WIN32) int main(int argc, char **argv)
char* wchar_to_char(const wchar_t* wstr) { {
if (wstr == nullptr) return nullptr;
// Determine the number of bytes needed for the UTF-8 string
int bytes = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, nullptr, 0, nullptr, nullptr);
char* str = new char[bytes];
// Convert the wide-character string to a UTF-8 string
WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, bytes, nullptr, nullptr);
return str;
}
int wmain(int argc, wchar_t **wargv) {
char** argv = new char*[argc];
for (int i = 0; i < argc; ++i) {
argv[i] = wchar_to_char(wargv[i]);
}
#else
int main(int argc, char **argv) {
#endif
#if SERVER_VERBOSE != 1 #if SERVER_VERBOSE != 1
log_disable(); log_disable();
#endif #endif
@@ -3306,11 +3282,6 @@ int main(int argc, char **argv) {
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false; return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
}; };
SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true); SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
for (int i = 0; i < argc; ++i) {
delete[] argv[i];
}
delete[] argv;
#endif #endif
llama.queue_tasks.start_loop(); llama.queue_tasks.start_loop();
svr.stop(); svr.stop();

View File

@@ -164,8 +164,7 @@ func (ts Tensors) Layers() map[string]Layer {
for _, t := range ts { for _, t := range ts {
parts := strings.Split(t.Name, ".") parts := strings.Split(t.Name, ".")
if parts[0] == "blk" { if parts[0] == "blk" {
// join first and second part, e.g. blk.%d parts = parts[1:]
parts = append([]string{fmt.Sprintf("%s.%s", parts[0], parts[1])}, parts[2:]...)
} }
if _, ok := layers[parts[0]]; !ok { if _, ok := layers[parts[0]]; !ok {
@@ -381,12 +380,6 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
) )
partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128 partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
case "stablelm":
fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
partialOffload = max(
4*batch*(vocab+2*embedding),
fullOffload,
)
} }
return return

View File

@@ -248,17 +248,13 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
} }
padding := llm.padding(offset, int64(alignment)) padding := llm.padding(offset, int64(alignment))
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil { if _, err := rs.Seek(padding-offset, io.SeekCurrent); err != nil {
return err return err
} }
for _, tensor := range llm.tensors { for _, tensor := range llm.tensors {
if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil { padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
return err if _, err := rs.Seek(padded, io.SeekCurrent); err != nil {
}
padding := llm.padding(int64(tensor.size()), int64(alignment))
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
return err return err
} }
} }
@@ -627,9 +623,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
return err return err
} }
var alignment int64 = 32 padding := llm.padding(offset, 32)
padding := llm.padding(offset, alignment) if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
return err return err
} }
@@ -643,8 +638,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
return err return err
} }
padding := llm.padding(offset, alignment) padding := llm.padding(offset, 32)
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil { if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
return err return err
} }
} }
@@ -653,5 +648,5 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
} }
func (gguf) padding(offset, align int64) int64 { func (gguf) padding(offset, align int64) int64 {
return (align - offset%align) % align return (offset + align - 1) / align * align
} }

View File

@@ -14,7 +14,7 @@ import (
"golang.org/x/exp/slices" "golang.org/x/exp/slices"
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
"ollama.com/gpu" "github.com/ollama/ollama/gpu"
) )
var errPayloadMissing = fmt.Errorf("expected payloads not included in this build of ollama") var errPayloadMissing = fmt.Errorf("expected payloads not included in this build of ollama")

View File

@@ -21,9 +21,9 @@ import (
"strings" "strings"
"time" "time"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/format" "github.com/ollama/ollama/format"
"ollama.com/gpu" "github.com/ollama/ollama/gpu"
) )
// LlamaServer is an instance of the llama.cpp server // LlamaServer is an instance of the llama.cpp server
@@ -79,9 +79,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
graphFullOffload = graphPartialOffload graphFullOffload = graphPartialOffload
} }
graphFullOffload *= uint64(info.DeviceCount)
graphPartialOffload *= uint64(info.DeviceCount)
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers) // memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
memoryRequiredTotal := memoryMinimum + graphFullOffload memoryRequiredTotal := memoryMinimum + graphFullOffload
@@ -97,7 +94,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
var layerCount int var layerCount int
layers := ggml.Tensors().Layers() layers := ggml.Tensors().Layers()
for i := 0; i < int(ggml.KV().BlockCount()); i++ { for i := 0; i < int(ggml.KV().BlockCount()); i++ {
memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size() memoryLayer := layers[fmt.Sprintf("%d", i)].size()
// KV is proportional to the number of layers // KV is proportional to the number of layers
memoryLayer += kv / ggml.KV().BlockCount() memoryLayer += kv / ggml.KV().BlockCount()
@@ -109,13 +106,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
} }
} }
var memoryLayerOutput uint64 memoryLayerOutput := layers["output"].size()
for k, v := range layers {
if !strings.HasPrefix(k, "blk.") {
memoryLayerOutput += v.size()
}
}
memoryRequiredTotal += memoryLayerOutput memoryRequiredTotal += memoryLayerOutput
if info.Library == "metal" && memoryRequiredTotal > info.TotalMemory { if info.Library == "metal" && memoryRequiredTotal > info.TotalMemory {
@@ -130,47 +121,16 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
opts.NumGPU = layerCount opts.NumGPU = layerCount
} }
memoryWeights := memoryRequiredTotal - memoryMinimum - graphFullOffload - kv
slog.Info( slog.Info(
"offload to gpu", "offload to gpu",
slog.Group( "reallayers", opts.NumGPU,
"layers", "layers", layerCount,
// actual number of layers offloaded "required", format.HumanBytes2(memoryRequiredTotal),
"real", opts.NumGPU, "used", format.HumanBytes2(memoryRequiredPartial),
// estimated number of layers that can be offloaded "available", format.HumanBytes2(memoryAvailable),
"estimate", layerCount, "kv", format.HumanBytes2(kv),
), "fulloffload", format.HumanBytes2(graphFullOffload),
slog.Group( "partialoffload", format.HumanBytes2(graphPartialOffload),
"memory",
// memory available for offloading
"available", format.HumanBytes2(memoryAvailable),
slog.Group(
"required",
// memory required for full offloading
"full", format.HumanBytes2(memoryRequiredTotal),
// memory required to offload layers.estimate layers
"partial", format.HumanBytes2(memoryRequiredPartial),
// memory of KV cache
"kv", format.HumanBytes2(kv),
),
slog.Group(
"weights",
// memory of the weights
"total", format.HumanBytes2(memoryWeights),
// memory of repeating layers
"repeating", format.HumanBytes2(memoryWeights-memoryLayerOutput),
// memory of non-repeating layers
"nonrepeating", format.HumanBytes2(memoryLayerOutput),
),
slog.Group(
"graph",
// memory of graph when fully offloaded
"full", format.HumanBytes2(graphFullOffload),
// memory of graph when not fully offloaded
"partial", format.HumanBytes2(graphPartialOffload),
),
),
) )
if len(adapters) > 1 { if len(adapters) > 1 {

View File

@@ -3,8 +3,8 @@ package main
import ( import (
"context" "context"
"github.com/ollama/ollama/cmd"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"ollama.com/cmd"
) )
func main() { func main() {

View File

@@ -11,7 +11,7 @@ import (
"time" "time"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
type Error struct { type Error struct {

View File

@@ -6,8 +6,8 @@ import (
"strings" "strings"
"time" "time"
"github.com/ollama/ollama/format"
"golang.org/x/term" "golang.org/x/term"
"ollama.com/format"
) )
type Bar struct { type Bar struct {

View File

@@ -15,8 +15,8 @@ import (
"strings" "strings"
"time" "time"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/auth" "github.com/ollama/ollama/auth"
) )
type registryChallenge struct { type registryChallenge struct {

View File

@@ -21,8 +21,8 @@ import (
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/format" "github.com/ollama/ollama/format"
) )
const maxRetries = 6 const maxRetries = 6

View File

@@ -24,12 +24,12 @@ import (
"golang.org/x/exp/slices" "golang.org/x/exp/slices"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/convert" "github.com/ollama/ollama/convert"
"ollama.com/format" "github.com/ollama/ollama/format"
"ollama.com/llm" "github.com/ollama/ollama/llm"
"ollama.com/parser" "github.com/ollama/ollama/parser"
"ollama.com/version" "github.com/ollama/ollama/version"
) )
type registryOptions struct { type registryOptions struct {

View File

@@ -7,7 +7,7 @@ import (
"text/template" "text/template"
"text/template/parse" "text/template/parse"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
// isResponseNode checks if the node contains .Response // isResponseNode checks if the node contains .Response

View File

@@ -4,7 +4,7 @@ import (
"strings" "strings"
"testing" "testing"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func TestPrompt(t *testing.T) { func TestPrompt(t *testing.T) {

View File

@@ -27,12 +27,12 @@ import (
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"golang.org/x/exp/slices" "golang.org/x/exp/slices"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/gpu" "github.com/ollama/ollama/gpu"
"ollama.com/llm" "github.com/ollama/ollama/llm"
"ollama.com/openai" "github.com/ollama/ollama/openai"
"ollama.com/parser" "github.com/ollama/ollama/parser"
"ollama.com/version" "github.com/ollama/ollama/version"
) )
var mode string = gin.DebugMode var mode string = gin.DebugMode

View File

@@ -16,9 +16,9 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/parser" "github.com/ollama/ollama/parser"
"ollama.com/version" "github.com/ollama/ollama/version"
) )
func Test_Routes(t *testing.T) { func Test_Routes(t *testing.T) {

View File

@@ -16,9 +16,9 @@ import (
"sync/atomic" "sync/atomic"
"time" "time"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/format"
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
"ollama.com/api"
"ollama.com/format"
) )
var blobUploadManager sync.Map var blobUploadManager sync.Map

View File

@@ -1,6 +1,7 @@
package model package model
import ( import (
"fmt"
"log/slog" "log/slog"
"strings" "strings"
"unicode" "unicode"
@@ -47,8 +48,11 @@ var (
// Digest. // Digest.
func ParseDigest(s string) Digest { func ParseDigest(s string) Digest {
typ, digest, ok := strings.Cut(s, "-") typ, digest, ok := strings.Cut(s, "-")
if !ok {
typ, digest, ok = strings.Cut(s, ":")
}
if ok && isValidDigestType(typ) && isValidHex(digest) { if ok && isValidDigestType(typ) && isValidHex(digest) {
return Digest{s: s} return Digest{s: fmt.Sprintf("%s-%s", typ, digest)}
} }
return Digest{} return Digest{}
} }

View File

@@ -12,7 +12,7 @@ import (
"strings" "strings"
"sync" "sync"
"ollama.com/types/structs" "github.com/ollama/ollama/types/structs"
) )
// Errors // Errors
@@ -521,8 +521,6 @@ func parts(s string) iter_Seq2[PartKind, string] {
return return
} }
state, j, partLen = PartModel, i, 0 state, j, partLen = PartModel, i, 0
case PartHost:
// noop: support for host:port
default: default:
yield(PartExtraneous, s[i+1:j]) yield(PartExtraneous, s[i+1:j])
return return
@@ -680,9 +678,6 @@ func isValidByteFor(kind PartKind, c byte) bool {
if kind == PartNamespace && c == '.' { if kind == PartNamespace && c == '.' {
return false return false
} }
if kind == PartHost && c == ':' {
return true
}
if c == '.' || c == '-' { if c == '.' || c == '-' {
return true return true
} }

View File

@@ -40,7 +40,6 @@ var testNames = map[string]fields{
"user/model": {namespace: "user", model: "model"}, "user/model": {namespace: "user", model: "model"},
"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"}, "example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
"example.com/ns/mistral:7b+X": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"}, "example.com/ns/mistral:7b+X": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
"localhost:5000/ns/mistral": {host: "localhost:5000", namespace: "ns", model: "mistral"},
// invalid digest // invalid digest
"mistral:latest@invalid256-": {}, "mistral:latest@invalid256-": {},