feed the linter

show ggml modelinfo through the show api
2024-04-24 20:54:27 -07:00 · 2024-04-24 18:53:00 -07:00
32 changed files with 1870 additions and 1152 deletions
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -311,18 +311,29 @@ jobs:
      - uses: actions/download-artifact@v4
        with:
          name: generate-windows-cpu
+          path: |
+            llm/build
+            dist/windows-amd64
      - uses: actions/download-artifact@v4
        with:
          name: generate-windows-cuda
+          path: |
+            llm/build
+            dist/windows-amd64
      - uses: actions/download-artifact@v4
        with:
          name: windows-cuda-deps
+          path: dist/deps
      - uses: actions/download-artifact@v4
        with:
          name: windows-rocm-deps
+          path: dist/deps
      - uses: actions/download-artifact@v4
        with:
          name: generate-windows-rocm
+          path: |
+            llm/build
+            dist/windows-amd64
      - run: dir llm/build
      - run: |
          $gopath=(get-command go).source | split-path -parent
@@ -331,6 +342,8 @@ jobs:
          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
          $env:PATH="$gopath;$env:PATH"
          $env:OLLAMA_SKIP_GENERATE="1"
+          $env:NVIDIA_DIR=$(resolve-path ".\dist\deps")
+          $env:HIP_PATH=$(resolve-path ".\dist\deps")
          & .\scripts\build_windows.ps1
      - uses: actions/upload-artifact@v4
        with:
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,15 +1,5 @@
 name: test

-concurrency:
-  # For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
-  # cancels running CI jobs and starts all new ones.
-  #
-  # For non-PR pushes, concurrency.group needs to be unique for every distinct
-  # CI run we want to have happen. Use run_id, which in practice means all
-  # non-PR CI runs will be allowed to run without preempting each other.
-  group: ${{ github.workflow }}-$${{ github.pull_request.number || github.run_id }}
-  cancel-in-progress: true
-
 on:
  pull_request:
    paths:
@@ -31,9 +21,7 @@ jobs:
      - id: changes
        run: |
          changed() {
-            git diff-tree -r --no-commit-id --name-only \
-              $(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
-              ${{ github.event.pull_request.head.sha }} \
+            git diff-tree -r --no-commit-id --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
              | xargs python3 -c "import sys; print(any([x.startswith('$1') for x in sys.argv[1:]]))"
          }

@@ -295,6 +283,7 @@ jobs:
        with:
          go-version-file: go.mod
          cache: true
+      - run: go get
      - run: |
          case ${{ matrix.arch }} in
            amd64) echo ARCH=x86_64 ;;
--- a/api/types.go
+++ b/api/types.go
@@ -207,6 +207,7 @@ type ShowResponse struct {
 	System     string       `json:"system,omitempty"`
 	Details    ModelDetails `json:"details,omitempty"`
 	Messages   []Message    `json:"messages,omitempty"`
+	ModelInfo  string       `json:"modelinfo,omitempty"`
 }

 type CopyRequest struct {
@@ -396,10 +397,8 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 func DefaultOptions() Options {
 	return Options{
 		// options set on request to runner
-		NumPredict: -1,
-
-		// set a minimal num_keep to avoid issues on context shifts
-		NumKeep:          4,
+		NumPredict:       -1,
+		NumKeep:          0,
 		Temperature:      0.8,
 		TopK:             40,
 		TopP:             0.9,
--- a/app/ollama.iss
+++ b/app/ollama.iss
@@ -92,8 +92,12 @@ Source: "..\dist\windows-amd64\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64
 Source: "..\dist\windows-amd64\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
 Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
 Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
-#if DirExists("..\dist\windows-amd64\rocm")
-  Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
+; Assumes v5.7, may need adjustments for v6
+#if GetEnv("HIP_PATH") != ""
+  Source: "{#GetEnv('HIP_PATH')}\bin\hipblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
+  Source: "{#GetEnv('HIP_PATH')}\bin\rocblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
+  ; amdhip64.dll dependency comes from the driver and must be installed already
+  Source: "{#GetEnv('HIP_PATH')}\bin\rocblas\library\*"; DestDir: "{app}\rocm\rocblas\library\"; Flags: ignoreversion
 #endif


@@ -129,7 +133,7 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi


 ;FinishedHeadingLabel=Run your first model
-;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3
+;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama2
 ;ClickFinish=%n

 [Registry]
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -17,7 +17,6 @@ import (
 	"os"
 	"os/signal"
 	"path/filepath"
-	"regexp"
 	"runtime"
 	"strings"
 	"syscall"
@@ -54,6 +53,8 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	p := progress.NewProgress(os.Stderr)
 	defer p.Stop()

+	bars := make(map[string]*progress.Bar)
+
 	modelfile, err := os.ReadFile(filename)
 	if err != nil {
 		return err
@@ -94,16 +95,95 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 				return err
 			}

+			// TODO make this work w/ adapters
 			if fi.IsDir() {
-				// this is likely a safetensors or pytorch directory
-				// TODO make this work w/ adapters
-				tempfile, err := tempZipFiles(path)
+				tf, err := os.CreateTemp("", "ollama-tf")
 				if err != nil {
 					return err
 				}
-				defer os.RemoveAll(tempfile)
+				defer os.RemoveAll(tf.Name())

-				path = tempfile
+				zf := zip.NewWriter(tf)
+
+				files := []string{}
+
+				tfiles, err := filepath.Glob(filepath.Join(path, "pytorch_model-*.bin"))
+				if err != nil {
+					return err
+				} else if len(tfiles) == 0 {
+					tfiles, err = filepath.Glob(filepath.Join(path, "model-*.safetensors"))
+					if err != nil {
+						return err
+					}
+				}
+
+				files = append(files, tfiles...)
+
+				if len(files) == 0 {
+					return fmt.Errorf("no models were found in '%s'", path)
+				}
+
+				// add the safetensor/torch config file + tokenizer
+				files = append(files, filepath.Join(path, "config.json"))
+				files = append(files, filepath.Join(path, "params.json"))
+				files = append(files, filepath.Join(path, "added_tokens.json"))
+				files = append(files, filepath.Join(path, "tokenizer.model"))
+
+				for _, fn := range files {
+					f, err := os.Open(fn)
+
+					// just skip whatever files aren't there
+					if os.IsNotExist(err) {
+						if strings.HasSuffix(fn, "tokenizer.model") {
+							// try the parent dir before giving up
+							parentDir := filepath.Dir(path)
+							newFn := filepath.Join(parentDir, "tokenizer.model")
+							f, err = os.Open(newFn)
+							if os.IsNotExist(err) {
+								continue
+							} else if err != nil {
+								return err
+							}
+						} else {
+							continue
+						}
+					} else if err != nil {
+						return err
+					}
+
+					fi, err := f.Stat()
+					if err != nil {
+						return err
+					}
+
+					h, err := zip.FileInfoHeader(fi)
+					if err != nil {
+						return err
+					}
+
+					h.Name = filepath.Base(fn)
+					h.Method = zip.Store
+
+					w, err := zf.CreateHeader(h)
+					if err != nil {
+						return err
+					}
+
+					_, err = io.Copy(w, f)
+					if err != nil {
+						return err
+					}
+
+				}
+
+				if err := zf.Close(); err != nil {
+					return err
+				}
+
+				if err := tf.Close(); err != nil {
+					return err
+				}
+				path = tf.Name()
 			}

 			digest, err := createBlob(cmd, client, path)
@@ -111,17 +191,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 				return err
 			}

-			name := c.Name
-			if c.Name == "model" {
-				name = "from"
-			}
-
-			re := regexp.MustCompile(fmt.Sprintf(`(?im)^(%s)\s+%s\s*$`, name, c.Args))
-			modelfile = re.ReplaceAll(modelfile, []byte("$1 @"+digest))
+			modelfile = bytes.ReplaceAll(modelfile, []byte(c.Args), []byte("@"+digest))
 		}
 	}

-	bars := make(map[string]*progress.Bar)
 	fn := func(resp api.ProgressResponse) error {
 		if resp.Digest != "" {
 			spinner.Stop()
@@ -155,114 +228,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	return nil
 }

-func tempZipFiles(path string) (string, error) {
-	tempfile, err := os.CreateTemp("", "ollama-tf")
-	if err != nil {
-		return "", err
-	}
-	defer tempfile.Close()
-
-	zipfile := zip.NewWriter(tempfile)
-	defer zipfile.Close()
-
-	detectContentType := func(path string) (string, error) {
-		f, err := os.Open(path)
-		if err != nil {
-			return "", err
-		}
-		defer f.Close()
-
-		var b bytes.Buffer
-		b.Grow(512)
-
-		if _, err := io.CopyN(&b, f, 512); err != nil && !errors.Is(err, io.EOF) {
-			return "", err
-		}
-
-		contentType, _, _ := strings.Cut(http.DetectContentType(b.Bytes()), ";")
-		return contentType, nil
-	}
-
-	glob := func(pattern, contentType string) ([]string, error) {
-		matches, err := filepath.Glob(pattern)
-		if err != nil {
-			return nil, err
-		}
-
-		for _, safetensor := range matches {
-			if ct, err := detectContentType(safetensor); err != nil {
-				return nil, err
-			} else if ct != contentType {
-				return nil, fmt.Errorf("invalid content type: expected %s for %s", ct, safetensor)
-			}
-		}
-
-		return matches, nil
-	}
-
-	var files []string
-	if st, _ := glob(filepath.Join(path, "model*.safetensors"), "application/octet-stream"); len(st) > 0 {
-		// safetensors files might be unresolved git lfs references; skip if they are
-		// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
-		files = append(files, st...)
-	} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
-		// pytorch files might also be unresolved git lfs references; skip if they are
-		// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
-		files = append(files, pt...)
-	} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/octet-stream"); len(pt) > 0 {
-		// pytorch files might also be unresolved git lfs references; skip if they are
-		// covers consolidated.x.pth, consolidated.pth
-		files = append(files, pt...)
-	} else {
-		return "", errors.New("no safetensors or torch files found")
-	}
-
-	// add configuration files, json files are detected as text/plain
-	js, err := glob(filepath.Join(path, "*.json"), "text/plain")
-	if err != nil {
-		return "", err
-	}
-	files = append(files, js...)
-
-	if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
-		// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
-		// tokenizer.model might be a unresolved git lfs reference; error if it is
-		files = append(files, tks...)
-	} else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 {
-		// some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B)
-		files = append(files, tks...)
-	}
-
-	for _, file := range files {
-		f, err := os.Open(file)
-		if err != nil {
-			return "", err
-		}
-		defer f.Close()
-
-		fi, err := f.Stat()
-		if err != nil {
-			return "", err
-		}
-
-		zfi, err := zip.FileInfoHeader(fi)
-		if err != nil {
-			return "", err
-		}
-
-		zf, err := zipfile.CreateHeader(zfi)
-		if err != nil {
-			return "", err
-		}
-
-		if _, err := io.Copy(zf, f); err != nil {
-			return "", err
-		}
-	}
-
-	return tempfile.Name(), nil
-}
-
 func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
 	bin, err := os.Open(path)
 	if err != nil {
--- a/docs/windows.md
+++ b/docs/windows.md
@@ -14,7 +14,7 @@ As this is a preview release, you should expect a few bugs here and there.  If
 you run into a problem you can reach out on
 [Discord](https://discord.gg/ollama), or file an 
 [issue](https://github.com/ollama/ollama/issues).
-Logs will often be helpful in diagnosing the problem (see
+Logs will often be helpful in diagnosing the problem (see
 [Troubleshooting](#troubleshooting) below)

 ## System Requirements
--- a/gpu/amd_linux.go
+++ b/gpu/amd_linux.go
@@ -140,7 +140,7 @@ func AMDGetGPUInfo() []GpuInfo {
 		}

 		if int(major) < RocmComputeMin {
-			slog.Warn(fmt.Sprintf("amdgpu too old gfx%d%d%x", major, minor, patch), "gpu", gpuID)
+			slog.Warn(fmt.Sprintf("amdgpu too old gfx%d%d%d", major, minor, patch), "gpu", gpuID)
 			continue
 		}

@@ -266,7 +266,7 @@ func AMDGetGPUInfo() []GpuInfo {
 				}
 				slog.Debug("rocm supported GPUs", "types", supported)
 			}
-			gfx := fmt.Sprintf("gfx%d%d%x", gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch)
+			gfx := fmt.Sprintf("gfx%d%d%d", gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch)
 			if !slices.Contains[[]string, string](supported, gfx) {
 				slog.Warn("amdgpu is not supported", "gpu", gpuInfo.ID, "gpu_type", gfx, "library", libDir, "supported_types", supported)
 				// TODO - consider discrete markdown just for ROCM troubleshooting?
--- a/gpu/amd_windows.go
+++ b/gpu/amd_windows.go
@@ -149,16 +149,13 @@ func AMDGetGPUInfo() []GpuInfo {
 			}
 		}
 		if patch != "" {
-			// Patch rev is hex; e.g. gfx90a
-			p, err := strconv.ParseInt(patch, 16, 0)
+			gpuInfo.Patch, err = strconv.Atoi(patch)
 			if err != nil {
 				slog.Info("failed to parse version", "version", gfx, "error", err)
-			} else {
-				gpuInfo.Patch = int(p)
 			}
 		}
 		if gpuInfo.Major < RocmComputeMin {
-			slog.Warn(fmt.Sprintf("amdgpu [%s] too old gfx%d%d%x", gpuInfo.ID, gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch))
+			slog.Warn(fmt.Sprintf("amdgpu [%s] too old gfx%d%d%d", gpuInfo.ID, gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch))
 			continue
 		}

--- a/llm/generate/gen_common.sh
+++ b/llm/generate/gen_common.sh
@@ -21,7 +21,7 @@ init_vars() {
        # TODO - add additional optimization flags...
        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
    fi
-    case $(uname -s) in
+    case $(uname -s) in 
    "Darwin")
        LIB_EXT="dylib"
        WHOLE_ARCHIVE="-Wl,-force_load"
--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -165,11 +165,11 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
    fi
    if [ "${ARCH}" == "arm64" ]; then
        echo "ARM CPU detected - disabling unsupported AVX instructions"
-
+        
        # ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
        #
-        # CUDA compute < 6.0 lacks proper FP16 support on ARM.
-        # Disabling has minimal performance effect while maintaining compatibility.
+        # CUDA compute < 6.0 lacks proper FP16 support on ARM. 
+        # Disabling has minimal performance effect while maintaining compatibility. 
        ARM64_DEFS="-DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_CUDA_F16=off"
    fi
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -26,25 +26,16 @@ function amdGPUs {
    $GPU_LIST -join ';'
 }

-
 function init_vars {
-    if (!$script:SRC_DIR) {
-        $script:SRC_DIR = $(resolve-path "..\..\")
-    }
-    if (!$script:llamacppDir) {
-        $script:llamacppDir = "../llama.cpp"
-    }
-    if (!$script:cmakeTargets) {
-        $script:cmakeTargets = @("ollama_llama_server")
-    }
+    $script:SRC_DIR = $(resolve-path "..\..\")
+    $script:llamacppDir = "../llama.cpp"
    $script:cmakeDefs = @(
        "-DBUILD_SHARED_LIBS=on",
        "-DLLAMA_NATIVE=off"
        )
-    $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
+    $script:cmakeTargets = @("ollama_llama_server")
    $script:ARCH = "amd64" # arm not yet supported.
    $script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
-    md "$script:DIST_BASE" -ea 0 > $null
    if ($env:CGO_CFLAGS -contains "-g") {
        $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
        $script:config = "RelWithDebInfo"
@@ -175,191 +166,137 @@ function cleanup {
    }
 }

+init_vars
+git_module_setup
+apply_patches

 # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
 # -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
 # -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver

+$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")

-function build_static() {
-    if ((-not "${env:OLLAMA_SKIP_STATIC_GENERATE}") -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "static"))) {
-        # GCC build for direct linking into the Go binary
-        init_vars
-        # cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
-        # as we need this to be compiled by gcc for golang to be able to link with itx
-        write-host "Checking for MinGW..."
-        # error action ensures we exit on failure
-        get-command gcc
-        get-command mingw32-make
-        $oldTargets = $script:cmakeTargets
-        $script:cmakeTargets = @("llama", "ggml")
-        $script:cmakeDefs = @(
-            "-G", "MinGW Makefiles"
-            "-DCMAKE_C_COMPILER=gcc.exe",
-            "-DCMAKE_CXX_COMPILER=g++.exe",
-            "-DBUILD_SHARED_LIBS=off",
-            "-DLLAMA_NATIVE=off",
-            "-DLLAMA_AVX=off",
-            "-DLLAMA_AVX2=off",
-            "-DLLAMA_AVX512=off",
-            "-DLLAMA_F16C=off",
-            "-DLLAMA_FMA=off")
-        $script:buildDir="../build/windows/${script:ARCH}_static"
-        write-host "Building static library"
-        build
-        $script:cmakeTargets = $oldTargets
-    } else {
-        write-host "Skipping CPU generation step as requested"
-    }
-}
-
-function build_cpu() {
-    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
-        # remaining llama.cpp builds use MSVC 
-        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
-        $script:buildDir="../build/windows/${script:ARCH}/cpu"
-        $script:distDir="$script:DIST_BASE\cpu"
-        write-host "Building LCD CPU"
-        build
-        sign
-        install
-    } else {
-        write-host "Skipping CPU generation step as requested"
-    }
-}
-
-function build_cpu_avx() {
-    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx"))) {
-        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
-        $script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
-        $script:distDir="$script:DIST_BASE\cpu_avx"
-        write-host "Building AVX CPU"
-        build
-        sign
-        install
-    } else {
-        write-host "Skipping CPU AVX generation step as requested"
-    }
-}
-
-function build_cpu_avx2() {
-    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx2"))) {
-        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
-        $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
-        $script:distDir="$script:DIST_BASE\cpu_avx2"
-        write-host "Building AVX2 CPU"
-        build
-        sign
-        install
-    } else {
-        write-host "Skipping CPU AVX2 generation step as requested"
-    }
-}
-
-function build_cuda() {
-    if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
-        # Then build cuda as a dynamically loaded library
-        $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
-        $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
-        if ($null -ne $script:CUDA_VERSION) {
-            $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
-        }
-        init_vars
-        $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
-        $script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
-        $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
-        if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
-            write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
-            $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
-            write-host "building custom CUDA GPU"
-        }
-        build
-        sign
-        install
-
-        write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\"
-        cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
-        cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
-        cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
-    } else {
-        write-host "Skipping CUDA generation step"
-    }
-}
-
-function build_rocm() {
-    if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
-        $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
-        if ($null -ne $script:ROCM_VERSION) {
-            $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
-        }
-
-        init_vars
-        $script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
-        $script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
-        $script:cmakeDefs += @(
-            "-G", "Ninja", 
-            "-DCMAKE_C_COMPILER=clang.exe",
-            "-DCMAKE_CXX_COMPILER=clang++.exe",
-            "-DLLAMA_HIPBLAS=on",
-            "-DHIP_PLATFORM=amd",
-            "-DLLAMA_AVX=on",
-            "-DLLAMA_AVX2=off",
-            "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
-            "-DAMDGPU_TARGETS=$(amdGPUs)",
-            "-DGPU_TARGETS=$(amdGPUs)"
-            )
-
-        # Make sure the ROCm binary dir is first in the path
-        $env:PATH="$env:HIP_PATH\bin;$env:PATH"
-
-        # We have to clobber the LIB var from the developer shell for clang to work properly
-        $env:LIB=""
-        if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
-            write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
-            $script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
-            write-host "building custom ROCM GPU"
-        }
-        write-host "Building ROCm"
-        build
-        # Ninja doesn't prefix with config name
-        ${script:config}=""
-        if ($null -ne $script:DUMPBIN) {
-            & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
-        }
-        sign
-        install
-
-        # Assumes v5.7, may need adjustments for v6
-        rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
-        md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
-        cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
-        cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
-        # amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
-        cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\"
-    } else {
-        write-host "Skipping ROCm generation step"
-    }
-}
+if ($null -eq ${env:OLLAMA_SKIP_CPU_GENERATE}) {

+# GCC build for direct linking into the Go binary
 init_vars
-if ($($args.count) -eq 0) {
-    git_module_setup
-    apply_patches
-    build_static
-    build_cpu
-    build_cpu_avx
-    build_cpu_avx2
-    build_cuda
-    build_rocm
+# cmake will silently fall back to msvc compilers if mingw isn't in the path, so detect and fail fast
+# as we need this to be compiled by gcc for golang to be able to link with it
+write-host "Checking for MinGW..."
+# error action ensures we exit on failure
+get-command gcc
+get-command mingw32-make
+$script:cmakeTargets = @("llama", "ggml")
+$script:cmakeDefs = @(
+    "-G", "MinGW Makefiles"
+    "-DCMAKE_C_COMPILER=gcc.exe",
+    "-DCMAKE_CXX_COMPILER=g++.exe",
+    "-DBUILD_SHARED_LIBS=off",
+    "-DLLAMA_NATIVE=off",
+    "-DLLAMA_AVX=off",
+    "-DLLAMA_AVX2=off",
+    "-DLLAMA_AVX512=off",
+    "-DLLAMA_F16C=off",
+    "-DLLAMA_FMA=off")
+$script:buildDir="../build/windows/${script:ARCH}_static"
+write-host "Building static library"
+build

-    cleanup
-    write-host "`ngo generate completed.  LLM runners: $(get-childitem -path $script:DIST_BASE)"
+# remaining llama.cpp builds use MSVC 
+    init_vars
+    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+    $script:buildDir="../build/windows/${script:ARCH}/cpu"
+    $script:distDir="$script:DIST_BASE\cpu"
+    write-host "Building LCD CPU"
+    build
+    sign
+    install
+
+    init_vars
+    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+    $script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
+    $script:distDir="$script:DIST_BASE\cpu_avx"
+    write-host "Building AVX CPU"
+    build
+    sign
+    install
+
+    init_vars
+    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
+    $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
+    $script:distDir="$script:DIST_BASE\cpu_avx2"
+    write-host "Building AVX2 CPU"
+    build
+    sign
+    install
 } else {
-    for ( $i = 0; $i -lt $args.count; $i++ ) {
-        write-host "performing $($args[$i])"
-        & $($args[$i])
-    } 
-}
+    write-host "Skipping CPU generation step as requested"
+}
+
+if ($null -ne $script:CUDA_LIB_DIR) {
+    # Then build cuda as a dynamically loaded library
+    $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
+    $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
+    if ($null -ne $script:CUDA_VERSION) {
+        $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
+    }
+    init_vars
+    $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
+    $script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
+    $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+    if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
+        write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
+        $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
+        write-host "building custom CUDA GPU"
+    }
+    build
+    sign
+    install
+}
+
+if ($null -ne $env:HIP_PATH) {
+    $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
+    if ($null -ne $script:ROCM_VERSION) {
+        $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
+    }
+
+    init_vars
+    $script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
+    $script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
+    $script:cmakeDefs += @(
+        "-G", "Ninja", 
+        "-DCMAKE_C_COMPILER=clang.exe",
+        "-DCMAKE_CXX_COMPILER=clang++.exe",
+        "-DLLAMA_HIPBLAS=on",
+        "-DHIP_PLATFORM=amd",
+        "-DLLAMA_AVX=on",
+        "-DLLAMA_AVX2=off",
+        "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
+        "-DAMDGPU_TARGETS=$(amdGPUs)",
+        "-DGPU_TARGETS=$(amdGPUs)"
+        )
+
+    # Make sure the ROCm binary dir is first in the path
+    $env:PATH="$env:HIP_PATH\bin;$env:PATH"
+
+    # We have to clobber the LIB var from the developer shell for clang to work properly
+    $env:LIB=""
+    if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
+        write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
+        $script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
+        write-host "building custom ROCM GPU"
+    }
+    write-host "Building ROCm"
+    build
+    # Ninja doesn't prefix with config name
+    ${script:config}=""
+    if ($null -ne $script:DUMPBIN) {
+        & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
+    }
+    sign
+    install
+}
+
+
+cleanup
+write-host "`ngo generate completed.  LLM runners: $(get-childitem -path $script:DIST_BASE)"
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -191,7 +191,7 @@ func (l Layer) size() (size uint64) {
 type Tensor struct {
 	Name   string `json:"name"`
 	Kind   uint32 `json:"kind"`
-	Offset uint64 `json:"-"`
+	Offset uint64 `json:"offset"`

 	// Shape is the number of elements in each dimension
 	Shape []uint64 `json:"shape"`
--- a/llm/llama.cpp
+++ b/llm/llama.cpp
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -5,6 +5,7 @@ import (
 	"log/slog"
 	"os"
 	"strconv"
+	"strings"

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
@@ -99,26 +100,8 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 		return 0, 0
 	}

-	layers := ggml.Tensors().Layers()
-
-	var memoryLayerOutput uint64
-	if layer, ok := layers["output_norm"]; ok {
-		memoryLayerOutput += layer.size()
-	}
-
-	if layer, ok := layers["output"]; ok {
-		memoryLayerOutput += layer.size()
-	} else if layer, ok := layers["token_embd"]; ok {
-		memoryLayerOutput += layer.size()
-	}
-
-	if gpus[0].Library == "metal" && opts.UseMMap {
-		// memory is preallocated for output tensors
-		memoryRequiredTotal += memoryLayerOutput
-		memoryRequiredPartial += memoryLayerOutput
-	}
-
 	var layerCount int
+	layers := ggml.Tensors().Layers()
 	for i := 0; i < int(ggml.KV().BlockCount()); i++ {
 		memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()

@@ -132,11 +115,15 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 		}
 	}

-	if gpus[0].Library != "metal" || !opts.UseMMap {
-		// memory was not preallocated for output tensors
-		memoryRequiredTotal += memoryLayerOutput
+	var memoryLayerOutput uint64
+	for k, v := range layers {
+		if !strings.HasPrefix(k, "blk.") {
+			memoryLayerOutput += v.size()
+		}
 	}

+	memoryRequiredTotal += memoryLayerOutput
+
 	if memoryAvailable > memoryRequiredTotal {
 		layerCount = int(ggml.KV().BlockCount()) + 1
 		memoryRequiredPartial = memoryRequiredTotal
--- a/llm/patches/02-clip-log.diff
+++ b/llm/patches/02-clip-log.diff
@@ -1,12 +0,0 @@
-diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index e431c7f7..f077e688 100644
--- a/examples/llava/clip.cpp
-+++ b/examples/llava/clip.cpp
-@@ -3,6 +3,7 @@
- // I'll gradually clean and extend it
- // Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch
- #include "clip.h"
-+#include "common.h"
- #include "log.h"
- #include "ggml.h"
- #include "ggml-alloc.h"
--- a/llm/patches/04-metal.diff
+++ b/llm/patches/04-metal.diff
@@ -1,45 +0,0 @@
-diff --git a/ggml-metal.m b/ggml-metal.m
-index 0207b787..b5e9884b 100644
--- a/ggml-metal.m
-+++ b/ggml-metal.m
-@@ -1396,27 +1396,23 @@ static enum ggml_status ggml_metal_graph_compute(
-                         // to the matrix-vector kernel
-                         int ne11_mm_min = 1;
- 
-#if 0
-                         // the numbers below are measured on M2 Ultra for 7B and 13B models
-                         // these numbers do not translate to other devices or model sizes
-                         // TODO: need to find a better approach
-                        if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) {
-                            switch (src0t) {
-                                case GGML_TYPE_F16:  ne11_mm_min = 2;  break;
-                                case GGML_TYPE_Q8_0: ne11_mm_min = 7;  break;
-                                case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
-                                case GGML_TYPE_Q3_K: ne11_mm_min = 7;  break;
-                                case GGML_TYPE_Q4_0:
-                                case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
-                                case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
-                                case GGML_TYPE_Q5_0:                          // not tested yet
-                                case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
-                                case GGML_TYPE_Q5_K: ne11_mm_min = 7;  break;
-                                case GGML_TYPE_Q6_K: ne11_mm_min = 7;  break;
-                                default:             ne11_mm_min = 1;  break;
-                            }
-+                        switch (src0t) {
-+                            case GGML_TYPE_F16:  ne11_mm_min = 2;  break;
-+                            case GGML_TYPE_Q8_0: ne11_mm_min = 7;  break;
-+                            case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
-+                            case GGML_TYPE_Q3_K: ne11_mm_min = 7;  break;
-+                            case GGML_TYPE_Q4_0:
-+                            case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
-+                            case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
-+                            case GGML_TYPE_Q5_0:                          // not tested yet
-+                            case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
-+                            case GGML_TYPE_Q5_K: ne11_mm_min = 7;  break;
-+                            case GGML_TYPE_Q6_K: ne11_mm_min = 7;  break;
-+                            default:             ne11_mm_min = 1;  break;
-                         }
-#endif
- 
-                         // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
-                         // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
--- a/llm/server.go
+++ b/llm/server.go
@@ -442,7 +442,7 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 		select {
 		case <-ctx.Done():
 			slog.Info("context expired before server started")
-			return fmt.Errorf("timed out waiting for llama runner to start: %w", ctx.Err())
+			return fmt.Errorf("timed out waiting for llama runner to start")
 		case err := <-s.done:
 			msg := ""
 			if s.status != nil && s.status.LastErrMsg != "" {
@@ -560,13 +560,6 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 		return err
 	}
 	defer s.sem.Release(1)
-
-	// only allow maximum 10 "context shifts" to avoid infinite generation
-	if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
-		req.Options.NumPredict = 10 * s.options.NumCtx
-		slog.Debug("setting token limit to 10x num_ctx", "num_ctx", s.options.NumCtx, "num_predict", req.Options.NumPredict)
-	}
-
 	request := map[string]any{
 		"prompt":            req.Prompt,
 		"stream":            true,
--- a/scripts/build_windows.ps1
+++ b/scripts/build_windows.ps1
@@ -82,7 +82,7 @@ function buildOllama() {
        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
    }
    New-Item -ItemType Directory -Path .\dist\windows-amd64\ -Force
-    cp .\ollama.exe .\dist\windows-amd64\
+    cp .\ollama.exe .\dist\windows-amd64\ollama-windows-amd64.exe
 }

 function buildApp() {
@@ -109,6 +109,9 @@ function gatherDependencies() {
    cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\"
    cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\"

+    cp "${script:NVIDIA_DIR}\cudart64_*.dll" "${script:DEPS_DIR}\"
+    cp "${script:NVIDIA_DIR}\cublas64_*.dll" "${script:DEPS_DIR}\"
+    cp "${script:NVIDIA_DIR}\cublasLt64_*.dll" "${script:DEPS_DIR}\"

    cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\"
    if ("${env:KEY_CONTAINER}") {
@@ -120,6 +123,15 @@ function gatherDependencies() {
            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
        }
    }
+    if ($null -ne $env:HIP_PATH) {
+        # Assumes v5.7, may need adjustments for v6
+        rm -ea 0 -recurse -force -path "${script:DEPS_DIR}\rocm\"
+        md "${script:DEPS_DIR}\rocm\rocblas\library\" -ea 0 > $null
+        cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:DEPS_DIR}\rocm\"
+        cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:DEPS_DIR}\rocm\"
+        # amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
+        cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:DEPS_DIR}\rocm\rocblas\library\"
+    }
 }

 function buildInstaller() {
--- a/server/images.go
+++ b/server/images.go
@@ -29,7 +29,6 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/parser"
-	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
 )

@@ -702,39 +701,36 @@ func convertModel(name, path string, fn func(resp api.ProgressResponse)) (string
 	return path, nil
 }

-func CopyModel(src, dst model.Name) error {
-	if !dst.IsFullyQualified() {
-		return model.Unqualified(dst)
-	}
-	if !src.IsFullyQualified() {
-		return model.Unqualified(src)
-	}
-
-	manifests, err := GetManifestPath()
+func CopyModel(src, dest string) error {
+	srcModelPath := ParseModelPath(src)
+	srcPath, err := srcModelPath.GetManifestPath()
 	if err != nil {
 		return err
 	}

-	dstpath := filepath.Join(manifests, dst.Filepath())
-	if err := os.MkdirAll(filepath.Dir(dstpath), 0o755); err != nil {
-		return err
-	}
-
-	srcpath := filepath.Join(manifests, src.Filepath())
-	srcfile, err := os.Open(srcpath)
+	destModelPath := ParseModelPath(dest)
+	destPath, err := destModelPath.GetManifestPath()
 	if err != nil {
 		return err
 	}
-	defer srcfile.Close()
-
-	dstfile, err := os.Create(dstpath)
-	if err != nil {
+	if err := os.MkdirAll(filepath.Dir(destPath), 0o755); err != nil {
 		return err
 	}
-	defer dstfile.Close()

-	_, err = io.Copy(dstfile, srcfile)
-	return err
+	// Copy the manifest by reading it whole and writing it to destPath.
+	input, err := os.ReadFile(srcPath)
+	if err != nil {
+		fmt.Println("Error reading file:", err)
+		return err
+	}
+
+	// A failed write is reported as a write, not with the read message.
+	if err := os.WriteFile(destPath, input, 0o644); err != nil {
+		fmt.Println("Error writing file:", err)
+		return err
+	}
+
+	return nil
 }

 func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{}, dryRun bool) error {
--- a/server/routes.go
+++ b/server/routes.go
@@ -29,7 +29,6 @@ import (
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
-	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
 )

@@ -146,11 +145,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 	select {
 	case runner = <-rCh:
 	case err = <-eCh:
-		if errors.Is(err, context.Canceled) {
-			c.JSON(499, gin.H{"error": "request canceled"})
-			return
-		}
-
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
@@ -394,11 +388,6 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
 	select {
 	case runner = <-rCh:
 	case err = <-eCh:
-		if errors.Is(err, context.Canceled) {
-			c.JSON(499, gin.H{"error": "request canceled"})
-			return
-		}
-
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
@@ -732,12 +721,61 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 	if err != nil {
 		return nil, err
 	}
-
 	resp.Modelfile = mf

+	ggmlData, err := getGGMLData(model)
+	if err != nil {
+		return nil, err
+	}
+	resp.ModelInfo = string(ggmlData)
+
 	return resp, nil
 }

+// getGGMLData decodes the GGML file at model.ModelPath and returns its
+// key/value metadata together with its tensors, encoded as a JSON object
+// with the keys "kv" and "tensors". Array values holding more than five
+// elements are replaced by an empty list in the output.
+func getGGMLData(model *Model) ([]byte, error) {
+	f, err := os.Open(model.ModelPath)
+	if err != nil {
+		return nil, err
+	}
+	// Close the file on every return path; without this each call would
+	// leave an open descriptor behind.
+	defer f.Close()
+
+	ggml, _, err := llm.DecodeGGML(f)
+	if err != nil {
+		return nil, err
+	}
+
+	kv := ggml.KV()
+	kvMap := make(map[string]any, len(kv))
+
+	for k, val := range kv {
+		// Arrays longer than five entries are not echoed back; an empty
+		// list is reported in their place.
+		if v, ok := val.([]any); ok && len(v) > 5 {
+			kvMap[k] = []string{}
+			continue
+		}
+		kvMap[k] = val
+	}
+
+	ggmlMap := map[string]any{
+		"kv":      kvMap,
+		"tensors": ggml.Tensors(),
+	}
+
+	ggmlJson, err := json.Marshal(ggmlMap)
+	if err != nil {
+		return nil, err
+	}
+
+	return ggmlJson, nil
+}
+
 func (s *Server) ListModelsHandler(c *gin.Context) {
 	models := make([]api.ModelResponse, 0)
 	manifestsPath, err := GetManifestPath()
@@ -799,34 +837,34 @@ func (s *Server) ListModelsHandler(c *gin.Context) {
 }

 func (s *Server) CopyModelHandler(c *gin.Context) {
-	var r api.CopyRequest
-	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
+	var req api.CopyRequest
+	err := c.ShouldBindJSON(&req)
+	switch {
+	case errors.Is(err, io.EOF):
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
 		return
-	} else if err != nil {
+	case err != nil:
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
 	}

-	src := model.ParseName(r.Source)
-	if !src.IsValid() {
-		_ = c.Error(fmt.Errorf("source %q is invalid", r.Source))
-	}
-
-	dst := model.ParseName(r.Destination)
-	if !dst.IsValid() {
-		_ = c.Error(fmt.Errorf("destination %q is invalid", r.Destination))
-	}
-
-	if len(c.Errors) > 0 {
-		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": c.Errors.Errors()})
+	if req.Source == "" || req.Destination == "" {
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "source and destination are required"})
 		return
 	}

-	if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
-		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
-	} else if err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+	if err := ParseModelPath(req.Destination).Validate(); err != nil {
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		return
+	}
+
+	if err := CopyModel(req.Source, req.Destination); err != nil {
+		// A not-exist error from the copy is reported as 404; anything else is a 500.
+		if errors.Is(err, os.ErrNotExist) {
+			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
+			return
+		}
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 	}
 }

@@ -1226,11 +1264,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	select {
 	case runner = <-rCh:
 	case err = <-eCh:
-		if errors.Is(err, context.Canceled) {
-			c.JSON(499, gin.H{"error": "request canceled"})
-			return
-		}
-
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
--- a/server/sched.go
+++ b/server/sched.go
@@ -23,6 +23,7 @@ import (
 type LlmRequest struct {
 	ctx             context.Context //nolint:containedctx
 	model           *Model
+	ggml            *llm.GGML // TODO - how large is this, and do we need to free it after we've finished loading?
 	opts            api.Options
 	sessionDuration time.Duration
 	successCh       chan *runnerRef
@@ -38,7 +39,7 @@ type Scheduler struct {
 	loaded   map[string]*runnerRef
 	loadedMu sync.Mutex

-	loadFn      func(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList)
+	loadFn      func(req *LlmRequest, gpus gpu.GpuInfoList)
 	newServerFn func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error)
 	getGpuFn    func() gpu.GpuInfoList
 }
@@ -46,7 +47,6 @@ type Scheduler struct {
 // TODO set this to zero after a release or two, to enable multiple models by default
 var loadedMax = 1          // Maximum runners; < 1 maps to as many as will fit in VRAM (unlimited for CPU runners)
 var maxQueuedRequests = 10 // TODO configurable
-var numParallel = 1

 func InitScheduler(ctx context.Context) *Scheduler {
 	maxRunners := os.Getenv("OLLAMA_MAX_LOADED_MODELS")
@@ -58,14 +58,6 @@ func InitScheduler(ctx context.Context) *Scheduler {
 			loadedMax = m
 		}
 	}
-	if onp := os.Getenv("OLLAMA_NUM_PARALLEL"); onp != "" {
-		p, err := strconv.Atoi(onp)
-		if err != nil || p <= 0 {
-			slog.Error("invalid parallel setting, must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
-		} else {
-			numParallel = p
-		}
-	}

 	sched := &Scheduler{
 		pendingReqCh:  make(chan *LlmRequest, maxQueuedRequests),
@@ -82,16 +74,20 @@ func InitScheduler(ctx context.Context) *Scheduler {

 // context must be canceled to decrement ref count and release the runner
 func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration time.Duration) (chan *runnerRef, chan error) {
+	ggml, err := llm.LoadModel(model.ModelPath)
 	req := &LlmRequest{
 		ctx:             c,
 		model:           model,
+		ggml:            ggml,
 		opts:            opts,
 		sessionDuration: sessionDuration,
 		successCh:       make(chan *runnerRef),
 		errCh:           make(chan error, 1),
 	}
-	// context split across parallel threads
-	opts.NumCtx = opts.NumCtx * numParallel
+	if err != nil {
+		req.errCh <- err
+		return req.successCh, req.errCh
+	}
 	select {
 	case s.pendingReqCh <- req:
 	default:
@@ -134,39 +130,28 @@ func (s *Scheduler) processPending(ctx context.Context) {
 						pending.useLoadedRunner(runner, s.finishedReqCh)
 						break
 					}
+				} else if loadedCount == 0 {
+					slog.Debug("loading first model", "model", pending.model.ModelPath)
+					gpus := s.getGpuFn()
+					g := pickBestFitGPUs(pending, gpus)
+					if g != nil {
+						gpus = g
+					}
+					s.loadFn(pending, gpus)
+					break
 				} else if loadedMax > 0 && loadedCount >= loadedMax {
 					slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount)
 					runnerToExpire = s.findRunnerToUnload(pending)
 				} else {
-					// Either no models are loaded or below loadedMax
+					// More than one loaded model, so we have to see if the new one fits
 					// Get a refreshed GPU list
 					gpus := s.getGpuFn()
-
-					// Load model for fitting
-					ggml, err := llm.LoadModel(pending.model.ModelPath)
-					if err != nil {
-						pending.errCh <- err
-						break
-					}
-
-					// No models loaded. Load the model but prefer the best fit.
-					if loadedCount == 0 {
-						slog.Debug("loading first model", "model", pending.model.ModelPath)
-						g := pickBestFitGPUs(pending, ggml, gpus)
-						if g != nil {
-							gpus = g
-						}
-						s.loadFn(pending, ggml, gpus)
-						break
-					}
-
-					// More than one loaded model, so we have to see if the new one fits
 					// Update free memory from currently loaded models
 					s.updateFreeSpace(gpus)
-					gpus = pickBestFitGPUs(pending, ggml, gpus)
+					gpus = pickBestFitGPUs(pending, gpus)
 					if gpus != nil {
 						slog.Debug("new model fits with existing models, loading")
-						s.loadFn(pending, ggml, gpus)
+						s.loadFn(pending, gpus)
 						break
 					}
 					runnerToExpire = s.findRunnerToUnload(pending)
@@ -297,8 +282,8 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
 	}()
 }

-func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) {
-	llama, err := s.newServerFn(gpus, req.model.ModelPath, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
+func (s *Scheduler) load(req *LlmRequest, gpus gpu.GpuInfoList) {
+	llama, err := s.newServerFn(gpus, req.model.ModelPath, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
 	if err != nil {
 		// some older models are not compatible with newer versions of llama.cpp
 		// show a generalized compatibility error until there is a better way to
@@ -432,21 +417,16 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
 	slog.Debug("evaluating already loaded", "model", req.model.ModelPath)
 	runner.refMu.Lock()
 	defer runner.refMu.Unlock()
-
+	// Ignore the NumGPU settings for comparison
+	optsExisting := runner.Options.Runner
+	optsExisting.NumGPU = -1
+	optsNew := req.opts.Runner
+	optsNew.NumGPU = -1
 	timeout := 10 * time.Second
 	if runner.loading {
 		timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
 	}
-
-	// Don't reload runner if num_gpu=-1 was provided
-	optsExisting := runner.Options.Runner
-	optsNew := req.opts.Runner
-	if optsNew.NumGPU < 0 {
-		optsExisting.NumGPU = -1
-		optsNew.NumGPU = -1
-	}
-
-	ctx, cancel := context.WithTimeout(ctx, timeout)
+	ctx, cancel := context.WithTimeout(ctx, timeout) // BUG -
 	defer cancel()
 	if !reflect.DeepEqual(runner.adapters, req.model.AdapterPaths) || // have the adapters changed?
 		!reflect.DeepEqual(runner.projectors, req.model.ProjectorPaths) || // have the projectors changed?
@@ -454,7 +434,6 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
 		runner.llama.Ping(ctx) != nil {
 		return true
 	}
-
 	return false
 }

@@ -475,7 +454,7 @@ func (a ByDuration) Less(i, j int) bool {

 // pickBestFitGPUs will try to find the optimal placement of the model in the available GPUs where the model fully fits
 // If the model can not be fit fully within the available GPU(s) nil is returned
-func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.GpuInfoList {
+func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
 	var estimatedVRAM uint64
 	for _, gl := range gpus.ByLibrary() {
 		var ok bool
@@ -487,7 +466,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.

 		// First attempt to fit the model into a single GPU
 		for _, g := range sgl {
-			if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
+			if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
 				slog.Debug("new model will fit in available VRAM in single GPU, loading", "model", req.model.ModelPath, "gpu", g.ID, "available", g.FreeMemory, "required", format.HumanBytes2(estimatedVRAM))
 				return []gpu.GpuInfo{g}
 			}
@@ -498,7 +477,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.
 		// - try subsets of GPUs instead of just falling back to 1 or all in a family

 		// Now try all the GPUs
-		if ok, estimatedVRAM = llm.PredictServerFit(gl, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
+		if ok, estimatedVRAM = llm.PredictServerFit(gl, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
 			slog.Debug("new model will fit in available VRAM, loading", "model", req.model.ModelPath, "library", gl[0].Library, "required", format.HumanBytes2(estimatedVRAM))
 			return gl
 		}
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -47,7 +47,6 @@ func TestLoad(t *testing.T) {
 	ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
 	defer done()
 	s := InitScheduler(ctx)
-	var ggml *llm.GGML // value not used in tests
 	req := &LlmRequest{
 		ctx:             ctx,
 		model:           &Model{ModelPath: "foo"},
@@ -60,7 +59,7 @@ func TestLoad(t *testing.T) {
 		return nil, fmt.Errorf("something failed to load model blah")
 	}
 	gpus := gpu.GpuInfoList{}
-	s.load(req, ggml, gpus)
+	s.load(req, gpus)
 	require.Len(t, req.successCh, 0)
 	require.Len(t, req.errCh, 1)
 	require.Len(t, s.loaded, 0)
@@ -71,7 +70,7 @@ func TestLoad(t *testing.T) {
 	s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
 		return server, nil
 	}
-	s.load(req, ggml, gpus)
+	s.load(req, gpus)
 	select {
 	case err := <-req.errCh:
 		require.NoError(t, err)
@@ -83,7 +82,7 @@ func TestLoad(t *testing.T) {

 	req.model.ModelPath = "dummy_model_path"
 	server.waitResp = fmt.Errorf("wait failure")
-	s.load(req, ggml, gpus)
+	s.load(req, gpus)
 	select {
 	case err := <-req.errCh:
 		require.Contains(t, err.Error(), "wait failure")
@@ -102,7 +101,6 @@ type bundle struct {
 	ctxDone func()
 	srv     *mockLlm
 	req     *LlmRequest
-	ggml    *llm.GGML
 }

 func (scenario *bundle) newServer(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
@@ -134,15 +132,14 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
 		{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
 	})
 	assert.Nil(t, err)
-
 	fname := f.Name()
 	model := &Model{Name: modelName, ModelPath: fname}
-	scenario.ggml, err = llm.LoadModel(model.ModelPath)
+	ggml, err := llm.LoadModel(model.ModelPath)
 	require.NoError(t, err)
-
 	scenario.req = &LlmRequest{
 		ctx:             scenario.ctx,
 		model:           model,
+		ggml:            ggml,
 		sessionDuration: 5 * time.Millisecond,
 		successCh:       make(chan *runnerRef, 1),
 		errCh:           make(chan error, 1),
@@ -160,13 +157,13 @@ func TestRequests(t *testing.T) {
 	scenario1a.req.sessionDuration = 0
 	scenario1b := newScenario(t, ctx, "ollama-model-1", 11)
 	scenario1b.req.model = scenario1a.req.model
-	scenario1b.ggml = scenario1a.ggml
+	scenario1b.req.ggml = scenario1a.req.ggml
 	scenario1b.req.sessionDuration = 0

 	// simple reload of same model
 	scenario2a := newScenario(t, ctx, "ollama-model-1", 20)
 	scenario2a.req.model = scenario1a.req.model
-	scenario2a.ggml = scenario1a.ggml
+	scenario2a.req.ggml = scenario1a.req.ggml

 	// Multiple loaded models
 	scenario3a := newScenario(t, ctx, "ollama-model-3a", 1*format.GigaByte)
@@ -325,14 +322,13 @@ func TestGetRunner(t *testing.T) {
 	successCh1c, errCh1c := s.GetRunner(scenario1c.ctx, scenario1c.req.model, scenario1c.req.opts, scenario1c.req.sessionDuration)
 	require.Len(t, s.pendingReqCh, 0)
 	require.Len(t, successCh1c, 0)
-	require.Len(t, errCh1c, 0)
-
-	time.Sleep(5 * time.Millisecond)
-	require.Len(t, s.loaded, 0)
 	require.Len(t, errCh1c, 1)
 	err = <-errCh1c
 	require.Contains(t, err.Error(), "bad path")
 	scenario1b.ctxDone()
+
+	time.Sleep(5 * time.Millisecond)
+	require.Len(t, s.loaded, 0)
 }

 // TODO - add one scenario that triggers the bogus finished event with positive ref count
@@ -370,9 +366,7 @@ func TestPrematureExpired(t *testing.T) {
 	require.LessOrEqual(t, len(s.finishedReqCh), 1)
 	time.Sleep(10 * time.Millisecond)
 	require.Len(t, s.finishedReqCh, 0)
-	s.loadedMu.Lock()
 	require.Len(t, s.loaded, 0)
-	s.loadedMu.Unlock()

 	// also shouldn't happen in real life
 	s.finishedReqCh <- scenario1a.req
@@ -432,6 +426,7 @@ func TestUpdateFreeSpace(t *testing.T) {
 	s.updateFreeSpace(gpus)
 	require.Equal(t, uint64(850), gpus[0].FreeMemory)
 	require.Equal(t, uint64(1850), gpus[1].FreeMemory)
+
 }

 func TestFindRunnerToUnload(t *testing.T) {
@@ -490,9 +485,6 @@ func TestNeedsReload(t *testing.T) {
 	require.False(t, resp)
 	req.opts.NumGPU = 99
 	resp = runner.needsReload(ctx, req)
-	require.True(t, resp)
-	req.opts.NumGPU = -1
-	resp = runner.needsReload(ctx, req)
 	require.False(t, resp)
 }

--- a/types/model/digest.go
+++ b/types/model/digest.go
@@ -0,0 +1,87 @@
+package model
+
+import (
+	"fmt"
+	"log/slog"
+	"strings"
+	"unicode"
+)
+
+// Digest represents a digest of a model Manifest. It is a comparable value
+// type and is immutable.
+//
+// The zero Digest is not a valid digest.
+type Digest struct {
+	s string
+}
+
+// Split returns the digest type and the digest value.
+func (d Digest) Split() (typ, digest string) {
+	typ, digest, _ = strings.Cut(d.s, "-")
+	return
+}
+
+// String returns the digest in the form of "<digest-type>-<digest>", or the
+// empty string if the digest is invalid.
+func (d Digest) String() string { return d.s }
+
+// IsValid returns true if the digest is valid (not zero).
+//
+// A valid digest may be created only by ParseDigest, or
+// ParseName(name).Digest().
+func (d Digest) IsValid() bool { return d.s != "" }
+
+// LogValue implements slog.LogValuer.
+func (d Digest) LogValue() slog.Value {
+	return slog.StringValue(d.String())
+}
+
+var (
+	_ slog.LogValuer = Digest{}
+)
+
+// ParseDigest parses s in the form "<digest-type>-<digest>" (or, when s has no
+// "-", "<digest-type>:<digest>") into a Digest; invalid input yields Digest{}.
+func ParseDigest(s string) Digest {
+	typ, digest, ok := strings.Cut(s, "-")
+	if !ok {
+		typ, digest, ok = strings.Cut(s, ":")
+	}
+	if ok && isValidDigestType(typ) && isValidHex(digest) && len(digest) >= 2 {
+		return Digest{s: fmt.Sprintf("%s-%s", typ, digest)}
+	}
+	return Digest{}
+}
+
+func MustParseDigest(s string) Digest {
+	d := ParseDigest(s)
+	if !d.IsValid() {
+		panic(fmt.Sprintf("invalid digest: %q", s))
+	}
+	return d
+}
+
+func isValidDigestType(s string) bool {
+	if len(s) == 0 {
+		return false
+	}
+	for _, r := range s {
+		if !unicode.IsLower(r) && !unicode.IsDigit(r) {
+			return false
+		}
+	}
+	return true
+}
+
+func isValidHex(s string) bool {
+	if len(s) == 0 {
+		return false
+	}
+	for i := range s {
+		c := s[i]
+		if c < '0' || c > '9' && c < 'a' || c > 'f' {
+			return false
+		}
+	}
+	return true
+}
--- a/types/model/digest_test.go
+++ b/types/model/digest_test.go
@@ -0,0 +1,46 @@
+package model
+
+import "testing"
+
+var testDigests = map[string]Digest{
+	"":                 {},
+	"sha256-1234":      {s: "sha256-1234"},
+	"sha256-5678":      {s: "sha256-5678"},
+	"blake2-9abc":      {s: "blake2-9abc"},
+	"-1234":            {},
+	"sha256-":          {},
+	"sha256-1234-5678": {},
+	"sha256-P":         {}, //         invalid  hex
+	"sha256-1234P":     {},
+	"---":              {},
+}
+
+func TestDigestParse(t *testing.T) {
+	// Test cases.
+	for s, want := range testDigests {
+		got := ParseDigest(s)
+		t.Logf("ParseDigest(%q) = %#v", s, got)
+		if got != want {
+			t.Errorf("ParseDigest(%q) = %q; want %q", s, got, want)
+		}
+	}
+}
+
+func TestDigestString(t *testing.T) {
+	// Test cases.
+	for s, d := range testDigests {
+		want := s
+		if !d.IsValid() {
+			want = ""
+		}
+		got := d.String()
+		if got != want {
+			t.Errorf("ParseDigest(%q).String() = %q; want %q", s, got, want)
+		}
+
+		got = ParseDigest(s).String()
+		if got != want {
+			t.Errorf("roundtrip ParseDigest(%q).String() = %q; want %q", s, got, want)
+		}
+	}
+}
--- a/types/model/name.go
+++ b/types/model/name.go
--- a/types/model/name_test.go
+++ b/types/model/name_test.go
@@ -1,295 +1,715 @@
 package model

 import (
-	"reflect"
+	"bytes"
+	"cmp"
+	"fmt"
+	"log/slog"
+	"path/filepath"
+	"slices"
 	"strings"
 	"testing"
 )

-const (
-	part80  = "88888888888888888888888888888888888888888888888888888888888888888888888888888888"
-	part350 = "33333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333"
-)
+// fields holds the individual parts of a parsed Name as plain strings so
+// that test expectations can be written as literals and compared with ==.
+type fields struct {
+	host, namespace, model, tag, build string
+	digest                             string
+}

-func TestParseNameParts(t *testing.T) {
+// fieldsFromName copies each part of p into a fields value.
+func fieldsFromName(p Name) fields {
+	var f fields
+	f.host = p.parts[PartHost]
+	f.namespace = p.parts[PartNamespace]
+	f.model = p.parts[PartModel]
+	f.tag = p.parts[PartTag]
+	f.build = p.parts[PartBuild]
+	f.digest = p.parts[PartDigest]
+	return f
+}
+
+// testNames maps input strings to the parts ParseName is expected to
+// produce for them when nothing is filled in. The zero fields value marks
+// inputs that are expected to be rejected.
+var testNames = map[string]fields{
+	"mistral:latest":                 {model: "mistral", tag: "latest"},
+	"mistral":                        {model: "mistral"},
+	"mistral:30B":                    {model: "mistral", tag: "30B"},
+	"mistral:7b":                     {model: "mistral", tag: "7b"},
+	"mistral:7b+Q4_0":                {model: "mistral", tag: "7b", build: "Q4_0"},
+	"mistral+KQED":                   {model: "mistral", build: "KQED"},
+	"mistral.x-3:7b+Q4_0":            {model: "mistral.x-3", tag: "7b", build: "Q4_0"},
+	"mistral:7b+q4_0":                {model: "mistral", tag: "7b", build: "q4_0"},
+	"llama2":                         {model: "llama2"},
+	"user/model":                     {namespace: "user", model: "model"},
+	"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
+	"example.com/ns/mistral:7b+X":    {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
+	"localhost:5000/ns/mistral":      {host: "localhost:5000", namespace: "ns", model: "mistral"},
+
+	// invalid digest
+	"mistral:latest@invalid256-": {},
+	"mistral:latest@-123":        {},
+	"mistral:latest@!-123":       {},
+	"mistral:latest@1-!":         {},
+	"mistral:latest@":            {},
+
+	// resolved
+	"x@sha123-12": {model: "x", digest: "sha123-12"},
+	"@sha456-22":  {digest: "sha456-22"},
+	"@sha456-1":   {},
+	"@@sha123-22": {},
+
+	// preserves case for build
+	"x+b": {model: "x", build: "b"},
+
+	// invalid (includes fuzzing trophies)
+	" / / : + ": {},
+	" / : + ":   {},
+	" : + ":     {},
+	" + ":       {},
+	" : ":       {},
+	" / ":       {},
+	" /":        {},
+	"/ ":        {},
+	"/":         {},
+	":":         {},
+	"+":         {},
+
+	// (".") in namespace is not allowed
+	"invalid.com/7b+x": {},
+
+	"invalid:7b+Q4_0:latest": {},
+	"in valid":               {},
+	"invalid/y/z/foo":        {},
+	"/0":                     {},
+	"0 /0":                   {},
+	"0 /":                    {},
+	"0/":                     {},
+	":/0":                    {},
+	"+0/00000":               {},
+	"0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91": {},
+	"0//0":                        {},
+	"m+^^^":                       {},
+	"file:///etc/passwd":          {},
+	"file:///etc/passwd:latest":   {},
+	"file:///etc/passwd:latest+u": {},
+
+	":x": {},
+	"+x": {},
+	"x+": {},
+
+	// Disallow ("\.+") in any part to prevent path traversal anywhere
+	// we convert the name to a path.
+	"../etc/passwd":  {},
+	".../etc/passwd": {},
+	"./../passwd":    {},
+	"./0+..":         {},
+
+	strings.Repeat("a", MaxNamePartLen):   {model: strings.Repeat("a", MaxNamePartLen)},
+	strings.Repeat("a", MaxNamePartLen+1): {},
+}
+
+// TestIsValidNameLen checks that a namespace part one character longer than
+// MaxNamePartLen is rejected by IsValidNamePart.
+func TestIsValidNameLen(t *testing.T) {
+	tooLong := strings.Repeat("a", MaxNamePartLen+1)
+	if ok := IsValidNamePart(PartNamespace, tooLong); ok {
+		t.Errorf("unexpectedly valid long name")
+	}
+}
+
+// TestNameConsecutiveDots tests that consecutive dots are not allowed in
+// any part, to avoid path traversal. testNames also covers a few such
+// inputs, but this test is more exhaustive and exists to emphasize the
+// importance of preventing path traversal.
+func TestNameConsecutiveDots(t *testing.T) {
+	for n := 1; n < 10; n++ {
+		dots := strings.Repeat(".", n)
+		got := ParseName(dots, FillNothing).DisplayLong()
+		if n == 1 {
+			// A single dot is expected to round-trip unchanged.
+			if got != dots {
+				t.Errorf("ParseName(%q) = %q; want %q", dots, got, dots)
+			}
+			continue
+		}
+		// Two or more dots must be rejected.
+		if got != "" {
+			t.Errorf("ParseName(%q) = %q; want empty string", dots, got)
+		}
+	}
+}
+
+// TestNameParts checks that the parts array of a Name has exactly NumParts
+// elements.
+func TestNameParts(t *testing.T) {
+	var name Name
+	want, got := int(NumParts), len(name.parts)
+	if want != got {
+		t.Errorf("Parts() = %d; want %d", got, want)
+	}
+}
+
+// TestNamePartString checks that an out-of-range PartKind stringifies to
+// "Unknown" and that every kind in kindNames stringifies to its table name.
+func TestNamePartString(t *testing.T) {
+	const unknown = "Unknown"
+	if got := PartKind(-2).String(); got != unknown {
+		t.Errorf("Unknown part = %q; want %q", got, unknown)
+	}
+	for kind, want := range kindNames {
+		got := kind.String()
+		if got == want {
+			continue
+		}
+		t.Errorf("%s = %q; want %q", kind, got, want)
+	}
+}
+
+// TestParseName parses every entry of testNames, with and without an
+// http(s):// prefix, and checks both the resulting parts and that the
+// DisplayLong form parses back to a Name that is EqualFold to the first.
+func TestParseName(t *testing.T) {
+	for baseName, want := range testNames {
+		for _, prefix := range []string{"", "https://", "http://"} {
+			// We should get the same results with or without the
+			// http(s) prefixes
+			s := prefix + baseName
+
+			t.Run(s, func(t *testing.T) {
+				name := ParseName(s, FillNothing)
+				got := fieldsFromName(name)
+				if got != want {
+					t.Errorf("ParseName(%q) = %q; want %q", s, got, want)
+				}
+
+				// test round-trip
+				if !ParseName(name.DisplayLong(), FillNothing).EqualFold(name) {
+					t.Errorf("ParseName(%q).String() = %s; want %s", s, name.DisplayLong(), baseName)
+				}
+			})
+		}
+	}
+}
+
+func TestParseNameFill(t *testing.T) {
 	cases := []struct {
-		in              string
-		want            Name
-		wantValidDigest bool
+		in   string
+		fill string
+		want string
 	}{
-		{
-			in: "host/namespace/model:tag",
-			want: Name{
-				Host:      "host",
-				Namespace: "namespace",
-				Model:     "model",
-				Tag:       "tag",
-			},
-		},
-		{
-			in: "host/namespace/model",
-			want: Name{
-				Host:      "host",
-				Namespace: "namespace",
-				Model:     "model",
-			},
-		},
-		{
-			in: "namespace/model",
-			want: Name{
-				Namespace: "namespace",
-				Model:     "model",
-			},
-		},
-		{
-			in: "model",
-			want: Name{
-				Model: "model",
-			},
-		},
-		{
-			in: "h/nn/mm:t",
-			want: Name{
-				Host:      "h",
-				Namespace: "nn",
-				Model:     "mm",
-				Tag:       "t",
-			},
-		},
-		{
-			in: part80 + "/" + part80 + "/" + part80 + ":" + part80,
-			want: Name{
-				Host:      part80,
-				Namespace: part80,
-				Model:     part80,
-				Tag:       part80,
-			},
-		},
-		{
-			in: part350 + "/" + part80 + "/" + part80 + ":" + part80,
-			want: Name{
-				Host:      part350,
-				Namespace: part80,
-				Model:     part80,
-				Tag:       part80,
-			},
-		},
-		{
-			in: "@digest",
-			want: Name{
-				RawDigest: "digest",
-			},
-			wantValidDigest: false,
-		},
-		{
-			in: "model@sha256:" + validSHA256Hex,
-			want: Name{
-				Model:     "model",
-				RawDigest: "sha256:" + validSHA256Hex,
-			},
-			wantValidDigest: true,
-		},
+		{"mistral", "example.com/library/?:latest+Q4_0", "example.com/library/mistral:latest+Q4_0"},
+		{"mistral", "example.com/library/?:latest", "example.com/library/mistral:latest"},
+		{"llama2:x", "example.com/library/?:latest+Q4_0", "example.com/library/llama2:x+Q4_0"},
+
+		// Invalid
+		{"", "example.com/library/?:latest+Q4_0", ""},
+		{"llama2:?", "example.com/library/?:latest+Q4_0", ""},
 	}

 	for _, tt := range cases {
 		t.Run(tt.in, func(t *testing.T) {
-			got := ParseNameBare(tt.in)
-			if !reflect.DeepEqual(got, tt.want) {
-				t.Errorf("parseName(%q) = %v; want %v", tt.in, got, tt.want)
-			}
-			if got.Digest().IsValid() != tt.wantValidDigest {
-				t.Errorf("parseName(%q).Digest().IsValid() = %v; want %v", tt.in, got.Digest().IsValid(), tt.wantValidDigest)
-			}
-		})
-	}
-}
-
-var testCases = map[string]bool{ // name -> valid
-	"host/namespace/model:tag": true,
-	"host/namespace/model":     false,
-	"namespace/model":          false,
-	"model":                    false,
-	"@sha256-1000000000000000000000000000000000000000000000000000000000000000":      false,
-	"model@sha256-1000000000000000000000000000000000000000000000000000000000000000": false,
-	"model@sha256:1000000000000000000000000000000000000000000000000000000000000000": false,
-
-	// long (but valid)
-	part80 + "/" + part80 + "/" + part80 + ":" + part80:  true,
-	part350 + "/" + part80 + "/" + part80 + ":" + part80: true,
-
-	"h/nn/mm:t@sha256-1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes
-	"h/nn/mm:t@sha256:1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes
-
-	"m":        false, // model too short
-	"n/mm:":    false, // namespace too short
-	"h/n/mm:t": false, // namespace too short
-	"@t":       false, // digest too short
-	"mm@d":     false, // digest too short
-
-	// invalids
-	"^":      false,
-	"mm:":    false,
-	"/nn/mm": false,
-	"//":     false,
-	"//mm":   false,
-	"hh//":   false,
-	"//mm:@": false,
-	"00@":    false,
-	"@":      false,
-
-	// not starting with alphanum
-	"-hh/nn/mm:tt@dd": false,
-	"hh/-nn/mm:tt@dd": false,
-	"hh/nn/-mm:tt@dd": false,
-	"hh/nn/mm:-tt@dd": false,
-	"hh/nn/mm:tt@-dd": false,
-
-	"": false,
-
-	// hosts
-	"host:https/namespace/model:tag": true,
-
-	// colon in non-host part before tag
-	"host/name:space/model:tag": false,
-}
-
-func TestNameparseNameDefault(t *testing.T) {
-	const name = "xx"
-	n := ParseName(name)
-	got := n.String()
-	want := "registry.ollama.ai/library/xx:latest"
-	if got != want {
-		t.Errorf("parseName(%q).String() = %q; want %q", name, got, want)
-	}
-}
-
-func TestNameIsValid(t *testing.T) {
-	var numStringTests int
-	for s, want := range testCases {
-		n := ParseNameBare(s)
-		t.Logf("n: %#v", n)
-		got := n.IsValid()
-		if got != want {
-			t.Errorf("parseName(%q).IsValid() = %v; want %v", s, got, want)
-		}
-
-		// Test roundtrip with String
-		if got {
-			got := ParseNameBare(s).String()
-			if got != s {
-				t.Errorf("parseName(%q).String() = %q; want %q", s, got, s)
-			}
-			numStringTests++
-		}
-	}
-
-	if numStringTests == 0 {
-		t.Errorf("no tests for Name.String")
-	}
-}
-
-func TestNameIsValidPart(t *testing.T) {
-	cases := []struct {
-		kind partKind
-		s    string
-		want bool
-	}{
-		{kind: kindHost, s: "", want: false},
-		{kind: kindHost, s: "a", want: true},
-		{kind: kindHost, s: "a.", want: true},
-		{kind: kindHost, s: "a.b", want: true},
-		{kind: kindHost, s: "a:123", want: true},
-		{kind: kindHost, s: "a:123/aa/bb", want: false},
-		{kind: kindNamespace, s: "bb", want: true},
-		{kind: kindNamespace, s: "a.", want: false},
-		{kind: kindModel, s: "-h", want: false},
-		{kind: kindDigest, s: "sha256-1000000000000000000000000000000000000000000000000000000000000000", want: true},
-	}
-	for _, tt := range cases {
-		t.Run(tt.s, func(t *testing.T) {
-			got := isValidPart(tt.kind, tt.s)
-			if got != tt.want {
-				t.Errorf("isValidPart(%s, %q) = %v; want %v", tt.kind, tt.s, got, tt.want)
+			name := ParseName(tt.in, tt.fill)
+			if g := name.DisplayLong(); g != tt.want {
+				t.Errorf("ParseName(%q, %q) = %q; want %q", tt.in, tt.fill, g, tt.want)
 			}
 		})
 	}

-}
-
-func FuzzName(f *testing.F) {
-	for s := range testCases {
-		f.Add(s)
-	}
-	f.Fuzz(func(t *testing.T, s string) {
-		n := ParseNameBare(s)
-		if n.IsValid() {
-			parts := [...]string{n.Host, n.Namespace, n.Model, n.Tag, n.RawDigest}
-			for _, part := range parts {
-				if part == ".." {
-					t.Errorf("unexpected .. as valid part")
-				}
-				if len(part) > 350 {
-					t.Errorf("part too long: %q", part)
-				}
+	t.Run("invalid fill", func(t *testing.T) {
+		defer func() {
+			if recover() == nil {
+				t.Fatal("expected panic")
 			}
-			if n.String() != s {
-				t.Errorf("String() = %q; want %q", n.String(), s)
-			}
-		}
-
+		}()
+		ParseName("x", "^")
 	})
 }

-const validSHA256Hex = "abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789"
-
-func TestParseDigest(t *testing.T) {
-	cases := map[string]bool{
-		"sha256-1000000000000000000000000000000000000000000000000000000000000000": true,
-		"sha256:1000000000000000000000000000000000000000000000000000000000000000": true,
-		"sha256:0000000000000000000000000000000000000000000000000000000000000000": false,
-
-		"sha256:" + validSHA256Hex: true,
-		"sha256-" + validSHA256Hex: true,
-
-		"":                               false,
-		"sha134:" + validSHA256Hex:       false,
-		"sha256:" + validSHA256Hex + "x": false,
-		"sha256:x" + validSHA256Hex:      false,
-		"sha256-" + validSHA256Hex + "x": false,
-		"sha256-x":                       false,
+func TestParseNameHTTPDoublePrefixStrip(t *testing.T) {
+	cases := []string{
+		"http://https://valid.com/valid/valid:latest",
+		"https://http://valid.com/valid/valid:latest",
+	}
+	for _, s := range cases {
+		t.Run(s, func(t *testing.T) {
+			name := ParseName(s, FillNothing)
+			if name.IsValid() {
+				t.Errorf("expected invalid path; got %#v", name)
+			}
+		})
 	}

-	for s, want := range cases {
-		t.Run(s, func(t *testing.T) {
-			d := ParseDigest(s)
-			if d.IsValid() != want {
-				t.Errorf("ParseDigest(%q).IsValid() = %v; want %v", s, d.IsValid(), want)
+}
+
+func TestCompleteWithAndWithoutBuild(t *testing.T) {
+	cases := []struct {
+		in              string
+		complete        bool
+		completeNoBuild bool
+	}{
+		{"", false, false},
+		{"incomplete/mistral:7b+x", false, false},
+		{"incomplete/mistral:7b+Q4_0", false, false},
+		{"incomplete:7b+x", false, false},
+		{"complete.com/x/mistral:latest+Q4_0", true, true},
+		{"complete.com/x/mistral:latest", false, true},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.in, func(t *testing.T) {
+			p := ParseName(tt.in, FillNothing)
+			t.Logf("ParseName(%q) = %#v", tt.in, p)
+			if g := p.IsComplete(); g != tt.complete {
+				t.Errorf("Complete(%q) = %v; want %v", tt.in, g, tt.complete)
 			}
-			norm := strings.ReplaceAll(s, ":", "-")
-			if d.IsValid() && d.String() != norm {
-				t.Errorf("ParseDigest(%q).String() = %q; want %q", s, d.String(), norm)
+			if g := p.IsCompleteNoBuild(); g != tt.completeNoBuild {
+				t.Errorf("CompleteNoBuild(%q) = %v; want %v", tt.in, g, tt.completeNoBuild)
+			}
+		})
+	}
+
+	// Complete uses Parts which returns a slice, but it should be
+	// inlined when used in Complete, preventing any allocations or
+	// escaping to the heap.
+	allocs := testing.AllocsPerRun(1000, func() {
+		keep(ParseName("complete.com/x/mistral:latest+Q4_0", FillNothing).IsComplete())
+	})
+	if allocs > 0 {
+		t.Errorf("Complete allocs = %v; want 0", allocs)
+	}
+}
+
+func TestNameLogValue(t *testing.T) {
+	cases := []string{
+		"example.com/library/mistral:latest+Q4_0",
+		"mistral:latest",
+		"mistral:7b+Q4_0",
+	}
+	for _, s := range cases {
+		t.Run(s, func(t *testing.T) {
+			var b bytes.Buffer
+			log := slog.New(slog.NewTextHandler(&b, nil))
+			name := ParseName(s, FillNothing)
+			log.Info("", "name", name)
+			want := fmt.Sprintf("name=%s", name.GoString())
+			got := b.String()
+			if !strings.Contains(got, want) {
+				t.Errorf("expected log output to contain %q; got %q", want, got)
 			}
 		})
 	}
 }

-func TestDigestString(t *testing.T) {
+// TestNameGoString checks the %#v formatting of parsed names against the
+// expected Go-syntax string, in which unset parts are expected to appear
+// as "?".
+func TestNameGoString(t *testing.T) {
+	cases := []struct {
+		name         string
+		in           string
+		wantString   string // NOTE(review): never read in this test
+		wantGoString string // default is tt.in
+	}{
+		{
+			name:         "Complete Name",
+			in:           "example.com/library/mistral:latest+Q4_0",
+			wantGoString: "example.com/library/mistral:latest+Q4_0@?",
+		},
+		{
+			name:         "Short Name",
+			in:           "mistral:latest",
+			wantGoString: "?/?/mistral:latest+?@?",
+		},
+		{
+			name:         "Long Name",
+			in:           "library/mistral:latest",
+			wantGoString: "?/library/mistral:latest+?@?",
+		},
+		{
+			name:         "Case Preserved",
+			in:           "Library/Mistral:Latest",
+			wantGoString: "?/Library/Mistral:Latest+?@?",
+		},
+		{
+			name:         "With digest",
+			in:           "Library/Mistral:Latest@sha256-123456",
+			wantGoString: "?/Library/Mistral:Latest+?@sha256-123456",
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			p := ParseName(tt.in, FillNothing)
+			// Fall back to the input when no explicit expectation is given.
+			tt.wantGoString = cmp.Or(tt.wantGoString, tt.in)
+			if g := fmt.Sprintf("%#v", p); g != tt.wantGoString {
+				t.Errorf("GoString() = %q; want %q", g, tt.wantGoString)
+			}
+		})
+	}
+}
+
+// TestDisplayLongest checks that DisplayLongest of a complete name yields
+// the host/namespace/model:tag form without the build part.
+func TestDisplayLongest(t *testing.T) {
+	const want = "example.com/library/mistral:latest"
+	name := ParseName("example.com/library/mistral:latest+Q4_0", FillNothing)
+	if got := name.DisplayLongest(); got != want {
+		t.Errorf("got = %q; want %q", got, want)
+	}
+}
+
+// TestDisplayShortest checks DisplayShortest for a table of names and masks,
+// including cases that are expected to panic.
+//
+// NOTE(review): the table contains two pairs of identical rows (the
+// empty-mask rows and the last two "Auto-Fill" rows).
+func TestDisplayShortest(t *testing.T) {
+	cases := []struct {
+		in        string
+		mask      string
+		want      string
+		wantPanic bool
+	}{
+		{"example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
+		{"example.com/library/mistral:latest+Q4_0", "example.com/_/_:latest", "library/mistral", false},
+		{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
+		{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
+
+		// case-insensitive
+		{"Example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
+		{"example.com/Library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
+		{"example.com/library/Mistral:latest+Q4_0", "example.com/library/_:latest", "Mistral", false},
+		{"example.com/library/mistral:Latest+Q4_0", "example.com/library/_:latest", "mistral", false},
+		{"example.com/library/mistral:Latest+q4_0", "example.com/library/_:latest", "mistral", false},
+
+		// zero value
+		{"", MaskDefault, "", true},
+
+		// invalid mask
+		{"example.com/library/mistral:latest+Q4_0", "example.com/mistral", "", true},
+
+		// DefaultMask
+		{"registry.ollama.ai/library/mistral:latest+Q4_0", MaskDefault, "mistral", false},
+
+		// Auto-Fill
+		{"x", "example.com/library/_:latest", "x", false},
+		{"x", "example.com/library/_:latest+Q4_0", "x", false},
+		{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
+		{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
+	}
+
+	for _, tt := range cases {
+		t.Run("", func(t *testing.T) {
+			// Only recover when a panic is expected, so that an
+			// unexpected panic still propagates and fails the test.
+			defer func() {
+				if tt.wantPanic {
+					if recover() == nil {
+						t.Errorf("expected panic")
+					}
+				}
+			}()
+
+			p := ParseName(tt.in, FillNothing)
+			t.Logf("ParseName(%q) = %#v", tt.in, p)
+			if g := p.DisplayShortest(tt.mask); g != tt.want {
+				t.Errorf("got = %q; want %q", g, tt.want)
+			}
+		})
+	}
+}
+
+// TestParseNameAllocs checks that ParseName performs no allocations for a
+// representative input.
+func TestParseNameAllocs(t *testing.T) {
+	parse := func() {
+		keep(ParseName("example.com/mistral:7b+Q4_0", FillNothing))
+	}
+	if n := testing.AllocsPerRun(1000, parse); n > 0 {
+		t.Errorf("ParseName allocs = %v; want 0", n)
+	}
+}
+
+// BenchmarkParseName measures ParseName on a representative input and
+// reports allocations.
+func BenchmarkParseName(b *testing.B) {
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		keep(ParseName("example.com/mistral:7b+Q4_0", FillNothing))
+	}
+}
+
+// FuzzParseNameFromFilepath fuzzes ParseNameFromFilepath and checks two
+// invariants: any input containing ".." must yield the zero Name, and a
+// Name must be valid exactly when it is non-zero.
+func FuzzParseNameFromFilepath(f *testing.F) {
+	f.Add("example.com/library/mistral/7b/Q4_0")
+	f.Add("example.com/../mistral/7b/Q4_0")
+	f.Add("example.com/x/../7b/Q4_0")
+	f.Add("example.com/x/../7b")
+	f.Fuzz(func(t *testing.T, s string) {
+		name := ParseNameFromFilepath(s, FillNothing)
+		if strings.Contains(s, "..") && !name.IsZero() {
+			t.Fatalf("non-zero value for path with '..': %q", s)
+		}
+		// IsValid and IsZero must always disagree.
+		if name.IsValid() == name.IsZero() {
+			t.Errorf("expected valid path to be non-zero value; got %#v", name)
+		}
+	})
+}
+
+// FuzzParseName fuzzes ParseName and checks that inputs containing ".."
+// yield the zero Name, that rejected inputs yield the zero Name, that no
+// part exceeds MaxNamePartLen, and that accepted names round-trip through
+// DisplayLong (compared case-insensitively).
+func FuzzParseName(f *testing.F) {
+	f.Add("example.com/mistral:7b+Q4_0")
+	f.Add("example.com/mistral:7b+q4_0")
+	f.Add("example.com/mistral:7b+x")
+	f.Add("x/y/z:8n+I")
+	f.Add(":x")
+	f.Add("@sha256-123456")
+	f.Add("example.com/mistral:latest+Q4_0@sha256-123456")
+	f.Add(":@!@")
+	f.Add("...")
+	f.Fuzz(func(t *testing.T, s string) {
+		r0 := ParseName(s, FillNothing)
+
+		if strings.Contains(s, "..") && !r0.IsZero() {
+			t.Fatalf("non-zero value for path with '..': %q", s)
+		}
+
+		// A name that is neither valid nor resolved must be the zero
+		// value; the remaining checks do not apply to it.
+		if !r0.IsValid() && !r0.IsResolved() {
+			if !r0.EqualFold(Name{}) {
+				t.Errorf("expected invalid path to be zero value; got %#v", r0)
+			}
+			t.Skipf("invalid path: %q", s)
+		}
+
+		for _, p := range r0.parts {
+			if len(p) > MaxNamePartLen {
+				t.Errorf("part too long: %q", p)
+			}
+		}
+
+		if !strings.EqualFold(r0.DisplayLong(), s) {
+			t.Errorf("String() did not round-trip with case insensitivity: %q\ngot  = %q\nwant = %q", s, r0.DisplayLong(), s)
+		}
+
+		r1 := ParseName(r0.DisplayLong(), FillNothing)
+		if !r0.EqualFold(r1) {
+			t.Errorf("round-trip mismatch: %+v != %+v", r0, r1)
+		}
+	})
+}
+
+// TestNameStringAllocs checks that DisplayLong performs at most one
+// allocation per call on an already-parsed Name.
+func TestNameStringAllocs(t *testing.T) {
+	name := ParseName("example.com/ns/mistral:latest+Q4_0", FillNothing)
+	allocs := testing.AllocsPerRun(1000, func() {
+		keep(name.DisplayLong())
+	})
+	// One allocation is tolerated, so the failure message reports the
+	// actual bound rather than claiming zero.
+	if allocs > 1 {
+		t.Errorf("DisplayLong allocs = %v; want <= 1", allocs)
+	}
+}
+
+func TestNamePath(t *testing.T) {
 	cases := []struct {
 		in   string
 		want string
 	}{
-		{in: "sha256:" + validSHA256Hex, want: "sha256-" + validSHA256Hex},
-		{in: "sha256-" + validSHA256Hex, want: "sha256-" + validSHA256Hex},
-		{in: "", want: "unknown-0000000000000000000000000000000000000000000000000000000000000000"},
-		{in: "blah-100000000000000000000000000000000000000000000000000000000000000", want: "unknown-0000000000000000000000000000000000000000000000000000000000000000"},
-	}
+		{"example.com/library/mistral:latest+Q4_0", "example.com/library/mistral:latest"},

+		// incomplete
+		{"example.com/library/mistral:latest", "example.com/library/mistral:latest"},
+		{"", ""},
+	}
 	for _, tt := range cases {
 		t.Run(tt.in, func(t *testing.T) {
-			d := ParseDigest(tt.in)
-			if d.String() != tt.want {
-				t.Errorf("ParseDigest(%q).String() = %q; want %q", tt.in, d.String(), tt.want)
+			p := ParseName(tt.in, FillNothing)
+			t.Logf("ParseName(%q) = %#v", tt.in, p)
+			if g := p.DisplayURLPath(); g != tt.want {
+				t.Errorf("got = %q; want %q", g, tt.want)
 			}
 		})
 	}
 }
+
+// TestNameFilepath checks Filepath and FilepathNoBuild for a table of
+// names. Results are normalized with filepath.ToSlash before comparison so
+// the expectations are independent of the OS path separator.
+func TestNameFilepath(t *testing.T) {
+	cases := []struct {
+		in          string
+		want        string
+		wantNoBuild string
+	}{
+		{
+			in:          "example.com/library/mistral:latest+Q4_0",
+			want:        "example.com/library/mistral/latest/Q4_0",
+			wantNoBuild: "example.com/library/mistral/latest",
+		},
+		{
+			in:          "Example.Com/Library/Mistral:Latest+Q4_0",
+			want:        "example.com/library/mistral/latest/Q4_0",
+			wantNoBuild: "example.com/library/mistral/latest",
+		},
+		// NOTE(review): this case is identical to the previous one.
+		{
+			in:          "Example.Com/Library/Mistral:Latest+Q4_0",
+			want:        "example.com/library/mistral/latest/Q4_0",
+			wantNoBuild: "example.com/library/mistral/latest",
+		},
+		{
+			in:          "example.com/library/mistral:latest",
+			want:        "example.com/library/mistral/latest",
+			wantNoBuild: "example.com/library/mistral/latest",
+		},
+		{
+			in:          "",
+			want:        "",
+			wantNoBuild: "",
+		},
+	}
+	for _, tt := range cases {
+		t.Run(tt.in, func(t *testing.T) {
+			p := ParseName(tt.in, FillNothing)
+			t.Logf("ParseName(%q) = %#v", tt.in, p)
+			g := p.Filepath()
+			g = filepath.ToSlash(g)
+			if g != tt.want {
+				t.Errorf("got = %q; want %q", g, tt.want)
+			}
+			g = p.FilepathNoBuild()
+			g = filepath.ToSlash(g)
+			if g != tt.wantNoBuild {
+				t.Errorf("got = %q; want %q", g, tt.wantNoBuild)
+			}
+		})
+	}
+}
+
+// TestParseNameFilepath checks ParseNameFromFilepath against a table of
+// slash-separated paths. Each path is converted to the OS separator before
+// parsing, and the result is compared (EqualFold) with the Name parsed from
+// the expected string using the same fill.
+func TestParseNameFilepath(t *testing.T) {
+	cases := []struct {
+		in   string
+		fill string // default is FillNothing
+		want string
+	}{
+		{
+			in:   "example.com/library/mistral/latest/Q4_0",
+			want: "example.com/library/mistral:latest+Q4_0",
+		},
+		{
+			in:   "example.com/library/mistral/latest",
+			fill: "?/?/?:latest+Q4_0",
+			want: "example.com/library/mistral:latest+Q4_0",
+		},
+		{
+			in:   "example.com/library/mistral",
+			fill: "?/?/?:latest+Q4_0",
+			want: "example.com/library/mistral:latest+Q4_0",
+		},
+		{
+			in:   "example.com/library",
+			want: "",
+		},
+		{
+			in:   "example.com/",
+			want: "",
+		},
+		{
+			in:   "example.com/^/mistral/latest/Q4_0",
+			want: "",
+		},
+		{
+			in:   "example.com/library/mistral/../Q4_0",
+			want: "",
+		},
+		{
+			in:   "example.com/library/mistral/latest/Q4_0/extra",
+			want: "",
+		},
+	}
+	for _, tt := range cases {
+		t.Run(tt.in, func(t *testing.T) {
+			// Use the platform's separator so the test exercises real
+			// file paths on every OS.
+			in := strings.ReplaceAll(tt.in, "/", string(filepath.Separator))
+			fill := cmp.Or(tt.fill, FillNothing)
+			want := ParseName(tt.want, fill)
+			if g := ParseNameFromFilepath(in, fill); !g.EqualFold(want) {
+				t.Errorf("got = %q; want %q", g.DisplayLong(), tt.want)
+			}
+		})
+	}
+}
+
+// TestParseNameFromPath checks ParseNameFromURLPath against a table of URL
+// paths, with and without a leading slash and with an optional fill,
+// comparing the DisplayLong form of the result with the expected string.
+func TestParseNameFromPath(t *testing.T) {
+	cases := []struct {
+		in   string
+		want string
+		fill string // default is FillNothing
+	}{
+		{
+			in:   "example.com/library/mistral:latest+Q4_0",
+			want: "example.com/library/mistral:latest+Q4_0",
+		},
+		{
+			in:   "/example.com/library/mistral:latest+Q4_0",
+			want: "example.com/library/mistral:latest+Q4_0",
+		},
+		{
+			in:   "/example.com/library/mistral",
+			want: "example.com/library/mistral",
+		},
+		{
+			in:   "/example.com/library/mistral",
+			fill: "?/?/?:latest+Q4_0",
+			want: "example.com/library/mistral:latest+Q4_0",
+		},
+		{
+			in:   "/example.com/library",
+			want: "",
+		},
+		{
+			in:   "/example.com/",
+			want: "",
+		},
+		{
+			in:   "/example.com/^/mistral/latest",
+			want: "",
+		},
+	}
+	for _, tt := range cases {
+		t.Run(tt.in, func(t *testing.T) {
+			fill := cmp.Or(tt.fill, FillNothing)
+			if g := ParseNameFromURLPath(tt.in, fill); g.DisplayLong() != tt.want {
+				t.Errorf("got = %q; want %q", g.DisplayLong(), tt.want)
+			}
+		})
+	}
+}
+
+// ExampleName_MapHash shows MapHash used as a map key: three spellings of
+// the same name that differ only in letter case are expected to collapse
+// into one key, while a name without a build part yields a second key.
+func ExampleName_MapHash() {
+	m := map[uint64]bool{}
+
+	// key 1
+	m[ParseName("mistral:latest+q4", FillNothing).MapHash()] = true
+	m[ParseName("miSTRal:latest+Q4", FillNothing).MapHash()] = true
+	m[ParseName("mistral:LATest+Q4", FillNothing).MapHash()] = true
+
+	// key 2
+	m[ParseName("mistral:LATest", FillNothing).MapHash()] = true
+
+	fmt.Println(len(m))
+	// Output:
+	// 2
+}
+
+// ExampleName_CompareFold_sort shows Name.CompareFold used as the comparison
+// function for slices.SortFunc; the printed names keep their original
+// letter case.
+func ExampleName_CompareFold_sort() {
+	names := []Name{
+		ParseName("mistral:latest", FillNothing),
+		ParseName("mistRal:7b+q4", FillNothing),
+		ParseName("MIstral:7b", FillNothing),
+	}
+
+	slices.SortFunc(names, Name.CompareFold)
+
+	for _, n := range names {
+		fmt.Println(n.DisplayLong())
+	}
+
+	// Output:
+	// MIstral:7b
+	// mistRal:7b+q4
+	// mistral:latest
+}
+
+// ExampleName_completeAndResolved prints IsComplete, IsResolved and Digest
+// for a name with a digest, the same name without one, and a bare digest.
+func ExampleName_completeAndResolved() {
+	for _, s := range []string{
+		"x/y/z:latest+q4_0@sha123-abc",
+		"x/y/z:latest+q4_0",
+		"@sha123-abc",
+	} {
+		name := ParseName(s, FillNothing)
+		fmt.Printf("complete:%v resolved:%v  digest:%s\n", name.IsComplete(), name.IsResolved(), name.Digest())
+	}
+
+	// Output:
+	// complete:true resolved:true  digest:sha123-abc
+	// complete:true resolved:false  digest:
+	// complete:false resolved:true  digest:sha123-abc
+}
+
+// ExampleName_DisplayShortest prints the result of DisplayShortest for the
+// same name under progressively less specific masks, followed by a name
+// displayed with the empty mask.
+func ExampleName_DisplayShortest() {
+	name := ParseName("example.com/jmorganca/mistral:latest+Q4_0", FillNothing)
+
+	fmt.Println(name.DisplayShortest("example.com/jmorganca/_:latest"))
+	fmt.Println(name.DisplayShortest("example.com/_/_:latest"))
+	fmt.Println(name.DisplayShortest("example.com/_/_:_"))
+	fmt.Println(name.DisplayShortest("_/_/_:_"))
+
+	// Default
+	name = ParseName("registry.ollama.ai/library/mistral:latest+Q4_0", FillNothing)
+	fmt.Println(name.DisplayShortest(""))
+
+	// Output:
+	// mistral
+	// jmorganca/mistral
+	// jmorganca/mistral:latest
+	// example.com/jmorganca/mistral:latest
+	// mistral
+}
+
+// keep returns v unchanged. The tests and benchmarks in this file pass
+// results through it, presumably so the compiler cannot discard the call
+// that produced them.
+func keep[T any](v T) T {
+	return v
+}
--- a/types/model/testdata/fuzz/FuzzParseRef/1d43ee52085cb4aa
+++ b/types/model/testdata/fuzz/FuzzParseRef/1d43ee52085cb4aa
@@ -1,2 +1,2 @@
 go test fuzz v1
-string("00@")
+string("/0")
--- a/types/model/testdata/fuzz/FuzzParseRef/27fd759314f0e6d6
+++ b/types/model/testdata/fuzz/FuzzParseRef/27fd759314f0e6d6
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("0//0")
--- a/types/model/testdata/fuzz/FuzzParseRef/3e3b70dba384074d
+++ b/types/model/testdata/fuzz/FuzzParseRef/3e3b70dba384074d
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("0 /0")
--- a/types/model/testdata/fuzz/FuzzParseRef/71f1fdff711b6dab
+++ b/types/model/testdata/fuzz/FuzzParseRef/71f1fdff711b6dab
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("+0/00000")
--- a/types/model/testdata/fuzz/FuzzParseRef/82c2975c430ac608
+++ b/types/model/testdata/fuzz/FuzzParseRef/82c2975c430ac608
@@ -0,0 +1,2 @@
+go test fuzz v1
+string(":")
--- a/types/model/testdata/fuzz/FuzzParseRef/b51b1c875e61a948
+++ b/types/model/testdata/fuzz/FuzzParseRef/b51b1c875e61a948
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91")
Author	SHA1	Message	Date
Patrick Devine	c0314cc41c	feed the linter	2024-04-24 20:54:27 -07:00
Patrick Devine	686178b6c5	show ggml modelinfo through the show api	2024-04-24 18:53:00 -07:00