use If-None-match header for github releases

types/model: remove old comment (#4020 )
fix copying model to itself (#4019 )
2024-04-29 09:25:32 -04:00 · 2024-04-28 20:52:26 -07:00 · 2024-04-28 23:47:49 -04:00 · 2024-04-28 14:52:58 -07:00 · 2024-04-28 14:20:27 -07:00 · 2024-04-28 13:42:39 -07:00
13 changed files with 227 additions and 256 deletions
--- a/app/lifecycle/updater.go
+++ b/app/lifecycle/updater.go
@@ -8,7 +8,6 @@ import (
 	"fmt"
 	"io"
 	"log/slog"
-	"mime"
 	"net/http"
 	"net/url"
 	"os"
@@ -70,7 +69,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
 	req.Header.Set("Authorization", signature)
 	req.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))

-	slog.Debug("checking for available update", "requestURL", requestURL)
+	slog.Info("checking for available update", "requestURL", requestURL)
 	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		slog.Warn(fmt.Sprintf("failed to check for update: %s", err))
@@ -104,60 +103,63 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
 }

 func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
-	// Do a head first to check etag info
-	req, err := http.NewRequestWithContext(ctx, http.MethodHead, updateResp.UpdateURL, nil)
+	etagfilename := filepath.Join(UpdateStageDir, updateResp.UpdateVersion, "etag")
+
+	// parse the base of the url for the filename
+	u, err := url.Parse(updateResp.UpdateURL)
+	if err != nil {
+		return fmt.Errorf("could not parse update url: %w", err)
+	}
+
+	var etag string
+	filename := filepath.Join(UpdateStageDir, updateResp.UpdateVersion, filepath.Base(u.Path))
+	_, err = os.Stat(filename)
+	if err == nil {
+		file, err := os.Open(etagfilename)
+		if err == nil {
+			content, err := io.ReadAll(file)
+			if err != nil {
+				return fmt.Errorf("could not read etag file: %w", err)
+			}
+
+			etag = string(content)
+			file.Close()
+		}
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, updateResp.UpdateURL, nil)
 	if err != nil {
 		return err
 	}

+	if etag != "" {
+		req.Header.Set("If-None-Match", "\""+etag+"\"")
+	}
+
+	slog.Info("sending update request", "url", req.URL.String())
 	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		return fmt.Errorf("error checking update: %w", err)
 	}
-	if resp.StatusCode != 200 {
-		return fmt.Errorf("unexpected status attempting to download update %d", resp.StatusCode)
-	}
-	resp.Body.Close()
-	etag := strings.Trim(resp.Header.Get("etag"), "\"")
-	if etag == "" {
-		slog.Debug("no etag detected, falling back to filename based dedup")
-		etag = "_"
-	}
-	filename := Installer
-	_, params, err := mime.ParseMediaType(resp.Header.Get("content-disposition"))
-	if err == nil {
-		filename = params["filename"]
-	}
+	defer resp.Body.Close()

-	stageFilename := filepath.Join(UpdateStageDir, etag, filename)
-
-	// Check to see if we already have it downloaded
-	_, err = os.Stat(stageFilename)
-	if err == nil {
+	if resp.StatusCode == 304 {
 		slog.Info("update already downloaded")
 		return nil
 	}

+	slog.Info("got status", "status", resp.StatusCode)
+	if resp.StatusCode != 200 {
+		return fmt.Errorf("unexpected status attempting to download update %d", resp.StatusCode)
+	}
+
 	cleanupOldDownloads()

-	req.Method = http.MethodGet
-	resp, err = http.DefaultClient.Do(req)
-	if err != nil {
-		return fmt.Errorf("error checking update: %w", err)
-	}
-	defer resp.Body.Close()
-	etag = strings.Trim(resp.Header.Get("etag"), "\"")
-	if etag == "" {
-		slog.Debug("no etag detected, falling back to filename based dedup") // TODO probably can get rid of this redundant log
-		etag = "_"
-	}
-
-	stageFilename = filepath.Join(UpdateStageDir, etag, filename)
-
-	_, err = os.Stat(filepath.Dir(stageFilename))
+	// Create the directory for the update
+	_, err = os.Stat(filepath.Dir(filename))
 	if errors.Is(err, os.ErrNotExist) {
-		if err := os.MkdirAll(filepath.Dir(stageFilename), 0o755); err != nil {
-			return fmt.Errorf("create ollama dir %s: %v", filepath.Dir(stageFilename), err)
+		if err := os.MkdirAll(filepath.Dir(filename), 0o755); err != nil {
+			return fmt.Errorf("create update dir %s: %v", filepath.Dir(filename), err)
 		}
 	}

@@ -165,17 +167,33 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
 	if err != nil {
 		return fmt.Errorf("failed to read body response: %w", err)
 	}
-	fp, err := os.OpenFile(stageFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+	fp, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
 	if err != nil {
-		return fmt.Errorf("write payload %s: %w", stageFilename, err)
+		return fmt.Errorf("write payload %s: %w", filename, err)
 	}
 	defer fp.Close()
 	if n, err := fp.Write(payload); err != nil || n != len(payload) {
-		return fmt.Errorf("write payload %s: %d vs %d -- %w", stageFilename, n, len(payload), err)
+		return fmt.Errorf("write payload %s: %d vs %d -- %w", filename, n, len(payload), err)
 	}
-	slog.Info("new update downloaded " + stageFilename)
+
+	etag = strings.Trim(resp.Header.Get("etag"), "\"")
+	if etag != "" {
+		file, err := os.OpenFile(etagfilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o644)
+		if err != nil {
+			return fmt.Errorf("could not open etag file for writing: %w", err)
+		}
+
+		if _, err := file.Write([]byte(etag)); err != nil {
+			return fmt.Errorf("could not write etag file: %w", err)
+		}
+
+		file.Close()
+	}
+
+	slog.Info("new update downloaded " + filename)

 	UpdateDownloaded = true
+
 	return nil
 }

--- a/app/ollama.iss
+++ b/app/ollama.iss
@@ -88,8 +88,8 @@ DialogFontSize=12
 [Files]
 Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
 Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
-Source: "..\dist\windows-amd64\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
-Source: "..\dist\windows-amd64\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
+Source: "..\dist\windows-{#ARCH}\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
+Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
 Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
 Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
 #if DirExists("..\dist\windows-amd64\rocm")
--- a/gpu/assets.go
+++ b/gpu/assets.go
@@ -32,9 +32,25 @@ func PayloadsDir() (string, error) {
 				slog.Error("failed to lookup executable path", "error", err)
 				return "", err
 			}
+
+			cwd, err := os.Getwd()
+			if err != nil {
+				slog.Error("failed to lookup working directory", "error", err)
+				return "", err
+			}
+
+			var paths []string
+			for _, root := range []string{appExe, cwd} {
+				paths = append(paths,
+					filepath.Join(root),
+					filepath.Join(root, "windows-"+runtime.GOARCH),
+					filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
+				)
+			}
+
 			// Try a few variations to improve developer experience when building from source in the local tree
-			for _, d := range []string{".", "windows-" + runtime.GOARCH, "dist\\windows-" + runtime.GOARCH} {
-				candidate := filepath.Join(filepath.Dir(appExe), d, "ollama_runners")
+			for _, p := range paths {
+				candidate := filepath.Join(p, "ollama_runners")
 				_, err := os.Stat(candidate)
 				if err == nil {
 					runnersDir = candidate
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -42,7 +42,7 @@ function init_vars {
        "-DLLAMA_NATIVE=off"
        )
    $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
-    $script:ARCH = "amd64" # arm not yet supported.
+    $script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
    $script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
    md "$script:DIST_BASE" -ea 0 > $null
    if ($env:CGO_CFLAGS -contains "-g") {
@@ -213,11 +213,11 @@ function build_static() {
    }
 }

-function build_cpu() {
+function build_cpu($gen_arch) {
    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
        # remaining llama.cpp builds use MSVC 
        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+        $script:cmakeDefs = $script:commonCpuDefs + @("-A", $gen_arch, "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
        $script:buildDir="../build/windows/${script:ARCH}/cpu"
        $script:distDir="$script:DIST_BASE\cpu"
        write-host "Building LCD CPU"
@@ -349,11 +349,15 @@ if ($($args.count) -eq 0) {
    git_module_setup
    apply_patches
    build_static
-    build_cpu
-    build_cpu_avx
-    build_cpu_avx2
-    build_cuda
-    build_rocm
+    if ($script:ARCH -eq "arm64") {
+        build_cpu("ARM64")
+    } else { # amd64
+        build_cpu("x64")
+        build_cpu_avx
+        build_cpu_avx2
+        build_cuda
+        build_rocm
+    }

    cleanup
    write-host "`ngo generate completed.  LLM runners: $(get-childitem -path $script:DIST_BASE)"
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -4,6 +4,7 @@ package llm
 // #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++
 // #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++
 // #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
+// #cgo windows,arm64 LDFLAGS: ${SRCDIR}/build/windows/arm64_static/libllama.a -static -lstdc++
 // #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
 // #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
 // #include <stdlib.h>
--- a/macapp/src/app.tsx
+++ b/macapp/src/app.tsx
@@ -19,7 +19,7 @@ export default function () {
  const [step, setStep] = useState<Step>(Step.WELCOME)
  const [commandCopied, setCommandCopied] = useState<boolean>(false)

-  const command = 'ollama run llama2'
+  const command = 'ollama run llama3'

  return (
    <div className='drag'>
--- a/scripts/build_windows.ps1
+++ b/scripts/build_windows.ps1
@@ -7,6 +7,8 @@
 $ErrorActionPreference = "Stop"

 function checkEnv() {
+    $script:TARGET_ARCH=$Env:PROCESSOR_ARCHITECTURE.ToLower()
+    Write-host "Building for ${script:TARGET_ARCH}"
    write-host "Locating required tools and paths"
    $script:SRC_DIR=$PWD
    if (!$env:VCToolsRedistDir) {
@@ -30,7 +32,7 @@ function checkEnv() {
    
    $script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0]

-    $script:DEPS_DIR="${script:SRC_DIR}\dist\windows-amd64"
+    $script:DEPS_DIR="${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}"
    $env:CGO_ENABLED="1"
    echo "Checking version"
    if (!$env:VERSION) {
@@ -81,8 +83,8 @@ function buildOllama() {
            /csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe
        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
    }
-    New-Item -ItemType Directory -Path .\dist\windows-amd64\ -Force
-    cp .\ollama.exe .\dist\windows-amd64\
+    New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\ -Force
+    cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\
 }

 function buildApp() {
@@ -127,16 +129,16 @@ function buildInstaller() {
    cd "${script:SRC_DIR}\app"
    $env:PKG_VERSION=$script:PKG_VERSION
    if ("${env:KEY_CONTAINER}") {
-        & "${script:INNO_SETUP_DIR}\ISCC.exe" /SMySignTool="${script:SignTool} sign /fd sha256 /t http://timestamp.digicert.com /f ${script:OLLAMA_CERT} /csp `$qGoogle Cloud KMS Provider`$q /kc ${env:KEY_CONTAINER} `$f" .\ollama.iss
+        & "${script:INNO_SETUP_DIR}\ISCC.exe" /DARCH=$script:TARGET_ARCH /SMySignTool="${script:SignTool} sign /fd sha256 /t http://timestamp.digicert.com /f ${script:OLLAMA_CERT} /csp `$qGoogle Cloud KMS Provider`$q /kc ${env:KEY_CONTAINER} `$f" .\ollama.iss
    } else {
-        & "${script:INNO_SETUP_DIR}\ISCC.exe" .\ollama.iss
+        & "${script:INNO_SETUP_DIR}\ISCC.exe" /DARCH=$script:TARGET_ARCH .\ollama.iss
    }
    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
 }

 function distZip() {
-    write-host "Generating stand-alone distribution zip file ${script:SRC_DIR}\dist\ollama-windows-amd64.zip"
-    Compress-Archive -Path "${script:SRC_DIR}\dist\windows-amd64\*" -DestinationPath "${script:SRC_DIR}\dist\ollama-windows-amd64.zip" -Force
+    write-host "Generating stand-alone distribution zip file ${script:SRC_DIR}\dist\ollama-windows-${script:TARGET_ARCH}.zip"
+    Compress-Archive -Path "${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}\*" -DestinationPath "${script:SRC_DIR}\dist\ollama-windows-${script:TARGET_ARCH}.zip" -Force
 }

 try {
--- a/server/images.go
+++ b/server/images.go
@@ -710,6 +710,10 @@ func CopyModel(src, dst model.Name) error {
 		return model.Unqualified(src)
 	}

+	if src.Filepath() == dst.Filepath() {
+		return nil
+	}
+
 	manifests, err := GetManifestPath()
 	if err != nil {
 		return err
--- a/server/sched.go
+++ b/server/sched.go
@@ -149,6 +149,14 @@ func (s *Scheduler) processPending(ctx context.Context) {
 						break
 					}

+					// If we're CPU only mode, just limit by loadedMax above
+					// TODO handle system memory exhaustion
+					if (len(gpus) == 1 && gpus[0].Library == "cpu") || pending.opts.NumGPU == 0 {
+						slog.Debug("cpu mode with existing models, loading")
+						s.loadFn(pending, ggml, gpus)
+						break
+					}
+
 					// No models loaded. Load the model but prefer the best fit.
 					if loadedCount == 0 {
 						slog.Debug("loading first model", "model", pending.model.ModelPath)
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -28,19 +28,33 @@ func TestInitScheduler(t *testing.T) {
 	ctx, done := context.WithCancel(context.Background())
 	defer done()
 	initialMax := loadedMax
+	initialParallel := numParallel
 	s := InitScheduler(ctx)
 	require.Equal(t, initialMax, loadedMax)
+	s.loadedMu.Lock()
 	require.NotNil(t, s.loaded)
+	s.loadedMu.Unlock()

 	os.Setenv("OLLAMA_MAX_LOADED_MODELS", "blue")
 	s = InitScheduler(ctx)
 	require.Equal(t, initialMax, loadedMax)
+	s.loadedMu.Lock()
 	require.NotNil(t, s.loaded)
+	s.loadedMu.Unlock()

 	os.Setenv("OLLAMA_MAX_LOADED_MODELS", "0")
 	s = InitScheduler(ctx)
 	require.Equal(t, 0, loadedMax)
+	s.loadedMu.Lock()
 	require.NotNil(t, s.loaded)
+	s.loadedMu.Unlock()
+
+	os.Setenv("OLLAMA_NUM_PARALLEL", "blue")
+	_ = InitScheduler(ctx)
+	require.Equal(t, initialParallel, numParallel)
+	os.Setenv("OLLAMA_NUM_PARALLEL", "10")
+	_ = InitScheduler(ctx)
+	require.Equal(t, 10, numParallel)
 }

 func TestLoad(t *testing.T) {
@@ -51,6 +65,7 @@ func TestLoad(t *testing.T) {
 	req := &LlmRequest{
 		ctx:             ctx,
 		model:           &Model{ModelPath: "foo"},
+		opts:            api.DefaultOptions(),
 		successCh:       make(chan *runnerRef, 1),
 		errCh:           make(chan error, 1),
 		sessionDuration: 2,
@@ -63,7 +78,9 @@ func TestLoad(t *testing.T) {
 	s.load(req, ggml, gpus)
 	require.Len(t, req.successCh, 0)
 	require.Len(t, req.errCh, 1)
+	s.loadedMu.Lock()
 	require.Len(t, s.loaded, 0)
+	s.loadedMu.Unlock()
 	err := <-req.errCh
 	require.Contains(t, err.Error(), "this model may be incompatible")

@@ -78,7 +95,9 @@ func TestLoad(t *testing.T) {
 	case resp := <-req.successCh:
 		require.Equal(t, uint64(10), resp.estimatedVRAM)
 		require.Equal(t, uint(1), resp.refCount)
+		s.loadedMu.Lock()
 		require.Len(t, s.loaded, 1)
+		s.loadedMu.Unlock()
 	}

 	req.model.ModelPath = "dummy_model_path"
@@ -90,7 +109,9 @@ func TestLoad(t *testing.T) {
 	case resp := <-req.successCh:
 		t.Errorf("unexpected success %v", resp)
 	}
+	s.loadedMu.Lock()
 	runner := s.loaded["dummy_model_path"]
+	s.loadedMu.Unlock()
 	require.NotNil(t, runner)
 	require.Equal(t, uint(0), runner.refCount)
 	time.Sleep(1 * time.Millisecond)
@@ -143,6 +164,7 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
 	scenario.req = &LlmRequest{
 		ctx:             scenario.ctx,
 		model:           model,
+		opts:            api.DefaultOptions(),
 		sessionDuration: 5 * time.Millisecond,
 		successCh:       make(chan *runnerRef, 1),
 		errCh:           make(chan error, 1),
@@ -171,7 +193,9 @@ func TestRequests(t *testing.T) {
 	// Multiple loaded models
 	scenario3a := newScenario(t, ctx, "ollama-model-3a", 1*format.GigaByte)
 	scenario3b := newScenario(t, ctx, "ollama-model-3b", 24*format.GigaByte)
-	scenario3c := newScenario(t, ctx, "ollama-model-3c", 30) // Needs prior unloaded
+	scenario3c := newScenario(t, ctx, "ollama-model-4a", 30)
+	scenario3c.req.opts.NumGPU = 0                           // CPU load, will be allowed
+	scenario3d := newScenario(t, ctx, "ollama-model-3c", 30) // Needs prior unloaded

 	s := InitScheduler(ctx)
 	s.getGpuFn = func() gpu.GpuInfoList {
@@ -240,7 +264,9 @@ func TestRequests(t *testing.T) {
 	case <-ctx.Done():
 		t.Errorf("timeout")
 	}
+	s.loadedMu.Lock()
 	require.Len(t, s.loaded, 1)
+	s.loadedMu.Unlock()

 	loadedMax = 0
 	s.newServerFn = scenario3b.newServer
@@ -254,19 +280,14 @@ func TestRequests(t *testing.T) {
 	case <-ctx.Done():
 		t.Errorf("timeout")
 	}
+	s.loadedMu.Lock()
 	require.Len(t, s.loaded, 2)
+	s.loadedMu.Unlock()

-	// Try to load a model that wont fit
+	// This is a CPU load with NumGPU = 0 so it should load
 	s.newServerFn = scenario3c.newServer
 	slog.Info("scenario3c")
-	require.Len(t, s.loaded, 2)
-	scenario3a.ctxDone() // Won't help since this one isn't big enough to make room
-	time.Sleep(2 * time.Millisecond)
 	s.pendingReqCh <- scenario3c.req
-	// finish prior request, so new model can load
-	time.Sleep(6 * time.Millisecond)
-	require.Len(t, s.loaded, 1)
-	scenario3b.ctxDone()
 	select {
 	case resp := <-scenario3c.req.successCh:
 		require.Equal(t, resp.llama, scenario3c.srv)
@@ -275,7 +296,36 @@ func TestRequests(t *testing.T) {
 	case <-ctx.Done():
 		t.Errorf("timeout")
 	}
-	require.Len(t, s.loaded, 1)
+	s.loadedMu.Lock()
+	require.Len(t, s.loaded, 3)
+	s.loadedMu.Unlock()
+
+	// Try to load a model that wont fit
+	s.newServerFn = scenario3d.newServer
+	slog.Info("scenario3d")
+	s.loadedMu.Lock()
+	require.Len(t, s.loaded, 3)
+	s.loadedMu.Unlock()
+	scenario3a.ctxDone() // Won't help since this one isn't big enough to make room
+	time.Sleep(2 * time.Millisecond)
+	s.pendingReqCh <- scenario3d.req
+	// finish prior request, so new model can load
+	time.Sleep(6 * time.Millisecond)
+	s.loadedMu.Lock()
+	require.Len(t, s.loaded, 2)
+	s.loadedMu.Unlock()
+	scenario3b.ctxDone()
+	select {
+	case resp := <-scenario3d.req.successCh:
+		require.Equal(t, resp.llama, scenario3d.srv)
+		require.Len(t, s.pendingReqCh, 0)
+		require.Len(t, scenario3d.req.errCh, 0)
+	case <-ctx.Done():
+		t.Errorf("timeout")
+	}
+	s.loadedMu.Lock()
+	require.Len(t, s.loaded, 2)
+	s.loadedMu.Unlock()
 }

 func TestGetRunner(t *testing.T) {
@@ -318,7 +368,9 @@ func TestGetRunner(t *testing.T) {
 		t.Errorf("timeout")
 	}
 	scenario1a.ctxDone()
+	s.loadedMu.Lock()
 	require.Len(t, s.loaded, 1)
+	s.loadedMu.Unlock()

 	scenario1c.req.model.ModelPath = "bad path"
 	slog.Info("scenario1c")
@@ -328,7 +380,9 @@ func TestGetRunner(t *testing.T) {
 	require.Len(t, errCh1c, 0)

 	time.Sleep(5 * time.Millisecond)
+	s.loadedMu.Lock()
 	require.Len(t, s.loaded, 0)
+	s.loadedMu.Unlock()
 	require.Len(t, errCh1c, 1)
 	err = <-errCh1c
 	require.Contains(t, err.Error(), "bad path")
@@ -358,7 +412,9 @@ func TestPrematureExpired(t *testing.T) {
 		require.Equal(t, resp.llama, scenario1a.srv)
 		require.Len(t, s.pendingReqCh, 0)
 		require.Len(t, errCh1a, 0)
+		s.loadedMu.Lock()
 		require.Len(t, s.loaded, 1)
+		s.loadedMu.Unlock()
 		slog.Info("sending premature expired event now")
 		s.expiredCh <- resp // Shouldn't happen in real life, but make sure its safe
 	case <-ctx.Done():
@@ -383,6 +439,7 @@ func TestUseLoadedRunner(t *testing.T) {
 	ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
 	req := &LlmRequest{
 		ctx:             ctx,
+		opts:            api.DefaultOptions(),
 		successCh:       make(chan *runnerRef, 1),
 		sessionDuration: 2,
 	}
@@ -426,8 +483,10 @@ func TestUpdateFreeSpace(t *testing.T) {
 	r2 := &runnerRef{llama: llm2, gpus: gpus}

 	s := InitScheduler(ctx)
+	s.loadedMu.Lock()
 	s.loaded["a"] = r1
 	s.loaded["b"] = r2
+	s.loadedMu.Unlock()

 	s.updateFreeSpace(gpus)
 	require.Equal(t, uint64(850), gpus[0].FreeMemory)
@@ -437,13 +496,18 @@ func TestUpdateFreeSpace(t *testing.T) {
 func TestFindRunnerToUnload(t *testing.T) {
 	ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
 	defer done()
-	req := &LlmRequest{ctx: ctx}
+	req := &LlmRequest{
+		ctx:  ctx,
+		opts: api.DefaultOptions(),
+	}
 	r1 := &runnerRef{refCount: 1, sessionDuration: 1}
 	r2 := &runnerRef{sessionDuration: 2}

 	s := InitScheduler(ctx)
+	s.loadedMu.Lock()
 	s.loaded["a"] = r1
 	s.loaded["b"] = r2
+	s.loadedMu.Unlock()

 	resp := s.findRunnerToUnload(req)
 	require.Equal(t, r2, resp)
@@ -458,10 +522,11 @@ func TestNeedsReload(t *testing.T) {
 	defer done()

 	llm := &mockLlm{}
+	do := api.DefaultOptions()
 	runner := &runnerRef{
 		adapters:   []string{"adapter1"},
 		projectors: []string{"projector1"},
-		Options:    &api.Options{},
+		Options:    &do,
 		llama:      llm,
 	}
 	req := &LlmRequest{
@@ -469,7 +534,7 @@ func TestNeedsReload(t *testing.T) {
 			AdapterPaths:   []string{"adapter2"},
 			ProjectorPaths: []string{"projector2"},
 		},
-		opts: api.Options{},
+		opts: api.DefaultOptions(),
 	}
 	resp := runner.needsReload(ctx, req)
 	require.True(t, resp)
@@ -508,8 +573,10 @@ func TestUnloadAllRunners(t *testing.T) {
 	r1 := &runnerRef{llama: llm1}
 	r2 := &runnerRef{llama: llm2}

+	s.loadedMu.Lock()
 	s.loaded["a"] = r1
 	s.loaded["b"] = r2
+	s.loadedMu.Unlock()
 	s.unloadAllRunners()

 	require.True(t, llm1.closeCalled)
--- a/types/model/name.go
+++ b/types/model/name.go
@@ -4,7 +4,6 @@ package model

 import (
 	"cmp"
-	"encoding/hex"
 	"errors"
 	"fmt"
 	"log/slog"
@@ -81,9 +80,6 @@ func (k partKind) String() string {
 //
 // It is not guaranteed to be valid. Use [Name.IsValid] to check if the name
 // is valid.
-//
-// It is not directly comparable with other Names. Use [Name.Equal] and
-// [Name.MapHash] for determining equality and using as a map key.
 type Name struct {
 	Host      string
 	Namespace string
@@ -110,20 +106,20 @@ type Name struct {
 //		  { model }
 //		  "@" { digest }
 //	  host:
-//	      pattern: alphanum { alphanum | "-" | "_" | "." | ":" }*
+//	      pattern: { alphanum | "_" } { alphanum | "-" | "_" | "." | ":" }*
 //	      length:  [1, 350]
 //	  namespace:
-//	      pattern: alphanum { alphanum | "-" | "_" }*
-//	      length:  [2, 80]
+//	      pattern: { alphanum | "_" } { alphanum | "-" | "_" }*
+//	      length:  [1, 80]
 //	  model:
-//	      pattern: alphanum { alphanum | "-" | "_" | "." }*
-//	      length:  [2, 80]
+//	      pattern: { alphanum | "_" } { alphanum | "-" | "_" | "." }*
+//	      length:  [1, 80]
 //	  tag:
-//	      pattern: alphanum { alphanum | "-" | "_" | "." }*
+//	      pattern: { alphanum | "_" } { alphanum | "-" | "_" | "." }*
 //	      length:  [1, 80]
 //	  digest:
-//	      pattern: alphanum { alphanum | "-" | ":" }*
-//	      length:  [2, 80]
+//	      pattern: { alphanum | "_" } { alphanum | "-" | ":" }*
+//	      length:  [1, 80]
 //
 // Most users should use [ParseName] instead, unless need to support
 // different defaults than DefaultName.
@@ -171,11 +167,6 @@ func Merge(a, b Name) Name {
 	return a
 }

-// Digest returns the result of [ParseDigest] with the RawDigest field.
-func (n Name) Digest() Digest {
-	return ParseDigest(n.RawDigest)
-}
-
 // String returns the name string, in the format that [ParseNameNoDefaults]
 // accepts as valid, if [Name.IsValid] reports true; otherwise the empty
 // string is returned.
@@ -204,7 +195,7 @@ func (n Name) String() string {
 // IsValid reports whether all parts of the name are present and valid. The
 // digest is a special case, and is checked for validity only if present.
 func (n Name) IsValid() bool {
-	if n.RawDigest != "" && !ParseDigest(n.RawDigest).IsValid() {
+	if n.RawDigest != "" && !isValidPart(kindDigest, n.RawDigest) {
 		return false
 	}
 	return n.IsFullyQualified()
@@ -260,7 +251,7 @@ func isValidLen(kind partKind, s string) bool {
 	case kindTag:
 		return len(s) >= 1 && len(s) <= 80
 	default:
-		return len(s) >= 2 && len(s) <= 80
+		return len(s) >= 1 && len(s) <= 80
 	}
 }

@@ -270,7 +261,7 @@ func isValidPart(kind partKind, s string) bool {
 	}
 	for i := range s {
 		if i == 0 {
-			if !isAlphanumeric(s[i]) {
+			if !isAlphanumericOrUnderscore(s[i]) {
 				return false
 			}
 			continue
@@ -282,11 +273,11 @@ func isValidPart(kind partKind, s string) bool {
 				return false
 			}
 		case ':':
-			if kind != kindHost {
+			if kind != kindHost && kind != kindDigest {
 				return false
 			}
 		default:
-			if !isAlphanumeric(s[i]) {
+			if !isAlphanumericOrUnderscore(s[i]) {
 				return false
 			}
 		}
@@ -294,8 +285,8 @@ func isValidPart(kind partKind, s string) bool {
 	return true
 }

-func isAlphanumeric(c byte) bool {
-	return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c >= '0' && c <= '9'
+func isAlphanumericOrUnderscore(c byte) bool {
+	return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c >= '0' && c <= '9' || c == '_'
 }

 func cutLast(s, sep string) (before, after string, ok bool) {
@@ -317,75 +308,3 @@ func cutPromised(s, sep string) (before, after string, ok bool) {
 	}
 	return cmp.Or(before, MissingPart), cmp.Or(after, MissingPart), true
 }
-
-type DigestType int
-
-const (
-	DigestTypeInvalid DigestType = iota
-	DigestTypeSHA256
-)
-
-func (t DigestType) String() string {
-	if t == DigestTypeSHA256 {
-		return "sha256"
-	}
-	return "unknown"
-}
-
-// Digest represents a type and hash of a digest. It is comparable and can
-// be used as a map key.
-type Digest struct {
-	Type DigestType
-	Hash [32]byte
-}
-
-// ParseDigest parses a digest string into a Digest struct. It accepts both
-// the forms:
-//
-//	sha256:deadbeef
-//	sha256-deadbeef
-//
-// The hash part must be exactly 64 characters long.
-//
-// The form "type:hash" does not round trip through [Digest.String].
-func ParseDigest(s string) Digest {
-	typ, hash, ok := cutLast(s, ":")
-	if !ok {
-		typ, hash, ok = cutLast(s, "-")
-		if !ok {
-			return Digest{}
-		}
-	}
-	if typ != "sha256" {
-		return Digest{}
-	}
-	var d Digest
-	n, err := hex.Decode(d.Hash[:], []byte(hash))
-	if err != nil || n != 32 {
-		return Digest{}
-	}
-	return Digest{Type: DigestTypeSHA256, Hash: d.Hash}
-}
-
-// IsValid returns true if the digest has a valid Type and Hash.
-func (d Digest) IsValid() bool {
-	if d.Type != DigestTypeSHA256 {
-		return false
-	}
-	return d.Hash != [32]byte{}
-}
-
-// String returns the digest as a string in the form "type-hash". The hash
-// is encoded as a hex string.
-func (d Digest) String() string {
-	var b strings.Builder
-	b.WriteString(d.Type.String())
-	b.WriteByte('-')
-	b.WriteString(hex.EncodeToString(d.Hash[:]))
-	return b.String()
-}
-
-// LogValue returns a slog.Value that represents the digest as a string.
-func (d Digest) LogValue() slog.Value {
-	return slog.StringValue(d.String())
-}
--- a/types/model/name_test.go
+++ b/types/model/name_test.go
@@ -2,7 +2,6 @@ package model

 import (
 	"reflect"
-	"strings"
 	"testing"
 )

@@ -82,10 +81,10 @@ func TestParseNameParts(t *testing.T) {
 			wantValidDigest: false,
 		},
 		{
-			in: "model@sha256:" + validSHA256Hex,
+			in: "model@sha256:123",
 			want: Name{
 				Model:     "model",
-				RawDigest: "sha256:" + validSHA256Hex,
+				RawDigest: "sha256:123",
 			},
 			wantValidDigest: true,
 		},
@@ -97,14 +96,18 @@ func TestParseNameParts(t *testing.T) {
 			if !reflect.DeepEqual(got, tt.want) {
 				t.Errorf("parseName(%q) = %v; want %v", tt.in, got, tt.want)
 			}
-			if got.Digest().IsValid() != tt.wantValidDigest {
-				t.Errorf("parseName(%q).Digest().IsValid() = %v; want %v", tt.in, got.Digest().IsValid(), tt.wantValidDigest)
-			}
 		})
 	}
 }

 var testCases = map[string]bool{ // name -> valid
+	"": false,
+
+	"_why/_the/_lucky:_stiff": true,
+
+	// minimal
+	"h/n/m:t@d": true,
+
 	"host/namespace/model:tag": true,
 	"host/namespace/model":     false,
 	"namespace/model":          false,
@@ -120,11 +123,12 @@ var testCases = map[string]bool{ // name -> valid
 	"h/nn/mm:t@sha256-1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes
 	"h/nn/mm:t@sha256:1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes

-	"m":        false, // model too short
-	"n/mm:":    false, // namespace too short
-	"h/n/mm:t": false, // namespace too short
-	"@t":       false, // digest too short
-	"mm@d":     false, // digest too short
+	// unqualified
+	"m":     false,
+	"n/m:":  false,
+	"h/n/m": false,
+	"@t":    false,
+	"m@d":   false,

 	// invalids
 	"^":      false,
@@ -144,8 +148,6 @@ var testCases = map[string]bool{ // name -> valid
 	"hh/nn/mm:-tt@dd": false,
 	"hh/nn/mm:tt@-dd": false,

-	"": false,
-
 	// hosts
 	"host:https/namespace/model:tag": true,

@@ -167,7 +169,6 @@ func TestNameIsValid(t *testing.T) {
 	var numStringTests int
 	for s, want := range testCases {
 		n := ParseNameBare(s)
-		t.Logf("n: %#v", n)
 		got := n.IsValid()
 		if got != want {
 			t.Errorf("parseName(%q).IsValid() = %v; want %v", s, got, want)
@@ -239,57 +240,3 @@ func FuzzName(f *testing.F) {

 	})
 }
-
-const validSHA256Hex = "abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789"
-
-func TestParseDigest(t *testing.T) {
-	cases := map[string]bool{
-		"sha256-1000000000000000000000000000000000000000000000000000000000000000": true,
-		"sha256:1000000000000000000000000000000000000000000000000000000000000000": true,
-		"sha256:0000000000000000000000000000000000000000000000000000000000000000": false,
-
-		"sha256:" + validSHA256Hex: true,
-		"sha256-" + validSHA256Hex: true,
-
-		"":                               false,
-		"sha134:" + validSHA256Hex:       false,
-		"sha256:" + validSHA256Hex + "x": false,
-		"sha256:x" + validSHA256Hex:      false,
-		"sha256-" + validSHA256Hex + "x": false,
-		"sha256-x":                       false,
-	}
-
-	for s, want := range cases {
-		t.Run(s, func(t *testing.T) {
-			d := ParseDigest(s)
-			if d.IsValid() != want {
-				t.Errorf("ParseDigest(%q).IsValid() = %v; want %v", s, d.IsValid(), want)
-			}
-			norm := strings.ReplaceAll(s, ":", "-")
-			if d.IsValid() && d.String() != norm {
-				t.Errorf("ParseDigest(%q).String() = %q; want %q", s, d.String(), norm)
-			}
-		})
-	}
-}
-
-func TestDigestString(t *testing.T) {
-	cases := []struct {
-		in   string
-		want string
-	}{
-		{in: "sha256:" + validSHA256Hex, want: "sha256-" + validSHA256Hex},
-		{in: "sha256-" + validSHA256Hex, want: "sha256-" + validSHA256Hex},
-		{in: "", want: "unknown-0000000000000000000000000000000000000000000000000000000000000000"},
-		{in: "blah-100000000000000000000000000000000000000000000000000000000000000", want: "unknown-0000000000000000000000000000000000000000000000000000000000000000"},
-	}
-
-	for _, tt := range cases {
-		t.Run(tt.in, func(t *testing.T) {
-			d := ParseDigest(tt.in)
-			if d.String() != tt.want {
-				t.Errorf("ParseDigest(%q).String() = %q; want %q", tt.in, d.String(), tt.want)
-			}
-		})
-	}
-}
--- a/types/structs/structs.go
+++ b/types/structs/structs.go
@@ -1,15 +0,0 @@
-// Copyright (c) Tailscale Inc & AUTHORS
-// SPDX-License-Identifier: BSD-3-Clause
-
-// Package structs contains the Incomparable type.
-package structs
-
-// Incomparable is a zero-width incomparable type. If added as the
-// first field in a struct, it marks that struct as not comparable
-// (can't do == or be a map key) and usually doesn't add any width to
-// the struct (unless the struct has only small fields).
-//
-// By making a struct incomparable, you can prevent misuse (prevent
-// people from using ==), but also you can shrink generated binaries,
-// as the compiler can omit equality funcs from the binary.
-type Incomparable [0]func()
Author	SHA1	Message	Date
jmorganca	a5a60e4a75	use If-None-match header for github releases	2024-04-29 09:25:32 -04:00
Blake Mizerany	7e432cdfac	types/model: remove old comment (#4020 )	2024-04-28 20:52:26 -07:00
Jeffrey Morgan	586672f490	fix copying model to itself (#4019 )	2024-04-28 23:47:49 -04:00
Daniel Hiltgen	b03408de74	Merge pull request #3972 from hmartinez82/win_arm64 Add support for building on Windows ARM64	2024-04-28 14:52:58 -07:00
Daniel Hiltgen	1e6a28bf5b	Merge pull request #4009 from dhiltgen/cpu_concurrency Fix concurrency for CPU mode	2024-04-28 14:20:27 -07:00
Daniel Hiltgen	d6e3b64582	Fix concurrency for CPU mode Prior refactoring passes accidentally removed the logic to bypass VRAM checks for CPU loads. This adds that back, along with test coverage. This also fixes loaded map access in the unit test to be behind the mutex which was likely the cause of various flakes in the tests.	2024-04-28 13:42:39 -07:00
Blake Mizerany	114c932a8e	types/model: allow _ as starter character in Name parts (#3991 )	2024-04-27 21:24:52 -07:00
Jeffrey Morgan	7f7103de06	mac: update setup command to `llama3` (#3986 )	2024-04-27 22:52:10 -04:00
Blake Mizerany	c631a9c726	types/model: relax name length constraint from 2 to 1 (#3984 )	2024-04-27 17:58:41 -07:00
Blake Mizerany	8fd9e56804	types/structs: drop unused structs package (#3981 )	2024-04-27 14:06:11 -07:00
Hernan Martinez	8a65717f55	Do not build AVX runners on ARM64	2024-04-26 23:55:32 -06:00
Hernan Martinez	6d3152a98a	Use architecture specific folders in installer script	2024-04-26 23:35:16 -06:00
Hernan Martinez	b438d485f1	Use architecture specific folders in the generate script	2024-04-26 23:34:12 -06:00
Hernan Martinez	204349b17b	Use architecture specific folders in the build script	2024-04-26 23:26:03 -06:00
Hernan Martinez	86e67fc4a9	Add import declaration for windows,arm64 to llm.go	2024-04-26 23:23:53 -06:00
Blake Mizerany	2bed62926e	types/model: remove Digest (for now) (#3970 ) The Digest type needs more thought and is not necessary at the moment.	2024-04-26 21:14:28 -07:00
Jeffrey Morgan	aad8d128a0	also look at cwd as a root for windows runners (#3959 )	2024-04-26 19:14:08 -04:00