Compare commits
17 Commits
v0.1.33-rc
...
jmorganca/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a5a60e4a75 | ||
|
|
7e432cdfac | ||
|
|
586672f490 | ||
|
|
b03408de74 | ||
|
|
1e6a28bf5b | ||
|
|
d6e3b64582 | ||
|
|
114c932a8e | ||
|
|
7f7103de06 | ||
|
|
c631a9c726 | ||
|
|
8fd9e56804 | ||
|
|
8a65717f55 | ||
|
|
6d3152a98a | ||
|
|
b438d485f1 | ||
|
|
204349b17b | ||
|
|
86e67fc4a9 | ||
|
|
2bed62926e | ||
|
|
aad8d128a0 |
@@ -8,7 +8,6 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"mime"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
@@ -70,7 +69,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
|
||||
req.Header.Set("Authorization", signature)
|
||||
req.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
|
||||
|
||||
slog.Debug("checking for available update", "requestURL", requestURL)
|
||||
slog.Info("checking for available update", "requestURL", requestURL)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to check for update: %s", err))
|
||||
@@ -104,60 +103,63 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
|
||||
}
|
||||
|
||||
func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
|
||||
// Do a head first to check etag info
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodHead, updateResp.UpdateURL, nil)
|
||||
etagfilename := filepath.Join(UpdateStageDir, updateResp.UpdateVersion, "etag")
|
||||
|
||||
// parse the base of the url for the filename
|
||||
u, err := url.Parse(updateResp.UpdateURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not parse update url: %w", err)
|
||||
}
|
||||
|
||||
var etag string
|
||||
filename := filepath.Join(UpdateStageDir, updateResp.UpdateVersion, filepath.Base(u.Path))
|
||||
_, err = os.Stat(filename)
|
||||
if err == nil {
|
||||
file, err := os.Open(etagfilename)
|
||||
if err == nil {
|
||||
content, err := io.ReadAll(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not read etag file: %w", err)
|
||||
}
|
||||
|
||||
etag = string(content)
|
||||
file.Close()
|
||||
}
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, updateResp.UpdateURL, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if etag != "" {
|
||||
req.Header.Set("If-None-Match", "\""+etag+"\"")
|
||||
}
|
||||
|
||||
slog.Info("sending update request", "url", req.URL.String())
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error checking update: %w", err)
|
||||
}
|
||||
if resp.StatusCode != 200 {
|
||||
return fmt.Errorf("unexpected status attempting to download update %d", resp.StatusCode)
|
||||
}
|
||||
resp.Body.Close()
|
||||
etag := strings.Trim(resp.Header.Get("etag"), "\"")
|
||||
if etag == "" {
|
||||
slog.Debug("no etag detected, falling back to filename based dedup")
|
||||
etag = "_"
|
||||
}
|
||||
filename := Installer
|
||||
_, params, err := mime.ParseMediaType(resp.Header.Get("content-disposition"))
|
||||
if err == nil {
|
||||
filename = params["filename"]
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
stageFilename := filepath.Join(UpdateStageDir, etag, filename)
|
||||
|
||||
// Check to see if we already have it downloaded
|
||||
_, err = os.Stat(stageFilename)
|
||||
if err == nil {
|
||||
if resp.StatusCode == 304 {
|
||||
slog.Info("update already downloaded")
|
||||
return nil
|
||||
}
|
||||
|
||||
slog.Info("got status", "status", resp.StatusCode)
|
||||
if resp.StatusCode != 200 {
|
||||
return fmt.Errorf("unexpected status attempting to download update %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
cleanupOldDownloads()
|
||||
|
||||
req.Method = http.MethodGet
|
||||
resp, err = http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error checking update: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
etag = strings.Trim(resp.Header.Get("etag"), "\"")
|
||||
if etag == "" {
|
||||
slog.Debug("no etag detected, falling back to filename based dedup") // TODO probably can get rid of this redundant log
|
||||
etag = "_"
|
||||
}
|
||||
|
||||
stageFilename = filepath.Join(UpdateStageDir, etag, filename)
|
||||
|
||||
_, err = os.Stat(filepath.Dir(stageFilename))
|
||||
// Create the directory for the update
|
||||
_, err = os.Stat(filepath.Dir(filename))
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
if err := os.MkdirAll(filepath.Dir(stageFilename), 0o755); err != nil {
|
||||
return fmt.Errorf("create ollama dir %s: %v", filepath.Dir(stageFilename), err)
|
||||
if err := os.MkdirAll(filepath.Dir(filename), 0o755); err != nil {
|
||||
return fmt.Errorf("create update dir %s: %v", filepath.Dir(filename), err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -165,17 +167,33 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read body response: %w", err)
|
||||
}
|
||||
fp, err := os.OpenFile(stageFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
|
||||
fp, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("write payload %s: %w", stageFilename, err)
|
||||
return fmt.Errorf("write payload %s: %w", filename, err)
|
||||
}
|
||||
defer fp.Close()
|
||||
if n, err := fp.Write(payload); err != nil || n != len(payload) {
|
||||
return fmt.Errorf("write payload %s: %d vs %d -- %w", stageFilename, n, len(payload), err)
|
||||
return fmt.Errorf("write payload %s: %d vs %d -- %w", filename, n, len(payload), err)
|
||||
}
|
||||
slog.Info("new update downloaded " + stageFilename)
|
||||
|
||||
etag = strings.Trim(resp.Header.Get("etag"), "\"")
|
||||
if etag != "" {
|
||||
file, err := os.OpenFile(etagfilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not open etag file for writing: %w", err)
|
||||
}
|
||||
|
||||
if _, err := file.Write([]byte(etag)); err != nil {
|
||||
return fmt.Errorf("could not write etag file: %w", err)
|
||||
}
|
||||
|
||||
file.Close()
|
||||
}
|
||||
|
||||
slog.Info("new update downloaded " + filename)
|
||||
|
||||
UpdateDownloaded = true
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -88,8 +88,8 @@ DialogFontSize=12
|
||||
[Files]
|
||||
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
|
||||
Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
|
||||
Source: "..\dist\windows-amd64\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
|
||||
Source: "..\dist\windows-amd64\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
|
||||
Source: "..\dist\windows-{#ARCH}\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
|
||||
Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
|
||||
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
|
||||
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
|
||||
#if DirExists("..\dist\windows-amd64\rocm")
|
||||
|
||||
@@ -32,9 +32,25 @@ func PayloadsDir() (string, error) {
|
||||
slog.Error("failed to lookup executable path", "error", err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
cwd, err := os.Getwd()
|
||||
if err != nil {
|
||||
slog.Error("failed to lookup working directory", "error", err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
var paths []string
|
||||
for _, root := range []string{appExe, cwd} {
|
||||
paths = append(paths,
|
||||
filepath.Join(root),
|
||||
filepath.Join(root, "windows-"+runtime.GOARCH),
|
||||
filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
|
||||
)
|
||||
}
|
||||
|
||||
// Try a few variations to improve developer experience when building from source in the local tree
|
||||
for _, d := range []string{".", "windows-" + runtime.GOARCH, "dist\\windows-" + runtime.GOARCH} {
|
||||
candidate := filepath.Join(filepath.Dir(appExe), d, "ollama_runners")
|
||||
for _, p := range paths {
|
||||
candidate := filepath.Join(p, "ollama_runners")
|
||||
_, err := os.Stat(candidate)
|
||||
if err == nil {
|
||||
runnersDir = candidate
|
||||
|
||||
@@ -42,7 +42,7 @@ function init_vars {
|
||||
"-DLLAMA_NATIVE=off"
|
||||
)
|
||||
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
|
||||
$script:ARCH = "amd64" # arm not yet supported.
|
||||
$script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
|
||||
$script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
|
||||
md "$script:DIST_BASE" -ea 0 > $null
|
||||
if ($env:CGO_CFLAGS -contains "-g") {
|
||||
@@ -213,11 +213,11 @@ function build_static() {
|
||||
}
|
||||
}
|
||||
|
||||
function build_cpu() {
|
||||
function build_cpu($gen_arch) {
|
||||
if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
|
||||
# remaining llama.cpp builds use MSVC
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", $gen_arch, "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cpu"
|
||||
$script:distDir="$script:DIST_BASE\cpu"
|
||||
write-host "Building LCD CPU"
|
||||
@@ -349,11 +349,15 @@ if ($($args.count) -eq 0) {
|
||||
git_module_setup
|
||||
apply_patches
|
||||
build_static
|
||||
build_cpu
|
||||
build_cpu_avx
|
||||
build_cpu_avx2
|
||||
build_cuda
|
||||
build_rocm
|
||||
if ($script:ARCH -eq "arm64") {
|
||||
build_cpu("ARM64")
|
||||
} else { # amd64
|
||||
build_cpu("x64")
|
||||
build_cpu_avx
|
||||
build_cpu_avx2
|
||||
build_cuda
|
||||
build_rocm
|
||||
}
|
||||
|
||||
cleanup
|
||||
write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)"
|
||||
|
||||
@@ -4,6 +4,7 @@ package llm
|
||||
// #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++
|
||||
// #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++
|
||||
// #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
|
||||
// #cgo windows,arm64 LDFLAGS: ${SRCDIR}/build/windows/arm64_static/libllama.a -static -lstdc++
|
||||
// #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
|
||||
// #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
|
||||
// #include <stdlib.h>
|
||||
|
||||
@@ -19,7 +19,7 @@ export default function () {
|
||||
const [step, setStep] = useState<Step>(Step.WELCOME)
|
||||
const [commandCopied, setCommandCopied] = useState<boolean>(false)
|
||||
|
||||
const command = 'ollama run llama2'
|
||||
const command = 'ollama run llama3'
|
||||
|
||||
return (
|
||||
<div className='drag'>
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
function checkEnv() {
|
||||
$script:TARGET_ARCH=$Env:PROCESSOR_ARCHITECTURE.ToLower()
|
||||
Write-host "Building for ${script:TARGET_ARCH}"
|
||||
write-host "Locating required tools and paths"
|
||||
$script:SRC_DIR=$PWD
|
||||
if (!$env:VCToolsRedistDir) {
|
||||
@@ -30,7 +32,7 @@ function checkEnv() {
|
||||
|
||||
$script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0]
|
||||
|
||||
$script:DEPS_DIR="${script:SRC_DIR}\dist\windows-amd64"
|
||||
$script:DEPS_DIR="${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}"
|
||||
$env:CGO_ENABLED="1"
|
||||
echo "Checking version"
|
||||
if (!$env:VERSION) {
|
||||
@@ -81,8 +83,8 @@ function buildOllama() {
|
||||
/csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
}
|
||||
New-Item -ItemType Directory -Path .\dist\windows-amd64\ -Force
|
||||
cp .\ollama.exe .\dist\windows-amd64\
|
||||
New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\ -Force
|
||||
cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\
|
||||
}
|
||||
|
||||
function buildApp() {
|
||||
@@ -127,16 +129,16 @@ function buildInstaller() {
|
||||
cd "${script:SRC_DIR}\app"
|
||||
$env:PKG_VERSION=$script:PKG_VERSION
|
||||
if ("${env:KEY_CONTAINER}") {
|
||||
& "${script:INNO_SETUP_DIR}\ISCC.exe" /SMySignTool="${script:SignTool} sign /fd sha256 /t http://timestamp.digicert.com /f ${script:OLLAMA_CERT} /csp `$qGoogle Cloud KMS Provider`$q /kc ${env:KEY_CONTAINER} `$f" .\ollama.iss
|
||||
& "${script:INNO_SETUP_DIR}\ISCC.exe" /DARCH=$script:TARGET_ARCH /SMySignTool="${script:SignTool} sign /fd sha256 /t http://timestamp.digicert.com /f ${script:OLLAMA_CERT} /csp `$qGoogle Cloud KMS Provider`$q /kc ${env:KEY_CONTAINER} `$f" .\ollama.iss
|
||||
} else {
|
||||
& "${script:INNO_SETUP_DIR}\ISCC.exe" .\ollama.iss
|
||||
& "${script:INNO_SETUP_DIR}\ISCC.exe" /DARCH=$script:TARGET_ARCH .\ollama.iss
|
||||
}
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
}
|
||||
|
||||
function distZip() {
|
||||
write-host "Generating stand-alone distribution zip file ${script:SRC_DIR}\dist\ollama-windows-amd64.zip"
|
||||
Compress-Archive -Path "${script:SRC_DIR}\dist\windows-amd64\*" -DestinationPath "${script:SRC_DIR}\dist\ollama-windows-amd64.zip" -Force
|
||||
write-host "Generating stand-alone distribution zip file ${script:SRC_DIR}\dist\ollama-windows-${script:TARGET_ARCH}.zip"
|
||||
Compress-Archive -Path "${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}\*" -DestinationPath "${script:SRC_DIR}\dist\ollama-windows-${script:TARGET_ARCH}.zip" -Force
|
||||
}
|
||||
|
||||
try {
|
||||
|
||||
@@ -710,6 +710,10 @@ func CopyModel(src, dst model.Name) error {
|
||||
return model.Unqualified(src)
|
||||
}
|
||||
|
||||
if src.Filepath() == dst.Filepath() {
|
||||
return nil
|
||||
}
|
||||
|
||||
manifests, err := GetManifestPath()
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
@@ -149,6 +149,14 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
||||
break
|
||||
}
|
||||
|
||||
// If we're CPU only mode, just limit by loadedMax above
|
||||
// TODO handle system memory exhaustion
|
||||
if (len(gpus) == 1 && gpus[0].Library == "cpu") || pending.opts.NumGPU == 0 {
|
||||
slog.Debug("cpu mode with existing models, loading")
|
||||
s.loadFn(pending, ggml, gpus)
|
||||
break
|
||||
}
|
||||
|
||||
// No models loaded. Load the model but prefer the best fit.
|
||||
if loadedCount == 0 {
|
||||
slog.Debug("loading first model", "model", pending.model.ModelPath)
|
||||
|
||||
@@ -28,19 +28,33 @@ func TestInitScheduler(t *testing.T) {
|
||||
ctx, done := context.WithCancel(context.Background())
|
||||
defer done()
|
||||
initialMax := loadedMax
|
||||
initialParallel := numParallel
|
||||
s := InitScheduler(ctx)
|
||||
require.Equal(t, initialMax, loadedMax)
|
||||
s.loadedMu.Lock()
|
||||
require.NotNil(t, s.loaded)
|
||||
s.loadedMu.Unlock()
|
||||
|
||||
os.Setenv("OLLAMA_MAX_LOADED_MODELS", "blue")
|
||||
s = InitScheduler(ctx)
|
||||
require.Equal(t, initialMax, loadedMax)
|
||||
s.loadedMu.Lock()
|
||||
require.NotNil(t, s.loaded)
|
||||
s.loadedMu.Unlock()
|
||||
|
||||
os.Setenv("OLLAMA_MAX_LOADED_MODELS", "0")
|
||||
s = InitScheduler(ctx)
|
||||
require.Equal(t, 0, loadedMax)
|
||||
s.loadedMu.Lock()
|
||||
require.NotNil(t, s.loaded)
|
||||
s.loadedMu.Unlock()
|
||||
|
||||
os.Setenv("OLLAMA_NUM_PARALLEL", "blue")
|
||||
_ = InitScheduler(ctx)
|
||||
require.Equal(t, initialParallel, numParallel)
|
||||
os.Setenv("OLLAMA_NUM_PARALLEL", "10")
|
||||
_ = InitScheduler(ctx)
|
||||
require.Equal(t, 10, numParallel)
|
||||
}
|
||||
|
||||
func TestLoad(t *testing.T) {
|
||||
@@ -51,6 +65,7 @@ func TestLoad(t *testing.T) {
|
||||
req := &LlmRequest{
|
||||
ctx: ctx,
|
||||
model: &Model{ModelPath: "foo"},
|
||||
opts: api.DefaultOptions(),
|
||||
successCh: make(chan *runnerRef, 1),
|
||||
errCh: make(chan error, 1),
|
||||
sessionDuration: 2,
|
||||
@@ -63,7 +78,9 @@ func TestLoad(t *testing.T) {
|
||||
s.load(req, ggml, gpus)
|
||||
require.Len(t, req.successCh, 0)
|
||||
require.Len(t, req.errCh, 1)
|
||||
s.loadedMu.Lock()
|
||||
require.Len(t, s.loaded, 0)
|
||||
s.loadedMu.Unlock()
|
||||
err := <-req.errCh
|
||||
require.Contains(t, err.Error(), "this model may be incompatible")
|
||||
|
||||
@@ -78,7 +95,9 @@ func TestLoad(t *testing.T) {
|
||||
case resp := <-req.successCh:
|
||||
require.Equal(t, uint64(10), resp.estimatedVRAM)
|
||||
require.Equal(t, uint(1), resp.refCount)
|
||||
s.loadedMu.Lock()
|
||||
require.Len(t, s.loaded, 1)
|
||||
s.loadedMu.Unlock()
|
||||
}
|
||||
|
||||
req.model.ModelPath = "dummy_model_path"
|
||||
@@ -90,7 +109,9 @@ func TestLoad(t *testing.T) {
|
||||
case resp := <-req.successCh:
|
||||
t.Errorf("unexpected success %v", resp)
|
||||
}
|
||||
s.loadedMu.Lock()
|
||||
runner := s.loaded["dummy_model_path"]
|
||||
s.loadedMu.Unlock()
|
||||
require.NotNil(t, runner)
|
||||
require.Equal(t, uint(0), runner.refCount)
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
@@ -143,6 +164,7 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
|
||||
scenario.req = &LlmRequest{
|
||||
ctx: scenario.ctx,
|
||||
model: model,
|
||||
opts: api.DefaultOptions(),
|
||||
sessionDuration: 5 * time.Millisecond,
|
||||
successCh: make(chan *runnerRef, 1),
|
||||
errCh: make(chan error, 1),
|
||||
@@ -171,7 +193,9 @@ func TestRequests(t *testing.T) {
|
||||
// Multiple loaded models
|
||||
scenario3a := newScenario(t, ctx, "ollama-model-3a", 1*format.GigaByte)
|
||||
scenario3b := newScenario(t, ctx, "ollama-model-3b", 24*format.GigaByte)
|
||||
scenario3c := newScenario(t, ctx, "ollama-model-3c", 30) // Needs prior unloaded
|
||||
scenario3c := newScenario(t, ctx, "ollama-model-4a", 30)
|
||||
scenario3c.req.opts.NumGPU = 0 // CPU load, will be allowed
|
||||
scenario3d := newScenario(t, ctx, "ollama-model-3c", 30) // Needs prior unloaded
|
||||
|
||||
s := InitScheduler(ctx)
|
||||
s.getGpuFn = func() gpu.GpuInfoList {
|
||||
@@ -240,7 +264,9 @@ func TestRequests(t *testing.T) {
|
||||
case <-ctx.Done():
|
||||
t.Errorf("timeout")
|
||||
}
|
||||
s.loadedMu.Lock()
|
||||
require.Len(t, s.loaded, 1)
|
||||
s.loadedMu.Unlock()
|
||||
|
||||
loadedMax = 0
|
||||
s.newServerFn = scenario3b.newServer
|
||||
@@ -254,19 +280,14 @@ func TestRequests(t *testing.T) {
|
||||
case <-ctx.Done():
|
||||
t.Errorf("timeout")
|
||||
}
|
||||
s.loadedMu.Lock()
|
||||
require.Len(t, s.loaded, 2)
|
||||
s.loadedMu.Unlock()
|
||||
|
||||
// Try to load a model that wont fit
|
||||
// This is a CPU load with NumGPU = 0 so it should load
|
||||
s.newServerFn = scenario3c.newServer
|
||||
slog.Info("scenario3c")
|
||||
require.Len(t, s.loaded, 2)
|
||||
scenario3a.ctxDone() // Won't help since this one isn't big enough to make room
|
||||
time.Sleep(2 * time.Millisecond)
|
||||
s.pendingReqCh <- scenario3c.req
|
||||
// finish prior request, so new model can load
|
||||
time.Sleep(6 * time.Millisecond)
|
||||
require.Len(t, s.loaded, 1)
|
||||
scenario3b.ctxDone()
|
||||
select {
|
||||
case resp := <-scenario3c.req.successCh:
|
||||
require.Equal(t, resp.llama, scenario3c.srv)
|
||||
@@ -275,7 +296,36 @@ func TestRequests(t *testing.T) {
|
||||
case <-ctx.Done():
|
||||
t.Errorf("timeout")
|
||||
}
|
||||
require.Len(t, s.loaded, 1)
|
||||
s.loadedMu.Lock()
|
||||
require.Len(t, s.loaded, 3)
|
||||
s.loadedMu.Unlock()
|
||||
|
||||
// Try to load a model that wont fit
|
||||
s.newServerFn = scenario3d.newServer
|
||||
slog.Info("scenario3d")
|
||||
s.loadedMu.Lock()
|
||||
require.Len(t, s.loaded, 3)
|
||||
s.loadedMu.Unlock()
|
||||
scenario3a.ctxDone() // Won't help since this one isn't big enough to make room
|
||||
time.Sleep(2 * time.Millisecond)
|
||||
s.pendingReqCh <- scenario3d.req
|
||||
// finish prior request, so new model can load
|
||||
time.Sleep(6 * time.Millisecond)
|
||||
s.loadedMu.Lock()
|
||||
require.Len(t, s.loaded, 2)
|
||||
s.loadedMu.Unlock()
|
||||
scenario3b.ctxDone()
|
||||
select {
|
||||
case resp := <-scenario3d.req.successCh:
|
||||
require.Equal(t, resp.llama, scenario3d.srv)
|
||||
require.Len(t, s.pendingReqCh, 0)
|
||||
require.Len(t, scenario3d.req.errCh, 0)
|
||||
case <-ctx.Done():
|
||||
t.Errorf("timeout")
|
||||
}
|
||||
s.loadedMu.Lock()
|
||||
require.Len(t, s.loaded, 2)
|
||||
s.loadedMu.Unlock()
|
||||
}
|
||||
|
||||
func TestGetRunner(t *testing.T) {
|
||||
@@ -318,7 +368,9 @@ func TestGetRunner(t *testing.T) {
|
||||
t.Errorf("timeout")
|
||||
}
|
||||
scenario1a.ctxDone()
|
||||
s.loadedMu.Lock()
|
||||
require.Len(t, s.loaded, 1)
|
||||
s.loadedMu.Unlock()
|
||||
|
||||
scenario1c.req.model.ModelPath = "bad path"
|
||||
slog.Info("scenario1c")
|
||||
@@ -328,7 +380,9 @@ func TestGetRunner(t *testing.T) {
|
||||
require.Len(t, errCh1c, 0)
|
||||
|
||||
time.Sleep(5 * time.Millisecond)
|
||||
s.loadedMu.Lock()
|
||||
require.Len(t, s.loaded, 0)
|
||||
s.loadedMu.Unlock()
|
||||
require.Len(t, errCh1c, 1)
|
||||
err = <-errCh1c
|
||||
require.Contains(t, err.Error(), "bad path")
|
||||
@@ -358,7 +412,9 @@ func TestPrematureExpired(t *testing.T) {
|
||||
require.Equal(t, resp.llama, scenario1a.srv)
|
||||
require.Len(t, s.pendingReqCh, 0)
|
||||
require.Len(t, errCh1a, 0)
|
||||
s.loadedMu.Lock()
|
||||
require.Len(t, s.loaded, 1)
|
||||
s.loadedMu.Unlock()
|
||||
slog.Info("sending premature expired event now")
|
||||
s.expiredCh <- resp // Shouldn't happen in real life, but make sure its safe
|
||||
case <-ctx.Done():
|
||||
@@ -383,6 +439,7 @@ func TestUseLoadedRunner(t *testing.T) {
|
||||
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
||||
req := &LlmRequest{
|
||||
ctx: ctx,
|
||||
opts: api.DefaultOptions(),
|
||||
successCh: make(chan *runnerRef, 1),
|
||||
sessionDuration: 2,
|
||||
}
|
||||
@@ -426,8 +483,10 @@ func TestUpdateFreeSpace(t *testing.T) {
|
||||
r2 := &runnerRef{llama: llm2, gpus: gpus}
|
||||
|
||||
s := InitScheduler(ctx)
|
||||
s.loadedMu.Lock()
|
||||
s.loaded["a"] = r1
|
||||
s.loaded["b"] = r2
|
||||
s.loadedMu.Unlock()
|
||||
|
||||
s.updateFreeSpace(gpus)
|
||||
require.Equal(t, uint64(850), gpus[0].FreeMemory)
|
||||
@@ -437,13 +496,18 @@ func TestUpdateFreeSpace(t *testing.T) {
|
||||
func TestFindRunnerToUnload(t *testing.T) {
|
||||
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
||||
defer done()
|
||||
req := &LlmRequest{ctx: ctx}
|
||||
req := &LlmRequest{
|
||||
ctx: ctx,
|
||||
opts: api.DefaultOptions(),
|
||||
}
|
||||
r1 := &runnerRef{refCount: 1, sessionDuration: 1}
|
||||
r2 := &runnerRef{sessionDuration: 2}
|
||||
|
||||
s := InitScheduler(ctx)
|
||||
s.loadedMu.Lock()
|
||||
s.loaded["a"] = r1
|
||||
s.loaded["b"] = r2
|
||||
s.loadedMu.Unlock()
|
||||
|
||||
resp := s.findRunnerToUnload(req)
|
||||
require.Equal(t, r2, resp)
|
||||
@@ -458,10 +522,11 @@ func TestNeedsReload(t *testing.T) {
|
||||
defer done()
|
||||
|
||||
llm := &mockLlm{}
|
||||
do := api.DefaultOptions()
|
||||
runner := &runnerRef{
|
||||
adapters: []string{"adapter1"},
|
||||
projectors: []string{"projector1"},
|
||||
Options: &api.Options{},
|
||||
Options: &do,
|
||||
llama: llm,
|
||||
}
|
||||
req := &LlmRequest{
|
||||
@@ -469,7 +534,7 @@ func TestNeedsReload(t *testing.T) {
|
||||
AdapterPaths: []string{"adapter2"},
|
||||
ProjectorPaths: []string{"projector2"},
|
||||
},
|
||||
opts: api.Options{},
|
||||
opts: api.DefaultOptions(),
|
||||
}
|
||||
resp := runner.needsReload(ctx, req)
|
||||
require.True(t, resp)
|
||||
@@ -508,8 +573,10 @@ func TestUnloadAllRunners(t *testing.T) {
|
||||
r1 := &runnerRef{llama: llm1}
|
||||
r2 := &runnerRef{llama: llm2}
|
||||
|
||||
s.loadedMu.Lock()
|
||||
s.loaded["a"] = r1
|
||||
s.loaded["b"] = r2
|
||||
s.loadedMu.Unlock()
|
||||
s.unloadAllRunners()
|
||||
|
||||
require.True(t, llm1.closeCalled)
|
||||
|
||||
@@ -4,7 +4,6 @@ package model
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
@@ -81,9 +80,6 @@ func (k partKind) String() string {
|
||||
//
|
||||
// It is not guaranteed to be valid. Use [Name.IsValid] to check if the name
|
||||
// is valid.
|
||||
//
|
||||
// It is not directly comparable with other Names. Use [Name.Equal] and
|
||||
// [Name.MapHash] for determining equality and using as a map key.
|
||||
type Name struct {
|
||||
Host string
|
||||
Namespace string
|
||||
@@ -110,20 +106,20 @@ type Name struct {
|
||||
// { model }
|
||||
// "@" { digest }
|
||||
// host:
|
||||
// pattern: alphanum { alphanum | "-" | "_" | "." | ":" }*
|
||||
// pattern: { alphanum | "_" } { alphanum | "-" | "_" | "." | ":" }*
|
||||
// length: [1, 350]
|
||||
// namespace:
|
||||
// pattern: alphanum { alphanum | "-" | "_" }*
|
||||
// length: [2, 80]
|
||||
// pattern: { alphanum | "_" } { alphanum | "-" | "_" }*
|
||||
// length: [1, 80]
|
||||
// model:
|
||||
// pattern: alphanum { alphanum | "-" | "_" | "." }*
|
||||
// length: [2, 80]
|
||||
// pattern: { alphanum | "_" } { alphanum | "-" | "_" | "." }*
|
||||
// length: [1, 80]
|
||||
// tag:
|
||||
// pattern: alphanum { alphanum | "-" | "_" | "." }*
|
||||
// pattern: { alphanum | "_" } { alphanum | "-" | "_" | "." }*
|
||||
// length: [1, 80]
|
||||
// digest:
|
||||
// pattern: alphanum { alphanum | "-" | ":" }*
|
||||
// length: [2, 80]
|
||||
// pattern: { alphanum | "_" } { alphanum | "-" | ":" }*
|
||||
// length: [1, 80]
|
||||
//
|
||||
// Most users should use [ParseName] instead, unless need to support
|
||||
// different defaults than DefaultName.
|
||||
@@ -171,11 +167,6 @@ func Merge(a, b Name) Name {
|
||||
return a
|
||||
}
|
||||
|
||||
// Digest returns the result of [ParseDigest] with the RawDigest field.
|
||||
func (n Name) Digest() Digest {
|
||||
return ParseDigest(n.RawDigest)
|
||||
}
|
||||
|
||||
// String returns the name string, in the format that [ParseNameNoDefaults]
|
||||
// accepts as valid, if [Name.IsValid] reports true; otherwise the empty
|
||||
// string is returned.
|
||||
@@ -204,7 +195,7 @@ func (n Name) String() string {
|
||||
// IsValid reports whether all parts of the name are present and valid. The
|
||||
// digest is a special case, and is checked for validity only if present.
|
||||
func (n Name) IsValid() bool {
|
||||
if n.RawDigest != "" && !ParseDigest(n.RawDigest).IsValid() {
|
||||
if n.RawDigest != "" && !isValidPart(kindDigest, n.RawDigest) {
|
||||
return false
|
||||
}
|
||||
return n.IsFullyQualified()
|
||||
@@ -260,7 +251,7 @@ func isValidLen(kind partKind, s string) bool {
|
||||
case kindTag:
|
||||
return len(s) >= 1 && len(s) <= 80
|
||||
default:
|
||||
return len(s) >= 2 && len(s) <= 80
|
||||
return len(s) >= 1 && len(s) <= 80
|
||||
}
|
||||
}
|
||||
|
||||
@@ -270,7 +261,7 @@ func isValidPart(kind partKind, s string) bool {
|
||||
}
|
||||
for i := range s {
|
||||
if i == 0 {
|
||||
if !isAlphanumeric(s[i]) {
|
||||
if !isAlphanumericOrUnderscore(s[i]) {
|
||||
return false
|
||||
}
|
||||
continue
|
||||
@@ -282,11 +273,11 @@ func isValidPart(kind partKind, s string) bool {
|
||||
return false
|
||||
}
|
||||
case ':':
|
||||
if kind != kindHost {
|
||||
if kind != kindHost && kind != kindDigest {
|
||||
return false
|
||||
}
|
||||
default:
|
||||
if !isAlphanumeric(s[i]) {
|
||||
if !isAlphanumericOrUnderscore(s[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
@@ -294,8 +285,8 @@ func isValidPart(kind partKind, s string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func isAlphanumeric(c byte) bool {
|
||||
return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c >= '0' && c <= '9'
|
||||
func isAlphanumericOrUnderscore(c byte) bool {
|
||||
return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c >= '0' && c <= '9' || c == '_'
|
||||
}
|
||||
|
||||
func cutLast(s, sep string) (before, after string, ok bool) {
|
||||
@@ -317,75 +308,3 @@ func cutPromised(s, sep string) (before, after string, ok bool) {
|
||||
}
|
||||
return cmp.Or(before, MissingPart), cmp.Or(after, MissingPart), true
|
||||
}
|
||||
|
||||
type DigestType int
|
||||
|
||||
const (
|
||||
DigestTypeInvalid DigestType = iota
|
||||
DigestTypeSHA256
|
||||
)
|
||||
|
||||
func (t DigestType) String() string {
|
||||
if t == DigestTypeSHA256 {
|
||||
return "sha256"
|
||||
}
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
// Digest represents a type and hash of a digest. It is comparable and can
|
||||
// be used as a map key.
|
||||
type Digest struct {
|
||||
Type DigestType
|
||||
Hash [32]byte
|
||||
}
|
||||
|
||||
// ParseDigest parses a digest string into a Digest struct. It accepts both
|
||||
// the forms:
|
||||
//
|
||||
// sha256:deadbeef
|
||||
// sha256-deadbeef
|
||||
//
|
||||
// The hash part must be exactly 64 characters long.
|
||||
//
|
||||
// The form "type:hash" does not round trip through [Digest.String].
|
||||
func ParseDigest(s string) Digest {
|
||||
typ, hash, ok := cutLast(s, ":")
|
||||
if !ok {
|
||||
typ, hash, ok = cutLast(s, "-")
|
||||
if !ok {
|
||||
return Digest{}
|
||||
}
|
||||
}
|
||||
if typ != "sha256" {
|
||||
return Digest{}
|
||||
}
|
||||
var d Digest
|
||||
n, err := hex.Decode(d.Hash[:], []byte(hash))
|
||||
if err != nil || n != 32 {
|
||||
return Digest{}
|
||||
}
|
||||
return Digest{Type: DigestTypeSHA256, Hash: d.Hash}
|
||||
}
|
||||
|
||||
// IsValid returns true if the digest has a valid Type and Hash.
|
||||
func (d Digest) IsValid() bool {
|
||||
if d.Type != DigestTypeSHA256 {
|
||||
return false
|
||||
}
|
||||
return d.Hash != [32]byte{}
|
||||
}
|
||||
|
||||
// String returns the digest as a string in the form "type-hash". The hash
|
||||
// is encoded as a hex string.
|
||||
func (d Digest) String() string {
|
||||
var b strings.Builder
|
||||
b.WriteString(d.Type.String())
|
||||
b.WriteByte('-')
|
||||
b.WriteString(hex.EncodeToString(d.Hash[:]))
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// LogValue returns a slog.Value that represents the digest as a string.
|
||||
func (d Digest) LogValue() slog.Value {
|
||||
return slog.StringValue(d.String())
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ package model
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -82,10 +81,10 @@ func TestParseNameParts(t *testing.T) {
|
||||
wantValidDigest: false,
|
||||
},
|
||||
{
|
||||
in: "model@sha256:" + validSHA256Hex,
|
||||
in: "model@sha256:123",
|
||||
want: Name{
|
||||
Model: "model",
|
||||
RawDigest: "sha256:" + validSHA256Hex,
|
||||
RawDigest: "sha256:123",
|
||||
},
|
||||
wantValidDigest: true,
|
||||
},
|
||||
@@ -97,14 +96,18 @@ func TestParseNameParts(t *testing.T) {
|
||||
if !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("parseName(%q) = %v; want %v", tt.in, got, tt.want)
|
||||
}
|
||||
if got.Digest().IsValid() != tt.wantValidDigest {
|
||||
t.Errorf("parseName(%q).Digest().IsValid() = %v; want %v", tt.in, got.Digest().IsValid(), tt.wantValidDigest)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
var testCases = map[string]bool{ // name -> valid
|
||||
"": false,
|
||||
|
||||
"_why/_the/_lucky:_stiff": true,
|
||||
|
||||
// minimal
|
||||
"h/n/m:t@d": true,
|
||||
|
||||
"host/namespace/model:tag": true,
|
||||
"host/namespace/model": false,
|
||||
"namespace/model": false,
|
||||
@@ -120,11 +123,12 @@ var testCases = map[string]bool{ // name -> valid
|
||||
"h/nn/mm:t@sha256-1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes
|
||||
"h/nn/mm:t@sha256:1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes
|
||||
|
||||
"m": false, // model too short
|
||||
"n/mm:": false, // namespace too short
|
||||
"h/n/mm:t": false, // namespace too short
|
||||
"@t": false, // digest too short
|
||||
"mm@d": false, // digest too short
|
||||
// unqualified
|
||||
"m": false,
|
||||
"n/m:": false,
|
||||
"h/n/m": false,
|
||||
"@t": false,
|
||||
"m@d": false,
|
||||
|
||||
// invalids
|
||||
"^": false,
|
||||
@@ -144,8 +148,6 @@ var testCases = map[string]bool{ // name -> valid
|
||||
"hh/nn/mm:-tt@dd": false,
|
||||
"hh/nn/mm:tt@-dd": false,
|
||||
|
||||
"": false,
|
||||
|
||||
// hosts
|
||||
"host:https/namespace/model:tag": true,
|
||||
|
||||
@@ -167,7 +169,6 @@ func TestNameIsValid(t *testing.T) {
|
||||
var numStringTests int
|
||||
for s, want := range testCases {
|
||||
n := ParseNameBare(s)
|
||||
t.Logf("n: %#v", n)
|
||||
got := n.IsValid()
|
||||
if got != want {
|
||||
t.Errorf("parseName(%q).IsValid() = %v; want %v", s, got, want)
|
||||
@@ -239,57 +240,3 @@ func FuzzName(f *testing.F) {
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
const validSHA256Hex = "abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789"
|
||||
|
||||
func TestParseDigest(t *testing.T) {
|
||||
cases := map[string]bool{
|
||||
"sha256-1000000000000000000000000000000000000000000000000000000000000000": true,
|
||||
"sha256:1000000000000000000000000000000000000000000000000000000000000000": true,
|
||||
"sha256:0000000000000000000000000000000000000000000000000000000000000000": false,
|
||||
|
||||
"sha256:" + validSHA256Hex: true,
|
||||
"sha256-" + validSHA256Hex: true,
|
||||
|
||||
"": false,
|
||||
"sha134:" + validSHA256Hex: false,
|
||||
"sha256:" + validSHA256Hex + "x": false,
|
||||
"sha256:x" + validSHA256Hex: false,
|
||||
"sha256-" + validSHA256Hex + "x": false,
|
||||
"sha256-x": false,
|
||||
}
|
||||
|
||||
for s, want := range cases {
|
||||
t.Run(s, func(t *testing.T) {
|
||||
d := ParseDigest(s)
|
||||
if d.IsValid() != want {
|
||||
t.Errorf("ParseDigest(%q).IsValid() = %v; want %v", s, d.IsValid(), want)
|
||||
}
|
||||
norm := strings.ReplaceAll(s, ":", "-")
|
||||
if d.IsValid() && d.String() != norm {
|
||||
t.Errorf("ParseDigest(%q).String() = %q; want %q", s, d.String(), norm)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDigestString(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{in: "sha256:" + validSHA256Hex, want: "sha256-" + validSHA256Hex},
|
||||
{in: "sha256-" + validSHA256Hex, want: "sha256-" + validSHA256Hex},
|
||||
{in: "", want: "unknown-0000000000000000000000000000000000000000000000000000000000000000"},
|
||||
{in: "blah-100000000000000000000000000000000000000000000000000000000000000", want: "unknown-0000000000000000000000000000000000000000000000000000000000000000"},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.in, func(t *testing.T) {
|
||||
d := ParseDigest(tt.in)
|
||||
if d.String() != tt.want {
|
||||
t.Errorf("ParseDigest(%q).String() = %q; want %q", tt.in, d.String(), tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
// Package structs contains the Incomparable type.
|
||||
package structs
|
||||
|
||||
// Incomparable is a zero-width incomparable type. If added as the
|
||||
// first field in a struct, it marks that struct as not comparable
|
||||
// (can't do == or be a map key) and usually doesn't add any width to
|
||||
// the struct (unless the struct has only small fields).
|
||||
//
|
||||
// By making a struct incomparable, you can prevent misuse (prevent
|
||||
// people from using ==), but also you can shrink generated binaries,
|
||||
// as the compiler can omit equality funcs from the binary.
|
||||
type Incomparable [0]func()
|
||||
Reference in New Issue
Block a user