Compare commits
21 Commits
bmizerany/
...
v0.1.33-rc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2619850fb4 | ||
|
|
8feb97dc0d | ||
|
|
4e1ff6dcbb | ||
|
|
8589d752ac | ||
|
|
de4ded68b0 | ||
|
|
9b5a3c5991 | ||
|
|
00b0699c75 | ||
|
|
993cf8bf55 | ||
|
|
7bb7cb8a60 | ||
|
|
b123be5b71 | ||
|
|
ddf5c09a9b | ||
|
|
5f73c08729 | ||
|
|
f503a848c2 | ||
|
|
36a6daccab | ||
|
|
ceb0e26e5e | ||
|
|
284e02bed0 | ||
|
|
3450a57d4a | ||
|
|
592dae31c8 | ||
|
|
2010cbc5fa | ||
|
|
ac0801eced | ||
|
|
ad66e5b060 |
13
.github/workflows/release.yaml
vendored
13
.github/workflows/release.yaml
vendored
@@ -311,29 +311,18 @@ jobs:
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: generate-windows-cpu
|
||||
path: |
|
||||
llm/build
|
||||
dist/windows-amd64
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: generate-windows-cuda
|
||||
path: |
|
||||
llm/build
|
||||
dist/windows-amd64
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: windows-cuda-deps
|
||||
path: dist/deps
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: windows-rocm-deps
|
||||
path: dist/deps
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: generate-windows-rocm
|
||||
path: |
|
||||
llm/build
|
||||
dist/windows-amd64
|
||||
- run: dir llm/build
|
||||
- run: |
|
||||
$gopath=(get-command go).source | split-path -parent
|
||||
@@ -342,8 +331,6 @@ jobs:
|
||||
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
|
||||
$env:PATH="$gopath;$env:PATH"
|
||||
$env:OLLAMA_SKIP_GENERATE="1"
|
||||
$env:NVIDIA_DIR=$(resolve-path ".\dist\deps")
|
||||
$env:HIP_PATH=$(resolve-path ".\dist\deps")
|
||||
& .\scripts\build_windows.ps1
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
|
||||
@@ -396,8 +396,10 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
|
||||
func DefaultOptions() Options {
|
||||
return Options{
|
||||
// options set on request to runner
|
||||
NumPredict: -1,
|
||||
NumKeep: 0,
|
||||
NumPredict: -1,
|
||||
|
||||
// set a minimal num_keep to avoid issues on context shifts
|
||||
NumKeep: 4,
|
||||
Temperature: 0.8,
|
||||
TopK: 40,
|
||||
TopP: 0.9,
|
||||
|
||||
@@ -92,12 +92,8 @@ Source: "..\dist\windows-amd64\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64
|
||||
Source: "..\dist\windows-amd64\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
|
||||
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
|
||||
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
|
||||
; Assumes v5.7, may need adjustments for v6
|
||||
#if GetEnv("HIP_PATH") != ""
|
||||
Source: "{#GetEnv('HIP_PATH')}\bin\hipblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
|
||||
Source: "{#GetEnv('HIP_PATH')}\bin\rocblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
|
||||
; amdhip64.dll dependency comes from the driver and must be installed already
|
||||
Source: "{#GetEnv('HIP_PATH')}\bin\rocblas\library\*"; DestDir: "{app}\rocm\rocblas\library\"; Flags: ignoreversion
|
||||
#if DirExists("..\dist\windows-amd64\rocm")
|
||||
Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
183
cmd/cmd.go
183
cmd/cmd.go
@@ -17,6 +17,7 @@ import (
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
@@ -53,8 +54,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
p := progress.NewProgress(os.Stderr)
|
||||
defer p.Stop()
|
||||
|
||||
bars := make(map[string]*progress.Bar)
|
||||
|
||||
modelfile, err := os.ReadFile(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -95,95 +94,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO make this work w/ adapters
|
||||
if fi.IsDir() {
|
||||
tf, err := os.CreateTemp("", "ollama-tf")
|
||||
// this is likely a safetensors or pytorch directory
|
||||
// TODO make this work w/ adapters
|
||||
tempfile, err := tempZipFiles(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer os.RemoveAll(tf.Name())
|
||||
defer os.RemoveAll(tempfile)
|
||||
|
||||
zf := zip.NewWriter(tf)
|
||||
|
||||
files := []string{}
|
||||
|
||||
tfiles, err := filepath.Glob(filepath.Join(path, "pytorch_model-*.bin"))
|
||||
if err != nil {
|
||||
return err
|
||||
} else if len(tfiles) == 0 {
|
||||
tfiles, err = filepath.Glob(filepath.Join(path, "model-*.safetensors"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
files = append(files, tfiles...)
|
||||
|
||||
if len(files) == 0 {
|
||||
return fmt.Errorf("no models were found in '%s'", path)
|
||||
}
|
||||
|
||||
// add the safetensor/torch config file + tokenizer
|
||||
files = append(files, filepath.Join(path, "config.json"))
|
||||
files = append(files, filepath.Join(path, "params.json"))
|
||||
files = append(files, filepath.Join(path, "added_tokens.json"))
|
||||
files = append(files, filepath.Join(path, "tokenizer.model"))
|
||||
|
||||
for _, fn := range files {
|
||||
f, err := os.Open(fn)
|
||||
|
||||
// just skip whatever files aren't there
|
||||
if os.IsNotExist(err) {
|
||||
if strings.HasSuffix(fn, "tokenizer.model") {
|
||||
// try the parent dir before giving up
|
||||
parentDir := filepath.Dir(path)
|
||||
newFn := filepath.Join(parentDir, "tokenizer.model")
|
||||
f, err = os.Open(newFn)
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
h, err := zip.FileInfoHeader(fi)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
h.Name = filepath.Base(fn)
|
||||
h.Method = zip.Store
|
||||
|
||||
w, err := zf.CreateHeader(h)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = io.Copy(w, f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if err := zf.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := tf.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
path = tf.Name()
|
||||
path = tempfile
|
||||
}
|
||||
|
||||
digest, err := createBlob(cmd, client, path)
|
||||
@@ -191,10 +111,17 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
modelfile = bytes.ReplaceAll(modelfile, []byte(c.Args), []byte("@"+digest))
|
||||
name := c.Name
|
||||
if c.Name == "model" {
|
||||
name = "from"
|
||||
}
|
||||
|
||||
re := regexp.MustCompile(fmt.Sprintf(`(?im)^(%s)\s+%s\s*$`, name, c.Args))
|
||||
modelfile = re.ReplaceAll(modelfile, []byte("$1 @"+digest))
|
||||
}
|
||||
}
|
||||
|
||||
bars := make(map[string]*progress.Bar)
|
||||
fn := func(resp api.ProgressResponse) error {
|
||||
if resp.Digest != "" {
|
||||
spinner.Stop()
|
||||
@@ -228,6 +155,88 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func tempZipFiles(path string) (string, error) {
|
||||
tempfile, err := os.CreateTemp("", "ollama-tf")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer tempfile.Close()
|
||||
|
||||
zipfile := zip.NewWriter(tempfile)
|
||||
defer zipfile.Close()
|
||||
|
||||
tfiles, err := filepath.Glob(filepath.Join(path, "pytorch_model-*.bin"))
|
||||
if err != nil {
|
||||
return "", err
|
||||
} else if len(tfiles) == 0 {
|
||||
tfiles, err = filepath.Glob(filepath.Join(path, "model-*.safetensors"))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
files := []string{}
|
||||
files = append(files, tfiles...)
|
||||
|
||||
if len(files) == 0 {
|
||||
return "", fmt.Errorf("no models were found in '%s'", path)
|
||||
}
|
||||
|
||||
// add the safetensor/torch config file + tokenizer
|
||||
files = append(files, filepath.Join(path, "config.json"))
|
||||
files = append(files, filepath.Join(path, "params.json"))
|
||||
files = append(files, filepath.Join(path, "added_tokens.json"))
|
||||
files = append(files, filepath.Join(path, "tokenizer.model"))
|
||||
|
||||
for _, fn := range files {
|
||||
f, err := os.Open(fn)
|
||||
|
||||
// just skip whatever files aren't there
|
||||
if os.IsNotExist(err) {
|
||||
if strings.HasSuffix(fn, "tokenizer.model") {
|
||||
// try the parent dir before giving up
|
||||
parentDir := filepath.Dir(path)
|
||||
newFn := filepath.Join(parentDir, "tokenizer.model")
|
||||
f, err = os.Open(newFn)
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
} else if err != nil {
|
||||
return "", err
|
||||
}
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
} else if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
h, err := zip.FileInfoHeader(fi)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
h.Name = filepath.Base(fn)
|
||||
h.Method = zip.Store
|
||||
|
||||
w, err := zipfile.CreateHeader(h)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
_, err = io.Copy(w, f)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
return tempfile.Name(), nil
|
||||
}
|
||||
|
||||
func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
|
||||
bin, err := os.Open(path)
|
||||
if err != nil {
|
||||
|
||||
@@ -21,7 +21,7 @@ init_vars() {
|
||||
# TODO - add additional optimization flags...
|
||||
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
|
||||
fi
|
||||
case $(uname -s) in
|
||||
case $(uname -s) in
|
||||
"Darwin")
|
||||
LIB_EXT="dylib"
|
||||
WHOLE_ARCHIVE="-Wl,-force_load"
|
||||
|
||||
@@ -165,11 +165,11 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
|
||||
fi
|
||||
if [ "${ARCH}" == "arm64" ]; then
|
||||
echo "ARM CPU detected - disabling unsupported AVX instructions"
|
||||
|
||||
|
||||
# ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
|
||||
#
|
||||
# CUDA compute < 6.0 lacks proper FP16 support on ARM.
|
||||
# Disabling has minimal performance effect while maintaining compatibility.
|
||||
# CUDA compute < 6.0 lacks proper FP16 support on ARM.
|
||||
# Disabling has minimal performance effect while maintaining compatibility.
|
||||
ARM64_DEFS="-DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_CUDA_F16=off"
|
||||
fi
|
||||
# Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
|
||||
|
||||
@@ -252,6 +252,11 @@ if ($null -ne $script:CUDA_LIB_DIR) {
|
||||
build
|
||||
sign
|
||||
install
|
||||
|
||||
write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||
cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||
cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||
cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||
}
|
||||
|
||||
if ($null -ne $env:HIP_PATH) {
|
||||
@@ -295,6 +300,14 @@ if ($null -ne $env:HIP_PATH) {
|
||||
}
|
||||
sign
|
||||
install
|
||||
|
||||
# Assumes v5.7, may need adjustments for v6
|
||||
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
|
||||
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
|
||||
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
|
||||
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
|
||||
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
|
||||
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\"
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/format"
|
||||
@@ -100,8 +99,22 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
var layerCount int
|
||||
layers := ggml.Tensors().Layers()
|
||||
|
||||
var memoryLayerOutput uint64
|
||||
for k, v := range layers {
|
||||
if k == "output" || k == "output_norm" {
|
||||
memoryLayerOutput += v.size()
|
||||
}
|
||||
}
|
||||
|
||||
if gpus[0].Library == "metal" && opts.UseMMap {
|
||||
// memory is preallocated for output tensors
|
||||
memoryRequiredTotal += memoryLayerOutput
|
||||
memoryRequiredPartial += memoryLayerOutput
|
||||
}
|
||||
|
||||
var layerCount int
|
||||
for i := 0; i < int(ggml.KV().BlockCount()); i++ {
|
||||
memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()
|
||||
|
||||
@@ -115,15 +128,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
||||
}
|
||||
}
|
||||
|
||||
var memoryLayerOutput uint64
|
||||
for k, v := range layers {
|
||||
if !strings.HasPrefix(k, "blk.") {
|
||||
memoryLayerOutput += v.size()
|
||||
}
|
||||
if gpus[0].Library != "metal" || !opts.UseMMap {
|
||||
// memory was not preallocated for output tensors
|
||||
memoryRequiredTotal += memoryLayerOutput
|
||||
}
|
||||
|
||||
memoryRequiredTotal += memoryLayerOutput
|
||||
|
||||
if memoryAvailable > memoryRequiredTotal {
|
||||
layerCount = int(ggml.KV().BlockCount()) + 1
|
||||
memoryRequiredPartial = memoryRequiredTotal
|
||||
|
||||
45
llm/patches/04-metal.diff
Normal file
45
llm/patches/04-metal.diff
Normal file
@@ -0,0 +1,45 @@
|
||||
diff --git a/ggml-metal.m b/ggml-metal.m
|
||||
index 0207b787..b5e9884b 100644
|
||||
--- a/ggml-metal.m
|
||||
+++ b/ggml-metal.m
|
||||
@@ -1396,27 +1396,23 @@ static enum ggml_status ggml_metal_graph_compute(
|
||||
// to the matrix-vector kernel
|
||||
int ne11_mm_min = 1;
|
||||
|
||||
-#if 0
|
||||
// the numbers below are measured on M2 Ultra for 7B and 13B models
|
||||
// these numbers do not translate to other devices or model sizes
|
||||
// TODO: need to find a better approach
|
||||
- if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) {
|
||||
- switch (src0t) {
|
||||
- case GGML_TYPE_F16: ne11_mm_min = 2; break;
|
||||
- case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
|
||||
- case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
|
||||
- case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
|
||||
- case GGML_TYPE_Q4_0:
|
||||
- case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
|
||||
- case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
|
||||
- case GGML_TYPE_Q5_0: // not tested yet
|
||||
- case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
|
||||
- case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
|
||||
- case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
|
||||
- default: ne11_mm_min = 1; break;
|
||||
- }
|
||||
+ switch (src0t) {
|
||||
+ case GGML_TYPE_F16: ne11_mm_min = 2; break;
|
||||
+ case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
|
||||
+ case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
|
||||
+ case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
|
||||
+ case GGML_TYPE_Q4_0:
|
||||
+ case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
|
||||
+ case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
|
||||
+ case GGML_TYPE_Q5_0: // not tested yet
|
||||
+ case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
|
||||
+ case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
|
||||
+ case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
|
||||
+ default: ne11_mm_min = 1; break;
|
||||
}
|
||||
-#endif
|
||||
|
||||
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
|
||||
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
|
||||
@@ -560,6 +560,13 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
|
||||
return err
|
||||
}
|
||||
defer s.sem.Release(1)
|
||||
|
||||
// only allow maximum 10 "context shifts" to avoid infinite generation
|
||||
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
|
||||
req.Options.NumPredict = 10 * s.options.NumCtx
|
||||
slog.Debug("setting token limit to 10x num_ctx", "num_ctx", s.options.NumCtx, "num_predict", req.Options.NumPredict)
|
||||
}
|
||||
|
||||
request := map[string]any{
|
||||
"prompt": req.Prompt,
|
||||
"stream": true,
|
||||
|
||||
@@ -109,9 +109,6 @@ function gatherDependencies() {
|
||||
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\"
|
||||
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\"
|
||||
|
||||
cp "${script:NVIDIA_DIR}\cudart64_*.dll" "${script:DEPS_DIR}\"
|
||||
cp "${script:NVIDIA_DIR}\cublas64_*.dll" "${script:DEPS_DIR}\"
|
||||
cp "${script:NVIDIA_DIR}\cublasLt64_*.dll" "${script:DEPS_DIR}\"
|
||||
|
||||
cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\"
|
||||
if ("${env:KEY_CONTAINER}") {
|
||||
@@ -123,15 +120,6 @@ function gatherDependencies() {
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
}
|
||||
}
|
||||
if ($null -ne $env:HIP_PATH) {
|
||||
# Assumes v5.7, may need adjustments for v6
|
||||
rm -ea 0 -recurse -force -path "${script:DEPS_DIR}\rocm\"
|
||||
md "${script:DEPS_DIR}\rocm\rocblas\library\" -ea 0 > $null
|
||||
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:DEPS_DIR}\rocm\"
|
||||
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:DEPS_DIR}\rocm\"
|
||||
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
|
||||
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:DEPS_DIR}\rocm\rocblas\library\"
|
||||
}
|
||||
}
|
||||
|
||||
function buildInstaller() {
|
||||
|
||||
@@ -29,6 +29,7 @@ import (
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/parser"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
"github.com/ollama/ollama/version"
|
||||
)
|
||||
|
||||
@@ -701,36 +702,32 @@ func convertModel(name, path string, fn func(resp api.ProgressResponse)) (string
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func CopyModel(src, dest string) error {
|
||||
srcModelPath := ParseModelPath(src)
|
||||
srcPath, err := srcModelPath.GetManifestPath()
|
||||
func CopyModel(src, dst model.Name) error {
|
||||
manifests, err := GetManifestPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
destModelPath := ParseModelPath(dest)
|
||||
destPath, err := destModelPath.GetManifestPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(destPath), 0o755); err != nil {
|
||||
dstpath := filepath.Join(manifests, dst.FilepathNoBuild())
|
||||
if err := os.MkdirAll(filepath.Dir(dstpath), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// copy the file
|
||||
input, err := os.ReadFile(srcPath)
|
||||
srcpath := filepath.Join(manifests, src.FilepathNoBuild())
|
||||
srcfile, err := os.Open(srcpath)
|
||||
if err != nil {
|
||||
fmt.Println("Error reading file:", err)
|
||||
return err
|
||||
}
|
||||
defer srcfile.Close()
|
||||
|
||||
err = os.WriteFile(destPath, input, 0o644)
|
||||
dstfile, err := os.Create(dstpath)
|
||||
if err != nil {
|
||||
fmt.Println("Error reading file:", err)
|
||||
return err
|
||||
}
|
||||
defer dstfile.Close()
|
||||
|
||||
return nil
|
||||
_, err = io.Copy(dstfile, srcfile)
|
||||
return err
|
||||
}
|
||||
|
||||
func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{}, dryRun bool) error {
|
||||
|
||||
@@ -29,6 +29,7 @@ import (
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/openai"
|
||||
"github.com/ollama/ollama/parser"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
"github.com/ollama/ollama/version"
|
||||
)
|
||||
|
||||
@@ -788,34 +789,34 @@ func (s *Server) ListModelsHandler(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (s *Server) CopyModelHandler(c *gin.Context) {
|
||||
var req api.CopyRequest
|
||||
err := c.ShouldBindJSON(&req)
|
||||
switch {
|
||||
case errors.Is(err, io.EOF):
|
||||
var r api.CopyRequest
|
||||
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
|
||||
return
|
||||
case err != nil:
|
||||
} else if err != nil {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if req.Source == "" || req.Destination == "" {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "source add destination are required"})
|
||||
src := model.ParseName(r.Source)
|
||||
if !src.IsValid() {
|
||||
_ = c.Error(fmt.Errorf("source %q is invalid", r.Source))
|
||||
}
|
||||
|
||||
dst := model.ParseName(r.Destination)
|
||||
if !dst.IsValid() {
|
||||
_ = c.Error(fmt.Errorf("destination %q is invalid", r.Destination))
|
||||
}
|
||||
|
||||
if len(c.Errors) > 0 {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": c.Errors.Errors()})
|
||||
return
|
||||
}
|
||||
|
||||
if err := ParseModelPath(req.Destination).Validate(); err != nil {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if err := CopyModel(req.Source, req.Destination); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
|
||||
} else {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
}
|
||||
return
|
||||
if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
|
||||
} else if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@ import (
|
||||
type LlmRequest struct {
|
||||
ctx context.Context //nolint:containedctx
|
||||
model *Model
|
||||
ggml *llm.GGML // TODO - how large is this, and do we need to free it after we've finished loading?
|
||||
opts api.Options
|
||||
sessionDuration time.Duration
|
||||
successCh chan *runnerRef
|
||||
@@ -39,7 +38,7 @@ type Scheduler struct {
|
||||
loaded map[string]*runnerRef
|
||||
loadedMu sync.Mutex
|
||||
|
||||
loadFn func(req *LlmRequest, gpus gpu.GpuInfoList)
|
||||
loadFn func(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList)
|
||||
newServerFn func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error)
|
||||
getGpuFn func() gpu.GpuInfoList
|
||||
}
|
||||
@@ -47,6 +46,7 @@ type Scheduler struct {
|
||||
// TODO set this to zero after a release or two, to enable multiple models by default
|
||||
var loadedMax = 1 // Maximum runners; < 1 maps to as many as will fit in VRAM (unlimited for CPU runners)
|
||||
var maxQueuedRequests = 10 // TODO configurable
|
||||
var numParallel = 1
|
||||
|
||||
func InitScheduler(ctx context.Context) *Scheduler {
|
||||
maxRunners := os.Getenv("OLLAMA_MAX_LOADED_MODELS")
|
||||
@@ -58,6 +58,14 @@ func InitScheduler(ctx context.Context) *Scheduler {
|
||||
loadedMax = m
|
||||
}
|
||||
}
|
||||
if onp := os.Getenv("OLLAMA_NUM_PARALLEL"); onp != "" {
|
||||
p, err := strconv.Atoi(onp)
|
||||
if err != nil || p <= 0 {
|
||||
slog.Error("invalid parallel setting, must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
|
||||
} else {
|
||||
numParallel = p
|
||||
}
|
||||
}
|
||||
|
||||
sched := &Scheduler{
|
||||
pendingReqCh: make(chan *LlmRequest, maxQueuedRequests),
|
||||
@@ -74,20 +82,16 @@ func InitScheduler(ctx context.Context) *Scheduler {
|
||||
|
||||
// context must be canceled to decrement ref count and release the runner
|
||||
func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration time.Duration) (chan *runnerRef, chan error) {
|
||||
ggml, err := llm.LoadModel(model.ModelPath)
|
||||
req := &LlmRequest{
|
||||
ctx: c,
|
||||
model: model,
|
||||
ggml: ggml,
|
||||
opts: opts,
|
||||
sessionDuration: sessionDuration,
|
||||
successCh: make(chan *runnerRef),
|
||||
errCh: make(chan error, 1),
|
||||
}
|
||||
if err != nil {
|
||||
req.errCh <- err
|
||||
return req.successCh, req.errCh
|
||||
}
|
||||
// context split across parallel threads
|
||||
opts.NumCtx = opts.NumCtx * numParallel
|
||||
select {
|
||||
case s.pendingReqCh <- req:
|
||||
default:
|
||||
@@ -130,28 +134,39 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
||||
pending.useLoadedRunner(runner, s.finishedReqCh)
|
||||
break
|
||||
}
|
||||
} else if loadedCount == 0 {
|
||||
slog.Debug("loading first model", "model", pending.model.ModelPath)
|
||||
gpus := s.getGpuFn()
|
||||
g := pickBestFitGPUs(pending, gpus)
|
||||
if g != nil {
|
||||
gpus = g
|
||||
}
|
||||
s.loadFn(pending, gpus)
|
||||
break
|
||||
} else if loadedMax > 0 && loadedCount >= loadedMax {
|
||||
slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount)
|
||||
runnerToExpire = s.findRunnerToUnload(pending)
|
||||
} else {
|
||||
// More than one loaded model, so we have to see if the new one fits
|
||||
// Either no models are loaded or below loadedMax
|
||||
// Get a refreshed GPU list
|
||||
gpus := s.getGpuFn()
|
||||
|
||||
// Load model for fitting
|
||||
ggml, err := llm.LoadModel(pending.model.ModelPath)
|
||||
if err != nil {
|
||||
pending.errCh <- err
|
||||
break
|
||||
}
|
||||
|
||||
// No models loaded. Load the model but prefer the best fit.
|
||||
if loadedCount == 0 {
|
||||
slog.Debug("loading first model", "model", pending.model.ModelPath)
|
||||
g := pickBestFitGPUs(pending, ggml, gpus)
|
||||
if g != nil {
|
||||
gpus = g
|
||||
}
|
||||
s.loadFn(pending, ggml, gpus)
|
||||
break
|
||||
}
|
||||
|
||||
// More than one loaded model, so we have to see if the new one fits
|
||||
// Update free memory from currently loaded models
|
||||
s.updateFreeSpace(gpus)
|
||||
gpus = pickBestFitGPUs(pending, gpus)
|
||||
gpus = pickBestFitGPUs(pending, ggml, gpus)
|
||||
if gpus != nil {
|
||||
slog.Debug("new model fits with existing models, loading")
|
||||
s.loadFn(pending, gpus)
|
||||
s.loadFn(pending, ggml, gpus)
|
||||
break
|
||||
}
|
||||
runnerToExpire = s.findRunnerToUnload(pending)
|
||||
@@ -282,8 +297,8 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
|
||||
}()
|
||||
}
|
||||
|
||||
func (s *Scheduler) load(req *LlmRequest, gpus gpu.GpuInfoList) {
|
||||
llama, err := s.newServerFn(gpus, req.model.ModelPath, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
|
||||
func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) {
|
||||
llama, err := s.newServerFn(gpus, req.model.ModelPath, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
|
||||
if err != nil {
|
||||
// some older models are not compatible with newer versions of llama.cpp
|
||||
// show a generalized compatibility error until there is a better way to
|
||||
@@ -417,16 +432,21 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
|
||||
slog.Debug("evaluating already loaded", "model", req.model.ModelPath)
|
||||
runner.refMu.Lock()
|
||||
defer runner.refMu.Unlock()
|
||||
// Ignore the NumGPU settings for comparison
|
||||
optsExisting := runner.Options.Runner
|
||||
optsExisting.NumGPU = -1
|
||||
optsNew := req.opts.Runner
|
||||
optsNew.NumGPU = -1
|
||||
|
||||
timeout := 10 * time.Second
|
||||
if runner.loading {
|
||||
timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(ctx, timeout) // BUG -
|
||||
|
||||
// Don't reload runner if num_gpu=-1 was provided
|
||||
optsExisting := runner.Options.Runner
|
||||
optsNew := req.opts.Runner
|
||||
if optsNew.NumGPU < 0 {
|
||||
optsExisting.NumGPU = -1
|
||||
optsNew.NumGPU = -1
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
if !reflect.DeepEqual(runner.adapters, req.model.AdapterPaths) || // have the adapters changed?
|
||||
!reflect.DeepEqual(runner.projectors, req.model.ProjectorPaths) || // have the projectors changed?
|
||||
@@ -434,6 +454,7 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
|
||||
runner.llama.Ping(ctx) != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -454,7 +475,7 @@ func (a ByDuration) Less(i, j int) bool {
|
||||
|
||||
// pickBestFitGPUs will try to find the optimal placement of the model in the available GPUs where the model fully fits
|
||||
// If the model can not be fit fully within the available GPU(s) nil is returned
|
||||
func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
|
||||
func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.GpuInfoList {
|
||||
var estimatedVRAM uint64
|
||||
for _, gl := range gpus.ByLibrary() {
|
||||
var ok bool
|
||||
@@ -466,7 +487,7 @@ func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
|
||||
|
||||
// First attempt to fit the model into a single GPU
|
||||
for _, g := range sgl {
|
||||
if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
||||
if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
||||
slog.Debug("new model will fit in available VRAM in single GPU, loading", "model", req.model.ModelPath, "gpu", g.ID, "available", g.FreeMemory, "required", format.HumanBytes2(estimatedVRAM))
|
||||
return []gpu.GpuInfo{g}
|
||||
}
|
||||
@@ -477,7 +498,7 @@ func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
|
||||
// - try subsets of GPUs instead of just falling back to 1 or all in a family
|
||||
|
||||
// Now try all the GPUs
|
||||
if ok, estimatedVRAM = llm.PredictServerFit(gl, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
||||
if ok, estimatedVRAM = llm.PredictServerFit(gl, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
||||
slog.Debug("new model will fit in available VRAM, loading", "model", req.model.ModelPath, "library", gl[0].Library, "required", format.HumanBytes2(estimatedVRAM))
|
||||
return gl
|
||||
}
|
||||
|
||||
@@ -47,6 +47,7 @@ func TestLoad(t *testing.T) {
|
||||
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
||||
defer done()
|
||||
s := InitScheduler(ctx)
|
||||
var ggml *llm.GGML // value not used in tests
|
||||
req := &LlmRequest{
|
||||
ctx: ctx,
|
||||
model: &Model{ModelPath: "foo"},
|
||||
@@ -59,7 +60,7 @@ func TestLoad(t *testing.T) {
|
||||
return nil, fmt.Errorf("something failed to load model blah")
|
||||
}
|
||||
gpus := gpu.GpuInfoList{}
|
||||
s.load(req, gpus)
|
||||
s.load(req, ggml, gpus)
|
||||
require.Len(t, req.successCh, 0)
|
||||
require.Len(t, req.errCh, 1)
|
||||
require.Len(t, s.loaded, 0)
|
||||
@@ -70,7 +71,7 @@ func TestLoad(t *testing.T) {
|
||||
s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
|
||||
return server, nil
|
||||
}
|
||||
s.load(req, gpus)
|
||||
s.load(req, ggml, gpus)
|
||||
select {
|
||||
case err := <-req.errCh:
|
||||
require.NoError(t, err)
|
||||
@@ -82,7 +83,7 @@ func TestLoad(t *testing.T) {
|
||||
|
||||
req.model.ModelPath = "dummy_model_path"
|
||||
server.waitResp = fmt.Errorf("wait failure")
|
||||
s.load(req, gpus)
|
||||
s.load(req, ggml, gpus)
|
||||
select {
|
||||
case err := <-req.errCh:
|
||||
require.Contains(t, err.Error(), "wait failure")
|
||||
@@ -101,6 +102,7 @@ type bundle struct {
|
||||
ctxDone func()
|
||||
srv *mockLlm
|
||||
req *LlmRequest
|
||||
ggml *llm.GGML
|
||||
}
|
||||
|
||||
func (scenario *bundle) newServer(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
|
||||
@@ -132,14 +134,15 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
|
||||
{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
|
||||
})
|
||||
assert.Nil(t, err)
|
||||
|
||||
fname := f.Name()
|
||||
model := &Model{Name: modelName, ModelPath: fname}
|
||||
ggml, err := llm.LoadModel(model.ModelPath)
|
||||
scenario.ggml, err = llm.LoadModel(model.ModelPath)
|
||||
require.NoError(t, err)
|
||||
|
||||
scenario.req = &LlmRequest{
|
||||
ctx: scenario.ctx,
|
||||
model: model,
|
||||
ggml: ggml,
|
||||
sessionDuration: 5 * time.Millisecond,
|
||||
successCh: make(chan *runnerRef, 1),
|
||||
errCh: make(chan error, 1),
|
||||
@@ -157,13 +160,13 @@ func TestRequests(t *testing.T) {
|
||||
scenario1a.req.sessionDuration = 0
|
||||
scenario1b := newScenario(t, ctx, "ollama-model-1", 11)
|
||||
scenario1b.req.model = scenario1a.req.model
|
||||
scenario1b.req.ggml = scenario1a.req.ggml
|
||||
scenario1b.ggml = scenario1a.ggml
|
||||
scenario1b.req.sessionDuration = 0
|
||||
|
||||
// simple reload of same model
|
||||
scenario2a := newScenario(t, ctx, "ollama-model-1", 20)
|
||||
scenario2a.req.model = scenario1a.req.model
|
||||
scenario2a.req.ggml = scenario1a.req.ggml
|
||||
scenario2a.ggml = scenario1a.ggml
|
||||
|
||||
// Multiple loaded models
|
||||
scenario3a := newScenario(t, ctx, "ollama-model-3a", 1*format.GigaByte)
|
||||
@@ -322,13 +325,14 @@ func TestGetRunner(t *testing.T) {
|
||||
successCh1c, errCh1c := s.GetRunner(scenario1c.ctx, scenario1c.req.model, scenario1c.req.opts, scenario1c.req.sessionDuration)
|
||||
require.Len(t, s.pendingReqCh, 0)
|
||||
require.Len(t, successCh1c, 0)
|
||||
require.Len(t, errCh1c, 0)
|
||||
|
||||
time.Sleep(5 * time.Millisecond)
|
||||
require.Len(t, s.loaded, 0)
|
||||
require.Len(t, errCh1c, 1)
|
||||
err = <-errCh1c
|
||||
require.Contains(t, err.Error(), "bad path")
|
||||
scenario1b.ctxDone()
|
||||
|
||||
time.Sleep(5 * time.Millisecond)
|
||||
require.Len(t, s.loaded, 0)
|
||||
}
|
||||
|
||||
// TODO - add one scenario that triggers the bogus finished event with positive ref count
|
||||
@@ -366,7 +370,9 @@ func TestPrematureExpired(t *testing.T) {
|
||||
require.LessOrEqual(t, len(s.finishedReqCh), 1)
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
require.Len(t, s.finishedReqCh, 0)
|
||||
s.loadedMu.Lock()
|
||||
require.Len(t, s.loaded, 0)
|
||||
s.loadedMu.Unlock()
|
||||
|
||||
// also shouldn't happen in real life
|
||||
s.finishedReqCh <- scenario1a.req
|
||||
@@ -426,7 +432,6 @@ func TestUpdateFreeSpace(t *testing.T) {
|
||||
s.updateFreeSpace(gpus)
|
||||
require.Equal(t, uint64(850), gpus[0].FreeMemory)
|
||||
require.Equal(t, uint64(1850), gpus[1].FreeMemory)
|
||||
|
||||
}
|
||||
|
||||
func TestFindRunnerToUnload(t *testing.T) {
|
||||
@@ -485,6 +490,9 @@ func TestNeedsReload(t *testing.T) {
|
||||
require.False(t, resp)
|
||||
req.opts.NumGPU = 99
|
||||
resp = runner.needsReload(ctx, req)
|
||||
require.True(t, resp)
|
||||
req.opts.NumGPU = -1
|
||||
resp = runner.needsReload(ctx, req)
|
||||
require.False(t, resp)
|
||||
}
|
||||
|
||||
|
||||
@@ -686,10 +686,7 @@ func IsValidNamePart(kind PartKind, s string) bool {
|
||||
return false
|
||||
}
|
||||
var consecutiveDots int
|
||||
for i, c := range []byte(s) {
|
||||
if i == 0 && !isAlphaNumeric(c) {
|
||||
return false
|
||||
}
|
||||
for _, c := range []byte(s) {
|
||||
if c == '.' {
|
||||
if consecutiveDots++; consecutiveDots >= 2 {
|
||||
return false
|
||||
@@ -704,10 +701,6 @@ func IsValidNamePart(kind PartKind, s string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func isAlphaNumeric(c byte) bool {
|
||||
return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9'
|
||||
}
|
||||
|
||||
func isValidByteFor(kind PartKind, c byte) bool {
|
||||
if kind == PartNamespace && c == '.' {
|
||||
return false
|
||||
|
||||
@@ -101,8 +101,6 @@ var testNames = map[string]fields{
|
||||
"./../passwd": {},
|
||||
"./0+..": {},
|
||||
|
||||
"-h": {},
|
||||
|
||||
strings.Repeat("a", MaxNamePartLen): {model: strings.Repeat("a", MaxNamePartLen)},
|
||||
strings.Repeat("a", MaxNamePartLen+1): {},
|
||||
}
|
||||
@@ -119,7 +117,7 @@ func TestIsValidNameLen(t *testing.T) {
|
||||
// preventing path traversal.
|
||||
func TestNameConsecutiveDots(t *testing.T) {
|
||||
for i := 1; i < 10; i++ {
|
||||
s := "a" + strings.Repeat(".", i)
|
||||
s := strings.Repeat(".", i)
|
||||
if i > 1 {
|
||||
if g := ParseNameFill(s, FillNothing).DisplayLong(); g != "" {
|
||||
t.Errorf("ParseName(%q) = %q; want empty string", s, g)
|
||||
@@ -341,17 +339,17 @@ func TestDisplayShortest(t *testing.T) {
|
||||
want string
|
||||
wantPanic bool
|
||||
}{
|
||||
{"example.com/library/mistral:latest+Q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
{"example.com/library/mistral:latest+Q4_0", "example.com/?/?:latest", "library/mistral", false},
|
||||
{"example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
|
||||
{"example.com/library/mistral:latest+Q4_0", "example.com/_/_:latest", "library/mistral", false},
|
||||
{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
|
||||
{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
|
||||
|
||||
// case-insensitive
|
||||
{"Example.com/library/mistral:latest+Q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
{"example.com/Library/mistral:latest+Q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
{"example.com/library/Mistral:latest+Q4_0", "example.com/library/?:latest", "Mistral", false},
|
||||
{"example.com/library/mistral:Latest+Q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
{"example.com/library/mistral:Latest+q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
{"Example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
|
||||
{"example.com/Library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
|
||||
{"example.com/library/Mistral:latest+Q4_0", "example.com/library/_:latest", "Mistral", false},
|
||||
{"example.com/library/mistral:Latest+Q4_0", "example.com/library/_:latest", "mistral", false},
|
||||
{"example.com/library/mistral:Latest+q4_0", "example.com/library/_:latest", "mistral", false},
|
||||
|
||||
// zero value
|
||||
{"", MaskDefault, "", true},
|
||||
@@ -363,10 +361,10 @@ func TestDisplayShortest(t *testing.T) {
|
||||
{"registry.ollama.ai/library/mistral:latest+Q4_0", MaskDefault, "mistral", false},
|
||||
|
||||
// Auto-Fill
|
||||
{"x", "example.com/library/?:latest", "x", false},
|
||||
{"x", "example.com/library/?:latest+Q4_0", "x", false},
|
||||
{"x/y:z", "a.com/library/?:latest+Q4_0", "x/y:z", false},
|
||||
{"x/y:z", "a.com/library/?:latest+Q4_0", "x/y:z", false},
|
||||
{"x", "example.com/library/_:latest", "x", false},
|
||||
{"x", "example.com/library/_:latest+Q4_0", "x", false},
|
||||
{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
|
||||
{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
@@ -697,10 +695,10 @@ func ExampleName_completeAndResolved() {
|
||||
func ExampleName_DisplayShortest() {
|
||||
name := ParseNameFill("example.com/jmorganca/mistral:latest+Q4_0", FillNothing)
|
||||
|
||||
fmt.Println(name.DisplayShortest("example.com/jmorganca/?:latest"))
|
||||
fmt.Println(name.DisplayShortest("example.com/?/?:latest"))
|
||||
fmt.Println(name.DisplayShortest("example.com/?/?:?"))
|
||||
fmt.Println(name.DisplayShortest("?/?/?:?"))
|
||||
fmt.Println(name.DisplayShortest("example.com/jmorganca/_:latest"))
|
||||
fmt.Println(name.DisplayShortest("example.com/_/_:latest"))
|
||||
fmt.Println(name.DisplayShortest("example.com/_/_:_"))
|
||||
fmt.Println(name.DisplayShortest("_/_/_:_"))
|
||||
|
||||
// Default
|
||||
name = ParseNameFill("registry.ollama.ai/library/mistral:latest+Q4_0", FillNothing)
|
||||
|
||||
Reference in New Issue
Block a user