Compare commits
21 Commits
v0.1.45-rc ... v0.1.45-rc
| SHA1 |
|---|
| e01e535cbb |
| 0195d6a2f8 |
| 8e0641a9bf |
| 662568d453 |
| 4ebb66c662 |
| 23e899f32d |
| fedf71635e |
| 97c59be653 |
| 9d8a4988e8 |
| 1ae0750a21 |
| 9d91e5e587 |
| 96624aa412 |
| 10f33b8537 |
| 4a633cc295 |
| d34d88e417 |
| 52ce350b7a |
| 2abebb2cbe |
| 380e06e5be |
| badf975e45 |
| 755b4e4fc2 |
| b2799f111b |
api/types.go (19 lines changed)

```diff
@@ -253,6 +253,7 @@ type ShowRequest struct {
 	Model    string `json:"model"`
 	System   string `json:"system"`
 	Template string `json:"template"`
+	Verbose  bool   `json:"verbose"`
 
 	Options map[string]interface{} `json:"options"`
 
@@ -262,14 +263,16 @@ type ShowRequest struct {
 
 // ShowResponse is the response returned from [Client.Show].
 type ShowResponse struct {
-	License    string       `json:"license,omitempty"`
-	Modelfile  string       `json:"modelfile,omitempty"`
-	Parameters string       `json:"parameters,omitempty"`
-	Template   string       `json:"template,omitempty"`
-	System     string       `json:"system,omitempty"`
-	Details    ModelDetails `json:"details,omitempty"`
-	Messages   []Message    `json:"messages,omitempty"`
-	ModifiedAt time.Time    `json:"modified_at,omitempty"`
+	License       string         `json:"license,omitempty"`
+	Modelfile     string         `json:"modelfile,omitempty"`
+	Parameters    string         `json:"parameters,omitempty"`
+	Template      string         `json:"template,omitempty"`
+	System        string         `json:"system,omitempty"`
+	Details       ModelDetails   `json:"details,omitempty"`
+	Messages      []Message      `json:"messages,omitempty"`
+	ModelInfo     map[string]any `json:"model_info,omitempty"`
+	ProjectorInfo map[string]any `json:"projector_info,omitempty"`
+	ModifiedAt    time.Time      `json:"modified_at,omitempty"`
 }
 
 // CopyRequest is the request passed to [Client.Copy].
```
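For orientation, a minimal client sketch (not part of the diff) that exercises the new fields; it assumes a reachable server and a hypothetical model name:

```go
package main

import (
	"context"
	"fmt"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		panic(err)
	}

	// Verbose asks the server not to truncate large metadata arrays such as
	// tokenizer.ggml.tokens inside ModelInfo.
	resp, err := client.Show(context.Background(), &api.ShowRequest{
		Model:   "llama3", // hypothetical model name
		Verbose: true,
	})
	if err != nil {
		panic(err)
	}

	// ModelInfo carries raw GGUF key/value metadata keyed by architecture.
	fmt.Println("architecture:", resp.ModelInfo["general.architecture"])
}
```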
app/lifecycle/logging.go (filename inferred)

```diff
@@ -5,6 +5,8 @@ import (
 	"log/slog"
 	"os"
 	"path/filepath"
+	"strconv"
+	"strings"
 
 	"github.com/ollama/ollama/envconfig"
 )
@@ -24,6 +26,7 @@ func InitLogging() {
 		logFile = os.Stderr
 		// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
 	} else {
+		rotateLogs(AppLogFile)
 		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 		if err != nil {
 			slog.Error(fmt.Sprintf("failed to create server log %v", err))
@@ -46,3 +49,32 @@ func InitLogging() {
 
 	slog.Info("ollama app started")
 }
+
+func rotateLogs(logFile string) {
+	if _, err := os.Stat(logFile); os.IsNotExist(err) {
+		return
+	}
+	index := strings.LastIndex(logFile, ".")
+	pre := logFile[:index]
+	post := "." + logFile[index+1:]
+	for i := LogRotationCount; i > 0; i-- {
+		older := pre + "-" + strconv.Itoa(i) + post
+		newer := pre + "-" + strconv.Itoa(i-1) + post
+		if i == 1 {
+			newer = pre + post
+		}
+		if _, err := os.Stat(newer); err == nil {
+			if _, err := os.Stat(older); err == nil {
+				err := os.Remove(older)
+				if err != nil {
+					slog.Warn("Failed to remove older log", "older", older, "error", err)
+					continue
+				}
+			}
+			err := os.Rename(newer, older)
+			if err != nil {
+				slog.Warn("Failed to rotate log", "older", older, "newer", newer, "error", err)
+			}
+		}
+	}
+}
```
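The naming scheme rotateLogs cycles through, sketched under the assumption that LogRotationCount is 5 (as set in the paths change below): the live file keeps its name, each rotation shifts older copies to a numeric suffix before the extension, and the oldest copy is dropped.

```go
// Prints the file names produced for a log file named app.log,
// assuming LogRotationCount = 5. Illustration only, not library code.
package main

import "fmt"

func main() {
	const logRotationCount = 5
	fmt.Println("app.log    (current run)")
	for i := 1; i <= logRotationCount; i++ {
		fmt.Printf("app-%d.log (%d rotation(s) old)\n", i, i)
	}
}
```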
app/lifecycle/logging_test.go (new file, 44 lines)

```diff
@@ -0,0 +1,44 @@
+package lifecycle
+
+import (
+	"os"
+	"path/filepath"
+	"strconv"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestRotateLogs(t *testing.T) {
+	logDir := t.TempDir()
+	logFile := filepath.Join(logDir, "testlog.log")
+
+	// No log exists
+	rotateLogs(logFile)
+
+	require.NoError(t, os.WriteFile(logFile, []byte("1"), 0644))
+	assert.FileExists(t, logFile)
+	// First rotation
+	rotateLogs(logFile)
+	assert.FileExists(t, filepath.Join(logDir, "testlog-1.log"))
+	assert.NoFileExists(t, filepath.Join(logDir, "testlog-2.log"))
+	assert.NoFileExists(t, logFile)
+
+	// Should be a no-op without a new log
+	rotateLogs(logFile)
+	assert.FileExists(t, filepath.Join(logDir, "testlog-1.log"))
+	assert.NoFileExists(t, filepath.Join(logDir, "testlog-2.log"))
+	assert.NoFileExists(t, logFile)
+
+	for i := 2; i <= LogRotationCount+1; i++ {
+		require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0644))
+		assert.FileExists(t, logFile)
+		rotateLogs(logFile)
+		assert.NoFileExists(t, logFile)
+		for j := 1; j < i; j++ {
+			assert.FileExists(t, filepath.Join(logDir, "testlog-"+strconv.Itoa(j)+".log"))
+		}
+		assert.NoFileExists(t, filepath.Join(logDir, "testlog-"+strconv.Itoa(i+1)+".log"))
+	}
+}
```
app/lifecycle/paths.go (filename inferred)

```diff
@@ -16,11 +16,12 @@ var (
 	AppDir     = "/opt/Ollama"
 	AppDataDir = "/opt/Ollama"
 	// TODO - should there be a distinct log dir?
-	UpdateStageDir = "/tmp"
-	AppLogFile     = "/tmp/ollama_app.log"
-	ServerLogFile  = "/tmp/ollama.log"
-	UpgradeLogFile = "/tmp/ollama_update.log"
-	Installer      = "OllamaSetup.exe"
+	UpdateStageDir   = "/tmp"
+	AppLogFile       = "/tmp/ollama_app.log"
+	ServerLogFile    = "/tmp/ollama.log"
+	UpgradeLogFile   = "/tmp/ollama_update.log"
+	Installer        = "OllamaSetup.exe"
+	LogRotationCount = 5
 )
 
 func init() {
```
app/lifecycle/server.go (filename inferred)

```diff
@@ -54,7 +54,7 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
 		return nil, fmt.Errorf("failed to spawn server stderr pipe: %w", err)
 	}
 
-	// TODO - rotation
+	rotateLogs(ServerLogFile)
 	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create server log: %w", err)
```
app/ollama.iss (filename inferred; add/remove markers for this hunk were lost in extraction, lines shown in order)

```
@@ -88,10 +88,15 @@ DialogFontSize=12
[Files]
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
Source: "..\dist\windows-{#ARCH}\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
#if DirExists("..\dist\windows-amd64\cuda")
Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs
#endif
#if DirExists("..\dist\windows-amd64\oneapi")
Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs
#endif
#if DirExists("..\dist\windows-amd64\rocm")
Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
#endif
```
cmd/cmd.go (143 lines changed)

```diff
@@ -579,10 +579,6 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}
 
-	if len(args) != 1 {
-		return errors.New("missing model name")
-	}
-
 	license, errLicense := cmd.Flags().GetBool("license")
 	modelfile, errModelfile := cmd.Flags().GetBool("modelfile")
 	parameters, errParams := cmd.Flags().GetBool("parameters")
@@ -625,8 +621,29 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
 
 	if flagsSet > 1 {
 		return errors.New("only one of '--license', '--modelfile', '--parameters', '--system', or '--template' can be specified")
-	} else if flagsSet == 0 {
-		return errors.New("one of '--license', '--modelfile', '--parameters', '--system', or '--template' must be specified")
 	}
 
+	if flagsSet == 1 {
+		req := api.ShowRequest{Name: args[0]}
+		resp, err := client.Show(cmd.Context(), &req)
+		if err != nil {
+			return err
+		}
+
+		switch showType {
+		case "license":
+			fmt.Println(resp.License)
+		case "modelfile":
+			fmt.Println(resp.Modelfile)
+		case "parameters":
+			fmt.Println(resp.Parameters)
+		case "system":
+			fmt.Println(resp.System)
+		case "template":
+			fmt.Println(resp.Template)
+		}
+
+		return nil
+	}
+
 	req := api.ShowRequest{Name: args[0]}
@@ -635,22 +652,114 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}
 
-	switch showType {
-	case "license":
-		fmt.Println(resp.License)
-	case "modelfile":
-		fmt.Println(resp.Modelfile)
-	case "parameters":
-		fmt.Println(resp.Parameters)
-	case "system":
-		fmt.Println(resp.System)
-	case "template":
-		fmt.Println(resp.Template)
-	}
+	arch := resp.ModelInfo["general.architecture"].(string)
+
+	modelData := [][]string{
+		{"arch", arch},
+		{"parameters", resp.Details.ParameterSize},
+		{"quantization", resp.Details.QuantizationLevel},
+		{"context length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)].(float64))},
+		{"embedding length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.embedding_length", arch)].(float64))},
+	}
+
+	mainTableData := [][]string{
+		{"Model"},
+		{renderSubTable(modelData, false)},
+	}
+
+	if resp.ProjectorInfo != nil {
+		projectorData := [][]string{
+			{"arch", "clip"},
+			{"parameters", format.HumanNumber(uint64(resp.ProjectorInfo["general.parameter_count"].(float64)))},
+			{"projector type", resp.ProjectorInfo["clip.projector_type"].(string)},
+			{"embedding length", fmt.Sprintf("%v", resp.ProjectorInfo["clip.vision.embedding_length"].(float64))},
+			{"projection dimensionality", fmt.Sprintf("%v", resp.ProjectorInfo["clip.vision.projection_dim"].(float64))},
+		}
+
+		mainTableData = append(mainTableData,
+			[]string{"Projector"},
+			[]string{renderSubTable(projectorData, false)},
+		)
+	}
+
+	if resp.Parameters != "" {
+		mainTableData = append(mainTableData, []string{"Parameters"}, []string{formatParams(resp.Parameters)})
+	}
+
+	if resp.System != "" {
+		mainTableData = append(mainTableData, []string{"System"}, []string{renderSubTable(twoLines(resp.System), true)})
+	}
+
+	if resp.License != "" {
+		mainTableData = append(mainTableData, []string{"License"}, []string{renderSubTable(twoLines(resp.License), true)})
+	}
+
+	table := tablewriter.NewWriter(os.Stdout)
+	table.SetAutoWrapText(false)
+	table.SetBorder(false)
+	table.SetAlignment(tablewriter.ALIGN_LEFT)
+
+	for _, v := range mainTableData {
+		table.Append(v)
+	}
+
+	table.Render()
 
 	return nil
 }
 
+func renderSubTable(data [][]string, file bool) string {
+	var buf bytes.Buffer
+	table := tablewriter.NewWriter(&buf)
+	table.SetAutoWrapText(!file)
+	table.SetBorder(false)
+	table.SetNoWhiteSpace(true)
+	table.SetTablePadding("\t")
+	table.SetAlignment(tablewriter.ALIGN_LEFT)
+
+	for _, v := range data {
+		table.Append(v)
+	}
+
+	table.Render()
+
+	renderedTable := buf.String()
+	lines := strings.Split(renderedTable, "\n")
+	for i, line := range lines {
+		lines[i] = "\t" + line
+	}
+
+	return strings.Join(lines, "\n")
+}
+
+func twoLines(s string) [][]string {
+	lines := strings.Split(s, "\n")
+	res := [][]string{}
+
+	count := 0
+	for _, line := range lines {
+		line = strings.TrimSpace(line)
+		if line != "" {
+			count++
+			res = append(res, []string{line})
+			if count == 2 {
+				return res
+			}
+		}
+	}
+	return res
+}
+
+func formatParams(s string) string {
+	lines := strings.Split(s, "\n")
+	table := [][]string{}
+
+	for _, line := range lines {
+		table = append(table, strings.Fields(line))
+	}
+	return renderSubTable(table, false)
+}
+
 func CopyHandler(cmd *cobra.Command, args []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
```
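With these changes, a bare `ollama show <model>` renders an overview table instead of returning an error. Illustrative shape of the output, using values consistent with the docs/api.md sample below (not a captured run):

```
  Model
  	arch            	llama
  	parameters      	8.0B
  	quantization    	Q4_0
  	context length  	8192
  	embedding length	4096
```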
docs/api.md (37 lines changed)

````diff
@@ -777,11 +777,12 @@ A single JSON object will be returned.
 ```shell
 POST /api/show
 ```
 
-Show information about a model including details, modelfile, template, parameters, license, and system prompt.
+Show information about a model including details, modelfile, template, parameters, license, system prompt.
 
 ### Parameters
 
 - `name`: name of the model to show
+- `verbose`: (optional) if set to `true`, returns full data for verbose response fields
 
 ### Examples
@@ -798,14 +799,40 @@ curl http://localhost:11434/api/show -d '{
 ```json
 {
   "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /Users/matt/.ollama/models/blobs/sha256:200765e1283640ffbd013184bf496e261032fa75b99498a9613be4e94d63ad52\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: \"\"\"\nPARAMETER num_ctx 4096\nPARAMETER stop \"\u003c/s\u003e\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSISTANT:\"",
-  "parameters": "num_ctx 4096\nstop \u003c/s\u003e\nstop USER:\nstop ASSISTANT:",
-  "template": "{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: ",
+  "parameters": "num_keep 24\nstop \"<|start_header_id|>\"\nstop \"<|end_header_id|>\"\nstop \"<|eot_id|>\"",
+  "template": "{{ if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>\n\n{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ .Response }}<|eot_id|>",
   "details": {
     "parent_model": "",
     "format": "gguf",
     "family": "llama",
-    "families": ["llama", "clip"],
-    "parameter_size": "7B",
+    "families": [
+      "llama"
+    ],
+    "parameter_size": "8.0B",
     "quantization_level": "Q4_0"
   },
+  "model_info": {
+    "general.architecture": "llama",
+    "general.file_type": 2,
+    "general.parameter_count": 8030261248,
+    "general.quantization_version": 2,
+    "llama.attention.head_count": 32,
+    "llama.attention.head_count_kv": 8,
+    "llama.attention.layer_norm_rms_epsilon": 0.00001,
+    "llama.block_count": 32,
+    "llama.context_length": 8192,
+    "llama.embedding_length": 4096,
+    "llama.feed_forward_length": 14336,
+    "llama.rope.dimension_count": 128,
+    "llama.rope.freq_base": 500000,
+    "llama.vocab_size": 128256,
+    "tokenizer.ggml.bos_token_id": 128000,
+    "tokenizer.ggml.eos_token_id": 128009,
+    "tokenizer.ggml.merges": [],     // populates if `verbose=true`
+    "tokenizer.ggml.model": "gpt2",
+    "tokenizer.ggml.pre": "llama-bpe",
+    "tokenizer.ggml.token_type": [], // populates if `verbose=true`
+    "tokenizer.ggml.tokens": []      // populates if `verbose=true`
+  }
 }
 ```
````
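A request exercising the new parameter might look like this (model name illustrative):

```shell
curl http://localhost:11434/api/show -d '{
  "name": "llama3",
  "verbose": true
}'
```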
docs/troubleshooting.md (filename inferred)

```diff
@@ -22,7 +22,7 @@ docker logs <container-name>
 If manually running `ollama serve` in a terminal, the logs will be on that terminal.
 
 When you run Ollama on **Windows**, there are a few different locations. You can view them in the explorer window by hitting `<cmd>+R` and type in:
-- `explorer %LOCALAPPDATA%\Ollama` to view logs
+- `explorer %LOCALAPPDATA%\Ollama` to view logs. The most recent server logs will be in `server.log` and older logs will be in `server-#.log`
 - `explorer %LOCALAPPDATA%\Programs\Ollama` to browse the binaries (The installer adds this to your user PATH)
 - `explorer %HOMEPATH%\.ollama` to browse where models and configuration is stored
 - `explorer %TEMP%` where temporary executable files are stored in one or more `ollama*` directories
```
docs/windows.md (filename inferred)

```diff
@@ -39,8 +39,8 @@ server.
 Ollama on Windows stores files in a few different locations. You can view them in
 the explorer window by hitting `<cmd>+R` and type in:
 - `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
-    - *app.log* contains logs from the GUI application
-    - *server.log* contains the server logs
+    - *app.log* contains the most recent logs from the GUI application
+    - *server.log* contains the most recent server logs
     - *upgrade.log* contains log output for upgrades
 - `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH)
 - `explorer %HOMEPATH%\.ollama` contains models and configuration
```
gpu/assets.go (filename inferred)

```diff
@@ -77,20 +77,27 @@ func cleanupTmpDirs() {
 			continue
 		}
 		raw, err := os.ReadFile(filepath.Join(d, "ollama.pid"))
-		if err == nil {
-			pid, err := strconv.Atoi(string(raw))
-			if err == nil {
-				if proc, err := os.FindProcess(pid); err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
-					// Another running ollama, ignore this tmpdir
-					continue
-				}
-			}
-		} else {
-			slog.Debug("failed to open ollama.pid", "path", d, "error", err)
-		}
-		err = os.RemoveAll(d)
-		if err != nil {
-			slog.Debug("unable to cleanup stale tmpdir", "path", d, "error", err)
+		if err != nil {
+			slog.Warn("failed to read ollama.pid", "path", d, "error", err)
+			// No pid, ignore this tmpdir
+			continue
+		}
+
+		pid, err := strconv.Atoi(string(raw))
+		if err != nil {
+			slog.Warn("failed to parse pid", "path", d, "error", err)
+			continue
+		}
+
+		proc, err := os.FindProcess(pid)
+		if err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
+			slog.Warn("found running ollama", "pid", pid, "path", d)
+			// Another running ollama, ignore this tmpdir
+			continue
+		}
+
+		if err := os.Remove(d); err != nil {
+			slog.Warn("unable to cleanup stale tmpdir", "path", d, "error", err)
 		}
 	}
 }
```
gpu/gpu.go (10 lines changed)

```diff
@@ -231,7 +231,7 @@ func GetGPUInfo() GpuInfoList {
 		// On windows we bundle the nvidia library one level above the runner dir
 		depPath := ""
 		if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
-			depPath = filepath.Dir(envconfig.RunnersDir)
+			depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "cuda")
 		}
 
 		// Load ALL libraries
@@ -282,6 +282,12 @@ func GetGPUInfo() GpuInfoList {
 		// Intel
 		if envconfig.IntelGpu {
 			oHandles = initOneAPIHandles()
+			// On windows we bundle the oneapi library one level above the runner dir
+			depPath = ""
+			if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
+				depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "oneapi")
+			}
+
 			for d := range oHandles.oneapi.num_drivers {
 				if oHandles.oneapi == nil {
 					// shouldn't happen
@@ -306,7 +312,7 @@ func GetGPUInfo() GpuInfoList {
 				gpuInfo.FreeMemory = uint64(memInfo.free)
 				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
 				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
-				// TODO dependency path?
+				gpuInfo.DependencyPath = depPath
 				oneapiGPUs = append(oneapiGPUs, gpuInfo)
 			}
 		}
```
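The net effect of this change, together with the installer and build-script changes in this release, is a per-backend dependency layout along these lines (an assumed sketch; only the ollama_runners, cuda, oneapi, and rocm directory names come from the diff):

```
{app}\
├── ollama.exe
├── ollama_runners\   (runner binaries plus MSVC runtime DLLs)
├── cuda\             (cudart64_*.dll, cublas64_*.dll, cublasLt64_*.dll)
├── oneapi\           (libirngmd.dll, sycl7.dll, mkl_*.dll, ...)
└── rocm\             (present only when bundled)
```

DependencyPath then points at the subdirectory matching the detected GPU, beside the runners directory.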
gpu/gpu_info_cudart.c (filename inferred)

```diff
@@ -40,7 +40,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
 
   for (i = 0; l[i].s != NULL; i++) {
     *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
-    if (!l[i].p) {
+    if (!*(l[i].p)) {
       char *msg = LOAD_ERR();
       LOG(resp->ch.verbose, "dlerr: %s\n", msg);
       UNLOAD_LIBRARY(resp->ch.handle);
```

gpu/gpu_info_nvcuda.c (filename inferred)

```diff
@@ -43,7 +43,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
 
   for (i = 0; l[i].s != NULL; i++) {
     *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
-    if (!*l[i].p) {
+    if (!*(l[i].p)) {
       char *msg = LOAD_ERR();
       LOG(resp->ch.verbose, "dlerr: %s\n", msg);
       UNLOAD_LIBRARY(resp->ch.handle);
```

gpu/gpu_info_nvml.c (filename inferred)

```diff
@@ -42,7 +42,7 @@ void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp) {
     // LOG(resp->ch.verbose, "dlsym: %s\n", l[i].s);
 
     *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
-    if (!l[i].p) {
+    if (!*(l[i].p)) {
       resp->ch.handle = NULL;
       char *msg = LOAD_ERR();
       LOG(resp->ch.verbose, "dlerr: %s\n", msg);
```

gpu/gpu_info_oneapi.c (filename inferred)

```diff
@@ -50,7 +50,7 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp) {
     LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s);
 
     *l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s);
-    if (!l[i].p) {
+    if (!*(l[i].p)) {
       resp->oh.handle = NULL;
       char *msg = LOAD_ERR();
       LOG(resp->oh.verbose, "dlerr: %s\n", msg);
```
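These four fixes correct the same copy-pasted check: `l[i].p` is the address of a function-pointer slot in the symbol table and is never NULL, so the old test could not detect a failed LOAD_SYMBOL; the fixed test dereferences it and checks the loaded value. A Go analogue of the mistake, for illustration only:

```go
// Demonstrates why checking the address of a slot (old check) can never
// fail, while checking the slot's contents (fixed check) can.
package main

import "fmt"

func main() {
	var handle func() // slot a loader would fill in; nil means "symbol not found"
	p := &handle      // like l[i].p in the C symbol tables above

	fmt.Println(p == nil)  // false: the address of the slot is never nil (old check)
	fmt.Println(*p == nil) // true: the loaded symbol itself is missing (fixed check)
}
```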
llm/ext_server/server.cpp (vendored, 17 lines changed)

```diff
@@ -56,7 +56,6 @@ struct server_params {
     std::string hostname = "127.0.0.1";
     std::vector<std::string> api_keys;
     std::string public_path = "examples/server/public";
-    std::string chat_template = "";
     int32_t port = 8080;
     int32_t read_timeout = 600;
     int32_t write_timeout = 600;
@@ -427,16 +426,6 @@ struct llama_server_context
         return true;
     }
 
-    void validate_model_chat_template(server_params & sparams) {
-        llama_chat_message chat[] = {{"user", "test"}};
-        std::vector<char> buf(1);
-        int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
-        if (res < 0) {
-            LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
-            sparams.chat_template = "chatml";
-        }
-    }
-
     void initialize() {
         // create slots
         all_slots_are_idle = true;
@@ -2535,7 +2524,6 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 invalid_param = true;
                 break;
             }
-            sparams.chat_template = argv[i];
         }
         else if (arg == "--override-kv")
         {
@@ -3008,11 +2996,6 @@ int main(int argc, char **argv) {
     }
     const auto model_meta = llama.model_meta();
 
-    if (sparams.chat_template.empty()) { // custom chat template is not supplied
-        // check if the template comes with the model is supported by us
-        llama.validate_model_chat_template(sparams);
-    }
-
     // Middleware for API key validation
     auto validate_api_key = [&sparams](const httplib::Request &req, httplib::Response &res) -> bool {
         // If API key is not set, skip validation
```
llm/generate/gen_windows.ps1 (filename inferred)

```diff
@@ -295,10 +295,12 @@ function build_cuda() {
         sign
         install
 
-        write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\"
-        cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
-        cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
-        cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
+        rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
+        md "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" -ea 0 > $null
+        write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
+        cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
+        cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
+        cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
     } else {
         write-host "Skipping CUDA generation step"
     }
@@ -332,16 +334,18 @@ function build_oneapi() {
         sign
         install
 
-        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:distDir}"
-        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:distDir}"
-        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:distDir}"
-        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:distDir}"
-        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:distDir}"
-        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:distDir}"
-        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:distDir}"
-        cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:distDir}"
-        cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:distDir}"
-        cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:distDir}"
+        rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
+        md "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" -ea 0 > $null
+        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
+        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
+        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
+        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
+        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
+        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
+        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
+        cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
+        cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
+        cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
     } else {
         Write-Host "Skipping oneAPI generation step"
     }
```
llm/ggml.go (40 lines changed)

```diff
@@ -69,6 +69,30 @@ func (kv KV) HeadCountKV() uint64 {
 	return 1
 }
 
+func (kv KV) EmbeddingHeadCount() uint64 {
+	if heads := kv.HeadCount(); heads > 0 {
+		return kv.EmbeddingLength() / kv.HeadCount()
+	}
+
+	return 0
+}
+
+func (kv KV) EmbeddingHeadCountK() uint64 {
+	if k := kv.u64(fmt.Sprintf("%s.attention.key_length", kv.Architecture())); k > 0 {
+		return k
+	}
+
+	return kv.EmbeddingHeadCount()
+}
+
+func (kv KV) EmbeddingHeadCountV() uint64 {
+	if v := kv.u64(fmt.Sprintf("%s.attention.value_length", kv.Architecture())); v > 0 {
+		return v
+	}
+
+	return kv.EmbeddingHeadCount()
+}
+
 func (kv KV) GQA() uint64 {
 	return kv.HeadCount() / kv.HeadCountKV()
 }
@@ -299,6 +323,9 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload uint64) {
 	headsKV := llm.KV().HeadCountKV()
 	vocab := uint64(len(llm.KV()["tokenizer.ggml.tokens"].([]any)))
 
+	embeddingHeads := llm.KV().EmbeddingHeadCount()
+	embeddingHeadsK := llm.KV().EmbeddingHeadCountK()
+
 	layers := llm.Tensors().Layers()
 
 	switch llm.KV().Architecture() {
@@ -308,7 +335,7 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload uint64) {
 		partialOffload = 4 * batch * embedding
 		partialOffload += max(
 			// 4*batch*(4+6*embedding+context*(2*heads)+llm.KV().GQA()),
-			4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embedding/heads*headsKV),
+			4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embeddingHeads*headsKV),
 			4*batch*(embedding+vocab)+embedding*vocab*105/128,
 		)
 
@@ -316,15 +343,15 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload uint64) {
 			// mixtral 8x22b
 			ff := uint64(llm.KV()["llama.feed_forward_length"].(uint32))
 			partialOffload = max(
-				3*ffnGateExpsWeight.Size()+4*batch*(2*ff+headsKV+embedding+context+embedding/heads*headsKV),
-				4*(context*batch*heads+context*embedding/heads*headsKV+batch*1024+embedding/heads*headsKV*batch),
+				3*ffnGateExpsWeight.Size()+4*batch*(2*ff+headsKV+embedding+context+embeddingHeads*headsKV),
+				4*(context*batch*heads+context*embeddingHeads*headsKV+batch*1024+embeddingHeads*headsKV*batch),
 			)
 		} else if ffnGateWeight, ok := layers["blk.0"]["ffn_gate.0.weight"]; ok {
 			// mixtral 8x7b
 			ffnGateWeight1 := ffnGateWeight.Shape[1]
 			fullOffload = 4 * batch * (2 + 3*embedding + context*(1+heads) + 2*headsKV + ffnGateWeight1)
 			partialOffload = max(
-				4*batch*(3+embedding/heads*headsKV+embedding+context*(1+heads)+ffnGateWeight1)+(embedding*embedding+3*embedding*headsKV*ffnGateWeight1)*9/16,
+				4*batch*(3+embeddingHeads*headsKV+embedding+context*(1+heads)+ffnGateWeight1)+(embedding*embedding+3*embedding*headsKV*ffnGateWeight1)*9/16,
 				4*batch*(1+2*embedding+context*(1+heads))+embedding*(6*context*headsKV/heads+embedding*9/16),
 			)
 		}
@@ -368,15 +395,14 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload uint64) {
 			fullOffload,
 		)
 	case "deepseek2":
-		keys := uint64(llm.KV()["deepseek2.attention.key_length"].(uint32))
 		fullOffload = max(
 			4*batch*(3*embedding+vocab),
-			4*batch*(3*embedding+2+context*(1+headsKV)+2*keys*headsKV),
+			4*batch*(3*embedding+2+context*(1+headsKV)+2*embeddingHeadsK*headsKV),
 		)
 
 		partialOffload = max(
 			4*batch*(3*embedding+vocab)+embedding*vocab*105/128,
-			4*batch*(2*embedding+1+2*keys*headsKV+context+context*headsKV)+4*keys*context*headsKV+embedding*keys*headsKV*9/16,
+			4*batch*(2*embedding+1+2*embeddingHeadsK*headsKV+context+context*headsKV)+4*embeddingHeadsK*context*headsKV+embedding*embeddingHeadsK*headsKV*9/16,
 		)
 	}
 
```
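A worked instance of the fallback these helpers implement, using the llama3-8B metadata quoted in the docs/api.md hunk above (a sketch, not library code): when a model carries no explicit `*.attention.key_length` or `*.attention.value_length` keys, the per-head size falls back to embedding length divided by head count.

```go
// Head-size fallback arithmetic for the llama3-8B metadata shown earlier.
package main

import "fmt"

func main() {
	const (
		embeddingLength = 4096 // llama.embedding_length
		headCount       = 32   // llama.attention.head_count
	)
	// With no explicit key_length/value_length keys, EmbeddingHeadCountK and
	// EmbeddingHeadCountV both resolve to this value.
	fmt.Println(embeddingLength / headCount) // 128, matching llama.rope.dimension_count
}
```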
llm/memory.go (filename inferred)

```diff
@@ -115,8 +115,8 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts api.Options) MemoryEstimate {
 		slog.Warn("model missing blk.0 layer size")
 	}
 
-	// fp16 k,v = (1 (k) + 1 (v)) * sizeof(float16) * n_ctx * n_layer * n_embd / n_head * n_head_kv
-	var kv uint64 = 2 * 2 * uint64(opts.NumCtx) * ggml.KV().BlockCount() * ggml.KV().EmbeddingLength() / ggml.KV().HeadCount() * ggml.KV().HeadCountKV()
+	// fp16 k,v = sizeof(float16) * n_ctx * n_layer * (n_embd_head_k + n_embd_head_v) * n_head_kv
+	var kv uint64 = 2 * uint64(opts.NumCtx) * ggml.KV().BlockCount() * (ggml.KV().EmbeddingHeadCountK() + ggml.KV().EmbeddingHeadCountV()) * ggml.KV().HeadCountKV()
 
 	// KV is proportional to the number of layers
 	layerSize += kv / ggml.KV().BlockCount()
```
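Plugging the same llama3-8B metadata into the corrected formula for an 8192-token context gives a back-of-envelope KV-cache size (values illustrative; for models with symmetric key/value head sizes the old and new formulas agree, they differ only for asymmetric architectures such as deepseek2):

```go
// KV cache size under the corrected formula: sizeof(float16) * n_ctx *
// n_layer * (n_embd_head_k + n_embd_head_v) * n_head_kv.
package main

import "fmt"

func main() {
	var (
		numCtx  uint64 = 8192
		blocks  uint64 = 32  // llama.block_count
		headK   uint64 = 128 // n_embd_head_k
		headV   uint64 = 128 // n_embd_head_v
		headsKV uint64 = 8   // llama.attention.head_count_kv
	)
	kv := 2 * numCtx * blocks * (headK + headV) * headsKV
	fmt.Printf("%d bytes (%.1f GiB)\n", kv, float64(kv)/(1<<30)) // 1073741824 bytes (1.0 GiB)
}
```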
llm/payload.go (filename inferred)

```diff
@@ -58,7 +58,7 @@ func availableServers() map[string]string {
 	}
 
 	// glob payloadsDir for files that start with ollama_
-	pattern := filepath.Join(payloadsDir, "*")
+	pattern := filepath.Join(payloadsDir, "*", "ollama_*")
 
 	files, err := filepath.Glob(pattern)
 	if err != nil {
@@ -69,7 +69,7 @@ func availableServers() map[string]string {
 	servers := make(map[string]string)
 	for _, file := range files {
 		slog.Debug("availableServers : found", "file", file)
-		servers[filepath.Base(file)] = file
+		servers[filepath.Base(filepath.Dir(file))] = filepath.Dir(file)
 	}
 
 	return servers
```
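What the new pattern yields, sketched with a hypothetical payload path: each match is an `ollama_*` file inside a variant directory, and the map is now keyed by that directory rather than by the file name.

```go
// Key/value derivation for one hypothetical glob match of "*/ollama_*".
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	file := filepath.Join("payloads", "cuda_v11", "ollama_llama_server") // hypothetical match
	key := filepath.Base(filepath.Dir(file))                             // "cuda_v11"
	dir := filepath.Dir(file)                                            // "payloads/cuda_v11"
	fmt.Println(key, "=>", dir)
}
```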
llm/server.go (filename inferred)

```diff
@@ -274,8 +274,8 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, projectors []string, opts api.Options) (LlamaServer, error) {
 	if runtime.GOOS == "windows" {
 		pathEnv = "PATH"
 	}
-	// prepend the server directory to LD_LIBRARY_PATH/PATH
-	libraryPaths := []string{dir}
+	// prepend the server directory to LD_LIBRARY_PATH/PATH and the parent dir for common dependencies
+	libraryPaths := []string{dir, filepath.Dir(dir)}
 
 	if libraryPath, ok := os.LookupEnv(pathEnv); ok {
 		// Append our runner directory to the path
```
scripts/build_windows.ps1 (filename inferred)

```diff
@@ -103,19 +103,19 @@ function buildApp() {
 function gatherDependencies() {
     write-host "Gathering runtime dependencies"
     cd "${script:SRC_DIR}"
-    md "${script:DEPS_DIR}" -ea 0 > $null
+    md "${script:DEPS_DIR}\ollama_runners" -ea 0 > $null
 
     # TODO - this varies based on host build system and MSVC version - drive from dumpbin output
     # currently works for Win11 + MSVC 2019 + Cuda V11
-    cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\msvcp140.dll" "${script:DEPS_DIR}\"
-    cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\"
-    cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\"
+    cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\msvcp140.dll" "${script:DEPS_DIR}\ollama_runners\"
+    cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\ollama_runners\"
+    cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\ollama_runners\"
 
 
     cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\"
     if ("${env:KEY_CONTAINER}") {
         write-host "about to sign"
-        foreach ($file in (get-childitem "${script:DEPS_DIR}/cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){
+        foreach ($file in (get-childitem "${script:DEPS_DIR}\cuda\cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){
             write-host "signing $file"
             & "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" `
                 /csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} $file
```
server/routes.go (filename inferred)

```diff
@@ -734,9 +734,44 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 	fmt.Fprint(&sb, m.String())
 	resp.Modelfile = sb.String()
 
+	kvData, err := getKVData(m.ModelPath, req.Verbose)
+	if err != nil {
+		return nil, err
+	}
+	delete(kvData, "general.name")
+	delete(kvData, "tokenizer.chat_template")
+	resp.ModelInfo = kvData
+
+	if len(m.ProjectorPaths) > 0 {
+		projectorData, err := getKVData(m.ProjectorPaths[0], req.Verbose)
+		if err != nil {
+			return nil, err
+		}
+		resp.ProjectorInfo = projectorData
+	}
+
 	return resp, nil
 }
 
+func getKVData(digest string, verbose bool) (llm.KV, error) {
+	kvData, err := llm.LoadModel(digest)
+	if err != nil {
+		return nil, err
+	}
+
+	kv := kvData.KV()
+
+	if !verbose {
+		for k := range kv {
+			if t, ok := kv[k].([]any); len(t) > 5 && ok {
+				kv[k] = []any{}
+			}
+		}
+	}
+
+	return kv, nil
+}
+
 func (s *Server) ListModelsHandler(c *gin.Context) {
 	ms, err := Manifests()
 	if err != nil {
```
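The trimming rule in getKVData, run in isolation (a sketch; the map contents here are invented): with verbose false, any array-valued key holding more than five entries is emptied, which is why the tokenizer arrays appear as `[]` in the docs/api.md sample above.

```go
// Standalone demonstration of the non-verbose metadata trimming rule.
package main

import "fmt"

func main() {
	kv := map[string]any{
		"llama.block_count":     uint32(32),
		"tokenizer.ggml.tokens": []any{"a", "b", "c", "d", "e", "f"}, // > 5 entries
	}
	for k := range kv {
		if t, ok := kv[k].([]any); ok && len(t) > 5 {
			kv[k] = []any{}
		}
	}
	fmt.Println(kv) // e.g. map[llama.block_count:32 tokenizer.ggml.tokens:[]]
}
```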
server/routes_test.go (filename inferred)

```diff
@@ -19,6 +19,7 @@ import (
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
@@ -212,6 +213,7 @@ func Test_Routes(t *testing.T) {
 					"top_p 0.9",
 				}
 				assert.Equal(t, expectedParams, params)
+				assert.InDelta(t, 0, showResp.ModelInfo["general.parameter_count"], 1e-9, "Parameter count should be 0")
 			},
 		},
 	}
@@ -325,3 +327,40 @@ func TestCase(t *testing.T) {
 		})
 	}
 }
+
+func TestShow(t *testing.T) {
+	t.Setenv("OLLAMA_MODELS", t.TempDir())
+	envconfig.LoadConfig()
+
+	var s Server
+
+	createRequest(t, s.CreateModelHandler, api.CreateRequest{
+		Name: "show-model",
+		Modelfile: fmt.Sprintf(
+			"FROM %s\nFROM %s",
+			createBinFile(t, llm.KV{"general.architecture": "test"}, nil),
+			createBinFile(t, llm.KV{"general.architecture": "clip"}, nil),
+		),
+	})
+
+	w := createRequest(t, s.ShowModelHandler, api.ShowRequest{
+		Name: "show-model",
+	})
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected status code 200, actual %d", w.Code)
+	}
+
+	var resp api.ShowResponse
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatal(err)
+	}
+
+	if resp.ModelInfo["general.architecture"] != "test" {
+		t.Fatal("Expected model architecture to be 'test', but got", resp.ModelInfo["general.architecture"])
+	}
+
+	if resp.ProjectorInfo["general.architecture"] != "clip" {
+		t.Fatal("Expected projector architecture to be 'clip', but got", resp.ProjectorInfo["general.architecture"])
+	}
+}
```
types/model/name.go (filename inferred)

```diff
@@ -4,7 +4,6 @@ package model
 
 import (
 	"cmp"
-	"encoding/hex"
 	"errors"
 	"fmt"
 	"log/slog"
@@ -371,57 +370,3 @@ func cutPromised(s, sep string) (before, after string, ok bool) {
 	}
 	return cmp.Or(before, MissingPart), cmp.Or(after, MissingPart), true
 }
-
-type DigestType byte
-
-const (
-	DigestTypeInvalid DigestType = iota
-	DigestTypeSHA256
-)
-
-func (t DigestType) String() string {
-	switch t {
-	case DigestTypeSHA256:
-		return "sha256"
-	default:
-		return "invalid"
-	}
-}
-
-type Digest struct {
-	Type DigestType
-	Sum  [32]byte
-}
-
-func ParseDigest(s string) (Digest, error) {
-	i := strings.IndexAny(s, "-:")
-	if i < 0 {
-		return Digest{}, fmt.Errorf("invalid digest %q", s)
-	}
-	typ, encSum := s[:i], s[i+1:]
-	if typ != "sha256" {
-		return Digest{}, fmt.Errorf("unsupported digest type %q", typ)
-	}
-	d := Digest{
-		Type: DigestTypeSHA256,
-	}
-	n, err := hex.Decode(d.Sum[:], []byte(encSum))
-	if err != nil {
-		return Digest{}, err
-	}
-	if n != 32 {
-		return Digest{}, fmt.Errorf("digest %q decoded to %d bytes; want 32", encSum, n)
-	}
-	return d, nil
-}
-
-func (d Digest) String() string {
-	if d.Type == DigestTypeInvalid {
-		return ""
-	}
-	return fmt.Sprintf("sha256-%x", d.Sum)
-}
-
-func (d Digest) IsValid() bool {
-	return d.Type != DigestTypeInvalid
-}
```
types/model/name_test.go (filename inferred)

```diff
@@ -284,40 +284,6 @@ func TestFilepathAllocs(t *testing.T) {
 	}
 }
 
-const (
-	validSha256    = "sha256-1000000000000000000000000000000000000000000000000000000000000000"
-	validSha256Old = "sha256:1000000000000000000000000000000000000000000000000000000000000000"
-)
-
-func TestParseDigest(t *testing.T) {
-	cases := []struct {
-		in   string
-		want string
-	}{
-		{"", ""},           // empty
-		{"sha123-12", ""},  // invalid type
-		{"sha256-", ""},    // invalid sum
-		{"sha256-123", ""}, // invalid odd length sum
-
-		{validSha256, validSha256},
-		{validSha256Old, validSha256},
-	}
-	for _, tt := range cases {
-		t.Run(tt.in, func(t *testing.T) {
-			got, err := ParseDigest(tt.in)
-			if err != nil {
-				if tt.want != "" {
-					t.Errorf("parseDigest(%q) = %v; want %v", tt.in, err, tt.want)
-				}
-				return
-			}
-			if got.String() != tt.want {
-				t.Errorf("parseDigest(%q).String() = %q; want %q", tt.in, got, tt.want)
-			}
-		})
-	}
-}
-
 func TestParseNameFromFilepath(t *testing.T) {
 	cases := map[string]Name{
 		filepath.Join("host", "namespace", "model", "tag"): {Host: "host", Namespace: "namespace", Model: "model", Tag: "tag"},
```