Compare commits
39 Commits
v0.1.33-rc
...
mxyng/spli
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dc474f9b83 | ||
|
|
41ae232e10 | ||
|
|
122b35c784 | ||
|
|
3244a25c79 | ||
|
|
b535afe35c | ||
|
|
fd071eab8b | ||
|
|
da0bb5d772 | ||
|
|
1909e624ce | ||
|
|
1d8c850f38 | ||
|
|
e9ae607ece | ||
|
|
93707fa3f2 | ||
|
|
94c369095f | ||
|
|
9164b0161b | ||
|
|
bf4fc25f7b | ||
|
|
5b806d8d24 | ||
|
|
cb1e072643 | ||
|
|
45b6a12e45 | ||
|
|
68755f1f5e | ||
|
|
997a455039 | ||
|
|
88775e1ff9 | ||
|
|
8867e744ff | ||
|
|
4fd064bea6 | ||
|
|
59fbceedcc | ||
|
|
321d57e1a0 | ||
|
|
ba26c7aa00 | ||
|
|
63c763685f | ||
|
|
34a4a94f13 | ||
|
|
f4a73d57a4 | ||
|
|
948114e3e3 | ||
|
|
a3e60d9058 | ||
|
|
5ea844964e | ||
|
|
bd8eed57fc | ||
|
|
9cf0f2e973 | ||
|
|
176ad3aa6e | ||
|
|
4d08363580 | ||
|
|
8907bf51d2 | ||
|
|
abe614c705 | ||
|
|
238715037d | ||
|
|
c0a00f68ae |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -11,4 +11,5 @@ ggml-metal.metal
|
||||
.idea
|
||||
test_data
|
||||
*.crt
|
||||
llm/build
|
||||
llm/build
|
||||
__debug_bin*
|
||||
@@ -1,5 +1,5 @@
|
||||
<div align="center">
|
||||
<img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
|
||||
<img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
|
||||
</div>
|
||||
|
||||
# Ollama
|
||||
@@ -173,7 +173,7 @@ I'm a basic program that prints the famous "Hello, world!" message to the consol
|
||||
The image features a yellow smiley face, which is likely the central focus of the picture.
|
||||
```
|
||||
|
||||
### Pass in prompt as arguments
|
||||
### Pass the prompt as an argument
|
||||
|
||||
```
|
||||
$ ollama run llama3 "Summarize this file: $(cat README.md)"
|
||||
@@ -294,7 +294,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [RAGFlow: Open-source Retrieval-Augmented Generation engine based on deep document understanding](https://github.com/infiniflow/ragflow)
|
||||
- [chat: chat web app for teams](https://github.com/swuecho/chat)
|
||||
- [Lobe Chat](https://github.com/lobehub/lobe-chat) with [Integrating Doc](https://lobehub.com/docs/self-hosting/examples/ollama)
|
||||
- [Ollama RAG Chatbot: Local Chat with multiples PDFs using Ollama and RAG.](https://github.com/datvodinh/rag-chatbot.git)
|
||||
- [Ollama RAG Chatbot: Local Chat with multiple PDFs using Ollama and RAG.](https://github.com/datvodinh/rag-chatbot.git)
|
||||
|
||||
### Terminal
|
||||
|
||||
@@ -384,4 +384,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
|
||||
|
||||
### Supported backends
|
||||
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
|
||||
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
|
||||
25
cmd/cmd.go
25
cmd/cmd.go
@@ -57,12 +57,13 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
p := progress.NewProgress(os.Stderr)
|
||||
defer p.Stop()
|
||||
|
||||
modelfile, err := os.ReadFile(filename)
|
||||
modelfile, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer modelfile.Close()
|
||||
|
||||
commands, err := parser.Parse(bytes.NewReader(modelfile))
|
||||
commands, err := parser.Parse(modelfile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -76,10 +77,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
spinner := progress.NewSpinner(status)
|
||||
p.Add(status, spinner)
|
||||
|
||||
for _, c := range commands {
|
||||
switch c.Name {
|
||||
for i := range commands {
|
||||
switch commands[i].Name {
|
||||
case "model", "adapter":
|
||||
path := c.Args
|
||||
path := commands[i].Args
|
||||
if path == "~" {
|
||||
path = home
|
||||
} else if strings.HasPrefix(path, "~/") {
|
||||
@@ -91,7 +92,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
fi, err := os.Stat(path)
|
||||
if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
|
||||
if errors.Is(err, os.ErrNotExist) && commands[i].Name == "model" {
|
||||
continue
|
||||
} else if err != nil {
|
||||
return err
|
||||
@@ -114,13 +115,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
name := c.Name
|
||||
if c.Name == "model" {
|
||||
name = "from"
|
||||
}
|
||||
|
||||
re := regexp.MustCompile(fmt.Sprintf(`(?im)^(%s)\s+%s\s*$`, name, c.Args))
|
||||
modelfile = re.ReplaceAll(modelfile, []byte("$1 @"+digest))
|
||||
commands[i].Args = "@"+digest
|
||||
}
|
||||
}
|
||||
|
||||
@@ -150,7 +145,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
|
||||
quantization, _ := cmd.Flags().GetString("quantization")
|
||||
|
||||
request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile), Quantization: quantization}
|
||||
request := api.CreateRequest{Name: args[0], Modelfile: parser.Format(commands), Quantization: quantization}
|
||||
if err := client.Create(cmd.Context(), &request, fn); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1125,7 +1120,7 @@ Environment Variables:
|
||||
RunE: ListHandler,
|
||||
}
|
||||
copyCmd := &cobra.Command{
|
||||
Use: "cp SOURCE TARGET",
|
||||
Use: "cp SOURCE DESTINATION",
|
||||
Short: "Copy a model",
|
||||
Args: cobra.ExactArgs(2),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
|
||||
@@ -94,6 +94,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
||||
fmt.Fprintln(os.Stderr, " /show Show model information")
|
||||
fmt.Fprintln(os.Stderr, " /load <model> Load a session or model")
|
||||
fmt.Fprintln(os.Stderr, " /save <model> Save your current session")
|
||||
fmt.Fprintln(os.Stderr, " /clear Clear session context")
|
||||
fmt.Fprintln(os.Stderr, " /bye Exit")
|
||||
fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
|
||||
fmt.Fprintln(os.Stderr, " /? shortcuts Help for keyboard shortcuts")
|
||||
@@ -280,6 +281,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
||||
}
|
||||
fmt.Printf("Created new model '%s'\n", args[1])
|
||||
continue
|
||||
case strings.HasPrefix(line, "/clear"):
|
||||
opts.Messages = []api.Message{}
|
||||
fmt.Println("Cleared session context")
|
||||
continue
|
||||
case strings.HasPrefix(line, "/set"):
|
||||
args := strings.Fields(line)
|
||||
if len(args) > 1 {
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -47,7 +48,7 @@ type ByteOrder interface {
|
||||
type ModelArch interface {
|
||||
GetTensors() error
|
||||
LoadVocab() error
|
||||
WriteGGUF() (string, error)
|
||||
WriteGGUF(io.WriteSeeker) error
|
||||
}
|
||||
|
||||
type ModelFormat interface {
|
||||
|
||||
@@ -94,7 +94,7 @@ func (m *GemmaModel) LoadVocab() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *GemmaModel) WriteGGUF() (string, error) {
|
||||
func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
|
||||
kv := llm.KV{
|
||||
"general.architecture": "gemma",
|
||||
"general.name": m.Name,
|
||||
@@ -122,16 +122,5 @@ func (m *GemmaModel) WriteGGUF() (string, error) {
|
||||
"tokenizer.ggml.add_eos_token": false,
|
||||
}
|
||||
|
||||
f, err := os.CreateTemp("", "ollama-gguf")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
mod := llm.NewGGUFV3(m.Params.ByteOrder)
|
||||
if err := mod.Encode(f, kv, m.Tensors); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return f.Name(), nil
|
||||
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
||||
}
|
||||
|
||||
@@ -132,7 +132,7 @@ func (m *LlamaModel) LoadVocab() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LlamaModel) WriteGGUF() (string, error) {
|
||||
func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
|
||||
kv := llm.KV{
|
||||
"general.architecture": "llama",
|
||||
"general.name": m.Name,
|
||||
@@ -161,16 +161,9 @@ func (m *LlamaModel) WriteGGUF() (string, error) {
|
||||
|
||||
f, err := os.CreateTemp("", "ollama-gguf")
|
||||
if err != nil {
|
||||
return "", err
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
mod := llm.NewGGUFV3(m.Params.ByteOrder)
|
||||
if err := mod.Encode(f, kv, m.Tensors); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
slog.Debug(fmt.Sprintf("gguf file = %s", f.Name()))
|
||||
|
||||
return f.Name(), nil
|
||||
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(f, kv, m.Tensors)
|
||||
}
|
||||
|
||||
@@ -132,7 +132,7 @@ func (m *MistralModel) LoadVocab() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MistralModel) WriteGGUF() (string, error) {
|
||||
func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
|
||||
kv := llm.KV{
|
||||
"general.architecture": "llama",
|
||||
"general.name": m.Name,
|
||||
@@ -158,16 +158,5 @@ func (m *MistralModel) WriteGGUF() (string, error) {
|
||||
"tokenizer.ggml.unknown_token_id": uint32(0),
|
||||
}
|
||||
|
||||
f, err := os.CreateTemp("", "ollama-gguf")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
mod := llm.NewGGUFV3(m.Params.ByteOrder)
|
||||
if err := mod.Encode(f, kv, m.Tensors); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return f.Name(), nil
|
||||
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"os"
|
||||
"io"
|
||||
"regexp"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
@@ -47,7 +47,7 @@ func (m *MixtralModel) LoadVocab() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MixtralModel) WriteGGUF() (string, error) {
|
||||
func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
|
||||
kv := llm.KV{
|
||||
"general.architecture": "llama",
|
||||
"general.name": m.Name,
|
||||
@@ -81,16 +81,5 @@ func (m *MixtralModel) WriteGGUF() (string, error) {
|
||||
"tokenizer.ggml.add_eos_token": false,
|
||||
}
|
||||
|
||||
f, err := os.CreateTemp("", "ollama-gguf")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
mod := llm.NewGGUFV3(m.Params.ByteOrder)
|
||||
if err := mod.Encode(f, kv, m.Tensors); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return f.Name(), nil
|
||||
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
||||
}
|
||||
|
||||
@@ -51,7 +51,7 @@ Typically the build scripts will auto-detect CUDA, however, if your Linux distro
|
||||
or installation approach uses unusual paths, you can specify the location by
|
||||
specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
|
||||
libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
|
||||
set set of target CUDA architectues by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
|
||||
a set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
|
||||
|
||||
Then generate dependencies:
|
||||
|
||||
@@ -142,4 +142,4 @@ In addition to the common Windows development tools described above, install AMD
|
||||
- [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html)
|
||||
- [Strawberry Perl](https://strawberryperl.com/)
|
||||
|
||||
Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
|
||||
Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
|
||||
|
||||
@@ -1,47 +1,47 @@
|
||||
# Ollama Windows Preview
|
||||
|
||||
Welcome to the Ollama Windows preview.
|
||||
|
||||
No more WSL required!
|
||||
|
||||
Ollama now runs as a native Windows application, including NVIDIA and AMD Radeon GPU support.
|
||||
After installing Ollama Windows Preview, Ollama will run in the background and
|
||||
the `ollama` command line is available in `cmd`, `powershell` or your favorite
|
||||
terminal application. As usual the Ollama [api](./api.md) will be served on
|
||||
`http://localhost:11434`.
|
||||
|
||||
As this is a preview release, you should expect a few bugs here and there. If
|
||||
you run into a problem you can reach out on
|
||||
[Discord](https://discord.gg/ollama), or file an
|
||||
[issue](https://github.com/ollama/ollama/issues).
|
||||
Logs will often be helpful in diagnosing the problem (see
|
||||
[Troubleshooting](#troubleshooting) below)
|
||||
|
||||
## System Requirements
|
||||
|
||||
* Windows 10 or newer, Home or Pro
|
||||
* NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
|
||||
* AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
|
||||
|
||||
## API Access
|
||||
|
||||
Here's a quick example showing API access from `powershell`
|
||||
```powershell
|
||||
(Invoke-WebRequest -method POST -Body '{"model":"llama2", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
While we're in preview, `OLLAMA_DEBUG` is always enabled, which adds
|
||||
a "view logs" menu item to the app, and increases logging for the GUI app and
|
||||
server.
|
||||
|
||||
Ollama on Windows stores files in a few different locations. You can view them in
|
||||
the explorer window by hitting `<cmd>+R` and typing in:
|
||||
- `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
|
||||
- *app.log* contains logs from the GUI application
|
||||
- *server.log* contains the server logs
|
||||
- *upgrade.log* contains log output for upgrades
|
||||
- `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH)
|
||||
- `explorer %HOMEPATH%\.ollama` contains models and configuration
|
||||
- `explorer %TEMP%` contains temporary executable files in one or more `ollama*` directories
|
||||
# Ollama Windows Preview
|
||||
|
||||
Welcome to the Ollama Windows preview.
|
||||
|
||||
No more WSL required!
|
||||
|
||||
Ollama now runs as a native Windows application, including NVIDIA and AMD Radeon GPU support.
|
||||
After installing Ollama Windows Preview, Ollama will run in the background and
|
||||
the `ollama` command line is available in `cmd`, `powershell` or your favorite
|
||||
terminal application. As usual the Ollama [api](./api.md) will be served on
|
||||
`http://localhost:11434`.
|
||||
|
||||
As this is a preview release, you should expect a few bugs here and there. If
|
||||
you run into a problem you can reach out on
|
||||
[Discord](https://discord.gg/ollama), or file an
|
||||
[issue](https://github.com/ollama/ollama/issues).
|
||||
Logs will often be helpful in diagnosing the problem (see
|
||||
[Troubleshooting](#troubleshooting) below)
|
||||
|
||||
## System Requirements
|
||||
|
||||
* Windows 10 or newer, Home or Pro
|
||||
* NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
|
||||
* AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
|
||||
|
||||
## API Access
|
||||
|
||||
Here's a quick example showing API access from `powershell`
|
||||
```powershell
|
||||
(Invoke-WebRequest -method POST -Body '{"model":"llama2", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
While we're in preview, `OLLAMA_DEBUG` is always enabled, which adds
|
||||
a "view logs" menu item to the app, and increases logging for the GUI app and
|
||||
server.
|
||||
|
||||
Ollama on Windows stores files in a few different locations. You can view them in
|
||||
the explorer window by hitting `<cmd>+R` and typing in:
|
||||
- `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
|
||||
- *app.log* contains logs from the GUI application
|
||||
- *server.log* contains the server logs
|
||||
- *upgrade.log* contains log output for upgrades
|
||||
- `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH)
|
||||
- `explorer %HOMEPATH%\.ollama` contains models and configuration
|
||||
- `explorer %TEMP%` contains temporary executable files in one or more `ollama*` directories
|
||||
|
||||
@@ -107,7 +107,7 @@ func startServer(ctx context.Context, ollamaHost string) error {
|
||||
|
||||
if tmp := os.Getenv("OLLAMA_HOST"); tmp != ollamaHost {
|
||||
slog.Info("setting env", "OLLAMA_HOST", ollamaHost)
|
||||
os.Setenv("OLLAMA_HOST", ollamaHost)
|
||||
t.Setenv("OLLAMA_HOST", ollamaHost)
|
||||
}
|
||||
|
||||
slog.Info("starting server", "url", ollamaHost)
|
||||
|
||||
140
llm/filetype.go
Normal file
140
llm/filetype.go
Normal file
@@ -0,0 +1,140 @@
|
||||
package llm
|
||||
|
||||
import "fmt"
|
||||
|
||||
type fileType uint32
|
||||
|
||||
const (
|
||||
fileTypeF32 fileType = iota
|
||||
fileTypeF16
|
||||
fileTypeQ4_0
|
||||
fileTypeQ4_1
|
||||
fileTypeQ4_1_F16
|
||||
fileTypeQ4_2 // unused
|
||||
fileTypeQ4_3 // unused
|
||||
fileTypeQ8_0
|
||||
fileTypeQ5_0
|
||||
fileTypeQ5_1
|
||||
fileTypeQ2_K
|
||||
fileTypeQ3_K_S
|
||||
fileTypeQ3_K_M
|
||||
fileTypeQ3_K_L
|
||||
fileTypeQ4_K_S
|
||||
fileTypeQ4_K_M
|
||||
fileTypeQ5_K_S
|
||||
fileTypeQ5_K_M
|
||||
fileTypeQ6_K
|
||||
fileTypeIQ2_XXS
|
||||
fileTypeIQ2_XS
|
||||
fileTypeQ2_K_S
|
||||
fileTypeQ3_K_XS
|
||||
fileTypeIQ3_XXS
|
||||
|
||||
fileTypeUnknown
|
||||
)
|
||||
|
||||
func ParseFileType(s string) (fileType, error) {
|
||||
switch s {
|
||||
case "F32":
|
||||
return fileTypeF32, nil
|
||||
case "F16":
|
||||
return fileTypeF16, nil
|
||||
case "Q4_0":
|
||||
return fileTypeQ4_0, nil
|
||||
case "Q4_1":
|
||||
return fileTypeQ4_1, nil
|
||||
case "Q4_1_F16":
|
||||
return fileTypeQ4_1_F16, nil
|
||||
case "Q8_0":
|
||||
return fileTypeQ8_0, nil
|
||||
case "Q5_0":
|
||||
return fileTypeQ5_0, nil
|
||||
case "Q5_1":
|
||||
return fileTypeQ5_1, nil
|
||||
case "Q2_K":
|
||||
return fileTypeQ2_K, nil
|
||||
case "Q3_K_S":
|
||||
return fileTypeQ3_K_S, nil
|
||||
case "Q3_K_M":
|
||||
return fileTypeQ3_K_M, nil
|
||||
case "Q3_K_L":
|
||||
return fileTypeQ3_K_L, nil
|
||||
case "Q4_K_S":
|
||||
return fileTypeQ4_K_S, nil
|
||||
case "Q4_K_M":
|
||||
return fileTypeQ4_K_M, nil
|
||||
case "Q5_K_S":
|
||||
return fileTypeQ5_K_S, nil
|
||||
case "Q5_K_M":
|
||||
return fileTypeQ5_K_M, nil
|
||||
case "Q6_K":
|
||||
return fileTypeQ6_K, nil
|
||||
case "IQ2_XXS":
|
||||
return fileTypeIQ2_XXS, nil
|
||||
case "IQ2_XS":
|
||||
return fileTypeIQ2_XS, nil
|
||||
case "Q2_K_S":
|
||||
return fileTypeQ2_K_S, nil
|
||||
case "Q3_K_XS":
|
||||
return fileTypeQ3_K_XS, nil
|
||||
case "IQ3_XXS":
|
||||
return fileTypeIQ3_XXS, nil
|
||||
default:
|
||||
return fileTypeUnknown, fmt.Errorf("unknown fileType: %s", s)
|
||||
}
|
||||
}
|
||||
|
||||
func (t fileType) String() string {
|
||||
switch t {
|
||||
case fileTypeF32:
|
||||
return "F32"
|
||||
case fileTypeF16:
|
||||
return "F16"
|
||||
case fileTypeQ4_0:
|
||||
return "Q4_0"
|
||||
case fileTypeQ4_1:
|
||||
return "Q4_1"
|
||||
case fileTypeQ4_1_F16:
|
||||
return "Q4_1_F16"
|
||||
case fileTypeQ8_0:
|
||||
return "Q8_0"
|
||||
case fileTypeQ5_0:
|
||||
return "Q5_0"
|
||||
case fileTypeQ5_1:
|
||||
return "Q5_1"
|
||||
case fileTypeQ2_K:
|
||||
return "Q2_K"
|
||||
case fileTypeQ3_K_S:
|
||||
return "Q3_K_S"
|
||||
case fileTypeQ3_K_M:
|
||||
return "Q3_K_M"
|
||||
case fileTypeQ3_K_L:
|
||||
return "Q3_K_L"
|
||||
case fileTypeQ4_K_S:
|
||||
return "Q4_K_S"
|
||||
case fileTypeQ4_K_M:
|
||||
return "Q4_K_M"
|
||||
case fileTypeQ5_K_S:
|
||||
return "Q5_K_S"
|
||||
case fileTypeQ5_K_M:
|
||||
return "Q5_K_M"
|
||||
case fileTypeQ6_K:
|
||||
return "Q6_K"
|
||||
case fileTypeIQ2_XXS:
|
||||
return "IQ2_XXS"
|
||||
case fileTypeIQ2_XS:
|
||||
return "IQ2_XS"
|
||||
case fileTypeQ2_K_S:
|
||||
return "Q2_K_S"
|
||||
case fileTypeQ3_K_XS:
|
||||
return "Q3_K_XS"
|
||||
case fileTypeIQ3_XXS:
|
||||
return "IQ3_XXS"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
func (t fileType) Value() uint32 {
|
||||
return uint32(t)
|
||||
}
|
||||
12
llm/ggla.go
12
llm/ggla.go
@@ -33,6 +33,7 @@ func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
|
||||
|
||||
type ggla struct {
|
||||
*containerGGLA
|
||||
offset int64
|
||||
|
||||
kv KV
|
||||
tensors []*Tensor
|
||||
@@ -53,6 +54,10 @@ func (llm *ggla) Tensors() Tensors {
|
||||
return llm.tensors
|
||||
}
|
||||
|
||||
func (llm *ggla) Offset() int64 {
|
||||
return llm.offset
|
||||
}
|
||||
|
||||
func (llm *ggla) decode(rs io.ReadSeeker) error {
|
||||
var r uint32
|
||||
if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
|
||||
@@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) error {
|
||||
}
|
||||
llm.kv["alpha"] = alpha
|
||||
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
llm.offset = offset
|
||||
|
||||
for {
|
||||
var dims uint32
|
||||
if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
|
||||
|
||||
96
llm/ggml.go
96
llm/ggml.go
@@ -13,85 +13,10 @@ type GGML struct {
|
||||
model
|
||||
}
|
||||
|
||||
const (
|
||||
fileTypeF32 uint32 = iota
|
||||
fileTypeF16
|
||||
fileTypeQ4_0
|
||||
fileTypeQ4_1
|
||||
fileTypeQ4_1_F16
|
||||
fileTypeQ8_0 uint32 = iota + 2
|
||||
fileTypeQ5_0
|
||||
fileTypeQ5_1
|
||||
fileTypeQ2_K
|
||||
fileTypeQ3_K_S
|
||||
fileTypeQ3_K_M
|
||||
fileTypeQ3_K_L
|
||||
fileTypeQ4_K_S
|
||||
fileTypeQ4_K_M
|
||||
fileTypeQ5_K_S
|
||||
fileTypeQ5_K_M
|
||||
fileTypeQ6_K
|
||||
fileTypeIQ2_XXS
|
||||
fileTypeIQ2_XS
|
||||
fileTypeQ2_K_S
|
||||
fileTypeQ3_K_XS
|
||||
fileTypeIQ3_XXS
|
||||
)
|
||||
|
||||
func fileType(fileType uint32) string {
|
||||
switch fileType {
|
||||
case fileTypeF32:
|
||||
return "F32"
|
||||
case fileTypeF16:
|
||||
return "F16"
|
||||
case fileTypeQ4_0:
|
||||
return "Q4_0"
|
||||
case fileTypeQ4_1:
|
||||
return "Q4_1"
|
||||
case fileTypeQ4_1_F16:
|
||||
return "Q4_1_F16"
|
||||
case fileTypeQ8_0:
|
||||
return "Q8_0"
|
||||
case fileTypeQ5_0:
|
||||
return "Q5_0"
|
||||
case fileTypeQ5_1:
|
||||
return "Q5_1"
|
||||
case fileTypeQ2_K:
|
||||
return "Q2_K"
|
||||
case fileTypeQ3_K_S:
|
||||
return "Q3_K_S"
|
||||
case fileTypeQ3_K_M:
|
||||
return "Q3_K_M"
|
||||
case fileTypeQ3_K_L:
|
||||
return "Q3_K_L"
|
||||
case fileTypeQ4_K_S:
|
||||
return "Q4_K_S"
|
||||
case fileTypeQ4_K_M:
|
||||
return "Q4_K_M"
|
||||
case fileTypeQ5_K_S:
|
||||
return "Q5_K_S"
|
||||
case fileTypeQ5_K_M:
|
||||
return "Q5_K_M"
|
||||
case fileTypeQ6_K:
|
||||
return "Q6_K"
|
||||
case fileTypeIQ2_XXS:
|
||||
return "IQ2_XXS"
|
||||
case fileTypeIQ2_XS:
|
||||
return "IQ2_XS"
|
||||
case fileTypeQ2_K_S:
|
||||
return "Q2_K_S"
|
||||
case fileTypeQ3_K_XS:
|
||||
return "Q3_K_XS"
|
||||
case fileTypeIQ3_XXS:
|
||||
return "IQ3_XXS"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
type model interface {
|
||||
KV() KV
|
||||
Tensors() Tensors
|
||||
Offset() int64
|
||||
}
|
||||
|
||||
type KV map[string]any
|
||||
@@ -123,7 +48,7 @@ func (kv KV) ParameterCount() uint64 {
|
||||
|
||||
func (kv KV) FileType() string {
|
||||
if u64 := kv.u64("general.file_type"); u64 > 0 {
|
||||
return fileType(uint32(u64))
|
||||
return fileType(uint32(u64)).String()
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
@@ -286,6 +211,23 @@ const (
|
||||
|
||||
var ErrUnsupportedFormat = errors.New("unsupported model format")
|
||||
|
||||
func DetectGGMLType(b []byte) string {
|
||||
switch binary.LittleEndian.Uint32(b[:4]) {
|
||||
case FILE_MAGIC_GGML:
|
||||
return "ggml"
|
||||
case FILE_MAGIC_GGMF:
|
||||
return "ggmf"
|
||||
case FILE_MAGIC_GGJT:
|
||||
return "ggjt"
|
||||
case FILE_MAGIC_GGLA:
|
||||
return "ggla"
|
||||
case FILE_MAGIC_GGUF_LE, FILE_MAGIC_GGUF_BE:
|
||||
return "gguf"
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) {
|
||||
var magic uint32
|
||||
if err := binary.Read(rs, binary.LittleEndian, &magic); err != nil {
|
||||
|
||||
11
llm/gguf.go
11
llm/gguf.go
@@ -55,7 +55,7 @@ func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
|
||||
|
||||
model := newGGUF(c)
|
||||
slog.Debug(fmt.Sprintf("model = %#v", model))
|
||||
if err := model.Decode(rs); err != nil {
|
||||
if err := model.decode(rs); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -90,6 +90,7 @@ const (
|
||||
|
||||
type gguf struct {
|
||||
*containerGGUF
|
||||
offset int64
|
||||
|
||||
kv KV
|
||||
tensors []*Tensor
|
||||
@@ -116,6 +117,10 @@ func (llm *gguf) Tensors() Tensors {
|
||||
return llm.tensors
|
||||
}
|
||||
|
||||
func (llm *gguf) Offset() int64 {
|
||||
return llm.offset
|
||||
}
|
||||
|
||||
func (llm *gguf) numTensor() uint64 {
|
||||
switch llm.Version {
|
||||
case 1:
|
||||
@@ -138,7 +143,7 @@ func (llm *gguf) numKV() uint64 {
|
||||
}
|
||||
}
|
||||
|
||||
func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
func (llm *gguf) decode(rs io.ReadSeeker) error {
|
||||
// decode key-values
|
||||
for i := 0; uint64(i) < llm.numKV(); i++ {
|
||||
k, err := readGGUFString(llm, rs)
|
||||
@@ -250,6 +255,8 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
return err
|
||||
}
|
||||
|
||||
llm.offset = offset + padding
|
||||
|
||||
for _, tensor := range llm.tensors {
|
||||
if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
|
||||
return err
|
||||
|
||||
56
llm/llm.go
56
llm/llm.go
@@ -20,7 +20,7 @@ func SystemInfo() string {
|
||||
return C.GoString(C.llama_print_system_info())
|
||||
}
|
||||
|
||||
func Quantize(infile, outfile, filetype string) error {
|
||||
func Quantize(infile, outfile string, ftype fileType) error {
|
||||
cinfile := C.CString(infile)
|
||||
defer C.free(unsafe.Pointer(cinfile))
|
||||
|
||||
@@ -29,58 +29,10 @@ func Quantize(infile, outfile, filetype string) error {
|
||||
|
||||
params := C.llama_model_quantize_default_params()
|
||||
params.nthread = -1
|
||||
params.ftype = ftype.Value()
|
||||
|
||||
switch filetype {
|
||||
case "F32":
|
||||
params.ftype = fileTypeF32
|
||||
case "F16":
|
||||
params.ftype = fileTypeF16
|
||||
case "Q4_0":
|
||||
params.ftype = fileTypeQ4_0
|
||||
case "Q4_1":
|
||||
params.ftype = fileTypeQ4_1
|
||||
case "Q4_1_F16":
|
||||
params.ftype = fileTypeQ4_1_F16
|
||||
case "Q8_0":
|
||||
params.ftype = fileTypeQ8_0
|
||||
case "Q5_0":
|
||||
params.ftype = fileTypeQ5_0
|
||||
case "Q5_1":
|
||||
params.ftype = fileTypeQ5_1
|
||||
case "Q2_K":
|
||||
params.ftype = fileTypeQ2_K
|
||||
case "Q3_K_S":
|
||||
params.ftype = fileTypeQ3_K_S
|
||||
case "Q3_K_M":
|
||||
params.ftype = fileTypeQ3_K_M
|
||||
case "Q3_K_L":
|
||||
params.ftype = fileTypeQ3_K_L
|
||||
case "Q4_K_S":
|
||||
params.ftype = fileTypeQ4_K_S
|
||||
case "Q4_K_M":
|
||||
params.ftype = fileTypeQ4_K_M
|
||||
case "Q5_K_S":
|
||||
params.ftype = fileTypeQ5_K_S
|
||||
case "Q5_K_M":
|
||||
params.ftype = fileTypeQ5_K_M
|
||||
case "Q6_K":
|
||||
params.ftype = fileTypeQ6_K
|
||||
case "IQ2_XXS":
|
||||
params.ftype = fileTypeIQ2_XXS
|
||||
case "IQ2_XS":
|
||||
params.ftype = fileTypeIQ2_XS
|
||||
case "Q2_K_S":
|
||||
params.ftype = fileTypeQ2_K_S
|
||||
case "Q3_K_XS":
|
||||
params.ftype = fileTypeQ3_K_XS
|
||||
case "IQ3_XXS":
|
||||
params.ftype = fileTypeIQ3_XXS
|
||||
default:
|
||||
return fmt.Errorf("unknown filetype: %s", filetype)
|
||||
}
|
||||
|
||||
if retval := C.llama_model_quantize(cinfile, coutfile, &params); retval != 0 {
|
||||
return fmt.Errorf("llama_model_quantize: %d", retval)
|
||||
if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
|
||||
return fmt.Errorf("llama_model_quantize: %d", rc)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@@ -300,12 +300,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
continue
|
||||
}
|
||||
|
||||
// reap subprocess when it exits
|
||||
go func() {
|
||||
// Exit status managed via getServerStatus
|
||||
_ = s.cmd.Wait()
|
||||
}()
|
||||
|
||||
// TODO - make sure this is all wired up correctly
|
||||
// if err = s.WaitUntilRunning(); err != nil {
|
||||
// slog.Error("error starting llama server", "server", servers[i], "error", err)
|
||||
@@ -899,7 +893,13 @@ func (s *llmServer) Detokenize(ctx context.Context, tokens []int) (string, error
|
||||
func (s *llmServer) Close() error {
|
||||
if s.cmd != nil {
|
||||
slog.Debug("stopping llama server")
|
||||
return s.cmd.Process.Kill()
|
||||
if err := s.cmd.Process.Kill(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_ = s.cmd.Wait()
|
||||
|
||||
slog.Debug("llama server stopped")
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
363
parser/parser.go
363
parser/parser.go
@@ -6,8 +6,8 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type Command struct {
|
||||
@@ -15,118 +15,283 @@ type Command struct {
|
||||
Args string
|
||||
}
|
||||
|
||||
func (c *Command) Reset() {
|
||||
c.Name = ""
|
||||
c.Args = ""
|
||||
}
|
||||
type state int
|
||||
|
||||
func Parse(reader io.Reader) ([]Command, error) {
|
||||
var commands []Command
|
||||
var command, modelCommand Command
|
||||
const (
|
||||
stateNil state = iota
|
||||
stateName
|
||||
stateValue
|
||||
stateParameter
|
||||
stateMessage
|
||||
stateComment
|
||||
)
|
||||
|
||||
scanner := bufio.NewScanner(reader)
|
||||
scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), bufio.MaxScanTokenSize)
|
||||
scanner.Split(scanModelfile)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
var (
|
||||
errMissingFrom = errors.New("no FROM line")
|
||||
errInvalidMessageRole = errors.New("message role must be one of \"system\", \"user\", or \"assistant\"")
|
||||
errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
|
||||
)
|
||||
|
||||
fields := bytes.SplitN(line, []byte(" "), 2)
|
||||
if len(fields) == 0 || len(fields[0]) == 0 {
|
||||
continue
|
||||
}
|
||||
func Format(cmds []Command) string {
|
||||
var sb strings.Builder
|
||||
for _, cmd := range cmds {
|
||||
name := cmd.Name
|
||||
args := cmd.Args
|
||||
|
||||
switch string(bytes.ToUpper(fields[0])) {
|
||||
case "FROM":
|
||||
command.Name = "model"
|
||||
command.Args = string(bytes.TrimSpace(fields[1]))
|
||||
// copy command for validation
|
||||
modelCommand = command
|
||||
case "ADAPTER":
|
||||
command.Name = string(bytes.ToLower(fields[0]))
|
||||
command.Args = string(bytes.TrimSpace(fields[1]))
|
||||
case "LICENSE", "TEMPLATE", "SYSTEM", "PROMPT":
|
||||
command.Name = string(bytes.ToLower(fields[0]))
|
||||
command.Args = string(fields[1])
|
||||
case "PARAMETER":
|
||||
fields = bytes.SplitN(fields[1], []byte(" "), 2)
|
||||
if len(fields) < 2 {
|
||||
return nil, fmt.Errorf("missing value for %s", fields)
|
||||
}
|
||||
|
||||
command.Name = string(fields[0])
|
||||
command.Args = string(bytes.TrimSpace(fields[1]))
|
||||
case "EMBED":
|
||||
return nil, fmt.Errorf("deprecated command: EMBED is no longer supported, use the /embed API endpoint instead")
|
||||
case "MESSAGE":
|
||||
command.Name = string(bytes.ToLower(fields[0]))
|
||||
fields = bytes.SplitN(fields[1], []byte(" "), 2)
|
||||
if len(fields) < 2 {
|
||||
return nil, fmt.Errorf("should be in the format <role> <message>")
|
||||
}
|
||||
if !slices.Contains([]string{"system", "user", "assistant"}, string(bytes.ToLower(fields[0]))) {
|
||||
return nil, fmt.Errorf("role must be one of \"system\", \"user\", or \"assistant\"")
|
||||
}
|
||||
command.Args = fmt.Sprintf("%s: %s", string(bytes.ToLower(fields[0])), string(fields[1]))
|
||||
switch cmd.Name {
|
||||
case "model":
|
||||
name = "from"
|
||||
args = cmd.Args
|
||||
case "license", "template", "system", "adapter":
|
||||
args = quote(args)
|
||||
case "message":
|
||||
role, message, _ := strings.Cut(cmd.Args, ": ")
|
||||
args = role + " " + quote(message)
|
||||
default:
|
||||
if !bytes.HasPrefix(fields[0], []byte("#")) {
|
||||
// log a warning for unknown commands
|
||||
slog.Warn(fmt.Sprintf("Unknown command: %s", fields[0]))
|
||||
}
|
||||
continue
|
||||
name = "parameter"
|
||||
args = cmd.Name + " " + quote(cmd.Args)
|
||||
}
|
||||
|
||||
commands = append(commands, command)
|
||||
command.Reset()
|
||||
fmt.Fprintln(&sb, strings.ToUpper(name), args)
|
||||
}
|
||||
|
||||
if modelCommand.Args == "" {
|
||||
return nil, errors.New("no FROM line for the model was specified")
|
||||
}
|
||||
|
||||
return commands, scanner.Err()
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func scanModelfile(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||
advance, token, err = scan([]byte(`"""`), []byte(`"""`), data, atEOF)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
func Parse(r io.Reader) (cmds []Command, err error) {
|
||||
var cmd Command
|
||||
var curr state
|
||||
var b bytes.Buffer
|
||||
var role string
|
||||
|
||||
if advance > 0 && token != nil {
|
||||
return advance, token, nil
|
||||
}
|
||||
|
||||
advance, token, err = scan([]byte(`"`), []byte(`"`), data, atEOF)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
if advance > 0 && token != nil {
|
||||
return advance, token, nil
|
||||
}
|
||||
|
||||
return bufio.ScanLines(data, atEOF)
|
||||
}
|
||||
|
||||
func scan(openBytes, closeBytes, data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||
newline := bytes.IndexByte(data, '\n')
|
||||
|
||||
if start := bytes.Index(data, openBytes); start >= 0 && start < newline {
|
||||
end := bytes.Index(data[start+len(openBytes):], closeBytes)
|
||||
if end < 0 {
|
||||
if atEOF {
|
||||
return 0, nil, fmt.Errorf("unterminated %s: expecting %s", openBytes, closeBytes)
|
||||
} else {
|
||||
return 0, nil, nil
|
||||
}
|
||||
br := bufio.NewReader(r)
|
||||
for {
|
||||
r, _, err := br.ReadRune()
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
n := start + len(openBytes) + end + len(closeBytes)
|
||||
next, r, err := parseRuneForState(r, curr)
|
||||
if errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
return nil, fmt.Errorf("%w: %s", err, b.String())
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
newData := data[:start]
|
||||
newData = append(newData, data[start+len(openBytes):n-len(closeBytes)]...)
|
||||
return n, newData, nil
|
||||
// process the state transition, some transitions need to be intercepted and redirected
|
||||
if next != curr {
|
||||
switch curr {
|
||||
case stateName:
|
||||
if !isValidCommand(b.String()) {
|
||||
return nil, errInvalidCommand
|
||||
}
|
||||
|
||||
// next state sometimes depends on the current buffer value
|
||||
switch s := strings.ToLower(b.String()); s {
|
||||
case "from":
|
||||
cmd.Name = "model"
|
||||
case "parameter":
|
||||
// transition to stateParameter which sets command name
|
||||
next = stateParameter
|
||||
case "message":
|
||||
// transition to stateMessage which validates the message role
|
||||
next = stateMessage
|
||||
fallthrough
|
||||
default:
|
||||
cmd.Name = s
|
||||
}
|
||||
case stateParameter:
|
||||
cmd.Name = b.String()
|
||||
case stateMessage:
|
||||
if !isValidMessageRole(b.String()) {
|
||||
return nil, errInvalidMessageRole
|
||||
}
|
||||
|
||||
role = b.String()
|
||||
case stateComment, stateNil:
|
||||
// pass
|
||||
case stateValue:
|
||||
s, ok := unquote(b.String())
|
||||
if !ok || isSpace(r) {
|
||||
if _, err := b.WriteRune(r); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if role != "" {
|
||||
s = role + ": " + s
|
||||
role = ""
|
||||
}
|
||||
|
||||
cmd.Args = s
|
||||
cmds = append(cmds, cmd)
|
||||
}
|
||||
|
||||
b.Reset()
|
||||
curr = next
|
||||
}
|
||||
|
||||
if strconv.IsPrint(r) {
|
||||
if _, err := b.WriteRune(r); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0, nil, nil
|
||||
// flush the buffer
|
||||
switch curr {
|
||||
case stateComment, stateNil:
|
||||
// pass; nothing to flush
|
||||
case stateValue:
|
||||
s, ok := unquote(b.String())
|
||||
if !ok {
|
||||
return nil, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
if role != "" {
|
||||
s = role + ": " + s
|
||||
}
|
||||
|
||||
cmd.Args = s
|
||||
cmds = append(cmds, cmd)
|
||||
default:
|
||||
return nil, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
for _, cmd := range cmds {
|
||||
if cmd.Name == "model" {
|
||||
return cmds, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, errMissingFrom
|
||||
}
|
||||
|
||||
func parseRuneForState(r rune, cs state) (state, rune, error) {
|
||||
switch cs {
|
||||
case stateNil:
|
||||
switch {
|
||||
case r == '#':
|
||||
return stateComment, 0, nil
|
||||
case isSpace(r), isNewline(r):
|
||||
return stateNil, 0, nil
|
||||
default:
|
||||
return stateName, r, nil
|
||||
}
|
||||
case stateName:
|
||||
switch {
|
||||
case isAlpha(r):
|
||||
return stateName, r, nil
|
||||
case isSpace(r):
|
||||
return stateValue, 0, nil
|
||||
default:
|
||||
return stateNil, 0, errInvalidCommand
|
||||
}
|
||||
case stateValue:
|
||||
switch {
|
||||
case isNewline(r):
|
||||
return stateNil, r, nil
|
||||
case isSpace(r):
|
||||
return stateNil, r, nil
|
||||
default:
|
||||
return stateValue, r, nil
|
||||
}
|
||||
case stateParameter:
|
||||
switch {
|
||||
case isAlpha(r), isNumber(r), r == '_':
|
||||
return stateParameter, r, nil
|
||||
case isSpace(r):
|
||||
return stateValue, 0, nil
|
||||
default:
|
||||
return stateNil, 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
case stateMessage:
|
||||
switch {
|
||||
case isAlpha(r):
|
||||
return stateMessage, r, nil
|
||||
case isSpace(r):
|
||||
return stateValue, 0, nil
|
||||
default:
|
||||
return stateNil, 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
case stateComment:
|
||||
switch {
|
||||
case isNewline(r):
|
||||
return stateNil, 0, nil
|
||||
default:
|
||||
return stateComment, 0, nil
|
||||
}
|
||||
default:
|
||||
return stateNil, 0, errors.New("")
|
||||
}
|
||||
}
|
||||
|
||||
// quote wraps s in double quotes when writing it bare into a Modelfile would
// not read back as the same value.
//
// A value is quoted when it is empty, contains a newline, starts or ends with
// a space, or starts with a double quote (a bare leading quote would be taken
// as an opening delimiter when the value is read back). Values that themselves
// contain a double quote are wrapped in triple quotes; all others use single
// double quotes. Anything else is returned unchanged.
func quote(s string) string {
	needsQuotes := s == "" ||
		strings.Contains(s, "\n") ||
		strings.HasPrefix(s, " ") ||
		strings.HasSuffix(s, " ") ||
		strings.HasPrefix(s, `"`)
	if !needsQuotes {
		return s
	}

	if strings.Contains(s, `"`) {
		return `"""` + s + `"""`
	}

	return `"` + s + `"`
}
|
||||
|
||||
// unquote strips one layer of surrounding double quotes from s.
//
// Triple quotes (""") are checked before single double quotes. The boolean is
// false when s is empty or when s opens with a quote delimiter that is not
// matched by the same delimiter at the end; in that case the returned string
// is empty. A value that does not start with a double quote is returned as is.
func unquote(s string) (string, bool) {
	const (
		single = `"`
		triple = `"""`
	)

	// TODO: single quotes
	switch {
	case s == "":
		return "", false
	case strings.HasPrefix(s, triple):
		// The closing delimiter must not overlap the opening one.
		if len(s) < 2*len(triple) || !strings.HasSuffix(s, triple) {
			return "", false
		}
		return s[len(triple) : len(s)-len(triple)], true
	case strings.HasPrefix(s, single):
		if len(s) < 2*len(single) || !strings.HasSuffix(s, single) {
			return "", false
		}
		return s[len(single) : len(s)-len(single)], true
	default:
		return s, true
	}
}
|
||||
|
||||
// isAlpha reports whether r is an ASCII letter (a-z or A-Z).
func isAlpha(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z':
		return true
	}
	return false
}
|
||||
|
||||
// isNumber reports whether r is an ASCII decimal digit (0-9).
func isNumber(r rune) bool {
	if r < '0' || r > '9' {
		return false
	}
	return true
}
|
||||
|
||||
// isSpace reports whether r is a space or a horizontal tab.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
|
||||
|
||||
// isNewline reports whether r is a carriage return or a line feed.
func isNewline(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	default:
		return false
	}
}
|
||||
|
||||
// isValidMessageRole reports whether role is exactly "system", "user", or
// "assistant". The comparison is case-sensitive.
func isValidMessageRole(role string) bool {
	switch role {
	case "system", "user", "assistant":
		return true
	default:
		return false
	}
}
|
||||
|
||||
// isValidCommand reports whether cmd, compared case-insensitively via
// strings.ToLower, is one of the recognised Modelfile command names.
func isValidCommand(cmd string) bool {
	lowered := strings.ToLower(cmd)
	for _, known := range [...]string{
		"from", "license", "template", "system", "adapter", "parameter", "message",
	} {
		if lowered == known {
			return true
		}
	}
	return false
}
|
||||
|
||||
@@ -1,14 +1,16 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func Test_Parser(t *testing.T) {
|
||||
|
||||
func TestParser(t *testing.T) {
|
||||
input := `
|
||||
FROM model1
|
||||
ADAPTER adapter1
|
||||
@@ -35,21 +37,62 @@ TEMPLATE template1
|
||||
assert.Equal(t, expectedCommands, commands)
|
||||
}
|
||||
|
||||
func Test_Parser_NoFromLine(t *testing.T) {
|
||||
func TestParserFrom(t *testing.T) {
|
||||
var cases = []struct {
|
||||
input string
|
||||
expected []Command
|
||||
err error
|
||||
}{
|
||||
{
|
||||
"FROM foo",
|
||||
[]Command{{Name: "model", Args: "foo"}},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"FROM /path/to/model",
|
||||
[]Command{{Name: "model", Args: "/path/to/model"}},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"FROM /path/to/model/fp16.bin",
|
||||
[]Command{{Name: "model", Args: "/path/to/model/fp16.bin"}},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"FROM llama3:latest",
|
||||
[]Command{{Name: "model", Args: "llama3:latest"}},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"FROM llama3:7b-instruct-q4_K_M",
|
||||
[]Command{{Name: "model", Args: "llama3:7b-instruct-q4_K_M"}},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"", nil, errMissingFrom,
|
||||
},
|
||||
{
|
||||
"PARAMETER param1 value1",
|
||||
nil,
|
||||
errMissingFrom,
|
||||
},
|
||||
{
|
||||
"PARAMETER param1 value1\nFROM foo",
|
||||
[]Command{{Name: "param1", Args: "value1"}, {Name: "model", Args: "foo"}},
|
||||
nil,
|
||||
},
|
||||
}
|
||||
|
||||
input := `
|
||||
PARAMETER param1 value1
|
||||
PARAMETER param2 value2
|
||||
`
|
||||
|
||||
reader := strings.NewReader(input)
|
||||
|
||||
_, err := Parse(reader)
|
||||
assert.ErrorContains(t, err, "no FROM line")
|
||||
for _, c := range cases {
|
||||
t.Run("", func(t *testing.T) {
|
||||
commands, err := Parse(strings.NewReader(c.input))
|
||||
assert.ErrorIs(t, err, c.err)
|
||||
assert.Equal(t, c.expected, commands)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_Parser_MissingValue(t *testing.T) {
|
||||
|
||||
func TestParserParametersMissingValue(t *testing.T) {
|
||||
input := `
|
||||
FROM foo
|
||||
PARAMETER param1
|
||||
@@ -58,41 +101,401 @@ PARAMETER param1
|
||||
reader := strings.NewReader(input)
|
||||
|
||||
_, err := Parse(reader)
|
||||
assert.ErrorContains(t, err, "missing value for [param1]")
|
||||
assert.ErrorIs(t, err, io.ErrUnexpectedEOF)
|
||||
}
|
||||
|
||||
func TestParserBadCommand(t *testing.T) {
|
||||
input := `
|
||||
FROM foo
|
||||
BADCOMMAND param1 value1
|
||||
`
|
||||
_, err := Parse(strings.NewReader(input))
|
||||
assert.ErrorIs(t, err, errInvalidCommand)
|
||||
|
||||
}
|
||||
|
||||
func Test_Parser_Messages(t *testing.T) {
|
||||
|
||||
input := `
|
||||
func TestParserMessages(t *testing.T) {
|
||||
var cases = []struct {
|
||||
input string
|
||||
expected []Command
|
||||
err error
|
||||
}{
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
MESSAGE system You are a Parser. Always Parse things.
|
||||
`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "message", Args: "system: You are a Parser. Always Parse things."},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
MESSAGE system You are a Parser. Always Parse things.`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "message", Args: "system: You are a Parser. Always Parse things."},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
MESSAGE system You are a Parser. Always Parse things.
|
||||
MESSAGE user Hey there!
|
||||
MESSAGE assistant Hello, I want to parse all the things!
|
||||
`
|
||||
|
||||
reader := strings.NewReader(input)
|
||||
commands, err := Parse(reader)
|
||||
assert.Nil(t, err)
|
||||
|
||||
expectedCommands := []Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "message", Args: "system: You are a Parser. Always Parse things."},
|
||||
{Name: "message", Args: "user: Hey there!"},
|
||||
{Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
|
||||
}
|
||||
|
||||
assert.Equal(t, expectedCommands, commands)
|
||||
}
|
||||
|
||||
func Test_Parser_Messages_BadRole(t *testing.T) {
|
||||
|
||||
input := `
|
||||
`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "message", Args: "system: You are a Parser. Always Parse things."},
|
||||
{Name: "message", Args: "user: Hey there!"},
|
||||
{Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
MESSAGE system """
|
||||
You are a multiline Parser. Always Parse things.
|
||||
"""
|
||||
`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "message", Args: "system: \nYou are a multiline Parser. Always Parse things.\n"},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
MESSAGE badguy I'm a bad guy!
|
||||
`
|
||||
`,
|
||||
nil,
|
||||
errInvalidMessageRole,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
MESSAGE system
|
||||
`,
|
||||
nil,
|
||||
io.ErrUnexpectedEOF,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
MESSAGE system`,
|
||||
nil,
|
||||
io.ErrUnexpectedEOF,
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run("", func(t *testing.T) {
|
||||
commands, err := Parse(strings.NewReader(c.input))
|
||||
assert.ErrorIs(t, err, c.err)
|
||||
assert.Equal(t, c.expected, commands)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParserQuoted(t *testing.T) {
|
||||
var cases = []struct {
|
||||
multiline string
|
||||
expected []Command
|
||||
err error
|
||||
}{
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
SYSTEM """
|
||||
This is a
|
||||
multiline system.
|
||||
"""
|
||||
`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "system", Args: "\nThis is a\nmultiline system.\n"},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
SYSTEM """
|
||||
This is a
|
||||
multiline system."""
|
||||
`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "system", Args: "\nThis is a\nmultiline system."},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
SYSTEM """This is a
|
||||
multiline system."""
|
||||
`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "system", Args: "This is a\nmultiline system."},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
SYSTEM """This is a multiline system."""
|
||||
`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "system", Args: "This is a multiline system."},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
SYSTEM """This is a multiline system.""
|
||||
`,
|
||||
nil,
|
||||
io.ErrUnexpectedEOF,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
SYSTEM "
|
||||
`,
|
||||
nil,
|
||||
io.ErrUnexpectedEOF,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
SYSTEM """
|
||||
This is a multiline system with "quotes".
|
||||
"""
|
||||
`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "system", Args: "\nThis is a multiline system with \"quotes\".\n"},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
SYSTEM """"""
|
||||
`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "system", Args: ""},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
SYSTEM ""
|
||||
`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "system", Args: ""},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
SYSTEM "'"
|
||||
`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "system", Args: "'"},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
SYSTEM """''"'""'""'"'''''""'""'"""
|
||||
`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "system", Args: `''"'""'""'"'''''""'""'`},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
`
|
||||
FROM foo
|
||||
TEMPLATE """
|
||||
{{ .Prompt }}
|
||||
"""`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: "template", Args: "\n{{ .Prompt }}\n"},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run("", func(t *testing.T) {
|
||||
commands, err := Parse(strings.NewReader(c.multiline))
|
||||
assert.ErrorIs(t, err, c.err)
|
||||
assert.Equal(t, c.expected, commands)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParserParameters(t *testing.T) {
|
||||
var cases = map[string]struct {
|
||||
name, value string
|
||||
}{
|
||||
"numa true": {"numa", "true"},
|
||||
"num_ctx 1": {"num_ctx", "1"},
|
||||
"num_batch 1": {"num_batch", "1"},
|
||||
"num_gqa 1": {"num_gqa", "1"},
|
||||
"num_gpu 1": {"num_gpu", "1"},
|
||||
"main_gpu 1": {"main_gpu", "1"},
|
||||
"low_vram true": {"low_vram", "true"},
|
||||
"f16_kv true": {"f16_kv", "true"},
|
||||
"logits_all true": {"logits_all", "true"},
|
||||
"vocab_only true": {"vocab_only", "true"},
|
||||
"use_mmap true": {"use_mmap", "true"},
|
||||
"use_mlock true": {"use_mlock", "true"},
|
||||
"num_thread 1": {"num_thread", "1"},
|
||||
"num_keep 1": {"num_keep", "1"},
|
||||
"seed 1": {"seed", "1"},
|
||||
"num_predict 1": {"num_predict", "1"},
|
||||
"top_k 1": {"top_k", "1"},
|
||||
"top_p 1.0": {"top_p", "1.0"},
|
||||
"tfs_z 1.0": {"tfs_z", "1.0"},
|
||||
"typical_p 1.0": {"typical_p", "1.0"},
|
||||
"repeat_last_n 1": {"repeat_last_n", "1"},
|
||||
"temperature 1.0": {"temperature", "1.0"},
|
||||
"repeat_penalty 1.0": {"repeat_penalty", "1.0"},
|
||||
"presence_penalty 1.0": {"presence_penalty", "1.0"},
|
||||
"frequency_penalty 1.0": {"frequency_penalty", "1.0"},
|
||||
"mirostat 1": {"mirostat", "1"},
|
||||
"mirostat_tau 1.0": {"mirostat_tau", "1.0"},
|
||||
"mirostat_eta 1.0": {"mirostat_eta", "1.0"},
|
||||
"penalize_newline true": {"penalize_newline", "true"},
|
||||
"stop ### User:": {"stop", "### User:"},
|
||||
"stop ### User: ": {"stop", "### User: "},
|
||||
"stop \"### User:\"": {"stop", "### User:"},
|
||||
"stop \"### User: \"": {"stop", "### User: "},
|
||||
"stop \"\"\"### User:\"\"\"": {"stop", "### User:"},
|
||||
"stop \"\"\"### User:\n\"\"\"": {"stop", "### User:\n"},
|
||||
"stop <|endoftext|>": {"stop", "<|endoftext|>"},
|
||||
"stop <|eot_id|>": {"stop", "<|eot_id|>"},
|
||||
"stop </s>": {"stop", "</s>"},
|
||||
}
|
||||
|
||||
for k, v := range cases {
|
||||
t.Run(k, func(t *testing.T) {
|
||||
var b bytes.Buffer
|
||||
fmt.Fprintln(&b, "FROM foo")
|
||||
fmt.Fprintln(&b, "PARAMETER", k)
|
||||
commands, err := Parse(&b)
|
||||
assert.Nil(t, err)
|
||||
|
||||
assert.Equal(t, []Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
{Name: v.name, Args: v.value},
|
||||
}, commands)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParserComments(t *testing.T) {
|
||||
var cases = []struct {
|
||||
input string
|
||||
expected []Command
|
||||
}{
|
||||
{
|
||||
`
|
||||
# comment
|
||||
FROM foo
|
||||
`,
|
||||
[]Command{
|
||||
{Name: "model", Args: "foo"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run("", func(t *testing.T) {
|
||||
commands, err := Parse(strings.NewReader(c.input))
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, c.expected, commands)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFormatParse(t *testing.T) {
|
||||
var cases = []string{
|
||||
`
|
||||
FROM foo
|
||||
ADAPTER adapter1
|
||||
LICENSE MIT
|
||||
PARAMETER param1 value1
|
||||
PARAMETER param2 value2
|
||||
TEMPLATE template1
|
||||
MESSAGE system You are a Parser. Always Parse things.
|
||||
MESSAGE user Hey there!
|
||||
MESSAGE assistant Hello, I want to parse all the things!
|
||||
`,
|
||||
`
|
||||
FROM foo
|
||||
ADAPTER adapter1
|
||||
LICENSE MIT
|
||||
PARAMETER param1 value1
|
||||
PARAMETER param2 value2
|
||||
TEMPLATE template1
|
||||
MESSAGE system """
|
||||
You are a store greeter. Always responsed with "Hello!".
|
||||
"""
|
||||
MESSAGE user Hey there!
|
||||
MESSAGE assistant Hello, I want to parse all the things!
|
||||
`,
|
||||
`
|
||||
FROM foo
|
||||
ADAPTER adapter1
|
||||
LICENSE """
|
||||
Very long and boring legal text.
|
||||
Blah blah blah.
|
||||
"Oh look, a quote!"
|
||||
"""
|
||||
|
||||
PARAMETER param1 value1
|
||||
PARAMETER param2 value2
|
||||
TEMPLATE template1
|
||||
MESSAGE system """
|
||||
You are a store greeter. Always responsed with "Hello!".
|
||||
"""
|
||||
MESSAGE user Hey there!
|
||||
MESSAGE assistant Hello, I want to parse all the things!
|
||||
`,
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run("", func(t *testing.T) {
|
||||
commands, err := Parse(strings.NewReader(c))
|
||||
assert.NoError(t, err)
|
||||
|
||||
commands2, err := Parse(strings.NewReader(Format(commands)))
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.Equal(t, commands, commands2)
|
||||
})
|
||||
}
|
||||
|
||||
reader := strings.NewReader(input)
|
||||
_, err := Parse(reader)
|
||||
assert.ErrorContains(t, err, "role must be one of \"system\", \"user\", or \"assistant\"")
|
||||
}
|
||||
|
||||
785
server/images.go
785
server/images.go
@@ -1,8 +1,8 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"cmp"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
@@ -11,7 +11,6 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"log"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
@@ -21,13 +20,11 @@ import (
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/auth"
|
||||
"github.com/ollama/ollama/convert"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/parser"
|
||||
@@ -64,6 +61,48 @@ func (m *Model) IsEmbedding() bool {
|
||||
return slices.Contains(m.Config.ModelFamilies, "bert") || slices.Contains(m.Config.ModelFamilies, "nomic-bert")
|
||||
}
|
||||
|
||||
func (m *Model) Commands() (cmds []parser.Command) {
|
||||
cmds = append(cmds, parser.Command{Name: "model", Args: m.ModelPath})
|
||||
|
||||
if m.Template != "" {
|
||||
cmds = append(cmds, parser.Command{Name: "template", Args: m.Template})
|
||||
}
|
||||
|
||||
if m.System != "" {
|
||||
cmds = append(cmds, parser.Command{Name: "system", Args: m.System})
|
||||
}
|
||||
|
||||
for _, adapter := range m.AdapterPaths {
|
||||
cmds = append(cmds, parser.Command{Name: "adapter", Args: adapter})
|
||||
}
|
||||
|
||||
for _, projector := range m.ProjectorPaths {
|
||||
cmds = append(cmds, parser.Command{Name: "projector", Args: projector})
|
||||
}
|
||||
|
||||
for k, v := range m.Options {
|
||||
switch v := v.(type) {
|
||||
case []any:
|
||||
for _, s := range v {
|
||||
cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", s)})
|
||||
}
|
||||
default:
|
||||
cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", v)})
|
||||
}
|
||||
}
|
||||
|
||||
for _, license := range m.License {
|
||||
cmds = append(cmds, parser.Command{Name: "license", Args: license})
|
||||
}
|
||||
|
||||
for _, msg := range m.Messages {
|
||||
cmds = append(cmds, parser.Command{Name: "message", Args: fmt.Sprintf("%s %s", msg.Role, msg.Content)})
|
||||
}
|
||||
|
||||
return cmds
|
||||
|
||||
}
|
||||
|
||||
type Message struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content"`
|
||||
@@ -89,36 +128,6 @@ type ConfigV2 struct {
|
||||
RootFS RootFS `json:"rootfs"`
|
||||
}
|
||||
|
||||
func (c *ConfigV2) SetModelFormat(format string) {
|
||||
if c.ModelFormat == "" {
|
||||
c.ModelFormat = format
|
||||
}
|
||||
}
|
||||
|
||||
func (c *ConfigV2) SetModelFamily(families ...string) {
|
||||
for _, family := range families {
|
||||
if c.ModelFamily == "" {
|
||||
c.ModelFamily = family
|
||||
}
|
||||
|
||||
if !slices.Contains(c.ModelFamilies, family) {
|
||||
c.ModelFamilies = append(c.ModelFamilies, family)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *ConfigV2) SetModelType(modelType string) {
|
||||
if c.ModelType == "" {
|
||||
c.ModelType = modelType
|
||||
}
|
||||
}
|
||||
|
||||
func (c *ConfigV2) SetFileType(fileType string) {
|
||||
if c.FileType == "" {
|
||||
c.FileType = fileType
|
||||
}
|
||||
}
|
||||
|
||||
type RootFS struct {
|
||||
Type string `json:"type"`
|
||||
DiffIDs []string `json:"diff_ids"`
|
||||
@@ -199,6 +208,14 @@ func GetModel(name string) (*Model, error) {
|
||||
|
||||
switch layer.MediaType {
|
||||
case "application/vnd.ollama.image.model":
|
||||
model.ModelPath = filename
|
||||
model.ParentModel = layer.From
|
||||
case "application/vnd.ollama.image.model+metadata", "application/vnd.ollama.image.model+data":
|
||||
filename, err = GetBlobsPath(layer.MergeBase)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
model.ModelPath = filename
|
||||
model.ParentModel = layer.From
|
||||
case "application/vnd.ollama.image.embed":
|
||||
@@ -263,7 +280,7 @@ func GetModel(name string) (*Model, error) {
|
||||
return model, nil
|
||||
}
|
||||
|
||||
func realpath(mfDir, from string) string {
|
||||
func realpath(rel, from string) string {
|
||||
abspath, err := filepath.Abs(from)
|
||||
if err != nil {
|
||||
return from
|
||||
@@ -280,22 +297,15 @@ func realpath(mfDir, from string) string {
|
||||
return filepath.Join(home, from[2:])
|
||||
}
|
||||
|
||||
if _, err := os.Stat(filepath.Join(mfDir, from)); err == nil {
|
||||
if _, err := os.Stat(filepath.Join(rel, from)); err == nil {
|
||||
// this is a file relative to the Modelfile
|
||||
return filepath.Join(mfDir, from)
|
||||
return filepath.Join(rel, from)
|
||||
}
|
||||
|
||||
return abspath
|
||||
}
|
||||
|
||||
func CreateModel(ctx context.Context, name, modelFileDir, quantization string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
|
||||
deleteMap := make(map[string]struct{})
|
||||
if manifest, _, err := GetManifest(ParseModelPath(name)); err == nil {
|
||||
for _, layer := range append(manifest.Layers, manifest.Config) {
|
||||
deleteMap[layer.Digest] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
func CreateModel(ctx context.Context, name, modelFileDir, quantization string, commands []parser.Command, fn func(resp api.ProgressResponse)) (err error) {
|
||||
config := ConfigV2{
|
||||
OS: "linux",
|
||||
Architecture: "amd64",
|
||||
@@ -304,250 +314,222 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
|
||||
},
|
||||
}
|
||||
|
||||
var layers Layers
|
||||
messages := []string{}
|
||||
|
||||
params := make(map[string][]string)
|
||||
fromParams := make(map[string]any)
|
||||
var messages []*api.Message
|
||||
parameters := make(map[string]any)
|
||||
|
||||
var layers []*Layer
|
||||
for _, c := range commands {
|
||||
mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
|
||||
|
||||
switch c.Name {
|
||||
case "model":
|
||||
if strings.HasPrefix(c.Args, "@") {
|
||||
blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))
|
||||
case "model", "adapter":
|
||||
var baseLayers []*layerWithGGML
|
||||
if name := model.ParseName(c.Args); name.IsValid() {
|
||||
baseLayers, err = parseFromModel(ctx, name, fn)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else if strings.HasPrefix(c.Args, "@") {
|
||||
blobpath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.Args = blobPath
|
||||
}
|
||||
|
||||
pathName := realpath(modelFileDir, c.Args)
|
||||
|
||||
ggufName, err := convertModel(name, pathName, fn)
|
||||
if err != nil {
|
||||
var pathErr *fs.PathError
|
||||
switch {
|
||||
case errors.Is(err, zip.ErrFormat):
|
||||
// it's not a safetensor archive
|
||||
case errors.As(err, &pathErr):
|
||||
// it's not a file on disk, could be a model reference
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if ggufName != "" {
|
||||
pathName = ggufName
|
||||
defer os.RemoveAll(ggufName)
|
||||
|
||||
if quantization != "" {
|
||||
quantization = strings.ToUpper(quantization)
|
||||
fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", "F16", quantization)})
|
||||
tempfile, err := os.CreateTemp(filepath.Dir(ggufName), quantization)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer os.RemoveAll(tempfile.Name())
|
||||
|
||||
if err := llm.Quantize(ggufName, tempfile.Name(), quantization); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := tempfile.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
pathName = tempfile.Name()
|
||||
}
|
||||
}
|
||||
|
||||
bin, err := os.Open(pathName)
|
||||
if err != nil {
|
||||
// not a file on disk so must be a model reference
|
||||
modelpath := ParseModelPath(c.Args)
|
||||
manifest, _, err := GetManifest(modelpath)
|
||||
switch {
|
||||
case errors.Is(err, os.ErrNotExist):
|
||||
fn(api.ProgressResponse{Status: "pulling model"})
|
||||
if err := PullModel(ctx, c.Args, ®istryOptions{}, fn); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
manifest, _, err = GetManifest(modelpath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case err != nil:
|
||||
return err
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: "reading model metadata"})
|
||||
fromConfigPath, err := GetBlobsPath(manifest.Config.Digest)
|
||||
blob, err := os.Open(blobpath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer blob.Close()
|
||||
|
||||
fromConfigFile, err := os.Open(fromConfigPath)
|
||||
baseLayers, err = parseFromFile(ctx, blob, fn)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fromConfigFile.Close()
|
||||
} else if file, err := os.Open(realpath(modelFileDir, c.Args)); err == nil {
|
||||
defer file.Close()
|
||||
|
||||
var fromConfig ConfigV2
|
||||
if err := json.NewDecoder(fromConfigFile).Decode(&fromConfig); err != nil {
|
||||
baseLayers, err = parseFromFile(ctx, file, fn)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
return fmt.Errorf("invalid model reference: %s", c.Args)
|
||||
}
|
||||
|
||||
// if the model is still not in gguf format, error out
|
||||
if fromConfig.ModelFormat != "gguf" {
|
||||
return fmt.Errorf("%s is not in gguf format, this base model is not compatible with this version of ollama", c.Args)
|
||||
}
|
||||
|
||||
config.SetModelFormat(fromConfig.ModelFormat)
|
||||
config.SetModelFamily(append(fromConfig.ModelFamilies, fromConfig.ModelFamily)...)
|
||||
config.SetModelType(fromConfig.ModelType)
|
||||
config.SetFileType(fromConfig.FileType)
|
||||
|
||||
for _, layer := range manifest.Layers {
|
||||
deleteMap[layer.Digest] = struct{}{}
|
||||
if layer.MediaType == "application/vnd.ollama.image.params" {
|
||||
fromParamsPath, err := GetBlobsPath(layer.Digest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fromParamsFile, err := os.Open(fromParamsPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fromParamsFile.Close()
|
||||
|
||||
if err := json.NewDecoder(fromParamsFile).Decode(&fromParams); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, modelpath.GetShortTagname())
|
||||
for _, baseLayer := range baseLayers {
|
||||
if quantization != "" && baseLayer.MediaType == "application/vnd.ollama.image.model" {
|
||||
ftype, err := llm.ParseFileType(quantization)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
layers.Add(layer)
|
||||
filetype := baseLayer.GGML.KV().FileType()
|
||||
if !slices.Contains([]string{"F16", "F32"}, filetype) {
|
||||
return errors.New("quantization is only supported for F16 and F32 models")
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", filetype, quantization)})
|
||||
|
||||
blob, err := GetBlobsPath(baseLayer.Digest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
temp, err := os.CreateTemp(filepath.Dir(blob), quantization)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer temp.Close()
|
||||
defer os.Remove(temp.Name())
|
||||
|
||||
if err := llm.Quantize(blob, temp.Name(), ftype); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
baseLayer.Layer, err = NewLayer(temp, baseLayer.Layer.MediaType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
deleteMap[manifest.Config.Digest] = struct{}{}
|
||||
continue
|
||||
if baseLayer.GGML != nil {
|
||||
config.ModelFormat = cmp.Or(config.ModelFormat, baseLayer.GGML.Name())
|
||||
config.ModelFamily = cmp.Or(config.ModelFamily, baseLayer.GGML.KV().Architecture())
|
||||
config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
|
||||
config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType())
|
||||
config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
|
||||
|
||||
f, err := baseLayer.Layer.Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
metadata := io.NewSectionReader(f, 0, baseLayer.GGML.Offset())
|
||||
metadataLayer, err := NewLayer(metadata, "application/vnd.ollama.image.model+metadata")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
metadataLayer.Intermediate = true
|
||||
metadataLayer.MergeBase = baseLayer.Digest
|
||||
|
||||
layers = append(layers, metadataLayer)
|
||||
|
||||
metadataPath, err := GetBlobsPath(metadataLayer.Digest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer os.Remove(metadataPath)
|
||||
|
||||
stat, err := f.Stat()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
data := io.NewSectionReader(f, baseLayer.GGML.Offset(), stat.Size())
|
||||
dataLayer, err := NewLayer(data, "application/vnd.ollama.image.model+data")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dataLayer.Intermediate = true
|
||||
dataLayer.MergeBase = baseLayer.Digest
|
||||
|
||||
layers = append(layers, dataLayer)
|
||||
|
||||
dataPath, err := GetBlobsPath(dataLayer.Digest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer os.Remove(dataPath)
|
||||
continue
|
||||
}
|
||||
|
||||
layers = append(layers, baseLayer.Layer)
|
||||
}
|
||||
defer bin.Close()
|
||||
|
||||
var offset int64
|
||||
for {
|
||||
fn(api.ProgressResponse{Status: "creating model layer"})
|
||||
if _, err := bin.Seek(offset, io.SeekStart); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ggml, size, err := llm.DecodeGGML(bin)
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
} else if errors.Is(err, llm.ErrUnsupportedFormat) {
|
||||
return fmt.Errorf("model binary specified in FROM field is not a valid gguf format model, %w", err)
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
config.SetModelFormat(ggml.Name())
|
||||
config.SetModelFamily(ggml.KV().Architecture())
|
||||
config.SetModelType(format.HumanNumber(ggml.KV().ParameterCount()))
|
||||
config.SetFileType(ggml.KV().FileType())
|
||||
|
||||
mediatype := mediatype
|
||||
if ggml.KV().Architecture() == "clip" {
|
||||
mediatype = "application/vnd.ollama.image.projector"
|
||||
}
|
||||
|
||||
sr := io.NewSectionReader(bin, offset, size)
|
||||
layer, err := NewLayer(sr, mediatype)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
layers.Add(layer)
|
||||
|
||||
offset += size
|
||||
}
|
||||
case "adapter":
|
||||
if strings.HasPrefix(c.Args, "@") {
|
||||
blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.Args = blobPath
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: "creating adapter layer"})
|
||||
bin, err := os.Open(realpath(modelFileDir, c.Args))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer bin.Close()
|
||||
|
||||
_, size, err := llm.DecodeGGML(bin)
|
||||
case "license", "template", "system":
|
||||
blob := strings.NewReader(c.Args)
|
||||
layer, err := NewLayer(blob, mediatype)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sr := io.NewSectionReader(bin, 0, size)
|
||||
layer, err := NewLayer(sr, mediatype)
|
||||
if err != nil {
|
||||
return err
|
||||
if c.Name != "license" {
|
||||
// replace
|
||||
layers = slices.DeleteFunc(layers, func(layer *Layer) bool {
|
||||
return layer.MediaType == mediatype
|
||||
})
|
||||
}
|
||||
|
||||
layers.Add(layer)
|
||||
case "license":
|
||||
fn(api.ProgressResponse{Status: "creating license layer"})
|
||||
|
||||
bin := strings.NewReader(c.Args)
|
||||
layer, err := NewLayer(bin, mediatype)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
layers.Add(layer)
|
||||
case "template", "system":
|
||||
fn(api.ProgressResponse{Status: fmt.Sprintf("creating %s layer", c.Name)})
|
||||
|
||||
bin := strings.NewReader(c.Args)
|
||||
layer, err := NewLayer(bin, mediatype)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
layers.Replace(layer)
|
||||
layers = append(layers, layer)
|
||||
case "message":
|
||||
messages = append(messages, c.Args)
|
||||
role, content, ok := strings.Cut(c.Args, ": ")
|
||||
if !ok {
|
||||
return fmt.Errorf("invalid message: %s", c.Args)
|
||||
}
|
||||
|
||||
messages = append(messages, &api.Message{Role: role, Content: content})
|
||||
default:
|
||||
params[c.Name] = append(params[c.Name], c.Args)
|
||||
ps, err := api.FormatParams(map[string][]string{c.Name: {c.Args}})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for k, v := range ps {
|
||||
if ks, ok := parameters[k].([]string); ok {
|
||||
parameters[k] = append(ks, v.([]string)...)
|
||||
} else if vs, ok := v.([]string); ok {
|
||||
parameters[k] = vs
|
||||
} else {
|
||||
parameters[k] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(messages) > 0 {
|
||||
fn(api.ProgressResponse{Status: "creating parameters layer"})
|
||||
var err2 error
|
||||
layers = slices.DeleteFunc(layers, func(layer *Layer) bool {
|
||||
switch layer.MediaType {
|
||||
case "application/vnd.ollama.image.message":
|
||||
// if there are new messages, remove the inherited ones
|
||||
if len(messages) > 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
msgs := make([]api.Message, 0)
|
||||
return false
|
||||
case "application/vnd.ollama.image.params":
|
||||
// merge inherited parameters with new ones
|
||||
r, err := layer.Open()
|
||||
if err != nil {
|
||||
err2 = err
|
||||
return false
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
for _, m := range messages {
|
||||
// todo: handle images
|
||||
msg := strings.SplitN(m, ": ", 2)
|
||||
msgs = append(msgs, api.Message{Role: msg[0], Content: msg[1]})
|
||||
var ps map[string]any
|
||||
if err := json.NewDecoder(r).Decode(&ps); err != nil {
|
||||
err2 = err
|
||||
return false
|
||||
}
|
||||
|
||||
for k, v := range ps {
|
||||
if _, ok := parameters[k]; !ok {
|
||||
parameters[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
})
|
||||
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
|
||||
if len(messages) > 0 {
|
||||
var b bytes.Buffer
|
||||
if err := json.NewEncoder(&b).Encode(msgs); err != nil {
|
||||
if err := json.NewEncoder(&b).Encode(messages); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -556,39 +538,25 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
|
||||
return err
|
||||
}
|
||||
|
||||
layers.Replace(layer)
|
||||
layers = append(layers, layer)
|
||||
}
|
||||
|
||||
if len(params) > 0 {
|
||||
fn(api.ProgressResponse{Status: "creating parameters layer"})
|
||||
|
||||
formattedParams, err := api.FormatParams(params)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for k, v := range fromParams {
|
||||
if _, ok := formattedParams[k]; !ok {
|
||||
formattedParams[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
if len(parameters) > 0 {
|
||||
var b bytes.Buffer
|
||||
if err := json.NewEncoder(&b).Encode(formattedParams); err != nil {
|
||||
if err := json.NewEncoder(&b).Encode(parameters); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: "creating config layer"})
|
||||
layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
layers.Replace(layer)
|
||||
layers = append(layers, layer)
|
||||
}
|
||||
|
||||
digests := make([]string, len(layers.items))
|
||||
for i, layer := range layers.items {
|
||||
digests := make([]string, len(layers))
|
||||
for i, layer := range layers {
|
||||
digests[i] = layer.Digest
|
||||
}
|
||||
|
||||
@@ -599,36 +567,38 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
|
||||
return err
|
||||
}
|
||||
|
||||
configLayer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
|
||||
layer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
delete(deleteMap, configLayer.Digest)
|
||||
for _, layer := range append(layers, layer) {
|
||||
if layer.message != "" {
|
||||
fn(api.ProgressResponse{Status: layer.message})
|
||||
}
|
||||
}
|
||||
|
||||
for _, layer := range append(layers.items, configLayer) {
|
||||
committed, err := layer.Commit()
|
||||
if err != nil {
|
||||
return err
|
||||
unref := make(map[string]struct{})
|
||||
if manifest, _, err := GetManifest(ParseModelPath(name)); err == nil {
|
||||
for _, layer := range manifest.Layers {
|
||||
if !slices.Contains(digests, layer.Digest) {
|
||||
unref[layer.Digest] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
status := "writing layer"
|
||||
if !committed {
|
||||
status = "using already created layer"
|
||||
if manifest.Config.Digest != layer.Digest {
|
||||
unref[manifest.Config.Digest] = struct{}{}
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: fmt.Sprintf("%s %s", status, layer.Digest)})
|
||||
|
||||
delete(deleteMap, layer.Digest)
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: "writing manifest"})
|
||||
if err := WriteManifest(name, configLayer, layers.items); err != nil {
|
||||
if err := WriteManifest(name, layer, layers); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
|
||||
if err := deleteUnusedLayers(nil, deleteMap, false); err != nil {
|
||||
if os.Getenv("OLLAMA_NOPRUNE") == "" && len(unref) > 0 {
|
||||
fn(api.ProgressResponse{Status: "removing unused layers"})
|
||||
if err := deleteUnusedLayers(nil, unref, false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -637,74 +607,6 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
|
||||
return nil
|
||||
}
|
||||
|
||||
func convertModel(name, path string, fn func(resp api.ProgressResponse)) (string, error) {
|
||||
r, err := zip.OpenReader(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
tempDir, err := os.MkdirTemp("", "ollama-convert")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer os.RemoveAll(tempDir)
|
||||
|
||||
fn(api.ProgressResponse{Status: "unpacking model metadata"})
|
||||
for _, f := range r.File {
|
||||
fpath := filepath.Join(tempDir, f.Name)
|
||||
outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
rc, err := f.Open()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
_, err = io.Copy(outFile, rc)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
outFile.Close()
|
||||
rc.Close()
|
||||
}
|
||||
|
||||
mf, err := convert.GetModelFormat(tempDir)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
params, err := mf.GetParams(tempDir)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
mArch, err := mf.GetModelArch(name, tempDir, params)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: "processing tensors"})
|
||||
if err := mArch.GetTensors(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if err := mArch.LoadVocab(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: "converting model"})
|
||||
path, err = mArch.WriteGGUF()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func CopyModel(src, dst model.Name) error {
|
||||
if !dst.IsFullyQualified() {
|
||||
return model.Unqualified(dst)
|
||||
@@ -774,6 +676,9 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{},
|
||||
|
||||
for _, layer := range manifest.Layers {
|
||||
delete(deleteMap, layer.Digest)
|
||||
if layer.MergeBase != "" {
|
||||
delete(deleteMap, layer.MergeBase)
|
||||
}
|
||||
}
|
||||
|
||||
delete(deleteMap, manifest.Config.Digest)
|
||||
@@ -880,6 +785,9 @@ func DeleteModel(name string) error {
|
||||
deleteMap := make(map[string]struct{})
|
||||
for _, layer := range manifest.Layers {
|
||||
deleteMap[layer.Digest] = struct{}{}
|
||||
if layer.MergeBase != "" {
|
||||
deleteMap[layer.MergeBase] = struct{}{}
|
||||
}
|
||||
}
|
||||
deleteMap[manifest.Config.Digest] = struct{}{}
|
||||
|
||||
@@ -901,67 +809,6 @@ func DeleteModel(name string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func ShowModelfile(model *Model) (string, error) {
|
||||
var mt struct {
|
||||
*Model
|
||||
From string
|
||||
Parameters map[string][]any
|
||||
}
|
||||
|
||||
mt.Parameters = make(map[string][]any)
|
||||
for k, v := range model.Options {
|
||||
if s, ok := v.([]any); ok {
|
||||
mt.Parameters[k] = s
|
||||
continue
|
||||
}
|
||||
|
||||
mt.Parameters[k] = []any{v}
|
||||
}
|
||||
|
||||
mt.Model = model
|
||||
mt.From = model.ModelPath
|
||||
|
||||
if model.ParentModel != "" {
|
||||
mt.From = model.ParentModel
|
||||
}
|
||||
|
||||
modelFile := `# Modelfile generated by "ollama show"
|
||||
# To build a new Modelfile based on this one, replace the FROM line with:
|
||||
# FROM {{ .ShortName }}
|
||||
|
||||
FROM {{ .From }}
|
||||
TEMPLATE """{{ .Template }}"""
|
||||
|
||||
{{- if .System }}
|
||||
SYSTEM """{{ .System }}"""
|
||||
{{- end }}
|
||||
|
||||
{{- range $adapter := .AdapterPaths }}
|
||||
ADAPTER {{ $adapter }}
|
||||
{{- end }}
|
||||
|
||||
{{- range $k, $v := .Parameters }}
|
||||
{{- range $parameter := $v }}
|
||||
PARAMETER {{ $k }} {{ printf "%#v" $parameter }}
|
||||
{{- end }}
|
||||
{{- end }}`
|
||||
|
||||
tmpl, err := template.New("").Parse(modelFile)
|
||||
if err != nil {
|
||||
slog.Info(fmt.Sprintf("error parsing template: %q", err))
|
||||
return "", err
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
|
||||
if err = tmpl.Execute(&buf, mt); err != nil {
|
||||
slog.Info(fmt.Sprintf("error executing template: %q", err))
|
||||
return "", err
|
||||
}
|
||||
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
|
||||
mp := ParseModelPath(name)
|
||||
fn(api.ProgressResponse{Status: "retrieving manifest"})
|
||||
@@ -980,6 +827,49 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
|
||||
layers = append(layers, manifest.Layers...)
|
||||
layers = append(layers, manifest.Config)
|
||||
|
||||
for _, layer := range layers {
|
||||
if !layer.Intermediate {
|
||||
continue
|
||||
}
|
||||
|
||||
switch layer.MediaType {
|
||||
case "application/vnd.ollama.image.model+metadata", "application/vnd.ollama.image.model+data":
|
||||
if _, err := GetBlobsPath(layer.MergeBase); errors.Is(err, os.ErrNotExist) {
|
||||
filename, err := GetBlobsPath(layer.MergeBase)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
f, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
ggml, size, err := llm.DecodeGGML(f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := f.Seek(0, io.SeekStart); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
metadata := io.NewSectionReader(f, 0, ggml.Offset())
|
||||
if _, err := NewLayer(metadata, "application/vnd.ollama.image.model+metadata"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
data := io.NewSectionReader(f, ggml.Offset(), size)
|
||||
if _, err := NewLayer(data, "application/vnd.ollama.image.model+metadata"); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, layer := range layers {
|
||||
if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil {
|
||||
slog.Info(fmt.Sprintf("error uploading blob: %v", err))
|
||||
@@ -1049,6 +939,27 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
|
||||
layers = append(layers, manifest.Config)
|
||||
|
||||
for _, layer := range layers {
|
||||
if layer.Intermediate {
|
||||
filename, err := GetBlobsPath(layer.MergeBase)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
|
||||
// pass
|
||||
} else if err != nil {
|
||||
return err
|
||||
} else {
|
||||
fn(api.ProgressResponse{
|
||||
Status: fmt.Sprintf("pulling %s", layer.Digest[7:19]),
|
||||
Digest: layer.Digest,
|
||||
Total: layer.Size,
|
||||
Completed: layer.Size,
|
||||
})
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if err := downloadBlob(
|
||||
ctx,
|
||||
downloadOpts{
|
||||
@@ -1063,9 +974,59 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
|
||||
}
|
||||
delete(deleteMap, manifest.Config.Digest)
|
||||
|
||||
type mergedLayer struct {
|
||||
Metadata, Data *Layer
|
||||
}
|
||||
|
||||
mergedLayers := make(map[string]mergedLayer)
|
||||
for _, layer := range manifest.Layers {
|
||||
filename, err := GetBlobsPath(layer.MergeBase)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
|
||||
merged := mergedLayers[layer.MergeBase]
|
||||
if layer.MediaType == "application/vnd.ollama.image.model+metadata" {
|
||||
merged.Metadata = layer
|
||||
} else if layer.MediaType == "application/vnd.ollama.image.model+data" {
|
||||
merged.Data = layer
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
|
||||
mergedLayers[layer.MergeBase] = merged
|
||||
} else if err != nil {
|
||||
return err
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
for _, mergedLayer := range mergedLayers {
|
||||
fn(api.ProgressResponse{Status: "merging layers"})
|
||||
metadata, err := mergedLayer.Metadata.Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer metadata.Close()
|
||||
|
||||
data, err := mergedLayer.Data.Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer data.Close()
|
||||
|
||||
if _, err := NewLayer(io.MultiReader(metadata, data), "application/vnd.ollama.image.model"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: "verifying sha256 digest"})
|
||||
for _, layer := range layers {
|
||||
if err := verifyBlob(layer.Digest); err != nil {
|
||||
if err := verifyBlob(layer.Digest); errors.Is(err, os.ErrNotExist) && layer.Intermediate {
|
||||
// pass
|
||||
} else if err != nil {
|
||||
if errors.Is(err, errDigestMismatch) {
|
||||
// something went wrong, delete the blob
|
||||
fp, err := GetBlobsPath(layer.Digest)
|
||||
|
||||
@@ -5,39 +5,18 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
type Layers struct {
|
||||
items []*Layer
|
||||
}
|
||||
|
||||
func (ls *Layers) Add(layer *Layer) {
|
||||
if layer.Size > 0 {
|
||||
ls.items = append(ls.items, layer)
|
||||
}
|
||||
}
|
||||
|
||||
func (ls *Layers) Replace(layer *Layer) {
|
||||
if layer.Size > 0 {
|
||||
mediatype := layer.MediaType
|
||||
layers := slices.DeleteFunc(ls.items, func(l *Layer) bool {
|
||||
return l.MediaType == mediatype
|
||||
})
|
||||
|
||||
ls.items = append(layers, layer)
|
||||
}
|
||||
}
|
||||
|
||||
type Layer struct {
|
||||
MediaType string `json:"mediaType"`
|
||||
Digest string `json:"digest"`
|
||||
Size int64 `json:"size"`
|
||||
From string `json:"from,omitempty"`
|
||||
|
||||
tempFileName string
|
||||
Intermediate bool `json:"intermediate,omitempty"`
|
||||
MergeBase string `json:"merge_base,omitempty"`
|
||||
|
||||
message string
|
||||
}
|
||||
|
||||
func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
|
||||
@@ -46,14 +25,12 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
const delimiter = "-"
|
||||
|
||||
pattern := strings.Join([]string{"sha256", "*-partial"}, delimiter)
|
||||
temp, err := os.CreateTemp(blobs, pattern)
|
||||
temp, err := os.CreateTemp(blobs, "sha256-")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer temp.Close()
|
||||
defer os.Remove(temp.Name())
|
||||
|
||||
sha256sum := sha256.New()
|
||||
n, err := io.Copy(io.MultiWriter(temp, sha256sum), r)
|
||||
@@ -61,11 +38,29 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := temp.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
digest := fmt.Sprintf("sha256:%x", sha256sum.Sum(nil))
|
||||
blob, err := GetBlobsPath(digest)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
status := "using existing layer"
|
||||
if _, err := os.Stat(blob); err != nil {
|
||||
status = "creating new layer"
|
||||
if err := os.Rename(temp.Name(), blob); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &Layer{
|
||||
MediaType: mediatype,
|
||||
Digest: fmt.Sprintf("sha256:%x", sha256sum.Sum(nil)),
|
||||
Size: n,
|
||||
tempFileName: temp.Name(),
|
||||
MediaType: mediatype,
|
||||
Digest: digest,
|
||||
Size: n,
|
||||
message: fmt.Sprintf("%s %s", status, digest),
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -85,21 +80,15 @@ func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) {
|
||||
Digest: digest,
|
||||
Size: fi.Size(),
|
||||
From: from,
|
||||
message: fmt.Sprintf("using existing layer %s", digest),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (l *Layer) Commit() (bool, error) {
|
||||
// always remove temp
|
||||
defer os.Remove(l.tempFileName)
|
||||
|
||||
func (l *Layer) Open() (*os.File, error) {
|
||||
blob, err := GetBlobsPath(l.Digest)
|
||||
if err != nil {
|
||||
return false, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if _, err := os.Stat(blob); err != nil {
|
||||
return true, os.Rename(l.tempFileName, blob)
|
||||
}
|
||||
|
||||
return false, nil
|
||||
return os.Open(blob)
|
||||
}
|
||||
259
server/model.go
Normal file
259
server/model.go
Normal file
@@ -0,0 +1,259 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/convert"
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
)
|
||||
|
||||
type layerWithGGML struct {
|
||||
*Layer
|
||||
*llm.GGML
|
||||
}
|
||||
|
||||
func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
|
||||
modelpath := ParseModelPath(name.String())
|
||||
manifest, _, err := GetManifest(modelpath)
|
||||
switch {
|
||||
case errors.Is(err, os.ErrNotExist):
|
||||
if err := PullModel(ctx, name.String(), ®istryOptions{}, fn); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
modelpath = ParseModelPath(name.String())
|
||||
manifest, _, err = GetManifest(modelpath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
case err != nil:
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, layer := range manifest.Layers {
|
||||
layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, modelpath.GetShortTagname())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch layer.MediaType {
|
||||
case "application/vnd.ollama.image.model",
|
||||
"application/vnd.ollama.image.projector",
|
||||
"application/vnd.ollama.image.adapter":
|
||||
blobpath, err := GetBlobsPath(layer.Digest)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
blob, err := os.Open(blobpath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer blob.Close()
|
||||
|
||||
ggml, _, err := llm.DecodeGGML(blob)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
layers = append(layers, &layerWithGGML{layer, ggml})
|
||||
default:
|
||||
layers = append(layers, &layerWithGGML{layer, nil})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return layers, nil
|
||||
}
|
||||
|
||||
func parseFromZipFile(_ context.Context, file *os.File, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
|
||||
stat, err := file.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
r, err := zip.NewReader(file, stat.Size())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tempdir, err := os.MkdirTemp(filepath.Dir(file.Name()), "")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer os.RemoveAll(tempdir)
|
||||
|
||||
fn(api.ProgressResponse{Status: "unpacking model metadata"})
|
||||
for _, f := range r.File {
|
||||
// TODO(mxyng): this should not write out all files to disk
|
||||
outfile, err := os.Create(filepath.Join(tempdir, f.Name))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
infile, err := f.Open()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if _, err = io.Copy(outfile, infile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := outfile.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := infile.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
mf, err := convert.GetModelFormat(tempdir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
params, err := mf.GetParams(tempdir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mArch, err := mf.GetModelArch("", tempdir, params)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: "processing tensors"})
|
||||
if err := mArch.GetTensors(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := mArch.LoadVocab(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: "converting model"})
|
||||
|
||||
// TODO(mxyng): this should write directly into a layer
|
||||
// e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
|
||||
temp, err := os.CreateTemp(tempdir, "fp16")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer temp.Close()
|
||||
defer os.Remove(temp.Name())
|
||||
|
||||
if err = mArch.WriteGGUF(temp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if _, err := temp.Seek(0, io.SeekStart); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
layer, err := NewLayer(temp, "application/vnd.ollama.image.model")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("aaa: %w", err)
|
||||
}
|
||||
|
||||
blobpath, err := GetBlobsPath(layer.Digest)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
bin, err := os.Open(blobpath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer bin.Close()
|
||||
|
||||
ggml, _, err := llm.DecodeGGML(bin)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
layer, err = NewLayerFromLayer(layer.Digest, layer.MediaType, "")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
layers = append(layers, &layerWithGGML{layer, ggml})
|
||||
return layers, nil
|
||||
}
|
||||
|
||||
func parseFromFile(ctx context.Context, file *os.File, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
|
||||
sr := io.NewSectionReader(file, 0, 512)
|
||||
contentType, err := detectContentType(sr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch contentType {
|
||||
case "gguf", "ggla":
|
||||
// noop
|
||||
case "application/zip":
|
||||
return parseFromZipFile(ctx, file, fn)
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported content type: %s", contentType)
|
||||
}
|
||||
|
||||
stat, err := file.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var offset int64
|
||||
for offset < stat.Size() {
|
||||
ggml, n, err := llm.DecodeGGML(file)
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mediatype := "application/vnd.ollama.image.model"
|
||||
if ggml.Name() == "ggla" {
|
||||
mediatype = "application/vnd.ollama.image.adapter"
|
||||
} else if ggml.KV().Architecture() == "clip" {
|
||||
mediatype = "application/vnd.ollama.image.projector"
|
||||
}
|
||||
|
||||
layer, err := NewLayer(io.NewSectionReader(file, offset, n), mediatype)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
layers = append(layers, &layerWithGGML{layer, ggml})
|
||||
offset = n
|
||||
}
|
||||
|
||||
return layers, nil
|
||||
}
|
||||
|
||||
func detectContentType(r io.Reader) (string, error) {
|
||||
var b bytes.Buffer
|
||||
if _, err := io.Copy(&b, r); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if contentType := llm.DetectGGMLType(b.Bytes()); contentType != "" {
|
||||
return contentType, nil
|
||||
}
|
||||
|
||||
if contentType := http.DetectContentType(b.Bytes()); contentType != "application/octet-stream" {
|
||||
return contentType, nil
|
||||
}
|
||||
|
||||
return "unknown", nil
|
||||
}
|
||||
@@ -580,7 +580,7 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
|
||||
ctx, cancel := context.WithCancel(c.Request.Context())
|
||||
defer cancel()
|
||||
|
||||
if err := CreateModel(ctx, model, filepath.Dir(req.Path), req.Quantization, commands, fn); err != nil {
|
||||
if err := CreateModel(ctx, model, filepath.Dir(req.Path), strings.ToUpper(req.Quantization), commands, fn); err != nil {
|
||||
ch <- gin.H{"error": err.Error()}
|
||||
}
|
||||
}()
|
||||
@@ -728,12 +728,12 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
||||
}
|
||||
}
|
||||
|
||||
mf, err := ShowModelfile(model)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp.Modelfile = mf
|
||||
var sb strings.Builder
|
||||
fmt.Fprintln(&sb, "# Modelfile generate by \"ollama show\"")
|
||||
fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
|
||||
fmt.Fprintf(&sb, "# FROM %s\n\n", model.ShortName)
|
||||
fmt.Fprint(&sb, parser.Format(model.Commands()))
|
||||
resp.Modelfile = sb.String()
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
@@ -810,16 +810,13 @@ func (s *Server) CopyModelHandler(c *gin.Context) {
|
||||
|
||||
src := model.ParseName(r.Source)
|
||||
if !src.IsValid() {
|
||||
_ = c.Error(fmt.Errorf("source %q is invalid", r.Source))
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("source %q is invalid", r.Source)})
|
||||
return
|
||||
}
|
||||
|
||||
dst := model.ParseName(r.Destination)
|
||||
if !dst.IsValid() {
|
||||
_ = c.Error(fmt.Errorf("destination %q is invalid", r.Destination))
|
||||
}
|
||||
|
||||
if len(c.Errors) > 0 {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": c.Errors.Errors()})
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("destination %q is invalid", r.Source)})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -875,11 +872,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if _, err := layer.Commit(); err != nil {
|
||||
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
c.Status(http.StatusCreated)
|
||||
}
|
||||
|
||||
|
||||
@@ -124,14 +124,12 @@ func Test_Routes(t *testing.T) {
|
||||
Method: http.MethodPost,
|
||||
Path: "/api/create",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
f, err := os.CreateTemp(t.TempDir(), "ollama-model")
|
||||
assert.Nil(t, err)
|
||||
defer f.Close()
|
||||
fname := createTestFile(t, "ollama-model")
|
||||
|
||||
stream := false
|
||||
createReq := api.CreateRequest{
|
||||
Name: "t-bone",
|
||||
Modelfile: fmt.Sprintf("FROM %s", f.Name()),
|
||||
Modelfile: fmt.Sprintf("FROM %s", fname),
|
||||
Stream: &stream,
|
||||
}
|
||||
jsonData, err := json.Marshal(createReq)
|
||||
@@ -216,28 +214,25 @@ func Test_Routes(t *testing.T) {
|
||||
httpSrv := httptest.NewServer(router)
|
||||
t.Cleanup(httpSrv.Close)
|
||||
|
||||
workDir, err := os.MkdirTemp("", "ollama-test")
|
||||
assert.Nil(t, err)
|
||||
defer os.RemoveAll(workDir)
|
||||
os.Setenv("OLLAMA_MODELS", workDir)
|
||||
t.Setenv("OLLAMA_MODELS", t.TempDir())
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Logf("Running Test: [%s]", tc.Name)
|
||||
u := httpSrv.URL + tc.Path
|
||||
req, err := http.NewRequestWithContext(context.TODO(), tc.Method, u, nil)
|
||||
assert.Nil(t, err)
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
u := httpSrv.URL + tc.Path
|
||||
req, err := http.NewRequestWithContext(context.TODO(), tc.Method, u, nil)
|
||||
assert.Nil(t, err)
|
||||
|
||||
if tc.Setup != nil {
|
||||
tc.Setup(t, req)
|
||||
}
|
||||
if tc.Setup != nil {
|
||||
tc.Setup(t, req)
|
||||
}
|
||||
|
||||
resp, err := httpSrv.Client().Do(req)
|
||||
assert.Nil(t, err)
|
||||
defer resp.Body.Close()
|
||||
|
||||
if tc.Expected != nil {
|
||||
tc.Expected(t, resp)
|
||||
}
|
||||
resp, err := httpSrv.Client().Do(req)
|
||||
assert.Nil(t, err)
|
||||
defer resp.Body.Close()
|
||||
|
||||
if tc.Expected != nil {
|
||||
tc.Expected(t, resp)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -250,6 +250,7 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
|
||||
defer runner.refMu.Unlock()
|
||||
if runner.expireTimer != nil {
|
||||
runner.expireTimer.Stop()
|
||||
runner.expireTimer = nil
|
||||
}
|
||||
s.expiredCh <- runner
|
||||
})
|
||||
@@ -296,6 +297,10 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
|
||||
runner.refMu.Lock()
|
||||
defer runner.refMu.Unlock()
|
||||
runner.refCount++
|
||||
if runner.expireTimer != nil {
|
||||
runner.expireTimer.Stop()
|
||||
runner.expireTimer = nil
|
||||
}
|
||||
runner.sessionDuration = pending.sessionDuration
|
||||
pending.successCh <- runner
|
||||
go func() {
|
||||
@@ -426,6 +431,10 @@ type runnerRef struct {
|
||||
|
||||
// The refMu must already be held when calling unload
|
||||
func (runner *runnerRef) unload() {
|
||||
if runner.expireTimer != nil {
|
||||
runner.expireTimer.Stop()
|
||||
runner.expireTimer = nil
|
||||
}
|
||||
if runner.llama != nil {
|
||||
runner.llama.Close()
|
||||
}
|
||||
|
||||
@@ -143,18 +143,28 @@ func ParseNameBare(s string) Name {
|
||||
n.RawDigest = MissingPart
|
||||
}
|
||||
|
||||
s, n.Tag, _ = cutPromised(s, ":")
|
||||
// "/" is an illegal tag character, so we can use it to split the host
|
||||
if strings.LastIndex(s, ":") > strings.LastIndex(s, "/") {
|
||||
s, n.Tag, _ = cutPromised(s, ":")
|
||||
}
|
||||
|
||||
s, n.Model, promised = cutPromised(s, "/")
|
||||
if !promised {
|
||||
n.Model = s
|
||||
return n
|
||||
}
|
||||
|
||||
s, n.Namespace, promised = cutPromised(s, "/")
|
||||
if !promised {
|
||||
n.Namespace = s
|
||||
return n
|
||||
}
|
||||
n.Host = s
|
||||
|
||||
scheme, host, ok := strings.Cut(s, "://")
|
||||
if ! ok {
|
||||
host = scheme
|
||||
}
|
||||
n.Host = host
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"testing"
|
||||
@@ -15,8 +16,19 @@ func TestParseNameParts(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
want Name
|
||||
wantFilepath string
|
||||
wantValidDigest bool
|
||||
}{
|
||||
{
|
||||
in: "scheme://host:port/namespace/model:tag",
|
||||
want: Name{
|
||||
Host: "host:port",
|
||||
Namespace: "namespace",
|
||||
Model: "model",
|
||||
Tag: "tag",
|
||||
},
|
||||
wantFilepath: filepath.Join("host:port", "namespace", "model", "tag"),
|
||||
},
|
||||
{
|
||||
in: "host/namespace/model:tag",
|
||||
want: Name{
|
||||
@@ -25,6 +37,17 @@ func TestParseNameParts(t *testing.T) {
|
||||
Model: "model",
|
||||
Tag: "tag",
|
||||
},
|
||||
wantFilepath: filepath.Join("host", "namespace", "model", "tag"),
|
||||
},
|
||||
{
|
||||
in: "host:port/namespace/model:tag",
|
||||
want: Name{
|
||||
Host: "host:port",
|
||||
Namespace: "namespace",
|
||||
Model: "model",
|
||||
Tag: "tag",
|
||||
},
|
||||
wantFilepath: filepath.Join("host:port", "namespace", "model", "tag"),
|
||||
},
|
||||
{
|
||||
in: "host/namespace/model",
|
||||
@@ -33,6 +56,16 @@ func TestParseNameParts(t *testing.T) {
|
||||
Namespace: "namespace",
|
||||
Model: "model",
|
||||
},
|
||||
wantFilepath: filepath.Join("host", "namespace", "model", "latest"),
|
||||
},
|
||||
{
|
||||
in: "host:port/namespace/model",
|
||||
want: Name{
|
||||
Host: "host:port",
|
||||
Namespace: "namespace",
|
||||
Model: "model",
|
||||
},
|
||||
wantFilepath: filepath.Join("host:port", "namespace", "model", "latest"),
|
||||
},
|
||||
{
|
||||
in: "namespace/model",
|
||||
@@ -40,12 +73,14 @@ func TestParseNameParts(t *testing.T) {
|
||||
Namespace: "namespace",
|
||||
Model: "model",
|
||||
},
|
||||
wantFilepath: filepath.Join("registry.ollama.ai", "namespace", "model", "latest"),
|
||||
},
|
||||
{
|
||||
in: "model",
|
||||
want: Name{
|
||||
Model: "model",
|
||||
},
|
||||
wantFilepath: filepath.Join("registry.ollama.ai", "library", "model", "latest"),
|
||||
},
|
||||
{
|
||||
in: "h/nn/mm:t",
|
||||
@@ -55,6 +90,7 @@ func TestParseNameParts(t *testing.T) {
|
||||
Model: "mm",
|
||||
Tag: "t",
|
||||
},
|
||||
wantFilepath: filepath.Join("h", "nn", "mm", "t"),
|
||||
},
|
||||
{
|
||||
in: part80 + "/" + part80 + "/" + part80 + ":" + part80,
|
||||
@@ -64,6 +100,7 @@ func TestParseNameParts(t *testing.T) {
|
||||
Model: part80,
|
||||
Tag: part80,
|
||||
},
|
||||
wantFilepath: filepath.Join(part80, part80, part80, part80),
|
||||
},
|
||||
{
|
||||
in: part350 + "/" + part80 + "/" + part80 + ":" + part80,
|
||||
@@ -73,6 +110,7 @@ func TestParseNameParts(t *testing.T) {
|
||||
Model: part80,
|
||||
Tag: part80,
|
||||
},
|
||||
wantFilepath: filepath.Join(part350, part80, part80, part80),
|
||||
},
|
||||
{
|
||||
in: "@digest",
|
||||
@@ -97,6 +135,11 @@ func TestParseNameParts(t *testing.T) {
|
||||
if !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("parseName(%q) = %v; want %v", tt.in, got, tt.want)
|
||||
}
|
||||
|
||||
got = ParseName(tt.in)
|
||||
if tt.wantFilepath != "" && got.Filepath() != tt.wantFilepath {
|
||||
t.Errorf("parseName(%q).Filepath() = %q; want %q", tt.in, got.Filepath(), tt.wantFilepath)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user