Compare commits
32 Commits
paligemma-...v0.3.9
| SHA1 |
|---|
| a1cef4d0a5 |
| c41f0b9e6c |
| 142cbb722d |
| 9468c6824a |
| 11018196e0 |
| 56346ccfa3 |
| 8e4e509fa4 |
| 47c2b947a9 |
| 5eb77bf976 |
| e4d0a9c325 |
| 7416ced70f |
| 9cfd2dd3e3 |
| 8e6da3cbc5 |
| d9d50c43cc |
| 6c1c1ad6a9 |
| 93ea9240ae |
| 413ae39f3c |
| 60e47573a6 |
| d13c3daa0b |
| 1713eddcd0 |
| 4e1c4f6e0b |
| 397cae7962 |
| 1c70a00f71 |
| eae3af6807 |
| 3eb08377f8 |
| ac80010db8 |
| 47fa0839b9 |
| 0f92b19bec |
| 69be940bf6 |
| 9638c24c58 |
| bb362caf88 |
| 386af6c1a0 |
@@ -32,6 +32,10 @@ linters:
 linters-settings:
   gci:
     sections: [standard, default, localmodule]
+  staticcheck:
+    checks:
+      - all
+      - -SA1019 # omit Deprecated check
 severity:
   default-severity: error
   rules:
@@ -337,6 +337,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
 - [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
+- [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
 - [Nix package](https://search.nixos.org/packages?channel=24.05&show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
+- [Flox](https://flox.dev/blog/ollama-part-one)

 ### Libraries
16 api/types.go
@@ -296,15 +296,17 @@ type EmbeddingResponse struct {
 // CreateRequest is the request passed to [Client.Create].
 type CreateRequest struct {
 	Model     string `json:"model"`
-	Path      string `json:"path"`
 	Modelfile string `json:"modelfile"`
 	Stream    *bool  `json:"stream,omitempty"`
 	Quantize  string `json:"quantize,omitempty"`

-	// Name is deprecated, see Model
+	// Deprecated: set the model name with Model instead
 	Name string `json:"name"`

-	// Quantization is deprecated, see Quantize
+	// Deprecated: set the file content with Modelfile instead
+	Path string `json:"path"`
+
+	// Deprecated: use Quantize instead
 	Quantization string `json:"quantization,omitempty"`
 }

@@ -312,7 +314,7 @@ type CreateRequest struct {
 type DeleteRequest struct {
 	Model string `json:"model"`

-	// Name is deprecated, see Model
+	// Deprecated: set the model name with Model instead
 	Name string `json:"name"`
 }

@@ -327,7 +329,7 @@ type ShowRequest struct {

 	Options map[string]interface{} `json:"options"`

-	// Name is deprecated, see Model
+	// Deprecated: set the model name with Model instead
 	Name string `json:"name"`
 }

@@ -359,7 +361,7 @@ type PullRequest struct {
 	Password string `json:"password"`
 	Stream   *bool  `json:"stream,omitempty"`

-	// Name is deprecated, see Model
+	// Deprecated: set the model name with Model instead
 	Name string `json:"name"`
 }

@@ -380,7 +382,7 @@ type PushRequest struct {
 	Password string `json:"password"`
 	Stream   *bool  `json:"stream,omitempty"`

-	// Name is deprecated, see Model
+	// Deprecated: set the model name with Model instead
 	Name string `json:"name"`
 }
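The renamed fields keep old clients compiling while steering new code toward `Model`, `Modelfile`, and `Quantize`. A minimal sketch of a create call that uses only the non-deprecated fields (the model name and Modelfile contents are illustrative; it assumes this repository's `api` package):

```go
package main

import (
	"context"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.CreateRequest{
		Model:     "mymodel",       // replaces the deprecated Name field
		Modelfile: "FROM llama3.1", // replaces the deprecated Path field
		Quantize:  "q4_K_M",        // replaces the deprecated Quantization field
	}

	// Create streams progress updates to the callback.
	err = client.Create(context.Background(), req, func(resp api.ProgressResponse) error {
		log.Println(resp.Status)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}
```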
@@ -87,7 +87,7 @@ DialogFontSize=12

 [Files]
 Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
-Source: "..\ollama.exe"; DestDir: "{app}\bin"; Flags: ignoreversion 64bit
+Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
 Source: "..\dist\windows-{#ARCH}\lib\ollama\runners\*"; DestDir: "{app}\lib\ollama\runners"; Flags: ignoreversion 64bit recursesubdirs
 Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
 Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion

@@ -99,7 +99,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
 Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"

 [Run]
-Filename: "{cmd}"; Parameters: "/C set PATH={app}\bin;%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
+Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden

 [UninstallRun]
 ; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden

@@ -134,8 +134,8 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi

 [Registry]
 Root: HKCU; Subkey: "Environment"; \
-    ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}\bin"; \
-    Check: NeedsAddPath('{app}\bin')
+    ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \
+    Check: NeedsAddPath('{app}')

 [Code]
@@ -89,7 +89,7 @@ func TestMain(m *testing.M) {
 	os.Exit(m.Run())
 }

-func TestConvertFull(t *testing.T) {
+func TestConvertModel(t *testing.T) {
 	cases := []string{
 		"Meta-Llama-3-8B-Instruct",
 		"Meta-Llama-3.1-8B-Instruct",

@@ -140,6 +140,107 @@ func TestConvertFull(t *testing.T) {
 	}
 }

+func TestConvertInvalidDatatype(t *testing.T) {
+	f, err := os.CreateTemp(t.TempDir(), "testmodel")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+
+	tempDir := t.TempDir()
+	generateSafetensorTestData(t, tempDir)
+
+	err = ConvertModel(os.DirFS(tempDir), f)
+	if err == nil || err.Error() != "unsupported safetensors model" {
+		t.Errorf("expected error but didn't get one")
+	}
+}
+
+func generateSafetensorTestData(t *testing.T, tempDir string) {
+	type tensorData struct {
+		Offsets []int  `json:"data_offsets"`
+		Type    string `json:"dtype"`
+		Shape   []int  `json:"shape"`
+	}
+	offset := 4096 * 14336
+
+	td := map[string]*tensorData{}
+	td["model.layers.0.mlp.down_proj.weight"] = &tensorData{
+		Offsets: []int{0, offset},
+		Type:    "I8",
+		Shape:   []int{4096, 14336},
+	}
+	td["model.layers.0.mlp.down_proj.weight_format"] = &tensorData{
+		Offsets: []int{offset, offset},
+		Type:    "U8",
+		Shape:   []int{},
+	}
+
+	data, err := json.Marshal(td)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var buf bytes.Buffer
+
+	l := int64(len(data))
+	err = binary.Write(&buf, binary.LittleEndian, l)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	_, err = buf.Write(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	fdata, err := os.Create(filepath.Join(tempDir, "model-00001-of-00001.safetensors"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer fdata.Close()
+
+	_, err = fdata.Write(buf.Bytes())
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	configData := `
+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ]
+}
+`
+
+	f, err := os.Create(filepath.Join(tempDir, "config.json"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+
+	_, err = f.WriteString(configData)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	tokenizerData := `
+{
+}
+`
+
+	f, err = os.Create(filepath.Join(tempDir, "tokenizer.json"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+
+	_, err = f.WriteString(tokenizerData)
+	if err != nil {
+		t.Fatal(err)
+	}
+}
+
 func TestConvertAdapter(t *testing.T) {
 	type AdapterCase struct {
 		Name string
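The generated test data above follows the safetensors container layout: an 8-byte little-endian header length, then a JSON header mapping tensor names to their dtype, shape, and data offsets. A minimal sketch of reading that header back (a hypothetical standalone helper, not part of the convert package; no bounds checking on the header size):

```go
package main

import (
	"encoding/binary"
	"encoding/json"
	"fmt"
	"io"
	"os"
)

// readSafetensorsHeader reads the 8-byte little-endian header size, then
// decodes the JSON header that follows it.
func readSafetensorsHeader(r io.Reader) (map[string]json.RawMessage, error) {
	var n int64
	if err := binary.Read(r, binary.LittleEndian, &n); err != nil {
		return nil, err
	}
	buf := make([]byte, n)
	if _, err := io.ReadFull(r, buf); err != nil {
		return nil, err
	}
	var header map[string]json.RawMessage
	if err := json.Unmarshal(buf, &header); err != nil {
		return nil, err
	}
	return header, nil
}

func main() {
	f, err := os.Open("model-00001-of-00001.safetensors")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	header, err := readSafetensorsHeader(f)
	if err != nil {
		panic(err)
	}
	for name := range header {
		fmt.Println(name) // tensor names, e.g. model.layers.0.mlp.down_proj.weight
	}
}
```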
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"encoding/binary"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"io/fs"

@@ -50,6 +51,10 @@ func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]T

 	for _, key := range keys {
 		if value := headers[key]; value.Type != "" {
+			// bitsandbytes quantized models are unsupported
+			if len(value.Shape) == 0 {
+				return nil, errors.New("unsupported safetensors model")
+			}
 			ts = append(ts, safetensor{
 				fs:   fsys,
 				path: p,
@@ -100,8 +100,21 @@ func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error)
 	}

 	if template, ok := p["chat_template"]; ok {
-		if err := json.Unmarshal(template, &t.Template); err != nil {
-			return nil, err
+		var s []struct {
+			Name     string `json:"name"`
+			Template string `json:"template"`
+		}
+		if err := json.Unmarshal(template, &t.Template); err == nil {
+			// noop
+		} else if err := json.Unmarshal(template, &s); err == nil {
+			for _, e := range s {
+				if e.Name == "default" {
+					t.Template = e.Template
+					break
+				}
+			}
+		} else {
+			return nil, fmt.Errorf("invalid chat_template: %w", err)
 		}
 	}

@@ -141,7 +154,6 @@ func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error)
 }

 type tokenizer struct {
-	Version     string  `json:"version"`
 	AddedTokens []token `json:"added_tokens"`
 	Model struct {
 		Type string `json:"type"`

@@ -239,7 +251,7 @@ func parseVocabulary(fsys fs.FS) (*Vocabulary, error) {
 		return pattern.Func(fsys)
 	}

-	return nil, errors.New("unknown tensor format")
+	return nil, errors.New("unknown tokenizer format")
 }

 type SpecialVocabulary struct {
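The `chat_template` handling above accepts either a bare string or a named list of templates and prefers the `default` entry. A self-contained sketch of the same try-string-then-list decoding, using only the standard library (the inputs are illustrative, not the package's actual test data):

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	for _, raw := range []string{
		`"<default template>"`,
		`[{"name":"default","template":"<default template>"},{"name":"tools","template":"<tools template>"}]`,
	} {
		var tmpl string
		var list []struct {
			Name     string `json:"name"`
			Template string `json:"template"`
		}
		if err := json.Unmarshal([]byte(raw), &tmpl); err == nil {
			// the string form decodes directly
		} else if err := json.Unmarshal([]byte(raw), &list); err == nil {
			// the list form: pick the entry named "default"
			for _, e := range list {
				if e.Name == "default" {
					tmpl = e.Template
					break
				}
			}
		}
		fmt.Println(tmpl) // "<default template>" in both cases
	}
}
```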
208 convert/tokenizer_test.go (new file)
@@ -0,0 +1,208 @@
package convert

import (
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/google/go-cmp/cmp"
)

func createTokenizerFS(t *testing.T, dir string, files map[string]io.Reader) fs.FS {
	t.Helper()

	for k, v := range files {
		if err := func() error {
			f, err := os.Create(filepath.Join(dir, k))
			if err != nil {
				return err
			}
			defer f.Close()

			if _, err := io.Copy(f, v); err != nil {
				return err
			}

			return nil
		}(); err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
	}

	return os.DirFS(dir)
}

func TestParseTokenizer(t *testing.T) {
	cases := []struct {
		name              string
		fsys              fs.FS
		specialTokenTypes []string
		want              *Tokenizer
	}{
		{
			name: "string chat template",
			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
				"tokenizer.json": strings.NewReader(`{}`),
				"tokenizer_config.json": strings.NewReader(`{
					"chat_template": "<default template>"
				}`),
			}),
			want: &Tokenizer{
				Vocabulary: &Vocabulary{Model: "gpt2"},
				Pre:        "default",
				Template:   "<default template>",
			},
		},
		{
			name: "list chat template",
			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
				"tokenizer.json": strings.NewReader(`{}`),
				"tokenizer_config.json": strings.NewReader(`{
					"chat_template": [
						{
							"name": "default",
							"template": "<default template>"
						},
						{
							"name": "tools",
							"template": "<tools template>"
						}
					]
				}`),
			}),
			want: &Tokenizer{
				Vocabulary: &Vocabulary{Model: "gpt2"},
				Pre:        "default",
				Template:   "<default template>",
			},
		},
		{
			name: "added tokens",
			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
				"tokenizer.json": strings.NewReader(`{
					"added_tokens": [
						{
							"id": 999,
							"content": "<unused999>",
							"special": false
						}
					]
				}`),
			}),
			want: &Tokenizer{
				Vocabulary: &Vocabulary{
					Model:  "gpt2",
					Tokens: []string{"<unused999>"},
					Scores: []float32{999},
					Types:  []int32{4},
				},
				Pre: "default",
			},
		},
		{
			name: "added tokens overlap vocab",
			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
				"tokenizer.json": strings.NewReader(`{
					"added_tokens": [
						{
							"id": 0,
							"content": "<pad>",
							"special": true
						}
					],
					"model": {
						"vocab": {
							"<pad>": 0
						}
					}
				}`),
			}),
			want: &Tokenizer{
				Vocabulary: &Vocabulary{
					Model:  "gpt2",
					Tokens: []string{"<pad>"},
					Scores: []float32{0},
					Types:  []int32{3},
				},
				Pre: "default",
			},
		},
		{
			name: "special token types",
			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
				"tokenizer.json": strings.NewReader(`{
					"added_tokens": [
						{
							"id": 0,
							"content": "<pad>",
							"special": true
						},
						{
							"id": 1,
							"content": "<eos>",
							"special": true
						},
						{
							"id": 2,
							"content": "<bos>",
							"special": true
						},
						{
							"id": 3,
							"content": "<unk>",
							"special": true
						}
					],
					"model": {
						"vocab": {
							"<pad>": 0,
							"<eos>": 1,
							"<bos>": 2,
							"<unk>": 3
						}
					}
				}`),
				"tokenizer_config.json": strings.NewReader(`{
					"add_bos_token": true,
					"add_eos_token": false,
					"bos_token": "<bos>",
					"eos_token": "<eos>",
					"pad_token": "<pad>",
					"unk_token": "<unk>"
				}`),
			}),
			specialTokenTypes: []string{"pad", "eos", "bos", "unk"},
			want: &Tokenizer{
				Vocabulary: &Vocabulary{
					Model:  "gpt2",
					Tokens: []string{"<pad>", "<eos>", "<bos>", "<unk>"},
					Scores: []float32{0, 1, 2, 3},
					Types:  []int32{3, 3, 3, 3},
				},
				SpecialVocabulary: []*SpecialVocabulary{
					{Type: "pad", Content: "<pad>", ID: 0, AddToken: false},
					{Type: "eos", Content: "<eos>", ID: 1, AddToken: false},
					{Type: "bos", Content: "<bos>", ID: 2, AddToken: true},
					{Type: "unk", Content: "<unk>", ID: 3, AddToken: false},
				},
				Pre: "default",
			},
		},
	}

	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			tokenizer, err := parseTokenizer(tt.fsys, tt.specialTokenTypes)
			if err != nil {
				t.Fatalf("unexpected error: %v", err)
			}

			if diff := cmp.Diff(tt.want, tokenizer); diff != "" {
				t.Errorf("unexpected tokenizer (-want +got):\n%s", diff)
			}
		})
	}
}
@@ -111,7 +111,10 @@ On Windows, Ollama inherits your user and system environment variables.

 ## How do I use Ollama behind a proxy?

-Ollama is compatible with proxy servers if `HTTP_PROXY` or `HTTPS_PROXY` are configured. When using either variables, ensure it is set where `ollama serve` can access the values. When using `HTTPS_PROXY`, ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform.
+Ollama pulls models from the Internet and may require a proxy server to access the models. Use `HTTPS_PROXY` to redirect outbound requests through the proxy. Ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform.
+
+> [!NOTE]
+> Avoid setting `HTTP_PROXY`. Ollama does not use HTTP for model pulls, only HTTPS. Setting `HTTP_PROXY` may interrupt client connections to the server.

 ### How do I use Ollama behind a proxy in Docker?
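As background for the reworded FAQ entry: Go's default HTTP transport resolves the proxy per request via `http.ProxyFromEnvironment`, which consults `HTTPS_PROXY` for https URLs, so the variable only needs to be visible to the `ollama serve` process. A small standard-library sketch (the proxy address and registry URL are illustrative):

```go
package main

import (
	"fmt"
	"net/http"
	"os"
)

func main() {
	// ProxyFromEnvironment reads the environment on first use, so set the
	// variable before the first request in a fresh process.
	os.Setenv("HTTPS_PROXY", "https://proxy.example.com:3128")

	req, err := http.NewRequest("GET", "https://registry.ollama.ai/v2/", nil)
	if err != nil {
		panic(err)
	}

	proxyURL, err := http.ProxyFromEnvironment(req)
	if err != nil || proxyURL == nil {
		fmt.Println("no proxy configured")
		return
	}
	fmt.Println(proxyURL.String()) // https://proxy.example.com:3128
}
```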
@@ -276,4 +279,4 @@ Note: Windows with Radeon GPUs currently default to 1 model maximum due to limit

 ## How does Ollama load models on multiple GPUs?

 Installing multiple GPUs of the same brand can be a great way to increase your available VRAM to load larger models. When you load a new model, Ollama evaluates the required VRAM for the model against what is currently available. If the model will entirely fit on any single GPU, Ollama will load the model on that GPU. This typically provides the best performance as it reduces the amount of data transferring across the PCI bus during inference. If the model does not fit entirely on one GPU, then it will be spread across all the available GPUs.
BIN docs/images/ollama-keys.png (new file; binary file not shown; after: 150 KiB)
BIN docs/images/signup.png (new file; binary file not shown; after: 80 KiB)
188 docs/import.md
@@ -1,44 +1,129 @@
-# Import
+# Importing a model

-GGUF models and select Safetensors models can be imported directly into Ollama.
+## Table of Contents

-## Import GGUF
+* [Importing a Safetensors adapter](#Importing-a-fine-tuned-adapter-from-Safetensors-weights)
+* [Importing a Safetensors model](#Importing-a-model-from-Safetensors-weights)
+* [Importing a GGUF file](#Importing-a-GGUF-based-model-or-adapter)
+* [Sharing models on ollama.com](#Sharing-your-model-on-ollamacom)

-A binary GGUF file can be imported directly into Ollama through a Modelfile.
+## Importing a fine tuned adapter from Safetensors weights
+
+First, create a `Modelfile` with a `FROM` command pointing at the base model you used for fine tuning, and an `ADAPTER` command which points to the directory with your Safetensors adapter:

 ```dockerfile
-FROM /path/to/file.gguf
+FROM <base model name>
+ADAPTER /path/to/safetensors/adapter/directory
 ```

-## Import Safetensors
+Make sure that you use the same base model in the `FROM` command as you used to create the adapter otherwise you will get erratic results. Most frameworks use different quantization methods, so it's best to use non-quantized (i.e. non-QLoRA) adapters. If your adapter is in the same directory as your `Modelfile`, use `ADAPTER .` to specify the adapter path.

-If the model being imported is one of these architectures, it can be imported directly into Ollama through a Modelfile:
+Now run `ollama create` from the directory where the `Modelfile` was created:

-- LlamaForCausalLM
-- MistralForCausalLM
-- MixtralForCausalLM
-- GemmaForCausalLM
-- Phi3ForCausalLM
+```bash
+ollama create my-model
+```
+
+Lastly, test the model:
+
+```bash
+ollama run my-model
+```
+
+Ollama supports importing adapters based on several different model architectures including:
+
+  * Llama (including Llama 2, Llama 3, and Llama 3.1);
+  * Mistral (including Mistral 1, Mistral 2, and Mixtral); and
+  * Gemma (including Gemma 1 and Gemma 2)
+
+You can create the adapter using a fine tuning framework or tool which can output adapters in the Safetensors format, such as:
+
+  * Hugging Face [fine tuning framework](https://huggingface.co/docs/transformers/en/training)
+  * [Unsloth](https://github.com/unslothai/unsloth)
+  * [MLX](https://github.com/ml-explore/mlx)
+
+## Importing a model from Safetensors weights
+
+First, create a `Modelfile` with a `FROM` command which points to the directory containing your Safetensors weights:

 ```dockerfile
 FROM /path/to/safetensors/directory
 ```

-For architectures not directly convertable by Ollama, see llama.cpp's [guide](https://github.com/ggerganov/llama.cpp/blob/master/README.md#prepare-and-quantize) on conversion. After conversion, see [Import GGUF](#import-gguf).
+If you create the Modelfile in the same directory as the weights, you can use the command `FROM .`.

-## Automatic Quantization
+Now run the `ollama create` command from the directory where you created the `Modelfile`:

-> [!NOTE]
-> Automatic quantization requires v0.1.35 or higher.
+```shell
+ollama create my-model
+```

-Ollama is capable of quantizing FP16 or FP32 models to any of the supported quantizations with the `-q/--quantize` flag in `ollama create`.
+Lastly, test the model:
+
+```shell
+ollama run my-model
+```
+
+Ollama supports importing models for several different architectures including:
+
+  * Llama (including Llama 2, Llama 3, and Llama 3.1);
+  * Mistral (including Mistral 1, Mistral 2, and Mixtral);
+  * Gemma (including Gemma 1 and Gemma 2); and
+  * Phi3
+
+This includes importing foundation models as well as any fine tuned models which have been _fused_ with a foundation model.
+
+## Importing a GGUF based model or adapter
+
+If you have a GGUF based model or adapter it is possible to import it into Ollama. You can obtain a GGUF model or adapter by:
+
+  * converting a Safetensors model with the `convert_hf_to_gguf.py` from Llama.cpp;
+  * converting a Safetensors adapter with the `convert_lora_to_gguf.py` from Llama.cpp; or
+  * downloading a model or adapter from a place such as HuggingFace
+
+To import a GGUF model, create a `Modelfile` containing:
+
+```dockerfile
+FROM /path/to/file.gguf
+```
+
+For a GGUF adapter, create the `Modelfile` with:
+
+```dockerfile
+FROM <model name>
+ADAPTER /path/to/file.gguf
+```
+
+When importing a GGUF adapter, it's important to use the same base model as the base model that the adapter was created with. You can use:
+
+ * a model from Ollama
+ * a GGUF file
+ * a Safetensors based model
+
+Once you have created your `Modelfile`, use the `ollama create` command to build the model.
+
+```shell
+ollama create my-model
+```
+
+## Quantizing a Model
+
+Quantizing a model allows you to run models faster and with less memory consumption but at reduced accuracy. This allows you to run a model on more modest hardware.
+
+Ollama can quantize FP16 and FP32 based models into different quantization levels using the `-q/--quantize` flag with the `ollama create` command.
+
+First, create a Modelfile with the FP16 or FP32 based model you wish to quantize.

 ```dockerfile
 FROM /path/to/my/gemma/f16/model
 ```

+Use `ollama create` to then create the quantized model.
+
 ```shell
-$ ollama create -q Q4_K_M mymodel
+$ ollama create --quantize q4_K_M mymodel
 transferring model data
 quantizing F16 model to Q4_K_M
 creating new layer sha256:735e246cc1abfd06e9cdcf95504d6789a6cd1ad7577108a70d9902fef503c1bd

@@ -49,42 +134,53 @@ success

 ### Supported Quantizations

-- `Q4_0`
-- `Q4_1`
-- `Q5_0`
-- `Q5_1`
-- `Q8_0`
+- `q4_0`
+- `q4_1`
+- `q5_0`
+- `q5_1`
+- `q8_0`

 #### K-means Quantizations

-- `Q3_K_S`
-- `Q3_K_M`
-- `Q3_K_L`
-- `Q4_K_S`
-- `Q4_K_M`
-- `Q5_K_S`
-- `Q5_K_M`
-- `Q6_K`
+- `q3_K_S`
+- `q3_K_M`
+- `q3_K_L`
+- `q4_K_S`
+- `q4_K_M`
+- `q5_K_S`
+- `q5_K_M`
+- `q6_K`

-## Template Detection
+## Sharing your model on ollama.com

-> [!NOTE]
-> Template detection requires v0.1.42 or higher.
+You can share any model you have created by pushing it to [ollama.com](https://ollama.com) so that other users can try it out.

-Ollama uses model metadata, specifically `tokenizer.chat_template`, to automatically create a template appropriate for the model you're importing.
+First, use your browser to go to the [Ollama Sign-Up](https://ollama.com/signup) page. If you already have an account, you can skip this step.

-```dockerfile
-FROM /path/to/my/gemma/model
-```
+<img src="images/signup.png" alt="Sign-Up" width="40%">
+
+The `Username` field will be used as part of your model's name (e.g. `jmorganca/mymodel`), so make sure you are comfortable with the username that you have selected.
+
+Now that you have created an account and are signed-in, go to the [Ollama Keys Settings](https://ollama.com/settings/keys) page.
+
+Follow the directions on the page to determine where your Ollama Public Key is located.
+
+<img src="images/ollama-keys.png" alt="Ollama Keys" width="80%">
+
+Click on the `Add Ollama Public Key` button, and copy and paste the contents of your Ollama Public Key into the text field.
+
+To push a model to [ollama.com](https://ollama.com), first make sure that it is named correctly with your username. You may have to use the `ollama cp` command to copy your model to give it the correct name. Once you're happy with your model's name, use the `ollama push` command to push it to [ollama.com](https://ollama.com).

 ```shell
-$ ollama create mymodel
-transferring model data
-using autodetected template gemma-instruct
-creating new layer sha256:baa2a0edc27d19cc6b7537578a9a7ba1a4e3214dc185ed5ae43692b319af7b84
-creating new layer sha256:ba66c3309914dbef07e5149a648fd1877f030d337a4f240d444ea335008943cb
-writing manifest
-success
+ollama cp mymodel myuser/mymodel
+ollama push myuser/mymodel
 ```

-Defining a template in the Modelfile will disable this feature which may be useful if you want to use a different template than the autodetected one.
+Once your model has been pushed, other users can pull and run it by using the command:
+
+```shell
+ollama run myuser/mymodel
+```
@@ -28,6 +28,11 @@ Download and extract the Linux package:
 curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
 ```

+If you have an AMD GPU, also download and extract the ROCm package into the same location
+
+```bash
+curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tgz | sudo tar zx -C /usr
+```
+
 ### Adding Ollama as a startup service (recommended)

 Create a user for Ollama:
@@ -11,8 +11,9 @@ A model file is the blueprint to create and share models with Ollama.
 - [Examples](#examples)
 - [Instructions](#instructions)
   - [FROM (Required)](#from-required)
-    - [Build from llama3](#build-from-llama3)
-    - [Build from a bin file](#build-from-a-bin-file)
+    - [Build from llama3.1](#build-from-llama31)
+    - [Build from a Safetensors model](#build-from-a-safetensors-model)
+    - [Build from a GGUF file](#build-from-a-gguf-file)
   - [PARAMETER](#parameter)
     - [Valid Parameters and Values](#valid-parameters-and-values)
   - [TEMPLATE](#template)

@@ -99,22 +100,39 @@ The `FROM` instruction defines the base model to use when creating a model.
 FROM <model name>:<tag>
 ```

-#### Build from llama3
+#### Build from llama3.1

 ```modelfile
-FROM llama3
+FROM llama3.1
 ```

 A list of available base models:
 <https://github.com/ollama/ollama#model-library>
+Additional models can be found at:
+<https://ollama.com/library>

-#### Build from a `bin` file
+#### Build from a Safetensors model
+
+```modelfile
+FROM <model directory>
+```
+
+The model directory should contain the Safetensors weights for a supported architecture.
+
+Currently supported model architectures:
+  * Llama (including Llama 2, Llama 3, and Llama 3.1)
+  * Mistral (including Mistral 1, Mistral 2, and Mixtral)
+  * Gemma (including Gemma 1 and Gemma 2)
+  * Phi3
+
+#### Build from a GGUF file

 ```modelfile
 FROM ./ollama-model.bin
 ```

-This bin file location should be specified as an absolute path or relative to the `Modelfile` location.
+The GGUF bin file location should be specified as an absolute path or relative to the `Modelfile` location.

 ### PARAMETER

@@ -174,7 +192,20 @@ SYSTEM """<system message>"""

 ### ADAPTER

-The `ADAPTER` instruction is an optional instruction that specifies any LoRA adapter that should apply to the base model. The value of this instruction should be an absolute path or a path relative to the Modelfile and the file must be in a GGML file format. The adapter should be tuned from the base model otherwise the behaviour is undefined.
+The `ADAPTER` instruction specifies a fine tuned LoRA adapter that should apply to the base model. The value of the adapter should be an absolute path or a path relative to the Modelfile. The base model should be specified with a `FROM` instruction. If the base model is not the same as the base model that the adapter was tuned from the behaviour will be erratic.
+
+#### Safetensor adapter
+
+```modelfile
+ADAPTER <path to safetensor adapter>
+```
+
+Currently supported Safetensor adapters:
+  * Llama (including Llama 2, Llama 3, and Llama 3.1)
+  * Mistral (including Mistral 1, Mistral 2, and Mixtral)
+  * Gemma (including Gemma 1 and Gemma 2)
+
+#### GGUF adapter
+
+```modelfile
+ADAPTER ./ollama-lora.bin
+```
@@ -300,3 +300,28 @@ curl http://localhost:11434/v1/chat/completions \
     ]
 }'
 ```
+
+### Setting the context size
+
+The OpenAI API does not have a way of setting the context size for a model. If you need to change the context size, create a `Modelfile` which looks like:
+
+```modelfile
+FROM <some model>
+PARAMETER num_ctx <context size>
+```
+
+Use the `ollama create mymodel` command to create a new model with the updated context size. Call the API with the updated model name:
+
+```shell
+curl http://localhost:11434/v1/chat/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+        "model": "mymodel",
+        "messages": [
+            {
+                "role": "user",
+                "content": "Hello!"
+            }
+        ]
+    }'
+```
@@ -30,9 +30,7 @@ func Host() *url.URL {
 		defaultPort = "443"
 	}

-	// trim trailing slashes
-	hostport = strings.TrimRight(hostport, "/")
-
+	hostport, path, _ := strings.Cut(hostport, "/")
 	host, port, err := net.SplitHostPort(hostport)
 	if err != nil {
 		host, port = "127.0.0.1", defaultPort

@@ -45,15 +43,13 @@ func Host() *url.URL {

 	if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
 		slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
-		return &url.URL{
-			Scheme: scheme,
-			Host:   net.JoinHostPort(host, defaultPort),
-		}
+		port = defaultPort
 	}

 	return &url.URL{
 		Scheme: scheme,
 		Host:   net.JoinHostPort(host, port),
+		Path:   path,
 	}
 }
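With the rewritten `Host()`, the scheme and any path prefix (for example a reverse-proxy mount point) now survive into the returned URL instead of being trimmed away. A sketch of the observable behavior, matching the new "proxy path" test case below (the hostname is illustrative):

```go
package main

import (
	"fmt"
	"os"

	"github.com/ollama/ollama/envconfig"
)

func main() {
	// The path after the host is preserved rather than stripped.
	os.Setenv("OLLAMA_HOST", "https://example.com/ollama")
	fmt.Println(envconfig.Host().String()) // https://example.com:443/ollama
}
```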
@@ -190,7 +186,7 @@ func RunnersDir() (p string) {
 	}

 	var paths []string
-	for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), ".."), cwd} {
+	for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), LibRelativeToExe()), cwd} {
 		paths = append(paths,
 			root,
 			filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),

@@ -282,3 +278,12 @@ func Values() map[string]string {
 func Var(key string) string {
 	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
 }
+
+// On windows, we keep the binary at the top directory, but
+// other platforms use a "bin" directory, so this returns ".."
+func LibRelativeToExe() string {
+	if runtime.GOOS == "windows" {
+		return "."
+	}
+	return ".."
+}
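`LibRelativeToExe` centralizes the platform difference that the GPU-library lookups below rely on. A minimal sketch of how a caller resolves the payload directory with it (the binary location comes from the running process):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"

	"github.com/ollama/ollama/envconfig"
)

func main() {
	exe, err := os.Executable()
	if err != nil {
		panic(err)
	}
	// Windows keeps ollama.exe at the install root; other platforms use bin/,
	// so the library payload sits at "." or ".." relative to the executable.
	fmt.Println(filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama"))
}
```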
@@ -13,34 +13,35 @@ func TestHost(t *testing.T) {
 		value  string
 		expect string
 	}{
-		"empty":               {"", "127.0.0.1:11434"},
-		"only address":        {"1.2.3.4", "1.2.3.4:11434"},
-		"only port":           {":1234", ":1234"},
-		"address and port":    {"1.2.3.4:1234", "1.2.3.4:1234"},
-		"hostname":            {"example.com", "example.com:11434"},
-		"hostname and port":   {"example.com:1234", "example.com:1234"},
-		"zero port":           {":0", ":0"},
-		"too large port":      {":66000", ":11434"},
-		"too small port":      {":-1", ":11434"},
-		"ipv6 localhost":      {"[::1]", "[::1]:11434"},
-		"ipv6 world open":     {"[::]", "[::]:11434"},
-		"ipv6 no brackets":    {"::1", "[::1]:11434"},
-		"ipv6 + port":         {"[::1]:1337", "[::1]:1337"},
-		"extra space":         {" 1.2.3.4 ", "1.2.3.4:11434"},
-		"extra quotes":        {"\"1.2.3.4\"", "1.2.3.4:11434"},
-		"extra space+quotes":  {" \" 1.2.3.4 \" ", "1.2.3.4:11434"},
-		"extra single quotes": {"'1.2.3.4'", "1.2.3.4:11434"},
-		"http":                {"http://1.2.3.4", "1.2.3.4:80"},
-		"http port":           {"http://1.2.3.4:4321", "1.2.3.4:4321"},
-		"https":               {"https://1.2.3.4", "1.2.3.4:443"},
-		"https port":          {"https://1.2.3.4:4321", "1.2.3.4:4321"},
+		"empty":               {"", "http://127.0.0.1:11434"},
+		"only address":        {"1.2.3.4", "http://1.2.3.4:11434"},
+		"only port":           {":1234", "http://:1234"},
+		"address and port":    {"1.2.3.4:1234", "http://1.2.3.4:1234"},
+		"hostname":            {"example.com", "http://example.com:11434"},
+		"hostname and port":   {"example.com:1234", "http://example.com:1234"},
+		"zero port":           {":0", "http://:0"},
+		"too large port":      {":66000", "http://:11434"},
+		"too small port":      {":-1", "http://:11434"},
+		"ipv6 localhost":      {"[::1]", "http://[::1]:11434"},
+		"ipv6 world open":     {"[::]", "http://[::]:11434"},
+		"ipv6 no brackets":    {"::1", "http://[::1]:11434"},
+		"ipv6 + port":         {"[::1]:1337", "http://[::1]:1337"},
+		"extra space":         {" 1.2.3.4 ", "http://1.2.3.4:11434"},
+		"extra quotes":        {"\"1.2.3.4\"", "http://1.2.3.4:11434"},
+		"extra space+quotes":  {" \" 1.2.3.4 \" ", "http://1.2.3.4:11434"},
+		"extra single quotes": {"'1.2.3.4'", "http://1.2.3.4:11434"},
+		"http":                {"http://1.2.3.4", "http://1.2.3.4:80"},
+		"http port":           {"http://1.2.3.4:4321", "http://1.2.3.4:4321"},
+		"https":               {"https://1.2.3.4", "https://1.2.3.4:443"},
+		"https port":          {"https://1.2.3.4:4321", "https://1.2.3.4:4321"},
+		"proxy path":          {"https://example.com/ollama", "https://example.com:443/ollama"},
 	}

 	for name, tt := range cases {
 		t.Run(name, func(t *testing.T) {
 			t.Setenv("OLLAMA_HOST", tt.value)
-			if host := Host(); host.Host != tt.expect {
-				t.Errorf("%s: expected %s, got %s", name, tt.expect, host.Host)
+			if host := Host(); host.String() != tt.expect {
+				t.Errorf("%s: expected %s, got %s", name, tt.expect, host.String())
 			}
 		})
 	}
@@ -9,6 +9,8 @@ import (
 	"path/filepath"
 	"runtime"
 	"strings"
+
+	"github.com/ollama/ollama/envconfig"
 )

 // Determine if the given ROCm lib directory is usable by checking for existence of some glob patterns

@@ -54,7 +56,7 @@ func commonAMDValidateLibDir() (string, error) {
 	// Installer payload location if we're running the installed binary
 	exe, err := os.Executable()
 	if err == nil {
-		rocmTargetDir := filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
+		rocmTargetDir := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
 		if rocmLibUsable(rocmTargetDir) {
 			slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
 			return rocmTargetDir, nil
@@ -153,7 +153,7 @@ func AMDValidateLibDir() (string, error) {
 	// Installer payload (if we're running from some other location)
 	localAppData := os.Getenv("LOCALAPPDATA")
 	appDir := filepath.Join(localAppData, "Programs", "Ollama")
-	rocmTargetDir := filepath.Join(appDir, "..", "lib", "ollama")
+	rocmTargetDir := filepath.Join(appDir, envconfig.LibRelativeToExe(), "lib", "ollama")
 	if rocmLibUsable(rocmTargetDir) {
 		slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
 		return rocmTargetDir, nil
@@ -653,7 +653,7 @@ func LibraryDir() string {
 		slog.Warn("failed to lookup working directory", "error", err)
 	}
 	// Scan for any of our dependeices, and pick first match
-	for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), ".."), cwd} {
+	for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe()), cwd} {
 		libDep := filepath.Join("lib", "ollama")
 		if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
 			return filepath.Join(root, libDep)
@@ -32,4 +32,29 @@ func TestCPUMemInfo(t *testing.T) {
 	}
 }

+func TestByLibrary(t *testing.T) {
+	type testCase struct {
+		input  []GpuInfo
+		expect int
+	}
+
+	testCases := map[string]*testCase{
+		"empty":                    {input: []GpuInfo{}, expect: 0},
+		"cpu":                      {input: []GpuInfo{{Library: "cpu"}}, expect: 1},
+		"cpu + GPU":                {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda"}}, expect: 2},
+		"cpu + 2 GPU no variant":   {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda"}, {Library: "cuda"}}, expect: 2},
+		"cpu + 2 GPU same variant": {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda", Variant: "v11"}, {Library: "cuda", Variant: "v11"}}, expect: 2},
+		"cpu + 2 GPU diff variant": {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda", Variant: "v11"}, {Library: "cuda", Variant: "v12"}}, expect: 3},
+	}
+
+	for k, v := range testCases {
+		t.Run(k, func(t *testing.T) {
+			resp := (GpuInfoList)(v.input).ByLibrary()
+			if len(resp) != v.expect {
+				t.Fatalf("expected length %d, got %d => %+v", v.expect, len(resp), resp)
+			}
+		})
+	}
+}
+
 // TODO - add some logic to figure out card type through other means and actually verify we got back what we expected
@@ -94,7 +94,7 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
 			}
 		}
 		if !found {
-			libs = append(libs, info.Library)
+			libs = append(libs, requested)
 			resp = append(resp, []GpuInfo{info})
 		}
 	}
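The bug this one-line change fixes: GPUs requesting a library variant (for example cuda v11 versus v12) were keyed by the bare library name, so distinct variants collapsed into one group. A reduced, self-contained sketch of the corrected grouping idea (a hypothetical stand-in; the real method preserves order and returns `[]GpuInfoList`):

```go
package main

import "fmt"

// GpuInfo is a reduced stand-in with only the fields that matter here.
type GpuInfo struct {
	Library string
	Variant string
}

// groupKeys keys each GPU by library plus variant, so cuda/v11 and
// cuda/v12 land in different groups, matching the fixed behavior.
func groupKeys(gpus []GpuInfo) []string {
	var keys []string
	seen := map[string]bool{}
	for _, g := range gpus {
		requested := g.Library
		if g.Variant != "" {
			requested += "/" + g.Variant
		}
		if !seen[requested] {
			seen[requested] = true
			keys = append(keys, requested)
		}
	}
	return keys
}

func main() {
	fmt.Println(groupKeys([]GpuInfo{
		{Library: "cpu"},
		{Library: "cuda", Variant: "v11"},
		{Library: "cuda", Variant: "v12"},
	})) // [cpu cuda/v11 cuda/v12]
}
```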
73 llm/ext_server/server.cpp (vendored)
@@ -1271,61 +1271,8 @@ struct llama_server_context
 		}
 	}

-	// 1 image only
-	bool prepare_pali(server_slot &slot, int n_batch)
-	{
-		int n_past = 0;
-		int image_idx = 0;
-		slot_image &img = slot.images[image_idx];
-
-		// rescale image embeddings
-		float *data = img.image_embedding;
-		for (int i = 0; i < 2048 * 256; i++)
-		{
-			data[i] = data[i] / sqrt(2048);
-		}
-		set_image_embeds(ctx, data);
-
-		// generate user_prompt -> this should contain image tokens prepended and a new line appended:
-		// batch.n_tokens += (int)slot.images.size() * llama_n_embd(model);
-		std::vector<llama_token> tokens;
-
-		for (int i = 0; i < (int)slot.images.size() * 256; i++)
-		{
-			tokens.push_back(257152);
-		}
-
-		tokens.push_back(2);
-
-		// move prefix prompt behind image tokens
-		for (int i = 0; i < batch.n_tokens; i++)
-		{
-			tokens.push_back(batch.token[i]);
-		}
-
-		llama_batch_clear(batch);
-		for (int i = 0; i < (int)tokens.size(); ++i)
-		{
-			llama_batch_add(batch, tokens[i], system_tokens.size() + slot.n_past, {slot.id}, true);
-			slot.n_past += 1;
-		}
-
-		// append prefix of next image
-		const auto json_prompt = slot.params.input_suffix;
-
-		std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
-		append_tokens.push_back(108);
-
-		for (int i = 0; i < (int)append_tokens.size(); ++i)
-		{
-			llama_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, {slot.id}, true);
-			slot.n_past += 1;
-		}
-		llama_set_causal_attn(ctx, false);
-		return true;
-	}
-
-	bool process_llava(server_slot &slot, int n_batch)
+	// for multiple images processing
+	bool ingest_images(server_slot &slot, int n_batch)
 	{
 		int image_idx = 0;

@@ -1402,21 +1349,6 @@ struct llama_server_context
 		return true;
 	}

-	// for multiple images processing based on model architecture
-	bool ingest_images(server_slot &slot, int n_batch)
-	{
-		switch (llama_get_architecture(model))
-		{
-		case 0:
-			return process_llava(slot, n_batch);
-		case 25:
-			return prepare_pali(slot, n_batch);
-		default:
-			LOG_TEE("%s : failed to retrieve model architecture\n", __func__);
-			return false;
-		}
-	}
-
 	void request_cancel(int task_id)
 	{
 		task_server task;

@@ -1984,7 +1916,6 @@ struct llama_server_context
 	};

 	const int ret = llama_decode(ctx, batch_view);
-	llama_set_causal_attn(ctx, true);

 	if (ret != 0)
 	{
@@ -87,6 +87,8 @@ apply_patches() {
|
||||
build() {
|
||||
cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
|
||||
cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
|
||||
# remove unnecessary build artifacts
|
||||
rm -f ${BUILD_DIR}/bin/ggml-common.h ${BUILD_DIR}/bin/ggml-metal.metal
|
||||
}
|
||||
|
||||
compress() {
|
||||
|
||||
@@ -7,7 +7,7 @@ index 1fe2b9f7..a43312a7 100644
 
      // TODO: use a per-batch flag for logits presence instead
 -    const bool has_logits = !cparams.embeddings;
-+    const bool has_logits = cparams.causal_attn || lctx.image_embeds;
++    const bool has_logits = cparams.causal_attn;
      const bool has_embd   = lctx.is_encoding || (cparams.embeddings && (cparams.pooling_type == LLAMA_POOLING_TYPE_NONE));
 
      const size_t logits_size = has_logits ? n_vocab*n_outputs_max : 0;

@@ -36,7 +36,7 @@ index 1fe2b9f7..a43312a7 100644
          GGML_ASSERT(strcmp(res->name, "result_output") == 0 && "missing result_output tensor");
      }
 +
-+    if (!cparams.causal_attn && !has_image_embeds) {
++    if (!cparams.causal_attn) {
 +        res = nullptr; // do not extract logits when not needed
 +    }
 +
@@ -1,113 +0,0 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 9c0d351e..019a147c 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -718,10 +718,12 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
         embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
         embeddings = ggml_add(ctx0, embeddings, model.mm_0_b);

-        embeddings = ggml_gelu(ctx0, embeddings);
-        embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
-        embeddings = ggml_add(ctx0, embeddings, model.mm_2_b);
-
+        if (model.mm_2_w)
+        {
+            embeddings = ggml_gelu(ctx0, embeddings);
+            embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
+            embeddings = ggml_add(ctx0, embeddings, model.mm_2_b);
+        }
     } else if (ctx->proj_type == PROJECTOR_TYPE_MLP_NORM) {
         embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
         embeddings = ggml_add(ctx0, embeddings, model.mm_0_b);
@@ -2102,6 +2104,10 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
         return ctx->vision_model.mm_model_peg_0_b->ne[0];
     }
     if (ctx->proj_type == PROJECTOR_TYPE_MLP) {
+        if (ctx->vision_model.mm_2_b == nullptr)
+        {
+            return ctx->vision_model.mm_0_b->ne[0];
+        }
         return ctx->vision_model.mm_2_b->ne[0];
     }
     if (ctx->proj_type == PROJECTOR_TYPE_MLP_NORM) {
diff --git a/include/llama.h b/include/llama.h
index 6072e76e..4c572a74 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -444,6 +444,12 @@ extern "C" {
     // Frees all allocated memory
     LLAMA_API void llama_free(struct llama_context * ctx);

+    // Sets image embeddings
+    LLAMA_API void set_image_embeds(struct llama_context *ctx, float *data);
+
+    // Get architecture
+    LLAMA_API int llama_get_architecture(struct llama_model *model);
+
     LLAMA_API int64_t llama_time_us(void);

     LLAMA_API size_t llama_max_devices(void);
diff --git a/src/llama.cpp b/src/llama.cpp
index d883ed19..322b4b59 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -2710,6 +2710,8 @@ struct llama_context {

     bool logits_all = false;

+    float *image_embeds = nullptr;
+
     // embeddings output (2-dimensional array: [n_outputs][n_embd])
     // populated only when pooling_type == LLAMA_POOLING_TYPE_NONE
     size_t embd_size = 0; // capacity (of floats) for embeddings
@@ -11591,6 +11593,15 @@ struct llm_build_context {

         inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);

+        if (lctx.image_embeds)
+        {
+            struct ggml_tensor *image_embeds = ggml_dup_tensor(ctx0, inpL);
+            image_embeds->data = lctx.image_embeds;
+            image_embeds->ne[1] = 256;
+            inpL = ggml_set_2d_inplace(ctx0, inpL, image_embeds, inpL->nb[1], 0);
+            lctx.image_embeds = NULL;
+        }
+
         inpL = ggml_scale(ctx0, inpL, sqrtf(n_embd));
         cb(inpL, "inp_scaled", -1);
@@ -14468,6 +14479,7 @@ static int llama_decode_internal(

     const int64_t n_embd  = hparams.n_embd;
     const int64_t n_vocab = hparams.n_vocab;
+    const bool has_image_embeds = lctx.image_embeds;

     uint32_t n_outputs = 0;
     uint32_t n_outputs_prev = 0;
@@ -14581,7 +14593,8 @@ static int llama_decode_internal(
     }

     // non-causal masks do not use the KV cache
-    if (hparams.causal_attn) {
+    if (hparams.causal_attn || lctx.image_embeds)
+    {
         llama_kv_cache_update(&lctx);

         // if we have enough unused cells before the current head ->
@@ -16455,6 +16468,16 @@ void llama_free_model(struct llama_model * model) {
     delete model;
 }

+void set_image_embeds(llama_context *ctx, float *data)
+{
+    ctx->image_embeds = data;
+}
+
+int llama_get_architecture(llama_model *model)
+{
+    return model->arch;
+}
+
 struct llama_context * llama_new_context_with_model(
     struct llama_model * model,
     struct llama_context_params params) {
@@ -258,7 +258,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		params = append(params, "--mlock")
 	}

-	if gpu.IsNUMA() {
+	if gpu.IsNUMA() && gpus[0].Library == "cpu" {
 		numaMode := "distribute"
 		if runtime.GOOS == "linux" {
 			if _, err := exec.LookPath("numactl"); err == nil {

@@ -409,7 +409,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	}

 	if err = s.cmd.Start(); err != nil {
-		// Detect permission denied and augment them essage about noexec
+		// Detect permission denied and augment the message about noexec
 		if errors.Is(err, os.ErrPermission) {
 			finalErr = fmt.Errorf("unable to start server %w. %s may have noexec set. Set OLLAMA_TMPDIR for server to a writable executable directory", err, dir)
 			continue
@@ -122,8 +122,8 @@ function buildOllama() {
 			/csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe
 		if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
 	}
-	New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\bin\ -Force
-	cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\bin\
+	New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\ -Force
+	cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\
 }

 function buildApp() {
@@ -30,7 +30,7 @@ if grep -i "centos" /etc/system-release >/dev/null; then
         dnf install -y rh-git227-git
         ln -s /opt/rh/rh-git227/root/usr/bin/git /usr/local/bin/git
     fi
-    dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ pigz
+    dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ pigz findutils
 elif grep -i "rocky" /etc/system-release >/dev/null; then
     # Temporary workaround until rocky 8 AppStream ships GCC 10.4 (10.3 is incompatible with NVCC)
     cat << EOF > /etc/yum.repos.d/Rocky-Vault.repo

@@ -45,6 +45,7 @@ EOF
     dnf install -y git \
         gcc-toolset-10-gcc-10.2.1-8.2.el8 \
         gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 \
+        findutils \
         pigz
 else
     echo "ERROR Unexpected distro"
@@ -139,6 +139,7 @@ The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`,

 func TestParseFromFileFromLayer(t *testing.T) {
 	tempModels := t.TempDir()
+	t.Setenv("OLLAMA_MODELS", tempModels)

 	file, err := os.CreateTemp(tempModels, "")
 	if err != nil {

@@ -189,6 +190,7 @@ func TestParseFromFileFromLayer(t *testing.T) {

 func TestParseLayerFromCopy(t *testing.T) {
 	tempModels := t.TempDir()
+	t.Setenv("OLLAMA_MODELS", tempModels)

 	file2, err := os.CreateTemp(tempModels, "")
 	if err != nil {
@@ -73,18 +73,6 @@ func ParseModelPath(name string) ModelPath {

 var errModelPathInvalid = errors.New("invalid model path")

-func (mp ModelPath) Validate() error {
-	if mp.Repository == "" {
-		return fmt.Errorf("%w: model repository name is required", errModelPathInvalid)
-	}
-
-	if strings.Contains(mp.Tag, ":") {
-		return fmt.Errorf("%w: ':' (colon) is not allowed in tag names", errModelPathInvalid)
-	}
-
-	return nil
-}
-
 func (mp ModelPath) GetNamespaceRepository() string {
 	return fmt.Sprintf("%s/%s", mp.Namespace, mp.Repository)
 }

@@ -105,7 +93,11 @@ func (mp ModelPath) GetShortTagname() string {

 // GetManifestPath returns the path to the manifest file for the given model path, it is up to the caller to create the directory if it does not exist.
 func (mp ModelPath) GetManifestPath() (string, error) {
-	return filepath.Join(envconfig.Models(), "manifests", mp.Registry, mp.Namespace, mp.Repository, mp.Tag), nil
+	if p := filepath.Join(mp.Registry, mp.Namespace, mp.Repository, mp.Tag); filepath.IsLocal(p) {
+		return filepath.Join(envconfig.Models(), "manifests", p), nil
+	}
+
+	return "", errModelPathInvalid
 }

 func (mp ModelPath) BaseURL() *url.URL {
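The `filepath.IsLocal` check (standard library, Go 1.20+) is what closes the path traversal here: it reports whether the joined, cleaned path stays below the directory it is relative to. A standard-library sketch (the model name is illustrative; the second input mirrors the `TestInsecureModelpath` case below):

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// A well-formed manifest path stays inside the manifests directory.
	fmt.Println(filepath.IsLocal(filepath.Join("registry.ollama.ai", "library", "mymodel", "latest"))) // true

	// A crafted name like "../../..:something" cleans to a path that escapes
	// the directory, so GetManifestPath now rejects it with errModelPathInvalid.
	fmt.Println(filepath.IsLocal(filepath.Join("registry.ollama.ai", "library", "../../..", "something"))) // false
}
```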
@@ -1,6 +1,7 @@
 package server

 import (
+	"errors"
 	"os"
 	"path/filepath"
 	"testing"

@@ -154,3 +155,10 @@ func TestParseModelPath(t *testing.T) {
 		})
 	}
 }
+
+func TestInsecureModelpath(t *testing.T) {
+	mp := ParseModelPath("../../..:something")
+	if _, err := mp.GetManifestPath(); !errors.Is(err, errModelPathInvalid) {
+		t.Errorf("expected error: %v", err)
+	}
+}
@@ -463,7 +463,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
c.JSON(http.StatusOK, resp)
}

func (s *Server) PullModelHandler(c *gin.Context) {
func (s *Server) PullHandler(c *gin.Context) {
var req api.PullRequest
err := c.ShouldBindJSON(&req)
switch {
@@ -513,7 +513,7 @@ func (s *Server) PullModelHandler(c *gin.Context) {
streamResponse(c, ch)
}

func (s *Server) PushModelHandler(c *gin.Context) {
func (s *Server) PushHandler(c *gin.Context) {
var req api.PushRequest
err := c.ShouldBindJSON(&req)
switch {
@@ -577,7 +577,7 @@ func checkNameExists(name model.Name) error {
return nil
}

func (s *Server) CreateModelHandler(c *gin.Context) {
func (s *Server) CreateHandler(c *gin.Context) {
var r api.CreateRequest
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -647,7 +647,7 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
streamResponse(c, ch)
}

func (s *Server) DeleteModelHandler(c *gin.Context) {
func (s *Server) DeleteHandler(c *gin.Context) {
var r api.DeleteRequest
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -680,7 +680,7 @@ func (s *Server) DeleteModelHandler(c *gin.Context) {
}
}

func (s *Server) ShowModelHandler(c *gin.Context) {
func (s *Server) ShowHandler(c *gin.Context) {
var req api.ShowRequest
err := c.ShouldBindJSON(&req)
switch {
@@ -829,7 +829,7 @@ func getKVData(digest string, verbose bool) (llm.KV, error) {
return kv, nil
}

func (s *Server) ListModelsHandler(c *gin.Context) {
func (s *Server) ListHandler(c *gin.Context) {
ms, err := Manifests()
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
@@ -879,7 +879,7 @@ func (s *Server) ListModelsHandler(c *gin.Context) {
c.JSON(http.StatusOK, api.ListResponse{Models: models})
}

func (s *Server) CopyModelHandler(c *gin.Context) {
func (s *Server) CopyHandler(c *gin.Context) {
var r api.CopyRequest
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -1081,33 +1081,33 @@ func (s *Server) GenerateRoutes() http.Handler {
allowedHostsMiddleware(s.addr),
)

r.POST("/api/pull", s.PullModelHandler)
r.POST("/api/pull", s.PullHandler)
r.POST("/api/generate", s.GenerateHandler)
r.POST("/api/chat", s.ChatHandler)
r.POST("/api/embed", s.EmbedHandler)
r.POST("/api/embeddings", s.EmbeddingsHandler)
r.POST("/api/create", s.CreateModelHandler)
r.POST("/api/push", s.PushModelHandler)
r.POST("/api/copy", s.CopyModelHandler)
r.DELETE("/api/delete", s.DeleteModelHandler)
r.POST("/api/show", s.ShowModelHandler)
r.POST("/api/create", s.CreateHandler)
r.POST("/api/push", s.PushHandler)
r.POST("/api/copy", s.CopyHandler)
r.DELETE("/api/delete", s.DeleteHandler)
r.POST("/api/show", s.ShowHandler)
r.POST("/api/blobs/:digest", s.CreateBlobHandler)
r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
r.GET("/api/ps", s.ProcessHandler)
r.GET("/api/ps", s.PsHandler)

// Compatibility endpoints
r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler)
r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler)
r.POST("/v1/embeddings", openai.EmbeddingsMiddleware(), s.EmbedHandler)
r.GET("/v1/models", openai.ListMiddleware(), s.ListModelsHandler)
r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowModelHandler)
r.GET("/v1/models", openai.ListMiddleware(), s.ListHandler)
r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowHandler)

for _, method := range []string{http.MethodGet, http.MethodHead} {
r.Handle(method, "/", func(c *gin.Context) {
c.String(http.StatusOK, "Ollama is running")
})

r.Handle(method, "/api/tags", s.ListModelsHandler)
r.Handle(method, "/api/tags", s.ListHandler)
r.Handle(method, "/api/version", func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"version": version.Version})
})
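The renames above (PullModelHandler to PullHandler, and so on) are internal only; every route keeps its path and HTTP method, so existing clients are unaffected. A quick way to confirm against a running server (assuming the default port 11434):

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// /api/version is registered above and its response shape is unchanged
	// by this diff; only the Go handler names moved.
	resp, err := http.Get("http://localhost:11434/api/version")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // e.g. {"version":"0.3.9"}
}
```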
@@ -1269,7 +1269,7 @@ func streamResponse(c *gin.Context, ch chan any) {
})
}

func (s *Server) ProcessHandler(c *gin.Context) {
func (s *Server) PsHandler(c *gin.Context) {
models := []api.ProcessModelResponse{}

for _, v := range s.sched.loaded {

@@ -93,7 +93,7 @@ func TestCreateFromBin(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)

var s Server
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -120,7 +120,7 @@ func TestCreateFromModel(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server

w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -134,7 +134,7 @@ func TestCreateFromModel(t *testing.T) {
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})

w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: "FROM test",
Stream: &stream,
@@ -162,7 +162,7 @@ func TestCreateRemovesLayers(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server

w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -182,7 +182,7 @@ func TestCreateRemovesLayers(t *testing.T) {
filepath.Join(p, "blobs", "sha256-bc80b03733773e0728011b2f4adf34c458b400e1aad48cb28d61170f3a2ad2d6"),
})

w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -210,7 +210,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server

w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nSYSTEM Say hi!", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -230,7 +230,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
filepath.Join(p, "blobs", "sha256-f29e82a8284dbdf5910b1555580ff60b04238b8da9d5e51159ada67a4d0d5851"),
})

w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nSYSTEM \"\"", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -267,7 +267,7 @@ func TestCreateMergeParameters(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server

w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nPARAMETER temperature 1\nPARAMETER top_k 10\nPARAMETER stop USER:\nPARAMETER stop ASSISTANT:", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -288,7 +288,7 @@ func TestCreateMergeParameters(t *testing.T) {
})

// in order to merge parameters, the second model must be created FROM the first
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: "FROM test\nPARAMETER temperature 0.6\nPARAMETER top_p 0.7",
Stream: &stream,
@@ -326,7 +326,7 @@ func TestCreateMergeParameters(t *testing.T) {
}

// slices are replaced
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: "FROM test\nPARAMETER temperature 0.6\nPARAMETER top_p 0.7\nPARAMETER stop <|endoftext|>",
Stream: &stream,
@@ -371,7 +371,7 @@ func TestCreateReplacesMessages(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server

w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nMESSAGE assistant \"What is my purpose?\"\nMESSAGE user \"You run tests.\"\nMESSAGE assistant \"Oh, my god.\"", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -391,7 +391,7 @@ func TestCreateReplacesMessages(t *testing.T) {
filepath.Join(p, "blobs", "sha256-e0e27d47045063ccb167ae852c51d49a98eab33fabaee4633fdddf97213e40b5"),
})

w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: "FROM test\nMESSAGE assistant \"You're a test, Harry.\"\nMESSAGE user \"I-I'm a what?\"\nMESSAGE assistant \"A test. And a thumping good one at that, I'd wager.\"",
Stream: &stream,
@@ -448,7 +448,7 @@ func TestCreateTemplateSystem(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server

w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}\nSYSTEM Say hello!\nTEMPLATE {{ .System }} {{ .Prompt }}\nSYSTEM Say bye!", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -488,7 +488,7 @@ func TestCreateTemplateSystem(t *testing.T) {
}

t.Run("incomplete template", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -500,7 +500,7 @@ func TestCreateTemplateSystem(t *testing.T) {
})

t.Run("template with unclosed if", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ if .Prompt }}", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -512,7 +512,7 @@ func TestCreateTemplateSystem(t *testing.T) {
})

t.Run("template with undefined function", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ Prompt }}", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -531,7 +531,7 @@ func TestCreateLicenses(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server

w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nLICENSE MIT\nLICENSE Apache-2.0", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -579,7 +579,7 @@ func TestCreateDetectTemplate(t *testing.T) {
var s Server

t.Run("matched", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
"tokenizer.chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
@@ -593,14 +593,14 @@ func TestCreateDetectTemplate(t *testing.T) {

checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-0d79f567714c62c048378f2107fb332dabee0135d080c302d884317da9433cc5"),
filepath.Join(p, "blobs", "sha256-35360843d0c84fb1506952a131bbef13cd2bb4a541251f22535170c05b56e672"),
filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
filepath.Join(p, "blobs", "sha256-c608dc615584cd20d9d830363dabf8a4783ae5d34245c3d8c115edb3bc7b28e4"),
filepath.Join(p, "blobs", "sha256-ea34c57ba5b78b740aafe2aeb74dc6507fc3ad14170b64c26a04fb9e36c88d75"),
filepath.Join(p, "blobs", "sha256-de3959f841e9ef6b4b6255fa41cb9e0a45da89c3066aa72bdd07a4747f848990"),
})
})

t.Run("unmatched", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,

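All of these tests go through a createRequest helper that is not part of this diff. Judging from its call sites, it marshals the request struct, invokes the gin handler directly against a recorder, and returns that recorder; a plausible sketch, assuming gin's test utilities (the repo's actual helper may differ):

```go
package server_test // sketch only; the real helper lives in the server test package

import (
	"bytes"
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/gin-gonic/gin"
)

// createRequest builds a JSON request from body, runs the handler on a
// test context, and hands back the recorder so callers can assert on
// w.Code and w.Body, matching the usage in the tests above.
func createRequest(t *testing.T, fn gin.HandlerFunc, body any) *httptest.ResponseRecorder {
	t.Helper()

	data, err := json.Marshal(body)
	if err != nil {
		t.Fatal(err)
	}

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Request = httptest.NewRequest(http.MethodPost, "/", bytes.NewReader(data))
	c.Request.Header.Set("Content-Type", "application/json")

	fn(c)
	return w
}
```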
@@ -22,7 +22,7 @@ func TestDelete(t *testing.T) {

var s Server

w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
})
@@ -31,7 +31,7 @@ func TestDelete(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}

w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)),
})
@@ -52,7 +52,7 @@ func TestDelete(t *testing.T) {
filepath.Join(p, "blobs", "sha256-fe7ac77b725cda2ccad03f88a880ecdfd7a33192d6cae08fce2c0ee1455991ed"),
})

w = createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test"})
w = createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "test"})

if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d", w.Code)
@@ -68,7 +68,7 @@ func TestDelete(t *testing.T) {
filepath.Join(p, "blobs", "sha256-fe7ac77b725cda2ccad03f88a880ecdfd7a33192d6cae08fce2c0ee1455991ed"),
})

w = createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test2"})
w = createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "test2"})

if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d", w.Code)
@@ -102,7 +102,7 @@ func TestDeleteDuplicateLayers(t *testing.T) {
t.Fatal(err)
}

w := createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test"})
w := createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "test"})
if w.Code != http.StatusOK {
t.Errorf("expected status code 200, actual %d", w.Code)
}

@@ -84,7 +84,7 @@ func TestGenerateChat(t *testing.T) {

go s.sched.Run(context.TODO())

w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test",
Modelfile: fmt.Sprintf(`FROM %s
TEMPLATE """
@@ -144,7 +144,7 @@ func TestGenerateChat(t *testing.T) {
})

t.Run("missing capabilities chat", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "bert",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
"general.architecture": "bert",
@@ -270,7 +270,7 @@ func TestGenerateChat(t *testing.T) {
checkChatResponse(t, w.Body, "test", "Hi!")
})

w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test-system",
Modelfile: "FROM test\nSYSTEM You are a helpful assistant.",
})
@@ -382,7 +382,7 @@ func TestGenerate(t *testing.T) {

go s.sched.Run(context.TODO())

w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test",
Modelfile: fmt.Sprintf(`FROM %s
TEMPLATE """
@@ -442,7 +442,7 @@ func TestGenerate(t *testing.T) {
})

t.Run("missing capabilities generate", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "bert",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
"general.architecture": "bert",
@@ -583,7 +583,7 @@ func TestGenerate(t *testing.T) {
checkGenerateResponse(t, w.Body, "test", "Hi!")
})

w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test-system",
Modelfile: "FROM test\nSYSTEM You are a helpful assistant.",
})
@@ -652,7 +652,7 @@ func TestGenerate(t *testing.T) {
checkGenerateResponse(t, w.Body, "test-system", "Abra kadabra!")
})

w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test-suffix",
Modelfile: `FROM test
TEMPLATE """{{- if .Suffix }}<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>

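The test-suffix model above exercises fill-in-the-middle prompting: when the request carries a Suffix, the template wraps prompt and suffix in CodeLlama-style infill markers. The TEMPLATE line is truncated in this view, so the sketch below renders only the visible branch to show the effect (illustrative, not the full template):

```go
package main

import (
	"os"
	"text/template"
)

func main() {
	// Only the infill branch shown in the diff; the real template continues.
	const fim = `{{- if .Suffix }}<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
{{- end }}`
	t := template.Must(template.New("fim").Parse(fim))
	_ = t.Execute(os.Stdout, map[string]string{
		"Prompt": "func add(a, b int) int {",
		"Suffix": "}",
	})
	// Output: <PRE> func add(a, b int) int { <SUF>} <MID>
}
```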
@@ -31,13 +31,13 @@ func TestList(t *testing.T) {

var s Server
for _, n := range expectNames {
createRequest(t, s.CreateModelHandler, api.CreateRequest{
createRequest(t, s.CreateHandler, api.CreateRequest{
Name: n,
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
})
}

w := createRequest(t, s.ListModelsHandler, nil)
w := createRequest(t, s.ListHandler, nil)
if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d", w.Code)
}

@@ -318,7 +318,7 @@ func TestCase(t *testing.T) {
var s Server
for _, tt := range cases {
t.Run(tt, func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: tt,
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -334,7 +334,7 @@ func TestCase(t *testing.T) {
}

t.Run("create", func(t *testing.T) {
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: strings.ToUpper(tt),
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -350,7 +350,7 @@ func TestCase(t *testing.T) {
})

t.Run("pull", func(t *testing.T) {
w := createRequest(t, s.PullModelHandler, api.PullRequest{
w := createRequest(t, s.PullHandler, api.PullRequest{
Name: strings.ToUpper(tt),
Stream: &stream,
})
@@ -365,7 +365,7 @@ func TestCase(t *testing.T) {
})

t.Run("copy", func(t *testing.T) {
w := createRequest(t, s.CopyModelHandler, api.CopyRequest{
w := createRequest(t, s.CopyHandler, api.CopyRequest{
Source: tt,
Destination: strings.ToUpper(tt),
})
@@ -387,7 +387,7 @@ func TestShow(t *testing.T) {

var s Server

createRequest(t, s.CreateModelHandler, api.CreateRequest{
createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "show-model",
Modelfile: fmt.Sprintf(
"FROM %s\nFROM %s",
@@ -396,7 +396,7 @@ func TestShow(t *testing.T) {
),
})

w := createRequest(t, s.ShowModelHandler, api.ShowRequest{
w := createRequest(t, s.ShowHandler, api.ShowRequest{
Name: "show-model",
})

@@ -1 +1,2 @@
{{ if .System }}<start_system>{{ .System }}<end_message>{{ end }}{{ if .Prompt }}<start_user>{{ .Prompt }}<end_message>{{ end }}<start_assistant>{{ .Response }}<end_message>
{{- range .Messages }}<start_{{ .Role }}>{{ .Content }}<end_message>
{{- end }}<start_assistant>
@@ -1,8 +1,18 @@
{{ if .System }}{{ .System }}
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- else if eq .Role "user" }}
{{- if $system }}{{ $system }}

{{ end }}{{ if .Prompt }}### Instruction:
{{ .Prompt }}
{{ $system = "" }}
{{- end }}### Instruction:
{{ .Content }}

{{ end }}### Response:
{{ .Response }}
{{ else if eq .Role "assistant" }}### Response:
{{ .Content }}

{{ end }}
{{- end }}### Response:

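This hunk and several below share the same idiom: instead of a fixed .System/.Prompt/.Response triple, the new templates range over .Messages and buffer system turns in a $system variable, flushing it into the next user turn so system text always precedes the instruction it applies to. A runnable distillation of that idiom, simplified from the template above:

```go
package main

import (
	"os"
	"text/template"
)

// Message mirrors the shape these templates range over.
type Message struct{ Role, Content string }

func main() {
	// Buffer system content, emit it at the start of the next user turn.
	const demo = `{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}{{ $system = .Content }}
{{- else if eq .Role "user" }}
{{- if $system }}{{ $system }}
{{ $system = "" }}{{ end }}### Instruction:
{{ .Content }}
{{ else if eq .Role "assistant" }}### Response:
{{ .Content }}
{{ end }}
{{- end }}### Response:
`
	t := template.Must(template.New("fold").Parse(demo))
	msgs := []Message{
		{Role: "system", Content: "Answer briefly."},
		{Role: "user", Content: "What is 2+2?"},
	}
	if err := t.Execute(os.Stdout, map[string]any{"Messages": msgs}); err != nil {
		panic(err)
	}
}
```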
@@ -1,6 +1,3 @@
{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ .Response }}<|im_end|>

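The new chatml layout can be exercised directly with text/template: each message becomes its own <|im_start|>…<|im_end|> block and the generation prompt is appended once at the end. A minimal sketch using the template text from the hunk above:

```go
package main

import (
	"os"
	"text/template"
)

type Message struct{ Role, Content string }

func main() {
	// The new chatml template from the hunk above, verbatim.
	const chatml = `{{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>
{{ end }}<|im_start|>assistant
`
	t := template.Must(template.New("chatml").Parse(chatml))
	_ = t.Execute(os.Stdout, map[string]any{"Messages": []Message{
		{Role: "system", Content: "You are a helpful assistant."},
		{Role: "user", Content: "Hello!"},
	}})
}
```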
@@ -1,6 +1,7 @@
{{ if .System }}System: {{ .System }}

{{ end }}{{ if .Prompt }}User: {{ .Prompt }}

{{ end }}Assistant: {{ .Response }}
{{- range .Messages }}
{{- if eq .Role "system" }}System:
{{- else if eq .Role "user" }}User:
{{- else if eq .Role "assistant" }}Assistant:
{{- end }} {{ .Content }}

{{ end }}Assistant:
@@ -1,10 +1,10 @@
{{ if .System }}Source: system

{{ .System }} <step> {{ end }}Source: user

{{ .Prompt }} <step> Source: assistant
{{- if not .Response }}
Destination: user
{{- range .Messages }}Source:
{{- if eq .Role "system" }} system
{{- else if eq .Role "user" }} user
{{- else if eq .Role "assistant" }} assistant
{{- end }}

{{ .Response }} <step>
{{ .Content }} <step> {{ end }}Source: assistant
Destination: user

@@ -1,5 +1,8 @@
{{ if .System }}System: {{ .System }}
{{ end }}{{ if .Prompt }}User:
{{ .Prompt }}
{{- range .Messages }}
{{- if eq .Role "system" }}System: {{ .Content }}
{{ continue }}
{{- else if eq .Role "user" }}User:
{{- else if eq .Role "assistant" }}Falcon:
{{- end }}
{{ .Content }}
{{ end }}Falcon:
{{ .Response }}

@@ -1,5 +1,16 @@
<start_of_turn>user
{{ if .System }}{{ .System }}
{{ end }}{{ .Prompt }}<end_of_turn>
<start_of_turn>model
{{ .Response }}<end_of_turn>
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- continue }}
{{- else if eq .Role "user" }}<start_of_turn>user
{{- if $system }}
{{ $system }}
{{- $system = "" }}
{{- end }}
{{- else if eq .Role "assistant" }}<start_of_turn>model
{{- end }}
{{ .Content }}<end_of_turn>
{{ end }}<start_of_turn>model

@@ -1,9 +1,8 @@
{{ if .System }}System:
{{ .System }}

{{ end }}{{ if .Prompt }}Question:
{{ .Prompt }}
{{- range .Messages }}
{{- if eq .Role "system" }}System:
{{- else if eq .Role "user" }}Question:
{{- else if eq .Role "assistant" }}Answer:
{{- end }}
{{ .Content }}

{{ end }}Answer:
{{ .Response }}

@@ -91,6 +91,10 @@
"template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
"name": "llama3-instruct"
},
{
"template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n    {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n    {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n    {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content']|trim %}\n    {%- set messages = messages[1:] %}\n{%- else %}\n    {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n    {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n    {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n    {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n    {#- Extract the first user message so we can plug it in here #}\n    {%- if messages | length != 0 %}\n        {%- set first_user_message = messages[0]['content']|trim %}\n        {%- set messages = messages[1:] %}\n    {%- else %}\n        {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n    {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n    {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n    {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n    {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n    {%- elif 'tool_calls' in message %}\n        {%- if not message.tool_calls|length == 1 %}\n            {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n        {%- endif %}\n        {%- set tool_call = message.tool_calls[0].function %}\n        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n            {%- for arg_name, arg_val in tool_call.arguments | items %}\n                {{- arg_name + '=\"' + arg_val + '\"' }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \")\" }}\n        {%- else %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n            {{- '\"parameters\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- \"}\" }}\n        {%- endif %}\n        {%- if builtin_tools is defined %}\n            {#- This means we're in ipython mode #}\n            {{- \"<|eom_id|>\" }}\n        {%- else %}\n            {{- \"<|eot_id|>\" }}\n        {%- endif %}\n    {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n        {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n        {%- if message.content is mapping or message.content is iterable %}\n            {{- message.content | tojson }}\n        {%- else %}\n            {{- message.content }}\n        {%- endif %}\n        {{- \"<|eot_id|>\" }}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
"name": "llama3-instruct"
},
{
"template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ 'Question:\n' + message['content'] + '\n\n' }}{% elif message['role'] == 'system' %}\n{{ 'System:\n' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Answer:\n' + message['content'] + '\n\n' }}{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ 'Answer:\n' }}{% endif %}{% endfor %}",
"name": "granite-instruct"

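The entries above extend what looks like the reference index used to auto-detect a named Ollama template from a tokenizer's embedded Jinja chat_template (compare TestCreateDetectTemplate earlier in this diff). Conceptually it is a lookup from known template text to a template name; a toy sketch of that idea, where the field names come from the JSON but the matching logic is assumed:

```go
package main

import "fmt"

// Entry mirrors the JSON objects above: a known Jinja chat_template
// string and the Ollama template name it maps to.
type Entry struct {
	Template string `json:"template"`
	Name     string `json:"name"`
}

// detect is a naive exact-match lookup; the real matcher presumably
// normalizes the template text before comparing.
func detect(index []Entry, chatTemplate string) (string, bool) {
	for _, e := range index {
		if e.Template == chatTemplate {
			return e.Name, true
		}
	}
	return "", false
}

func main() {
	index := []Entry{{Template: "{% for message in messages %}...", Name: "granite-instruct"}}
	if name, ok := detect(index, "{% for message in messages %}..."); ok {
		fmt.Println("detected:", name)
	}
}
```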
@@ -1,6 +1,14 @@
[INST] <<SYS>>
{{- if .System }}
{{ .System }}
{{ end }}<</SYS>>
{{- $system := "" }}[INST] {{ range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- else if eq .Role "user" }}<<SYS>>
{{- if $system }}
{{ $system }}
{{ $system = "" }}
{{- end }}<</SYS>>

{{ .Prompt }} [/INST] {{ .Response }}</s><s>
{{ .Content }} [/INST]
{{- else if eq .Role "assistant" }} {{ .Content }}</s><s>[INST] {{ end }}
{{- end }}
@@ -1,7 +1,5 @@
{{ if .System }}<|start_header_id|>system<|end_header_id|>
{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>

{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
{{ .Content }}<|eot_id|>
{{- end }}<|start_header_id|>assistant<|end_header_id|>

{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>

{{ .Response }}<|eot_id|>
@@ -1,8 +1,17 @@
{{ if .System }}{{ .System }}
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- continue }}
{{- else if eq .Role "user" }}
{{- if $system }}{{ $system }}

{{ end }}{{ if .Prompt }}@@ Instruction
{{ .Prompt }}
{{ $system = "" }}
{{- end }}@@ Instruction
{{- else if eq .Role "assistant" }}@@ Response
{{- end }}
{{ .Content }}

{{ end }}@@ Response
{{ .Response }}

@@ -1,3 +1,6 @@
[INST] {{ if .System }}{{ .System }}
[INST] {{ range $index, $_ := .Messages }}
{{- if eq .Role "system" }}{{ .Content }}

{{ end }}{{ .Prompt }}[/INST] {{ .Response }}</s>
{{ else if eq .Role "user" }}{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }} {{ .Content }}</s>[INST] {{ end }}
{{- end }}
@@ -1 +1,6 @@
{{ if .System }}GPT4 Correct System: {{ .System }}<|end_of_turn|>{{ end }}GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|>
{{- range .Messages }}GPT4 Correct
{{- if eq .Role "system" }} System:
{{- else if eq .Role "user" }} User:
{{- else if eq .Role "assistant" }} Assistant:
{{- end }} {{ .Content }}<|end_of_turn|>
{{- end }}GPT4 Correct Assistant:
@@ -1,6 +1,3 @@
{{ if .System }}<|system|>
{{ .System }}<|end|>
{{ end }}{{ if .Prompt }}<|user|>
{{ .Prompt }}<|end|>
{{- range .Messages }}<|{{ .Role }}|>
{{ .Content }}<|end|>
{{ end }}<|assistant|>
{{ .Response }}<|end|>

@@ -1,9 +1,11 @@
{{ if .System }}### System:
{{ .System }}
{{- range .Messages }}
{{- if eq .Role "system" }}### System:
{{- else if eq .Role "user" }}### User:
{{- else if eq .Role "assistant" }}### Assistant:
{{ .Content }}</s>

{{ end }}{{ if .Prompt }}### User:
{{ .Prompt }}
{{ continue }}
{{- end }}
{{ .Content }}

{{ end }}### Assistant:
{{ .Response }}</s>

@@ -1,8 +1,18 @@
{{ if .System }}{{ .System }}
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- else if eq .Role "user" }}
{{- if $system }}{{ $system }}

{{ end }}{{ if .Prompt }}### Instruction
{{ .Prompt }}
{{ $system = "" }}
{{- end }}### Instruction
{{ .Content }}

{{ end }}### Response
{{ .Response }}<|endoftext|>
{{ else if eq .Role "assistant" }}### Response
{{ .Content }}<|endoftext|>

{{ end }}
{{- end }}### Response

@@ -1,4 +1,14 @@
{{ if .System }}{{ .System }}
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- else if eq .Role "user" }}
{{- if $system }}{{ $system }}

{{ end }}{{ if .Prompt }}USER: {{ .Prompt }}
{{ end }}ASSISTANT: {{ .Response }}</s>
{{ $system = "" }}
{{- end }}USER: {{ .Content }}
{{ else if eq .Role "assistant" }}ASSISTANT: {{ .Content }}</s>
{{ end }}
{{- end }}ASSISTANT:
@@ -1,6 +1,3 @@
{{ if .System }}<|system|>
{{ .System }}</s>
{{ end }}{{ if .Prompt }}<|user|>
{{ .Prompt }}</s>
{{- range .Messages }}<|{{ .Role }}|>
{{ .Content }}</s>
{{ end }}<|assistant|>
{{ .Response }}</s>