Compare commits
46 Commits
jyan/local
...
jyan/ollam
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3958d82908 | ||
|
|
632237139e | ||
|
|
2f5497301f | ||
|
|
b0a947cf1f | ||
|
|
cf7344b221 | ||
|
|
3cd1c9baa9 | ||
|
|
e5f03af4d3 | ||
|
|
b669677be9 | ||
|
|
86a874fceb | ||
|
|
18203bf8d8 | ||
|
|
aaa1c08a5d | ||
|
|
ab9dfbddea | ||
|
|
38d9036b59 | ||
|
|
6f26e9322f | ||
|
|
0e4d653687 | ||
|
|
2c01610616 | ||
|
|
f3d7a481b7 | ||
|
|
f2a96c7d77 | ||
|
|
e8a66680d1 | ||
|
|
079b2c3b03 | ||
|
|
750c1c55f7 | ||
|
|
a622c47bd3 | ||
|
|
ec4c35fe99 | ||
|
|
3d9de805b7 | ||
|
|
f5e3939220 | ||
|
|
ae27d9dcfd | ||
|
|
37096790a7 | ||
|
|
997c903884 | ||
|
|
c8af3c2d96 | ||
|
|
455e61170d | ||
|
|
4de1370a9d | ||
|
|
bbf8f102ee | ||
|
|
bb46bbcf5e | ||
|
|
ac33aa7d37 | ||
|
|
a6cd8f6169 | ||
|
|
c78089263a | ||
|
|
3e5ea035d5 | ||
|
|
5d604eec5b | ||
|
|
db0968f30c | ||
|
|
e12fff8810 | ||
|
|
9b60a038e5 | ||
|
|
83a0cb8d88 | ||
|
|
c0648233f2 | ||
|
|
d835368eb8 | ||
|
|
f02f83660c | ||
|
|
ebc529cbb3 |
10
.github/workflows/release.yaml
vendored
10
.github/workflows/release.yaml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
security set-keychain-settings -lut 3600 build.keychain
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
go-version: "stable"
|
||||
cache: true
|
||||
- name: Build Darwin
|
||||
env:
|
||||
@@ -87,7 +87,7 @@ jobs:
|
||||
write-host "plugin installed"
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
go-version: "stable"
|
||||
cache: true
|
||||
- run: go get ./...
|
||||
- run: |
|
||||
@@ -141,7 +141,7 @@ jobs:
|
||||
write-host "plugin installed"
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
go-version: "stable"
|
||||
cache: true
|
||||
- name: 'Install ROCm'
|
||||
run: |
|
||||
@@ -218,7 +218,7 @@ jobs:
|
||||
write-host "plugin installed"
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
go-version: "stable"
|
||||
cache: true
|
||||
- name: 'Install CUDA'
|
||||
run: |
|
||||
@@ -306,7 +306,7 @@ jobs:
|
||||
write-host "plugin installed"
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
go-version: "stable"
|
||||
cache: true
|
||||
- run: go get
|
||||
- uses: actions/download-artifact@v4
|
||||
|
||||
10
.github/workflows/test.yaml
vendored
10
.github/workflows/test.yaml
vendored
@@ -63,7 +63,7 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
go-version: "stable"
|
||||
cache: true
|
||||
- run: go get ./...
|
||||
- run: |
|
||||
@@ -163,7 +163,7 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
go-version: "stable"
|
||||
cache: true
|
||||
- name: 'Install ROCm'
|
||||
run: |
|
||||
@@ -200,7 +200,7 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
go-version: "stable"
|
||||
cache: true
|
||||
- name: 'Install CUDA'
|
||||
run: |
|
||||
@@ -255,7 +255,7 @@ jobs:
|
||||
submodules: recursive
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
go-version: "stable"
|
||||
cache: false
|
||||
- run: |
|
||||
case ${{ matrix.arch }} in
|
||||
@@ -297,7 +297,7 @@ jobs:
|
||||
submodules: recursive
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
go-version: "stable"
|
||||
cache: true
|
||||
- run: |
|
||||
case ${{ matrix.arch }} in
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
ARG GOLANG_VERSION=1.22.1
|
||||
ARG GOLANG_VERSION=1.22.5
|
||||
ARG CMAKE_VERSION=3.22.1
|
||||
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
|
||||
ARG CUDA_VERSION=11.3.1
|
||||
|
||||
35
README.md
35
README.md
@@ -35,10 +35,10 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla
|
||||
|
||||
## Quickstart
|
||||
|
||||
To run and chat with [Llama 3](https://ollama.com/library/llama3):
|
||||
To run and chat with [Llama 3.1](https://ollama.com/library/llama3.1):
|
||||
|
||||
```
|
||||
ollama run llama3
|
||||
ollama run llama3.1
|
||||
```
|
||||
|
||||
## Model library
|
||||
@@ -49,8 +49,9 @@ Here are some example models that can be downloaded:
|
||||
|
||||
| Model | Parameters | Size | Download |
|
||||
| ------------------ | ---------- | ----- | ------------------------------ |
|
||||
| Llama 3 | 8B | 4.7GB | `ollama run llama3` |
|
||||
| Llama 3 | 70B | 40GB | `ollama run llama3:70b` |
|
||||
| Llama 3.1 | 8B | 4.7GB | `ollama run llama3.1` |
|
||||
| Llama 3.1 | 70B | 40GB | `ollama run llama3.1:70b` |
|
||||
| Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` |
|
||||
| Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` |
|
||||
| Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` |
|
||||
| Gemma 2 | 9B | 5.5GB | `ollama run gemma2` |
|
||||
@@ -64,7 +65,8 @@ Here are some example models that can be downloaded:
|
||||
| LLaVA | 7B | 4.5GB | `ollama run llava` |
|
||||
| Solar | 10.7B | 6.1GB | `ollama run solar` |
|
||||
|
||||
> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
|
||||
> [!NOTE]
|
||||
> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
|
||||
|
||||
## Customize a model
|
||||
|
||||
@@ -96,16 +98,16 @@ See the [guide](docs/import.md) on importing models for more information.
|
||||
|
||||
### Customize a prompt
|
||||
|
||||
Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3` model:
|
||||
Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.1` model:
|
||||
|
||||
```
|
||||
ollama pull llama3
|
||||
ollama pull llama3.1
|
||||
```
|
||||
|
||||
Create a `Modelfile`:
|
||||
|
||||
```
|
||||
FROM llama3
|
||||
FROM llama3.1
|
||||
|
||||
# set the temperature to 1 [higher is more creative, lower is more coherent]
|
||||
PARAMETER temperature 1
|
||||
@@ -140,7 +142,7 @@ ollama create mymodel -f ./Modelfile
|
||||
### Pull a model
|
||||
|
||||
```
|
||||
ollama pull llama3
|
||||
ollama pull llama3.1
|
||||
```
|
||||
|
||||
> This command can also be used to update a local model. Only the diff will be pulled.
|
||||
@@ -148,13 +150,13 @@ ollama pull llama3
|
||||
### Remove a model
|
||||
|
||||
```
|
||||
ollama rm llama3
|
||||
ollama rm llama3.1
|
||||
```
|
||||
|
||||
### Copy a model
|
||||
|
||||
```
|
||||
ollama cp llama3 my-model
|
||||
ollama cp llama3.1 my-model
|
||||
```
|
||||
|
||||
### Multiline input
|
||||
@@ -178,14 +180,14 @@ The image features a yellow smiley face, which is likely the central focus of th
|
||||
### Pass the prompt as an argument
|
||||
|
||||
```
|
||||
$ ollama run llama3 "Summarize this file: $(cat README.md)"
|
||||
$ ollama run llama3.1 "Summarize this file: $(cat README.md)"
|
||||
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
|
||||
```
|
||||
|
||||
### Show model information
|
||||
|
||||
```
|
||||
ollama show llama3
|
||||
ollama show llama3.1
|
||||
```
|
||||
|
||||
### List models on your computer
|
||||
@@ -213,7 +215,7 @@ Next, start the server:
|
||||
Finally, in a separate shell, run a model:
|
||||
|
||||
```
|
||||
./ollama run llama3
|
||||
./ollama run llama3.1
|
||||
```
|
||||
|
||||
## REST API
|
||||
@@ -224,7 +226,7 @@ Ollama has a REST API for running and managing models.
|
||||
|
||||
```
|
||||
curl http://localhost:11434/api/generate -d '{
|
||||
"model": "llama3",
|
||||
"model": "llama3.1",
|
||||
"prompt":"Why is the sky blue?"
|
||||
}'
|
||||
```
|
||||
@@ -233,7 +235,7 @@ curl http://localhost:11434/api/generate -d '{
|
||||
|
||||
```
|
||||
curl http://localhost:11434/api/chat -d '{
|
||||
"model": "llama3",
|
||||
"model": "llama3.1",
|
||||
"messages": [
|
||||
{ "role": "user", "content": "why is the sky blue?" }
|
||||
]
|
||||
@@ -296,6 +298,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
|
||||
- [AI Studio](https://github.com/MindWorkAI/AI-Studio)
|
||||
- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
|
||||
- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
|
||||
|
||||
### Terminal
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@ import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -25,10 +24,7 @@ import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/auth"
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/version"
|
||||
@@ -387,16 +383,3 @@ func (c *Client) Version(ctx context.Context) (string, error) {
|
||||
|
||||
return version.Version, nil
|
||||
}
|
||||
|
||||
func Authorization(ctx context.Context, request *http.Request) (string, error) {
|
||||
data := []byte(fmt.Sprintf("%s,%s,%d", request.Method, request.URL.RequestURI(), time.Now().Unix()))
|
||||
|
||||
token, err := auth.Sign(ctx, data)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// interleave request data into the token
|
||||
key, sig, _ := strings.Cut(token, ":")
|
||||
return fmt.Sprintf("%s:%s:%s", key, base64.StdEncoding.EncodeToString(data), sig), nil
|
||||
}
|
||||
|
||||
@@ -209,6 +209,7 @@ type Options struct {
|
||||
NumPredict int `json:"num_predict,omitempty"`
|
||||
TopK int `json:"top_k,omitempty"`
|
||||
TopP float32 `json:"top_p,omitempty"`
|
||||
MinP float32 `json:"min_p,omitempty"`
|
||||
TFSZ float32 `json:"tfs_z,omitempty"`
|
||||
TypicalP float32 `json:"typical_p,omitempty"`
|
||||
RepeatLastN int `json:"repeat_last_n,omitempty"`
|
||||
|
||||
@@ -138,7 +138,7 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
|
||||
|
||||
|
||||
;FinishedHeadingLabel=Run your first model
|
||||
;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3
|
||||
;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3.1
|
||||
;ClickFinish=%n
|
||||
|
||||
[Registry]
|
||||
|
||||
@@ -4,5 +4,5 @@ write-host "Welcome to Ollama!"
|
||||
write-host ""
|
||||
write-host "Run your first model:"
|
||||
write-host ""
|
||||
write-host "`tollama run llama3"
|
||||
write-host "`tollama run llama3.1"
|
||||
write-host ""
|
||||
126
auth/auth.go
126
auth/auth.go
@@ -3,68 +3,49 @@ package auth
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"encoding/pem"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
const defaultPrivateKey = "id_ed25519"
|
||||
|
||||
func privateKey() (ssh.Signer, error) {
|
||||
func keyPath() (string, error) {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return "", err
|
||||
}
|
||||
|
||||
keyPath := filepath.Join(home, ".ollama", defaultPrivateKey)
|
||||
privateKeyFile, err := os.ReadFile(keyPath)
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
err := initializeKeypair()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return privateKey()
|
||||
} else if err != nil {
|
||||
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return ssh.ParsePrivateKey(privateKeyFile)
|
||||
return filepath.Join(home, ".ollama", defaultPrivateKey), nil
|
||||
}
|
||||
|
||||
func GetPublicKey() (ssh.PublicKey, error) {
|
||||
// try to read pubkey first
|
||||
home, err := os.UserHomeDir()
|
||||
func GetPublicKey() (string, error) {
|
||||
keyPath, err := keyPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return "", err
|
||||
}
|
||||
|
||||
pubkeyPath := filepath.Join(home, ".ollama", defaultPrivateKey+".pub")
|
||||
pubKeyFile, err := os.ReadFile(pubkeyPath)
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
// try from privateKey
|
||||
privateKey, err := privateKey()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read public key: %w", err)
|
||||
}
|
||||
|
||||
return privateKey.PublicKey(), nil
|
||||
} else if err != nil {
|
||||
return nil, fmt.Errorf("failed to read public key: %w", err)
|
||||
privateKeyFile, err := os.ReadFile(keyPath)
|
||||
if err != nil {
|
||||
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
|
||||
return "", err
|
||||
}
|
||||
|
||||
pubKey, _, _, _, err := ssh.ParseAuthorizedKey(pubKeyFile)
|
||||
return pubKey, err
|
||||
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
|
||||
|
||||
return strings.TrimSpace(string(publicKey)), nil
|
||||
}
|
||||
|
||||
func NewNonce(r io.Reader, length int) (string, error) {
|
||||
@@ -77,20 +58,25 @@ func NewNonce(r io.Reader, length int) (string, error) {
|
||||
}
|
||||
|
||||
func Sign(ctx context.Context, bts []byte) (string, error) {
|
||||
privateKey, err := privateKey()
|
||||
keyPath, err := keyPath()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
privateKeyFile, err := os.ReadFile(keyPath)
|
||||
if err != nil {
|
||||
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
|
||||
return "", err
|
||||
}
|
||||
|
||||
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// get the pubkey, but remove the type
|
||||
publicKey, err := GetPublicKey()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
publicKeyBytes := ssh.MarshalAuthorizedKey(publicKey)
|
||||
|
||||
parts := bytes.Split(publicKeyBytes, []byte(" "))
|
||||
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
|
||||
parts := bytes.Split(publicKey, []byte(" "))
|
||||
if len(parts) < 2 {
|
||||
return "", fmt.Errorf("malformed public key")
|
||||
}
|
||||
@@ -103,49 +89,3 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
|
||||
// signature is <pubkey>:<signature>
|
||||
return fmt.Sprintf("%s:%s", bytes.TrimSpace(parts[1]), base64.StdEncoding.EncodeToString(signedData.Blob)), nil
|
||||
}
|
||||
|
||||
func initializeKeypair() error {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
privKeyPath := filepath.Join(home, ".ollama", "id_ed25519")
|
||||
pubKeyPath := filepath.Join(home, ".ollama", "id_ed25519.pub")
|
||||
|
||||
_, err = os.Stat(privKeyPath)
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)
|
||||
cryptoPublicKey, cryptoPrivateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
privateKeyBytes, err := ssh.MarshalPrivateKey(cryptoPrivateKey, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
|
||||
return fmt.Errorf("could not create directory %w", err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privateKeyBytes), 0o600); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sshPublicKey, err := ssh.NewPublicKey(cryptoPublicKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
publicKeyBytes := ssh.MarshalAuthorizedKey(sshPublicKey)
|
||||
|
||||
if err := os.WriteFile(pubKeyPath, publicKeyBytes, 0o644); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Printf("Your new public key is: \n\n%s\n", publicKeyBytes)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
116
cmd/cmd.go
116
cmd/cmd.go
@@ -4,7 +4,10 @@ import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
"encoding/pem"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -12,7 +15,6 @@ import (
|
||||
"math"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
@@ -110,7 +112,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
path = tempfile
|
||||
}
|
||||
|
||||
digest, err := createBlob(cmd, path)
|
||||
digest, err := createBlob(cmd, client, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -261,9 +263,7 @@ func tempZipFiles(path string) (string, error) {
|
||||
return tempfile.Name(), nil
|
||||
}
|
||||
|
||||
var ErrBlobExists = errors.New("blob exists")
|
||||
|
||||
func createBlob(cmd *cobra.Command, path string) (string, error) {
|
||||
func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
|
||||
bin, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
@@ -280,65 +280,12 @@ func createBlob(cmd *cobra.Command, path string) (string, error) {
|
||||
}
|
||||
|
||||
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
|
||||
|
||||
// Use our new CreateBlob request which will include the file path
|
||||
// The server checks for that file and if the server is local, it will copy the file over
|
||||
// If the local copy fails, the server will continue to the default local copy
|
||||
// If that fails, it will continue with the server POST
|
||||
err = CreateBlob(cmd.Context(), path, digest, bin)
|
||||
if errors.Is(err, ErrBlobExists) {
|
||||
return digest, nil
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return digest, nil
|
||||
}
|
||||
|
||||
func CreateBlob(ctx context.Context, src, digest string, r *os.File) (error) {
|
||||
ollamaHost := envconfig.Host
|
||||
|
||||
client := http.DefaultClient
|
||||
base := &url.URL{
|
||||
Scheme: ollamaHost.Scheme,
|
||||
Host: net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
|
||||
}
|
||||
|
||||
path := fmt.Sprintf("/api/blobs/%s", digest)
|
||||
requestURL := base.JoinPath(path)
|
||||
request, err := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
authz, err := api.Authorization(ctx, request)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
request.Header.Set("Authorization", authz)
|
||||
request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
|
||||
request.Header.Set("X-Ollama-File", src)
|
||||
|
||||
resp, err := client.Do(request)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == http.StatusCreated {
|
||||
return nil
|
||||
}
|
||||
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
return ErrBlobExists
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func RunHandler(cmd *cobra.Command, args []string) error {
|
||||
interactive := true
|
||||
|
||||
@@ -432,12 +379,11 @@ func errFromUnknownKey(unknownKeyErr error) error {
|
||||
if len(matches) > 0 {
|
||||
serverPubKey := matches[0]
|
||||
|
||||
publicKey, err := auth.GetPublicKey()
|
||||
localPubKey, err := auth.GetPublicKey()
|
||||
if err != nil {
|
||||
return unknownKeyErr
|
||||
}
|
||||
|
||||
localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(publicKey)))
|
||||
if runtime.GOOS == "linux" && serverPubKey != localPubKey {
|
||||
// try the ollama service public key
|
||||
svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
|
||||
@@ -1126,7 +1072,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
|
||||
}
|
||||
|
||||
func RunServer(cmd *cobra.Command, _ []string) error {
|
||||
if _, err := auth.GetPublicKey(); err != nil {
|
||||
if err := initializeKeypair(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -1143,6 +1089,52 @@ func RunServer(cmd *cobra.Command, _ []string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func initializeKeypair() error {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
privKeyPath := filepath.Join(home, ".ollama", "id_ed25519")
|
||||
pubKeyPath := filepath.Join(home, ".ollama", "id_ed25519.pub")
|
||||
|
||||
_, err = os.Stat(privKeyPath)
|
||||
if os.IsNotExist(err) {
|
||||
fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)
|
||||
cryptoPublicKey, cryptoPrivateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
privateKeyBytes, err := ssh.MarshalPrivateKey(cryptoPrivateKey, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
|
||||
return fmt.Errorf("could not create directory %w", err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privateKeyBytes), 0o600); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sshPublicKey, err := ssh.NewPublicKey(cryptoPublicKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
publicKeyBytes := ssh.MarshalAuthorizedKey(sshPublicKey)
|
||||
|
||||
if err := os.WriteFile(pubKeyPath, publicKeyBytes, 0o644); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Printf("Your new public key is: \n\n%s\n", publicKeyBytes)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -9,13 +10,14 @@ import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"golang.org/x/exp/maps"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
"github.com/ollama/ollama/parser"
|
||||
"github.com/ollama/ollama/progress"
|
||||
"github.com/ollama/ollama/readline"
|
||||
"github.com/ollama/ollama/types/errtypes"
|
||||
@@ -138,6 +140,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_predict <int> Max number of tokens to predict")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter top_k <int> Pick from top k num of tokens")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter top_p <float> Pick token based on sum of probabilities")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter min_p <float> Pick token based on top token probability * min_p")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_ctx <int> Set the context size")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter temperature <float> Set creativity level")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter repeat_penalty <float> How strongly to penalize repetitions")
|
||||
@@ -375,9 +378,9 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
||||
return err
|
||||
}
|
||||
req := &api.ShowRequest{
|
||||
Name: opts.Model,
|
||||
System: opts.System,
|
||||
Options: opts.Options,
|
||||
Name: opts.Model,
|
||||
System: opts.System,
|
||||
Options: opts.Options,
|
||||
}
|
||||
resp, err := client.Show(cmd.Context(), req)
|
||||
if err != nil {
|
||||
@@ -506,31 +509,35 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
||||
}
|
||||
|
||||
func buildModelfile(opts runOptions) string {
|
||||
var mf strings.Builder
|
||||
model := opts.ParentModel
|
||||
if model == "" {
|
||||
model = opts.Model
|
||||
}
|
||||
fmt.Fprintf(&mf, "FROM %s\n", model)
|
||||
var f parser.File
|
||||
f.Commands = append(f.Commands, parser.Command{Name: "model", Args: cmp.Or(opts.ParentModel, opts.Model)})
|
||||
|
||||
if opts.System != "" {
|
||||
fmt.Fprintf(&mf, "SYSTEM \"\"\"%s\"\"\"\n", opts.System)
|
||||
f.Commands = append(f.Commands, parser.Command{Name: "system", Args: opts.System})
|
||||
}
|
||||
|
||||
keys := make([]string, 0)
|
||||
for k := range opts.Options {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
keys := maps.Keys(opts.Options)
|
||||
slices.Sort(keys)
|
||||
for _, k := range keys {
|
||||
fmt.Fprintf(&mf, "PARAMETER %s %v\n", k, opts.Options[k])
|
||||
v := opts.Options[k]
|
||||
var cmds []parser.Command
|
||||
switch t := v.(type) {
|
||||
case []string:
|
||||
for _, s := range t {
|
||||
cmds = append(cmds, parser.Command{Name: k, Args: s})
|
||||
}
|
||||
default:
|
||||
cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", t)})
|
||||
}
|
||||
|
||||
f.Commands = append(f.Commands, cmds...)
|
||||
}
|
||||
fmt.Fprintln(&mf)
|
||||
|
||||
for _, msg := range opts.Messages {
|
||||
fmt.Fprintf(&mf, "MESSAGE %s \"\"\"%s\"\"\"\n", msg.Role, msg.Content)
|
||||
f.Commands = append(f.Commands, parser.Command{Name: "message", Args: fmt.Sprintf("%s: %s", msg.Role, msg.Content)})
|
||||
}
|
||||
|
||||
return mf.String()
|
||||
return f.String()
|
||||
}
|
||||
|
||||
func normalizeFilePath(fp string) string {
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
"text/template"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
@@ -57,58 +55,53 @@ d:\path with\spaces\seven.svg inbetween7 c:\users\jdoe\eight.png inbetween8
|
||||
|
||||
func TestModelfileBuilder(t *testing.T) {
|
||||
opts := runOptions{
|
||||
Model: "hork",
|
||||
System: "You are part horse and part shark, but all hork. Do horklike things",
|
||||
Model: "hork",
|
||||
System: "You are part horse and part shark, but all hork. Do horklike things",
|
||||
Messages: []api.Message{
|
||||
{Role: "user", Content: "Hey there hork!"},
|
||||
{Role: "assistant", Content: "Yes it is true, I am half horse, half shark."},
|
||||
},
|
||||
Options: map[string]interface{}{},
|
||||
Options: map[string]any{
|
||||
"temperature": 0.9,
|
||||
"seed": 42,
|
||||
"penalize_newline": false,
|
||||
"stop": []string{"hi", "there"},
|
||||
},
|
||||
}
|
||||
|
||||
opts.Options["temperature"] = 0.9
|
||||
opts.Options["seed"] = 42
|
||||
opts.Options["penalize_newline"] = false
|
||||
opts.Options["stop"] = []string{"hi", "there"}
|
||||
|
||||
mf := buildModelfile(opts)
|
||||
expectedModelfile := `FROM {{.Model}}
|
||||
SYSTEM """{{.System}}"""
|
||||
t.Run("model", func(t *testing.T) {
|
||||
expect := `FROM hork
|
||||
SYSTEM You are part horse and part shark, but all hork. Do horklike things
|
||||
PARAMETER penalize_newline false
|
||||
PARAMETER seed 42
|
||||
PARAMETER stop [hi there]
|
||||
PARAMETER stop hi
|
||||
PARAMETER stop there
|
||||
PARAMETER temperature 0.9
|
||||
|
||||
MESSAGE user """Hey there hork!"""
|
||||
MESSAGE assistant """Yes it is true, I am half horse, half shark."""
|
||||
MESSAGE user Hey there hork!
|
||||
MESSAGE assistant Yes it is true, I am half horse, half shark.
|
||||
`
|
||||
|
||||
tmpl, err := template.New("").Parse(expectedModelfile)
|
||||
require.NoError(t, err)
|
||||
actual := buildModelfile(opts)
|
||||
if diff := cmp.Diff(expect, actual); diff != "" {
|
||||
t.Errorf("mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
|
||||
var buf bytes.Buffer
|
||||
err = tmpl.Execute(&buf, opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, buf.String(), mf)
|
||||
|
||||
opts.ParentModel = "horseshark"
|
||||
mf = buildModelfile(opts)
|
||||
expectedModelfile = `FROM {{.ParentModel}}
|
||||
SYSTEM """{{.System}}"""
|
||||
t.Run("parent model", func(t *testing.T) {
|
||||
opts.ParentModel = "horseshark"
|
||||
expect := `FROM horseshark
|
||||
SYSTEM You are part horse and part shark, but all hork. Do horklike things
|
||||
PARAMETER penalize_newline false
|
||||
PARAMETER seed 42
|
||||
PARAMETER stop [hi there]
|
||||
PARAMETER stop hi
|
||||
PARAMETER stop there
|
||||
PARAMETER temperature 0.9
|
||||
|
||||
MESSAGE user """Hey there hork!"""
|
||||
MESSAGE assistant """Yes it is true, I am half horse, half shark."""
|
||||
MESSAGE user Hey there hork!
|
||||
MESSAGE assistant Yes it is true, I am half horse, half shark.
|
||||
`
|
||||
|
||||
tmpl, err = template.New("").Parse(expectedModelfile)
|
||||
require.NoError(t, err)
|
||||
|
||||
var parentBuf bytes.Buffer
|
||||
err = tmpl.Execute(&parentBuf, opts)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, parentBuf.String(), mf)
|
||||
actual := buildModelfile(opts)
|
||||
if diff := cmp.Diff(expect, actual); diff != "" {
|
||||
t.Errorf("mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -71,6 +71,11 @@ func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
|
||||
"tokenizer.ggml.unknown_token_id": uint32(0),
|
||||
}
|
||||
|
||||
if m.Params.HeadDimension > 0 {
|
||||
kv["llama.attention.key_length"] = uint32(m.Params.HeadDimension)
|
||||
kv["llama.attention.value_length"] = uint32(m.Params.HeadDimension)
|
||||
}
|
||||
|
||||
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
||||
}
|
||||
|
||||
|
||||
204
docs/api.md
204
docs/api.md
@@ -40,6 +40,7 @@ Generate a response for a given prompt with a provided model. This is a streamin
|
||||
|
||||
- `model`: (required) the [model name](#model-names)
|
||||
- `prompt`: the prompt to generate a response for
|
||||
- `suffix`: the text after the model response
|
||||
- `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)
|
||||
|
||||
Advanced parameters (optional):
|
||||
@@ -57,7 +58,8 @@ Advanced parameters (optional):
|
||||
|
||||
Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
|
||||
|
||||
> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
|
||||
> [!IMPORTANT]
|
||||
> It's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
|
||||
|
||||
### Examples
|
||||
|
||||
@@ -148,8 +150,44 @@ If `stream` is set to `false`, the response will be a single JSON object:
|
||||
}
|
||||
```
|
||||
|
||||
#### Request (with suffix)
|
||||
|
||||
##### Request
|
||||
|
||||
```shell
|
||||
curl http://localhost:11434/api/generate -d '{
|
||||
"model": "codellama:code",
|
||||
"prompt": "def compute_gcd(a, b):",
|
||||
"suffix": " return result",
|
||||
"options": {
|
||||
"temperature": 0
|
||||
},
|
||||
"stream": false
|
||||
}'
|
||||
```
|
||||
|
||||
##### Response
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "codellama:code",
|
||||
"created_at": "2024-07-22T20:47:51.147561Z",
|
||||
"response": "\n if a == 0:\n return b\n else:\n return compute_gcd(b % a, a)\n\ndef compute_lcm(a, b):\n result = (a * b) / compute_gcd(a, b)\n",
|
||||
"done": true,
|
||||
"done_reason": "stop",
|
||||
"context": [...],
|
||||
"total_duration": 1162761250,
|
||||
"load_duration": 6683708,
|
||||
"prompt_eval_count": 17,
|
||||
"prompt_eval_duration": 201222000,
|
||||
"eval_count": 63,
|
||||
"eval_duration": 953997000
|
||||
}
|
||||
```
|
||||
|
||||
#### Request (JSON mode)
|
||||
|
||||
> [!IMPORTANT]
|
||||
> When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.
|
||||
|
||||
##### Request
|
||||
@@ -298,6 +336,7 @@ curl http://localhost:11434/api/generate -d '{
|
||||
"num_predict": 100,
|
||||
"top_k": 20,
|
||||
"top_p": 0.9,
|
||||
"min_p": 0.0,
|
||||
"tfs_z": 0.5,
|
||||
"typical_p": 0.7,
|
||||
"repeat_last_n": 33,
|
||||
@@ -380,12 +419,14 @@ Generate the next message in a chat with a provided model. This is a streaming e
|
||||
|
||||
- `model`: (required) the [model name](#model-names)
|
||||
- `messages`: the messages of the chat, this can be used to keep a chat memory
|
||||
- `tools`: tools for the model to use if supported. Requires `stream` to be set to `false`
|
||||
|
||||
The `message` object has the following fields:
|
||||
|
||||
- `role`: the role of the message, either `system`, `user` or `assistant`
|
||||
- `role`: the role of the message, either `system`, `user`, `assistant`, or `tool`
|
||||
- `content`: the content of the message
|
||||
- `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
|
||||
- `tool_calls` (optional): a list of tools the model wants to use
|
||||
|
||||
Advanced parameters (optional):
|
||||
|
||||
@@ -546,7 +587,7 @@ Final response:
|
||||
|
||||
##### Request
|
||||
|
||||
Send a chat message with a conversation history.
|
||||
Send a chat message with images. The images should be provided as an array, with the individual images encoded in Base64.
|
||||
|
||||
```shell
|
||||
curl http://localhost:11434/api/chat -d '{
|
||||
@@ -622,6 +663,79 @@ curl http://localhost:11434/api/chat -d '{
|
||||
}
|
||||
```
|
||||
|
||||
#### Chat request (with tools)
|
||||
|
||||
##### Request
|
||||
|
||||
```
|
||||
curl http://localhost:11434/api/chat -d '{
|
||||
"model": "mistral",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the weather today in Paris?"
|
||||
}
|
||||
],
|
||||
"stream": false,
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather for a location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The location to get the weather for, e.g. San Francisco, CA"
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
|
||||
"enum": ["celsius", "fahrenheit"]
|
||||
}
|
||||
},
|
||||
"required": ["location", "format"]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
##### Response
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "mistral:7b-instruct-v0.3-q4_K_M",
|
||||
"created_at": "2024-07-22T20:33:28.123648Z",
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": [
|
||||
{
|
||||
"function": {
|
||||
"name": "get_current_weather",
|
||||
"arguments": {
|
||||
"format": "celsius",
|
||||
"location": "Paris, FR"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"done_reason": "stop",
|
||||
"done": true,
|
||||
"total_duration": 885095291,
|
||||
"load_duration": 3753500,
|
||||
"prompt_eval_count": 122,
|
||||
"prompt_eval_duration": 328493000,
|
||||
"eval_count": 33,
|
||||
"eval_duration": 552222000
|
||||
}
|
||||
```
|
||||
|
||||
## Create a Model
|
||||
|
||||
```shell
|
||||
@@ -1026,7 +1140,7 @@ If `stream` is set to `false`, then the response is a single JSON object:
|
||||
## Generate Embeddings
|
||||
|
||||
```shell
|
||||
POST /api/embeddings
|
||||
POST /api/embed
|
||||
```
|
||||
|
||||
Generate embeddings from a model
|
||||
@@ -1034,10 +1148,11 @@ Generate embeddings from a model
|
||||
### Parameters
|
||||
|
||||
- `model`: name of model to generate embeddings from
|
||||
- `prompt`: text to generate embeddings for
|
||||
- `input`: text or list of text to generate embeddings for
|
||||
|
||||
Advanced parameters:
|
||||
|
||||
- `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true`
|
||||
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
|
||||
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
|
||||
|
||||
@@ -1046,9 +1161,9 @@ Advanced parameters:
|
||||
#### Request
|
||||
|
||||
```shell
|
||||
curl http://localhost:11434/api/embeddings -d '{
|
||||
curl http://localhost:11434/api/embed -d '{
|
||||
"model": "all-minilm",
|
||||
"prompt": "Here is an article about llamas..."
|
||||
"input": "Why is the sky blue?"
|
||||
}'
|
||||
```
|
||||
|
||||
@@ -1056,10 +1171,35 @@ curl http://localhost:11434/api/embeddings -d '{
|
||||
|
||||
```json
|
||||
{
|
||||
"embedding": [
|
||||
0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
|
||||
0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
|
||||
]
|
||||
"model": "all-minilm",
|
||||
"embeddings": [[
|
||||
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
|
||||
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
|
||||
]]
|
||||
}
|
||||
```
|
||||
|
||||
#### Request (Multiple input)
|
||||
|
||||
```shell
|
||||
curl http://localhost:11434/api/embed -d '{
|
||||
"model": "all-minilm",
|
||||
"input": ["Why is the sky blue?", "Why is the grass green?"]
|
||||
}'
|
||||
```
|
||||
|
||||
#### Response
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "all-minilm",
|
||||
"embeddings": [[
|
||||
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
|
||||
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
|
||||
],[
|
||||
-0.0098027075, 0.06042469, 0.025257962, -0.006364387, 0.07272725,
|
||||
0.017194884, 0.09032035, -0.051705178, 0.09951512, 0.09072481
|
||||
]]
|
||||
}
|
||||
```
|
||||
|
||||
@@ -1106,3 +1246,45 @@ A single JSON object will be returned.
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Generate Embedding
|
||||
|
||||
> Note: this endpoint has been superseded by `/api/embed`
|
||||
|
||||
```shell
|
||||
POST /api/embeddings
|
||||
```
|
||||
|
||||
Generate embeddings from a model
|
||||
|
||||
### Parameters
|
||||
|
||||
- `model`: name of model to generate embeddings from
|
||||
- `prompt`: text to generate embeddings for
|
||||
|
||||
Advanced parameters:
|
||||
|
||||
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
|
||||
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
|
||||
|
||||
### Examples
|
||||
|
||||
#### Request
|
||||
|
||||
```shell
|
||||
curl http://localhost:11434/api/embeddings -d '{
|
||||
"model": "all-minilm",
|
||||
"prompt": "Here is an article about llamas..."
|
||||
}'
|
||||
```
|
||||
|
||||
#### Response
|
||||
|
||||
```json
|
||||
{
|
||||
"embedding": [
|
||||
0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
|
||||
0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
@@ -63,7 +63,7 @@ docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 114
|
||||
Now you can run a model:
|
||||
|
||||
```
|
||||
docker exec -it ollama ollama run llama3
|
||||
docker exec -it ollama ollama run llama3.1
|
||||
```
|
||||
|
||||
### Try different models
|
||||
|
||||
@@ -227,7 +227,7 @@ curl http://localhost:11434/api/chat -d '{"model": "mistral"}'
|
||||
|
||||
To preload a model using the CLI, use the command:
|
||||
```shell
|
||||
ollama run llama3 ""
|
||||
ollama run llama3.1 ""
|
||||
```
|
||||
|
||||
## How do I keep a model loaded in memory or make it unload immediately?
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Ollama Model File
|
||||
|
||||
> Note: `Modelfile` syntax is in development
|
||||
> [!NOTE]
|
||||
> `Modelfile` syntax is in development
|
||||
|
||||
A model file is the blueprint to create and share models with Ollama.
|
||||
|
||||
@@ -140,6 +141,7 @@ PARAMETER <parameter> <parametervalue>
|
||||
| num_predict | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) | int | num_predict 42 |
|
||||
| top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 |
|
||||
| top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 |
|
||||
| min_p | Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0) | float | min_p 0.05 |
|
||||
|
||||
### TEMPLATE
|
||||
|
||||
|
||||
@@ -78,8 +78,8 @@ curl http://localhost:11434/v1/chat/completions \
|
||||
- [x] Streaming
|
||||
- [x] JSON mode
|
||||
- [x] Reproducible outputs
|
||||
- [x] Tools (streaming support coming soon)
|
||||
- [ ] Vision
|
||||
- [ ] Function calling
|
||||
- [ ] Logprobs
|
||||
|
||||
#### Supported request fields
|
||||
@@ -97,9 +97,9 @@ curl http://localhost:11434/v1/chat/completions \
|
||||
- [x] `temperature`
|
||||
- [x] `top_p`
|
||||
- [x] `max_tokens`
|
||||
- [ ] `logit_bias`
|
||||
- [ ] `tools`
|
||||
- [x] `tools`
|
||||
- [ ] `tool_choice`
|
||||
- [ ] `logit_bias`
|
||||
- [ ] `user`
|
||||
- [ ] `n`
|
||||
|
||||
|
||||
173
docs/template.md
Normal file
173
docs/template.md
Normal file
@@ -0,0 +1,173 @@
|
||||
# Template
|
||||
|
||||
Ollama provides a powerful templating engine backed by Go's built-in templating engine to construct prompts for your large language model. This feature is a valuable tool to get the most out of your models.
|
||||
|
||||
## Basic Template Structure
|
||||
|
||||
A basic Go template consists of three main parts:
|
||||
|
||||
* **Layout**: The overall structure of the template.
|
||||
* **Variables**: Placeholders for dynamic data that will be replaced with actual values when the template is rendered.
|
||||
* **Functions**: Custom functions or logic that can be used to manipulate the template's content.
|
||||
|
||||
Here's an example of a simple chat template:
|
||||
|
||||
```gotmpl
|
||||
{{- range .Messages }}
|
||||
{{ .Role }}: {{ .Content }}
|
||||
{{- end }}
|
||||
```
|
||||
|
||||
In this example, we have:
|
||||
|
||||
* A basic messages structure (layout)
|
||||
* Three variables: `Messages`, `Role`, and `Content` (variables)
|
||||
* A custom function (action) that iterates over an array of items (`range .Messages`) and displays each item
|
||||
|
||||
## Adding templates to your model
|
||||
|
||||
By default, models imported into Ollama have a default template of `{{ .Prompt }}`, i.e. user inputs are sent verbatim to the LLM. This is appropriate for text or code completion models but lacks essential markers for chat or instruction models.
|
||||
|
||||
Omitting a template in these models puts the responsibility of correctly templating input onto the user. Adding a template allows users to easily get the best results from the model.
|
||||
|
||||
To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3.
|
||||
|
||||
```dockerfile
|
||||
FROM llama3
|
||||
|
||||
TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>
|
||||
|
||||
{{ .System }}<|eot_id|>
|
||||
{{- end }}
|
||||
{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
|
||||
|
||||
{{ .Content }}<|eot_id|>
|
||||
{{- end }}<|start_header_id|>assistant<|end_header_id|>
|
||||
|
||||
"""
|
||||
```
|
||||
|
||||
## Variables
|
||||
|
||||
`System` (string): system prompt
|
||||
|
||||
`Prompt` (string): user prompt
|
||||
|
||||
`Response` (string): assistant response
|
||||
|
||||
`Suffix` (string): text inserted after the assistant's response
|
||||
|
||||
`Messages` (list): list of messages
|
||||
|
||||
`Messages[].Role` (string): role which can be one of `system`, `user`, `assistant`, or `tool`
|
||||
|
||||
`Messages[].Content` (string): message content
|
||||
|
||||
`Messages[].ToolCalls` (list): list of tools the model wants to call
|
||||
|
||||
`Messages[].ToolCalls[].Function` (object): function to call
|
||||
|
||||
`Messages[].ToolCalls[].Function.Name` (string): function name
|
||||
|
||||
`Messages[].ToolCalls[].Function.Arguments` (map): mapping of argument name to argument value
|
||||
|
||||
`Tools` (list): list of tools the model can access
|
||||
|
||||
`Tools[].Type` (string): schema type. `type` is always `function`
|
||||
|
||||
`Tools[].Function` (object): function definition
|
||||
|
||||
`Tools[].Function.Name` (string): function name
|
||||
|
||||
`Tools[].Function.Description` (string): function description
|
||||
|
||||
`Tools[].Function.Parameters` (object): function parameters
|
||||
|
||||
`Tools[].Function.Parameters.Type` (string): schema type. `type` is always `object`
|
||||
|
||||
`Tools[].Function.Parameters.Required` (list): list of required properties
|
||||
|
||||
`Tools[].Function.Parameters.Properties` (map): mapping of property name to property definition
|
||||
|
||||
`Tools[].Function.Parameters.Properties[].Type` (string): property type
|
||||
|
||||
`Tools[].Function.Parameters.Properties[].Description` (string): property description
|
||||
|
||||
`Tools[].Function.Parameters.Properties[].Enum` (list): list of valid values
|
||||
|
||||
## Tips and Best Practices
|
||||
|
||||
Keep the following tips and best practices in mind when working with Go templates:
|
||||
|
||||
* **Be mindful of dot**: Control flow structures like `range` and `with` changes the value `.`
|
||||
* **Out-of-scope variables**: Use `$.` to reference variables not currently in scope, starting from the root
|
||||
* **Whitespace control**: Use `-` to trim leading (`{{-`) and trailing (`-}}`) whitespace
|
||||
|
||||
## Examples
|
||||
|
||||
### Example Messages
|
||||
|
||||
#### ChatML
|
||||
|
||||
ChatML is a popular template format. It can be used for models such as Databrick's DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
|
||||
|
||||
```gotmpl
|
||||
{{- if .System }}<|im_start|>system
|
||||
{{ .System }}<|im_end|>
|
||||
{{ end }}
|
||||
{{- range .Messages }}<|im_start|>{{ .Role }}
|
||||
{{ .Content }}<|im_end|>
|
||||
{{ end }}<|im_start|>assistant
|
||||
{{ else }}
|
||||
{{ if .System }}<|im_start|>system
|
||||
{{ .System }}<|im_end|>
|
||||
```
|
||||
|
||||
### Example Tools
|
||||
|
||||
Tools support can be added to a model by adding a `{{ .Tools }}` node to the template. This feature is useful for models trained to call external tools and can a powerful tool for retrieving real-time data or performing complex tasks.
|
||||
|
||||
#### Mistral
|
||||
|
||||
Mistral v0.3 and Mixtral 8x22B supports tool calling.
|
||||
|
||||
```gotmpl
|
||||
{{- range $index, $_ := .Messages }}
|
||||
{{- if eq .Role "user" }}
|
||||
{{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS]
|
||||
{{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}
|
||||
|
||||
{{ end }}{{ .Content }}[/INST]
|
||||
{{- else if eq .Role "assistant" }}
|
||||
{{- if .Content }} {{ .Content }}</s>
|
||||
{{- else if .ToolCalls }}[TOOL_CALLS] [
|
||||
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}}
|
||||
{{- end }}]</s>
|
||||
{{- end }}
|
||||
{{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
```
|
||||
|
||||
### Example Fill-in-Middle
|
||||
|
||||
Fill-in-middle support can be added to a model by adding a `{{ .Suffix }}` node to the template. This feature is useful for models that are trained to generate text in the middle of user input, such as code completion models.
|
||||
|
||||
#### CodeLlama
|
||||
|
||||
CodeLlama [7B](https://ollama.com/library/codellama:7b-code) and [13B](https://ollama.com/library/codellama:13b-code) code completion models support fill-in-middle.
|
||||
|
||||
```gotmpl
|
||||
<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> CodeLlama 34B and 70B code completion and all instruct and Python fine-tuned models do not support fill-in-middle.
|
||||
|
||||
#### Codestral
|
||||
|
||||
Codestral [22B](https://ollama.com/library/codestral:22b) supports fill-in-middle.
|
||||
|
||||
```gotmpl
|
||||
[SUFFIX]{{ .Suffix }}[PREFIX] {{ .Prompt }}
|
||||
```
|
||||
@@ -15,7 +15,7 @@ import { Ollama } from "@langchain/community/llms/ollama";
|
||||
|
||||
const ollama = new Ollama({
|
||||
baseUrl: "http://localhost:11434",
|
||||
model: "llama3",
|
||||
model: "llama3.1",
|
||||
});
|
||||
|
||||
const answer = await ollama.invoke(`why is the sky blue?`);
|
||||
@@ -23,7 +23,7 @@ const answer = await ollama.invoke(`why is the sky blue?`);
|
||||
console.log(answer);
|
||||
```
|
||||
|
||||
That will get us the same thing as if we ran `ollama run llama3 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
|
||||
That will get us the same thing as if we ran `ollama run llama3.1 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
|
||||
|
||||
```bash
|
||||
npm install cheerio
|
||||
|
||||
2
go.mod
2
go.mod
@@ -25,6 +25,7 @@ require (
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/Masterminds/semver/v3 v3.2.1 // indirect
|
||||
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect
|
||||
github.com/bytedance/sonic/loader v0.1.1 // indirect
|
||||
github.com/chewxy/hm v1.0.0 // indirect
|
||||
@@ -40,6 +41,7 @@ require (
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
github.com/xtgo/set v1.0.0 // indirect
|
||||
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
|
||||
golang.org/x/mod v0.19.0 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
|
||||
gonum.org/v1/gonum v0.15.0 // indirect
|
||||
gorgonia.org/vecf32 v0.9.0 // indirect
|
||||
|
||||
4
go.sum
4
go.sum
@@ -4,6 +4,8 @@ dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7
|
||||
gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
||||
github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0=
|
||||
github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
|
||||
github.com/agnivade/levenshtein v1.1.1 h1:QY8M92nrzkmr798gCo3kmMyqXFzdQVpxLlGPRBij0P8=
|
||||
github.com/agnivade/levenshtein v1.1.1/go.mod h1:veldBMzWxcCG2ZvUTKD2kJNRdCk5hVbJomOvKkmgYbo=
|
||||
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
|
||||
@@ -240,6 +242,8 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB
|
||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.19.0 h1:fEdghXQSo20giMthA7cd28ZC+jts4amQ3YMXiP5oMQ8=
|
||||
golang.org/x/mod v0.19.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
|
||||
@@ -4,12 +4,45 @@ package integration
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func floatsEqual32(a, b float32) bool {
|
||||
return math.Abs(float64(a-b)) <= 1e-4
|
||||
}
|
||||
|
||||
func floatsEqual64(a, b float64) bool {
|
||||
return math.Abs(a-b) <= 1e-4
|
||||
}
|
||||
|
||||
func TestAllMiniLMEmbeddings(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
req := api.EmbeddingRequest{
|
||||
Model: "all-minilm",
|
||||
Prompt: "why is the sky blue?",
|
||||
}
|
||||
|
||||
res, err := embeddingTestHelper(ctx, t, req)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("error: %v", err)
|
||||
}
|
||||
|
||||
if len(res.Embedding) != 384 {
|
||||
t.Fatalf("expected 384 floats, got %d", len(res.Embedding))
|
||||
}
|
||||
|
||||
if !floatsEqual64(res.Embedding[0], 0.06642947345972061) {
|
||||
t.Fatalf("expected 0.06642947345972061, got %.16f", res.Embedding[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestAllMiniLMEmbed(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
@@ -33,8 +66,8 @@ func TestAllMiniLMEmbed(t *testing.T) {
|
||||
t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
|
||||
}
|
||||
|
||||
if res.Embeddings[0][0] != 0.010071031 {
|
||||
t.Fatalf("expected 0.010071031, got %f", res.Embeddings[0][0])
|
||||
if !floatsEqual32(res.Embeddings[0][0], 0.010071031) {
|
||||
t.Fatalf("expected 0.010071031, got %.8f", res.Embeddings[0][0])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,12 +94,12 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
|
||||
t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
|
||||
}
|
||||
|
||||
if res.Embeddings[0][0] != 0.010071031 || res.Embeddings[1][0] != -0.009802706 {
|
||||
t.Fatalf("expected 0.010071031 and -0.009802706, got %f and %f", res.Embeddings[0][0], res.Embeddings[1][0])
|
||||
if !floatsEqual32(res.Embeddings[0][0], 0.010071031) || !floatsEqual32(res.Embeddings[1][0], -0.009802706) {
|
||||
t.Fatalf("expected 0.010071031 and -0.009802706, got %.8f and %.8f", res.Embeddings[0][0], res.Embeddings[1][0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestAllMiniLmEmbedTruncate(t *testing.T) {
|
||||
func TestAllMiniLMEmbedTruncate(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
@@ -135,6 +168,22 @@ func TestAllMiniLmEmbedTruncate(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func embeddingTestHelper(ctx context.Context, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatalf("failed to pull model %s: %v", req.Model, err)
|
||||
}
|
||||
|
||||
response, err := client.Embeddings(ctx, &req)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return response, nil
|
||||
}
|
||||
|
||||
func embedTestHelper(ctx context.Context, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
4
llm/ext_server/server.cpp
vendored
4
llm/ext_server/server.cpp
vendored
@@ -41,6 +41,7 @@
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#include <errhandlingapi.h>
|
||||
#endif
|
||||
|
||||
#include <cstddef>
|
||||
@@ -2737,6 +2738,9 @@ int wmain(int argc, wchar_t **wargv) {
|
||||
for (int i = 0; i < argc; ++i) {
|
||||
argv[i] = wchar_to_char(wargv[i]);
|
||||
}
|
||||
|
||||
// Adjust error mode to avoid error dialog after we start.
|
||||
SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||
#else
|
||||
int main(int argc, char **argv) {
|
||||
#endif
|
||||
|
||||
@@ -2,7 +2,10 @@ package llm
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
//go:embed build/darwin/x86_64/*/bin/*
|
||||
var libEmbed embed.FS
|
||||
|
||||
var LlamaServerSysProcAttr = &syscall.SysProcAttr{}
|
||||
|
||||
@@ -2,7 +2,10 @@ package llm
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
//go:embed build/darwin/arm64/*/bin/*
|
||||
var libEmbed embed.FS
|
||||
|
||||
var LlamaServerSysProcAttr = &syscall.SysProcAttr{}
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
package llm
|
||||
|
||||
import "embed"
|
||||
import (
|
||||
"embed"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
//go:embed build/linux/*/*/bin/*
|
||||
var libEmbed embed.FS
|
||||
|
||||
var LlamaServerSysProcAttr = &syscall.SysProcAttr{}
|
||||
|
||||
@@ -1,6 +1,20 @@
|
||||
package llm
|
||||
|
||||
import "embed"
|
||||
import (
|
||||
"embed"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// unused on windows
|
||||
var libEmbed embed.FS
|
||||
|
||||
const CREATE_DEFAULT_ERROR_MODE = 0x04000000
|
||||
|
||||
var LlamaServerSysProcAttr = &syscall.SysProcAttr{
|
||||
// Wire up the default error handling logic If for some reason a DLL is
|
||||
// missing in the path this will pop up a GUI Dialog explaining the fault so
|
||||
// the user can either fix their PATH, or report a bug. Without this
|
||||
// setting, the process exits immediately with a generic exit status but no
|
||||
// way to (easily) figure out what the actual missing DLL was.
|
||||
CreationFlags: CREATE_DEFAULT_ERROR_MODE,
|
||||
}
|
||||
|
||||
70
llm/patches/10-llama3-rope.diff
Normal file
70
llm/patches/10-llama3-rope.diff
Normal file
@@ -0,0 +1,70 @@
|
||||
From 2f872f294fb6f5c6e8f983b68c40ea656053dd92 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Yang <mxyng@pm.me>
|
||||
Date: Tue, 23 Jul 2024 14:33:29 -0700
|
||||
Subject: [PATCH] llama 3.1 rope scaling
|
||||
|
||||
---
|
||||
src/llama.cpp | 14 ++++++++++++--
|
||||
1 file changed, 12 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/llama.cpp b/src/llama.cpp
|
||||
index 8fe51971..a9969df8 100644
|
||||
--- a/src/llama.cpp
|
||||
+++ b/src/llama.cpp
|
||||
@@ -2472,6 +2472,7 @@ struct llama_layer {
|
||||
// long rope factors
|
||||
struct ggml_tensor * rope_long = nullptr;
|
||||
struct ggml_tensor * rope_short = nullptr;
|
||||
+ struct ggml_tensor * rope_freqs = nullptr;
|
||||
|
||||
// bitnet scale
|
||||
struct ggml_tensor * wq_scale;
|
||||
@@ -6143,6 +6144,8 @@ static bool llm_load_tensors(
|
||||
|
||||
layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
|
||||
|
||||
+ layer.rope_freqs = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ROPE_FREQS, "weight"), { n_embd/n_head/2 }, llama_model_loader::TENSOR_NOT_REQUIRED | (i != 0 ? llama_model_loader::TENSOR_DUPLICATED : 0));
|
||||
+
|
||||
if (n_expert == 0) {
|
||||
layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
|
||||
layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd});
|
||||
@@ -8620,6 +8623,10 @@ struct llm_build_context {
|
||||
// choose long/short freq factors based on the context size
|
||||
const auto n_ctx_pre_seq = cparams.n_ctx / cparams.n_seq_max;
|
||||
|
||||
+ if (model.layers[il].rope_freqs != nullptr) {
|
||||
+ return model.layers[il].rope_freqs;
|
||||
+ }
|
||||
+
|
||||
if (n_ctx_pre_seq > hparams.n_ctx_orig_yarn) {
|
||||
return model.layers[il].rope_long;
|
||||
}
|
||||
@@ -8814,6 +8821,9 @@ struct llm_build_context {
|
||||
|
||||
// self-attention
|
||||
{
|
||||
+ // rope freq factors for llama3; may return nullptr for llama2 and other models
|
||||
+ struct ggml_tensor * rope_factors = build_rope_factors(il);
|
||||
+
|
||||
// compute Q and K and RoPE them
|
||||
struct ggml_tensor * Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq, cur);
|
||||
cb(Qcur, "Qcur", il);
|
||||
@@ -8837,14 +8847,14 @@ struct llm_build_context {
|
||||
}
|
||||
|
||||
Qcur = ggml_rope_ext(
|
||||
- ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, nullptr,
|
||||
+ ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, rope_factors,
|
||||
n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
|
||||
ext_factor, attn_factor, beta_fast, beta_slow
|
||||
);
|
||||
cb(Qcur, "Qcur", il);
|
||||
|
||||
Kcur = ggml_rope_ext(
|
||||
- ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, nullptr,
|
||||
+ ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, rope_factors,
|
||||
n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
|
||||
ext_factor, attn_factor, beta_fast, beta_slow
|
||||
);
|
||||
--
|
||||
2.45.2
|
||||
@@ -346,6 +346,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
s.cmd.Env = os.Environ()
|
||||
s.cmd.Stdout = os.Stdout
|
||||
s.cmd.Stderr = s.status
|
||||
s.cmd.SysProcAttr = LlamaServerSysProcAttr
|
||||
|
||||
envWorkarounds := [][2]string{}
|
||||
for _, gpu := range gpus {
|
||||
@@ -726,6 +727,7 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
|
||||
"temperature": req.Options.Temperature,
|
||||
"top_k": req.Options.TopK,
|
||||
"top_p": req.Options.TopP,
|
||||
"min_p": req.Options.MinP,
|
||||
"tfs_z": req.Options.TFSZ,
|
||||
"typical_p": req.Options.TypicalP,
|
||||
"repeat_last_n": req.Options.RepeatLastN,
|
||||
|
||||
@@ -19,7 +19,7 @@ export default function () {
|
||||
const [step, setStep] = useState<Step>(Step.WELCOME)
|
||||
const [commandCopied, setCommandCopied] = useState<boolean>(false)
|
||||
|
||||
const command = 'ollama run llama3'
|
||||
const command = 'ollama run llama3.1'
|
||||
|
||||
return (
|
||||
<div className='drag'>
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/mod/semver"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
@@ -41,6 +42,8 @@ func (c Command) String() string {
|
||||
case "message":
|
||||
role, message, _ := strings.Cut(c.Args, ": ")
|
||||
fmt.Fprintf(&sb, "MESSAGE %s %s", role, quote(message))
|
||||
case "ollama":
|
||||
fmt.Fprintf(&sb, "OLLAMA %s", c.Args)
|
||||
default:
|
||||
fmt.Fprintf(&sb, "PARAMETER %s %s", c.Name, quote(c.Args))
|
||||
}
|
||||
@@ -57,12 +60,14 @@ const (
|
||||
stateParameter
|
||||
stateMessage
|
||||
stateComment
|
||||
stateVersion
|
||||
)
|
||||
|
||||
var (
|
||||
errMissingFrom = errors.New("no FROM line")
|
||||
errInvalidMessageRole = errors.New("message role must be one of \"system\", \"user\", or \"assistant\"")
|
||||
errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
|
||||
errInvalidVersion = errors.New("invalid OLLAMA version")
|
||||
)
|
||||
|
||||
func ParseFile(r io.Reader) (*File, error) {
|
||||
@@ -109,6 +114,9 @@ func ParseFile(r io.Reader) (*File, error) {
|
||||
case "message":
|
||||
// transition to stateMessage which validates the message role
|
||||
next = stateMessage
|
||||
cmd.Name = s
|
||||
case "ollama":
|
||||
next = stateVersion
|
||||
fallthrough
|
||||
default:
|
||||
cmd.Name = s
|
||||
@@ -123,6 +131,29 @@ func ParseFile(r io.Reader) (*File, error) {
|
||||
role = b.String()
|
||||
case stateComment, stateNil:
|
||||
// pass
|
||||
case stateVersion:
|
||||
s, ok := unquote(strings.TrimSpace(b.String()))
|
||||
if !ok {
|
||||
if _, err := b.WriteRune(r); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
continue
|
||||
} else if isSpace(r) {
|
||||
return nil, errInvalidVersion
|
||||
}
|
||||
|
||||
if s[0] != 'v' {
|
||||
s = "v" + s
|
||||
}
|
||||
|
||||
if !semver.IsValid(s) {
|
||||
return nil, errInvalidVersion
|
||||
}
|
||||
|
||||
cmd.Args = semver.Canonical(s)
|
||||
f.Commands = append(f.Commands, cmd)
|
||||
|
||||
case stateValue:
|
||||
s, ok := unquote(strings.TrimSpace(b.String()))
|
||||
if !ok || isSpace(r) {
|
||||
@@ -157,6 +188,22 @@ func ParseFile(r io.Reader) (*File, error) {
|
||||
switch curr {
|
||||
case stateComment, stateNil:
|
||||
// pass; nothing to flush
|
||||
case stateVersion:
|
||||
s, ok := unquote(strings.TrimSpace(b.String()))
|
||||
if !ok {
|
||||
return nil, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
if s[0] != 'v' {
|
||||
s = "v" + s
|
||||
}
|
||||
|
||||
if !semver.IsValid(s) {
|
||||
return nil, errInvalidVersion
|
||||
}
|
||||
|
||||
cmd.Args = semver.Canonical(s)
|
||||
f.Commands = append(f.Commands, cmd)
|
||||
case stateValue:
|
||||
s, ok := unquote(strings.TrimSpace(b.String()))
|
||||
if !ok {
|
||||
@@ -236,6 +283,15 @@ func parseRuneForState(r rune, cs state) (state, rune, error) {
|
||||
default:
|
||||
return stateComment, 0, nil
|
||||
}
|
||||
case stateVersion:
|
||||
switch {
|
||||
case isNewline(r), isSpace(r):
|
||||
return stateNil, 0, nil
|
||||
case isAlpha(r), isNumber(r), r == '.', r == '+', r == '-':
|
||||
return stateVersion, r, nil
|
||||
default:
|
||||
return stateNil, r, nil
|
||||
}
|
||||
default:
|
||||
return stateNil, 0, errors.New("")
|
||||
}
|
||||
@@ -296,7 +352,7 @@ func isValidMessageRole(role string) bool {
|
||||
|
||||
func isValidCommand(cmd string) bool {
|
||||
switch strings.ToLower(cmd) {
|
||||
case "from", "license", "template", "system", "adapter", "parameter", "message":
|
||||
case "from", "license", "template", "system", "adapter", "parameter", "message", "ollama":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
|
||||
@@ -451,6 +451,7 @@ func TestParseFileParameters(t *testing.T) {
|
||||
"num_predict 1": {"num_predict", "1"},
|
||||
"top_k 1": {"top_k", "1"},
|
||||
"top_p 1.0": {"top_p", "1.0"},
|
||||
"min_p 0.05": {"min_p", "0.05"},
|
||||
"tfs_z 1.0": {"tfs_z", "1.0"},
|
||||
"typical_p 1.0": {"typical_p", "1.0"},
|
||||
"repeat_last_n 1": {"repeat_last_n", "1"},
|
||||
|
||||
@@ -67,7 +67,7 @@ func getAuthorizationToken(ctx context.Context, challenge registryChallenge) (st
|
||||
|
||||
headers.Add("Authorization", signature)
|
||||
|
||||
response, err := makeRequest(ctx, http.MethodGet, redirectURL, headers, nil, nil)
|
||||
response, err := makeRequest(ctx, http.MethodGet, redirectURL, headers, nil, ®istryOptions{})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func localCopy(src, target string) error {
|
||||
dirPath := filepath.Dir(target)
|
||||
|
||||
if err := os.MkdirAll(dirPath, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err := unix.Clonefile(src, target, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
package server
|
||||
|
||||
import "errors"
|
||||
|
||||
func localCopy(src, target string) error {
|
||||
return errors.New("no local copy implementation for linux")
|
||||
}
|
||||
@@ -1,67 +0,0 @@
|
||||
//go:build windows
|
||||
// +build windows
|
||||
|
||||
package server
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func localCopy(src, target string) error {
|
||||
// Create target directory if it doesn't exist
|
||||
dirPath := filepath.Dir(target)
|
||||
if err := os.MkdirAll(dirPath, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Open source file
|
||||
sourceFile, err := os.Open(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer sourceFile.Close()
|
||||
|
||||
// Create target file
|
||||
targetFile, err := os.Create(target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer targetFile.Close()
|
||||
|
||||
// Use CopyFileExW to copy the file
|
||||
err = copyFileEx(src, target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func copyFileEx(src, dst string) error {
|
||||
kernel32 := syscall.NewLazyDLL("kernel32.dll")
|
||||
copyFileEx := kernel32.NewProc("CopyFileExW")
|
||||
|
||||
srcPtr, err := syscall.UTF16PtrFromString(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dstPtr, err := syscall.UTF16PtrFromString(dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
r1, _, err := copyFileEx.Call(
|
||||
uintptr(unsafe.Pointer(srcPtr)),
|
||||
uintptr(unsafe.Pointer(dstPtr)),
|
||||
0, 0, 0, 0)
|
||||
|
||||
if r1 == 0 {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"io"
|
||||
"log/slog"
|
||||
"math"
|
||||
"math/rand/v2"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
@@ -43,17 +44,19 @@ type blobDownload struct {
|
||||
|
||||
context.CancelFunc
|
||||
|
||||
done bool
|
||||
done chan struct{}
|
||||
err error
|
||||
references atomic.Int32
|
||||
}
|
||||
|
||||
type blobDownloadPart struct {
|
||||
N int
|
||||
Offset int64
|
||||
Size int64
|
||||
Completed int64
|
||||
lastUpdated time.Time
|
||||
N int
|
||||
Offset int64
|
||||
Size int64
|
||||
Completed atomic.Int64
|
||||
|
||||
lastUpdatedMu sync.Mutex
|
||||
lastUpdated time.Time
|
||||
|
||||
*blobDownload `json:"-"`
|
||||
}
|
||||
@@ -71,7 +74,7 @@ func (p *blobDownloadPart) Name() string {
|
||||
}
|
||||
|
||||
func (p *blobDownloadPart) StartsAt() int64 {
|
||||
return p.Offset + p.Completed
|
||||
return p.Offset + p.Completed.Load()
|
||||
}
|
||||
|
||||
func (p *blobDownloadPart) StopsAt() int64 {
|
||||
@@ -81,7 +84,9 @@ func (p *blobDownloadPart) StopsAt() int64 {
|
||||
func (p *blobDownloadPart) Write(b []byte) (n int, err error) {
|
||||
n = len(b)
|
||||
p.blobDownload.Completed.Add(int64(n))
|
||||
p.lastUpdatedMu.Lock()
|
||||
p.lastUpdated = time.Now()
|
||||
p.lastUpdatedMu.Unlock()
|
||||
return n, nil
|
||||
}
|
||||
|
||||
@@ -91,6 +96,8 @@ func (b *blobDownload) Prepare(ctx context.Context, requestURL *url.URL, opts *r
|
||||
return err
|
||||
}
|
||||
|
||||
b.done = make(chan struct{})
|
||||
|
||||
for _, partFilePath := range partFilePaths {
|
||||
part, err := b.readPart(partFilePath)
|
||||
if err != nil {
|
||||
@@ -98,7 +105,7 @@ func (b *blobDownload) Prepare(ctx context.Context, requestURL *url.URL, opts *r
|
||||
}
|
||||
|
||||
b.Total += part.Size
|
||||
b.Completed.Add(part.Completed)
|
||||
b.Completed.Add(part.Completed.Load())
|
||||
b.Parts = append(b.Parts, part)
|
||||
}
|
||||
|
||||
@@ -138,9 +145,36 @@ func (b *blobDownload) Prepare(ctx context.Context, requestURL *url.URL, opts *r
|
||||
}
|
||||
|
||||
func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *registryOptions) {
|
||||
defer close(b.done)
|
||||
b.err = b.run(ctx, requestURL, opts)
|
||||
}
|
||||
|
||||
func newBackoff(maxBackoff time.Duration) func(ctx context.Context) error {
|
||||
var n int
|
||||
return func(ctx context.Context) error {
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
n++
|
||||
|
||||
// n^2 backoff timer is a little smoother than the
|
||||
// common choice of 2^n.
|
||||
d := min(time.Duration(n*n)*10*time.Millisecond, maxBackoff)
|
||||
// Randomize the delay between 0.5-1.5 x msec, in order
|
||||
// to prevent accidental "thundering herd" problems.
|
||||
d = time.Duration(float64(d) * (rand.Float64() + 0.5))
|
||||
t := time.NewTimer(d)
|
||||
defer t.Stop()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-t.C:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *registryOptions) error {
|
||||
defer blobDownloadManager.Delete(b.Digest)
|
||||
ctx, b.CancelFunc = context.WithCancel(ctx)
|
||||
@@ -153,11 +187,57 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
|
||||
|
||||
_ = file.Truncate(b.Total)
|
||||
|
||||
directURL, err := func() (*url.URL, error) {
|
||||
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
backoff := newBackoff(10 * time.Second)
|
||||
for {
|
||||
// shallow clone opts to be used in the closure
|
||||
// without affecting the outer opts.
|
||||
newOpts := new(registryOptions)
|
||||
*newOpts = *opts
|
||||
|
||||
newOpts.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
||||
if len(via) > 10 {
|
||||
return errors.New("maxium redirects exceeded (10) for directURL")
|
||||
}
|
||||
|
||||
// if the hostname is the same, allow the redirect
|
||||
if req.URL.Hostname() == requestURL.Hostname() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// stop at the first redirect that is not
|
||||
// the same hostname as the original
|
||||
// request.
|
||||
return http.ErrUseLastResponse
|
||||
}
|
||||
|
||||
resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, nil, nil, newOpts)
|
||||
if err != nil {
|
||||
slog.Warn("failed to get direct URL; backing off and retrying", "err", err)
|
||||
if err := backoff(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
continue
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusTemporaryRedirect {
|
||||
return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode)
|
||||
}
|
||||
return resp.Location()
|
||||
}
|
||||
}()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
g, inner := errgroup.WithContext(ctx)
|
||||
g.SetLimit(numDownloadParts)
|
||||
for i := range b.Parts {
|
||||
part := b.Parts[i]
|
||||
if part.Completed == part.Size {
|
||||
if part.Completed.Load() == part.Size {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -165,7 +245,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
|
||||
var err error
|
||||
for try := 0; try < maxRetries; try++ {
|
||||
w := io.NewOffsetWriter(file, part.StartsAt())
|
||||
err = b.downloadChunk(inner, requestURL, w, part, opts)
|
||||
err = b.downloadChunk(inner, directURL, w, part)
|
||||
switch {
|
||||
case errors.Is(err, context.Canceled), errors.Is(err, syscall.ENOSPC):
|
||||
// return immediately if the context is canceled or the device is out of space
|
||||
@@ -206,29 +286,31 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
|
||||
return err
|
||||
}
|
||||
|
||||
b.done = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *blobDownload) downloadChunk(ctx context.Context, requestURL *url.URL, w io.Writer, part *blobDownloadPart, opts *registryOptions) error {
|
||||
func (b *blobDownload) downloadChunk(ctx context.Context, requestURL *url.URL, w io.Writer, part *blobDownloadPart) error {
|
||||
g, ctx := errgroup.WithContext(ctx)
|
||||
g.Go(func() error {
|
||||
headers := make(http.Header)
|
||||
headers.Set("Range", fmt.Sprintf("bytes=%d-%d", part.StartsAt(), part.StopsAt()-1))
|
||||
resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, headers, nil, opts)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, requestURL.String(), nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", part.StartsAt(), part.StopsAt()-1))
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
n, err := io.CopyN(w, io.TeeReader(resp.Body, part), part.Size-part.Completed)
|
||||
n, err := io.CopyN(w, io.TeeReader(resp.Body, part), part.Size-part.Completed.Load())
|
||||
if err != nil && !errors.Is(err, context.Canceled) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
// rollback progress
|
||||
b.Completed.Add(-n)
|
||||
return err
|
||||
}
|
||||
|
||||
part.Completed += n
|
||||
part.Completed.Add(n)
|
||||
if err := b.writePart(part.Name(), part); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -242,15 +324,21 @@ func (b *blobDownload) downloadChunk(ctx context.Context, requestURL *url.URL, w
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
if part.Completed >= part.Size {
|
||||
if part.Completed.Load() >= part.Size {
|
||||
return nil
|
||||
}
|
||||
|
||||
if !part.lastUpdated.IsZero() && time.Since(part.lastUpdated) > 5*time.Second {
|
||||
part.lastUpdatedMu.Lock()
|
||||
lastUpdated := part.lastUpdated
|
||||
part.lastUpdatedMu.Unlock()
|
||||
|
||||
if !lastUpdated.IsZero() && time.Since(lastUpdated) > 5*time.Second {
|
||||
const msg = "%s part %d stalled; retrying. If this persists, press ctrl-c to exit, then 'ollama pull' to find a faster connection."
|
||||
slog.Info(fmt.Sprintf(msg, b.Digest[7:19], part.N))
|
||||
// reset last updated
|
||||
part.lastUpdatedMu.Lock()
|
||||
part.lastUpdated = time.Time{}
|
||||
part.lastUpdatedMu.Unlock()
|
||||
return errPartStalled
|
||||
}
|
||||
case <-ctx.Done():
|
||||
@@ -315,6 +403,8 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
|
||||
ticker := time.NewTicker(60 * time.Millisecond)
|
||||
for {
|
||||
select {
|
||||
case <-b.done:
|
||||
return b.err
|
||||
case <-ticker.C:
|
||||
fn(api.ProgressResponse{
|
||||
Status: fmt.Sprintf("pulling %s", b.Digest[7:19]),
|
||||
@@ -322,10 +412,6 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
|
||||
Total: b.Total,
|
||||
Completed: b.Completed.Load(),
|
||||
})
|
||||
|
||||
if b.done || b.err != nil {
|
||||
return b.err
|
||||
}
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
@@ -32,7 +32,6 @@ import (
|
||||
"github.com/ollama/ollama/types/errtypes"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
"github.com/ollama/ollama/version"
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -55,6 +54,8 @@ type registryOptions struct {
|
||||
Username string
|
||||
Password string
|
||||
Token string
|
||||
|
||||
CheckRedirect func(req *http.Request, via []*http.Request) error
|
||||
}
|
||||
|
||||
type Model struct {
|
||||
@@ -373,6 +374,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
||||
}
|
||||
|
||||
var messages []*api.Message
|
||||
var version string
|
||||
parameters := make(map[string]any)
|
||||
|
||||
var layers []*Layer
|
||||
@@ -383,7 +385,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
||||
case "model", "adapter":
|
||||
var baseLayers []*layerGGML
|
||||
if name := model.ParseName(c.Args); name.IsValid() {
|
||||
baseLayers, err = parseFromModel(ctx, name, fn)
|
||||
baseLayers, version, err = parseFromModel(ctx, name, fn)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -528,6 +530,10 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
||||
}
|
||||
|
||||
messages = append(messages, &api.Message{Role: role, Content: content})
|
||||
case "ollama":
|
||||
if version == "" {
|
||||
version = c.Args
|
||||
}
|
||||
default:
|
||||
ps, err := api.FormatParams(map[string][]string{c.Name: {c.Args}})
|
||||
if err != nil {
|
||||
@@ -641,7 +647,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
||||
old, _ := ParseNamedManifest(name)
|
||||
|
||||
fn(api.ProgressResponse{Status: "writing manifest"})
|
||||
if err := WriteManifest(name, layer, layers); err != nil {
|
||||
if err := WriteManifest(name, version, layer, layers); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -1089,12 +1095,11 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
|
||||
if anonymous {
|
||||
// no user is associated with the public key, and the request requires non-anonymous access
|
||||
pubKey, nestedErr := auth.GetPublicKey()
|
||||
localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(pubKey)))
|
||||
if nestedErr != nil {
|
||||
slog.Error(fmt.Sprintf("couldn't get public key: %v", nestedErr))
|
||||
return nil, errUnauthorized
|
||||
}
|
||||
return nil, &errtypes.UnknownOllamaKey{Key: localPubKey}
|
||||
return nil, &errtypes.UnknownOllamaKey{Key: pubKey}
|
||||
}
|
||||
// user is associated with the public key, but is not authorized to make the request
|
||||
return nil, errUnauthorized
|
||||
@@ -1133,7 +1138,9 @@ func makeRequest(ctx context.Context, method string, requestURL *url.URL, header
|
||||
req.ContentLength = contentLength
|
||||
}
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
resp, err := (&http.Client{
|
||||
CheckRedirect: regOpts.CheckRedirect,
|
||||
}).Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ type Manifest struct {
|
||||
MediaType string `json:"mediaType"`
|
||||
Config *Layer `json:"config"`
|
||||
Layers []*Layer `json:"layers"`
|
||||
Ollama string `json:"string,omitempty"`
|
||||
|
||||
filepath string
|
||||
fi os.FileInfo
|
||||
@@ -93,7 +94,7 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
|
||||
return &m, nil
|
||||
}
|
||||
|
||||
func WriteManifest(name model.Name, config *Layer, layers []*Layer) error {
|
||||
func WriteManifest(name model.Name, ollama string, config *Layer, layers []*Layer) error {
|
||||
manifests, err := GetManifestPath()
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -115,6 +116,7 @@ func WriteManifest(name model.Name, config *Layer, layers []*Layer) error {
|
||||
MediaType: "application/vnd.docker.distribution.manifest.v2+json",
|
||||
Config: config,
|
||||
Layers: layers,
|
||||
Ollama: ollama,
|
||||
}
|
||||
|
||||
return json.NewEncoder(f).Encode(m)
|
||||
|
||||
@@ -30,26 +30,27 @@ type layerGGML struct {
|
||||
*llm.GGML
|
||||
}
|
||||
|
||||
func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
||||
func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerGGML, version string, err error) {
|
||||
m, err := ParseNamedManifest(name)
|
||||
switch {
|
||||
case errors.Is(err, os.ErrNotExist):
|
||||
if err := PullModel(ctx, name.String(), ®istryOptions{}, fn); err != nil {
|
||||
return nil, err
|
||||
return nil, version, err
|
||||
}
|
||||
|
||||
m, err = ParseNamedManifest(name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, version, err
|
||||
}
|
||||
case err != nil:
|
||||
return nil, err
|
||||
return nil, version, err
|
||||
}
|
||||
|
||||
version = m.Ollama
|
||||
for _, layer := range m.Layers {
|
||||
layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, name.DisplayShortest())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, version, err
|
||||
}
|
||||
|
||||
switch layer.MediaType {
|
||||
@@ -58,18 +59,18 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
|
||||
"application/vnd.ollama.image.adapter":
|
||||
blobpath, err := GetBlobsPath(layer.Digest)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, version, err
|
||||
}
|
||||
|
||||
blob, err := os.Open(blobpath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, version, err
|
||||
}
|
||||
defer blob.Close()
|
||||
|
||||
ggml, _, err := llm.DecodeGGML(blob, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, version, err
|
||||
}
|
||||
|
||||
layers = append(layers, &layerGGML{layer, ggml})
|
||||
@@ -78,7 +79,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
|
||||
}
|
||||
}
|
||||
|
||||
return layers, nil
|
||||
return layers, version, nil
|
||||
}
|
||||
|
||||
func extractFromZipFile(p string, file *os.File, fn func(api.ProgressResponse)) error {
|
||||
@@ -263,13 +264,27 @@ func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
|
||||
if t, err := template.Named(s); err != nil {
|
||||
slog.Debug("template detection", "error", err)
|
||||
} else {
|
||||
tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
|
||||
layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tmpl.status = fmt.Sprintf("using autodetected template %s", t.Name)
|
||||
layers = append(layers, &layerGGML{tmpl, nil})
|
||||
layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
|
||||
layers = append(layers, &layerGGML{layer, nil})
|
||||
|
||||
if t.Parameters != nil {
|
||||
var b bytes.Buffer
|
||||
if err := json.NewEncoder(&b).Encode(t.Parameters); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
layers = append(layers, &layerGGML{layer, nil})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
121
server/routes.go
121
server/routes.go
@@ -4,7 +4,6 @@ import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
@@ -24,10 +23,8 @@ import (
|
||||
|
||||
"github.com/gin-contrib/cors"
|
||||
"github.com/gin-gonic/gin"
|
||||
"golang.org/x/crypto/ssh"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/auth"
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
"github.com/ollama/ollama/gpu"
|
||||
"github.com/ollama/ollama/llm"
|
||||
@@ -612,10 +609,9 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
|
||||
defer cancel()
|
||||
|
||||
quantization := cmp.Or(r.Quantize, r.Quantization)
|
||||
if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); err != nil {
|
||||
if errors.Is(err, errBadTemplate) {
|
||||
ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
|
||||
}
|
||||
if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); errors.Is(err, errBadTemplate) {
|
||||
ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
|
||||
} else if err != nil {
|
||||
ch <- gin.H{"error": err.Error()}
|
||||
}
|
||||
}()
|
||||
@@ -931,6 +927,7 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
_, err = os.Stat(path)
|
||||
switch {
|
||||
case errors.Is(err, os.ErrNotExist):
|
||||
@@ -943,14 +940,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if c.GetHeader("X-Ollama-File") != "" && s.isLocal(c) {
|
||||
err = localBlobCopy(c.GetHeader("X-Ollama-File"), path)
|
||||
if err == nil {
|
||||
c.Status(http.StatusCreated)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
layer, err := NewLayer(c.Request.Body, "")
|
||||
if err != nil {
|
||||
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
@@ -965,108 +954,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
|
||||
c.Status(http.StatusCreated)
|
||||
}
|
||||
|
||||
func localBlobCopy (src, dest string) error {
|
||||
_, err := os.Stat(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = localCopy(src, dest)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
err = defaultCopy(src, dest)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("failed to copy blob")
|
||||
}
|
||||
|
||||
func (s *Server) isLocal(c *gin.Context) bool {
|
||||
if authz := c.GetHeader("Authorization"); authz != "" {
|
||||
parts := strings.Split(authz, ":")
|
||||
if len(parts) != 3 {
|
||||
return false
|
||||
}
|
||||
|
||||
clientPublicKey, _, _, _, err := ssh.ParseAuthorizedKey([]byte(fmt.Sprintf("ssh-ed25519 %s", parts[0])))
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// partialRequestData is formatted as http.Method,http.requestURI,timestamp,nonce
|
||||
requestData, err := base64.StdEncoding.DecodeString(parts[1])
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
partialRequestDataParts := strings.Split(string(requestData), ",")
|
||||
if len(partialRequestDataParts) != 3 {
|
||||
return false
|
||||
}
|
||||
|
||||
signature, err := base64.StdEncoding.DecodeString(parts[2])
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if err := clientPublicKey.Verify(requestData, &ssh.Signature{Format: clientPublicKey.Type(), Blob: signature}); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
serverPublicKey, err := auth.GetPublicKey()
|
||||
if err != nil {
|
||||
slog.Error(fmt.Sprintf("failed to get server public key: %v", err))
|
||||
return false
|
||||
}
|
||||
|
||||
if bytes.Equal(serverPublicKey.Marshal(), clientPublicKey.Marshal()) {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func defaultCopy(path string, dest string) error {
|
||||
// This function should be called if the server is local
|
||||
// It should find the model directory, copy the blob over, and return the digest
|
||||
dirPath := filepath.Dir(dest)
|
||||
|
||||
if err := os.MkdirAll(dirPath, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy blob over
|
||||
sourceFile, err := os.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not open source file: %v", err)
|
||||
}
|
||||
defer sourceFile.Close()
|
||||
|
||||
destFile, err := os.Create(dest)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not create destination file: %v", err)
|
||||
}
|
||||
defer destFile.Close()
|
||||
|
||||
_, err = io.CopyBuffer(destFile, sourceFile, make([]byte, 4*1024*1024))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error copying file: %v", err)
|
||||
}
|
||||
|
||||
err = destFile.Sync()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error flushing file: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func isLocalIP(ip netip.Addr) bool {
|
||||
if interfaces, err := net.Interfaces(); err == nil {
|
||||
for _, iface := range interfaces {
|
||||
|
||||
@@ -498,7 +498,7 @@ func TestCreateTemplateSystem(t *testing.T) {
|
||||
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt", createBinFile(t, nil, nil)),
|
||||
Stream: &stream,
|
||||
})
|
||||
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected status code 400, actual %d", w.Code)
|
||||
}
|
||||
@@ -510,7 +510,7 @@ func TestCreateTemplateSystem(t *testing.T) {
|
||||
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ if .Prompt }}", createBinFile(t, nil, nil)),
|
||||
Stream: &stream,
|
||||
})
|
||||
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected status code 400, actual %d", w.Code)
|
||||
}
|
||||
@@ -522,7 +522,7 @@ func TestCreateTemplateSystem(t *testing.T) {
|
||||
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ Prompt }}", createBinFile(t, nil, nil)),
|
||||
Stream: &stream,
|
||||
})
|
||||
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected status code 400, actual %d", w.Code)
|
||||
}
|
||||
@@ -599,9 +599,10 @@ func TestCreateDetectTemplate(t *testing.T) {
|
||||
}
|
||||
|
||||
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
|
||||
filepath.Join(p, "blobs", "sha256-0d79f567714c62c048378f2107fb332dabee0135d080c302d884317da9433cc5"),
|
||||
filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
|
||||
filepath.Join(p, "blobs", "sha256-c608dc615584cd20d9d830363dabf8a4783ae5d34245c3d8c115edb3bc7b28e4"),
|
||||
filepath.Join(p, "blobs", "sha256-f836ee110db21567f826332e4cedd746c06d10664fd5a9ea3659e3683a944510"),
|
||||
filepath.Join(p, "blobs", "sha256-ea34c57ba5b78b740aafe2aeb74dc6507fc3ad14170b64c26a04fb9e36c88d75"),
|
||||
})
|
||||
})
|
||||
|
||||
@@ -622,3 +623,119 @@ func TestCreateDetectTemplate(t *testing.T) {
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func TestCreateVersion(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
p := t.TempDir()
|
||||
t.Setenv("OLLAMA_MODELS", p)
|
||||
envconfig.LoadConfig()
|
||||
var s Server
|
||||
|
||||
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
|
||||
Name: "test",
|
||||
Modelfile: fmt.Sprintf("FROM %s\nOLLAMA 0.2.3\nLICENSE MIT\nLICENSE Apache-2.0", createBinFile(t, nil, nil)),
|
||||
Stream: &stream,
|
||||
})
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status code 200, actual %d", w.Code)
|
||||
}
|
||||
|
||||
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{
|
||||
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
|
||||
})
|
||||
|
||||
f, err := os.Open(filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
bts := json.NewDecoder(f)
|
||||
|
||||
var m Manifest
|
||||
if err := bts.Decode(&m); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if m.Ollama != "v0.2.3" {
|
||||
t.Errorf("got %s != want v0.2.3", m.Ollama)
|
||||
}
|
||||
|
||||
t.Run("no version", func(t *testing.T) {
|
||||
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
|
||||
Name: "noversion",
|
||||
Modelfile: fmt.Sprintf("FROM %s\nLICENSE MIT\nLICENSE Apache-2.0", createBinFile(t, nil, nil)),
|
||||
Stream: &stream,
|
||||
})
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status code 200, actual %d", w.Code)
|
||||
}
|
||||
|
||||
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "noversion", "*"), []string{
|
||||
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "noversion", "latest"),
|
||||
})
|
||||
|
||||
f, err := os.Open(filepath.Join(p, "manifests", "registry.ollama.ai", "library", "noversion", "latest"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
bts := json.NewDecoder(f)
|
||||
var m Manifest
|
||||
if err := bts.Decode(&m); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if m.Ollama != "" {
|
||||
t.Errorf("got %s != want empty", m.Ollama)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("invalid version", func(t *testing.T) {
|
||||
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
|
||||
Name: "invalid",
|
||||
Modelfile: fmt.Sprintf("FROM %s\nOLLAMA 0..400", createBinFile(t, nil, nil)),
|
||||
Stream: &stream,
|
||||
})
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected status code 400, actual %d", w.Code)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("from valid version", func(t *testing.T) {
|
||||
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
|
||||
Name: "fromvalid",
|
||||
Modelfile: "FROM test",
|
||||
Stream: &stream,
|
||||
})
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status code 200, actual %d", w.Code)
|
||||
}
|
||||
|
||||
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "fromvalid", "*"), []string{
|
||||
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "fromvalid", "latest"),
|
||||
})
|
||||
|
||||
f, err := os.Open(filepath.Join(p, "manifests", "registry.ollama.ai", "library", "fromvalid", "latest"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
bts := json.NewDecoder(f)
|
||||
|
||||
var m Manifest
|
||||
if err := bts.Decode(&m); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if m.Ollama != "v0.2.3" {
|
||||
t.Errorf("got %s != want v0.2.3", m.Ollama)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -99,7 +99,7 @@ func TestDeleteDuplicateLayers(t *testing.T) {
|
||||
}
|
||||
|
||||
// create a manifest with duplicate layers
|
||||
if err := WriteManifest(n, config, []*Layer{config}); err != nil {
|
||||
if err := WriteManifest(n, "", config, []*Layer{config}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
|
||||
@@ -10,18 +10,15 @@ import (
|
||||
"math"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/auth"
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/openai"
|
||||
@@ -530,64 +527,3 @@ func TestNormalize(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsLocalReal(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
clientPubLoc := t.TempDir()
|
||||
t.Setenv("HOME", clientPubLoc)
|
||||
t.Setenv("USERPROFILE", clientPubLoc)
|
||||
|
||||
_, err := auth.GetPublicKey()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
ctx, _ := gin.CreateTestContext(w)
|
||||
ctx.Request = &http.Request{
|
||||
Header: make(http.Header),
|
||||
}
|
||||
|
||||
requestURL := url.URL{
|
||||
Scheme: "http",
|
||||
Host: "localhost:8080",
|
||||
Path: "/api/blobs",
|
||||
}
|
||||
request := &http.Request{
|
||||
Method: http.MethodPost,
|
||||
URL: &requestURL,
|
||||
}
|
||||
s := &Server{}
|
||||
|
||||
authz, err := api.Authorization(ctx, request)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Set client authorization header
|
||||
ctx.Request.Header.Set("Authorization", authz)
|
||||
if !s.isLocal(ctx) {
|
||||
t.Fatal("Expected isLocal to return true")
|
||||
}
|
||||
|
||||
t.Run("different server pubkey", func(t *testing.T) {
|
||||
serverPubLoc := t.TempDir()
|
||||
t.Setenv("HOME", serverPubLoc)
|
||||
t.Setenv("USERPROFILE", serverPubLoc)
|
||||
_, err := auth.GetPublicKey()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if s.isLocal(ctx) {
|
||||
t.Fatal("Expected isLocal to return false")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("invalid pubkey", func(t *testing.T) {
|
||||
ctx.Request.Header.Set("Authorization", "sha-25616:invalid")
|
||||
if s.isLocal(ctx) {
|
||||
t.Fatal("Expected isLocal to return false")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -254,7 +254,7 @@ func (b *blobUpload) uploadPart(ctx context.Context, method string, requestURL *
|
||||
|
||||
// retry uploading to the redirect URL
|
||||
for try := range maxRetries {
|
||||
err = b.uploadPart(ctx, http.MethodPut, redirectURL, part, nil)
|
||||
err = b.uploadPart(ctx, http.MethodPut, redirectURL, part, ®istryOptions{})
|
||||
switch {
|
||||
case errors.Is(err, context.Canceled):
|
||||
return err
|
||||
|
||||
8
template/alfred.json
Normal file
8
template/alfred.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"stop": [
|
||||
"<start_system>",
|
||||
"<end_message>",
|
||||
"<start_user>",
|
||||
"<start_assistant>"
|
||||
]
|
||||
}
|
||||
6
template/alpaca.json
Normal file
6
template/alpaca.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"### Instruction:",
|
||||
"### Response"
|
||||
]
|
||||
}
|
||||
6
template/chatml.json
Normal file
6
template/chatml.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>"
|
||||
]
|
||||
}
|
||||
8
template/chatqa.json
Normal file
8
template/chatqa.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"stop": [
|
||||
"System:",
|
||||
"User:",
|
||||
"Assistant:",
|
||||
"<|begin_of_text|>"
|
||||
]
|
||||
}
|
||||
7
template/codellama-70b-instruct.json
Normal file
7
template/codellama-70b-instruct.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"stop": [
|
||||
"Source:",
|
||||
"Destination:",
|
||||
"<step>"
|
||||
]
|
||||
}
|
||||
6
template/falcon-instruct.json
Normal file
6
template/falcon-instruct.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"User:",
|
||||
"Assistant:"
|
||||
]
|
||||
}
|
||||
6
template/gemma-instruct.json
Normal file
6
template/gemma-instruct.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"<start_of_turn>",
|
||||
"<end_of_turn>"
|
||||
]
|
||||
}
|
||||
7
template/granite-instruct.json
Normal file
7
template/granite-instruct.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"stop": [
|
||||
"System:",
|
||||
"Question:",
|
||||
"Answer:"
|
||||
]
|
||||
}
|
||||
8
template/llama2-chat.json
Normal file
8
template/llama2-chat.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"stop": [
|
||||
"[INST]",
|
||||
"[/INST]",
|
||||
"<<SYS>>",
|
||||
"<</SYS>>"
|
||||
]
|
||||
}
|
||||
7
template/llama3-instruct.json
Normal file
7
template/llama3-instruct.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"stop": [
|
||||
"<|start_header_id|>",
|
||||
"<|end_header_id|>",
|
||||
"<|eot_id|>"
|
||||
]
|
||||
}
|
||||
6
template/magicoder.json
Normal file
6
template/magicoder.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"@@ Instruction",
|
||||
"@@ Response"
|
||||
]
|
||||
}
|
||||
6
template/mistral-instruct.json
Normal file
6
template/mistral-instruct.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>"
|
||||
]
|
||||
}
|
||||
5
template/openchat.json
Normal file
5
template/openchat.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"stop": [
|
||||
"<|end_of_turn|>"
|
||||
]
|
||||
}
|
||||
8
template/phi-3.json
Normal file
8
template/phi-3.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"stop": [
|
||||
"<|end|>",
|
||||
"<|system|>",
|
||||
"<|user|>",
|
||||
"<|assistant|>"
|
||||
]
|
||||
}
|
||||
7
template/solar-instruct.json
Normal file
7
template/solar-instruct.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"stop": [
|
||||
"### System:",
|
||||
"### User:",
|
||||
"### Assistant"
|
||||
]
|
||||
}
|
||||
7
template/starcoder2-instruct.json
Normal file
7
template/starcoder2-instruct.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"stop": [
|
||||
"### Instruction",
|
||||
"### Response",
|
||||
"<|endoftext|>"
|
||||
]
|
||||
}
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
var indexBytes []byte
|
||||
|
||||
//go:embed *.gotmpl
|
||||
//go:embed *.json
|
||||
var templatesFS embed.FS
|
||||
|
||||
var templatesOnce = sync.OnceValues(func() ([]*named, error) {
|
||||
@@ -39,6 +40,15 @@ var templatesOnce = sync.OnceValues(func() ([]*named, error) {
|
||||
|
||||
// normalize line endings
|
||||
t.Bytes = bytes.ReplaceAll(bts, []byte("\r\n"), []byte("\n"))
|
||||
|
||||
params, err := templatesFS.ReadFile(t.Name + ".json")
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(params, &t.Parameters); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return templates, nil
|
||||
@@ -48,6 +58,10 @@ type named struct {
|
||||
Name string `json:"name"`
|
||||
Template string `json:"template"`
|
||||
Bytes []byte
|
||||
|
||||
Parameters *struct {
|
||||
Stop []string `json:"stop"`
|
||||
}
|
||||
}
|
||||
|
||||
func (t named) Reader() io.Reader {
|
||||
|
||||
6
template/vicuna.json
Normal file
6
template/vicuna.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"USER:",
|
||||
"ASSISTANT:"
|
||||
]
|
||||
}
|
||||
8
template/zephyr.json
Normal file
8
template/zephyr.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"stop": [
|
||||
"<|system|>",
|
||||
"</s>",
|
||||
"<|user|>",
|
||||
"<|assistant|>"
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user