Compare commits
5 Commits
jmorganca/
...
bmizerany/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d9ea2e5c7a | ||
|
|
5ce997a7b9 | ||
|
|
672ffe9b7d | ||
|
|
47cfe58af5 | ||
|
|
e72c567cfd |
50
api/types_test.go
Normal file
50
api/types_test.go
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"math"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestKeepAliveParsingFromJSON(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
req string
|
||||||
|
exp *Duration
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Positive Integer",
|
||||||
|
req: `{ "keep_alive": 42 }`,
|
||||||
|
exp: &Duration{42 * time.Second},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Positive Integer String",
|
||||||
|
req: `{ "keep_alive": "42m" }`,
|
||||||
|
exp: &Duration{42 * time.Minute},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Negative Integer",
|
||||||
|
req: `{ "keep_alive": -1 }`,
|
||||||
|
exp: &Duration{math.MaxInt64},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Negative Integer String",
|
||||||
|
req: `{ "keep_alive": "-1m" }`,
|
||||||
|
exp: &Duration{math.MaxInt64},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
var dec ChatRequest
|
||||||
|
err := json.Unmarshal([]byte(test.req), &dec)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, test.exp, dec.KeepAlive)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -970,9 +970,10 @@ func NewCLI() *cobra.Command {
|
|||||||
serveCmd.SetUsageTemplate(serveCmd.UsageTemplate() + `
|
serveCmd.SetUsageTemplate(serveCmd.UsageTemplate() + `
|
||||||
Environment Variables:
|
Environment Variables:
|
||||||
|
|
||||||
OLLAMA_HOST The host:port to bind to (default "127.0.0.1:11434")
|
OLLAMA_HOST The host:port to bind to (default "127.0.0.1:11434")
|
||||||
OLLAMA_ORIGINS A comma separated list of allowed origins.
|
OLLAMA_ORIGINS A comma separated list of allowed origins.
|
||||||
OLLAMA_MODELS The path to the models directory (default is "~/.ollama/models")
|
OLLAMA_MODELS The path to the models directory (default is "~/.ollama/models")
|
||||||
|
OLLAMA_KEEP_ALIVE The duration that models stay loaded in memory (default is "5m")
|
||||||
`)
|
`)
|
||||||
|
|
||||||
pullCmd := &cobra.Command{
|
pullCmd := &cobra.Command{
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
### Getting Started
|
### Getting Started
|
||||||
* [Quickstart](../README.md#quickstart)
|
* [Quickstart](../README.md#quickstart)
|
||||||
* [Examples](../examples)
|
* [Examples](../examples)
|
||||||
* [Importing models](./import.md) from GGUF, Pytorch and Safetensors
|
* [Importing models](./import.md)
|
||||||
* [Linux Documentation](./linux.md)
|
* [Linux Documentation](./linux.md)
|
||||||
* [Windows Documentation](./windows.md)
|
* [Windows Documentation](./windows.md)
|
||||||
* [Docker Documentation](https://hub.docker.com/r/ollama/ollama)
|
* [Docker Documentation](https://hub.docker.com/r/ollama/ollama)
|
||||||
|
|||||||
@@ -172,19 +172,6 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
|
|||||||
slog.Info(fmt.Sprintf("loaded %d images", len(predict.Images)))
|
slog.Info(fmt.Sprintf("loaded %d images", len(predict.Images)))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Limit the number of predictions to the maximum context length
|
|
||||||
// this will cause no more than two context shifts
|
|
||||||
// TODO: limit this further to num_ctx - len(prompt) to avoid
|
|
||||||
// any context shifts at all
|
|
||||||
if predict.Options.NumPredict > llm.options.NumCtx {
|
|
||||||
slog.Warn(fmt.Sprintf("requested num_predict is greater than the context length (%d > %d), using %d instead", predict.Options.NumPredict, llm.options.NumCtx, llm.options.NumCtx))
|
|
||||||
predict.Options.NumPredict = llm.options.NumCtx
|
|
||||||
}
|
|
||||||
|
|
||||||
if predict.Options.NumPredict == -1 {
|
|
||||||
predict.Options.NumPredict = llm.options.NumCtx
|
|
||||||
}
|
|
||||||
|
|
||||||
request := map[string]any{
|
request := map[string]any{
|
||||||
"prompt": predict.Prompt,
|
"prompt": predict.Prompt,
|
||||||
"stream": true,
|
"stream": true,
|
||||||
|
|||||||
13
llm/patches/04-locale.diff
Normal file
13
llm/patches/04-locale.diff
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
diff --git a/llama.cpp b/llama.cpp
|
||||||
|
index b27aa272..99372f9c 100644
|
||||||
|
--- a/llama.cpp
|
||||||
|
+++ b/llama.cpp
|
||||||
|
@@ -9360,7 +9360,7 @@ struct llm_tokenizer_wpm {
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t to_lower(uint32_t code) {
|
||||||
|
- static const std::locale locale("en_US.UTF-8");
|
||||||
|
+ static const std::locale locale("");
|
||||||
|
#if defined(_WIN32)
|
||||||
|
if (code > 0xFFFF) {
|
||||||
|
return code;
|
||||||
@@ -795,10 +795,8 @@ func PruneLayers() error {
|
|||||||
|
|
||||||
for _, blob := range blobs {
|
for _, blob := range blobs {
|
||||||
name := blob.Name()
|
name := blob.Name()
|
||||||
if runtime.GOOS == "windows" {
|
name = strings.ReplaceAll(name, "-", ":")
|
||||||
name = strings.ReplaceAll(name, "-", ":")
|
if strings.HasPrefix(name, "sha256-") {
|
||||||
}
|
|
||||||
if strings.HasPrefix(name, "sha256:") {
|
|
||||||
deleteMap[name] = struct{}{}
|
deleteMap[name] = struct{}{}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"runtime"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"golang.org/x/exp/slices"
|
"golang.org/x/exp/slices"
|
||||||
@@ -47,10 +46,7 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
delimiter := ":"
|
const delimiter = "-"
|
||||||
if runtime.GOOS == "windows" {
|
|
||||||
delimiter = "-"
|
|
||||||
}
|
|
||||||
|
|
||||||
pattern := strings.Join([]string{"sha256", "*-partial"}, delimiter)
|
pattern := strings.Join([]string{"sha256", "*-partial"}, delimiter)
|
||||||
temp, err := os.CreateTemp(blobs, pattern)
|
temp, err := os.CreateTemp(blobs, pattern)
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -150,10 +149,7 @@ func GetBlobsPath(digest string) (string, error) {
|
|||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
if runtime.GOOS == "windows" {
|
digest = strings.ReplaceAll(digest, ":", "-")
|
||||||
digest = strings.ReplaceAll(digest, ":", "-")
|
|
||||||
}
|
|
||||||
|
|
||||||
path := filepath.Join(dir, "blobs", digest)
|
path := filepath.Join(dir, "blobs", digest)
|
||||||
dirPath := filepath.Dir(path)
|
dirPath := filepath.Dir(path)
|
||||||
if digest == "" {
|
if digest == "" {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"math"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
@@ -16,6 +17,7 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"syscall"
|
"syscall"
|
||||||
@@ -207,7 +209,7 @@ func GenerateHandler(c *gin.Context) {
|
|||||||
|
|
||||||
var sessionDuration time.Duration
|
var sessionDuration time.Duration
|
||||||
if req.KeepAlive == nil {
|
if req.KeepAlive == nil {
|
||||||
sessionDuration = defaultSessionDuration
|
sessionDuration = getDefaultSessionDuration()
|
||||||
} else {
|
} else {
|
||||||
sessionDuration = req.KeepAlive.Duration
|
sessionDuration = req.KeepAlive.Duration
|
||||||
}
|
}
|
||||||
@@ -384,6 +386,32 @@ func GenerateHandler(c *gin.Context) {
|
|||||||
streamResponse(c, ch)
|
streamResponse(c, ch)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getDefaultSessionDuration() time.Duration {
|
||||||
|
if t, exists := os.LookupEnv("OLLAMA_KEEP_ALIVE"); exists {
|
||||||
|
v, err := strconv.Atoi(t)
|
||||||
|
if err != nil {
|
||||||
|
d, err := time.ParseDuration(t)
|
||||||
|
if err != nil {
|
||||||
|
return defaultSessionDuration
|
||||||
|
}
|
||||||
|
|
||||||
|
if d < 0 {
|
||||||
|
return time.Duration(math.MaxInt64)
|
||||||
|
}
|
||||||
|
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
|
d := time.Duration(v) * time.Second
|
||||||
|
if d < 0 {
|
||||||
|
return time.Duration(math.MaxInt64)
|
||||||
|
}
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
|
return defaultSessionDuration
|
||||||
|
}
|
||||||
|
|
||||||
func EmbeddingsHandler(c *gin.Context) {
|
func EmbeddingsHandler(c *gin.Context) {
|
||||||
loaded.mu.Lock()
|
loaded.mu.Lock()
|
||||||
defer loaded.mu.Unlock()
|
defer loaded.mu.Unlock()
|
||||||
@@ -427,7 +455,7 @@ func EmbeddingsHandler(c *gin.Context) {
|
|||||||
|
|
||||||
var sessionDuration time.Duration
|
var sessionDuration time.Duration
|
||||||
if req.KeepAlive == nil {
|
if req.KeepAlive == nil {
|
||||||
sessionDuration = defaultSessionDuration
|
sessionDuration = getDefaultSessionDuration()
|
||||||
} else {
|
} else {
|
||||||
sessionDuration = req.KeepAlive.Duration
|
sessionDuration = req.KeepAlive.Duration
|
||||||
}
|
}
|
||||||
@@ -1228,7 +1256,7 @@ func ChatHandler(c *gin.Context) {
|
|||||||
|
|
||||||
var sessionDuration time.Duration
|
var sessionDuration time.Duration
|
||||||
if req.KeepAlive == nil {
|
if req.KeepAlive == nil {
|
||||||
sessionDuration = defaultSessionDuration
|
sessionDuration = getDefaultSessionDuration()
|
||||||
} else {
|
} else {
|
||||||
sessionDuration = req.KeepAlive.Duration
|
sessionDuration = req.KeepAlive.Duration
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user