Compare commits
1 Commits
bmizerany/
...
jmorganca/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ca7c3f7e0f |
@@ -1,50 +0,0 @@
|
|||||||
package api
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"math"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestKeepAliveParsingFromJSON(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
req string
|
|
||||||
exp *Duration
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "Positive Integer",
|
|
||||||
req: `{ "keep_alive": 42 }`,
|
|
||||||
exp: &Duration{42 * time.Second},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Positive Integer String",
|
|
||||||
req: `{ "keep_alive": "42m" }`,
|
|
||||||
exp: &Duration{42 * time.Minute},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Negative Integer",
|
|
||||||
req: `{ "keep_alive": -1 }`,
|
|
||||||
exp: &Duration{math.MaxInt64},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Negative Integer String",
|
|
||||||
req: `{ "keep_alive": "-1m" }`,
|
|
||||||
exp: &Duration{math.MaxInt64},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, test := range tests {
|
|
||||||
t.Run(test.name, func(t *testing.T) {
|
|
||||||
var dec ChatRequest
|
|
||||||
err := json.Unmarshal([]byte(test.req), &dec)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
assert.Equal(t, test.exp, dec.KeepAlive)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -970,10 +970,9 @@ func NewCLI() *cobra.Command {
|
|||||||
serveCmd.SetUsageTemplate(serveCmd.UsageTemplate() + `
|
serveCmd.SetUsageTemplate(serveCmd.UsageTemplate() + `
|
||||||
Environment Variables:
|
Environment Variables:
|
||||||
|
|
||||||
OLLAMA_HOST The host:port to bind to (default "127.0.0.1:11434")
|
OLLAMA_HOST The host:port to bind to (default "127.0.0.1:11434")
|
||||||
OLLAMA_ORIGINS A comma separated list of allowed origins.
|
OLLAMA_ORIGINS A comma separated list of allowed origins.
|
||||||
OLLAMA_MODELS The path to the models directory (default is "~/.ollama/models")
|
OLLAMA_MODELS The path to the models directory (default is "~/.ollama/models")
|
||||||
OLLAMA_KEEP_ALIVE The duration that models stay loaded in memory (default is "5m")
|
|
||||||
`)
|
`)
|
||||||
|
|
||||||
pullCmd := &cobra.Command{
|
pullCmd := &cobra.Command{
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
### Getting Started
|
### Getting Started
|
||||||
* [Quickstart](../README.md#quickstart)
|
* [Quickstart](../README.md#quickstart)
|
||||||
* [Examples](../examples)
|
* [Examples](../examples)
|
||||||
* [Importing models](./import.md)
|
* [Importing models](./import.md) from GGUF, Pytorch and Safetensors
|
||||||
* [Linux Documentation](./linux.md)
|
* [Linux Documentation](./linux.md)
|
||||||
* [Windows Documentation](./windows.md)
|
* [Windows Documentation](./windows.md)
|
||||||
* [Docker Documentation](https://hub.docker.com/r/ollama/ollama)
|
* [Docker Documentation](https://hub.docker.com/r/ollama/ollama)
|
||||||
|
|||||||
@@ -172,6 +172,19 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
|
|||||||
slog.Info(fmt.Sprintf("loaded %d images", len(predict.Images)))
|
slog.Info(fmt.Sprintf("loaded %d images", len(predict.Images)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Limit the number of predictions to the maximum context length
|
||||||
|
// this will cause no more than two context shifts
|
||||||
|
// TODO: limit this further to num_ctx - len(prompt) to avoid
|
||||||
|
// any context shifts at all
|
||||||
|
if predict.Options.NumPredict > llm.options.NumCtx {
|
||||||
|
slog.Warn(fmt.Sprintf("requested num_predict is greater than the context length (%d > %d), using %d instead", predict.Options.NumPredict, llm.options.NumCtx, llm.options.NumCtx))
|
||||||
|
predict.Options.NumPredict = llm.options.NumCtx
|
||||||
|
}
|
||||||
|
|
||||||
|
if predict.Options.NumPredict == -1 {
|
||||||
|
predict.Options.NumPredict = llm.options.NumCtx
|
||||||
|
}
|
||||||
|
|
||||||
request := map[string]any{
|
request := map[string]any{
|
||||||
"prompt": predict.Prompt,
|
"prompt": predict.Prompt,
|
||||||
"stream": true,
|
"stream": true,
|
||||||
|
|||||||
@@ -1,13 +0,0 @@
|
|||||||
diff --git a/llama.cpp b/llama.cpp
|
|
||||||
index b27aa272..99372f9c 100644
|
|
||||||
--- a/llama.cpp
|
|
||||||
+++ b/llama.cpp
|
|
||||||
@@ -9360,7 +9360,7 @@ struct llm_tokenizer_wpm {
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t to_lower(uint32_t code) {
|
|
||||||
- static const std::locale locale("en_US.UTF-8");
|
|
||||||
+ static const std::locale locale("");
|
|
||||||
#if defined(_WIN32)
|
|
||||||
if (code > 0xFFFF) {
|
|
||||||
return code;
|
|
||||||
@@ -795,8 +795,10 @@ func PruneLayers() error {
|
|||||||
|
|
||||||
for _, blob := range blobs {
|
for _, blob := range blobs {
|
||||||
name := blob.Name()
|
name := blob.Name()
|
||||||
name = strings.ReplaceAll(name, "-", ":")
|
if runtime.GOOS == "windows" {
|
||||||
if strings.HasPrefix(name, "sha256-") {
|
name = strings.ReplaceAll(name, "-", ":")
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(name, "sha256:") {
|
||||||
deleteMap[name] = struct{}{}
|
deleteMap[name] = struct{}{}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"golang.org/x/exp/slices"
|
"golang.org/x/exp/slices"
|
||||||
@@ -46,7 +47,10 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
const delimiter = "-"
|
delimiter := ":"
|
||||||
|
if runtime.GOOS == "windows" {
|
||||||
|
delimiter = "-"
|
||||||
|
}
|
||||||
|
|
||||||
pattern := strings.Join([]string{"sha256", "*-partial"}, delimiter)
|
pattern := strings.Join([]string{"sha256", "*-partial"}, delimiter)
|
||||||
temp, err := os.CreateTemp(blobs, pattern)
|
temp, err := os.CreateTemp(blobs, pattern)
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -149,7 +150,10 @@ func GetBlobsPath(digest string) (string, error) {
|
|||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
digest = strings.ReplaceAll(digest, ":", "-")
|
if runtime.GOOS == "windows" {
|
||||||
|
digest = strings.ReplaceAll(digest, ":", "-")
|
||||||
|
}
|
||||||
|
|
||||||
path := filepath.Join(dir, "blobs", digest)
|
path := filepath.Join(dir, "blobs", digest)
|
||||||
dirPath := filepath.Dir(path)
|
dirPath := filepath.Dir(path)
|
||||||
if digest == "" {
|
if digest == "" {
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"math"
|
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
@@ -17,7 +16,6 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"syscall"
|
"syscall"
|
||||||
@@ -209,7 +207,7 @@ func GenerateHandler(c *gin.Context) {
|
|||||||
|
|
||||||
var sessionDuration time.Duration
|
var sessionDuration time.Duration
|
||||||
if req.KeepAlive == nil {
|
if req.KeepAlive == nil {
|
||||||
sessionDuration = getDefaultSessionDuration()
|
sessionDuration = defaultSessionDuration
|
||||||
} else {
|
} else {
|
||||||
sessionDuration = req.KeepAlive.Duration
|
sessionDuration = req.KeepAlive.Duration
|
||||||
}
|
}
|
||||||
@@ -386,32 +384,6 @@ func GenerateHandler(c *gin.Context) {
|
|||||||
streamResponse(c, ch)
|
streamResponse(c, ch)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDefaultSessionDuration() time.Duration {
|
|
||||||
if t, exists := os.LookupEnv("OLLAMA_KEEP_ALIVE"); exists {
|
|
||||||
v, err := strconv.Atoi(t)
|
|
||||||
if err != nil {
|
|
||||||
d, err := time.ParseDuration(t)
|
|
||||||
if err != nil {
|
|
||||||
return defaultSessionDuration
|
|
||||||
}
|
|
||||||
|
|
||||||
if d < 0 {
|
|
||||||
return time.Duration(math.MaxInt64)
|
|
||||||
}
|
|
||||||
|
|
||||||
return d
|
|
||||||
}
|
|
||||||
|
|
||||||
d := time.Duration(v) * time.Second
|
|
||||||
if d < 0 {
|
|
||||||
return time.Duration(math.MaxInt64)
|
|
||||||
}
|
|
||||||
return d
|
|
||||||
}
|
|
||||||
|
|
||||||
return defaultSessionDuration
|
|
||||||
}
|
|
||||||
|
|
||||||
func EmbeddingsHandler(c *gin.Context) {
|
func EmbeddingsHandler(c *gin.Context) {
|
||||||
loaded.mu.Lock()
|
loaded.mu.Lock()
|
||||||
defer loaded.mu.Unlock()
|
defer loaded.mu.Unlock()
|
||||||
@@ -455,7 +427,7 @@ func EmbeddingsHandler(c *gin.Context) {
|
|||||||
|
|
||||||
var sessionDuration time.Duration
|
var sessionDuration time.Duration
|
||||||
if req.KeepAlive == nil {
|
if req.KeepAlive == nil {
|
||||||
sessionDuration = getDefaultSessionDuration()
|
sessionDuration = defaultSessionDuration
|
||||||
} else {
|
} else {
|
||||||
sessionDuration = req.KeepAlive.Duration
|
sessionDuration = req.KeepAlive.Duration
|
||||||
}
|
}
|
||||||
@@ -1256,7 +1228,7 @@ func ChatHandler(c *gin.Context) {
|
|||||||
|
|
||||||
var sessionDuration time.Duration
|
var sessionDuration time.Duration
|
||||||
if req.KeepAlive == nil {
|
if req.KeepAlive == nil {
|
||||||
sessionDuration = getDefaultSessionDuration()
|
sessionDuration = defaultSessionDuration
|
||||||
} else {
|
} else {
|
||||||
sessionDuration = req.KeepAlive.Duration
|
sessionDuration = req.KeepAlive.Duration
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user