Merge remote-tracking branch 'upstream/main' into vulkanV3
This commit is contained in:
@@ -149,9 +149,22 @@ func Bool(k string) func() bool {
|
||||
}
|
||||
}
|
||||
|
||||
// LogLevel returns the log level for the application.
|
||||
// Values are 0 or false INFO (Default), 1 or true DEBUG, 2 TRACE
|
||||
func LogLevel() slog.Level {
|
||||
level := slog.LevelInfo
|
||||
if s := Var("OLLAMA_DEBUG"); s != "" {
|
||||
if b, _ := strconv.ParseBool(s); b {
|
||||
level = slog.LevelDebug
|
||||
} else if i, _ := strconv.ParseInt(s, 10, 64); i != 0 {
|
||||
level = slog.Level(i * -4)
|
||||
}
|
||||
}
|
||||
|
||||
return level
|
||||
}
|
||||
|
||||
var (
|
||||
// Debug enabled additional debug information.
|
||||
Debug = Bool("OLLAMA_DEBUG")
|
||||
// FlashAttention enables the experimental flash attention feature.
|
||||
FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
|
||||
// KvCacheType is the quantization type for the K/V cache.
|
||||
@@ -169,7 +182,9 @@ var (
|
||||
// Enable the new Ollama engine
|
||||
NewEngine = Bool("OLLAMA_NEW_ENGINE")
|
||||
// ContextLength sets the default context length
|
||||
ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 2048)
|
||||
ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
|
||||
// Auth enables authentication between the Ollama client and server
|
||||
UseAuth = Bool("OLLAMA_AUTH")
|
||||
)
|
||||
|
||||
func String(s string) func() string {
|
||||
@@ -205,13 +220,11 @@ func Uint(key string, defaultValue uint) func() uint {
|
||||
|
||||
var (
|
||||
// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
|
||||
NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
|
||||
NumParallel = Uint("OLLAMA_NUM_PARALLEL", 1)
|
||||
// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
|
||||
MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
|
||||
// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
|
||||
MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
|
||||
// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
|
||||
MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
|
||||
)
|
||||
|
||||
func Uint64(key string, defaultValue uint64) func() uint64 {
|
||||
@@ -239,7 +252,7 @@ type EnvVar struct {
|
||||
|
||||
func AsMap() map[string]EnvVar {
|
||||
ret := map[string]EnvVar{
|
||||
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
|
||||
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
|
||||
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
|
||||
"OLLAMA_KV_CACHE_TYPE": {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
|
||||
"OLLAMA_GPU_OVERHEAD": {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
|
||||
@@ -256,7 +269,7 @@ func AsMap() map[string]EnvVar {
|
||||
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
|
||||
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
|
||||
"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
|
||||
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 2048)"},
|
||||
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
|
||||
"OLLAMA_NEW_ENGINE": {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
|
||||
|
||||
// Informational
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
package envconfig
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"math"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/ollama/ollama/logutil"
|
||||
)
|
||||
|
||||
func TestHost(t *testing.T) {
|
||||
@@ -279,8 +281,8 @@ func TestVar(t *testing.T) {
|
||||
|
||||
func TestContextLength(t *testing.T) {
|
||||
cases := map[string]uint{
|
||||
"": 2048,
|
||||
"4096": 4096,
|
||||
"": 4096,
|
||||
"2048": 2048,
|
||||
}
|
||||
|
||||
for k, v := range cases {
|
||||
@@ -292,3 +294,34 @@ func TestContextLength(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLogLevel(t *testing.T) {
|
||||
cases := map[string]slog.Level{
|
||||
// Default to INFO
|
||||
"": slog.LevelInfo,
|
||||
"false": slog.LevelInfo,
|
||||
"f": slog.LevelInfo,
|
||||
"0": slog.LevelInfo,
|
||||
|
||||
// True values enable Debug
|
||||
"true": slog.LevelDebug,
|
||||
"t": slog.LevelDebug,
|
||||
|
||||
// Positive values increase verbosity
|
||||
"1": slog.LevelDebug,
|
||||
"2": logutil.LevelTrace,
|
||||
|
||||
// Negative values decrease verbosity
|
||||
"-1": slog.LevelWarn,
|
||||
"-2": slog.LevelError,
|
||||
}
|
||||
|
||||
for k, v := range cases {
|
||||
t.Run(k, func(t *testing.T) {
|
||||
t.Setenv("OLLAMA_DEBUG", k)
|
||||
if i := LogLevel(); i != v {
|
||||
t.Errorf("%s: expected %d, got %d", k, v, i)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user