Compare commits
10 Commits
v0.1.45
...
bmizerany/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
acbffa59e9 | ||
|
|
ccef9431c8 | ||
|
|
9a9e7d83c4 | ||
|
|
189a43caa2 | ||
|
|
e835ef1836 | ||
|
|
7e7749224c | ||
|
|
c7c2f3bc22 | ||
|
|
54a79d6a8a | ||
|
|
5bf5aeec01 | ||
|
|
1a1c99e334 |
@@ -182,6 +182,12 @@ $ ollama run llama3 "Summarize this file: $(cat README.md)"
|
||||
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
|
||||
```
|
||||
|
||||
### Show model information
|
||||
|
||||
```
|
||||
ollama show llama3
|
||||
```
|
||||
|
||||
### List models on your computer
|
||||
|
||||
```
|
||||
|
||||
13
api/types.go
13
api/types.go
@@ -608,6 +608,19 @@ func FormatParams(params map[string][]string) (map[string]interface{}, error) {
|
||||
} else {
|
||||
field := valueOpts.FieldByName(opt.Name)
|
||||
if field.IsValid() && field.CanSet() {
|
||||
if reflect.PointerTo(field.Type()) == reflect.TypeOf((*TriState)(nil)) {
|
||||
boolVal, err := strconv.ParseBool(vals[0])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid bool value %s", vals)
|
||||
}
|
||||
if boolVal {
|
||||
out[key] = TriStateTrue
|
||||
} else {
|
||||
out[key] = TriStateFalse
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
switch field.Kind() {
|
||||
case reflect.Float32:
|
||||
floatVal, err := strconv.ParseFloat(vals[0], 32)
|
||||
|
||||
@@ -2,6 +2,7 @@ package api
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -141,3 +142,65 @@ func TestUseMmapParsingFromJSON(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUseMmapFormatParams(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
req map[string][]string
|
||||
exp TriState
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "True",
|
||||
req: map[string][]string{
|
||||
"use_mmap": []string{"true"},
|
||||
},
|
||||
exp: TriStateTrue,
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "False",
|
||||
req: map[string][]string{
|
||||
"use_mmap": []string{"false"},
|
||||
},
|
||||
exp: TriStateFalse,
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "Numeric True",
|
||||
req: map[string][]string{
|
||||
"use_mmap": []string{"1"},
|
||||
},
|
||||
exp: TriStateTrue,
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "Numeric False",
|
||||
req: map[string][]string{
|
||||
"use_mmap": []string{"0"},
|
||||
},
|
||||
exp: TriStateFalse,
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "invalid string",
|
||||
req: map[string][]string{
|
||||
"use_mmap": []string{"foo"},
|
||||
},
|
||||
exp: TriStateUndefined,
|
||||
err: fmt.Errorf("invalid bool value [foo]"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
resp, err := FormatParams(test.req)
|
||||
require.Equal(t, err, test.err)
|
||||
respVal, ok := resp["use_mmap"]
|
||||
if test.exp != TriStateUndefined {
|
||||
assert.True(t, ok, "resp: %v", resp)
|
||||
assert.Equal(t, test.exp, respVal)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -321,7 +321,7 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
||||
embedding := llm.KV().EmbeddingLength()
|
||||
heads := llm.KV().HeadCount()
|
||||
headsKV := llm.KV().HeadCountKV()
|
||||
vocab := uint64(len(llm.KV()["tokenizer.ggml.tokens"].([]any)))
|
||||
vocab := llm.KV()["tokenizer.ggml.tokens"].(*array).size
|
||||
|
||||
embeddingHeads := llm.KV().EmbeddingHeadCount()
|
||||
embeddingHeadsK := llm.KV().EmbeddingHeadCountK()
|
||||
|
||||
36
llm/gguf.go
36
llm/gguf.go
@@ -316,7 +316,7 @@ func writeGGUFString(llm *gguf, w io.Writer, s string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
|
||||
func readGGUFV1Array(llm *gguf, r io.Reader) (*array, error) {
|
||||
t, err := readGGUF[uint32](llm, r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -327,6 +327,8 @@ func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
a := &array{size: uint64(n)}
|
||||
|
||||
for i := 0; uint32(i) < n; i++ {
|
||||
var e any
|
||||
switch t {
|
||||
@@ -361,13 +363,27 @@ func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
a = append(a, e)
|
||||
if len(a.values) < arrayMaxSize {
|
||||
a.values = append(a.values, e)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
return a, nil
|
||||
}
|
||||
|
||||
func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
|
||||
const arrayMaxSize = 1000
|
||||
|
||||
type array struct {
|
||||
size uint64
|
||||
|
||||
// values is the slice of values in the array.
|
||||
//
|
||||
// Its length may be less than size if the array is too big to reaonably
|
||||
// fit in memory. The current limit si arrayMaxSize.
|
||||
values []any
|
||||
}
|
||||
|
||||
func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
|
||||
if llm.Version == 1 {
|
||||
return readGGUFV1Array(llm, r)
|
||||
}
|
||||
@@ -382,6 +398,8 @@ func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
a := &array{size: n}
|
||||
|
||||
for i := 0; uint64(i) < n; i++ {
|
||||
var e any
|
||||
switch t {
|
||||
@@ -416,10 +434,16 @@ func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
a = append(a, e)
|
||||
// TODO(bmizerany): We may want to only enforce this limit
|
||||
// on certain fields, however, as of now, I (bmizerany) do
|
||||
// not know of any array fields that are needed by Ollama that
|
||||
// exceed this limit.
|
||||
if len(a.values) < arrayMaxSize {
|
||||
a.values = append(a.values, e)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
return a, nil
|
||||
}
|
||||
|
||||
func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
|
||||
|
||||
@@ -81,7 +81,17 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
var err error
|
||||
var cpuRunner string
|
||||
var estimate MemoryEstimate
|
||||
var systemMemory uint64
|
||||
var systemTotalMemory uint64
|
||||
var systemFreeMemory uint64
|
||||
|
||||
systemMemInfo, err := gpu.GetCPUMem()
|
||||
if err != nil {
|
||||
slog.Error("failed to lookup system memory", "error", err)
|
||||
} else {
|
||||
systemTotalMemory = systemMemInfo.TotalMemory
|
||||
systemFreeMemory = systemMemInfo.FreeMemory
|
||||
slog.Debug("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", systemFreeMemory)
|
||||
}
|
||||
|
||||
// If the user wants zero GPU layers, reset the gpu list to be CPU/system ram info
|
||||
if opts.NumGPU == 0 {
|
||||
@@ -91,19 +101,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
cpuRunner = serverForCpu()
|
||||
estimate = EstimateGPULayers(gpus, ggml, projectors, opts)
|
||||
} else {
|
||||
if gpus[0].Library == "metal" {
|
||||
memInfo, err := gpu.GetCPUMem()
|
||||
if err != nil {
|
||||
slog.Error("failed to lookup system memory", "error", err)
|
||||
} else {
|
||||
systemMemory = memInfo.TotalMemory
|
||||
slog.Debug("system memory", "total", format.HumanBytes2(systemMemory))
|
||||
}
|
||||
}
|
||||
estimate = EstimateGPULayers(gpus, ggml, projectors, opts)
|
||||
|
||||
switch {
|
||||
case gpus[0].Library == "metal" && estimate.VRAMSize > systemMemory:
|
||||
case gpus[0].Library == "metal" && estimate.VRAMSize > systemTotalMemory:
|
||||
// disable partial offloading when model is greater than total system memory as this
|
||||
// can lead to locking up the system
|
||||
opts.NumGPU = 0
|
||||
@@ -211,7 +212,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
}
|
||||
|
||||
// Windows CUDA should not use mmap for best performance
|
||||
if (runtime.GOOS == "windows" && gpus[0].Library == "cuda") || opts.UseMMap == api.TriStateFalse {
|
||||
// Linux with a model larger than free space, mmap leads to thrashing
|
||||
if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == api.TriStateUndefined) ||
|
||||
(runtime.GOOS == "linux" && systemFreeMemory < estimate.TotalSize && opts.UseMMap == api.TriStateUndefined) ||
|
||||
opts.UseMMap == api.TriStateFalse {
|
||||
params = append(params, "--no-mmap")
|
||||
}
|
||||
|
||||
|
||||
@@ -279,7 +279,7 @@ if ! check_gpu nvidia-smi || [ -z "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\
|
||||
case $OS_NAME in
|
||||
centos|rhel) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -d '.' -f 1) ;;
|
||||
rocky) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -c1) ;;
|
||||
fedora) [ $OS_VERSION -lt '37' ] && install_cuda_driver_yum $OS_NAME $OS_VERSION || install_cuda_driver_yum $OS_NAME '37';;
|
||||
fedora) [ $OS_VERSION -lt '39' ] && install_cuda_driver_yum $OS_NAME $OS_VERSION || install_cuda_driver_yum $OS_NAME '39';;
|
||||
amzn) install_cuda_driver_yum 'fedora' '37' ;;
|
||||
debian) install_cuda_driver_apt $OS_NAME $OS_VERSION ;;
|
||||
ubuntu) install_cuda_driver_apt $OS_NAME $(echo $OS_VERSION | sed 's/\.//') ;;
|
||||
|
||||
@@ -414,17 +414,22 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
||||
return err
|
||||
}
|
||||
|
||||
layers, err := parseFromFile(ctx, temp, "", fn)
|
||||
layer, err := NewLayer(temp, baseLayer.MediaType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(layers) != 1 {
|
||||
return errors.New("quantization failed")
|
||||
if _, err := temp.Seek(0, io.SeekStart); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
baseLayer.Layer = layers[0].Layer
|
||||
baseLayer.GGML = layers[0].GGML
|
||||
ggml, _, err := llm.DecodeGGML(temp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
baseLayer.Layer = layer
|
||||
baseLayer.GGML = ggml
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user