Compare commits

..

12 Commits

Author SHA1 Message Date
Patrick Devine
45ac07cd02 create the blobs directory correctly (#508) 2023-09-11 14:54:52 -07:00
Jeffrey Morgan
7d749cc787 fix darwin build script 2023-09-11 16:31:46 -04:00
Patrick Devine
e7e91cd71c add autoprune to remove unused layers (#491) 2023-09-11 11:46:35 -07:00
Jeffrey Morgan
3920e15386 add model format to config layer (#497) 2023-09-09 17:53:44 -04:00
Michael Yang
41e976edde Merge pull request #492 from jmorganca/mxyng/nil-pointer
fix nil pointer dereference
2023-09-07 17:25:23 -07:00
Michael Yang
de227b620f fix nil pointer dereference 2023-09-07 17:24:31 -07:00
Michael Yang
63def6ca49 Merge pull request #487 from jmorganca/mxyng/dockerignore
update dockerignore
2023-09-07 14:16:17 -07:00
Michael Yang
738fe9c4aa Merge pull request #486 from jmorganca/mxyng/fix-push
fix: retry push on expired token
2023-09-07 13:58:34 -07:00
Michael Yang
a8da0bacbe update dockerignore 2023-09-07 13:36:25 -07:00
Michael Yang
bf146fb072 fix retry on unauthorized chunk 2023-09-07 12:02:04 -07:00
Michael Yang
f0f4943577 fix get auth token 2023-09-07 12:01:56 -07:00
Bruce MacDonald
09dd2aeff9 GGUF support (#441) 2023-09-07 13:55:37 -04:00
17 changed files with 721 additions and 185 deletions

View File

@@ -2,3 +2,4 @@
 ollama
 app
 llm/llama.cpp/ggml
+llm/llama.cpp/gguf

11
.gitmodules vendored
View File

@@ -1,4 +1,9 @@
 [submodule "llm/llama.cpp/ggml"]
 	path = llm/llama.cpp/ggml
 	url = https://github.com/ggerganov/llama.cpp.git
 	ignore = dirty
+	shallow = true
+[submodule "llm/llama.cpp/gguf"]
+	path = llm/llama.cpp/gguf
+	url = https://github.com/ggerganov/llama.cpp.git
+	shallow = true

View File

@@ -672,6 +672,12 @@ func RunServer(cmd *cobra.Command, _ []string) error {
 		origins = strings.Split(o, ",")
 	}
 
+	if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
+		if err := server.PruneLayers(); err != nil {
+			return err
+		}
+	}
+
 	return server.Serve(ln, origins)
 }
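
Note: the block added above gives the server an opt-out for the new startup pruning: layers are pruned only when OLLAMA_NOPRUNE is unset or empty. A standalone sketch of the same gate (the print statements are illustrative; the real code calls server.PruneLayers):

```go
package main

import (
	"fmt"
	"os"
)

func main() {
	// Mirrors the check added to RunServer: prune only when OLLAMA_NOPRUNE is empty.
	if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
		fmt.Println("would call server.PruneLayers()")
	} else {
		fmt.Println("pruning skipped:", noprune)
	}
}
```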

View File

@@ -3,12 +3,15 @@ package llm
 import (
 	"encoding/binary"
 	"errors"
-	"fmt"
 	"io"
+	"path"
+	"sync"
 )
 
 type ModelFamily string
 
+const ModelFamilyUnknown ModelFamily = "unknown"
+
 type ModelType uint32
 
 const (
@@ -57,18 +60,17 @@ type model interface {
 type container interface {
 	Name() string
-	Decode(io.Reader) error
+	Decode(io.Reader) (model, error)
 }
 
-type containerGGML struct {
-}
+type containerGGML struct{}
 
 func (c *containerGGML) Name() string {
 	return "ggml"
 }
 
-func (c *containerGGML) Decode(r io.Reader) error {
-	return nil
+func (c *containerGGML) Decode(r io.Reader) (model, error) {
+	return nil, nil
 }
 
 type containerGGMF struct {
@@ -79,18 +81,18 @@ func (c *containerGGMF) Name() string {
 	return "ggmf"
 }
 
-func (c *containerGGMF) Decode(r io.Reader) error {
+func (c *containerGGMF) Decode(r io.Reader) (model, error) {
 	var version uint32
 	binary.Read(r, binary.LittleEndian, &version)
 
 	switch version {
 	case 1:
 	default:
-		return errors.New("invalid version")
+		return nil, errors.New("invalid version")
 	}
 
 	c.version = version
-	return nil
+	return nil, nil
 }
 
 type containerGGJT struct {
@@ -101,18 +103,22 @@ func (c *containerGGJT) Name() string {
 	return "ggjt"
 }
 
-func (c *containerGGJT) Decode(r io.Reader) error {
+func (c *containerGGJT) Decode(r io.Reader) (model, error) {
 	var version uint32
 	binary.Read(r, binary.LittleEndian, &version)
 
 	switch version {
 	case 1, 2, 3:
 	default:
-		return errors.New("invalid version")
+		return nil, errors.New("invalid version")
 	}
 
 	c.version = version
-	return nil
+
+	// different model types may have different layouts for hyperparameters
+	var llama llamaModel
+	binary.Read(r, binary.LittleEndian, &llama.hyperparameters)
+
+	return &llama, nil
 }
 
 type containerLORA struct {
@@ -123,32 +129,51 @@ func (c *containerLORA) Name() string {
 	return "ggla"
 }
 
-func (c *containerLORA) Decode(r io.Reader) error {
+func (c *containerLORA) Decode(r io.Reader) (model, error) {
 	var version uint32
 	binary.Read(r, binary.LittleEndian, &version)
 
 	switch version {
 	case 1:
 	default:
-		return errors.New("invalid version")
+		return nil, errors.New("invalid version")
 	}
 
 	c.version = version
-	return nil
+	return nil, nil
+}
+
+var (
+	ggmlGPU = path.Join("llama.cpp", "ggml", "build", "gpu", "bin")
+	ggmlCPU = path.Join("llama.cpp", "ggml", "build", "cpu", "bin")
+)
+
+var (
+	ggmlInit       sync.Once
+	ggmlRunnerPath string
+)
+
+func ggmlRunner() ModelRunner {
+	ggmlInit.Do(func() {
+		ggmlRunnerPath = chooseRunner(ggmlGPU, ggmlCPU)
+	})
+	return ModelRunner{Path: ggmlRunnerPath}
 }
 
 const (
-	/// Magic constant for `ggml` files (unversioned).
+	// Magic constant for `ggml` files (unversioned).
 	FILE_MAGIC_GGML = 0x67676d6c
-	/// Magic constant for `ggml` files (versioned, ggmf).
+	// Magic constant for `ggml` files (versioned, ggmf).
 	FILE_MAGIC_GGMF = 0x67676d66
-	/// Magic constant for `ggml` files (versioned, ggjt).
+	// Magic constant for `ggml` files (versioned, ggjt).
 	FILE_MAGIC_GGJT = 0x67676a74
-	/// Magic constant for `ggla` files (LoRA adapter).
+	// Magic constant for `ggla` files (LoRA adapter).
 	FILE_MAGIC_GGLA = 0x67676C61
+	// Magic constant for `gguf` files (versioned, gguf)
+	FILE_MAGIC_GGUF = 0x46554747
 )
 
-func DecodeGGML(r io.ReadSeeker, hint ModelFamily) (*GGML, error) {
+func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
 	var ggml GGML
 	binary.Read(r, binary.LittleEndian, &ggml.magic)
@@ -161,24 +186,18 @@ func DecodeGGML(r io.ReadSeeker, hint ModelFamily) (*GGML, error) {
 		ggml.container = &containerGGJT{}
 	case FILE_MAGIC_GGLA:
 		ggml.container = &containerLORA{}
+	case FILE_MAGIC_GGUF:
+		ggml.container = &containerGGUF{}
 	default:
 		return nil, errors.New("invalid file magic")
 	}
 
-	if err := ggml.Decode(r); err != nil {
+	model, err := ggml.Decode(r)
+	if err != nil {
 		return nil, err
 	}
 
-	// different model types may have different layouts for hyperparameters
-	switch hint {
-	case ModelFamilyLlama:
-		var llama llamaModel
-		binary.Read(r, binary.LittleEndian, &llama.hyperparameters)
-		ggml.model = &llama
-		// TODO: sanity check hyperparameters
-	default:
-		return nil, fmt.Errorf("unsupported model type: %s", hint)
-	}
+	ggml.model = model
 
 	// final model type
 	return &ggml, nil
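
A note on the new constant: FILE_MAGIC_GGUF = 0x46554747 is simply the four ASCII bytes "GGUF" read as a little-endian uint32, which matches how DecodeGGML reads ggml.magic with binary.Read and binary.LittleEndian. A quick standalone check (not part of the diff):

```go
package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// The first four bytes of a GGUF file are the ASCII string "GGUF".
	magic := binary.LittleEndian.Uint32([]byte("GGUF"))
	fmt.Printf("0x%08X\n", magic) // 0x46554747 == FILE_MAGIC_GGUF
}
```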

385
llm/gguf.go Normal file
View File

@@ -0,0 +1,385 @@
package llm
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"log"
"path"
"sync"
)
type containerGGUF struct {
Version uint32
V1 struct {
NumTensor uint32
NumKV uint32
}
V2 struct {
NumTensor uint64
NumKV uint64
}
}
func (c *containerGGUF) Name() string {
return "gguf"
}
func (c *containerGGUF) Decode(r io.Reader) (model, error) {
binary.Read(r, binary.LittleEndian, &c.Version)
switch c.Version {
case 1:
binary.Read(r, binary.LittleEndian, &c.V1)
case 2:
binary.Read(r, binary.LittleEndian, &c.V2)
default:
return nil, errors.New("invalid version")
}
model := newGGUFModel(c)
if err := model.Decode(r); err != nil {
return nil, err
}
return model, nil
}
const (
ggufTypeUint8 uint32 = iota
ggufTypeInt8
ggufTypeUint16
ggufTypeInt16
ggufTypeUint32
ggufTypeInt32
ggufTypeFloat32
ggufTypeBool
ggufTypeString
ggufTypeArray
ggufTypeUint64
ggufTypeInt64
ggufTypeFloat64
)
type kv map[string]any
type ggufModel struct {
*containerGGUF
kv
}
func newGGUFModel(container *containerGGUF) *ggufModel {
return &ggufModel{
containerGGUF: container,
kv: make(kv),
}
}
func (llm *ggufModel) NumKV() uint64 {
if llm.Version == 1 {
return uint64(llm.V1.NumKV)
}
return llm.V2.NumKV
}
func (llm *ggufModel) ModelFamily() ModelFamily {
t, ok := llm.kv["general.architecture"].(string)
if ok {
return ModelFamily(t)
}
log.Printf("unknown model family: %T", t)
return ModelFamilyUnknown
}
func (llm *ggufModel) ModelType() ModelType {
switch llm.ModelFamily() {
case ModelFamilyLlama:
blocks, ok := llm.kv["llama.block_count"].(uint32)
if ok {
return ModelType(blocks)
}
}
return ModelType7B
}
func (llm *ggufModel) FileType() FileType {
switch llm.ModelFamily() {
case ModelFamilyLlama:
t, ok := llm.kv["general.file_type"].(uint32)
if ok {
return llamaFileType(t)
}
}
return llamaFileTypeF16
}
func (llm *ggufModel) Decode(r io.Reader) error {
read := llm.readString
if llm.Version == 1 {
read = llm.readStringV1
}
for i := 0; uint64(i) < llm.NumKV(); i++ {
k, err := read(r)
if err != nil {
return err
}
vtype := llm.readU32(r)
var v any
switch vtype {
case ggufTypeUint8:
v = llm.readU8(r)
case ggufTypeInt8:
v = llm.readI8(r)
case ggufTypeUint16:
v = llm.readU16(r)
case ggufTypeInt16:
v = llm.readI16(r)
case ggufTypeUint32:
v = llm.readU32(r)
case ggufTypeInt32:
v = llm.readI32(r)
case ggufTypeUint64:
v = llm.readU64(r)
case ggufTypeInt64:
v = llm.readI64(r)
case ggufTypeFloat32:
v = llm.readF32(r)
case ggufTypeFloat64:
v = llm.readF64(r)
case ggufTypeBool:
v = llm.readBool(r)
case ggufTypeString:
fn := llm.readString
if llm.Version == 1 {
fn = llm.readStringV1
}
s, err := fn(r)
if err != nil {
return err
}
v = s
case ggufTypeArray:
fn := llm.readArray
if llm.Version == 1 {
fn = llm.readArrayV1
}
a, err := fn(r)
if err != nil {
return err
}
v = a
default:
return fmt.Errorf("invalid type: %d", vtype)
}
llm.kv[k] = v
}
return nil
}
func (ggufModel) readU8(r io.Reader) uint8 {
var u8 uint8
binary.Read(r, binary.LittleEndian, &u8)
return u8
}
func (ggufModel) readI8(r io.Reader) int8 {
var i8 int8
binary.Read(r, binary.LittleEndian, &i8)
return i8
}
func (ggufModel) readU16(r io.Reader) uint16 {
var u16 uint16
binary.Read(r, binary.LittleEndian, &u16)
return u16
}
func (ggufModel) readI16(r io.Reader) int16 {
var i16 int16
binary.Read(r, binary.LittleEndian, &i16)
return i16
}
func (ggufModel) readU32(r io.Reader) uint32 {
var u32 uint32
binary.Read(r, binary.LittleEndian, &u32)
return u32
}
func (ggufModel) readI32(r io.Reader) int32 {
var i32 int32
binary.Read(r, binary.LittleEndian, &i32)
return i32
}
func (ggufModel) readU64(r io.Reader) uint64 {
var u64 uint64
binary.Read(r, binary.LittleEndian, &u64)
return u64
}
func (ggufModel) readI64(r io.Reader) int64 {
var i64 int64
binary.Read(r, binary.LittleEndian, &i64)
return i64
}
func (ggufModel) readF32(r io.Reader) float32 {
var f32 float32
binary.Read(r, binary.LittleEndian, &f32)
return f32
}
func (ggufModel) readF64(r io.Reader) float64 {
var f64 float64
binary.Read(r, binary.LittleEndian, &f64)
return f64
}
func (ggufModel) readBool(r io.Reader) bool {
var b bool
binary.Read(r, binary.LittleEndian, &b)
return b
}
func (ggufModel) readStringV1(r io.Reader) (string, error) {
var nameLength uint32
binary.Read(r, binary.LittleEndian, &nameLength)
var b bytes.Buffer
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
return "", err
}
// gguf v1 strings are null-terminated
b.Truncate(b.Len() - 1)
return b.String(), nil
}
func (llm ggufModel) readString(r io.Reader) (string, error) {
var nameLength uint64
binary.Read(r, binary.LittleEndian, &nameLength)
var b bytes.Buffer
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
return "", err
}
return b.String(), nil
}
func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
atype := llm.readU32(r)
n := llm.readU32(r)
for i := 0; uint32(i) < n; i++ {
switch atype {
case ggufTypeUint8:
arr = append(arr, llm.readU8(r))
case ggufTypeInt8:
arr = append(arr, llm.readU8(r))
case ggufTypeUint16:
arr = append(arr, llm.readU16(r))
case ggufTypeInt16:
arr = append(arr, llm.readI16(r))
case ggufTypeUint32:
arr = append(arr, llm.readU32(r))
case ggufTypeInt32:
arr = append(arr, llm.readI32(r))
case ggufTypeFloat32:
arr = append(arr, llm.readF32(r))
case ggufTypeBool:
arr = append(arr, llm.readBool(r))
case ggufTypeString:
s, err := llm.readStringV1(r)
if err != nil {
return nil, err
}
arr = append(arr, s)
default:
return nil, fmt.Errorf("invalid array type: %d", atype)
}
}
return
}
func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
atype := llm.readU32(r)
n := llm.readU64(r)
for i := 0; uint64(i) < n; i++ {
switch atype {
case ggufTypeUint8:
arr = append(arr, llm.readU8(r))
case ggufTypeInt8:
arr = append(arr, llm.readU8(r))
case ggufTypeUint16:
arr = append(arr, llm.readU16(r))
case ggufTypeInt16:
arr = append(arr, llm.readI16(r))
case ggufTypeUint32:
arr = append(arr, llm.readU32(r))
case ggufTypeInt32:
arr = append(arr, llm.readI32(r))
case ggufTypeUint64:
arr = append(arr, llm.readU64(r))
case ggufTypeInt64:
arr = append(arr, llm.readI64(r))
case ggufTypeFloat32:
arr = append(arr, llm.readF32(r))
case ggufTypeFloat64:
arr = append(arr, llm.readF64(r))
case ggufTypeBool:
arr = append(arr, llm.readBool(r))
case ggufTypeString:
s, err := llm.readString(r)
if err != nil {
return nil, err
}
arr = append(arr, s)
default:
return nil, fmt.Errorf("invalid array type: %d", atype)
}
}
return
}
var (
ggufGPU = path.Join("llama.cpp", "gguf", "build", "gpu", "bin")
ggufCPU = path.Join("llama.cpp", "gguf", "build", "cpu", "bin")
)
var (
ggufInit sync.Once
ggufRunnerPath string
)
func ggufRunner() ModelRunner {
ggufInit.Do(func() {
ggufRunnerPath = chooseRunner(ggufGPU, ggufCPU)
})
return ModelRunner{Path: ggufRunnerPath}
}
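
With the gguf container registered, callers go through the same DecodeGGML entry point for every format. A minimal sketch of probing a model file's metadata with it; the file name is illustrative, and the promoted method set is assumed from the interfaces in this diff (GGML embedding its container and model):

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/jmorganca/ollama/llm"
)

func main() {
	f, err := os.Open("model.gguf") // any ggml/ggmf/ggjt/ggla/gguf file
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// DecodeGGML sniffs the magic bytes, picks the matching container and
	// decodes its hyperparameters or key/value metadata.
	ggml, err := llm.DecodeGGML(f)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(ggml.Name(), ggml.ModelFamily(), ggml.FileType())
}
```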

View File

@@ -4,10 +4,12 @@
 package llm
 
 //go:generate git submodule init
-//go:generate git submodule update --force ggml
+//go:generate git submodule update --force ggml gguf
 //go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
 //go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
 //go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
 //go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
 //go:generate cmake --fresh -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
 //go:generate cmake --build ggml/build/cpu --target server --config Release
+//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
+//go:generate cmake --build gguf/build/cpu --target server --config Release

View File

@@ -1,10 +1,12 @@
 package llm
 
 //go:generate git submodule init
-//go:generate git submodule update --force ggml
+//go:generate git submodule update --force ggml gguf
 //go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
 //go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
 //go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
 //go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
 //go:generate cmake --fresh -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
 //go:generate cmake --build ggml/build/cpu --target server --config Release
+//go:generate cmake --fresh -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
+//go:generate cmake --build gguf/build/cpu --target server --config Release

View File

@@ -1,10 +1,12 @@
 package llm
 
 //go:generate git submodule init
-//go:generate git submodule update --force ggml
+//go:generate git submodule update --force ggml gguf
 //go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
 //go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
 //go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
 //go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
 //go:generate cmake --fresh -S ggml -B ggml/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
 //go:generate cmake --build ggml/build/gpu --target server --config Release
+//go:generate cmake -S gguf -B gguf/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
+//go:generate cmake --build gguf/build/gpu --target server --config Release

1
llm/llama.cpp/gguf Submodule

Submodule llm/llama.cpp/gguf added at 53885d7256

View File

@@ -20,27 +20,14 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
-	"sync"
 	"time"
 
 	"github.com/jmorganca/ollama/api"
 )
 
-const ModelFamilyLlama ModelFamily = "llama"
-
-//go:embed llama.cpp/ggml/build/*/bin/*
+//go:embed llama.cpp/*/build/*/bin/*
 var llamaCppEmbed embed.FS
 
-var (
-	ggmlGPU = path.Join("llama.cpp", "ggml", "build", "gpu", "bin")
-	ggmlCPU = path.Join("llama.cpp", "ggml", "build", "cpu", "bin")
-)
-
-var (
-	ggmlInit       sync.Once
-	ggmlRunnerPath string
-)
-
 func osPath(llamaPath string) string {
 	if runtime.GOOS == "windows" {
 		return path.Join(llamaPath, "Release")
@@ -49,67 +36,60 @@ func osPath(llamaPath string) string {
 	return llamaPath
 }
 
-func initGGML() {
-	ggmlInit.Do(func() {
-		tmpDir, err := os.MkdirTemp("", "llama-*")
-		if err != nil {
-			log.Fatalf("llama.cpp: failed to create temp dir: %v", err)
-		}
+func chooseRunner(gpuPath, cpuPath string) string {
+	tmpDir, err := os.MkdirTemp("", "llama-*")
+	if err != nil {
+		log.Fatalf("llama.cpp: failed to create temp dir: %v", err)
+	}
 
-		llamaPath := osPath(ggmlGPU)
+	llamaPath := osPath(gpuPath)
+	if _, err := fs.Stat(llamaCppEmbed, llamaPath); err != nil {
+		llamaPath = osPath(cpuPath)
 		if _, err := fs.Stat(llamaCppEmbed, llamaPath); err != nil {
-			llamaPath = osPath(ggmlCPU)
-			if _, err := fs.Stat(llamaCppEmbed, llamaPath); err != nil {
-				log.Fatalf("llama.cpp executable not found")
-			}
+			log.Fatalf("llama.cpp executable not found")
 		}
+	}
 
-		files := []string{"server"}
-		switch runtime.GOOS {
-		case "windows":
-			files = []string{"server.exe"}
-		case "darwin":
-			if llamaPath == osPath(ggmlGPU) {
-				files = append(files, "ggml-metal.metal")
-			}
+	files := []string{"server"}
+	switch runtime.GOOS {
+	case "windows":
+		files = []string{"server.exe"}
+	case "darwin":
+		if llamaPath == osPath(gpuPath) {
+			files = append(files, "ggml-metal.metal")
 		}
+	}
 
-		for _, f := range files {
-			srcPath := path.Join(llamaPath, f)
-			destPath := filepath.Join(tmpDir, f)
+	for _, f := range files {
+		srcPath := path.Join(llamaPath, f)
+		destPath := filepath.Join(tmpDir, f)
 
-			srcFile, err := llamaCppEmbed.Open(srcPath)
-			if err != nil {
-				log.Fatalf("read llama.cpp %s: %v", f, err)
-			}
-			defer srcFile.Close()
+		srcFile, err := llamaCppEmbed.Open(srcPath)
+		if err != nil {
+			log.Fatalf("read llama.cpp %s: %v", f, err)
+		}
+		defer srcFile.Close()
 
-			destFile, err := os.OpenFile(destPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
-			if err != nil {
-				log.Fatalf("write llama.cpp %s: %v", f, err)
-			}
-			defer destFile.Close()
+		destFile, err := os.OpenFile(destPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+		if err != nil {
+			log.Fatalf("write llama.cpp %s: %v", f, err)
+		}
+		defer destFile.Close()
 
-			if _, err := io.Copy(destFile, srcFile); err != nil {
-				log.Fatalf("copy llama.cpp %s: %v", f, err)
-			}
+		if _, err := io.Copy(destFile, srcFile); err != nil {
+			log.Fatalf("copy llama.cpp %s: %v", f, err)
 		}
+	}
 
-		ggmlRunnerPath = filepath.Join(tmpDir, "server")
-		if runtime.GOOS == "windows" {
-			ggmlRunnerPath = filepath.Join(tmpDir, "server.exe")
-		}
-	})
-}
+	runPath := filepath.Join(tmpDir, "server")
+	if runtime.GOOS == "windows" {
+		runPath = filepath.Join(tmpDir, "server.exe")
+	}
 
-type ModelRunner struct {
-	Path string // path to the model runner executable
+	return runPath
 }
 
-func ggmlRunner() ModelRunner {
-	initGGML()
-	return ModelRunner{Path: ggmlRunnerPath}
-}
+const ModelFamilyLlama ModelFamily = "llama"
 
 type llamaModel struct {
 	hyperparameters llamaHyperparameters
@@ -229,6 +209,10 @@ type Running struct {
 	Cancel context.CancelFunc
 }
 
+type ModelRunner struct {
+	Path string // path to the model runner executable
+}
+
 type llama struct {
 	api.Options
 	Running
@@ -250,7 +234,6 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti
 	params := []string{
 		"--model", model,
 		"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
-		"--gqa", fmt.Sprintf("%d", opts.NumGQA),
 		"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
 		"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
 		"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
@@ -258,6 +241,10 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti
 		"--embedding",
 	}
 
+	if opts.NumGQA > 0 {
+		params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
+	}
+
 	if len(adapters) > 0 {
 		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
 		params = append(params, "--lora", adapters[0])
@@ -289,17 +276,25 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti
 			runner.Path,
 			append(params, "--port", strconv.Itoa(port))...,
 		)
 
 		cmd.Stdout = os.Stderr
 		cmd.Stderr = os.Stderr
 
 		llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel}}
 
+		log.Print("starting llama.cpp server")
+		if err := llm.Cmd.Start(); err != nil {
+			log.Printf("error starting the external llama.cpp server: %v", err)
+			continue
+		}
+
 		if err := waitForServer(llm); err != nil {
 			log.Printf("error starting llama.cpp server: %v", err)
 			llm.Close()
 			// try again
 			continue
 		}
 
 		// server started successfully
 		return llm, nil
 	}
@@ -308,48 +303,31 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti
} }
 func waitForServer(llm *llama) error {
-	log.Print("starting llama.cpp server")
-	var stderr bytes.Buffer
-	llm.Cmd.Stderr = &stderr
-	err := llm.Cmd.Start()
-	if err != nil {
-		return fmt.Errorf("error starting the external llama.cpp server: %w", err)
-	}
-
-	exitChan := make(chan error, 1)
-
-	// the server is a long running process, watch for it exiting to keep track of something going wrong
-	go func() {
-		err := llm.Cmd.Wait()
-		log.Print(stderr.String())
-		exitChan <- err
-	}()
-
 	// wait for the server to start responding
 	start := time.Now()
 	expiresAt := time.Now().Add(30 * time.Second)
-	ticker := time.NewTicker(100 * time.Millisecond)
+	ticker := time.NewTicker(200 * time.Millisecond)
 
 	log.Print("waiting for llama.cpp server to start responding")
+	for range ticker.C {
+		if time.Now().After(expiresAt) {
+			return fmt.Errorf("llama.cpp server did not start within alloted time, retrying")
+		}
 
-	for {
-		select {
-		case <-ticker.C:
-			if time.Now().After(expiresAt) {
-				return fmt.Errorf("llama.cpp server did not start responding within 30 seconds, retrying")
-			}
-			if err := llm.Ping(context.Background()); err == nil {
-				log.Printf("llama.cpp server started in %f seconds", time.Since(start).Seconds())
-				return nil
-			}
-		case err := <-exitChan:
-			return fmt.Errorf("llama.cpp server exited unexpectedly: %w", err)
+		if err := llm.Ping(context.Background()); err == nil {
+			break
 		}
 	}
+
+	log.Printf("llama.cpp server started in %f seconds", time.Since(start).Seconds())
+	return nil
 }
 
 func (llm *llama) Close() {
-	llm.Running.Cmd.Cancel()
+	llm.Cancel()
+	if err := llm.Cmd.Wait(); err != nil {
+		log.Printf("llama.cpp server exited with error: %v", err)
+	}
 }
func (llm *llama) SetOptions(opts api.Options) { func (llm *llama) SetOptions(opts api.Options) {
@@ -676,7 +654,7 @@ func (llm *llama) Embedding(ctx context.Context, input string) ([]float64, error
 // Ping checks that the server subprocess is still running and responding to requests
 func (llm *llama) Ping(ctx context.Context) error {
-	resp, err := http.Head(fmt.Sprintf("http://127.0.0.1:%d", llm.Running.Port))
+	resp, err := http.Head(fmt.Sprintf("http://127.0.0.1:%d", llm.Port))
 	if err != nil {
 		return fmt.Errorf("ping resp: %w", err)
 	}
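
The Ping and Close changes (llm.Running.Port becoming llm.Port, llm.Running.Cmd.Cancel() becoming llm.Cancel()) work because llama embeds Running, so its fields and those of Cmd are promoted. A trimmed-down illustration of that promotion (the types here are simplified stand-ins, not the real ones):

```go
package main

import "fmt"

type Running struct {
	Port int
}

type llama struct {
	Running
}

func main() {
	l := llama{Running{Port: 11434}}
	// Both selectors refer to the same field thanks to embedding.
	fmt.Println(l.Port, l.Running.Port) // 11434 11434
}
```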

View File

@@ -32,15 +32,22 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 	}
 	defer f.Close()
 
-	ggml, err := DecodeGGML(f, ModelFamilyLlama)
+	ggml, err := DecodeGGML(f)
 	if err != nil {
 		return nil, err
 	}
 
 	switch ggml.FileType().String() {
-	case "F32", "Q5_0", "Q5_1", "Q8_0":
+	case "Q8_0":
+		if ggml.Name() != "gguf" && opts.NumGPU != 0 {
+			// GGML Q8_0 do not support Metal API and will
+			// cause the runner to segmentation fault so disable GPU
+			log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
+			opts.NumGPU = 0
+		}
+	case "F32", "Q5_0", "Q5_1":
 		if opts.NumGPU != 0 {
-			// F32, F16, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
+			// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
 			// cause the runner to segmentation fault so disable GPU
 			log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
 			opts.NumGPU = 0
@@ -75,8 +82,11 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 		}
 	}
 
-	switch ggml.ModelFamily() {
-	case ModelFamilyLlama:
+	switch ggml.Name() {
+	case "gguf":
+		opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
+		return newLlama(model, adapters, ggufRunner(), opts)
+	case "ggml", "ggmf", "ggjt", "ggla":
 		return newLlama(model, adapters, ggmlRunner(), opts)
 	default:
 		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())

View File

@@ -8,7 +8,7 @@ GO_LDFLAGS="$GO_LDFLAGS -X github.com/jmorganca/ollama/server.mode=release"
 # build universal binary
 GOARCH=arm64 go generate ./...
 GOARCH=arm64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-arm64
-rm -rf llm/llama.cpp/ggml/build/*/bin
+rm -rf llm/llama.cpp/*/build/*/bin
 GOARCH=amd64 go generate ./...
 GOARCH=amd64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-amd64
 lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64

View File

@@ -103,7 +103,7 @@ func getAuthToken(ctx context.Context, redirData AuthRedirect, regOpts *Registry
 	headers := make(http.Header)
 	headers.Set("Authorization", sig)
 
-	resp, err := makeRequest(ctx, "GET", redirectURL, headers, nil, regOpts)
+	resp, err := makeRequest(ctx, "GET", redirectURL, headers, nil, nil)
 	if err != nil {
 		log.Printf("couldn't get token: %q", err)
 	}

View File

@@ -269,6 +269,29 @@ func filenameWithPath(path, f string) (string, error) {
 }
 
 func CreateModel(ctx context.Context, name string, path string, fn func(resp api.ProgressResponse)) error {
+	mp := ParseModelPath(name)
+
+	var manifest *ManifestV2
+	var err error
+	var noprune string
+
+	// build deleteMap to prune unused layers
+	deleteMap := make(map[string]bool)
+	if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
+		manifest, _, err = GetManifest(mp)
+		if err != nil && !errors.Is(err, os.ErrNotExist) {
+			return err
+		}
+
+		if manifest != nil {
+			for _, l := range manifest.Layers {
+				deleteMap[l.Digest] = true
+			}
+			deleteMap[manifest.Config.Digest] = true
+		}
+	}
+
 	mf, err := os.Open(path)
 	if err != nil {
 		fn(api.ProgressResponse{Status: fmt.Sprintf("couldn't open modelfile '%s'", path)})
@@ -329,7 +352,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 	}
 	defer file.Close()
 
-	ggml, err := llm.DecodeGGML(file, llm.ModelFamilyLlama)
+	ggml, err := llm.DecodeGGML(file)
 	if err != nil {
 		return err
 	}
@@ -368,7 +391,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 		return err
 	}
 
-	// copy the model metadata
+	// copie the model metadata
 	config.ModelFamily = source.ModelFamily
 	config.ModelType = source.ModelType
 	config.ModelFormat = source.ModelFormat
@@ -506,6 +529,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 	var manifestLayers []*Layer
 	for _, l := range layers {
 		manifestLayers = append(manifestLayers, &l.Layer)
+		delete(deleteMap, l.Layer.Digest)
 	}
 
 	// Create a layer for the config object
@@ -515,6 +539,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 		return err
 	}
 	layers = append(layers, cfg)
+	delete(deleteMap, cfg.Layer.Digest)
 
 	if err := SaveLayers(layers, fn, false); err != nil {
 		return err
@@ -527,6 +552,14 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 		return err
 	}
 
+	if noprune == "" {
+		fn(api.ProgressResponse{Status: "removing any unused layers"})
+		err = deleteUnusedLayers(nil, deleteMap, false)
+		if err != nil {
+			return err
+		}
+	}
+
 	fn(api.ProgressResponse{Status: "success"})
 	return nil
 }
@@ -869,18 +902,7 @@ func CopyModel(src, dest string) error {
 	return nil
 }
 
-func DeleteModel(name string) error {
-	mp := ParseModelPath(name)
-	manifest, _, err := GetManifest(mp)
-	if err != nil {
-		return err
-	}
-	deleteMap := make(map[string]bool)
-	for _, layer := range manifest.Layers {
-		deleteMap[layer.Digest] = true
-	}
-	deleteMap[manifest.Config.Digest] = true
-
+func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]bool, dryRun bool) error {
 	fp, err := GetManifestPath()
 	if err != nil {
 		return err
@@ -897,14 +919,13 @@ func DeleteModel(name string) error {
 		fmp := ParseModelPath(tag)
 
 		// skip the manifest we're trying to delete
-		if mp.GetFullTagname() == fmp.GetFullTagname() {
+		if skipModelPath != nil && skipModelPath.GetFullTagname() == fmp.GetFullTagname() {
 			return nil
 		}
 
 		// save (i.e. delete from the deleteMap) any files used in other manifests
 		manifest, _, err := GetManifest(fmp)
 		if err != nil {
-			log.Printf("skipping file: %s", fp)
 			return nil
 		}
@@ -928,14 +949,72 @@ func DeleteModel(name string) error {
 			log.Printf("couldn't get file path for '%s': %v", k, err)
 			continue
 		}
-		if err := os.Remove(fp); err != nil {
-			log.Printf("couldn't remove file '%s': %v", fp, err)
-			continue
+		if !dryRun {
+			if err := os.Remove(fp); err != nil {
+				log.Printf("couldn't remove file '%s': %v", fp, err)
+				continue
+			}
+		} else {
+			log.Printf("wanted to remove: %s", fp)
 		}
 	}
 }
 
-	fp, err = mp.GetManifestPath(false)
+	return nil
+}
+
+func PruneLayers() error {
+	deleteMap := make(map[string]bool)
+	p, err := GetBlobsPath("")
+	if err != nil {
+		return err
+	}
+
+	blobs, err := os.ReadDir(p)
+	if err != nil {
+		log.Printf("couldn't read dir '%s': %v", p, err)
+		return err
+	}
+
+	for _, blob := range blobs {
+		name := blob.Name()
+		if runtime.GOOS == "windows" {
+			name = strings.ReplaceAll(name, "-", ":")
+		}
+		deleteMap[name] = true
+	}
+
+	log.Printf("total blobs: %d", len(deleteMap))
+
+	err = deleteUnusedLayers(nil, deleteMap, false)
+	if err != nil {
+		return err
+	}
+
+	log.Printf("total unused blobs removed: %d", len(deleteMap))
+
+	return nil
+}
+
+func DeleteModel(name string) error {
+	mp := ParseModelPath(name)
+
+	manifest, _, err := GetManifest(mp)
+	if err != nil {
+		return err
+	}
+
+	deleteMap := make(map[string]bool)
+	for _, layer := range manifest.Layers {
+		deleteMap[layer.Digest] = true
+	}
+	deleteMap[manifest.Config.Digest] = true
+
+	err = deleteUnusedLayers(&mp, deleteMap, false)
+	if err != nil {
+		return err
+	}
+
+	fp, err := mp.GetManifestPath(false)
 	if err != nil {
 		return err
 	}
@@ -1114,13 +1193,34 @@ func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
 func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn func(api.ProgressResponse)) error {
 	mp := ParseModelPath(name)
 
+	var manifest *ManifestV2
+	var err error
+	var noprune string
+
+	// build deleteMap to prune unused layers
+	deleteMap := make(map[string]bool)
+	if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
+		manifest, _, err = GetManifest(mp)
+		if err != nil && !errors.Is(err, os.ErrNotExist) {
+			return err
+		}
+
+		if manifest != nil {
+			for _, l := range manifest.Layers {
+				deleteMap[l.Digest] = true
+			}
+			deleteMap[manifest.Config.Digest] = true
+		}
+	}
+
 	if mp.ProtocolScheme == "http" && !regOpts.Insecure {
 		return fmt.Errorf("insecure protocol http")
 	}
 
 	fn(api.ProgressResponse{Status: "pulling manifest"})
 
-	manifest, err := pullModelManifest(ctx, mp, regOpts)
+	manifest, err = pullModelManifest(ctx, mp, regOpts)
 	if err != nil {
 		return fmt.Errorf("pull model manifest: %s", err)
 	}
@@ -1140,7 +1240,9 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
 		}); err != nil {
 			return err
 		}
+		delete(deleteMap, layer.Digest)
 	}
+	delete(deleteMap, manifest.Config.Digest)
 
 	fn(api.ProgressResponse{Status: "verifying sha256 digest"})
 	for _, layer := range layers {
@@ -1178,6 +1280,14 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
 		return err
 	}
 
+	if noprune == "" {
+		fn(api.ProgressResponse{Status: "removing any unused layers"})
+		err = deleteUnusedLayers(nil, deleteMap, false)
+		if err != nil {
+			return err
+		}
+	}
+
 	fn(api.ProgressResponse{Status: "success"})
 
 	return nil
@@ -1303,7 +1413,7 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
 }
 
 func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) {
-	if requestURL.Scheme != "http" && regOpts.Insecure {
+	if requestURL.Scheme != "http" && regOpts != nil && regOpts.Insecure {
 		requestURL.Scheme = "http"
 	}
@@ -1316,10 +1426,12 @@ func makeRequest(ctx context.Context, method string, requestURL *url.URL, header
 		req.Header = headers
 	}
 
-	if regOpts.Token != "" {
-		req.Header.Set("Authorization", "Bearer "+regOpts.Token)
-	} else if regOpts.Username != "" && regOpts.Password != "" {
-		req.SetBasicAuth(regOpts.Username, regOpts.Password)
+	if regOpts != nil {
+		if regOpts.Token != "" {
+			req.Header.Set("Authorization", "Bearer "+regOpts.Token)
+		} else if regOpts.Username != "" && regOpts.Password != "" {
+			req.SetBasicAuth(regOpts.Username, regOpts.Password)
+		}
 	}
 
 	req.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
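
The pruning paths added in this file (CreateModel, PullModel, PruneLayers) all share the same shape: mark every known blob for deletion up front, unmark whatever a manifest still references, and hand the remainder to deleteUnusedLayers. A standalone illustration of that flow with made-up digests:

```go
package main

import "fmt"

func main() {
	// Everything found in the blobs directory starts out marked for deletion.
	deleteMap := map[string]bool{
		"sha256-aaa": true,
		"sha256-bbb": true,
		"sha256-ccc": true,
	}

	// Layers still referenced by a manifest are unmarked as they are seen.
	for _, referenced := range []string{"sha256-aaa"} {
		delete(deleteMap, referenced)
	}

	// What remains is unused and would be removed (or only logged in a dry run).
	for digest := range deleteMap {
		fmt.Println("unused layer:", digest)
	}
}
```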

View File

@@ -133,7 +133,12 @@ func GetBlobsPath(digest string) (string, error) {
 	}
 
 	path := filepath.Join(home, ".ollama", "models", "blobs", digest)
-	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
+	dirPath := filepath.Dir(path)
+	if digest == "" {
+		dirPath = path
+	}
+
+	if err := os.MkdirAll(dirPath, 0o755); err != nil {
 		return "", err
 	}
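
The digest == "" special case exists because GetBlobsPath("") is now used to mean the blobs directory itself (PruneLayers reads it above); without it, filepath.Dir climbs one level too high and MkdirAll creates models/ instead of models/blobs/. An illustration with a made-up home directory:

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	blobs := filepath.Join("/home/u/.ollama/models/blobs", "") // digest == ""

	fmt.Println(blobs)               // /home/u/.ollama/models/blobs
	fmt.Println(filepath.Dir(blobs)) // /home/u/.ollama/models  <- one level too high
}
```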

View File

@@ -363,6 +363,7 @@ func DeleteModelHandler(c *gin.Context) {
 		}
 		return
 	}
+	c.JSON(http.StatusOK, nil)
 }
 
 func ShowModelHandler(c *gin.Context) {

View File

@@ -66,31 +66,39 @@ func uploadBlobChunked(ctx context.Context, requestURL *url.URL, layer *Layer, r
 	sectionReader := io.NewSectionReader(f, int64(offset), chunk)
 	for try := 0; try < MaxRetries; try++ {
+		ch := make(chan error, 1)
+
 		r, w := io.Pipe()
 		defer r.Close()
 		go func() {
 			defer w.Close()
 
 			for chunked := int64(0); chunked < chunk; {
-				n, err := io.CopyN(w, sectionReader, 1024*1024)
-				if err != nil && !errors.Is(err, io.EOF) {
-					fn(api.ProgressResponse{
-						Status:    fmt.Sprintf("error reading chunk: %v", err),
-						Digest:    layer.Digest,
-						Total:     layer.Size,
-						Completed: int(offset),
-					})
+				select {
+				case err := <-ch:
+					log.Printf("chunk interrupted: %v", err)
+					return
+				default:
+					n, err := io.CopyN(w, sectionReader, 1024*1024)
+					if err != nil && !errors.Is(err, io.EOF) {
+						fn(api.ProgressResponse{
+							Status:    fmt.Sprintf("error reading chunk: %v", err),
+							Digest:    layer.Digest,
+							Total:     layer.Size,
+							Completed: int(offset),
+						})
 
-					return
-				}
+						return
+					}
 
-				chunked += n
-				fn(api.ProgressResponse{
-					Status:    fmt.Sprintf("uploading %s", layer.Digest),
-					Digest:    layer.Digest,
-					Total:     layer.Size,
-					Completed: int(offset) + int(chunked),
-				})
+					chunked += n
+					fn(api.ProgressResponse{
+						Status:    fmt.Sprintf("uploading %s", layer.Digest),
+						Digest:    layer.Digest,
+						Total:     layer.Size,
+						Completed: int(offset) + int(chunked),
+					})
+				}
 			}
 		}()
@@ -113,6 +121,8 @@ func uploadBlobChunked(ctx context.Context, requestURL *url.URL, layer *Layer, r
 		switch {
 		case resp.StatusCode == http.StatusUnauthorized:
+			ch <- errors.New("unauthorized")
+
 			auth := resp.Header.Get("www-authenticate")
 			authRedir := ParseAuthRedirectString(auth)
 			token, err := getAuthToken(ctx, authRedir, regOpts)
@@ -121,10 +131,7 @@ func uploadBlobChunked(ctx context.Context, requestURL *url.URL, layer *Layer, r
 			}
 
 			regOpts.Token = token
 
-			if _, err := sectionReader.Seek(0, io.SeekStart); err != nil {
-				return err
-			}
+			sectionReader = io.NewSectionReader(f, int64(offset), chunk)
 
 			continue
 		case resp.StatusCode >= http.StatusBadRequest:
 			body, _ := io.ReadAll(resp.Body)
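
The new ch channel lets the retry loop interrupt the pipe-writing goroutine when the chunk upload comes back 401, instead of letting it keep copying into a pipe nobody reads; the section reader is then rebuilt from the original offset before retrying with the refreshed token. A standalone sketch of that signal pattern (loop bounds and messages are illustrative):

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

func main() {
	ch := make(chan error, 1)

	go func() {
		for i := 0; i < 10; i++ {
			select {
			case err := <-ch:
				fmt.Println("chunk interrupted:", err)
				return
			default:
				fmt.Println("copying 1MB block", i)
				time.Sleep(10 * time.Millisecond)
			}
		}
	}()

	// What the StatusUnauthorized branch does before refreshing the token.
	ch <- errors.New("unauthorized")
	time.Sleep(100 * time.Millisecond)
}
```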