Compare commits

..

2 Commits

Author SHA1 Message Date
Blake Mizerany
cfd4152eb6 ... 2024-04-17 17:04:13 -07:00
Blake Mizerany
0fbb379373 types/model: add FilepathNoBuild
Also, add test for DisplayLongest.

Also, plumb fill param to ParseName in MustParseName
2024-04-16 12:37:38 -07:00
60 changed files with 290 additions and 262 deletions

View File

@@ -1,60 +0,0 @@
name: Bug report
labels: [bug]
description: Something isn't working right.
body:
- type: textarea
id: description
attributes:
label: What is the issue?
description: What happened? What did you expect to happen?
validations:
required: true
- type: dropdown
id: os
attributes:
label: OS
description: Which operating system are you using?
multiple: true
options:
- Linux
- macOS
- Windows
- Docker
- WSL2
validations:
required: false
- type: dropdown
id: gpu
attributes:
label: GPU
description: Which GPU are you using?
multiple: true
options:
- Nvidia
- AMD
- Intel
- Apple
- Other
validations:
required: false
- type: dropdown
id: cpu
attributes:
label: CPU
description: Which CPU are you using?
multiple: true
options:
- Intel
- AMD
- Apple
- Other
validations:
required: false
- type: input
id: version
attributes:
label: Ollama version
description: What version of Ollama are you using? (`ollama --version`)
placeholder: e.g., 0.1.32
validations:
required: false

View File

@@ -0,0 +1,18 @@
name: Model request
description: Request a new model for the library
labels: [mr]
body:
- type: markdown
attributes:
value: |
Please check whether the model you are requesting is [already available](https://ollama.com/search) and whether you can [import it](https://github.com/ollama/ollama/blob/main/docs/import.md#import-a-model) yourself.
Tell us which model you'd like to see in the library!
- type: textarea
id: problem
attributes:
label: What model would you like?
description: Please provide a link to the model.
- type: markdown
attributes:
value: |
Thanks for filing a model request!

View File

@@ -1,6 +0,0 @@
---
name: Feature request
about: Request a new feature
labels: feature request
---

View File

@@ -0,0 +1,41 @@
name: Feature request
description: Propose a new feature
labels: [needs-triage, fr]
body:
- type: markdown
attributes:
value: |
Please check if your feature request is [already filed](https://github.com/ollama/ollama/issues).
Tell us about your idea!
- type: textarea
id: problem
attributes:
label: What are you trying to do?
description: Tell us about the problem you're trying to solve.
validations:
required: false
- type: textarea
id: solution
attributes:
label: How should we solve this?
description: If you have an idea of how you'd like to see this feature work, let us know.
validations:
required: false
- type: textarea
id: alternative
attributes:
label: What is the impact of not solving this?
description: (How) Are you currently working around the issue?
validations:
required: false
- type: textarea
id: context
attributes:
label: Anything else?
description: Any additional context to share, e.g., links
validations:
required: false
- type: markdown
attributes:
value: |
Thanks for filing a feature request!

View File

@@ -1,5 +0,0 @@
---
name: Model request
about: Request support for a new model to be added to Ollama
labels: model request
---

125
.github/ISSUE_TEMPLATE/90_bug_report.yml vendored Normal file
View File

@@ -0,0 +1,125 @@
name: Bug report
description: File a bug report. If you need help, please join our Discord server.
labels: [needs-triage, bug]
body:
- type: markdown
attributes:
value: |
Please check if your bug is [already filed](https://github.com/ollama/ollama/issues) before filing a new one.
- type: textarea
id: what-happened
attributes:
label: What is the issue?
description: What happened? What did you expect to happen?
validations:
required: true
- type: textarea
id: what-was-expected
attributes:
label: What did you expect to see?
description: What did you expect to see/happen instead?
validations:
required: false
- type: textarea
id: steps
attributes:
label: Steps to reproduce
description: What are the steps you took that hit this issue?
validations:
required: false
- type: textarea
id: changes
attributes:
label: Are there any recent changes that introduced the issue?
description: If so, what are those changes?
validations:
required: false
- type: dropdown
id: os
attributes:
label: OS
description: What OS are you using? You may select more than one.
multiple: true
options:
- Linux
- macOS
- Windows
- Other
validations:
required: false
- type: dropdown
id: architecture
attributes:
label: Architecture
description: What architecture are you using? You may select more than one.
multiple: true
options:
- arm64
- amd64
- x86
- Other
- type: dropdown
id: platform
attributes:
label: Platform
description: What platform are you using? You may select more than one.
multiple: true
options:
- Docker
- WSL
- WSL2
validations:
required: false
- type: input
id: ollama-version
attributes:
label: Ollama version
description: What Ollama version are you using? (`ollama --version`)
placeholder: e.g., 1.14.4
validations:
required: false
- type: dropdown
id: gpu
attributes:
label: GPU
description: What GPU, if any, are you using? You may select more than one.
multiple: true
options:
- Nvidia
- AMD
- Intel
- Apple
- Other
validations:
required: false
- type: textarea
id: gpu-info
attributes:
label: GPU info
description: What GPU info do you have? (`nvidia-smi`, `rocminfo`, `system_profiler SPDisplaysDataType`, etc.)
validations:
required: false
- type: dropdown
id: cpu
attributes:
label: CPU
description: What CPU are you using? You may select more than one.
multiple: true
options:
- Intel
- AMD
- Apple
- Other
validations:
required: false
- type: textarea
id: other-software
attributes:
label: Other software
description: What other software are you using that might be related to this issue?
validations:
required: false
- type: markdown
attributes:
value: |
Thanks for filing a bug report!

View File

@@ -60,6 +60,7 @@ Here are some example models that can be downloaded:
| Llama 2 13B | 13B | 7.3GB | `ollama run llama2:13b` | | Llama 2 13B | 13B | 7.3GB | `ollama run llama2:13b` |
| Llama 2 70B | 70B | 39GB | `ollama run llama2:70b` | | Llama 2 70B | 70B | 39GB | `ollama run llama2:70b` |
| Orca Mini | 3B | 1.9GB | `ollama run orca-mini` | | Orca Mini | 3B | 1.9GB | `ollama run orca-mini` |
| Vicuna | 7B | 3.8GB | `ollama run vicuna` |
| LLaVA | 7B | 4.5GB | `ollama run llava` | | LLaVA | 7B | 4.5GB | `ollama run llava` |
| Gemma | 2B | 1.4GB | `ollama run gemma:2b` | | Gemma | 2B | 1.4GB | `ollama run gemma:2b` |
| Gemma | 7B | 4.8GB | `ollama run gemma:7b` | | Gemma | 7B | 4.8GB | `ollama run gemma:7b` |
@@ -377,6 +378,3 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension) - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
- [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend) - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
- [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support) - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
### Supported backends
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.

View File

@@ -20,8 +20,8 @@ import (
"runtime" "runtime"
"strings" "strings"
"ollama.com/format" "github.com/ollama/ollama/format"
"ollama.com/version" "github.com/ollama/ollama/version"
) )
// Client encapsulates client state for interacting with the ollama // Client encapsulates client state for interacting with the ollama

View File

@@ -9,8 +9,8 @@ import (
"os/signal" "os/signal"
"syscall" "syscall"
"ollama.com/app/store" "github.com/ollama/ollama/app/store"
"ollama.com/app/tray" "github.com/ollama/ollama/app/tray"
) )
func Run() { func Run() {

View File

@@ -11,7 +11,7 @@ import (
"path/filepath" "path/filepath"
"time" "time"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func getCLIFullPath(command string) string { func getCLIFullPath(command string) string {

View File

@@ -18,8 +18,8 @@ import (
"strings" "strings"
"time" "time"
"ollama.com/auth" "github.com/ollama/ollama/auth"
"ollama.com/version" "github.com/ollama/ollama/version"
) )
var ( var (

View File

@@ -4,7 +4,7 @@ package main
// go build -ldflags="-H windowsgui" . // go build -ldflags="-H windowsgui" .
import ( import (
"ollama.com/app/lifecycle" "github.com/ollama/ollama/app/lifecycle"
) )
func main() { func main() {

View File

@@ -4,8 +4,8 @@ import (
"fmt" "fmt"
"runtime" "runtime"
"ollama.com/app/assets" "github.com/ollama/ollama/app/assets"
"ollama.com/app/tray/commontray" "github.com/ollama/ollama/app/tray/commontray"
) )
func NewTray() (commontray.OllamaTray, error) { func NewTray() (commontray.OllamaTray, error) {

View File

@@ -5,7 +5,7 @@ package tray
import ( import (
"fmt" "fmt"
"ollama.com/app/tray/commontray" "github.com/ollama/ollama/app/tray/commontray"
) )
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) { func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {

View File

@@ -1,8 +1,8 @@
package tray package tray
import ( import (
"ollama.com/app/tray/commontray" "github.com/ollama/ollama/app/tray/commontray"
"ollama.com/app/tray/wintray" "github.com/ollama/ollama/app/tray/wintray"
) )
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) { func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {

View File

@@ -13,8 +13,8 @@ import (
"sync" "sync"
"unsafe" "unsafe"
"github.com/ollama/ollama/app/tray/commontray"
"golang.org/x/sys/windows" "golang.org/x/sys/windows"
"ollama.com/app/tray/commontray"
) )
// Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32 // Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32

View File

@@ -30,12 +30,12 @@ import (
"golang.org/x/exp/slices" "golang.org/x/exp/slices"
"golang.org/x/term" "golang.org/x/term"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/format" "github.com/ollama/ollama/format"
"ollama.com/parser" "github.com/ollama/ollama/parser"
"ollama.com/progress" "github.com/ollama/ollama/progress"
"ollama.com/server" "github.com/ollama/ollama/server"
"ollama.com/version" "github.com/ollama/ollama/version"
) )
func CreateHandler(cmd *cobra.Command, args []string) error { func CreateHandler(cmd *cobra.Command, args []string) error {

View File

@@ -14,9 +14,9 @@ import (
"github.com/spf13/cobra" "github.com/spf13/cobra"
"golang.org/x/exp/slices" "golang.org/x/exp/slices"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/progress" "github.com/ollama/ollama/progress"
"ollama.com/readline" "github.com/ollama/ollama/readline"
) )
type MultilineState int type MultilineState int

View File

@@ -7,7 +7,7 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func TestExtractFilenames(t *testing.T) { func TestExtractFilenames(t *testing.T) {

View File

@@ -7,7 +7,7 @@ import (
"os/exec" "os/exec"
"strings" "strings"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func startApp(ctx context.Context, client *api.Client) error { func startApp(ctx context.Context, client *api.Client) error {

View File

@@ -6,7 +6,7 @@ import (
"context" "context"
"fmt" "fmt"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func startApp(ctx context.Context, client *api.Client) error { func startApp(ctx context.Context, client *api.Client) error {

View File

@@ -10,7 +10,7 @@ import (
"strings" "strings"
"syscall" "syscall"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func startApp(ctx context.Context, client *api.Client) error { func startApp(ctx context.Context, client *api.Client) error {

View File

@@ -13,8 +13,8 @@ import (
"google.golang.org/protobuf/proto" "google.golang.org/protobuf/proto"
"ollama.com/convert/sentencepiece" "github.com/ollama/ollama/convert/sentencepiece"
"ollama.com/llm" "github.com/ollama/ollama/llm"
) )
type Params struct { type Params struct {

View File

@@ -12,7 +12,7 @@ import (
"github.com/pdevine/tensor" "github.com/pdevine/tensor"
"github.com/pdevine/tensor/native" "github.com/pdevine/tensor/native"
"ollama.com/llm" "github.com/ollama/ollama/llm"
) )
type GemmaModel struct { type GemmaModel struct {

View File

@@ -14,7 +14,7 @@ import (
"github.com/pdevine/tensor/native" "github.com/pdevine/tensor/native"
"github.com/x448/float16" "github.com/x448/float16"
"ollama.com/llm" "github.com/ollama/ollama/llm"
) )
type LlamaModel struct { type LlamaModel struct {

View File

@@ -13,7 +13,7 @@ import (
"github.com/pdevine/tensor/native" "github.com/pdevine/tensor/native"
"github.com/x448/float16" "github.com/x448/float16"
"ollama.com/llm" "github.com/ollama/ollama/llm"
) )
type MistralModel struct { type MistralModel struct {

View File

@@ -16,7 +16,7 @@ import (
"github.com/mitchellh/mapstructure" "github.com/mitchellh/mapstructure"
"github.com/x448/float16" "github.com/x448/float16"
"ollama.com/llm" "github.com/ollama/ollama/llm"
) )
type safetensorWriterTo struct { type safetensorWriterTo struct {

View File

@@ -15,7 +15,7 @@ import (
"github.com/nlpodyssey/gopickle/types" "github.com/nlpodyssey/gopickle/types"
"github.com/x448/float16" "github.com/x448/float16"
"ollama.com/llm" "github.com/ollama/ollama/llm"
) )
type torchWriterTo struct { type torchWriterTo struct {

View File

@@ -5,7 +5,7 @@ import (
"fmt" "fmt"
"log" "log"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func main() { func main() {

View File

@@ -5,7 +5,7 @@ import (
"fmt" "fmt"
"log" "log"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func main() { func main() {

View File

@@ -5,7 +5,7 @@ import (
"fmt" "fmt"
"log" "log"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func main() { func main() {

View File

@@ -6,7 +6,7 @@ import (
"log" "log"
"os" "os"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func main() { func main() {

View File

@@ -5,7 +5,7 @@ import (
"fmt" "fmt"
"log" "log"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func main() { func main() {

2
go.mod
View File

@@ -1,4 +1,4 @@
module ollama.com module github.com/ollama/ollama
go 1.22 go 1.22

View File

@@ -21,7 +21,7 @@ import (
"sync" "sync"
"unsafe" "unsafe"
"ollama.com/format" "github.com/ollama/ollama/format"
) )
type handles struct { type handles struct {

View File

@@ -55,6 +55,6 @@ func getCPUMem() (memInfo, error) {
return memInfo{ return memInfo{
TotalMemory: uint64(C.getPhysicalMemory()), TotalMemory: uint64(C.getPhysicalMemory()),
FreeMemory: 0, FreeMemory: 0,
DeviceCount: 1, DeviceCount: 0,
}, nil }, nil
} }

View File

@@ -8,7 +8,7 @@ import (
"testing" "testing"
"time" "time"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func TestOrcaMiniBlueSky(t *testing.T) { func TestOrcaMiniBlueSky(t *testing.T) {

View File

@@ -8,7 +8,7 @@ import (
"testing" "testing"
"time" "time"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func TestContextExhaustion(t *testing.T) { func TestContextExhaustion(t *testing.T) {

View File

@@ -9,8 +9,8 @@ import (
"testing" "testing"
"time" "time"
"github.com/ollama/ollama/api"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"ollama.com/api"
) )
func TestIntegrationMultimodal(t *testing.T) { func TestIntegrationMultimodal(t *testing.T) {

View File

@@ -9,7 +9,7 @@ import (
"testing" "testing"
"time" "time"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
// TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server // TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server

View File

@@ -21,9 +21,9 @@ import (
"testing" "testing"
"time" "time"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/app/lifecycle"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"ollama.com/api"
"ollama.com/app/lifecycle"
) )
func FindPort() string { func FindPort() string {

View File

@@ -39,10 +39,6 @@
#include "httplib.h" #include "httplib.h"
#include "json.hpp" #include "json.hpp"
#if defined(_WIN32)
#include <windows.h>
#endif
#include <cstddef> #include <cstddef>
#include <thread> #include <thread>
#include <chrono> #include <chrono>
@@ -2774,28 +2770,8 @@ inline void signal_handler(int signal) {
shutdown_handler(signal); shutdown_handler(signal);
} }
#if defined(_WIN32) int main(int argc, char **argv)
char* wchar_to_char(const wchar_t* wstr) { {
if (wstr == nullptr) return nullptr;
// Determine the number of bytes needed for the UTF-8 string
int bytes = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, nullptr, 0, nullptr, nullptr);
char* str = new char[bytes];
// Convert the wide-character string to a UTF-8 string
WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, bytes, nullptr, nullptr);
return str;
}
int wmain(int argc, wchar_t **wargv) {
char** argv = new char*[argc];
for (int i = 0; i < argc; ++i) {
argv[i] = wchar_to_char(wargv[i]);
}
#else
int main(int argc, char **argv) {
#endif
#if SERVER_VERBOSE != 1 #if SERVER_VERBOSE != 1
log_disable(); log_disable();
#endif #endif
@@ -3306,11 +3282,6 @@ int main(int argc, char **argv) {
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false; return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
}; };
SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true); SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
for (int i = 0; i < argc; ++i) {
delete[] argv[i];
}
delete[] argv;
#endif #endif
llama.queue_tasks.start_loop(); llama.queue_tasks.start_loop();
svr.stop(); svr.stop();

View File

@@ -164,8 +164,7 @@ func (ts Tensors) Layers() map[string]Layer {
for _, t := range ts { for _, t := range ts {
parts := strings.Split(t.Name, ".") parts := strings.Split(t.Name, ".")
if parts[0] == "blk" { if parts[0] == "blk" {
// join first and second part, e.g. blk.%d parts = parts[1:]
parts = append([]string{fmt.Sprintf("%s.%s", parts[0], parts[1])}, parts[2:]...)
} }
if _, ok := layers[parts[0]]; !ok { if _, ok := layers[parts[0]]; !ok {
@@ -381,12 +380,6 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
) )
partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128 partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
case "stablelm":
fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
partialOffload = max(
4*batch*(vocab+2*embedding),
fullOffload,
)
} }
return return

View File

@@ -248,17 +248,13 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
} }
padding := llm.padding(offset, int64(alignment)) padding := llm.padding(offset, int64(alignment))
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil { if _, err := rs.Seek(padding-offset, io.SeekCurrent); err != nil {
return err return err
} }
for _, tensor := range llm.tensors { for _, tensor := range llm.tensors {
if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil { padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
return err if _, err := rs.Seek(padded, io.SeekCurrent); err != nil {
}
padding := llm.padding(int64(tensor.size()), int64(alignment))
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
return err return err
} }
} }
@@ -627,9 +623,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
return err return err
} }
var alignment int64 = 32 padding := llm.padding(offset, 32)
padding := llm.padding(offset, alignment) if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
return err return err
} }
@@ -643,8 +638,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
return err return err
} }
padding := llm.padding(offset, alignment) padding := llm.padding(offset, 32)
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil { if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
return err return err
} }
} }
@@ -653,5 +648,5 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
} }
func (gguf) padding(offset, align int64) int64 { func (gguf) padding(offset, align int64) int64 {
return (align - offset%align) % align return (offset + align - 1) / align * align
} }

View File

@@ -14,7 +14,7 @@ import (
"golang.org/x/exp/slices" "golang.org/x/exp/slices"
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
"ollama.com/gpu" "github.com/ollama/ollama/gpu"
) )
var errPayloadMissing = fmt.Errorf("expected payloads not included in this build of ollama") var errPayloadMissing = fmt.Errorf("expected payloads not included in this build of ollama")

View File

@@ -21,9 +21,9 @@ import (
"strings" "strings"
"time" "time"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/format" "github.com/ollama/ollama/format"
"ollama.com/gpu" "github.com/ollama/ollama/gpu"
) )
// LlamaServer is an instance of the llama.cpp server // LlamaServer is an instance of the llama.cpp server
@@ -79,9 +79,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
graphFullOffload = graphPartialOffload graphFullOffload = graphPartialOffload
} }
graphFullOffload *= uint64(info.DeviceCount)
graphPartialOffload *= uint64(info.DeviceCount)
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers) // memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
memoryRequiredTotal := memoryMinimum + graphFullOffload memoryRequiredTotal := memoryMinimum + graphFullOffload
@@ -97,7 +94,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
var layerCount int var layerCount int
layers := ggml.Tensors().Layers() layers := ggml.Tensors().Layers()
for i := 0; i < int(ggml.KV().BlockCount()); i++ { for i := 0; i < int(ggml.KV().BlockCount()); i++ {
memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size() memoryLayer := layers[fmt.Sprintf("%d", i)].size()
// KV is proportional to the number of layers // KV is proportional to the number of layers
memoryLayer += kv / ggml.KV().BlockCount() memoryLayer += kv / ggml.KV().BlockCount()
@@ -109,13 +106,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
} }
} }
var memoryLayerOutput uint64 memoryLayerOutput := layers["output"].size()
for k, v := range layers {
if !strings.HasPrefix(k, "blk.") {
memoryLayerOutput += v.size()
}
}
memoryRequiredTotal += memoryLayerOutput memoryRequiredTotal += memoryLayerOutput
if info.Library == "metal" && memoryRequiredTotal > info.TotalMemory { if info.Library == "metal" && memoryRequiredTotal > info.TotalMemory {
@@ -130,47 +121,16 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
opts.NumGPU = layerCount opts.NumGPU = layerCount
} }
memoryWeights := memoryRequiredTotal - memoryMinimum - graphFullOffload - kv
slog.Info( slog.Info(
"offload to gpu", "offload to gpu",
slog.Group( "reallayers", opts.NumGPU,
"layers", "layers", layerCount,
// actual number of layers offloaded "required", format.HumanBytes2(memoryRequiredTotal),
"real", opts.NumGPU, "used", format.HumanBytes2(memoryRequiredPartial),
// estimated number of layers that can be offloaded "available", format.HumanBytes2(memoryAvailable),
"estimate", layerCount, "kv", format.HumanBytes2(kv),
), "fulloffload", format.HumanBytes2(graphFullOffload),
slog.Group( "partialoffload", format.HumanBytes2(graphPartialOffload),
"memory",
// memory available for offloading
"available", format.HumanBytes2(memoryAvailable),
slog.Group(
"required",
// memory required for full offloading
"full", format.HumanBytes2(memoryRequiredTotal),
// memory required to offload layers.estimate layers
"partial", format.HumanBytes2(memoryRequiredPartial),
// memory of KV cache
"kv", format.HumanBytes2(kv),
),
slog.Group(
"weights",
// memory of the weights
"total", format.HumanBytes2(memoryWeights),
// memory of repeating layers
"repeating", format.HumanBytes2(memoryWeights-memoryLayerOutput),
// memory of non-repeating layers
"nonrepeating", format.HumanBytes2(memoryLayerOutput),
),
slog.Group(
"graph",
// memory of graph when fully offloaded
"full", format.HumanBytes2(graphFullOffload),
// memory of graph when not fully offloaded
"partial", format.HumanBytes2(graphPartialOffload),
),
),
) )
if len(adapters) > 1 { if len(adapters) > 1 {

View File

@@ -3,8 +3,8 @@ package main
import ( import (
"context" "context"
"github.com/ollama/ollama/cmd"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"ollama.com/cmd"
) )
func main() { func main() {

View File

@@ -11,7 +11,7 @@ import (
"time" "time"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
type Error struct { type Error struct {

View File

@@ -6,8 +6,8 @@ import (
"strings" "strings"
"time" "time"
"github.com/ollama/ollama/format"
"golang.org/x/term" "golang.org/x/term"
"ollama.com/format"
) )
type Bar struct { type Bar struct {

View File

@@ -15,8 +15,8 @@ import (
"strings" "strings"
"time" "time"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/auth" "github.com/ollama/ollama/auth"
) )
type registryChallenge struct { type registryChallenge struct {

View File

@@ -21,8 +21,8 @@ import (
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/format" "github.com/ollama/ollama/format"
) )
const maxRetries = 6 const maxRetries = 6

View File

@@ -24,12 +24,12 @@ import (
"golang.org/x/exp/slices" "golang.org/x/exp/slices"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/convert" "github.com/ollama/ollama/convert"
"ollama.com/format" "github.com/ollama/ollama/format"
"ollama.com/llm" "github.com/ollama/ollama/llm"
"ollama.com/parser" "github.com/ollama/ollama/parser"
"ollama.com/version" "github.com/ollama/ollama/version"
) )
type registryOptions struct { type registryOptions struct {

View File

@@ -7,7 +7,7 @@ import (
"text/template" "text/template"
"text/template/parse" "text/template/parse"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
// isResponseNode checks if the node contains .Response // isResponseNode checks if the node contains .Response

View File

@@ -4,7 +4,7 @@ import (
"strings" "strings"
"testing" "testing"
"ollama.com/api" "github.com/ollama/ollama/api"
) )
func TestPrompt(t *testing.T) { func TestPrompt(t *testing.T) {

View File

@@ -27,12 +27,12 @@ import (
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"golang.org/x/exp/slices" "golang.org/x/exp/slices"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/gpu" "github.com/ollama/ollama/gpu"
"ollama.com/llm" "github.com/ollama/ollama/llm"
"ollama.com/openai" "github.com/ollama/ollama/openai"
"ollama.com/parser" "github.com/ollama/ollama/parser"
"ollama.com/version" "github.com/ollama/ollama/version"
) )
var mode string = gin.DebugMode var mode string = gin.DebugMode

View File

@@ -16,9 +16,9 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"ollama.com/api" "github.com/ollama/ollama/api"
"ollama.com/parser" "github.com/ollama/ollama/parser"
"ollama.com/version" "github.com/ollama/ollama/version"
) )
func Test_Routes(t *testing.T) { func Test_Routes(t *testing.T) {

View File

@@ -16,9 +16,9 @@ import (
"sync/atomic" "sync/atomic"
"time" "time"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/format"
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
"ollama.com/api"
"ollama.com/format"
) )
var blobUploadManager sync.Map var blobUploadManager sync.Map

View File

@@ -1,6 +1,7 @@
package model package model
import ( import (
"fmt"
"log/slog" "log/slog"
"strings" "strings"
"unicode" "unicode"
@@ -47,8 +48,11 @@ var (
// Digest. // Digest.
func ParseDigest(s string) Digest { func ParseDigest(s string) Digest {
typ, digest, ok := strings.Cut(s, "-") typ, digest, ok := strings.Cut(s, "-")
if !ok {
typ, digest, ok = strings.Cut(s, ":")
}
if ok && isValidDigestType(typ) && isValidHex(digest) { if ok && isValidDigestType(typ) && isValidHex(digest) {
return Digest{s: s} return Digest{s: fmt.Sprintf("%s-%s", typ, digest)}
} }
return Digest{} return Digest{}
} }

View File

@@ -12,7 +12,7 @@ import (
"strings" "strings"
"sync" "sync"
"ollama.com/types/structs" "github.com/ollama/ollama/types/structs"
) )
// Errors // Errors
@@ -521,8 +521,6 @@ func parts(s string) iter_Seq2[PartKind, string] {
return return
} }
state, j, partLen = PartModel, i, 0 state, j, partLen = PartModel, i, 0
case PartHost:
// noop: support for host:port
default: default:
yield(PartExtraneous, s[i+1:j]) yield(PartExtraneous, s[i+1:j])
return return
@@ -680,9 +678,6 @@ func isValidByteFor(kind PartKind, c byte) bool {
if kind == PartNamespace && c == '.' { if kind == PartNamespace && c == '.' {
return false return false
} }
if kind == PartHost && c == ':' {
return true
}
if c == '.' || c == '-' { if c == '.' || c == '-' {
return true return true
} }

View File

@@ -40,7 +40,6 @@ var testNames = map[string]fields{
"user/model": {namespace: "user", model: "model"}, "user/model": {namespace: "user", model: "model"},
"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"}, "example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
"example.com/ns/mistral:7b+X": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"}, "example.com/ns/mistral:7b+X": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
"localhost:5000/ns/mistral": {host: "localhost:5000", namespace: "ns", model: "mistral"},
// invalid digest // invalid digest
"mistral:latest@invalid256-": {}, "mistral:latest@invalid256-": {},