Compare commits
1 Commits

| Author | SHA1 | Date |
|---|---|---|
|  | 7396030d60 |  |

.gitignore (vendored): 1 change
@@ -7,4 +7,3 @@ dist
ollama
ggml-metal.metal
.cache
*.exe
@@ -262,7 +262,7 @@ func (c *Client) Create(ctx context.Context, req *CreateRequest, fn CreateProgre

func (c *Client) List(ctx context.Context) (*ListResponse, error) {
var lr ListResponse
if err := c.do(ctx, http.MethodGet, "/api/tags", nil, &lr); err != nil {
if err := c.do(ctx, http.MethodGet, "/api/list", nil, &lr); err != nil {
return nil, err
}
return &lr, nil
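For orientation, the hunk above only swaps the path passed to c.do; a caller's view of List is unchanged. Below is a minimal sketch of driving it from application code. It assumes a constructor such as api.ClientFromEnvironment and a Models slice with a Name field on ListResponse, neither of which is shown in this diff.

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/jmorganca/ollama/api"
)

func main() {
	// Assumed constructor; the diff does not show how a Client is built.
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	resp, err := client.List(context.Background())
	if err != nil {
		log.Fatal(err)
	}

	// Assumed response shape: a Models slice whose entries carry a Name.
	for _, m := range resp.Models {
		fmt.Println(m.Name)
	}
}
```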
@@ -157,7 +157,7 @@ def push(model_name, insecure=False, callback=None):

# List models that are available locally.
def list():
try:
response = requests.get(f"{BASE_URL}/api/tags")
response = requests.get(f"{BASE_URL}/api/list")
response.raise_for_status()
data = response.json()
models = data.get('models', [])
cmd/cmd.go: 47 changes
@@ -602,12 +602,14 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format

fmt.Fprintln(os.Stderr, "")
}

scanner, err := readline.New(readline.Prompt{
prompt := readline.Prompt{
Prompt: ">>> ",
AltPrompt: "... ",
Placeholder: "Send a message (/? for help)",
AltPlaceholder: `Use """ to end multi-line input`,
})
}

scanner, err := readline.New(prompt)
if err != nil {
return err
}
@@ -615,7 +617,7 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format

fmt.Print(readline.StartBracketedPaste)
defer fmt.Printf(readline.EndBracketedPaste)

var prompt string
var multiLineBuffer string

for {
line, err := scanner.Readline()
@@ -628,33 +630,27 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format

fmt.Println("\nUse Ctrl-D or /bye to exit.")
}

scanner.Prompt.UseAlt = false
prompt = ""

continue
case err != nil:
return err
}

switch {
case strings.HasPrefix(prompt, `"""`):
// if the prompt so far starts with """ then we're in multiline mode
// and we need to keep reading until we find a line that ends with """
cut, found := strings.CutSuffix(line, `"""`)
prompt += cut + "\n"
line = strings.TrimSpace(line)

if !found {
switch {
case scanner.Prompt.UseAlt:
if strings.HasSuffix(line, `"""`) {
scanner.Prompt.UseAlt = false
multiLineBuffer += strings.TrimSuffix(line, `"""`)
line = multiLineBuffer
multiLineBuffer = ""
} else {
multiLineBuffer += line + " "
continue
}

prompt = strings.TrimPrefix(prompt, `"""`)
scanner.Prompt.UseAlt = false
case strings.HasPrefix(line, `"""`) && len(prompt) == 0:
case strings.HasPrefix(line, `"""`):
scanner.Prompt.UseAlt = true
prompt += line + "\n"
continue
case scanner.Pasting:
prompt += line + "\n"
multiLineBuffer = strings.TrimPrefix(line, `"""`) + " "
continue
case strings.HasPrefix(line, "/list"):
args := strings.Fields(line)
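The old and new lines above interleave two versions of the multi-line handling; the new version drops the prompt-prefix tracking and instead accumulates lines into multiLineBuffer while the alternate prompt is active. A minimal standalone sketch of that accumulation idea follows. It is not the PR's exact code, which also handles pasting and slash commands.

```go
package main

import (
	"fmt"
	"strings"
)

// accumulate joins lines between an opening and a closing `"""` into a single
// prompt, mirroring the multiLineBuffer handling sketched in the hunk above.
func accumulate(lines []string) []string {
	var prompts []string
	var buf string
	multiline := false

	for _, line := range lines {
		switch {
		case multiline:
			if strings.HasSuffix(line, `"""`) {
				buf += strings.TrimSuffix(line, `"""`)
				prompts = append(prompts, buf)
				buf, multiline = "", false
			} else {
				buf += line + " "
			}
		case strings.HasPrefix(line, `"""`):
			multiline = true
			buf = strings.TrimPrefix(line, `"""`) + " "
		default:
			prompts = append(prompts, line)
		}
	}
	return prompts
}

func main() {
	// Lines wrapped in `"""` collapse to one prompt; others pass through as-is.
	fmt.Println(accumulate([]string{`"""first`, "second", `third"""`, "single"}))
}
```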
@@ -761,17 +757,12 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format

case strings.HasPrefix(line, "/"):
args := strings.Fields(line)
fmt.Printf("Unknown command '%s'. Type /? for help\n", args[0])
continue
default:
prompt += line
}

if len(prompt) > 0 && prompt[0] != '/' {
if err := generate(cmd, model, prompt, wordWrap, format); err != nil {
if len(line) > 0 && line[0] != '/' {
if err := generate(cmd, model, line, wordWrap, format); err != nil {
return err
}

prompt = ""
}
}
}
@@ -372,10 +372,10 @@ curl -T model.bin -X POST http://localhost:11434/api/blobs/sha256:29fdb92e57cf08

Return 201 Created if the blob was successfully created.

## List Local Models
## List Models

```shell
GET /api/tags
GET /api/list
```

List models that are available locally.
@@ -385,7 +385,7 @@ List models that are available locally.

#### Request

```shell
curl http://localhost:11434/api/tags
curl http://localhost:11434/api/list
```

#### Response
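The response body itself is not part of this diff. For orientation, the Python client above reads a top-level models array from the JSON, so a successful reply has roughly the shape below; per-model fields beyond name are omitted here because they are not shown in the diff.

```json
{
  "models": [
    { "name": "llama2:latest" }
  ]
}
```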
@@ -13,6 +13,6 @@ package llm

//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=on
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner
@@ -21,6 +21,6 @@ package llm

//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cuda --target server --config Release
//go:generate mv ggml/build/cuda/bin/server ggml/build/cuda/bin/ollama-runner
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA_PEER_MAX_BATCH_SIZE=0
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cuda --target server --config Release
//go:generate mv gguf/build/cuda/bin/server gguf/build/cuda/bin/ollama-runner
@@ -14,11 +14,3 @@ package llm

//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate cmd /c move gguf\build\cpu\bin\Release\server.exe gguf\build\cpu\bin\Release\ollama-runner.exe

//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cuda --target server --config Release
//go:generate cmd /c move ggml\build\cuda\bin\Release\server.exe ggml\build\cuda\bin\Release\ollama-runner.exe

//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cuda --target server --config Release
//go:generate cmd /c move gguf\build\cuda\bin\Release\server.exe gguf\build\cuda\bin\Release\ollama-runner.exe
Submodule llm/llama.cpp/gguf updated: 9656026b53...0b871f1a04
@@ -84,7 +84,6 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {

case "windows":
// TODO: select windows GPU runner here when available
runners = []ModelRunner{
{Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
{Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
}
default:
@@ -270,7 +269,7 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {

if opts.NumGPU != -1 {
return opts.NumGPU
}
if runtime.GOOS == "linux" || runtime.GOOS == "windows" {
if runtime.GOOS == "linux" {
freeBytes, err := CheckVRAM()
if err != nil {
if !errors.Is(err, errNvidiaSMI) {
@@ -43,12 +43,9 @@ func (h *History) Init() error {

}

path := filepath.Join(home, ".ollama", "history")
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return err
}

h.Filename = path

//todo check if the file exists
f, err := os.OpenFile(path, os.O_CREATE|os.O_RDONLY, 0600)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
@@ -24,7 +24,6 @@ type Instance struct {

Prompt *Prompt
Terminal *Terminal
History *History
Pasting bool
}

func New(prompt Prompt) (*Instance, error) {
@@ -47,7 +46,7 @@ func New(prompt Prompt) (*Instance, error) {

func (i *Instance) Readline() (string, error) {
prompt := i.Prompt.Prompt
if i.Prompt.UseAlt || i.Pasting {
if i.Prompt.UseAlt {
prompt = i.Prompt.AltPrompt
}
fmt.Print(prompt)
@@ -64,13 +63,12 @@ func (i *Instance) Readline() (string, error) {

var esc bool
var escex bool
var metaDel bool
var pasteMode PasteMode

var currentLineBuf []rune

for {
// don't show placeholder when pasting unless we're in multiline mode
showPlaceholder := !i.Pasting || i.Prompt.UseAlt
if buf.IsEmpty() && showPlaceholder {
if buf.IsEmpty() {
ph := i.Prompt.Placeholder
if i.Prompt.UseAlt {
ph = i.Prompt.AltPlaceholder
@@ -121,9 +119,9 @@ func (i *Instance) Readline() (string, error) {

code += string(r)
}
if code == CharBracketedPasteStart {
i.Pasting = true
pasteMode = PasteModeStart
} else if code == CharBracketedPasteEnd {
i.Pasting = false
pasteMode = PasteModeEnd
}
case KeyDel:
if buf.Size() > 0 {
@@ -198,7 +196,12 @@ func (i *Instance) Readline() (string, error) {

}
buf.MoveToEnd()
fmt.Println()

switch pasteMode {
case PasteModeStart:
output = `"""` + output
case PasteModeEnd:
output = output + `"""`
}
return output, nil
default:
if metaDel {
||||
@@ -76,3 +76,11 @@ const (
|
||||
CharBracketedPasteStart = "00~"
|
||||
CharBracketedPasteEnd = "01~"
|
||||
)
|
||||
|
||||
type PasteMode int
|
||||
|
||||
const (
|
||||
PastModeOff = iota
|
||||
PasteModeStart
|
||||
PasteModeEnd
|
||||
)
|
||||
|
||||
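For context on these constants: terminals that support bracketed paste wrap pasted text in the escape sequences ESC [ 200 ~ and ESC [ 201 ~, and the trailing "00~"/"01~" values above appear to be what remains once the reader has already consumed the leading bytes of the sequence. Below is a small, self-contained sketch of the same detection idea; it is not the PR's code, which works rune by rune inside Readline.

```go
package main

import (
	"fmt"
	"strings"
)

// Full bracketed-paste markers as a terminal emits them.
const (
	pasteStart = "\x1b[200~"
	pasteEnd   = "\x1b[201~"
)

// extractPaste returns the text wrapped by the paste markers, if present.
// Illustrative only; the readline code above tracks the same state with
// i.Pasting and pasteMode instead of scanning a whole string.
func extractPaste(s string) (string, bool) {
	start := strings.Index(s, pasteStart)
	end := strings.Index(s, pasteEnd)
	if start < 0 || end < 0 || end < start {
		return "", false
	}
	return s[start+len(pasteStart) : end], true
}

func main() {
	text, ok := extractPaste("\x1b[200~multi\nline paste\x1b[201~")
	fmt.Println(ok, text)
}
```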
@@ -771,6 +771,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {

c.String(http.StatusOK, "Ollama is running")
})

r.Handle(method, "/api/list", ListModelsHandler)
r.Handle(method, "/api/tags", ListModelsHandler)
}
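Since both paths are wired to ListModelsHandler in the hunk above, either one should return the same listing. A quick check against a locally running server is sketched below; the default port 11434 is assumed from the docs excerpt earlier in this diff.

```go
package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// Both routes are registered against the same handler, so the two
	// responses should match in status and body.
	for _, path := range []string{"/api/list", "/api/tags"} {
		resp, err := http.Get("http://localhost:11434" + path)
		if err != nil {
			log.Fatal(err)
		}
		body, _ := io.ReadAll(resp.Body)
		resp.Body.Close()
		fmt.Printf("%s -> %d (%d bytes)\n", path, resp.StatusCode, len(body))
	}
}
```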