Compare commits

..

1 Commit

Author SHA1 Message Date
Jeffrey Morgan
7396030d60 rename /api/tags to /api/list 2023-11-23 14:48:35 -05:00
14 changed files with 49 additions and 59 deletions

1
.gitignore vendored
View File

@@ -7,4 +7,3 @@ dist
ollama
ggml-metal.metal
.cache
*.exe

View File

@@ -262,7 +262,7 @@ func (c *Client) Create(ctx context.Context, req *CreateRequest, fn CreateProgre
func (c *Client) List(ctx context.Context) (*ListResponse, error) {
var lr ListResponse
if err := c.do(ctx, http.MethodGet, "/api/tags", nil, &lr); err != nil {
if err := c.do(ctx, http.MethodGet, "/api/list", nil, &lr); err != nil {
return nil, err
}
return &lr, nil

View File

@@ -157,7 +157,7 @@ def push(model_name, insecure=False, callback=None):
# List models that are available locally.
def list():
try:
response = requests.get(f"{BASE_URL}/api/tags")
response = requests.get(f"{BASE_URL}/api/list")
response.raise_for_status()
data = response.json()
models = data.get('models', [])

View File

@@ -602,12 +602,14 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
fmt.Fprintln(os.Stderr, "")
}
scanner, err := readline.New(readline.Prompt{
prompt := readline.Prompt{
Prompt: ">>> ",
AltPrompt: "... ",
Placeholder: "Send a message (/? for help)",
AltPlaceholder: `Use """ to end multi-line input`,
})
}
scanner, err := readline.New(prompt)
if err != nil {
return err
}
@@ -615,7 +617,7 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
fmt.Print(readline.StartBracketedPaste)
defer fmt.Printf(readline.EndBracketedPaste)
var prompt string
var multiLineBuffer string
for {
line, err := scanner.Readline()
@@ -628,33 +630,27 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
fmt.Println("\nUse Ctrl-D or /bye to exit.")
}
scanner.Prompt.UseAlt = false
prompt = ""
continue
case err != nil:
return err
}
switch {
case strings.HasPrefix(prompt, `"""`):
// if the prompt so far starts with """ then we're in multiline mode
// and we need to keep reading until we find a line that ends with """
cut, found := strings.CutSuffix(line, `"""`)
prompt += cut + "\n"
line = strings.TrimSpace(line)
if !found {
switch {
case scanner.Prompt.UseAlt:
if strings.HasSuffix(line, `"""`) {
scanner.Prompt.UseAlt = false
multiLineBuffer += strings.TrimSuffix(line, `"""`)
line = multiLineBuffer
multiLineBuffer = ""
} else {
multiLineBuffer += line + " "
continue
}
prompt = strings.TrimPrefix(prompt, `"""`)
scanner.Prompt.UseAlt = false
case strings.HasPrefix(line, `"""`) && len(prompt) == 0:
case strings.HasPrefix(line, `"""`):
scanner.Prompt.UseAlt = true
prompt += line + "\n"
continue
case scanner.Pasting:
prompt += line + "\n"
multiLineBuffer = strings.TrimPrefix(line, `"""`) + " "
continue
case strings.HasPrefix(line, "/list"):
args := strings.Fields(line)
@@ -761,17 +757,12 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
case strings.HasPrefix(line, "/"):
args := strings.Fields(line)
fmt.Printf("Unknown command '%s'. Type /? for help\n", args[0])
continue
default:
prompt += line
}
if len(prompt) > 0 && prompt[0] != '/' {
if err := generate(cmd, model, prompt, wordWrap, format); err != nil {
if len(line) > 0 && line[0] != '/' {
if err := generate(cmd, model, line, wordWrap, format); err != nil {
return err
}
prompt = ""
}
}
}

View File

@@ -372,10 +372,10 @@ curl -T model.bin -X POST http://localhost:11434/api/blobs/sha256:29fdb92e57cf08
Return 201 Created if the blob was successfully created.
## List Local Models
## List Models
```shell
GET /api/tags
GET /api/list
```
List models that are available locally.
@@ -385,7 +385,7 @@ List models that are available locally.
#### Request
```shell
curl http://localhost:11434/api/tags
curl http://localhost:11434/api/list
```
#### Response

View File

@@ -13,6 +13,6 @@ package llm
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=on
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner

View File

@@ -21,6 +21,6 @@ package llm
//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cuda --target server --config Release
//go:generate mv ggml/build/cuda/bin/server ggml/build/cuda/bin/ollama-runner
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA_PEER_MAX_BATCH_SIZE=0
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cuda --target server --config Release
//go:generate mv gguf/build/cuda/bin/server gguf/build/cuda/bin/ollama-runner

View File

@@ -14,11 +14,3 @@ package llm
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate cmd /c move gguf\build\cpu\bin\Release\server.exe gguf\build\cpu\bin\Release\ollama-runner.exe
//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cuda --target server --config Release
//go:generate cmd /c move ggml\build\cuda\bin\Release\server.exe ggml\build\cuda\bin\Release\ollama-runner.exe
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cuda --target server --config Release
//go:generate cmd /c move gguf\build\cuda\bin\Release\server.exe gguf\build\cuda\bin\Release\ollama-runner.exe

View File

@@ -84,7 +84,6 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
case "windows":
// TODO: select windows GPU runner here when available
runners = []ModelRunner{
{Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
{Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
}
default:
@@ -270,7 +269,7 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
if opts.NumGPU != -1 {
return opts.NumGPU
}
if runtime.GOOS == "linux" || runtime.GOOS == "windows" {
if runtime.GOOS == "linux" {
freeBytes, err := CheckVRAM()
if err != nil {
if !errors.Is(err, errNvidiaSMI) {

View File

@@ -43,12 +43,9 @@ func (h *History) Init() error {
}
path := filepath.Join(home, ".ollama", "history")
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return err
}
h.Filename = path
//todo check if the file exists
f, err := os.OpenFile(path, os.O_CREATE|os.O_RDONLY, 0600)
if err != nil {
if errors.Is(err, os.ErrNotExist) {

View File

@@ -24,7 +24,6 @@ type Instance struct {
Prompt *Prompt
Terminal *Terminal
History *History
Pasting bool
}
func New(prompt Prompt) (*Instance, error) {
@@ -47,7 +46,7 @@ func New(prompt Prompt) (*Instance, error) {
func (i *Instance) Readline() (string, error) {
prompt := i.Prompt.Prompt
if i.Prompt.UseAlt || i.Pasting {
if i.Prompt.UseAlt {
prompt = i.Prompt.AltPrompt
}
fmt.Print(prompt)
@@ -64,13 +63,12 @@ func (i *Instance) Readline() (string, error) {
var esc bool
var escex bool
var metaDel bool
var pasteMode PasteMode
var currentLineBuf []rune
for {
// don't show placeholder when pasting unless we're in multiline mode
showPlaceholder := !i.Pasting || i.Prompt.UseAlt
if buf.IsEmpty() && showPlaceholder {
if buf.IsEmpty() {
ph := i.Prompt.Placeholder
if i.Prompt.UseAlt {
ph = i.Prompt.AltPlaceholder
@@ -121,9 +119,9 @@ func (i *Instance) Readline() (string, error) {
code += string(r)
}
if code == CharBracketedPasteStart {
i.Pasting = true
pasteMode = PasteModeStart
} else if code == CharBracketedPasteEnd {
i.Pasting = false
pasteMode = PasteModeEnd
}
case KeyDel:
if buf.Size() > 0 {
@@ -198,7 +196,12 @@ func (i *Instance) Readline() (string, error) {
}
buf.MoveToEnd()
fmt.Println()
switch pasteMode {
case PasteModeStart:
output = `"""` + output
case PasteModeEnd:
output = output + `"""`
}
return output, nil
default:
if metaDel {

View File

@@ -76,3 +76,11 @@ const (
CharBracketedPasteStart = "00~"
CharBracketedPasteEnd = "01~"
)
// PasteMode records which bracketed-paste marker, if any, was seen while a
// line was being read.
type PasteMode int

const (
	// PasteModeOff is the zero value: no bracketed-paste marker was seen.
	PasteModeOff PasteMode = iota
	// PasteModeStart indicates the bracketed-paste start marker was seen.
	PasteModeStart
	// PasteModeEnd indicates the bracketed-paste end marker was seen.
	PasteModeEnd
)

// PastModeOff is the original, misspelled name of PasteModeOff. It is kept so
// that existing references continue to compile.
//
// Deprecated: use PasteModeOff instead.
const PastModeOff = PasteModeOff

View File

@@ -771,6 +771,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {
c.String(http.StatusOK, "Ollama is running")
})
r.Handle(method, "/api/list", ListModelsHandler)
r.Handle(method, "/api/tags", ListModelsHandler)
}