Compare commits

1 commit: progress-f ... jmorganca/

| Author | SHA1 | Date |
|---|---|---|
|  | fba7f04ca0 |  |

.github/workflows/release.yaml (vendored, 4 changes)
```diff
@@ -329,9 +329,7 @@ jobs:
           done
         working-directory: dist/${{ matrix.os }}-${{ matrix.arch }}
       - run: |
-          for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do
-            tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE --owner 0 --group 0 | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz);
-          done
+          for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz); done
       - uses: actions/upload-artifact@v4
         with:
           name: dist-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}
```
CMakeLists.txt

```diff
@@ -24,7 +24,7 @@ set(GGML_LLAMAFILE ON)
 set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
 set(GGML_CUDA_GRAPHS ON)

-if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+if((NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
     OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+"))
     set(GGML_CPU_ALL_VARIANTS ON)
 endif()
```
README.md

```diff
@@ -381,7 +381,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
 - [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
 - [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
-- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivent endpoint with Ollama support for running locally)

 ### Cloud

@@ -549,7 +548,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow)
 - [TextLLaMA](https://github.com/adarshM84/TextLLaMA) A Chrome Extension that helps you write emails, correct grammar, and translate into any language
 - [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)
-- [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) (telegram bot, primary for RP. Oobabooga-like buttons, [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) API integration e.t.c)

 ### Supported backends

```
api/client.go

```diff
@@ -126,8 +126,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
 			return err
 		}
 	}
-
-	return ctx.Err()
+	return nil
 }

 const maxBufferSize = 512 * format.KiloByte
@@ -190,7 +189,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 		}
 	}

-	return ctx.Err()
+	return nil
 }

 // GenerateResponseFunc is a function that [Client.Generate] invokes every time
```
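The two hunks above toggle what the read loops in `do` and `stream` return once the scanner stops: `ctx.Err()` reports a cancelled context to the caller, while `nil` treats the end of the stream as success even when the context was cancelled mid-stream. A minimal sketch of the difference, not ollama's actual client, assuming a line-oriented stream:

```go
package main

import (
	"bufio"
	"context"
	"fmt"
	"strings"
	"time"
)

// stream reads lines until the input ends or the context is cancelled.
func stream(ctx context.Context, s *bufio.Scanner, fn func(string) error) error {
	for s.Scan() {
		select {
		case <-ctx.Done():
			return ctx.Err() // caller sees context.Canceled
		default:
		}
		if err := fn(s.Text()); err != nil {
			return err
		}
	}
	// The diff flips this final return between ctx.Err() and nil:
	// ctx.Err() also surfaces a cancellation that raced the last read.
	return ctx.Err()
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	go func() { time.Sleep(10 * time.Millisecond); cancel() }()

	s := bufio.NewScanner(strings.NewReader("a\nb\nc\n"))
	err := stream(ctx, s, func(line string) error {
		time.Sleep(20 * time.Millisecond) // slow consumer, so cancel wins
		fmt.Println(line)
		return nil
	})
	fmt.Println("err:", err) // prints "context canceled"
}
```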
cmd/cmd.go (38 changes)
```diff
@@ -15,11 +15,13 @@ import (
 	"net"
 	"net/http"
 	"os"
+	"os/signal"
 	"path/filepath"
 	"runtime"
 	"strconv"
 	"strings"
 	"sync/atomic"
+	"syscall"
 	"time"

 	"github.com/containerd/console"
@@ -328,7 +330,6 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 		if err := PullHandler(cmd, []string{name}); err != nil {
 			return nil, err
 		}
-
 		return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
 	}
 	return info, err
@@ -857,6 +858,17 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
 	spinner := progress.NewSpinner("")
 	p.Add("", spinner)

+	cancelCtx, cancel := context.WithCancel(cmd.Context())
+	defer cancel()
+
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, syscall.SIGINT)
+
+	go func() {
+		<-sigChan
+		cancel()
+	}()
+
 	var state *displayResponseState = &displayResponseState{}
 	var latest api.ChatResponse
 	var fullResponse strings.Builder
@@ -891,7 +903,10 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
 		req.KeepAlive = opts.KeepAlive
 	}

-	if err := client.Chat(cmd.Context(), req, fn); err != nil {
+	if err := client.Chat(cancelCtx, req, fn); err != nil {
+		if errors.Is(err, context.Canceled) {
+			return nil, nil
+		}
 		return nil, err
 	}

@@ -931,6 +946,17 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 		generateContext = []int{}
 	}

+	ctx, cancel := context.WithCancel(cmd.Context())
+	defer cancel()
+
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, syscall.SIGINT)
+
+	go func() {
+		<-sigChan
+		cancel()
+	}()
+
 	var state *displayResponseState = &displayResponseState{}

 	fn := func(response api.GenerateResponse) error {
@@ -966,7 +992,10 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 		KeepAlive: opts.KeepAlive,
 	}

-	if err := client.Generate(cmd.Context(), &request, fn); err != nil {
+	if err := client.Generate(ctx, &request, fn); err != nil {
+		if errors.Is(err, context.Canceled) {
+			return nil
+		}
 		return err
 	}

@@ -988,7 +1017,8 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 		latest.Summary()
 	}

-	cmd.SetContext(context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context))
+	ctx = context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context)
+	cmd.SetContext(ctx)

 	return nil
 }
```
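Together with the main.go hunk further down, these changes move Ctrl+C handling out of a single process-wide handler and into the individual chat and generate calls: a goroutine converts the first SIGINT into a context cancellation, and the caller translates `context.Canceled` into a quiet return rather than an error. A self-contained sketch of the same pattern; `longRunning` stands in for the `client.Chat`/`client.Generate` calls:

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"os"
	"os/signal"
	"syscall"
	"time"
)

// longRunning blocks until it finishes or its context is cancelled.
func longRunning(ctx context.Context) error {
	select {
	case <-time.After(10 * time.Second):
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Turn the first SIGINT into a cancellation instead of killing the process.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT)
	go func() {
		<-sigChan
		cancel()
	}()

	if err := longRunning(ctx); err != nil {
		if errors.Is(err, context.Canceled) {
			fmt.Println("interrupted; returning cleanly")
			return
		}
		fmt.Fprintln(os.Stderr, err)
	}
}
```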
```diff
@@ -19,10 +19,6 @@ var LibOllamaPath string = func() string {
 		return ""
 	}

-	if eval, err := filepath.EvalSymlinks(exe); err == nil {
-		exe = eval
-	}
-
 	var libPath string
 	switch runtime.GOOS {
 	case "windows":
```
docs/windows.md

```diff
@@ -55,7 +55,7 @@ Here's a quick example showing API access from `powershell`
 ## Troubleshooting

 Ollama on Windows stores files in a few different locations. You can view them in
-the explorer window by hitting `<Ctrl>+R` and type in:
+the explorer window by hitting `<cmd>+R` and type in:
 - `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
     - *app.log* contains most resent logs from the GUI application
     - *server.log* contains the most recent server logs
```
```diff
@@ -12,9 +12,6 @@ func TestHumanNumber(t *testing.T) {

 	testCases := []testCase{
 		{0, "0"},
-		{999, "999"},
-		{1000, "1K"},
-		{1001, "1K"},
 		{1000000, "1M"},
 		{125000000, "125M"},
 		{500500000, "500.50M"},
```
```diff
@@ -305,10 +305,6 @@ func (b *testBackend) NewContext() ml.Context {
 	return &testContext{}
 }

-func (b *testBackend) SystemInfo() string {
-	return "not implemented"
-}
-
 type testContext struct{}

 func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
```
llm/server.go

```diff
@@ -320,10 +320,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
 		return nil, fmt.Errorf("unable to lookup executable path: %w", err)
 	}

-	if eval, err := filepath.EvalSymlinks(exe); err == nil {
-		exe = eval
-	}
-
 	// TODO - once fully switched to the Go runner, load the model here for tokenize/detokenize cgo access
 	s := &llmServer{
 		port: port,
```
main.go (14 changes)
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"os"
|
|
||||||
"os/signal"
|
|
||||||
|
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
|
|
||||||
@@ -11,15 +9,5 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
sigChan := make(chan os.Signal, 1)
|
|
||||||
signal.Notify(sigChan, os.Interrupt)
|
|
||||||
go func() {
|
|
||||||
<-sigChan
|
|
||||||
cancel()
|
|
||||||
}()
|
|
||||||
|
|
||||||
cobra.CheckErr(cmd.NewCLI().ExecuteContext(ctx))
|
|
||||||
}
|
}
|
||||||
|
|||||||
ml/backend.go

```diff
@@ -23,7 +23,6 @@ type Backend interface {
 	Config() Config
 	Get(name string) Tensor
 	NewContext() Context
-	SystemInfo() string
 }

 var backends = make(map[string]func(*os.File) (Backend, error))
```
ml/backend/ggml/ggml.go

```diff
@@ -1,27 +1,11 @@
 package ggml

-/*
-#cgo CPPFLAGS: -I${SRCDIR}/ggml/include
-#include <stdlib.h>
-#include <stdint.h>
-#include "ggml.h"
-#include "ggml-cpu.h"
-#include "ggml-backend.h"
-
-static struct ggml_backend_feature * getBackendFeatures(void *fp, ggml_backend_reg_t reg) {return ((ggml_backend_get_features_t)(fp))(reg);}
-static struct ggml_backend_feature * getNextBackendFeatures(struct ggml_backend_feature * feature) { return &feature[1];}
-
-typedef enum {COMP_UNKNOWN,COMP_GCC,COMP_CLANG} COMPILER;
-COMPILER inline get_compiler() {
-#if defined(__clang__)
-	return COMP_CLANG;
-#elif defined(__GNUC__)
-	return COMP_GCC;
-#else
-	return UNKNOWN_COMPILER;
-#endif
-}
-
-*/
+// #cgo CPPFLAGS: -I${SRCDIR}/ggml/include
+// #include <stdlib.h>
+// #include <stdint.h>
+// #include "ggml.h"
+// #include "ggml-cpu.h"
+// #include "ggml-backend.h"
 import "C"

 import (
@@ -642,34 +626,3 @@ func (t *Tensor) Conv2D(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int
 		t: C.ggml_conv_2d(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1)),
 	}
 }
-
-func (b *Backend) SystemInfo() string {
-	var compiler string
-	switch C.get_compiler() {
-	case C.COMP_UNKNOWN:
-		compiler = "cgo(unknown_compiler)"
-	case C.COMP_GCC:
-		compiler = "cgo(gcc)"
-	case C.COMP_CLANG:
-		compiler = "cgo(clang)"
-	}
-
-	var s string
-	for i := range C.ggml_backend_reg_count() {
-		reg := C.ggml_backend_reg_get(i)
-		fName := C.CString("ggml_backend_get_features")
-		defer C.free(unsafe.Pointer(fName))
-		get_features_fn := C.ggml_backend_reg_get_proc_address(reg, fName)
-		if get_features_fn != nil {
-			s += C.GoString(C.ggml_backend_reg_name(reg))
-			s += " : "
-			for features := C.getBackendFeatures(get_features_fn, reg); features.name != nil; features = C.getNextBackendFeatures(features) {
-				s += C.GoString(features.name)
-				s += " = "
-				s += C.GoString(features.value)
-				s += " | "
-			}
-		}
-	}
-	return s + compiler
-}
```
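The removed code leans on cgo's preamble rule: the comment block immediately above `import "C"` is compiled as C, so helpers defined there, such as `get_compiler()` and `getBackendFeatures()`, become callable from Go as `C.get_compiler()` and friends, while the surviving `//`-style lines keep only the `#cgo` flags and includes. A minimal, self-contained illustration of that mechanism, not ollama code, requiring CGO_ENABLED=1:

```go
package main

/*
#include <stdio.h>

// A tiny helper in the same spirit as get_compiler() above: the
// preprocessor identifies which compiler built this translation unit.
static const char* compiler_name() {
#if defined(__clang__)
	return "clang";
#elif defined(__GNUC__)
	return "gcc";
#else
	return "unknown";
#endif
}
*/
import "C"

import "fmt"

func main() {
	// C.compiler_name() calls the static helper defined in the preamble.
	fmt.Println("built with:", C.GoString(C.compiler_name()))
}
```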
```diff
@@ -47,6 +47,10 @@ var OnceLoad = sync.OnceFunc(func() {
 		exe = "."
 	}

+	if eval, err := filepath.EvalSymlinks(exe); err == nil {
+		exe = eval
+	}
+
 	// PATH, LD_LIBRARY_PATH, and DYLD_LIBRARY_PATH are often
 	// set by the parent process, however, use a default value
 	// if the environment variable is not set.
```
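Across these hunks the symlink resolution moves rather than disappears: it is deleted from `LibOllamaPath` and `NewLlamaServer` above and added once inside `OnceLoad`. The pattern itself is plain stdlib: resolve the running binary through any symlink so paths derived from it point at the real install location. A small runnable sketch; the `lib` join is illustrative, not ollama's actual layout:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	exe, err := os.Executable()
	if err != nil {
		exe = "."
	}
	// Best-effort: if the binary was launched via a symlink, follow it;
	// on failure, keep the original path.
	if eval, err := filepath.EvalSymlinks(exe); err == nil {
		exe = eval
	}
	// Paths computed from exe now refer to the real install directory.
	fmt.Println(filepath.Join(filepath.Dir(exe), "lib"))
}
```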
model/model.go

```diff
@@ -21,7 +21,6 @@ import (
 	_ "github.com/ollama/ollama/ml/backend"
 )

-// Options contains the inputs for a model forward pass
 type Options struct {
 	Inputs    []int32
 	Positions []int32
@@ -35,13 +34,11 @@ type config struct {
 	Cache kvcache.Cache
 }

-// Base implements the common fields and methods for all models
 type Base struct {
 	b ml.Backend
 	config
 }

-// Backend returns the underlying backend that will run the model
 func (m *Base) Backend() ml.Backend {
 	return m.b
 }
@@ -50,7 +47,6 @@ func (m *Base) Config() config {
 	return m.config
 }

-// Model implements a specific model architecture, defining the forward pass and any model-specific configuration
 type Model interface {
 	Forward(ml.Context, Options) (ml.Tensor, error)

@@ -60,7 +56,6 @@ type Model interface {

 var models = make(map[string]func(ml.Config) (Model, error))

-// Register registers a model constructor for the given architecture
 func Register(name string, f func(ml.Config) (Model, error)) {
 	if _, ok := models[name]; ok {
 		panic("model: model already registered")
@@ -69,9 +64,8 @@ func Register(name string, f func(ml.Config) (Model, error)) {
 	models[name] = f
 }

-// New initializes a new model instance with the provided configuration based on the metadata in the model file
-func New(modelPath string) (Model, error) {
-	r, err := os.Open(modelPath)
+func New(s string) (Model, error) {
+	r, err := os.Open(s)
 	if err != nil {
 		return nil, err
 	}
```
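`Register` and `New` above follow Go's common constructor-registry idiom, the same shape as `database/sql` drivers: architectures register a factory at init time, and `New` dispatches by name. A simplified, self-contained sketch of that pattern; the `Config` type and the `llama` entry below are illustrative stand-ins, not ollama's actual API:

```go
package main

import "fmt"

type Config map[string]string

type Model interface {
	Name() string
}

var models = make(map[string]func(Config) (Model, error))

// Register panics on duplicates, matching the behavior in the diff.
func Register(name string, f func(Config) (Model, error)) {
	if _, ok := models[name]; ok {
		panic("model: model already registered")
	}
	models[name] = f
}

// New looks up a constructor by architecture name and invokes it.
func New(arch string, c Config) (Model, error) {
	f, ok := models[arch]
	if !ok {
		return nil, fmt.Errorf("unsupported architecture %q", arch)
	}
	return f(c)
}

type llama struct{}

func (llama) Name() string { return "llama" }

func main() {
	Register("llama", func(Config) (Model, error) { return llama{}, nil })
	m, err := New("llama", Config{})
	fmt.Println(m.Name(), err)
}
```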
progress/progress.go

```diff
@@ -1,7 +1,6 @@
 package progress

 import (
-	"bufio"
 	"fmt"
 	"io"
 	"sync"
@@ -14,8 +13,7 @@ type State interface {

 type Progress struct {
 	mu sync.Mutex
-	// buffer output to minimize flickering on all terminals
-	w *bufio.Writer
+	w io.Writer

 	pos int
@@ -24,7 +22,7 @@ type Progress struct {
 }

 func NewProgress(w io.Writer) *Progress {
-	p := &Progress{w: bufio.NewWriter(w)}
+	p := &Progress{w: w}
 	go p.start()
 	return p
 }
@@ -49,29 +47,26 @@ func (p *Progress) stop() bool {
 func (p *Progress) Stop() bool {
 	stopped := p.stop()
 	if stopped {
-		fmt.Fprintln(p.w)
+		fmt.Fprint(p.w, "\n")
 	}

-	// show cursor
-	fmt.Fprint(p.w, "\033[?25h")
-	p.w.Flush()
 	return stopped
 }

 func (p *Progress) StopAndClear() bool {
+	fmt.Fprint(p.w, "\033[?25l")
+	defer fmt.Fprint(p.w, "\033[?25h")
+
 	stopped := p.stop()
 	if stopped {
 		// clear all progress lines
-		for range p.pos - 1 {
-			fmt.Fprint(p.w, "\033[A")
+		for i := range p.pos {
+			if i > 0 {
+				fmt.Fprint(p.w, "\033[A")
+			}
+			fmt.Fprint(p.w, "\033[2K\033[1G")
 		}
-
-		fmt.Fprint(p.w, "\033[2K", "\033[1G")
 	}

-	// show cursor
-	fmt.Fprint(p.w, "\033[?25h")
-	p.w.Flush()
 	return stopped
 }
@@ -86,31 +81,30 @@ func (p *Progress) render() {
 	p.mu.Lock()
 	defer p.mu.Unlock()

-	fmt.Fprint(p.w, "\033[?2026h")
-	defer fmt.Fprint(p.w, "\033[?2026l")
+	fmt.Fprint(p.w, "\033[?25l")
+	defer fmt.Fprint(p.w, "\033[?25h")

-	for range p.pos - 1 {
-		fmt.Fprint(p.w, "\033[A")
+	// clear already rendered progress lines
+	for i := range p.pos {
+		if i > 0 {
+			fmt.Fprint(p.w, "\033[A")
+		}
+		fmt.Fprint(p.w, "\033[2K\033[1G")
 	}

-	fmt.Fprint(p.w, "\033[1G")
-
 	// render progress lines
 	for i, state := range p.states {
-		fmt.Fprint(p.w, state.String(), "\033[K")
+		fmt.Fprint(p.w, state.String())
 		if i < len(p.states)-1 {
 			fmt.Fprint(p.w, "\n")
 		}
 	}

 	p.pos = len(p.states)
-	p.w.Flush()
 }

 func (p *Progress) start() {
 	p.ticker = time.NewTicker(100 * time.Millisecond)
-	// hide cursor
-	fmt.Fprint(p.w, "\033[?25l")
 	for range p.ticker.C {
 		p.render()
 	}
```
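These hunks are easier to follow with the escape sequences decoded: `\033[A` moves the cursor up one line, `\033[2K` erases the current line, `\033[1G` returns to column 1, `\033[K` erases to end of line, `\033[?25l`/`\033[?25h` hide and show the cursor, and `\033[?2026h`/`\033[?2026l` bracket a "synchronized update" on terminals that support it. The left side combines the synchronized-update bracket with a `bufio.Writer` flushed once per frame so each redraw hits the terminal as a single write; the right side hides the cursor per frame and erases lines individually. A tiny self-contained demo of the single-line redraw loop, illustrative rather than ollama's renderer:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	fmt.Print("\033[?25l")       // hide cursor while redrawing
	defer fmt.Print("\033[?25h") // always restore it

	for i := 0; i <= 3; i++ {
		if i > 0 {
			fmt.Print("\033[A") // move up over the previous frame
		}
		fmt.Print("\033[2K\033[1G") // erase the line, return to column 1
		fmt.Printf("progress: %d/3\n", i)
		time.Sleep(200 * time.Millisecond)
	}
}
```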
```diff
@@ -845,6 +845,8 @@ func (s *Server) loadModel(
 	threads int,
 	multiUserCache bool,
 ) {
+	llama.BackendInit()
+
 	var err error
 	s.model, err = llama.LoadModelFromFile(mpath, params)
 	if err != nil {
@@ -930,8 +932,6 @@ func Execute(args []string) error {
 	})
 	slog.SetDefault(slog.New(handler))
 	slog.Info("starting go runner")
-
-	llama.BackendInit()
 	slog.Info("system", "info", llama.PrintSystemInfo(), "threads", *threads)

 	server := &Server{
```
```diff
@@ -813,8 +813,6 @@ func (s *Server) loadModel(
 		panic(err)
 	}

-	slog.Info("system", "info", s.model.Backend().SystemInfo() /* "threads", *threads */)
-
 	// TODO(jessegross): LoRA loading
 	if lpath.String() != "" {
 		panic("loras are not yet implemented")
@@ -883,6 +881,7 @@ func Execute(args []string) error {
 	})
 	slog.SetDefault(slog.New(handler))
 	slog.Info("starting ollama engine")
+	// TODO(jessegross): Some system info would be useful

 	server := &Server{
 		batchSize: *batchSize,
```