Compare commits

4 commits: progress-f ... brucemacd/
| Author | SHA1 | Date |
|---|---|---|
| | 7fa9694359 | |
| | 96510b9353 | |
| | 9f8c89354b | |
| | 8815a8ee25 | |
.github/workflows/release.yaml (4 changes, vendored)
```diff
@@ -329,9 +329,7 @@ jobs:
           done
         working-directory: dist/${{ matrix.os }}-${{ matrix.arch }}
       - run: |
-          for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do
-            tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE --owner 0 --group 0 | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz);
-          done
+          for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz); done
       - uses: actions/upload-artifact@v4
         with:
          name: dist-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}
```
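The two variants differ in more than layout: the multi-line loop passes GNU tar's `--owner 0 --group 0`, which pins every archive entry's ownership to root instead of the CI user, while the one-liner omits that normalization. In both, `${ARCHIVE//.*/}` strips everything from the first `.` of the manifest path, so each `*.tar.in` file produces a matching `*.tgz` via `basename`.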
.gitignore (3 changes, vendored)
```diff
@@ -14,3 +14,6 @@ test_data
 __debug_bin*
 llama/build
 llama/vendor
+model/testdata/models/*
+!model/testdata/models/*.md
+!model/testdata/models/*.json
```
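These ignore rules pair with the new `model/testdata/models` directory introduced later in this diff: everything under it is ignored except `.md` and `.json` files, so large `.gguf` test weights stay out of the repository while the README and expected-output fixtures remain tracked.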
```diff
@@ -24,7 +24,7 @@ set(GGML_LLAMAFILE ON)
 set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
 set(GGML_CUDA_GRAPHS ON)

-if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+if((NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
     OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+"))
   set(GGML_CPU_ALL_VARIANTS ON)
 endif()
```
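Both forms gate `GGML_CPU_ALL_VARIANTS` (building every CPU micro-architecture variant) to non-ARM targets. The difference is the extra `CMAKE_OSX_ARCHITECTURES AND` guard: when no macOS architectures are set, an empty variable does not match `"arm64"`, so the unguarded first clause is spuriously true even on ARM hosts; the guarded form makes that case fall through to the second clause's `CMAKE_SYSTEM_PROCESSOR` check instead.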
```diff
@@ -381,7 +381,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
 - [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
 - [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
 - [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivent endpoint with Ollama support for running locally)

 ### Cloud
```

```diff
@@ -549,7 +548,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow)
 - [TextLLaMA](https://github.com/adarshM84/TextLLaMA) A Chrome Extension that helps you write emails, correct grammar, and translate into any language
 - [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)
 - [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) (telegram bot, primary for RP. Oobabooga-like buttons, [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) API integration e.t.c)

 ### Supported backends
```
```diff
@@ -126,8 +126,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
 			return err
 		}
 	}

-	return ctx.Err()
+	return nil
 }

 const maxBufferSize = 512 * format.KiloByte
```

```diff
@@ -190,7 +189,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 		}
 	}

-	return ctx.Err()
+	return nil
 }

 // GenerateResponseFunc is a function that [Client.Generate] invokes every time
```
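Both hunks change the value returned after the response stream has been fully consumed. A minimal sketch of the surrounding pattern (simplified signature and a hypothetical helper name; the real method also sets up the HTTP request and decodes error payloads) shows why `nil` is the natural result once the scanner loop exits normally:

```go
package api

import (
	"bufio"
	"context"
	"io"
)

const maxBufSize = 512 * 1024 // stands in for maxBufferSize above

// streamLines is a hypothetical reduction of the tail of Client.stream.
func streamLines(ctx context.Context, body io.Reader, fn func([]byte) error) error {
	scanner := bufio.NewScanner(body)
	scanner.Buffer(make([]byte, 0, maxBufSize), maxBufSize)
	for scanner.Scan() {
		select {
		case <-ctx.Done():
			return ctx.Err() // cancellation mid-stream still surfaces to the caller
		default:
			if err := fn(scanner.Bytes()); err != nil {
				return err
			}
		}
	}
	// Returning ctx.Err() here meant a stream that had already completed could
	// still be reported as "context canceled"; with the Ctrl-C handling added
	// in cmd/cmd.go below, returning nil avoids that spurious error.
	return nil
}
```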
cmd/cmd.go (38 changes)
```diff
@@ -15,11 +15,13 @@ import (
 	"net"
 	"net/http"
 	"os"
+	"os/signal"
 	"path/filepath"
 	"runtime"
 	"strconv"
 	"strings"
 	"sync/atomic"
+	"syscall"
 	"time"

 	"github.com/containerd/console"
```

```diff
@@ -328,7 +330,6 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 		if err := PullHandler(cmd, []string{name}); err != nil {
 			return nil, err
 		}

 		return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
 	}
 	return info, err
```

```diff
@@ -857,6 +858,17 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
 	spinner := progress.NewSpinner("")
 	p.Add("", spinner)

+	cancelCtx, cancel := context.WithCancel(cmd.Context())
+	defer cancel()
+
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, syscall.SIGINT)
+
+	go func() {
+		<-sigChan
+		cancel()
+	}()
+
 	var state *displayResponseState = &displayResponseState{}
 	var latest api.ChatResponse
 	var fullResponse strings.Builder
```

```diff
@@ -891,7 +903,10 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
 		req.KeepAlive = opts.KeepAlive
 	}

-	if err := client.Chat(cmd.Context(), req, fn); err != nil {
+	if err := client.Chat(cancelCtx, req, fn); err != nil {
+		if errors.Is(err, context.Canceled) {
+			return nil, nil
+		}
 		return nil, err
 	}
```

```diff
@@ -931,6 +946,17 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 		generateContext = []int{}
 	}

+	ctx, cancel := context.WithCancel(cmd.Context())
+	defer cancel()
+
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, syscall.SIGINT)
+
+	go func() {
+		<-sigChan
+		cancel()
+	}()
+
 	var state *displayResponseState = &displayResponseState{}

 	fn := func(response api.GenerateResponse) error {
```

```diff
@@ -966,7 +992,10 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 		KeepAlive: opts.KeepAlive,
 	}

-	if err := client.Generate(cmd.Context(), &request, fn); err != nil {
+	if err := client.Generate(ctx, &request, fn); err != nil {
+		if errors.Is(err, context.Canceled) {
+			return nil
+		}
 		return err
 	}
```

```diff
@@ -988,7 +1017,8 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 		latest.Summary()
 	}

-	cmd.SetContext(context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context))
+	ctx = context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context)
+	cmd.SetContext(ctx)

 	return nil
 }
```
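Taken together, the cmd/cmd.go hunks move interrupt handling out of `main` and next to each request: SIGINT cancels the in-flight `client.Chat`/`client.Generate` call through context cancellation, and the resulting `context.Canceled` error is swallowed so the CLI exits cleanly. A self-contained sketch of the same pattern, with a stand-in `run` function in place of the API calls:

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"os"
	"os/signal"
	"syscall"
)

// run stands in for a long-running call like client.Chat or client.Generate.
func run(ctx context.Context) error {
	<-ctx.Done() // block until canceled
	return ctx.Err()
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT)
	go func() {
		<-sigChan
		cancel() // Ctrl-C cancels the request instead of killing the process
	}()

	if err := run(ctx); err != nil && !errors.Is(err, context.Canceled) {
		fmt.Fprintln(os.Stderr, err) // real errors still surface
		os.Exit(1)
	}
}
```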
```diff
@@ -55,7 +55,7 @@ Here's a quick example showing API access from `powershell`
 ## Troubleshooting

 Ollama on Windows stores files in a few different locations. You can view them in
-the explorer window by hitting `<Ctrl>+R` and type in:
+the explorer window by hitting `<cmd>+R` and type in:
 - `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
     - *app.log* contains most resent logs from the GUI application
     - *server.log* contains the most recent server logs
```
```diff
@@ -12,9 +12,6 @@ func TestHumanNumber(t *testing.T) {
 	testCases := []testCase{
 		{0, "0"},
 		{999, "999"},
 		{1000, "1K"},
 		{1001, "1K"},
 		{1000000, "1M"},
 		{125000000, "125M"},
 		{500500000, "500.50M"},
```
```diff
@@ -305,10 +305,6 @@ func (b *testBackend) NewContext() ml.Context {
 	return &testContext{}
 }

-func (b *testBackend) SystemInfo() string {
-	return "not implemented"
-}
-
 type testContext struct{}

 func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
```

```diff
@@ -434,7 +430,7 @@ func (t *testTensor) Conv2D(ctx ml.Context, weight ml.Tensor, s0, s1, p0, p1, d0
 	panic("not implemented")
 }

-func (t *testTensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, dim uint32, base, scale float32) ml.Tensor {
+func (t *testTensor) RoPE(ctx ml.Context, rc ml.RopeConfig) ml.Tensor {
 	panic("not implemented")
 }
```
main.go (14 changes)
```diff
@@ -2,8 +2,6 @@ package main

 import (
 	"context"
-	"os"
-	"os/signal"

 	"github.com/spf13/cobra"
```

```diff
@@ -11,15 +9,5 @@ import (
 )

 func main() {
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-
-	sigChan := make(chan os.Signal, 1)
-	signal.Notify(sigChan, os.Interrupt)
-	go func() {
-		<-sigChan
-		cancel()
-	}()
-
-	cobra.CheckErr(cmd.NewCLI().ExecuteContext(ctx))
+	cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
 }
```
```diff
@@ -23,7 +23,6 @@ type Backend interface {
 	Config() Config
 	Get(name string) Tensor
 	NewContext() Context
-	SystemInfo() string
 }

 var backends = make(map[string]func(*os.File) (Backend, error))
```

```diff
@@ -44,6 +43,42 @@ func NewBackend(f *os.File) (Backend, error) {
 	return nil, fmt.Errorf("unsupported backend")
 }

+// RopeType specifies the type of RoPE (Rotary Position Embedding) to use, these types are implemented in the backend
+type RopeType int
+
+const (
+	RopeTypeStandard RopeType = iota
+	_ // not yet used
+	RopeTypeNeoX
+)
+
+// RopeConfig contains all configuration for the RoPE (Rotary Position Embedding) operation
+type RopeConfig struct {
+	// PositionIDs contains the position indices for each token in the sequence
+	// These indices are used to calculate the rotary embeddings
+	PositionIDs Tensor
+
+	// RopeFactors is an optional tensor containing pre-computed rotation factors
+	RopeFactors Tensor
+
+	// RopeDim specifies the dimension size for the rotary embeddings
+	RopeDim uint32
+
+	// RopeType indicates which RoPE variant to use (e.g. normal or neox)
+	RopeType RopeType
+
+	// OrigCtxLen stores the original context length the model was trained with
+	OrigCtxLen int
+
+	// RopeBase is the base value used in the frequency calculation
+	RopeBase float32
+
+	// RopeScale is a scaling factor applied to position indices
+	RopeScale float32
+
+	// YaRN parameters can be added here if they need to be configurable
+}
+
 type Context interface {
 	Zeros(dtype DType, shape ...int) Tensor
 	FromFloatSlice(s []float32, shape ...int) (Tensor, error)
```

```diff
@@ -76,7 +111,7 @@ type Tensor interface {
 	Scale(ctx Context, s float64) Tensor

 	Conv2D(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
-	RoPE(ctx Context, positionIDs, ropeFactors Tensor, dim uint32, base, scale float32) Tensor
+	RoPE(ctx Context, rc RopeConfig) Tensor

 	Tanh(ctx Context) Tensor
 	GELU(ctx Context) Tensor
```
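The skipped `iota` value in `RopeType` appears deliberate: it keeps `RopeTypeNeoX` at 2, in line with the rope mode values the ggml backend expects (note the direct `C.int(rc.RopeType)` pass-through in the ggml hunk below). Call sites migrate from the positional form to the struct form roughly as follows (an illustrative fragment; the variable names are placeholders):

```go
// Before: positional arguments, with the rope variant and original context
// length hard-coded inside the backend.
q = q.RoPE(ctx, positionIDs, ropeFactors, ropeDim, ropeBase, ropeScale)

// After: one RopeConfig value that can be built once and reused for the
// query, key, and cache-shift paths.
q = q.RoPE(ctx, ml.RopeConfig{
	PositionIDs: positionIDs,
	RopeFactors: ropeFactors, // optional; the ggml backend substitutes an empty tensor for nil
	RopeDim:     ropeDim,
	RopeType:    ml.RopeTypeStandard, // ml.RopeTypeNeoX for models such as qwen2
	OrigCtxLen:  origCtxLen,
	RopeBase:    ropeBase,
	RopeScale:   ropeScale,
})
```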
```diff
@@ -1,27 +1,11 @@
 package ggml

-/*
-#cgo CPPFLAGS: -I${SRCDIR}/ggml/include
-#include <stdlib.h>
-#include <stdint.h>
-#include "ggml.h"
-#include "ggml-cpu.h"
-#include "ggml-backend.h"
-static struct ggml_backend_feature * getBackendFeatures(void *fp, ggml_backend_reg_t reg) {return ((ggml_backend_get_features_t)(fp))(reg);}
-static struct ggml_backend_feature * getNextBackendFeatures(struct ggml_backend_feature * feature) { return &feature[1];}
-
-typedef enum {COMP_UNKNOWN,COMP_GCC,COMP_CLANG} COMPILER;
-COMPILER inline get_compiler() {
-#if defined(__clang__)
-	return COMP_CLANG;
-#elif defined(__GNUC__)
-	return COMP_GCC;
-#else
-	return UNKNOWN_COMPILER;
-#endif
-}
-
-*/
+// #cgo CPPFLAGS: -I${SRCDIR}/ggml/include
+// #include <stdlib.h>
+// #include <stdint.h>
+// #include "ggml.h"
+// #include "ggml-cpu.h"
+// #include "ggml-backend.h"
 import "C"

 import (
```

```diff
@@ -595,13 +579,9 @@ func (t *Tensor) View(ctx ml.Context, offset int, shape ...int) ml.Tensor {
 	}
 }

-const (
-	ropeTypeNorm C.int = iota
-)
-
-func (t *Tensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, ropeDim uint32, ropeBase, ropeScale float32) ml.Tensor {
-	if ropeFactors == nil {
-		ropeFactors = &Tensor{}
+func (t *Tensor) RoPE(ctx ml.Context, rc ml.RopeConfig) ml.Tensor {
+	if rc.RopeFactors == nil {
+		rc.RopeFactors = &Tensor{}
 	}

 	dequant := t.t
```

```diff
@@ -611,12 +591,15 @@ func (t *Tensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, ropeDi
 	return &Tensor{
 		t: C.ggml_rope_ext(
-			ctx.(*Context).ctx, dequant, positionIDs.(*Tensor).t, ropeFactors.(*Tensor).t,
-			C.int(ropeDim),
-			131072,       // YaRN n_ctx_train
-			ropeTypeNorm, // ROPE_TYPE_NORM
-			C.float(ropeBase),
-			C.float(ropeScale),
+			ctx.(*Context).ctx,
+			dequant,
+			rc.PositionIDs.(*Tensor).t,
+			rc.RopeFactors.(*Tensor).t,
+			C.int(rc.RopeDim),
+			C.int(rc.RopeType),
+			C.int(rc.OrigCtxLen),
+			C.float(rc.RopeBase),
+			C.float(rc.RopeScale),
 			0.,  // YaRN ext_factor
 			1.,  // YaRN attn_factor
 			32., // YaRN beta_fast
```

```diff
@@ -642,34 +625,3 @@ func (t *Tensor) Conv2D(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int
 		t: C.ggml_conv_2d(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1)),
 	}
 }
-
-func (b *Backend) SystemInfo() string {
-	var compiler string
-	switch C.get_compiler() {
-	case C.COMP_UNKNOWN:
-		compiler = "cgo(unknown_compiler)"
-	case C.COMP_GCC:
-		compiler = "cgo(gcc)"
-	case C.COMP_CLANG:
-		compiler = "cgo(clang)"
-	}
-
-	var s string
-	for i := range C.ggml_backend_reg_count() {
-		reg := C.ggml_backend_reg_get(i)
-		fName := C.CString("ggml_backend_get_features")
-		defer C.free(unsafe.Pointer(fName))
-		get_features_fn := C.ggml_backend_reg_get_proc_address(reg, fName)
-		if get_features_fn != nil {
-			s += C.GoString(C.ggml_backend_reg_name(reg))
-			s += " : "
-			for features := C.getBackendFeatures(get_features_fn, reg); features.name != nil; features = C.getNextBackendFeatures(features) {
-				s += C.GoString(features.name)
-				s += " = "
-				s += C.GoString(features.value)
-				s += " | "
-			}
-		}
-	}
-	return s + compiler
-}
```
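Beyond the struct plumbing, the new call spells out each `ggml_rope_ext` argument on its own line, passing the rope mode (`rc.RopeType`) before the original training context length (`rc.OrigCtxLen`), which matches the parameter order ggml declares (`n_dims, mode, n_ctx_orig, freq_base, freq_scale, ...`). The old call appears to have passed the hard-coded `131072` ahead of `ropeTypeNorm`, i.e. in the opposite order, so the rewrite likely also fixes an argument swap. The trailing `0., 1., 32.` literals are the YaRN `ext_factor`, `attn_factor`, and `beta_fast` parameters, still hard-coded as before.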
```diff
@@ -21,7 +21,6 @@ import (
 	_ "github.com/ollama/ollama/ml/backend"
 )

-// Options contains the inputs for a model forward pass
 type Options struct {
 	Inputs    []int32
 	Positions []int32
```

```diff
@@ -35,13 +34,11 @@ type config struct {
 	Cache kvcache.Cache
 }

-// Base implements the common fields and methods for all models
 type Base struct {
 	b ml.Backend
 	config
 }

-// Backend returns the underlying backend that will run the model
 func (m *Base) Backend() ml.Backend {
 	return m.b
 }
```

```diff
@@ -50,7 +47,6 @@ func (m *Base) Config() config {
 	return m.config
 }

-// Model implements a specific model architecture, defining the forward pass and any model-specific configuration
 type Model interface {
 	Forward(ml.Context, Options) (ml.Tensor, error)
```

```diff
@@ -60,7 +56,6 @@ type Model interface {

 var models = make(map[string]func(ml.Config) (Model, error))

-// Register registers a model constructor for the given architecture
 func Register(name string, f func(ml.Config) (Model, error)) {
 	if _, ok := models[name]; ok {
 		panic("model: model already registered")
```

```diff
@@ -69,9 +64,8 @@ func Register(name string, f func(ml.Config) (Model, error)) {
 	models[name] = f
 }

-// New initializes a new model instance with the provided configuration based on the metadata in the model file
-func New(modelPath string) (Model, error) {
-	r, err := os.Open(modelPath)
+func New(s string) (Model, error) {
+	r, err := os.Open(s)
 	if err != nil {
 		return nil, err
 	}
```
model/model_external_test.go (new file, 138 lines)
@@ -0,0 +1,138 @@
```go
// Package model_test provides external tests for the model package.
// This test file specifically tests the forward pass functionality on models.
// It is in a separate package (model_test) to avoid import cycles while still
// being able to test the public API of the model package.
package model_test

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/ollama/ollama/ml"
	"github.com/ollama/ollama/model"
	"github.com/ollama/ollama/sample"

	_ "github.com/ollama/ollama/model/models"
)

type modelTest struct {
	Prompt            string   `json:"prompt"`
	OutputContainsOne []string `json:"output_contains_one"`
}

func TestForwardSimple(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping in short mode")
	}

	// Read all JSON files from testdata/models
	files, err := os.ReadDir("testdata/models")
	if err != nil {
		t.Fatal(err)
	}

	for _, file := range files {
		if !strings.HasSuffix(file.Name(), ".json") {
			continue
		}

		jsonPath := filepath.Join("testdata/models", file.Name())
		ggufPath := filepath.Join("testdata/models", strings.TrimSuffix(file.Name(), ".json")+".gguf")

		// Skip if no corresponding .gguf file exists
		if _, err := os.Stat(ggufPath); err != nil {
			t.Logf("skipping %s: no corresponding GGUF file found", file.Name())
			continue
		}

		data, err := os.ReadFile(jsonPath)
		if err != nil {
			t.Fatal(err)
		}

		var test modelTest
		if err := json.Unmarshal(data, &test); err != nil {
			t.Fatal(err)
		}

		t.Run(strings.TrimSuffix(file.Name(), ".json"), func(t *testing.T) {
			m, err := model.New(ggufPath)
			if err != nil {
				t.Fatal(err)
			}

			m.Config().Cache.Init(m.Backend(), ml.DTypeF32, 2048)

			inputs, err := m.(model.TextProcessor).Encode(test.Prompt)
			if err != nil {
				t.Fatal(err)
			}

			var result []string
			for len(result) < 100 { // Limit to 100 tokens max
				options := model.Options{
					Inputs:    inputs,
					Positions: make([]int32, len(inputs)),
					Sequences: make([]int, len(inputs)),
					Outputs:   []int32{int32(len(inputs) - 1)},
				}
				for i := range options.Positions {
					options.Positions[i] = int32(i)
					options.Sequences[i] = 0
				}

				ctx := m.Backend().NewContext()

				modelOutput, err := model.Forward(ctx, m, options)
				if err != nil {
					ctx.Close()
					t.Fatal(fmt.Errorf("forward pass failed: %v", err))
				}

				f32s := modelOutput.Floats()
				logits := make([]float64, len(f32s))
				for i, f32 := range f32s {
					logits[i] = float64(f32)
				}

				token, err := sample.Sample(logits, sample.Greedy())
				if err != nil {
					ctx.Close()
					t.Fatal(fmt.Errorf("sampling failed: %v", err))
				}

				ctx.Close()

				// Greedy sampling: take the token with the highest logit
				nextToken := int32(token[0])
				if m.(model.TextProcessor).Is(nextToken, model.SpecialEOS) {
					break
				}

				piece, err := m.(model.TextProcessor).Decode([]int32{nextToken})
				if err != nil {
					t.Fatal(err)
				}

				result = append(result, piece)
				output := strings.Join(result, "")

				for _, expectedOutput := range test.OutputContainsOne {
					if strings.Contains(output, expectedOutput) {
						t.Logf("Test passed with output: %q (matched expected: %q)", output, expectedOutput)
						return
					}
				}

				// Maintain full context by appending new token
				inputs = append(inputs, nextToken)
			}

			t.Fatalf("Expected output containing one of %q but got: %q", test.OutputContainsOne, strings.Join(result, ""))
		})
	}
}
```
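With a matching `.gguf` weights file placed next to a JSON fixture (for example the `qwen2_5.json` added below), the test can be run with something like `go test ./model -run TestForwardSimple` (without `-short`); each fixture drives one greedy decode of up to 100 tokens and passes as soon as the generated text contains any of the `output_contains_one` strings.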
```diff
@@ -10,10 +10,10 @@ import (
 type Options struct {
-	RopeFactors ml.Tensor `gguf:"rope_freqs.weight"`
-	hiddenSize, numHeads, numKVHeads int
-	eps, ropeBase, ropeScale         float32
-	ropeDim                          uint32
+	RopeFactors ml.Tensor `gguf:"rope_freqs.weight"`
+	ctxLen, hiddenSize, numHeads, numKVHeads int
+	eps, ropeBase, ropeScale                 float32
+	ropeDim                                  uint32
 }

 type Model struct {
```

```diff
@@ -46,6 +46,7 @@ func New(c ml.Config) (model.Model, error) {
 			numHeads:   int(c.Uint("attention.head_count")),
 			numKVHeads: int(c.Uint("attention.head_count_kv")),
 			eps:        c.Float("attention.layer_norm_rms_epsilon"),
+			ctxLen:     int(c.Uint("context_length")),
 			ropeBase:   c.Float("rope.freq_base"),
 			ropeScale:  c.Float("rope.freq_scale", 1),
 			ropeDim:    c.Uint("rope.dimension_count"),
```

```diff
@@ -67,14 +68,23 @@ type SelfAttention struct {
 func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
 	batchSize := hiddenState.Dim(1)
 	headDim := opts.hiddenSize / opts.numHeads
+	rc := ml.RopeConfig{
+		PositionIDs: positionIDs,
+		RopeFactors: opts.RopeFactors,
+		RopeDim:     opts.ropeDim,
+		RopeType:    ml.RopeTypeStandard,
+		OrigCtxLen:  opts.ctxLen,
+		RopeBase:    opts.ropeBase,
+		RopeScale:   opts.ropeScale,
+	}

 	q := sa.Query.Forward(ctx, hiddenState)
 	q = q.Reshape(ctx, headDim, opts.numHeads, batchSize)
-	q = q.RoPE(ctx, positionIDs, opts.RopeFactors, opts.ropeDim, opts.ropeBase, opts.ropeScale)
+	q = q.RoPE(ctx, rc)

 	k := sa.Key.Forward(ctx, hiddenState)
 	k = k.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
-	k = k.RoPE(ctx, positionIDs, opts.RopeFactors, opts.ropeDim, opts.ropeBase, opts.ropeScale)
+	k = k.RoPE(ctx, rc)

 	v := sa.Value.Forward(ctx, hiddenState)
 	v = v.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
```

```diff
@@ -99,7 +109,18 @@ func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Ten
 }

 func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
-	return key.RoPE(ctx, shift, m.Options.RopeFactors, m.Options.ropeDim, m.Options.ropeBase, m.Options.ropeScale), nil
+	return key.RoPE(
+		ctx,
+		ml.RopeConfig{
+			PositionIDs: shift,
+			RopeFactors: m.Options.RopeFactors,
+			RopeDim:     m.Options.ropeDim,
+			RopeType:    ml.RopeTypeStandard,
+			OrigCtxLen:  m.Options.ctxLen,
+			RopeBase:    m.Options.ropeBase,
+			RopeScale:   m.Options.ropeScale,
+		},
+	), nil
 }

 type MLP struct {
```
```diff
@@ -19,14 +19,23 @@ type TextSelfAttention struct {
 func (sa *TextSelfAttention) Forward(ctx ml.Context, hiddenState, positions, _ ml.Tensor, cache *kvcache.WrapperCache, opts *TextModelOptions) ml.Tensor {
 	batchSize := hiddenState.Dim(1)
 	headDim := opts.hiddenSize / opts.numHeads
+	rc := ml.RopeConfig{
+		PositionIDs: positions,
+		RopeFactors: opts.RopeFactors,
+		RopeDim:     opts.ropeDim,
+		RopeType:    ml.RopeTypeStandard,
+		OrigCtxLen:  opts.ctxLen,
+		RopeBase:    opts.ropeBase,
+		RopeScale:   opts.ropeScale,
+	}

 	query := sa.Query.Forward(ctx, hiddenState)
 	query = query.Reshape(ctx, headDim, opts.numHeads, batchSize)
-	query = query.RoPE(ctx, positions, opts.RopeFactors, opts.ropeDim, opts.ropeBase, opts.ropeScale)
+	query = query.RoPE(ctx, rc)

 	key := sa.Key.Forward(ctx, hiddenState)
 	key = key.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
-	key = key.RoPE(ctx, positions, opts.RopeFactors, opts.ropeDim, opts.ropeBase, opts.ropeScale)
+	key = key.RoPE(ctx, rc)

 	value := sa.Value.Forward(ctx, hiddenState)
 	value = value.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
```

```diff
@@ -52,7 +61,18 @@ func (sa *TextSelfAttention) Forward(ctx ml.Context, hiddenState, positions, _ m
 func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
 	// This will only get called for layers in the cache, which are just the self attention layers
-	return key.RoPE(ctx, shift, m.RopeFactors, m.ropeDim, m.ropeBase, m.ropeScale), nil
+	return key.RoPE(
+		ctx,
+		ml.RopeConfig{
+			PositionIDs: shift,
+			RopeFactors: m.RopeFactors,
+			RopeDim:     m.ropeDim,
+			RopeType:    ml.RopeTypeStandard,
+			OrigCtxLen:  m.ctxLen,
+			RopeBase:    m.ropeBase,
+			RopeScale:   m.ropeScale,
+		},
+	), nil
 }

 type TextMLP struct {
```

```diff
@@ -189,9 +209,9 @@ func (d *TextDecoder) Forward(ctx ml.Context, hiddenState, positionIDs, mask, cr
 type TextModelOptions struct {
 	RopeFactors ml.Tensor `gguf:"rope_freqs.weight"`

-	hiddenSize, numHeads, numKVHeads int
-	eps, ropeBase, ropeScale         float32
-	ropeDim                          uint32
+	ctxLen, hiddenSize, numHeads, numKVHeads int
+	eps, ropeBase, ropeScale                 float32
+	ropeDim                                  uint32

 	crossAttentionLayers []uint32
 }
```
```diff
@@ -3,4 +3,5 @@ package models
 import (
 	_ "github.com/ollama/ollama/model/models/llama"
 	_ "github.com/ollama/ollama/model/models/mllama"
+	_ "github.com/ollama/ollama/model/models/qwen2"
 )
```
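The blank import is what wires the new architecture in: importing the package runs its `init`, which calls `model.Register("qwen2", New)` (see the bottom of the new file below), so `model.New` can construct the model when a GGUF file reports `qwen2` as its architecture.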
model/models/qwen2/model.go (new file, 222 lines)
@@ -0,0 +1,222 @@
|
||||
package qwen2
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/ollama/ollama/kvcache"
|
||||
"github.com/ollama/ollama/ml"
|
||||
"github.com/ollama/ollama/ml/nn"
|
||||
"github.com/ollama/ollama/model"
|
||||
)
|
||||
|
||||
type Options struct {
|
||||
RopeFactors ml.Tensor `gguf:"rope_freqs.weight"`
|
||||
contextLength int
|
||||
hiddenSize int
|
||||
numAttnHeads int
|
||||
numKVHeads int
|
||||
modelEpsilon float32
|
||||
ropeBaseFreq float32
|
||||
ropeFreqScale float32
|
||||
ropeDimensions uint32
|
||||
}
|
||||
|
||||
type Model struct {
|
||||
model.Base
|
||||
model.BytePairEncoding
|
||||
|
||||
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
||||
Layers []Layer `gguf:"blk"`
|
||||
OutputNorm *nn.RMSNorm `gguf:"output_norm"`
|
||||
Output *nn.Linear `gguf:"output,alt:token_embd"`
|
||||
|
||||
*Options
|
||||
}
|
||||
|
||||
func New(c ml.Config) (model.Model, error) {
|
||||
m := &Model{
|
||||
BytePairEncoding: model.NewBytePairEncoding(
|
||||
c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
|
||||
&model.Vocabulary{
|
||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||
Types: c.Uints("tokenizer.ggml.token_type"),
|
||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
||||
BOS: int32(c.Uint("tokenizer.ggml.bos_token_id")),
|
||||
EOS: int32(c.Uint("tokenizer.ggml.eos_token_id")),
|
||||
},
|
||||
),
|
||||
Layers: make([]Layer, c.Uint("block_count")),
|
||||
Options: &Options{
|
||||
hiddenSize: int(c.Uint("embedding_length")),
|
||||
numAttnHeads: int(c.Uint("attention.head_count")),
|
||||
numKVHeads: int(c.Uint("attention.head_count_kv")),
|
||||
modelEpsilon: c.Float("attention.layer_norm_rms_epsilon"),
|
||||
contextLength: int(c.Uint("context_length")),
|
||||
ropeBaseFreq: c.Float("rope.freq_base"),
|
||||
ropeFreqScale: c.Float("rope.freq_scale", 1),
|
||||
ropeDimensions: c.Uint("rope.dimension_count", 64),
|
||||
},
|
||||
}
|
||||
|
||||
m.Cache = kvcache.NewCausalCache(m.Shift)
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Shift applies rotary position embeddings to the key tensor for causal attention caching
|
||||
func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
|
||||
return key.RoPE(
|
||||
ctx,
|
||||
ml.RopeConfig{
|
||||
PositionIDs: shift,
|
||||
RopeFactors: m.Options.RopeFactors,
|
||||
RopeDim: m.Options.ropeDimensions,
|
||||
RopeType: ml.RopeTypeNeoX,
|
||||
OrigCtxLen: m.Options.contextLength,
|
||||
RopeBase: m.Options.ropeBaseFreq,
|
||||
RopeScale: m.Options.ropeFreqScale,
|
||||
},
|
||||
), nil
|
||||
}
|
||||
|
||||
// SelfAttention implements the multi-head self-attention mechanism
|
||||
// with separate projections for query, key, value and output transformations
|
||||
type SelfAttention struct {
|
||||
Query *nn.Linear `gguf:"attn_q"`
|
||||
Key *nn.Linear `gguf:"attn_k"`
|
||||
Value *nn.Linear `gguf:"attn_v"`
|
||||
Output *nn.Linear `gguf:"attn_output"`
|
||||
}
|
||||
|
||||
func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, inputPositions ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
|
||||
// Initialize dimensions and configuration
|
||||
batchSize := hiddenState.Dim(1)
|
||||
headDimension := opts.hiddenSize / opts.numAttnHeads
|
||||
ropeConfig := ml.RopeConfig{
|
||||
PositionIDs: inputPositions,
|
||||
RopeFactors: nil,
|
||||
RopeDim: opts.ropeDimensions,
|
||||
RopeType: ml.RopeTypeNeoX,
|
||||
OrigCtxLen: opts.contextLength,
|
||||
RopeBase: opts.ropeBaseFreq,
|
||||
RopeScale: opts.ropeFreqScale,
|
||||
}
|
||||
|
||||
// Project and reshape query states with rotary embeddings
|
||||
queryStates := sa.Query.Forward(ctx, hiddenState)
|
||||
queryStates = queryStates.Reshape(ctx, headDimension, opts.numAttnHeads, batchSize)
|
||||
queryStates = queryStates.RoPE(ctx, ropeConfig)
|
||||
|
||||
// Project and reshape key states with rotary embeddings
|
||||
keyStates := sa.Key.Forward(ctx, hiddenState)
|
||||
keyStates = keyStates.Reshape(ctx, headDimension, opts.numKVHeads, batchSize)
|
||||
keyStates = keyStates.RoPE(ctx, ropeConfig)
|
||||
|
||||
// Project and reshape value states
|
||||
valueStates := sa.Value.Forward(ctx, hiddenState)
|
||||
valueStates = valueStates.Reshape(ctx, headDimension, opts.numKVHeads, batchSize)
|
||||
|
||||
// Update and retrieve from KV cache
|
||||
cache.Put(ctx, keyStates, valueStates)
|
||||
keyStates, valueStates, attentionMask := cache.Get(ctx)
|
||||
|
||||
// Prepare tensors for attention computation
|
||||
queryStates = queryStates.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
|
||||
keyStates = keyStates.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
|
||||
valueStates = valueStates.Permute(ctx, 1, 2, 0, 3).Contiguous(ctx)
|
||||
|
||||
// Apply scaling and attention mask to scores
|
||||
attentionScores := keyStates.MulmatFullPrec(ctx, queryStates)
|
||||
attentionScores = attentionScores.Scale(ctx, 1.0/math.Sqrt(float64(headDimension)))
|
||||
attentionScores = attentionScores.Add(ctx, attentionMask)
|
||||
// Compute scaled dot-product attention
|
||||
attentionProbs := attentionScores.Softmax(ctx)
|
||||
|
||||
// Apply attention weights and reshape
|
||||
weightedStates := valueStates.Mulmat(ctx, attentionProbs)
|
||||
weightedStates = weightedStates.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
|
||||
weightedStates = weightedStates.Reshape(ctx, opts.hiddenSize, batchSize)
|
||||
|
||||
// Project to output dimension
|
||||
return sa.Output.Forward(ctx, weightedStates)
|
||||
}
|
||||
|
||||
// MLP implements the feed-forward network component with SwiGLU activation
|
||||
type MLP struct {
|
||||
Up *nn.Linear `gguf:"ffn_up"`
|
||||
Down *nn.Linear `gguf:"ffn_down"`
|
||||
Gate *nn.Linear `gguf:"ffn_gate"`
|
||||
}
|
||||
|
||||
func (mlp *MLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *Options) ml.Tensor {
|
||||
// Apply SwiGLU activation gating
|
||||
gateActivation := mlp.Gate.Forward(ctx, hiddenState).SILU(ctx)
|
||||
upProjection := mlp.Up.Forward(ctx, hiddenState)
|
||||
intermediateStates := gateActivation.Mul(ctx, upProjection)
|
||||
|
||||
// Project back to hidden dimension
|
||||
return mlp.Down.Forward(ctx, intermediateStates)
|
||||
}
|
||||
|
||||
// Layer represents a single transformer layer combining self-attention and feed-forward components
|
||||
type Layer struct {
|
||||
AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
|
||||
SelfAttention *SelfAttention
|
||||
MLPNorm *nn.RMSNorm `gguf:"ffn_norm"`
|
||||
MLP *MLP
|
||||
}
|
||||
|
||||
func (l *Layer) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
|
||||
// Self-attention branch with residual connection
|
||||
residual := hiddenState
|
||||
|
||||
normalizedAttention := l.AttentionNorm.Forward(ctx, hiddenState, opts.modelEpsilon)
|
||||
attentionOutput := l.SelfAttention.Forward(ctx, normalizedAttention, positionIDs, cache, opts)
|
||||
hiddenState = attentionOutput.Add(ctx, residual)
|
||||
|
||||
// Feed-forward branch with residual connection
|
||||
residual = hiddenState
|
||||
normalizedMLP := l.MLPNorm.Forward(ctx, hiddenState, opts.modelEpsilon)
|
||||
mlpOutput := l.MLP.Forward(ctx, normalizedMLP, opts)
|
||||
output := mlpOutput.Add(ctx, residual)
|
||||
|
||||
return output
|
||||
}
|
||||
|
||||
func (m *Model) Forward(ctx ml.Context, opts model.Options) (ml.Tensor, error) {
|
||||
// Convert input tokens and positions to tensors
|
||||
inputTensor, err := ctx.FromIntSlice(opts.Inputs, len(opts.Inputs))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
positionsTensor, err := ctx.FromIntSlice(opts.Positions, len(opts.Positions))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Initial token embedding
|
||||
hiddenStates := m.TokenEmbedding.Forward(ctx, inputTensor)
|
||||
|
||||
// Process through transformer layers
|
||||
for i, layer := range m.Layers {
|
||||
m.Cache.SetLayer(i)
|
||||
hiddenStates = layer.Forward(ctx, hiddenStates, positionsTensor, m.Cache, m.Options)
|
||||
}
|
||||
|
||||
// Final layer normalization and output projection
|
||||
normalizedOutput := m.OutputNorm.Forward(ctx, hiddenStates, m.modelEpsilon)
|
||||
logits := m.Output.Forward(ctx, normalizedOutput)
|
||||
|
||||
// Extract requested output token positions
|
||||
outputsTensor, err := ctx.FromIntSlice(opts.Outputs, len(opts.Outputs))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return logits.Rows(ctx, outputsTensor), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
model.Register("qwen2", New)
|
||||
}
|
||||
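`SelfAttention.Forward` above is a direct implementation of masked scaled dot-product attention; in the usual notation, with $d$ the head dimension (`hiddenSize / numAttnHeads`) and $M$ the causal mask returned by the KV cache:

```latex
\mathrm{Attention}(Q, K, V) = \mathrm{softmax}\!\left(\frac{QK^{\top}}{\sqrt{d}} + M\right)V
```

The `Permute`/`Contiguous` calls only rearrange the `(headDim, heads, tokens)` tensor layout so that the two `Mulmat` calls realize $QK^{\top}$ and the weighted sum over $V$.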
model/testdata/models/README.md (new file, vendored, 10 lines)
@@ -0,0 +1,10 @@
```markdown
# Test Model Directory

This directory is used for storing model files (like `.gguf` files) that are required to run the tests in `model_external_test.go`.

## Usage

- Place any model files you need for testing in this directory
- The test file will look for any model files here (e.g., `llama3.gguf`)
- All non-markdown files in this directory are git-ignored to prevent large model files from being committed to the repository
- Only `.md` files (like this README) will be tracked in git
```
model/testdata/models/qwen2_5.json (new file, vendored, 7 lines)
@@ -0,0 +1,7 @@
```json
{
    "prompt": "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\nhi<|im_end|>\n<|im_start|>assistant\n",
    "output_contains_one": [
        "Hello",
        "Hi"
    ]
}
```
```diff
@@ -1,7 +1,6 @@
 package progress

 import (
-	"bufio"
 	"fmt"
 	"io"
 	"sync"
```

```diff
@@ -14,8 +13,7 @@ type State interface {
 type Progress struct {
 	mu sync.Mutex
-	// buffer output to minimize flickering on all terminals
-	w *bufio.Writer
+	w io.Writer

 	pos int
```

```diff
@@ -24,7 +22,7 @@ type Progress struct {
 }

 func NewProgress(w io.Writer) *Progress {
-	p := &Progress{w: bufio.NewWriter(w)}
+	p := &Progress{w: w}
 	go p.start()
 	return p
 }
```

```diff
@@ -49,29 +47,26 @@ func (p *Progress) stop() bool {
 func (p *Progress) Stop() bool {
 	stopped := p.stop()
 	if stopped {
-		fmt.Fprintln(p.w)
+		fmt.Fprint(p.w, "\n")
 	}

 	// show cursor
 	fmt.Fprint(p.w, "\033[?25h")
-	p.w.Flush()
 	return stopped
 }

 func (p *Progress) StopAndClear() bool {
+	fmt.Fprint(p.w, "\033[?25l")
+	defer fmt.Fprint(p.w, "\033[?25h")
+
 	stopped := p.stop()
 	if stopped {
 		// clear all progress lines
-		for range p.pos - 1 {
-			fmt.Fprint(p.w, "\033[A")
+		for i := range p.pos {
+			if i > 0 {
+				fmt.Fprint(p.w, "\033[A")
+			}
+			fmt.Fprint(p.w, "\033[2K\033[1G")
 		}
-
-		fmt.Fprint(p.w, "\033[2K", "\033[1G")
 	}

-	// show cursor
-	fmt.Fprint(p.w, "\033[?25h")
-	p.w.Flush()
 	return stopped
 }
```

```diff
@@ -86,31 +81,30 @@ func (p *Progress) render() {
 	p.mu.Lock()
 	defer p.mu.Unlock()

-	fmt.Fprint(p.w, "\033[?2026h")
-	defer fmt.Fprint(p.w, "\033[?2026l")
+	fmt.Fprint(p.w, "\033[?25l")
+	defer fmt.Fprint(p.w, "\033[?25h")

-	for range p.pos - 1 {
-		fmt.Fprint(p.w, "\033[A")
+	// clear already rendered progress lines
+	for i := range p.pos {
+		if i > 0 {
+			fmt.Fprint(p.w, "\033[A")
+		}
+		fmt.Fprint(p.w, "\033[2K\033[1G")
 	}

-	fmt.Fprint(p.w, "\033[1G")
-
 	// render progress lines
 	for i, state := range p.states {
-		fmt.Fprint(p.w, state.String(), "\033[K")
+		fmt.Fprint(p.w, state.String())
 		if i < len(p.states)-1 {
 			fmt.Fprint(p.w, "\n")
 		}
 	}

 	p.pos = len(p.states)
-	p.w.Flush()
 }

 func (p *Progress) start() {
 	p.ticker = time.NewTicker(100 * time.Millisecond)
-	// hide cursor
-	fmt.Fprint(p.w, "\033[?25l")
 	for range p.ticker.C {
 		p.render()
 	}
```
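The progress rewrite trades the old approach, a `bufio.Writer` plus the DEC synchronized-output escapes `ESC[?2026h`/`ESC[?2026l` (which not all terminals support), for plain unbuffered writes that hide the cursor during each redraw. The escapes it relies on are standard: `\033[A` moves the cursor up one line, `\033[2K` erases the current line, `\033[1G` returns to column 1, and `\033[?25l`/`\033[?25h` hide and show the cursor. A runnable toy redraw loop using the same sequences:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	fmt.Print("\033[?25l")       // hide cursor while redrawing
	defer fmt.Print("\033[?25h") // show it again on exit

	for n := 0; n <= 100; n += 25 {
		if n > 0 {
			fmt.Print("\033[A\033[A") // move up over the two lines rendered last tick
		}
		fmt.Printf("\033[2K\033[1Gdownload: %3d%%\n", n) // erase line, go to column 1, redraw
		fmt.Printf("\033[2K\033[1Gverify:   %3d%%\n", n/2)
		time.Sleep(200 * time.Millisecond)
	}
}
```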
```diff
@@ -813,8 +813,6 @@ func (s *Server) loadModel(
 		panic(err)
 	}

-	slog.Info("system", "info", s.model.Backend().SystemInfo() /* "threads", *threads */)
-
 	// TODO(jessegross): LoRA loading
 	if lpath.String() != "" {
 		panic("loras are not yet implemented")
```

```diff
@@ -883,6 +881,7 @@ func Execute(args []string) error {
 	})
 	slog.SetDefault(slog.New(handler))
+	slog.Info("starting ollama engine")
 	// TODO(jessegross): Some system info would be useful

 	server := &Server{
 		batchSize: *batchSize,
```
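These runner hunks are the consumer side of the `SystemInfo` removal earlier in the diff: with `Backend.SystemInfo()` gone from the `ml` interface and its ggml implementation, `loadModel` can no longer log it, and the `TODO(jessegross): Some system info would be useful` comment plus the new `starting ollama engine` log line stand in until a replacement exists.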