Compare commits: 10 commits
jmorganca/... (base) vs. brucemacd/... (head)
Commits:
- 31d04eb795
- 7b5d916a9a
- 33ad61b112
- 716e365615
- 3b4424ff98
- 0667baddc6
- d006e1e09b
- df2680b4b9
- 010313bb63
- 5296f487a8
.github/workflows/release.yaml (vendored), 4 lines changed
@@ -329,7 +329,9 @@ jobs:
           done
         working-directory: dist/${{ matrix.os }}-${{ matrix.arch }}
       - run: |
-          for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz); done
+          for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do
+            tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE --owner 0 --group 0 | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz);
+          done
       - uses: actions/upload-artifact@v4
         with:
           name: dist-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}
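The functional change here is the added `--owner 0 --group 0`: every archive entry is stamped as root-owned, so the published tarballs no longer record the CI runner's uid/gid and stay consistent across runners (`${ARCHIVE//.*/}` strips everything from the first `.`, turning `foo.tar.in` into `foo` before `.tgz` is appended). A minimal Go sketch of the same normalization idea using archive/tar; the file names below are illustrative, not part of this diff:

package main

import (
	"archive/tar"
	"log"
	"os"
)

func main() {
	f, err := os.Create("out.tar")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	tw := tar.NewWriter(f)
	defer tw.Close()

	data := []byte("hello\n")
	hdr := &tar.Header{
		Name: "hello.txt",
		Mode: 0o644,
		Size: int64(len(data)),
		// Zero out ownership, mirroring tar's --owner 0 --group 0:
		// the entry no longer leaks the build machine's user or group.
		Uid:   0,
		Gid:   0,
		Uname: "",
		Gname: "",
	}
	if err := tw.WriteHeader(hdr); err != nil {
		log.Fatal(err)
	}
	if _, err := tw.Write(data); err != nil {
		log.Fatal(err)
	}
}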
.gitignore (vendored), 3 lines changed
@@ -14,3 +14,6 @@ test_data
 __debug_bin*
 llama/build
 llama/vendor
+model/testdata/models/*
+!model/testdata/models/*.md
+!model/testdata/models/*.json
README.md

@@ -381,6 +381,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
 - [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
 - [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
+- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally)

 ### Cloud
@@ -548,6 +549,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow)
 - [TextLLaMA](https://github.com/adarshM84/TextLLaMA) A Chrome Extension that helps you write emails, correct grammar, and translate into any language
 - [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)
+- [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) (Telegram bot, primarily for RP. Oobabooga-like buttons, [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) API integration, etc.)

 ### Supported backends
@@ -19,6 +19,10 @@ var LibOllamaPath string = func() string {
 		return ""
 	}
 
+	if eval, err := filepath.EvalSymlinks(exe); err == nil {
+		exe = eval
+	}
+
 	var libPath string
 	switch runtime.GOOS {
 	case "windows":
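This hunk adds the same `filepath.EvalSymlinks` block that `NewLlamaServer` gains below, while `OnceLoad` loses it further down: the running binary is resolved through any symlink before sibling paths are derived from it. A standalone sketch of the pattern, under the assumption that the goal is locating bundled files relative to the real install directory:

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func executableDir() (string, error) {
	exe, err := os.Executable()
	if err != nil {
		return "", err
	}
	// Best effort: if the binary was launched via a symlink
	// (e.g. /usr/local/bin/ollama -> /opt/ollama/bin/ollama),
	// follow it so relative lookups use the real install dir.
	if eval, err := filepath.EvalSymlinks(exe); err == nil {
		exe = eval
	}
	return filepath.Dir(exe), nil
}

func main() {
	dir, err := executableDir()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("install dir:", dir)
}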
docs/windows.md

@@ -55,7 +55,7 @@ Here's a quick example showing API access from `powershell`
 ## Troubleshooting
 
 Ollama on Windows stores files in a few different locations. You can view them in
-the explorer window by hitting `<cmd>+R` and type in:
+the explorer window by hitting `<Ctrl>+R` and typing in:
 - `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
     - *app.log* contains the most recent logs from the GUI application
     - *server.log* contains the most recent server logs
@@ -12,6 +12,9 @@ func TestHumanNumber(t *testing.T) {
 
 	testCases := []testCase{
 		{0, "0"},
+		{999, "999"},
+		{1000, "1K"},
+		{1001, "1K"},
 		{1000000, "1M"},
 		{125000000, "125M"},
 		{500500000, "500.50M"},
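The new cases pin down the boundary behaviour: values below 1000 print verbatim, 1001 truncates to "1K", and a non-integral number of millions keeps two decimals ("500.50M"). A hedged re-implementation that satisfies exactly these cases; the real format.HumanNumber may differ internally:

package main

import "fmt"

func humanNumber(n uint64) string {
	switch {
	case n >= 1_000_000:
		if n%1_000_000 == 0 {
			return fmt.Sprintf("%dM", n/1_000_000)
		}
		// Non-integral millions keep two decimals: 500500000 -> "500.50M"
		return fmt.Sprintf("%.2fM", float64(n)/1_000_000)
	case n >= 1_000:
		return fmt.Sprintf("%dK", n/1_000) // integer division truncates: 1001 -> "1K"
	default:
		return fmt.Sprintf("%d", n)
	}
}

func main() {
	for _, n := range []uint64{0, 999, 1000, 1001, 1000000, 125000000, 500500000} {
		fmt.Println(n, "->", humanNumber(n))
	}
}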
@@ -305,6 +305,10 @@ func (b *testBackend) NewContext() ml.Context {
 	return &testContext{}
 }
 
+func (b *testBackend) SystemInfo() string {
+	return "not implemented"
+}
+
 type testContext struct{}
 
 func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
@@ -320,6 +320,10 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
 		return nil, fmt.Errorf("unable to lookup executable path: %w", err)
 	}
 
+	if eval, err := filepath.EvalSymlinks(exe); err == nil {
+		exe = eval
+	}
+
 	// TODO - once fully switched to the Go runner, load the model here for tokenize/detokenize cgo access
 	s := &llmServer{
 		port: port,
@@ -23,6 +23,7 @@ type Backend interface {
 	Config() Config
 	Get(name string) Tensor
 	NewContext() Context
+	SystemInfo() string
 }
 
 var backends = make(map[string]func(*os.File) (Backend, error))
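Adding `SystemInfo() string` to the interface forces every implementation to grow the method, which is why the `testBackend` stub above and the ggml implementation below land in the same change. A compile-time assertion is a common way to catch a missed implementation early; this sketch is illustrative and not part of the diff:

package ggml

import "github.com/ollama/ollama/ml"

// Fails to compile if *Backend ever stops satisfying ml.Backend,
// e.g. when the interface gains a method such as SystemInfo.
var _ ml.Backend = (*Backend)(nil)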
@@ -1,11 +1,27 @@
 package ggml
 
-// #cgo CPPFLAGS: -I${SRCDIR}/ggml/include
-// #include <stdlib.h>
-// #include <stdint.h>
-// #include "ggml.h"
-// #include "ggml-cpu.h"
-// #include "ggml-backend.h"
+/*
+#cgo CPPFLAGS: -I${SRCDIR}/ggml/include
+#include <stdlib.h>
+#include <stdint.h>
+#include "ggml.h"
+#include "ggml-cpu.h"
+#include "ggml-backend.h"
+static struct ggml_backend_feature * getBackendFeatures(void *fp, ggml_backend_reg_t reg) {return ((ggml_backend_get_features_t)(fp))(reg);}
+static struct ggml_backend_feature * getNextBackendFeatures(struct ggml_backend_feature * feature) { return &feature[1];}
+
+typedef enum {COMP_UNKNOWN,COMP_GCC,COMP_CLANG} COMPILER;
+COMPILER inline get_compiler() {
+#if defined(__clang__)
+	return COMP_CLANG;
+#elif defined(__GNUC__)
+	return COMP_GCC;
+#else
+	return COMP_UNKNOWN;
+#endif
+}
+
+*/
 import "C"
 
 import (
@@ -626,3 +642,34 @@ func (t *Tensor) Conv2D(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int
 		t: C.ggml_conv_2d(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1)),
 	}
 }
+
+func (b *Backend) SystemInfo() string {
+	var compiler string
+	switch C.get_compiler() {
+	case C.COMP_UNKNOWN:
+		compiler = "cgo(unknown_compiler)"
+	case C.COMP_GCC:
+		compiler = "cgo(gcc)"
+	case C.COMP_CLANG:
+		compiler = "cgo(clang)"
+	}
+
+	var s string
+	for i := range C.ggml_backend_reg_count() {
+		reg := C.ggml_backend_reg_get(i)
+		fName := C.CString("ggml_backend_get_features")
+		defer C.free(unsafe.Pointer(fName))
+		get_features_fn := C.ggml_backend_reg_get_proc_address(reg, fName)
+		if get_features_fn != nil {
+			s += C.GoString(C.ggml_backend_reg_name(reg))
+			s += " : "
+			for features := C.getBackendFeatures(get_features_fn, reg); features.name != nil; features = C.getNextBackendFeatures(features) {
+				s += C.GoString(features.name)
+				s += " = "
+				s += C.GoString(features.value)
+				s += " | "
+			}
+		}
+	}
+	return s + compiler
+}
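Go cannot call a raw C function pointer directly, which is why the cgo preamble above defines the static `getBackendFeatures` trampoline around the pointer returned by `ggml_backend_reg_get_proc_address`, and `getNextBackendFeatures` to step through the C array. Judging from the string assembly, the result looks something like "CPU : SSE3 = 1 | AVX = 1 | ... | cgo(gcc)" (a hypothetical rendering, not captured output). A minimal sketch of how a caller consumes the new method; `fakeBackend` is illustrative only:

package main

import "fmt"

type Backend interface {
	SystemInfo() string
}

type fakeBackend struct{}

// The ggml implementation above concatenates per-backend feature lists
// ("NAME : feat = val | ...") and appends the compiler tag at the end.
func (fakeBackend) SystemInfo() string {
	return "CPU : SSE3 = 1 | AVX = 1 | cgo(gcc)"
}

func main() {
	var b Backend = fakeBackend{}
	fmt.Println(b.SystemInfo())
}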
@@ -47,10 +47,6 @@ var OnceLoad = sync.OnceFunc(func() {
 		exe = "."
 	}
 
-	if eval, err := filepath.EvalSymlinks(exe); err == nil {
-		exe = eval
-	}
-
 	// PATH, LD_LIBRARY_PATH, and DYLD_LIBRARY_PATH are often
 	// set by the parent process, however, use a default value
 	// if the environment variable is not set.
@@ -21,6 +21,7 @@ import (
 	_ "github.com/ollama/ollama/ml/backend"
 )
 
+// Options contains the inputs for a model forward pass
 type Options struct {
 	Inputs    []int32
 	Positions []int32
@@ -34,11 +35,13 @@ type config struct {
 	Cache kvcache.Cache
 }
 
+// Base implements the common fields and methods for all models
 type Base struct {
 	b ml.Backend
 	config
 }
 
+// Backend returns the underlying backend that will run the model
 func (m *Base) Backend() ml.Backend {
 	return m.b
 }
@@ -47,6 +50,7 @@ func (m *Base) Config() config {
 	return m.config
 }
 
+// Model implements a specific model architecture, defining the forward pass and any model-specific configuration
 type Model interface {
 	Forward(ml.Context, Options) (ml.Tensor, error)
@@ -56,6 +60,7 @@ type Model interface {
 
 var models = make(map[string]func(ml.Config) (Model, error))
 
+// Register registers a model constructor for the given architecture
 func Register(name string, f func(ml.Config) (Model, error)) {
 	if _, ok := models[name]; ok {
 		panic("model: model already registered")
@@ -64,8 +69,9 @@ func Register(name string, f func(ml.Config) (Model, error)) {
 	models[name] = f
 }
 
-func New(s string) (Model, error) {
-	r, err := os.Open(s)
+// New initializes a new model instance with the provided configuration based on the metadata in the model file
+func New(modelPath string) (Model, error) {
+	r, err := os.Open(modelPath)
 	if err != nil {
 		return nil, err
 	}
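The renamed parameter and new doc comments make the registry flow easier to follow: each architecture registers a constructor at init time, and `New` looks the constructor up from the model file's metadata. An illustrative sketch of the registration side; the `mymodel` package and architecture name are hypothetical, and the real Model interface may require methods beyond the stubbed Forward:

package mymodel

import (
	"github.com/ollama/ollama/ml"
	"github.com/ollama/ollama/model"
)

// Model embeds model.Base to pick up the common Backend/Config plumbing.
type Model struct {
	model.Base
}

// Forward is stubbed here; a real architecture builds its compute
// graph from the Options (inputs, positions, sequences, outputs).
func (m *Model) Forward(ctx ml.Context, opts model.Options) (ml.Tensor, error) {
	return nil, nil
}

func New(c ml.Config) (model.Model, error) {
	return &Model{}, nil
}

func init() {
	// Register panics if "mymodel" was already registered.
	model.Register("mymodel", New)
}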
model/model_external_test.go (new file), 138 lines
@@ -0,0 +1,138 @@
// Package model_test provides external tests for the model package.
// This test file specifically tests the forward pass functionality on models.
// It is in a separate package (model_test) to avoid import cycles while still
// being able to test the public API of the model package.
package model_test

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/ollama/ollama/ml"
	"github.com/ollama/ollama/model"
	"github.com/ollama/ollama/sample"

	_ "github.com/ollama/ollama/model/models"
)

type modelTest struct {
	Prompt            string   `json:"prompt"`
	OutputContainsOne []string `json:"output_contains_one"`
}

func TestForwardSimple(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping in short mode")
	}

	// Read all JSON files from testdata/models
	files, err := os.ReadDir("testdata/models")
	if err != nil {
		t.Fatal(err)
	}

	for _, file := range files {
		if !strings.HasSuffix(file.Name(), ".json") {
			continue
		}

		jsonPath := filepath.Join("testdata/models", file.Name())
		ggufPath := filepath.Join("testdata/models", strings.TrimSuffix(file.Name(), ".json")+".gguf")

		// Skip if no corresponding .gguf file exists
		if _, err := os.Stat(ggufPath); err != nil {
			t.Logf("skipping %s: no corresponding GGUF file found", file.Name())
			continue
		}

		data, err := os.ReadFile(jsonPath)
		if err != nil {
			t.Fatal(err)
		}

		var test modelTest
		if err := json.Unmarshal(data, &test); err != nil {
			t.Fatal(err)
		}

		t.Run(strings.TrimSuffix(file.Name(), ".json"), func(t *testing.T) {
			m, err := model.New(ggufPath)
			if err != nil {
				t.Fatal(err)
			}

			m.Config().Cache.Init(m.Backend(), ml.DTypeF32, 2048)

			inputs, err := m.(model.TextProcessor).Encode(test.Prompt)
			if err != nil {
				t.Fatal(err)
			}

			var result []string
			for len(result) < 100 { // Limit to 100 tokens max
				options := model.Options{
					Inputs:    inputs,
					Positions: make([]int32, len(inputs)),
					Sequences: make([]int, len(inputs)),
					Outputs:   []int32{int32(len(inputs) - 1)},
				}
				for i := range options.Positions {
					options.Positions[i] = int32(i)
					options.Sequences[i] = 0
				}

				ctx := m.Backend().NewContext()

				modelOutput, err := model.Forward(ctx, m, options)
				if err != nil {
					ctx.Close()
					t.Fatal(fmt.Errorf("forward pass failed: %v", err))
				}

				f32s := modelOutput.Floats()
				logits := make([]float64, len(f32s))
				for i, f32 := range f32s {
					logits[i] = float64(f32)
				}

				token, err := sample.Sample(logits, sample.Greedy())
				if err != nil {
					ctx.Close()
					t.Fatal(fmt.Errorf("sampling failed: %v", err))
				}

				ctx.Close()

				// Greedy sampling: take the token with the highest logit
				nextToken := int32(token[0])
				if m.(model.TextProcessor).Is(nextToken, model.SpecialEOS) {
					break
				}

				piece, err := m.(model.TextProcessor).Decode([]int32{nextToken})
				if err != nil {
					t.Fatal(err)
				}

				result = append(result, piece)
				output := strings.Join(result, "")

				for _, expectedOutput := range test.OutputContainsOne {
					if strings.Contains(output, expectedOutput) {
						t.Logf("Test passed with output: %q (matched expected: %q)", output, expectedOutput)
						return
					}
				}

				// Maintain full context by appending new token
				inputs = append(inputs, nextToken)
			}

			t.Fatalf("Expected output containing one of %q but got: %q", test.OutputContainsOne, strings.Join(result, ""))
		})
	}
}
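`sample.Sample(logits, sample.Greedy())` in the loop above reduces, for greedy decoding, to an argmax over the logits. A standalone sketch of that reduction, not the actual sample package implementation:

package main

import "fmt"

// greedy returns the index of the largest logit, i.e. the token a
// greedy sampler would always pick.
func greedy(logits []float64) int {
	best := 0
	for i := 1; i < len(logits); i++ {
		if logits[i] > logits[best] {
			best = i
		}
	}
	return best
}

func main() {
	fmt.Println(greedy([]float64{0.1, 2.5, -1.0, 2.4})) // prints 1
}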
model/testdata/models/README.md (new file, vendored), 10 lines
@@ -0,0 +1,10 @@
# Test Model Directory

This directory is used for storing model files (like `.gguf` files) that are required to run the tests in `model_external_test.go`.

## Usage

- Place any model files you need for testing in this directory
- The test file will look for any model files here (e.g., `llama3.gguf`)
- Model files in this directory are git-ignored to prevent large files from being committed to the repository
- Only `.md` and `.json` files (like this README and the test specs) are tracked in git
model/testdata/models/qwen2_5.json (new file, vendored), 7 lines
@@ -0,0 +1,7 @@
{
  "prompt": "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\nhi<|im_end|>\n<|im_start|>assistant\n",
  "output_contains_one": [
    "Hello",
    "Hi"
  ]
}
@@ -845,8 +845,6 @@ func (s *Server) loadModel(
 	threads int,
 	multiUserCache bool,
 ) {
-	llama.BackendInit()
-
 	var err error
 	s.model, err = llama.LoadModelFromFile(mpath, params)
 	if err != nil {
@@ -932,6 +930,8 @@ func Execute(args []string) error {
 	})
 	slog.SetDefault(slog.New(handler))
 	slog.Info("starting go runner")
 
+	llama.BackendInit()
+	slog.Info("system", "info", llama.PrintSystemInfo(), "threads", *threads)
 
 	server := &Server{
@@ -813,6 +813,8 @@ func (s *Server) loadModel(
 		panic(err)
 	}
 
+	slog.Info("system", "info", s.model.Backend().SystemInfo() /* "threads", *threads */)
+
 	// TODO(jessegross): LoRA loading
 	if lpath.String() != "" {
 		panic("loras are not yet implemented")
@@ -881,7 +883,6 @@ func Execute(args []string) error {
 	})
 	slog.SetDefault(slog.New(handler))
 	slog.Info("starting ollama engine")
-	// TODO(jessegross): Some system info would be useful
 
 	server := &Server{
 		batchSize: *batchSize,