Compare commits


10 Commits

Author SHA1 Message Date
Bruce MacDonald
31d04eb795 model: add a test for model forward pass during implementation
Adds a new test file to verify model forward pass behavior through
JSON-specified test cases. The framework loads model files (.gguf) and their
corresponding test specifications to validate expected outputs using greedy
sampling.
2025-02-18 14:21:10 -08:00
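The JSON test cases pair a prompt with acceptable output substrings; condensed from the `qwen2_5.json` file added further down in this compare (a matching `<name>.gguf` must sit next to each `<name>.json` under `model/testdata/models`):

```json
{
  "prompt": "<|im_start|>user\nhi<|im_end|>\n<|im_start|>assistant\n",
  "output_contains_one": ["Hello", "Hi"]
}
```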
Michael Yang
7b5d916a9a ci: set owner/group in tarball
Set owner and group when building the Linux tarball so extracted files
are consistent. This matches the behaviour of release tarballs in version
0.5.7 and lower.
2025-02-18 20:11:09 +00:00
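The workflow change below passes `--owner 0 --group 0` to GNU tar. For illustration only, a minimal Go sketch of the same idea using the standard `archive/tar` package (the `addFile` helper is hypothetical, not part of this change):

```go
package main

import (
	"archive/tar"
	"io"
	"os"
)

// addFile writes one file into the archive with owner and group forced to
// root (uid/gid 0), so extracted files do not inherit the uid/gid of the
// CI runner that built the tarball.
func addFile(tw *tar.Writer, path string) error {
	info, err := os.Stat(path)
	if err != nil {
		return err
	}
	hdr, err := tar.FileInfoHeader(info, "")
	if err != nil {
		return err
	}
	hdr.Name = path
	hdr.Uid, hdr.Gid = 0, 0       // equivalent of tar --owner 0 --group 0
	hdr.Uname, hdr.Gname = "", "" // drop symbolic owner/group names too
	if err := tw.WriteHeader(hdr); err != nil {
		return err
	}
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()
	_, err = io.Copy(tw, f)
	return err
}

func main() {
	tw := tar.NewWriter(os.Stdout)
	defer tw.Close()
	if err := addFile(tw, "ollama"); err != nil {
		panic(err)
	}
}
```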
benhaotang
33ad61b112 Add OpenDeepResearcher-via-searxng to Community Integrations (#9138) 2025-02-18 11:39:11 -08:00
L. Jiang
716e365615 test: add test cases for HumanNumber (#9108) 2025-02-18 11:35:26 -08:00
innightwolfsleep
3b4424ff98 readme: add LLM Telegram Bot to community integrations (#9150) 2025-02-18 10:04:30 -05:00
James-William-Kincaid-III
0667baddc6 docs: fix incorrect shortcut key in windows.md (#9098) 2025-02-15 15:38:24 -05:00
Bruce MacDonald
d006e1e09b model: document high-level model interface (#9122) 2025-02-14 16:01:00 -08:00
Daniel Hiltgen
df2680b4b9 Wire up system info log for new engine (#9123) 2025-02-14 15:55:33 -08:00
Jesse Gross
010313bb63 llamarunner: Init GGML before printing system info
We currently print system info before the GGML backends are loaded.
This results in only getting information about the default lowest
common denominator runner. If we move up the GGML init then we can
see what we are actually running.

Before:
time=2025-02-14T11:15:07.606-08:00 level=INFO source=runner.go:935 msg=system info="CPU : LLAMAFILE = 1 | CPU : LLAMAFILE = 1 | cgo(gcc)" threads=24

After:
time=2025-02-14T11:16:02.936-08:00 level=INFO source=runner.go:935 msg=system info="CPU : LLAMAFILE = 1 | CPU : LLAMAFILE = 1 | CUDA : ARCHS = 890 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | LLAMAFILE = 1 | cgo(gcc)" threads=24
2025-02-14 11:41:53 -08:00
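A minimal sketch of the reordering, using only the calls that appear in the llamarunner diff below (`llama.BackendInit`, `llama.PrintSystemInfo`); all other runner setup is omitted:

```go
package main

import (
	"log/slog"

	"github.com/ollama/ollama/llama"
)

func main() {
	slog.Info("starting go runner")

	// Initialize the GGML backends before logging system info so the log
	// reflects what was actually loaded (CUDA, AVX512, ...) instead of the
	// lowest-common-denominator CPU runner.
	llama.BackendInit()
	slog.Info("system", "info", llama.PrintSystemInfo())
}
```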
Jeffrey Morgan
5296f487a8 llm: attempt to evaluate symlinks, but do not fail (#9089)
provides a better approach to #9088 that will attempt to
evaluate symlinks (important for macOS where 'ollama' is
often a symlink), but use the result of os.Executable()
as a fallback in scenarios where filepath.EvalSymlinks
fails due to permission errors or other issues
2025-02-13 22:37:59 -08:00
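The same evaluate-then-fall-back pattern appears in several of the diffs below; as a standalone sketch (the `resolveExecutable` helper name is illustrative, not from the change):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// resolveExecutable returns the path of the running binary, following
// symlinks when possible (on macOS 'ollama' is often a symlink) but keeping
// the os.Executable result when EvalSymlinks fails, e.g. on permission
// errors, rather than returning an error.
func resolveExecutable() (string, error) {
	exe, err := os.Executable()
	if err != nil {
		return "", fmt.Errorf("unable to lookup executable path: %w", err)
	}
	if eval, err := filepath.EvalSymlinks(exe); err == nil {
		exe = eval
	}
	return exe, nil
}

func main() {
	exe, err := resolveExecutable()
	if err != nil {
		panic(err)
	}
	fmt.Println(exe)
}
```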
17 changed files with 245 additions and 17 deletions

View File

@@ -329,7 +329,9 @@ jobs:
done
working-directory: dist/${{ matrix.os }}-${{ matrix.arch }}
- run: |
for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz); done
for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do
tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE --owner 0 --group 0 | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz);
done
- uses: actions/upload-artifact@v4
with:
name: dist-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}

.gitignore vendored
View File

@@ -14,3 +14,6 @@ test_data
__debug_bin*
llama/build
llama/vendor
model/testdata/models/*
!model/testdata/models/*.md
!model/testdata/models/*.json

View File

@@ -381,6 +381,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally)
### Cloud
@@ -548,6 +549,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow)
- [TextLLaMA](https://github.com/adarshM84/TextLLaMA) A Chrome Extension that helps you write emails, correct grammar, and translate into any language
- [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)
- [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) (Telegram bot, primarily for RP. Oobabooga-like buttons, [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) API integration, etc.)
### Supported backends

View File

@@ -19,6 +19,10 @@ var LibOllamaPath string = func() string {
return ""
}
if eval, err := filepath.EvalSymlinks(exe); err == nil {
exe = eval
}
var libPath string
switch runtime.GOOS {
case "windows":

View File

@@ -55,7 +55,7 @@ Here's a quick example showing API access from `powershell`
## Troubleshooting
Ollama on Windows stores files in a few different locations. You can view them in
the explorer window by hitting `<cmd>+R` and type in:
the explorer window by hitting `<Ctrl>+R` and type in:
- `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
- *app.log* contains most resent logs from the GUI application
- *server.log* contains the most recent server logs

View File

@@ -12,6 +12,9 @@ func TestHumanNumber(t *testing.T) {
testCases := []testCase{
{0, "0"},
{999, "999"},
{1000, "1K"},
{1001, "1K"},
{1000000, "1M"},
{125000000, "125M"},
{500500000, "500.50M"},

View File

@@ -305,6 +305,10 @@ func (b *testBackend) NewContext() ml.Context {
return &testContext{}
}
func (b *testBackend) SystemInfo() string {
return "not implemented"
}
type testContext struct{}
func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {

View File

@@ -320,6 +320,10 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
return nil, fmt.Errorf("unable to lookup executable path: %w", err)
}
if eval, err := filepath.EvalSymlinks(exe); err == nil {
exe = eval
}
// TODO - once fully switched to the Go runner, load the model here for tokenize/detokenize cgo access
s := &llmServer{
port: port,

View File

@@ -23,6 +23,7 @@ type Backend interface {
Config() Config
Get(name string) Tensor
NewContext() Context
SystemInfo() string
}
var backends = make(map[string]func(*os.File) (Backend, error))

View File

@@ -1,11 +1,27 @@
package ggml
// #cgo CPPFLAGS: -I${SRCDIR}/ggml/include
// #include <stdlib.h>
// #include <stdint.h>
// #include "ggml.h"
// #include "ggml-cpu.h"
// #include "ggml-backend.h"
/*
#cgo CPPFLAGS: -I${SRCDIR}/ggml/include
#include <stdlib.h>
#include <stdint.h>
#include "ggml.h"
#include "ggml-cpu.h"
#include "ggml-backend.h"
static struct ggml_backend_feature * getBackendFeatures(void *fp, ggml_backend_reg_t reg) {return ((ggml_backend_get_features_t)(fp))(reg);}
static struct ggml_backend_feature * getNextBackendFeatures(struct ggml_backend_feature * feature) { return &feature[1];}
typedef enum {COMP_UNKNOWN,COMP_GCC,COMP_CLANG} COMPILER;
COMPILER inline get_compiler() {
#if defined(__clang__)
return COMP_CLANG;
#elif defined(__GNUC__)
return COMP_GCC;
#else
return UNKNOWN_COMPILER;
#endif
}
*/
import "C"
import (
@@ -626,3 +642,34 @@ func (t *Tensor) Conv2D(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int
t: C.ggml_conv_2d(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1)),
}
}
func (b *Backend) SystemInfo() string {
var compiler string
switch C.get_compiler() {
case C.COMP_UNKNOWN:
compiler = "cgo(unknown_compiler)"
case C.COMP_GCC:
compiler = "cgo(gcc)"
case C.COMP_CLANG:
compiler = "cgo(clang)"
}
var s string
for i := range C.ggml_backend_reg_count() {
reg := C.ggml_backend_reg_get(i)
fName := C.CString("ggml_backend_get_features")
defer C.free(unsafe.Pointer(fName))
get_features_fn := C.ggml_backend_reg_get_proc_address(reg, fName)
if get_features_fn != nil {
s += C.GoString(C.ggml_backend_reg_name(reg))
s += " : "
for features := C.getBackendFeatures(get_features_fn, reg); features.name != nil; features = C.getNextBackendFeatures(features) {
s += C.GoString(features.name)
s += " = "
s += C.GoString(features.value)
s += " | "
}
}
}
return s + compiler
}

View File

@@ -47,10 +47,6 @@ var OnceLoad = sync.OnceFunc(func() {
exe = "."
}
if eval, err := filepath.EvalSymlinks(exe); err == nil {
exe = eval
}
// PATH, LD_LIBRARY_PATH, and DYLD_LIBRARY_PATH are often
// set by the parent process, however, use a default value
// if the environment variable is not set.

View File

@@ -21,6 +21,7 @@ import (
_ "github.com/ollama/ollama/ml/backend"
)
// Options contains the inputs for a model forward pass
type Options struct {
Inputs []int32
Positions []int32
@@ -34,11 +35,13 @@ type config struct {
Cache kvcache.Cache
}
// Base implements the common fields and methods for all models
type Base struct {
b ml.Backend
config
}
// Backend returns the underlying backend that will run the model
func (m *Base) Backend() ml.Backend {
return m.b
}
@@ -47,6 +50,7 @@ func (m *Base) Config() config {
return m.config
}
// Model implements a specific model architecture, defining the forward pass and any model-specific configuration
type Model interface {
Forward(ml.Context, Options) (ml.Tensor, error)
@@ -56,6 +60,7 @@ type Model interface {
var models = make(map[string]func(ml.Config) (Model, error))
// Register registers a model constructor for the given architecture
func Register(name string, f func(ml.Config) (Model, error)) {
if _, ok := models[name]; ok {
panic("model: model already registered")
@@ -64,8 +69,9 @@ func Register(name string, f func(ml.Config) (Model, error)) {
models[name] = f
}
func New(s string) (Model, error) {
r, err := os.Open(s)
// New initializes a new model instance with the provided configuration based on the metadata in the model file
func New(modelPath string) (Model, error) {
r, err := os.Open(modelPath)
if err != nil {
return nil, err
}

View File

@@ -0,0 +1,138 @@
// Package model_test provides external tests for the model package.
// This test file specifically tests the forward pass functionality on models.
// It is in a separate package (model_test) to avoid import cycles while still
// being able to test the public API of the model package.
package model_test
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/sample"
_ "github.com/ollama/ollama/model/models"
)
type modelTest struct {
Prompt string `json:"prompt"`
OutputContainsOne []string `json:"output_contains_one"`
}
func TestForwardSimple(t *testing.T) {
if testing.Short() {
t.Skip("skipping in short mode")
}
// Read all JSON files from testdata/models
files, err := os.ReadDir("testdata/models")
if err != nil {
t.Fatal(err)
}
for _, file := range files {
if !strings.HasSuffix(file.Name(), ".json") {
continue
}
jsonPath := filepath.Join("testdata/models", file.Name())
ggufPath := filepath.Join("testdata/models", strings.TrimSuffix(file.Name(), ".json")+".gguf")
// Skip if no corresponding .gguf file exists
if _, err := os.Stat(ggufPath); err != nil {
t.Logf("skipping %s: no corresponding GGUF file found", file.Name())
continue
}
data, err := os.ReadFile(jsonPath)
if err != nil {
t.Fatal(err)
}
var test modelTest
if err := json.Unmarshal(data, &test); err != nil {
t.Fatal(err)
}
t.Run(strings.TrimSuffix(file.Name(), ".json"), func(t *testing.T) {
m, err := model.New(ggufPath)
if err != nil {
t.Fatal(err)
}
m.Config().Cache.Init(m.Backend(), ml.DTypeF32, 2048)
inputs, err := m.(model.TextProcessor).Encode(test.Prompt)
if err != nil {
t.Fatal(err)
}
var result []string
for len(result) < 100 { // Limit to 100 tokens max
options := model.Options{
Inputs: inputs,
Positions: make([]int32, len(inputs)),
Sequences: make([]int, len(inputs)),
Outputs: []int32{int32(len(inputs) - 1)},
}
for i := range options.Positions {
options.Positions[i] = int32(i)
options.Sequences[i] = 0
}
ctx := m.Backend().NewContext()
modelOutput, err := model.Forward(ctx, m, options)
if err != nil {
ctx.Close()
t.Fatal(fmt.Errorf("forward pass failed: %v", err))
}
f32s := modelOutput.Floats()
logits := make([]float64, len(f32s))
for i, f32 := range f32s {
logits[i] = float64(f32)
}
token, err := sample.Sample(logits, sample.Greedy())
if err != nil {
ctx.Close()
t.Fatal(fmt.Errorf("sampling failed: %v", err))
}
ctx.Close()
// Greedy sampling: take the token with the highest logit
nextToken := int32(token[0])
if m.(model.TextProcessor).Is(nextToken, model.SpecialEOS) {
break
}
piece, err := m.(model.TextProcessor).Decode([]int32{nextToken})
if err != nil {
t.Fatal(err)
}
result = append(result, piece)
output := strings.Join(result, "")
for _, expectedOutput := range test.OutputContainsOne {
if strings.Contains(output, expectedOutput) {
t.Logf("Test passed with output: %q (matched expected: %q)", output, expectedOutput)
return
}
}
// Maintain full context by appending new token
inputs = append(inputs, nextToken)
}
t.Fatalf("Expected output containing one of %q but got: %q", test.OutputContainsOne, strings.Join(result, ""))
})
}
}

model/testdata/models/README.md vendored Normal file
View File

@@ -0,0 +1,10 @@
# Test Model Directory
This directory is used for storing model files (like `.gguf` files) that are required to run the tests in `model_external_test.go`.
## Usage
- Place any model files you need for testing in this directory
- The test file will look for any model files here (e.g., `llama3.gguf`)
- All non-markdown files in this directory are git-ignored to prevent large model files from being committed to the repository
- Only `.md` files (like this README) will be tracked in git

model/testdata/models/qwen2_5.json vendored Normal file
View File

@@ -0,0 +1,7 @@
{
"prompt": "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\nhi<|im_end|>\n<|im_start|>assistant\n",
"output_contains_one": [
"Hello",
"Hi"
]
}

View File

@@ -845,8 +845,6 @@ func (s *Server) loadModel(
threads int,
multiUserCache bool,
) {
llama.BackendInit()
var err error
s.model, err = llama.LoadModelFromFile(mpath, params)
if err != nil {
@@ -932,6 +930,8 @@ func Execute(args []string) error {
})
slog.SetDefault(slog.New(handler))
slog.Info("starting go runner")
llama.BackendInit()
slog.Info("system", "info", llama.PrintSystemInfo(), "threads", *threads)
server := &Server{

View File

@@ -813,6 +813,8 @@ func (s *Server) loadModel(
panic(err)
}
slog.Info("system", "info", s.model.Backend().SystemInfo() /* "threads", *threads */)
// TODO(jessegross): LoRA loading
if lpath.String() != "" {
panic("loras are not yet implemented")
@@ -881,7 +883,6 @@ func Execute(args []string) error {
})
slog.SetDefault(slog.New(handler))
slog.Info("starting ollama engine")
// TODO(jessegross): Some system info would be useful
server := &Server{
batchSize: *batchSize,