Compare commits

..

10 Commits

Author SHA1 Message Date
Eva Ho
5aee34db9f fix tests 2025-11-19 15:57:19 -05:00
Eva Ho
32393f11d7 app/ui: add gemini-3-pro-preview to featured list 2025-11-19 14:59:29 -05:00
Patrick Devine
604e43b28d models: enable deepseek2 (deepseek v3.1 w/ MLA) on the new engine (#13151) 2025-11-18 22:03:50 -08:00
Jesse Gross
53985b3c4d kvcache: Use SetRows to store cache data
We currently copy data into the KV cache in contiguous buffers using
ggml_cpy(). ggml_set_rows() was introduced to allow scatter operations
so that contiguous buffers are no longer required. The most direct
benefit of this is that we no longer need to perform defragmentation.

However, GGML recently removed an optimization for ggml_cpy() and
we picked it up in 544b673 "ggml update to b6840 (#12791)". This
caused a roughly 40% drop in token generation performance on CUDA
due to CUDA graphs no longer being used. By switching to
ggml_set_rows(), the original optimization is no longer necessary
and CUDA performance is restored.

Fixes #13112
2025-11-18 20:42:28 -08:00
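
A minimal standalone Go sketch of the set_rows idea (a hypothetical helper, not the real ml.Tensor API): each source row is scattered to an arbitrary destination row through an index slice, so destination rows no longer need to be contiguous and the cache never needs defragmenting first.

package main

import "fmt"

// setRows copies src row i into dst row idxs[i]. dst and src are
// row-major matrices with the same column count.
func setRows(dst, src [][]float32, idxs []int32) {
	for i, row := range src {
		copy(dst[idxs[i]], row)
	}
}

func main() {
	dst := [][]float32{{0, 0}, {0, 0}, {0, 0}, {0, 0}}
	src := [][]float32{{1, 1}, {2, 2}}
	// The rows land in non-contiguous slots 0 and 3; with ggml_cpy a
	// single contiguous destination span would have been required.
	setRows(dst, src, []int32{0, 3})
	fmt.Println(dst) // [[1 1] [0 0] [0 0] [2 2]]
}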
Jesse Gross
b6e02cbbd2 ggml: Automatically make tensors contiguous on reshape
GGML requires tensors to be contiguous for reshape, and if
this is not the case, it will fail an assertion. Making a tensor
contiguous is an expensive operation, so it's best to do it lazily
when it is actually required rather than ahead of time when it may
not be needed.
2025-11-18 20:42:28 -08:00
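
A toy illustration of the lazy-contiguous pattern this commit describes, under stated assumptions (a simplified strided 2D tensor, not the real ggml bindings): reshape checks contiguity from the strides and only materializes a compacted copy when the view is actually strided.

package main

import "fmt"

type tensor struct {
	data       []float32
	rows, cols int
	rowStride  int // elements between row starts; > cols means a non-contiguous view
}

func (t *tensor) contiguous() *tensor {
	out := make([]float32, 0, t.rows*t.cols)
	for r := 0; r < t.rows; r++ {
		out = append(out, t.data[r*t.rowStride:r*t.rowStride+t.cols]...)
	}
	return &tensor{data: out, rows: t.rows, cols: t.cols, rowStride: t.cols}
}

// reshape pays the copy cost only when it is actually required,
// rather than ahead of time when it may not be needed.
func (t *tensor) reshape(rows, cols int) *tensor {
	if t.rowStride != t.cols {
		t = t.contiguous()
	}
	return &tensor{data: t.data, rows: rows, cols: cols, rowStride: cols}
}

func main() {
	// A strided 2x2 view over a 2x4 buffer (the 9s are padding).
	v := &tensor{data: []float32{1, 2, 9, 9, 3, 4, 9, 9}, rows: 2, cols: 2, rowStride: 4}
	fmt.Println(v.reshape(1, 4).data) // [1 2 3 4]
}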
Grace
91935631ac Renderer for Cogito v2 (#13139) 2025-11-18 19:06:34 -08:00
nicole pardal
8de30b568a nomic-embed-text model implementation (#13071) 2025-11-18 18:28:10 -08:00
Daniel Hiltgen
485da9fd35 win: exit instead of abort (#13138)
Calling abort() on Windows triggers the C++ runtime to attempt a debugger
attach, which causes crashed runners to hang instead of exiting, leading
to a timeout rather than a fast failure during discovery.
2025-11-18 16:33:33 -08:00
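
To see why a fast exit beats a hang here, a hedged Go sketch of the supervising side (hypothetical runner binary and timeout; the real discovery code differs): the parent gives each probe a deadline, so a runner that exits immediately surfaces its failure at once, while one stuck in a debugger-attach prompt burns the whole timeout.

package main

import (
	"context"
	"fmt"
	"os/exec"
	"time"
)

func main() {
	// Discovery gives each probe a bounded window.
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	cmd := exec.CommandContext(ctx, "./runner", "--probe") // hypothetical runner binary
	err := cmd.Run()
	switch {
	case ctx.Err() == context.DeadlineExceeded:
		fmt.Println("runner hung; discovery lost the full timeout") // the abort-on-Windows behavior
	case err != nil:
		fmt.Println("runner failed fast:", err) // the exit(1) behavior
	default:
		fmt.Println("runner ok")
	}
}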
Michael Yang
0796d79d19 cuda: skip large batches
CUDA panics on batches larger than 1024, so skip those and fall back
to CPU
2025-11-18 16:11:37 -08:00
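
The shape of the workaround, as a hedged Go sketch (a toy dispatcher, not the ggml backend API): the backend reports oversized mul_mat batches as unsupported, so the scheduler routes those ops to the CPU path instead of hitting the panic in the kernel.

package main

import "fmt"

// ne2 and ne3 are the two outer (batch) dimensions of the B operand,
// mirroring the b->ne[2] * b->ne[3] check added to the CUDA backend.
func cudaSupportsMulMat(ne2, ne3 int64) bool {
	return ne2*ne3 <= 1024
}

func dispatchMulMat(ne2, ne3 int64) string {
	if cudaSupportsMulMat(ne2, ne3) {
		return "cuda"
	}
	return "cpu" // fall back rather than panic
}

func main() {
	fmt.Println(dispatchMulMat(32, 32)) // cuda (exactly 1024 is still allowed)
	fmt.Println(dispatchMulMat(64, 32)) // cpu
}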
Michael Yang
92981ae3f2 deepseekocr 2025-11-18 16:11:37 -08:00
41 changed files with 2200 additions and 362 deletions

View File

@@ -25,7 +25,7 @@ declare module "@/gotypes" {
}
Model.prototype.isCloud = function (): boolean {
return this.model.endsWith("cloud");
return this.model.endsWith("cloud") || this.model === "gemini-3-pro-preview";
};
// Helper function to convert Uint8Array to base64

View File

@@ -14,8 +14,8 @@ describe("Model merging logic", () => {
const merged = mergeModels(localModels);
// First verify cloud models are first and in FEATURED_MODELS order
const cloudModels = FEATURED_MODELS.filter((m: string) =>
m.endsWith("cloud"),
const cloudModels = FEATURED_MODELS.filter(
(m: string) => m.endsWith("cloud") || m === "gemini-3-pro-preview",
);
for (let i = 0; i < cloudModels.length; i++) {
expect(merged[i].model).toBe(cloudModels[i]);
@@ -24,7 +24,7 @@ describe("Model merging logic", () => {
// Then verify non-cloud featured models are next and in FEATURED_MODELS order
const nonCloudFeatured = FEATURED_MODELS.filter(
(m: string) => !m.endsWith("cloud"),
(m: string) => !m.endsWith("cloud") && m !== "gemini-3-pro-preview",
);
for (let i = 0; i < nonCloudFeatured.length; i++) {
const model = merged[i + cloudModels.length];
@@ -54,9 +54,9 @@ describe("Model merging logic", () => {
const cloudModels = merged.filter((m) => m.isCloud());
expect(cloudModels.length).toBe(0);
// Should have non-cloud featured models
// Should have non-cloud featured models (excluding gemini-3-pro-preview which is treated as cloud)
const nonCloudFeatured = FEATURED_MODELS.filter(
(m) => !m.endsWith("cloud"),
(m) => !m.endsWith("cloud") && m !== "gemini-3-pro-preview",
);
for (let i = 0; i < nonCloudFeatured.length; i++) {
const model = merged[i];
@@ -74,7 +74,9 @@ describe("Model merging logic", () => {
const merged = mergeModels([]);
// First verify cloud models are first and in FEATURED_MODELS order
const cloudModels = FEATURED_MODELS.filter((m) => m.endsWith("cloud"));
const cloudModels = FEATURED_MODELS.filter(
(m) => m.endsWith("cloud") || m === "gemini-3-pro-preview",
);
for (let i = 0; i < cloudModels.length; i++) {
expect(merged[i].model).toBe(cloudModels[i]);
expect(merged[i].isCloud()).toBe(true);
@@ -82,7 +84,7 @@ describe("Model merging logic", () => {
// Then verify non-cloud featured models are next and in FEATURED_MODELS order
const nonCloudFeatured = FEATURED_MODELS.filter(
(m) => !m.endsWith("cloud"),
(m) => !m.endsWith("cloud") && m !== "gemini-3-pro-preview",
);
for (let i = 0; i < nonCloudFeatured.length; i++) {
const model = merged[i + cloudModels.length];
@@ -104,7 +106,9 @@ describe("Model merging logic", () => {
const merged = mergeModels(localModels);
// First verify cloud models are first and in FEATURED_MODELS order
const cloudModels = FEATURED_MODELS.filter((m) => m.endsWith("cloud"));
const cloudModels = FEATURED_MODELS.filter(
(m) => m.endsWith("cloud") || m === "gemini-3-pro-preview",
);
for (let i = 0; i < cloudModels.length; i++) {
expect(merged[i].model).toBe(cloudModels[i]);
expect(merged[i].isCloud()).toBe(true);
@@ -112,7 +116,7 @@ describe("Model merging logic", () => {
// Then verify non-cloud featured models are next and in FEATURED_MODELS order
const nonCloudFeatured = FEATURED_MODELS.filter(
(m) => !m.endsWith("cloud"),
(m) => !m.endsWith("cloud") && m !== "gemini-3-pro-preview",
);
for (let i = 0; i < nonCloudFeatured.length; i++) {
const model = merged[i + cloudModels.length];

View File

@@ -4,6 +4,7 @@ import { Model } from "@/gotypes";
export const FEATURED_MODELS = [
"gpt-oss:120b-cloud",
"gpt-oss:20b-cloud",
"gemini-3-pro-preview",
"deepseek-v3.1:671b-cloud",
"qwen3-coder:480b-cloud",
"qwen3-vl:235b-cloud",
@@ -40,7 +41,9 @@ export function mergeModels(
const cloudModels = [...allModels.filter((m) => m.isCloud())];
// Add any cloud models from FEATURED_MODELS that aren't in local models
FEATURED_MODELS.filter((f) => f.endsWith("cloud")).forEach((cloudModel) => {
FEATURED_MODELS.filter(
(f) => f.endsWith("cloud") || f === "gemini-3-pro-preview",
).forEach((cloudModel) => {
if (!cloudModels.some((m) => m.model === cloudModel)) {
cloudModels.push(new Model({ model: cloudModel }));
}
@@ -48,7 +51,7 @@ export function mergeModels(
// 2. Get other featured models (non-cloud)
const featuredModels = FEATURED_MODELS.filter(
(f) => !f.endsWith("cloud"),
(f) => !f.endsWith("cloud") && f !== "gemini-3-pro-preview",
).map((model) => {
// Check if this model exists in local models
const localMatch = allModels.find(

View File

@@ -206,6 +206,8 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
conv = &commandrModel{}
case "GptOssForCausalLM":
conv = &gptossModel{}
case "DeepseekOCRForCausalLM":
conv = &deepseekocr{}
default:
return fmt.Errorf("unsupported architecture %q", p.Architectures[0])
}

View File

@@ -0,0 +1,136 @@
package convert
import (
"fmt"
"github.com/ollama/ollama/fs/ggml"
)
type deepseekocr struct {
ModelParameters
LanguageConfig struct {
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
HiddenSize uint32 `json:"hidden_size"`
HiddenLayers uint32 `json:"num_hidden_layers"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
NumRoutedExperts uint32 `json:"n_routed_experts"`
NumSharedExperts uint32 `json:"n_shared_experts"`
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
FirstKDenseReplace uint32 `json:"first_k_dense_replace"`
} `json:"language_config"`
VisionConfig struct {
ImageSize uint32 `json:"image_size"`
Width struct {
Vision struct {
Heads uint32 `json:"heads"`
ImageSize uint32 `json:"image_size"`
Layers uint32 `json:"layers"`
PatchSize uint32 `json:"patch_size"`
Width uint32 `json:"width"`
} `json:"clip-l-14-224"`
Sam struct {
GlobalAttentionIndexes []int32 `json:"global_attn_indexes"`
Heads uint32 `json:"heads"`
Layers uint32 `json:"layers"`
Width uint32 `json:"width"`
} `json:"sam_vit_b"`
}
} `json:"vision_config"`
}
func (m *deepseekocr) KV(t *Tokenizer) ggml.KV {
kv := m.ModelParameters.KV(t)
kv["general.architecture"] = "deepseekocr"
kv["block_count"] = m.LanguageConfig.HiddenLayers
kv["context_length"] = m.LanguageConfig.MaxPositionEmbeddings
kv["embedding_length"] = m.LanguageConfig.HiddenSize
kv["feed_forward_length"] = m.LanguageConfig.IntermediateSize
kv["attention.head_count"] = m.LanguageConfig.NumAttentionHeads
kv["attention.head_count_kv"] = m.LanguageConfig.NumKeyValueHeads
kv["expert_count"] = m.LanguageConfig.NumRoutedExperts
kv["expert_used_count"] = m.LanguageConfig.NumExpertsPerToken
kv["leading_dense_block_count"] = m.LanguageConfig.FirstKDenseReplace
kv["vision.block_count"] = m.VisionConfig.Width.Vision.Layers
kv["vision.embedding_length"] = m.VisionConfig.Width.Vision.Width
kv["vision.head_count"] = m.VisionConfig.Width.Vision.Heads
kv["vision.image_size"] = m.VisionConfig.Width.Vision.ImageSize
kv["vision.patch_size"] = m.VisionConfig.Width.Vision.PatchSize
kv["sam.block_count"] = m.VisionConfig.Width.Sam.Layers
kv["sam.embedding_length"] = m.VisionConfig.Width.Sam.Width
kv["sam.head_count"] = m.VisionConfig.Width.Sam.Heads
kv["sam.global_attention_indexes"] = m.VisionConfig.Width.Sam.GlobalAttentionIndexes
return kv
}
func (m *deepseekocr) Tensors(s []Tensor) (out []*ggml.Tensor) {
merges := make([]merge, m.LanguageConfig.HiddenLayers*3)
for i := range m.LanguageConfig.HiddenLayers {
merges[i*3+0] = merge{
fmt.Sprintf("blk.%d.mlp.experts.*.gate_proj.weight", i),
fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
}
merges[i*3+1] = merge{
fmt.Sprintf("blk.%d.mlp.experts.*.up_proj.weight", i),
fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
}
merges[i*3+2] = merge{
fmt.Sprintf("blk.%d.mlp.experts.*.down_proj.weight", i),
fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
}
}
out, s = mergeTensors(s, merges...)
for _, t := range s {
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (m *deepseekocr) Replacements() []string {
return []string{
"model.embed_tokens", "token_embd",
"model.layers", "blk",
"input_layernorm", "attn_norm",
"self_attn.q_proj", "attn_q",
"self_attn.k_proj", "attn_k",
"self_attn.v_proj", "attn_v",
"self_attn.o_proj", "attn_output",
"post_attention_layernorm", "ffn_norm",
"mlp.gate_proj", "ffn_gate",
"mlp.up_proj", "ffn_up",
"mlp.down_proj", "ffn_down",
"mlp.gate", "ffn_gate_inp",
"mlp.shared_experts.gate_proj", "ffn_gate_shexp",
"mlp.shared_experts.up_proj", "ffn_up_shexp",
"mlp.shared_experts.down_proj", "ffn_down_shexp",
"model.norm", "output_norm",
"lm_head", "output",
"model.vision_model", "v",
"embeddings.patch_embedding", "patch_embd",
"embeddings.class_embedding", "class_embd",
"embeddings.position_embedding", "position_embd",
"transformer.layers", "blk",
"model.projector", "mm",
"model.image_newline", "mm.image_newline",
//nolint:misspell // this misspelling is upstream. fixing it breaks the model
"model.view_seperator", "mm.view_seperator",
"model.sam_model.patch_embed.proj", "s.patch_embd",
"model.sam_model.pos_embed", "s.position_embd",
"model.sam_model.blocks", "s.blk",
"model.sam_model.neck", "s.neck",
"model.sam_model.net_", "s.net_",
}
}
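
A rough sketch of what the expert merges in Tensors accomplish, assuming a simplified name set (the real mergeTensors also stacks the matched weights into a single 3D tensor): per-expert names such as blk.0.mlp.experts.3.up_proj.weight match the wildcard and are collected under one blk.0.ffn_up_exps.weight entry.

package main

import (
	"fmt"
	"path"
)

// groupExperts partitions names into those matching the wildcard pattern
// (to be stacked) and the rest, prepending the merged name when any match.
func groupExperts(names []string, pattern, merged string) (grouped, rest []string) {
	for _, n := range names {
		if ok, _ := path.Match(pattern, n); ok {
			grouped = append(grouped, n)
		} else {
			rest = append(rest, n)
		}
	}
	if len(grouped) > 0 {
		rest = append([]string{merged}, rest...)
	}
	return grouped, rest
}

func main() {
	names := []string{
		"blk.0.mlp.experts.0.up_proj.weight",
		"blk.0.mlp.experts.1.up_proj.weight",
		"blk.0.attn_q.weight",
	}
	g, out := groupExperts(names, "blk.0.mlp.experts.*.up_proj.weight", "blk.0.ffn_up_exps.weight")
	fmt.Println(g)   // the two expert tensors to stack
	fmt.Println(out) // [blk.0.ffn_up_exps.weight blk.0.attn_q.weight]
}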

View File

@@ -44,7 +44,10 @@ func (t tensorBase) Kind() uint32 {
t.name == "v.positional_embedding_vlm" ||
t.name == "v.tile_position_embd.weight" ||
t.name == "v.pre_tile_position_embd.weight" ||
t.name == "v.post_tile_position_embd.weight" {
t.name == "v.post_tile_position_embd.weight" ||
t.name == "s.position_embd" ||
strings.HasSuffix(t.name, "rel_pos_h") ||
strings.HasSuffix(t.name, "rel_pos_w") {
// these tensors are always F32
return tensorKindFP32
}

View File

@@ -96,7 +96,10 @@ type safetensor struct {
func (st safetensor) Kind() uint32 {
kind := st.tensorBase.Kind()
if !strings.HasPrefix(st.name, "v.") && st.dtype == "BF16" && kind != tensorKindFP32 {
if st.dtype == "BF16" &&
!strings.HasPrefix(st.name, "v.") &&
!strings.HasPrefix(st.name, "s.") &&
kind != tensorKindFP32 {
kind = tensorKindBF16
}

View File

@@ -249,6 +249,8 @@ func (kv KV) OllamaEngineRequired() bool {
"qwen25vl",
"qwen3", "qwen3moe",
"qwen3vl", "qwen3vlmoe",
"deepseekocr",
"deepseek2",
}, kv.Architecture())
}

View File

@@ -3,7 +3,6 @@ package kvcache
import (
"errors"
"fmt"
"log/slog"
"math"
"slices"
@@ -40,18 +39,18 @@ type Causal struct {
// ** current forward pass **
// the active layer for Get and Put
curLayer int
// starting location for data storage for this batch
curLoc int
// size of the current batch
curBatchSize int
// locations for data storage for this batch
curLoc ml.Tensor
// mask of the cache as used by this batch
curMask ml.Tensor
// the active layer for Get and Put
curLayer int
// locations in the cache that are needed for this batch
curCellRange cellRange
@@ -206,45 +205,47 @@ func (c *Causal) StartForward(ctx ml.Context, batch input.Batch, reserve bool) e
c.curPositions = batch.Positions
c.opts.Except = nil
var locs []int32
if !reserve {
c.updateSlidingWindow()
var err error
c.curLoc, err = c.findStartLoc()
if errors.Is(err, ErrKvCacheFull) {
c.defrag()
c.curLoc, err = c.findStartLoc()
}
locs, err = c.findLocs()
if err != nil {
return err
}
for i, pos := range batch.Positions {
seq := batch.Sequences[i]
loc := int(locs[i])
c.cells[c.curLoc+i] = cacheCell{pos: pos, sequences: []int{seq}}
c.cells[loc] = cacheCell{pos: pos, sequences: []int{seq}}
seqRange, ok := c.cellRanges[seq]
if !ok {
seqRange = newRange()
}
seqRange.min = min(seqRange.min, c.curLoc+i)
c.curCellRange.min = min(c.curCellRange.min, c.curLoc+i)
seqRange.min = min(seqRange.min, loc)
c.curCellRange.min = min(c.curCellRange.min, loc)
seqRange.max = max(seqRange.max, c.curLoc+i)
c.curCellRange.max = max(c.curCellRange.max, c.curLoc+i)
seqRange.max = max(seqRange.max, loc)
c.curCellRange.max = max(c.curCellRange.max, loc)
c.cellRanges[seq] = seqRange
}
} else {
// If we are reserving memory, don't update any of the cache metadata but set the size
// to the worst case.
c.curLoc = 0
locs = make([]int32, c.curBatchSize)
for i := range locs {
locs[i] = int32(i)
}
c.curCellRange.min = 0
c.curCellRange.max = len(c.cells) - 1
}
c.curLoc = ctx.Input().FromInts(locs, len(locs))
c.curMask = c.buildMask(ctx)
return nil
@@ -257,22 +258,20 @@ func newRange() cellRange {
}
}
// Find the first contiguous block of at least curBatchSize
func (c *Causal) findStartLoc() (int, error) {
var start, count int
// Returns a slice of locations where each token in the batch should be stored
func (c *Causal) findLocs() ([]int32, error) {
loc := make([]int32, 0, c.curBatchSize)
for i := range c.cells {
if len(c.cells[i].sequences) == 0 {
count++
if count >= c.curBatchSize {
return start, nil
loc = append(loc, int32(i))
if len(loc) >= c.curBatchSize {
return loc, nil
}
} else {
start = i + 1
count = 0
}
}
return 0, fmt.Errorf("%w (cache: %v batch: %v)", ErrKvCacheFull, len(c.cells), c.curBatchSize)
return nil, fmt.Errorf("%w (cache: %v batch: %v)", ErrKvCacheFull, len(c.cells), c.curBatchSize)
}
func (c *Causal) updateSlidingWindow() {
@@ -402,145 +401,6 @@ func (c *Causal) buildMask(ctx ml.Context) ml.Tensor {
return maskTensor
}
func (c *Causal) moveCells(ctx ml.Context, src, dst, length int) {
for i, key := range c.keys {
if key == nil {
continue
}
kHeadDim := key.Dim(0)
numKVHeads := key.Dim(1)
rowSize := key.Stride(2)
kSrcView := key.View(ctx, rowSize*src, kHeadDim*numKVHeads*length)
kDstView := key.View(ctx, rowSize*dst, kHeadDim*numKVHeads*length)
value := c.values[i]
var vSrcView, vDstView ml.Tensor
if c.config.PermutedV {
vHeadDim := value.Dim(1)
elemSize := value.Stride(0)
vSrcView = value.View(ctx, elemSize*src, length, len(c.cells)*elemSize, vHeadDim*numKVHeads)
vDstView = value.View(ctx, elemSize*dst, length, len(c.cells)*elemSize, vHeadDim*numKVHeads)
} else {
vHeadDim := value.Dim(0)
rowSize := value.Stride(2)
vSrcView = value.View(ctx, rowSize*src, vHeadDim*numKVHeads*length)
vDstView = value.View(ctx, rowSize*dst, vHeadDim*numKVHeads*length)
}
ctx.Forward(
kSrcView.Copy(ctx, kDstView),
vSrcView.Copy(ctx, vDstView),
)
}
}
func (c *Causal) defrag() {
slog.Debug("defragmenting kv cache")
// Defrag strategy:
// - Search for empty holes at the beginning of the cache,
// filling them with active data starting at the end
// - If there are contiguous elements that need to be moved,
// combine them into a single operation by holding new moves
// until we see that the next one is non-contiguous
// - Fill up the context with the maximum number of operations it
// can hold then compute that and continue with a new context
//
// We could try to optimize placement by grouping blocks from
// the same sequences together, but most likely the next forward
// pass will disrupt this anyway, so the real-world benefit
// seems limited at this time.
ctx := c.backend.NewContext()
// For every move, 6 tensors are required per layer (2 views and a
// copy for each of k and v). We also need to refer to the original
// k and v cache tensors - once per layer, not per move.
layers := 0
for _, key := range c.keys {
if key == nil {
continue
}
layers++
}
maxMoves := (ctx.MaxGraphNodes() - 2*layers) / (6 * layers)
moves := 0
var pendingSrc, pendingDst, pendingLen int
src := len(c.cells) - 1
for dst := 0; dst < src; dst++ {
if len(c.cells[dst].sequences) == 0 {
for ; src > dst; src-- {
if len(c.cells[src].sequences) != 0 {
c.cells[dst] = c.cells[src]
c.cells[src] = cacheCell{}
if pendingLen > 0 {
if src == pendingSrc-pendingLen && dst == pendingDst+pendingLen {
pendingSrc = src
pendingLen++
break
} else {
c.moveCells(ctx, pendingSrc, pendingDst, pendingLen)
moves++
}
}
pendingSrc = src
pendingDst = dst
pendingLen = 1
break
}
}
}
if moves >= maxMoves {
ctx.Compute()
ctx.Close()
ctx = c.backend.NewContext()
moves = 0
}
}
if pendingLen > 0 {
c.moveCells(ctx, pendingSrc, pendingDst, pendingLen)
moves++
}
if moves > 0 {
ctx.Compute()
}
ctx.Close()
// Reset range metadata
for seq := range c.cellRanges {
seqRange := newRange()
for i, cell := range c.cells {
if slices.Contains(cell.sequences, seq) {
if i < seqRange.min {
seqRange.min = i
}
if i > seqRange.max {
seqRange.max = i
}
}
}
c.cellRanges[seq] = seqRange
}
c.updateSlidingWindow()
}
func (c *Causal) SetLayer(layer int) {
c.curLayer = layer
}
@@ -625,18 +485,25 @@ func (c *Causal) Put(ctx ml.Context, key, value ml.Tensor) {
}
}
rowSize := c.keys[c.curLayer].Stride(2)
ctx.Forward(key.Copy(ctx, c.keys[c.curLayer].View(ctx, rowSize*c.curLoc, kHeadDim*numKVHeads*batchSize)))
key = key.Reshape(ctx, kHeadDim*numKVHeads, batchSize)
keyCache := c.keys[c.curLayer]
keyCache = keyCache.Reshape(ctx, kHeadDim*numKVHeads, len(c.cells))
ctx.Forward(keyCache.SetRows(ctx, key, c.curLoc))
if c.config.PermutedV {
elemSize := c.values[c.curLayer].Stride(0)
value = value.Reshape(ctx, vHeadDim*numKVHeads, 1, batchSize)
value = value.Permute(ctx, 2, 0, 1, 3)
value = value.Permute(ctx, 1, 2, 0, 3)
ctx.Forward(value.Copy(ctx, c.values[c.curLayer].View(ctx, elemSize*c.curLoc, batchSize, len(c.cells)*elemSize, vHeadDim*numKVHeads)))
valueCache := c.values[c.curLayer]
valueCache = valueCache.Reshape(ctx, 1, len(c.cells), vHeadDim*numKVHeads)
ctx.Forward(valueCache.SetRows(ctx, value, c.curLoc))
} else {
rowSize := c.values[c.curLayer].Stride(2)
value = value.Reshape(ctx, vHeadDim*numKVHeads, batchSize)
valueCache := c.values[c.curLayer]
valueCache = valueCache.Reshape(ctx, vHeadDim*numKVHeads, len(c.cells))
ctx.Forward(value.Copy(ctx, c.values[c.curLayer].View(ctx, rowSize*c.curLoc, vHeadDim*numKVHeads*batchSize)))
ctx.Forward(valueCache.SetRows(ctx, value, c.curLoc))
}
}

View File

@@ -207,11 +207,11 @@ func TestSWAMem(t *testing.T) {
inShape: []int{1, 1, 2},
seqs: []int{0, 0},
pos: []int32{4, 5},
expected: []float32{4, 5, 6},
expectedShape: []int{1, 1, 3},
expected: []float32{5, 2, 3, 4, 6},
expectedShape: []int{1, 1, 5},
expectedMask: []float32{
0, 0, x,
x, 0, 0,
0, x, x, 0, x,
0, x, x, x, 0,
},
},
}
@@ -319,6 +319,8 @@ func TestRemove(t *testing.T) {
cache.Init(backend, ml.DTypeF16, 1, 16, 16)
x := float32(math.Inf(-1))
tests := []testCase{
{
name: "FirstBatch",
@@ -328,7 +330,12 @@ func TestRemove(t *testing.T) {
pos: []int32{0, 1, 0, 1},
expected: []float32{1, 2, 3, 4},
expectedShape: []int{1, 1, 4},
expectedMask: []float32{0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0},
expectedMask: []float32{
0, x, x, x,
0, 0, x, x,
x, x, 0, x,
x, x, 0, 0,
},
},
}
@@ -346,9 +353,12 @@ func TestRemove(t *testing.T) {
inShape: []int{1, 1, 2},
seqs: []int{0, 1},
pos: []int32{1, 2},
expected: []float32{1, 2, 3, 4, 5, 6},
expectedShape: []int{1, 1, 6},
expectedMask: []float32{0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, float32(math.Inf(-1)), 0},
expected: []float32{1, 5, 3, 4, 6},
expectedShape: []int{1, 1, 5},
expectedMask: []float32{
0, 0, x, x, x,
x, x, 0, 0, 0,
},
},
}
@@ -366,59 +376,12 @@ func TestRemove(t *testing.T) {
inShape: []int{1, 1, 2},
seqs: []int{0, 0},
pos: []int32{1, 2},
expected: []float32{7, 8, 3, 4, 4},
expectedShape: []int{1, 1, 5},
expectedMask: []float32{0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), 0},
},
}
testCache(t, backend, cache, tests)
}
func TestDefrag(t *testing.T) {
backend := &testBackend{}
cache := NewCausalCache(func(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
return key.Add(ctx, shift), nil
})
defer cache.Close()
cache.Init(backend, ml.DTypeF16, 1, 16, 16)
tests := []testCase{
{
name: "FirstBatch",
in: []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
inShape: []int{1, 1, 16},
seqs: []int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
pos: []int32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
expected: []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
expectedShape: []int{1, 1, 16},
expectedMask: []float32{0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, 0, 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, 0, 0, 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, 0, 0, 0, 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, float32(math.Inf(-1)), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
},
}
testCache(t, backend, cache, tests)
err := cache.Remove(0, 2, 4)
if err != nil {
panic(err)
}
err = cache.Remove(0, 13, math.MaxInt32)
if err != nil {
panic(err)
}
tests = []testCase{
{
name: "Defrag",
in: []float32{17, 18, 19},
inShape: []int{1, 1, 3},
seqs: []int{0, 0, 0},
pos: []int32{16, 17, 18},
expected: []float32{1, 2, 12, 13, 3, 4, 5, 6, 7, 8, 9, 10, 11, 17, 18, 19},
expectedShape: []int{1, 1, 16},
expectedMask: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, float32(math.Inf(-1)), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
expected: []float32{7, 4, 3, 4, 6, 8},
expectedShape: []int{1, 1, 6},
expectedMask: []float32{
0, 0, x, x, x, x,
0, 0, x, x, x, 0,
},
},
}
@@ -770,6 +733,15 @@ func (t *testTensor) Add(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
return out
}
func (t *testTensor) Reshape(ctx ml.Context, shape ...int) ml.Tensor {
return &testTensor{
dtype: t.dtype,
elementSize: t.elementSize,
data: t.data,
shape: shape,
}
}
func (t *testTensor) View(ctx ml.Context, offset int, shape ...int) ml.Tensor {
offset /= t.elementSize
@@ -778,6 +750,8 @@ func (t *testTensor) View(ctx ml.Context, offset int, shape ...int) ml.Tensor {
switch len(shape) {
case 1:
s = []int{shape[0]}
case 3:
s = []int{shape[0], shape[2]}
case 5:
s = []int{shape[0], shape[2], shape[4]}
default:
@@ -792,6 +766,86 @@ func (t *testTensor) View(ctx ml.Context, offset int, shape ...int) ml.Tensor {
return view
}
func (t *testTensor) SetRows(ctx ml.Context, src ml.Tensor, idxs ml.Tensor) ml.Tensor {
dst := t
srcTensor := src.(*testTensor)
idxTensor := idxs.(*testTensor)
shapeTo4D := func(shape []int) [4]int {
out := [4]int{1, 1, 1, 1}
for i := 0; i < len(shape) && i < 4; i++ {
out[i] = shape[i]
}
return out
}
computeStrides := func(shape [4]int) [4]int {
out := [4]int{1, 1, 1, 1}
for i := 1; i < 4; i++ {
out[i] = out[i-1] * shape[i-1]
}
return out
}
dstShape4D := shapeTo4D(dst.shape)
srcShape4D := shapeTo4D(srcTensor.shape)
idxShape4D := shapeTo4D(idxTensor.shape)
if dstShape4D[0] != srcShape4D[0] || dstShape4D[2] != srcShape4D[2] || dstShape4D[3] != srcShape4D[3] {
panic("SetRows requires matching tensor shapes")
}
if srcShape4D[1] != idxShape4D[0] {
panic("SetRows rows/index mismatch")
}
if srcShape4D[2]%idxShape4D[1] != 0 || srcShape4D[3]%idxShape4D[2] != 0 {
panic("SetRows cannot broadcast indices")
}
if idxShape4D[3] != 1 {
panic("SetRows expects 1D or 2D index tensors")
}
dstStride := computeStrides(dstShape4D)
srcStride := computeStrides(srcShape4D)
idxStride := computeStrides(idxShape4D)
numColumns := srcShape4D[0]
numRows := srcShape4D[1]
for dim3Index := range dstShape4D[3] {
for dim2Index := range dstShape4D[2] {
idxDim2 := 0
idxDim3 := 0
if idxShape4D[1] > 0 {
idxDim2 = dim2Index % idxShape4D[1]
}
if idxShape4D[2] > 0 {
idxDim3 = dim3Index % idxShape4D[2]
}
idxBase := idxDim3*idxStride[2] + idxDim2*idxStride[1]
srcBase := dim3Index*srcStride[3] + dim2Index*srcStride[2]
dstBase := dim3Index*dstStride[3] + dim2Index*dstStride[2]
for row := range numRows {
idx := int(idxTensor.data[idxBase+row*idxStride[0]])
if idx < 0 || idx >= dstShape4D[1] {
panic("SetRows index out of range")
}
srcOffset := srcBase + row*srcStride[1]
dstOffset := dstBase + idx*dstStride[1]
copy(dst.data[dstOffset:dstOffset+numColumns], srcTensor.data[srcOffset:srcOffset+numColumns])
}
}
}
return dst
}
func (t *testTensor) Copy(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
copy(t2.(*testTensor).data, t.data)
return nil

View File

@@ -38,7 +38,7 @@ index 44ae76d66..639d551a2 100644
#ifdef __cplusplus
}
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index d2c278a35..221e29509 100644
index ca02ea079..c12b069e5 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -73,6 +73,7 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher();

View File

@@ -11,7 +11,7 @@ vidmem optimization.
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 221e29509..18b7cbccf 100644
index c12b069e5..76c78c2ea 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -5654,14 +5654,11 @@ static void ggml_vk_buffer_copy(vk_buffer& dst, size_t dst_offset, vk_buffer& sr

View File

@@ -50,7 +50,7 @@ Subject: [PATCH] Vulkan MMQ Integer Dot Refactor and K-Quant support (#16536)
create mode 100644 ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 18b7cbccf..53b57c179 100644
index 76c78c2ea..7669ed206 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -488,6 +488,7 @@ struct vk_device_struct {

View File

@@ -58,7 +58,7 @@ index 639d551a2..e5c446d1d 100644
GGML_API size_t gguf_type_size(enum gguf_type type);
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 53b57c179..b2855b078 100644
index 7669ed206..63a762ec2 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -387,12 +387,76 @@ static constexpr uint32_t num_argsort_pipelines = 11;

View File

@@ -31,7 +31,7 @@ Add new backend tests.
6 files changed, 371 insertions(+), 117 deletions(-)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index b2855b078..aaf4334b5 100644
index 63a762ec2..db92a7901 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -458,6 +458,11 @@ static topk_moe_mode ggml_vk_num_additional_ops_to_topk_moe_mode(uint32_t num) {

View File

@@ -9,7 +9,7 @@ Subject: [PATCH] vulkan: Handle argsort with a large number of rows (#16851)
2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index aaf4334b5..3604ceb04 100644
index db92a7901..e959674d1 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -1084,6 +1084,7 @@ struct vk_op_soft_max_push_constants {

View File

@@ -20,7 +20,7 @@ Subject: [PATCH] vulkan: Fix crash when FP16 mul_mat accumulation is not
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 3604ceb04..80185d9f0 100644
index e959674d1..903050b0b 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -146,8 +146,13 @@ static void ggml_vk_destroy_pipeline(vk::Device& device, vk_pipeline& pipeline);

View File

@@ -0,0 +1,25 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Michael Yang <git@mxy.ng>
Date: Tue, 18 Nov 2025 11:13:04 -0800
Subject: [PATCH] ggml-cuda: skip large batches
CUDA panics on batches larger than 1024, so mark it as unsupported to
fall back to CPU
---
ggml/src/ggml-cuda/ggml-cuda.cu | 3 +++
1 file changed, 3 insertions(+)
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index f1a20e7fe..1a71e07c9 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -3677,6 +3677,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
if (b->type == GGML_TYPE_F16 && a->type != GGML_TYPE_F16) {
return false;
}
+ if (op->op == GGML_OP_MUL_MAT && b->ne[2] * b->ne[3] > 1024) {
+ return false;
+ }
#ifdef GGML_USE_MUSA
const int cc = ggml_cuda_info().devices[dev_ctx->device].cc;
if (b->ne[2]*b->ne[3] > 1 && !ggml_is_transposed(a) && !ggml_is_transposed(b)) {

View File

@@ -0,0 +1,28 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Tue, 18 Nov 2025 09:58:23 -0800
Subject: [PATCH] win: exit instead of abort
---
ggml/src/ggml.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 9be35c1be..923c33d05 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -229,8 +229,13 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
fprintf(stderr, "%s\n", message);
ggml_print_backtrace();
}
-
+#if defined(_WIN32)
+ fflush(stderr);
+ fflush(stdout);
+ exit(1);
+#else
abort();
+#endif
}
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp

View File

@@ -173,6 +173,7 @@ type Tensor interface {
Cos(ctx Context) Tensor
Tanh(ctx Context) Tensor
GELU(ctx Context, up ...Tensor) Tensor
QuickGELU(ctx Context, up ...Tensor) Tensor
SILU(ctx Context, up ...Tensor) Tensor
RELU(ctx Context, up ...Tensor) Tensor
Sigmoid(ctx Context) Tensor
@@ -193,6 +194,7 @@ type Tensor interface {
Repeat(ctx Context, dim, n int) Tensor
Concat(ctx Context, t2 Tensor, dim int) Tensor
Rows(ctx Context, t2 Tensor) Tensor
SetRows(ctx Context, src Tensor, idxs Tensor) Tensor
Copy(ctx Context, t2 Tensor) Tensor
Duplicate(ctx Context) Tensor
@@ -207,6 +209,8 @@ type Tensor interface {
Stddev(ctx Context) Tensor
Sqr(ctx Context) Tensor
Sqrt(ctx Context) Tensor
Interpolate(ctx Context, dims [4]int, samplingMode SamplingMode) Tensor
}
// ScaledDotProductAttention implements a fused attention
@@ -372,3 +376,10 @@ const (
DTypeI32
DTypeMXFP4
)
type SamplingMode int
const (
SamplingModeNearest SamplingMode = iota
SamplingModeBilinear
)

View File

@@ -314,7 +314,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
"altup_proj", "altup_unembd_proj",
"per_layer_token_embd", "per_layer_model_proj", "per_layer_proj_norm"):
createTensor(tensor{source: t}, output.bts, blocks)
case strings.HasPrefix(t.Name, "v.") || strings.HasPrefix(t.Name, "mm."):
case strings.HasPrefix(t.Name, "v.") || strings.HasPrefix(t.Name, "mm.") || strings.HasPrefix(t.Name, "s."):
// TODO: assign vision tensors to the gpu if possible
createTensor(tensor{source: t}, output.bts, blocks)
case contains(t.Name, "rope_freqs", "rope_factors_long", "rope_factors_short"):
@@ -1338,6 +1338,13 @@ func (t *Tensor) Rows(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
}
}
func (t *Tensor) SetRows(ctx ml.Context, src ml.Tensor, idxs ml.Tensor) ml.Tensor {
return &Tensor{
b: t.b,
t: C.ggml_set_rows(ctx.(*Context).ctx, t.t, src.(*Tensor).t, idxs.(*Tensor).t),
}
}
func (t *Tensor) Copy(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
return &Tensor{
b: t.b,
@@ -1378,6 +1385,10 @@ func inferShape(t *Tensor, shape []int) {
}
func (t *Tensor) Reshape(ctx ml.Context, shape ...int) ml.Tensor {
if !C.ggml_is_contiguous(t.t) {
return t.Contiguous(ctx, shape...)
}
if slices.Contains(shape, -1) {
inferShape(t, shape)
}
@@ -1567,6 +1578,16 @@ func (t *Tensor) GELU(ctx ml.Context, t2 ...ml.Tensor) ml.Tensor {
}
}
func (t *Tensor) QuickGELU(ctx ml.Context, t2 ...ml.Tensor) ml.Tensor {
var tt *C.struct_ggml_tensor
if len(t2) > 0 {
tt = C.ggml_geglu_quick_split(ctx.(*Context).ctx, t.t, t2[0].(*Tensor).t)
} else {
tt = C.ggml_gelu_quick_inplace(ctx.(*Context).ctx, t.t)
}
return &Tensor{b: t.b, t: tt}
}
func (t *Tensor) SILU(ctx ml.Context, t2 ...ml.Tensor) ml.Tensor {
if len(t2) > 0 {
return &Tensor{
@@ -1724,6 +1745,23 @@ func (t *Tensor) Sqrt(ctx ml.Context) ml.Tensor {
}
}
func (t *Tensor) Interpolate(ctx ml.Context, dims [4]int, samplingMode ml.SamplingMode) ml.Tensor {
var mode C.uint32_t
switch samplingMode {
case ml.SamplingModeNearest:
mode = C.GGML_SCALE_MODE_NEAREST
case ml.SamplingModeBilinear:
mode = C.GGML_SCALE_MODE_BILINEAR
default:
panic("unsupported interpolate mode")
}
return &Tensor{
b: t.b,
t: C.ggml_interpolate(ctx.(*Context).ctx, t.t, C.int64_t(dims[0]), C.int64_t(dims[1]), C.int64_t(dims[2]), C.int64_t(dims[3]), mode),
}
}
// Slice returns a view of the tensor sliced along dim from low to high in step steps.
// Slice panics if the dimension is invalid or the slice parameters are out of range.
// If dim=0 and step>1, the tensor is a copy rather than a view to ensure proper shape.

View File

@@ -3677,6 +3677,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
if (b->type == GGML_TYPE_F16 && a->type != GGML_TYPE_F16) {
return false;
}
if (op->op == GGML_OP_MUL_MAT && b->ne[2] * b->ne[3] > 1024) {
return false;
}
#ifdef GGML_USE_MUSA
const int cc = ggml_cuda_info().devices[dev_ctx->device].cc;
if (b->ne[2]*b->ne[3] > 1 && !ggml_is_transposed(a) && !ggml_is_transposed(b)) {

View File

@@ -229,8 +229,13 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
fprintf(stderr, "%s\n", message);
ggml_print_backtrace();
}
#if defined(_WIN32)
fflush(stderr);
fflush(stdout);
exit(1);
#else
abort();
#endif
}
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp

View File

@@ -25,12 +25,15 @@ const (
// Composite returns an image with the alpha channel removed by drawing over a white background.
func Composite(img image.Image) image.Image {
dst := image.NewRGBA(img.Bounds())
white := color.RGBA{255, 255, 255, 255}
draw.Draw(dst, dst.Bounds(), &image.Uniform{white}, image.Point{}, draw.Src)
draw.Draw(dst, dst.Bounds(), img, img.Bounds().Min, draw.Over)
return CompositeColor(img, white)
}
// CompositeColor returns an image with the alpha channel removed by drawing over a white background.
func CompositeColor(img image.Image, color color.Color) image.Image {
dst := image.NewRGBA(img.Bounds())
draw.Draw(dst, dst.Bounds(), &image.Uniform{color}, image.Point{}, draw.Src)
draw.Draw(dst, dst.Bounds(), img, img.Bounds().Min, draw.Over)
return dst
}
@@ -55,6 +58,31 @@ func Resize(img image.Image, newSize image.Point, method int) image.Image {
return dst
}
// Pad returns an image which has been resized to fit within a new size, preserving aspect ratio, and padded with a color.
func Pad(img image.Image, newSize image.Point, color color.Color, kernel draw.Interpolator) image.Image {
dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
draw.Draw(dst, dst.Bounds(), &image.Uniform{color}, image.Point{}, draw.Src)
var minPoint, maxPoint image.Point
if img.Bounds().Dx() > img.Bounds().Dy() {
// landscape
height := newSize.X * img.Bounds().Dy() / img.Bounds().Dx()
minPoint = image.Point{0, (newSize.Y - height) / 2}
maxPoint = image.Point{newSize.X, height + minPoint.Y}
} else {
// portrait
width := newSize.Y * img.Bounds().Dx() / img.Bounds().Dy()
minPoint = image.Point{(newSize.X - width) / 2, 0}
maxPoint = image.Point{minPoint.X + width, newSize.Y}
}
kernel.Scale(dst, image.Rectangle{
Min: minPoint,
Max: maxPoint,
}, img, img.Bounds(), draw.Over, nil)
return dst
}
// Normalize returns a slice of float32 containing each of the r, g, b values for an image normalized around a value.
func Normalize(img image.Image, mean, std [3]float32, rescale bool, channelFirst bool) []float32 {
var pixelVals []float32

View File

@@ -156,6 +156,7 @@ func New(c fs.Config) (model.Model, error) {
)),
},
},
true,
)
default:
return nil, model.ErrUnsupportedTokenizer

View File

@@ -236,6 +236,11 @@ type Model struct {
}
func New(c fs.Config) (model.Model, error) {
if c.Uint("attention.key_length_mla") == 0 {
// non-MLA models aren't yet supported
return nil, model.ErrUnsupportedModel
}
layers := make([]Layer, c.Uint("block_count"))
firstDenseLayerIndex := int(c.Uint("leading_dense_block_count"))

View File

@@ -0,0 +1,83 @@
package deepseekocr
import (
"bytes"
"image"
"image/color"
"math"
"slices"
"golang.org/x/image/draw"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/model/imageproc"
)
type ratio struct {
x, y int
}
func ProcessImage(ctx ml.Context, bts []byte) (ml.Tensor, ml.Tensor, []int, error) {
img, _, err := image.Decode(bytes.NewReader(bts))
if err != nil {
return nil, nil, nil, err
}
minNum, maxNum, imageSize, baseSize := 2, 9, 640, 1024
var targetRatios []ratio
for n := minNum; n <= maxNum; n++ {
for i := 1; i <= n; i++ {
for j := 1; j <= n; j++ {
if i*j <= maxNum && i*j >= minNum && !slices.Contains(targetRatios, ratio{i, j}) {
targetRatios = append(targetRatios, ratio{i, j})
}
}
}
}
targetRatio := findBestAspectRatio(targetRatios, img.Bounds().Dx(), img.Bounds().Dy(), imageSize)
targetWidth, targetHeight := imageSize*targetRatio.x, imageSize*targetRatio.y
blocks := targetRatio.x * targetRatio.y
mean := imageproc.ImageNetStandardMean
std := imageproc.ImageNetStandardSTD
var patches []float32
resized := imageproc.Resize(img, image.Point{X: targetWidth, Y: targetHeight}, imageproc.ResizeBilinear)
for i := range blocks {
patch := image.NewRGBA(image.Rect(0, 0, imageSize, imageSize))
draw.Draw(patch, patch.Bounds(), resized, image.Point{
X: i % (targetWidth / imageSize) * imageSize,
Y: i / (targetWidth / imageSize) * imageSize,
}, draw.Over)
patches = append(patches, imageproc.Normalize(patch, mean, std, true, true)...)
}
img = imageproc.CompositeColor(img, color.Gray{})
img = imageproc.Pad(img, image.Point{X: baseSize, Y: baseSize}, color.Gray{127}, draw.BiLinear)
return ctx.Input().FromFloats(patches, imageSize, imageSize, 3, blocks),
ctx.Input().FromFloats(imageproc.Normalize(img, mean, std, true, true), baseSize, baseSize, 3),
[]int{targetRatio.x, targetRatio.y},
nil
}
func findBestAspectRatio(targetRatios []ratio, width, height, imageSize int) ratio {
bestDiff := math.MaxFloat64
best := ratio{1, 1}
realRatio := float64(width) / float64(height)
for _, target := range targetRatios {
targetRatio := float64(target.x) / float64(target.y)
diff := math.Abs(realRatio - targetRatio)
if diff < bestDiff {
bestDiff = diff
best = target
} else if diff == bestDiff {
if float64(width*height) > 0.5*float64(imageSize*imageSize*best.x*best.y) {
best = target
}
}
}
return best
}
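
As a concrete check of the grid selection, a hedged test-style sketch (values chosen for illustration): a 1280x800 image has ratio 1.6, so among the candidate grids with 2 to 9 tiles the 3x2 grid (ratio 1.5) wins, giving a 1920x1280 resize split into six 640x640 crops.

package deepseekocr

import (
	"slices"
	"testing"
)

func TestFindBestAspectRatio(t *testing.T) {
	// Rebuild the same candidate set ProcessImage uses (2 <= i*j <= 9).
	var targetRatios []ratio
	for n := 2; n <= 9; n++ {
		for i := 1; i <= n; i++ {
			for j := 1; j <= n; j++ {
				if i*j <= 9 && i*j >= 2 && !slices.Contains(targetRatios, ratio{i, j}) {
					targetRatios = append(targetRatios, ratio{i, j})
				}
			}
		}
	}
	if got := findBestAspectRatio(targetRatios, 1280, 800, 640); got != (ratio{3, 2}) {
		t.Fatalf("expected the 3x2 grid, got %+v", got)
	}
}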

View File

@@ -0,0 +1,192 @@
package deepseekocr
import (
"math"
"slices"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
)
type Model struct {
model.Base
model.TextProcessor
Sam *samModel `gguf:"s"`
Vision *visionModel `gguf:"v"`
Text *textModel
ImageNewline ml.Tensor `gguf:"mm.image_newline"`
//nolint:misspell // this misspelling is upstream. fixing it breaks the model
ViewSeperator ml.Tensor `gguf:"mm.view_seperator"`
Projector *nn.Linear `gguf:"mm.layers"`
}
func (m *Model) EncodeMultimodal(ctx ml.Context, bts []byte) ([]input.Multimodal, error) {
patches, original, crop, err := ProcessImage(ctx, bts)
if err != nil {
return nil, err
}
var outputs []ml.Tensor
if true { // TODO: local features if sum(patches) != 0
samOutputs := m.Sam.Forward(ctx, patches)
visionOutputs := m.Vision.Forward(ctx, patches, samOutputs)
samOutputs = samOutputs.Reshape(ctx, -1, samOutputs.Dim(2), samOutputs.Dim(3)).Permute(ctx, 1, 0, 2, 3)
visionOutputs = visionOutputs.Slice(ctx, 1, 1, visionOutputs.Dim(1), 1)
localOutputs := visionOutputs.Concat(ctx, samOutputs, 0)
localOutputs = m.Projector.Forward(ctx, localOutputs)
hw := int(math.Sqrt(float64(localOutputs.Dim(1))))
localOutputs = localOutputs.Reshape(ctx, -1, hw, crop[0], crop[1])
localOutputs = localOutputs.Permute(ctx, 0, 2, 1, 3)
localOutputs = localOutputs.Contiguous(ctx, -1, crop[0]*hw, crop[1]*hw)
localOutputs = localOutputs.Concat(ctx, m.ImageNewline.Repeat(ctx, 2, localOutputs.Dim(2)), 1)
localOutputs = localOutputs.Reshape(ctx, localOutputs.Dim(0), -1)
outputs = append(outputs, localOutputs)
}
samOutputs := m.Sam.Forward(ctx, original)
visionOutputs := m.Vision.Forward(ctx, original, samOutputs)
samOutputs = samOutputs.Reshape(ctx, -1, samOutputs.Dim(2), samOutputs.Dim(3)).Permute(ctx, 1, 0, 2, 3)
visionOutputs = visionOutputs.Slice(ctx, 1, 1, visionOutputs.Dim(1), 1)
globalOutputs := visionOutputs.Concat(ctx, samOutputs, 0)
globalOutputs = m.Projector.Forward(ctx, globalOutputs)
hw := int(math.Sqrt(float64(globalOutputs.Dim(1))))
globalOutputs = globalOutputs.Reshape(ctx, -1, hw, hw)
globalOutputs = globalOutputs.Concat(ctx, m.ImageNewline.Repeat(ctx, 2, globalOutputs.Dim(2)), 1)
globalOutputs = globalOutputs.Reshape(ctx, globalOutputs.Dim(0), -1)
outputs = append(outputs, globalOutputs, m.ViewSeperator)
return []input.Multimodal{
{Tensor: outputs[0].Stack(ctx, 1, outputs[1:]...)},
}, nil
}
func (m *Model) PostTokenize(inputs []*input.Input) ([]*input.Input, error) {
outputs := make([]*input.Input, 0, len(inputs))
for i := range inputs {
if inputs[i].Multimodal == nil {
outputs = append(outputs, inputs[i])
continue
}
t := inputs[i].Multimodal[0].Tensor
outputs = append(outputs, &input.Input{
Token: 128815,
Multimodal: inputs[i].Multimodal,
MultimodalHash: inputs[i].MultimodalHash,
SameBatch: t.Dim(1) - 1,
})
outputs = slices.Grow(outputs, t.Dim(1)-1)
outputs = append(outputs, slices.Repeat([]*input.Input{{Token: 128815}}, t.Dim(1)-1)...)
}
return outputs, nil
}
func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
inputsEmbeds := m.Text.TokenEmbedding.Forward(ctx, batch.Inputs).Duplicate(ctx)
positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
for _, mm := range batch.Multimodal {
t := mm.Multimodal[0].Tensor
ctx.Forward(t.Copy(ctx, inputsEmbeds.View(ctx, mm.Index*inputsEmbeds.Stride(1), t.Dim(0)*t.Dim(1))))
}
hiddenStates := inputsEmbeds
for i, block := range m.Text.Blocks {
if m.Cache != nil {
m.Cache.SetLayer(i)
}
var outputs ml.Tensor
if i == len(m.Text.Blocks)-1 {
outputs = batch.Outputs
}
hiddenStates = block.Forward(ctx, hiddenStates, positions, outputs, m.Cache, m.Text.Options)
}
hiddenStates = m.Text.OutputNorm.Forward(ctx, hiddenStates, m.Text.Options.eps)
return m.Text.Output.Forward(ctx, hiddenStates), nil
}
func init() {
model.Register("deepseekocr", func(c fs.Config) (model.Model, error) {
textBlocks := make([]textBlock, c.Uint("block_count"))
leadingDenseBlockCount := int(c.Uint("leading_dense_block_count", 1))
for i := range textBlocks {
if i >= leadingDenseBlockCount {
textBlocks[i].FeedForward = &textMoe{}
} else {
textBlocks[i].FeedForward = &textMLP{}
}
}
m := Model{
TextProcessor: model.NewBytePairEncoding(
&model.Vocabulary{
Values: c.Strings("tokenizer.ggml.tokens"),
Types: c.Ints("tokenizer.ggml.token_type"),
Merges: c.Strings("tokenizer.ggml.merges"),
AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
EOS: append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
},
// Split regex into multiple parts (according to DeepSeek3's regex)
"\\p{N}{1,3}",
`[一-龥぀-ゟ゠-ヿ]+`,
"[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+",
),
Text: &textModel{
Blocks: textBlocks,
Options: textOptions{
hiddenSize: int(c.Uint("embedding_length")),
numHeads: int(c.Uint("attention.head_count")),
numKVHeads: int(c.Uint("attention.head_count_kv")),
numExperts: int(c.Uint("expert_count")),
numExpertsUsed: int(c.Uint("expert_used_count")),
ropeBase: c.Float("rope.freq_base", 10_000),
ropeScale: c.Float("rope.scaling.factor", 1.0),
eps: c.Float("attention.layer_norm_rms_epsilon", 1e-6),
},
},
Vision: &visionModel{
Blocks: make([]visionBlock, c.Uint("vision.block_count")),
Options: visionOptions{
hiddenSize: int(c.Uint("vision.embedding_length")),
numHeads: int(c.Uint("vision.head_count")),
imageSize: int(c.Uint("vision.image_size", 224)),
patchSize: int(c.Uint("vision.patch_size", 14)),
eps: c.Float("vision.attention.layer_norm_epsilon", 1e-5),
},
},
Sam: &samModel{
Blocks: make([]samBlock, c.Uint("sam.block_count")),
Options: samOptions{
hiddenSize: int(c.Uint("sam.embedding_length")),
numHeads: int(c.Uint("sam.head_count")),
eps: c.Float("sam.attention.layer_norm_epsilon", 1e-6),
globalAttentionLayers: c.Ints("sam.global_attention_indexes"),
},
},
}
m.Cache = kvcache.NewCausalCache(m.Text.Shift)
return &m, nil
})
}

View File

@@ -0,0 +1,225 @@
package deepseekocr
import (
"math"
"slices"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
)
type samModel struct {
PatchEmbedding *nn.Conv2D `gguf:"patch_embd"`
PositionEmbedding ml.Tensor `gguf:"position_embd"`
Blocks []samBlock `gguf:"blk"`
Neck *samNeck `gguf:"neck"`
Net2 *nn.Conv2D `gguf:"net_2"`
Net3 *nn.Conv2D `gguf:"net_3"`
Options samOptions
}
func (m *samModel) absolutePositionEmbedding(ctx ml.Context, hiddenStates ml.Tensor) ml.Tensor {
source := m.PositionEmbedding.Dim(1)
target := hiddenStates.Dim(2)
if source != target {
positionEmbed := m.PositionEmbedding.Permute(ctx, 2, 0, 1, 3)
positionEmbed = positionEmbed.Interpolate(ctx, [4]int{target, target, hiddenStates.Dim(0), 1}, ml.SamplingModeBilinear)
return positionEmbed.Permute(ctx, 1, 2, 0, 3).Contiguous(ctx)
}
return m.PositionEmbedding
}
func (m *samModel) Forward(ctx ml.Context, t ml.Tensor) ml.Tensor {
hiddenStates := m.PatchEmbedding.Forward(ctx, t, 16, 16, 0, 0, 1, 1)
hiddenStates = hiddenStates.Permute(ctx, 1, 2, 0, 3).Contiguous(ctx)
if m.PositionEmbedding != nil {
hiddenStates = hiddenStates.Add(ctx, m.absolutePositionEmbedding(ctx, hiddenStates))
}
for i, block := range m.Blocks {
var windowSize int
if !slices.Contains(m.Options.globalAttentionLayers, int32(i)) {
windowSize = 14
}
hiddenStates = block.Forward(ctx, hiddenStates, windowSize, m.Options)
}
hiddenStates = hiddenStates.Permute(ctx, 2, 0, 1, 3).Contiguous(ctx)
hiddenStates = m.Neck.Forward(ctx, hiddenStates, m.Options)
hiddenStates = m.Net2.Forward(ctx, hiddenStates, 2, 2, 1, 1, 1, 1)
hiddenStates = m.Net3.Forward(ctx, hiddenStates, 2, 2, 1, 1, 1, 1)
return hiddenStates
}
type samOptions struct {
hiddenSize,
numHeads int
eps float32
globalAttentionLayers []int32
}
func (o samOptions) headDim() int {
return o.hiddenSize / o.numHeads
}
type samBlock struct {
Norm1 *nn.LayerNorm `gguf:"norm1"`
Attention *samAttention `gguf:"attn"`
Norm2 *nn.LayerNorm `gguf:"norm2"`
FeedForward *samMLP `gguf:"mlp"`
}
func (m *samBlock) Forward(ctx ml.Context, hiddenStates ml.Tensor, windowSize int, opts samOptions) ml.Tensor {
c, w, h := hiddenStates.Dim(0), hiddenStates.Dim(1), hiddenStates.Dim(2)
residual := hiddenStates
hiddenStates = m.Norm1.Forward(ctx, hiddenStates, opts.eps)
var pw, ph int
if windowSize > 0 {
pw = (windowSize - hiddenStates.Dim(1)%windowSize) % windowSize
ph = (windowSize - hiddenStates.Dim(2)%windowSize) % windowSize
if pw > 0 || ph > 0 {
hiddenStates = hiddenStates.Pad(ctx, 0, pw, ph, 0)
}
hiddenStates = hiddenStates.Reshape(ctx, c*windowSize, (w+pw)/windowSize, windowSize, -1)
hiddenStates = hiddenStates.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx, c, windowSize, windowSize, -1)
}
hiddenStates = m.Attention.Forward(ctx, hiddenStates, opts)
if windowSize > 0 {
hiddenStates = hiddenStates.Reshape(ctx, c*windowSize, windowSize, (w+pw)/windowSize, -1)
hiddenStates = hiddenStates.Permute(ctx, 0, 2, 1, 3)
hiddenStates = hiddenStates.Contiguous(ctx, c, w+pw, h+ph, -1)
hiddenStates = hiddenStates.Pad(ctx, 0, -pw, -ph, 0)
}
hiddenStates = hiddenStates.Add(ctx, residual)
residual = hiddenStates
hiddenStates = m.Norm2.Forward(ctx, hiddenStates, opts.eps)
hiddenStates = m.FeedForward.Forward(ctx, hiddenStates, opts)
return hiddenStates.Add(ctx, residual)
}
type samAttention struct {
QKV *nn.Linear `gguf:"qkv"`
Output *nn.Linear `gguf:"proj"`
RelativePosition *struct {
Height ml.Tensor `gguf:"h"`
Width ml.Tensor `gguf:"w"`
} `gguf:",pre:rel_pos_"`
}
func relativeCoordinates(ctx ml.Context, qn, kn int) ml.Tensor {
s := make([]int32, qn*kn)
for i := range qn {
for j := range kn {
q := i * max(kn/qn, 1)
k := j * max(qn/kn, 1)
s[i*kn+j] = int32(q - k + (kn-1)*max(qn/kn, 1))
}
}
return ctx.Input().FromInts(s, qn*kn)
}
func relativePositions(ctx ml.Context, positions ml.Tensor, qn, kn int) ml.Tensor {
maxRelativeDistance := 2*max(qn, kn) - 1
if positions.Dim(1) != maxRelativeDistance {
// a linear interpolation kernel isn't available, so approximate with bilinear interpolation
positions = positions.Interpolate(ctx, [4]int{positions.Dim(0), maxRelativeDistance, 1, 1}, ml.SamplingModeBilinear)
}
rc := relativeCoordinates(ctx, qn, kn)
return positions.Rows(ctx, rc).Reshape(ctx, positions.Dim(0), kn, qn)
}
func (m *samAttention) decomposedRelativePositions(ctx ml.Context, query ml.Tensor, qn, kn []int) (ml.Tensor, ml.Tensor) {
qh, qw := qn[0], qn[1]
kh, kw := kn[0], kn[1]
rh := relativePositions(ctx, m.RelativePosition.Height, qh, kh)
rw := relativePositions(ctx, m.RelativePosition.Width, qw, kw)
query = query.Contiguous(ctx, query.Dim(0), qw, qh, -1)
rh = rh.Mulmat(ctx, query).Reshape(ctx, 1, kh, qh*qw, -1)
rw = rw.Mulmat(ctx, query.Permute(ctx, 0, 2, 1, 3)).Permute(ctx, 0, 2, 1, 3).Contiguous(ctx, kw, 1, qh*qw, -1)
return rh, rw
}
func (m *samAttention) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts samOptions) ml.Tensor {
w, h, b := hiddenStates.Dim(1), hiddenStates.Dim(2), hiddenStates.Dim(3)
qkv := m.QKV.Forward(ctx, hiddenStates)
qkv = qkv.Reshape(ctx, opts.headDim(), -1, w*h, b)
chunks := qkv.Chunk(ctx, 1, opts.numHeads)
query, key, value := chunks[0], chunks[1], chunks[2]
ctx.Forward(query, key, value)
query = query.Permute(ctx, 0, 2, 1, 3)
rh, rw := m.decomposedRelativePositions(ctx, query, []int{h, w}, []int{h, w})
mask := rh.Repeat(ctx, 0, rw.Dim(0)).Add(ctx, rw)
mask = mask.Reshape(ctx, h*w, -1, opts.numHeads, b)
key = key.Permute(ctx, 0, 2, 1, 3)
scores := key.MulmatFullPrec(ctx, query)
scores = scores.Scale(ctx, 1/math.Sqrt(float64(opts.headDim())))
scores = scores.Add(ctx, mask)
scores = scores.Softmax(ctx)
value = value.Permute(ctx, 1, 2, 0, 3).Contiguous(ctx)
attention := value.Mulmat(ctx, scores)
attention = attention.Permute(ctx, 0, 2, 1, 3)
attention = attention.Contiguous(ctx, -1, w, h, b)
return m.Output.Forward(ctx, attention)
}
type samMLP struct {
Lin1 *nn.Linear `gguf:"lin1"`
Lin2 *nn.Linear `gguf:"lin2"`
}
func (m *samMLP) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts samOptions) ml.Tensor {
return m.Lin2.Forward(ctx, m.Lin1.Forward(ctx, hiddenStates).GELU(ctx))
}
type LayerNorm2D struct {
Weight ml.Tensor `gguf:"weight"`
Bias ml.Tensor `gguf:"bias"`
}
func (ln *LayerNorm2D) Forward(ctx ml.Context, x ml.Tensor, eps float32) ml.Tensor {
x = x.Permute(ctx, 1, 2, 0, 3).Contiguous(ctx)
u := x.Mean(ctx)
d := x.Sub(ctx, u)
s := d.Sqr(ctx).Mean(ctx)
x = d.Div(ctx, s.Add(ctx, ctx.Input().FromFloats([]float32{eps}, 1)).Sqrt(ctx))
x = x.Mul(ctx, ln.Weight).Add(ctx, ln.Bias)
return x.Permute(ctx, 2, 0, 1, 3).Contiguous(ctx)
}
type samNeck struct {
C1 *nn.Conv2D `gguf:"0"`
LN1 *LayerNorm2D `gguf:"1"`
C2 *nn.Conv2D `gguf:"2"`
LN2 *LayerNorm2D `gguf:"3"`
}
func (m *samNeck) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts samOptions) ml.Tensor {
hiddenStates = m.C1.Forward(ctx, hiddenStates, 1, 1, 0, 0, 1, 1)
hiddenStates = m.LN1.Forward(ctx, hiddenStates, opts.eps)
hiddenStates = m.C2.Forward(ctx, hiddenStates, 1, 1, 1, 1, 1, 1)
hiddenStates = m.LN2.Forward(ctx, hiddenStates, opts.eps)
return hiddenStates
}
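
The window-attention reshuffle in samBlock.Forward is easier to see on plain indices. A hedged sketch under simplifying assumptions (a toy index grid, ignoring the channel dimension): pad the grid so both sides divide the window size, then regroup it into windowSize x windowSize tiles that attend only within themselves; -1 marks padding cells that the negative Pad at the end strips off again.

package main

import "fmt"

// windows splits an h x w grid (padded to multiples of size) into
// size x size tiles, mirroring the Reshape/Permute dance in samBlock.
func windows(h, w, size int) [][][]int {
	ph, pw := (size-h%size)%size, (size-w%size)%size
	H, W := h+ph, w+pw
	var tiles [][][]int
	for ty := 0; ty < H/size; ty++ {
		for tx := 0; tx < W/size; tx++ {
			tile := make([][]int, size)
			for y := range tile {
				tile[y] = make([]int, size)
				for x := range tile[y] {
					gy, gx := ty*size+y, tx*size+x
					idx := -1 // padding cell
					if gy < h && gx < w {
						idx = gy*w + gx
					}
					tile[y][x] = idx
				}
			}
			tiles = append(tiles, tile)
		}
	}
	return tiles
}

func main() {
	// A 3x5 grid with window size 2 pads to 4x6 and yields six 2x2 tiles.
	fmt.Println(len(windows(3, 5, 2))) // 6
}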

View File

@@ -0,0 +1,140 @@
package deepseekocr
import (
"math"
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/ml/nn/fast"
"github.com/ollama/ollama/ml/nn/rope"
)
type textModel struct {
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
Blocks []textBlock `gguf:"blk"`
OutputNorm *nn.RMSNorm `gguf:"output_norm"`
Output *nn.Linear `gguf:"output"`
Options textOptions
}
func (m *textModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
return m.Options.applyRotaryPositionalEmbedding(ctx, key, shift), nil
}
type textOptions struct {
hiddenSize,
numHeads,
numKVHeads,
numExperts,
numExpertsUsed int
ropeBase,
ropeScale,
eps float32
}
func (o textOptions) headDim() int {
return o.hiddenSize / o.numHeads
}
func (o textOptions) applyRotaryPositionalEmbedding(ctx ml.Context, t, p ml.Tensor) ml.Tensor {
return fast.RoPE(ctx, t, p, o.headDim(), o.ropeBase, 1/o.ropeScale, rope.WithTypeNeoX())
}
type textBlock struct {
AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
Attention *textAttention
MLPNNorm *nn.RMSNorm `gguf:"ffn_norm"`
FeedForward textFeedForward
}
func (m *textBlock) Forward(ctx ml.Context, hiddenStates, positions, outputs ml.Tensor, cache kvcache.Cache, opts textOptions) ml.Tensor {
residual := hiddenStates
hiddenStates = m.AttentionNorm.Forward(ctx, hiddenStates, opts.eps)
hiddenStates = m.Attention.Forward(ctx, hiddenStates, positions, cache, opts)
if outputs != nil {
hiddenStates = hiddenStates.Rows(ctx, outputs)
residual = residual.Rows(ctx, outputs)
}
hiddenStates = hiddenStates.Add(ctx, residual)
residual = hiddenStates
hiddenStates = m.MLPNNorm.Forward(ctx, hiddenStates, opts.eps)
hiddenStates = m.FeedForward.Forward(ctx, hiddenStates, opts)
return hiddenStates.Add(ctx, residual)
}
type textAttention struct {
Query *nn.Linear `gguf:"attn_q"`
Key *nn.Linear `gguf:"attn_k"`
Value *nn.Linear `gguf:"attn_v"`
Output *nn.Linear `gguf:"attn_output"`
}
func (m *textAttention) Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, cache kvcache.Cache, opts textOptions) ml.Tensor {
query := m.Query.Forward(ctx, hiddenStates)
query = query.Reshape(ctx, opts.headDim(), opts.numHeads, -1)
key := m.Key.Forward(ctx, hiddenStates)
key = key.Reshape(ctx, opts.headDim(), opts.numKVHeads, -1)
value := m.Value.Forward(ctx, hiddenStates)
value = value.Reshape(ctx, opts.headDim(), opts.numKVHeads, -1)
query = opts.applyRotaryPositionalEmbedding(ctx, query, positions)
key = opts.applyRotaryPositionalEmbedding(ctx, key, positions)
attention := nn.Attention(ctx, query, key, value, 1./math.Sqrt(float64(opts.headDim())), cache)
attention = attention.Reshape(ctx, -1, attention.Dim(2))
return m.Output.Forward(ctx, attention)
}
type textFeedForward interface {
Forward(ml.Context, ml.Tensor, textOptions) ml.Tensor
}
type textMoe struct {
Router *nn.Linear `gguf:"ffn_gate_inp"`
Gate *nn.LinearBatch `gguf:"ffn_gate_exps"`
Up *nn.LinearBatch `gguf:"ffn_up_exps"`
Down *nn.LinearBatch `gguf:"ffn_down_exps"`
SharedExperts *textMLP `gguf:",suf:_shexp"`
}
func (m *textMoe) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts textOptions) ml.Tensor {
scores := m.Router.Forward(ctx, hiddenStates).Softmax(ctx)
indices := scores.TopK(ctx, opts.numExpertsUsed)
weights := scores.Reshape(ctx, 1, opts.numExperts, hiddenStates.Dim(1)).Rows(ctx, indices)
experts := hiddenStates.Reshape(ctx, hiddenStates.Dim(0), 1, hiddenStates.Dim(1))
experts = m.Gate.Forward(ctx, experts, indices).SILU(ctx, m.Up.Forward(ctx, experts, indices))
experts = m.Down.Forward(ctx, experts, indices)
experts = experts.Mul(ctx, weights)
expert := func(i int) ml.Tensor {
return experts.View(
ctx, i*experts.Stride(1), experts.Dim(0), experts.Stride(2), experts.Dim(2),
)
}
routedStates := expert(0)
for i := 1; i < opts.numExpertsUsed; i++ {
routedStates = routedStates.Add(ctx, expert(i))
}
sharedStates := m.SharedExperts.Forward(ctx, hiddenStates, opts)
return routedStates.Add(ctx, sharedStates)
}
type textMLP struct {
Gate *nn.Linear `gguf:"ffn_gate"`
Up *nn.Linear `gguf:"ffn_up"`
Down *nn.Linear `gguf:"ffn_down"`
}
func (m *textMLP) Forward(ctx ml.Context, hiddenStates ml.Tensor, _ textOptions) ml.Tensor {
hiddenStates = m.Gate.Forward(ctx, hiddenStates).SILU(ctx, m.Up.Forward(ctx, hiddenStates))
return m.Down.Forward(ctx, hiddenStates)
}
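
The expert mixing in textMoe.Forward is dense tensor code, but the routing itself is simple: softmax the router logits, keep the top-k experts, and mix their outputs by the routing weights before adding the always-on shared expert. A toy sketch on scalars, with stubbed expert outputs (the real model mixes hidden-state vectors via tensor views):

package main

import (
	"fmt"
	"math"
	"sort"
)

func main() {
	// Router logits for 4 experts, top-2 routing (illustrative values).
	logits := []float64{1.0, 3.0, 2.0, 0.5}
	k := 2

	// Softmax over all experts, as in m.Router.Forward(...).Softmax(ctx).
	scores := make([]float64, len(logits))
	var sum float64
	for i, l := range logits {
		scores[i] = math.Exp(l)
		sum += scores[i]
	}
	for i := range scores {
		scores[i] /= sum
	}

	// Top-k expert indices, as in scores.TopK(ctx, numExpertsUsed).
	idx := make([]int, len(scores))
	for i := range idx {
		idx[i] = i
	}
	sort.Slice(idx, func(a, b int) bool { return scores[idx[a]] > scores[idx[b]] })
	top := idx[:k]

	// Stub per-expert outputs; the real code computes these with the
	// gate/up/down projections and accumulates per-expert views.
	expertOut := []float64{0.5, -1.0, 2.0, 0.25}
	shared := 0.1 // stub shared-expert output

	out := shared
	for _, e := range top {
		out += scores[e] * expertOut[e]
	}
	fmt.Printf("routed to experts %v, output %.4f\n", top, out)
}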

View File

@@ -0,0 +1,117 @@
package deepseekocr
import (
"math"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
)
type visionModel struct {
PatchEmbedding *nn.Conv2D `gguf:"patch_embd"`
ClassEmbedding ml.Tensor `gguf:"class_embd"`
PositionEmbedding *nn.Embedding `gguf:"position_embd"`
PreLayerNorm *nn.LayerNorm `gguf:"pre_layrnorm"`
Blocks []visionBlock `gguf:"blk"`
Options visionOptions
}
func (m *visionModel) absolutePositionEmbedding(ctx ml.Context, embeds ml.Tensor) ml.Tensor {
numPatches := m.Options.imageSize / m.Options.patchSize * m.Options.imageSize / m.Options.patchSize
positions := ctx.Arange(0, float32(numPatches+1), 1, ml.DTypeI32)
positionEmbeds := m.PositionEmbedding.Forward(ctx, positions)
source := int(math.Sqrt(float64(positionEmbeds.Dim(1) - 1)))
target := int(math.Sqrt(float64(embeds.Dim(1) - 1)))
if source != target {
newPositionEmbeds := positionEmbeds.Slice(ctx, 1, 1, positionEmbeds.Dim(1), 1)
newPositionEmbeds = newPositionEmbeds.Reshape(ctx, -1, source, source)
newPositionEmbeds = newPositionEmbeds.Permute(ctx, 2, 0, 1, 3).Contiguous(ctx)
newPositionEmbeds = newPositionEmbeds.Interpolate(ctx, [4]int{target, target, embeds.Dim(0), 1}, ml.SamplingModeBilinear)
newPositionEmbeds = newPositionEmbeds.Permute(ctx, 1, 2, 0, 3)
newPositionEmbeds = newPositionEmbeds.Contiguous(ctx, -1, target*target)
positionEmbeds = positionEmbeds.Slice(ctx, 1, 0, 1, 1).Concat(ctx, newPositionEmbeds, 1)
}
return positionEmbeds
}
func (m *visionModel) Forward(ctx ml.Context, pixelValues, patchEmbeds ml.Tensor) ml.Tensor {
if patchEmbeds == nil {
patchEmbeds = m.PatchEmbedding.Forward(ctx, pixelValues, m.Options.patchSize, m.Options.patchSize, 0, 0, 1, 1)
}
patchEmbeds = patchEmbeds.Reshape(ctx, -1, patchEmbeds.Dim(2), patchEmbeds.Dim(3))
patchEmbeds = patchEmbeds.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)
classEmbeds := m.ClassEmbedding.Repeat(ctx, 2, patchEmbeds.Dim(2))
embeds := classEmbeds.Concat(ctx, patchEmbeds, 1)
embeds = embeds.Add(ctx, m.absolutePositionEmbedding(ctx, embeds))
hiddenStates := m.PreLayerNorm.Forward(ctx, embeds, m.Options.eps)
for _, block := range m.Blocks {
hiddenStates = block.Forward(ctx, hiddenStates, m.Options)
}
return hiddenStates
}
type visionOptions struct {
hiddenSize,
numHeads int
eps float32
imageSize, patchSize int
}
func (o visionOptions) headDim() int {
return o.hiddenSize / o.numHeads
}
type visionBlock struct {
Norm1 *nn.LayerNorm `gguf:"layer_norm1"`
Attention *visionAttention `gguf:"self_attn"`
Norm2 *nn.LayerNorm `gguf:"layer_norm2"`
FeedForward *visionMLP `gguf:"mlp"`
}
func (m *visionBlock) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts visionOptions) ml.Tensor {
residual := hiddenStates
hiddenStates = m.Norm1.Forward(ctx, hiddenStates, opts.eps)
hiddenStates = m.Attention.Forward(ctx, hiddenStates, opts)
hiddenStates = hiddenStates.Add(ctx, residual)
residual = hiddenStates
hiddenStates = m.Norm2.Forward(ctx, hiddenStates, opts.eps)
hiddenStates = m.FeedForward.Forward(ctx, hiddenStates)
hiddenStates = hiddenStates.Add(ctx, residual)
return hiddenStates
}
type visionAttention struct {
QKV *nn.Linear `gguf:"qkv_proj"`
Output *nn.Linear `gguf:"out_proj"`
}
func (m *visionAttention) Forward(ctx ml.Context, t ml.Tensor, opts visionOptions) ml.Tensor {
qkv := m.QKV.Forward(ctx, t)
qkv = qkv.Reshape(ctx, opts.headDim(), -1, qkv.Dim(1), qkv.Dim(2))
chunks := qkv.Chunk(ctx, 1, opts.numHeads)
query, key, value := chunks[0], chunks[1], chunks[2]
attention := nn.Attention(ctx, query, key, value, 1/math.Sqrt(float64(opts.headDim())), nil)
attention = attention.Reshape(ctx, -1, attention.Dim(2), attention.Dim(3))
return m.Output.Forward(ctx, attention)
}
type visionMLP struct {
FC1 *nn.Linear `gguf:"fc1"`
FC2 *nn.Linear `gguf:"fc2"`
}
func (m *visionMLP) Forward(ctx ml.Context, t ml.Tensor) ml.Tensor {
return m.FC2.Forward(ctx, m.FC1.Forward(ctx, t).QuickGELU(ctx))
}
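
The source/target arithmetic in absolutePositionEmbedding strips the class token before inferring the square patch grid, and interpolation only happens when the stored grid differs from the current one. A quick sketch with assumed grid sizes:

package main

import (
	"fmt"
	"math"
)

func main() {
	numPositions := 577 // 1 class token + a 24x24 patch grid (assumption)
	embedsLen := 1090   // 1 class token + a 33x33 patch grid (assumption)
	source := int(math.Sqrt(float64(numPositions - 1)))
	target := int(math.Sqrt(float64(embedsLen - 1)))
	fmt.Println(source, target, source != target) // 24 33 true -> interpolate
}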

View File

@@ -3,6 +3,7 @@ package models
import (
_ "github.com/ollama/ollama/model/models/bert"
_ "github.com/ollama/ollama/model/models/deepseek2"
_ "github.com/ollama/ollama/model/models/deepseekocr"
_ "github.com/ollama/ollama/model/models/gemma2"
_ "github.com/ollama/ollama/model/models/gemma3"
_ "github.com/ollama/ollama/model/models/gemma3n"
@@ -11,6 +12,7 @@ import (
_ "github.com/ollama/ollama/model/models/llama4"
_ "github.com/ollama/ollama/model/models/mistral3"
_ "github.com/ollama/ollama/model/models/mllama"
_ "github.com/ollama/ollama/model/models/nomicbert"
_ "github.com/ollama/ollama/model/models/qwen2"
_ "github.com/ollama/ollama/model/models/qwen25vl"
_ "github.com/ollama/ollama/model/models/qwen3"

View File

@@ -0,0 +1,170 @@
package nomicbert
import (
"cmp"
"math"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/ml/nn/fast"
"github.com/ollama/ollama/ml/nn/pooling"
"github.com/ollama/ollama/ml/nn/rope"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
)
type Model struct {
model.Base
model.TextProcessor
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
TypeEmbedding *nn.Embedding `gguf:"token_types"`
TokenEmbeddingNorm *nn.LayerNorm `gguf:"token_embd_norm"`
Layers []EncoderLayer `gguf:"blk"`
Options
}
type Options struct {
hiddenSize int
numHeads int
headDim int
eps float32
poolingType pooling.Type
normalize bool
ropeFreqBase float32
}
// Single Encoder Layer
type EncoderLayer struct {
*Attention
AttentionNorm *nn.LayerNorm `gguf:"attn_output_norm"`
*MLP
MLPNorm *nn.LayerNorm `gguf:"layer_output_norm"`
}
type Attention struct {
QKV *nn.Linear `gguf:"attn_qkv"`
Output *nn.Linear `gguf:"attn_output"`
}
type MLP struct {
Gate *nn.Linear `gguf:"ffn_gate"`
Up *nn.Linear `gguf:"ffn_up"`
Down *nn.Linear `gguf:"ffn_down"`
}
func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
hiddenStates := m.TokenEmbedding.Forward(ctx, batch.Inputs)
typeEmbed := m.TypeEmbedding.Weight.Slice(ctx, 1, 0, 1, 1)
hiddenStates = hiddenStates.Add(ctx, typeEmbed)
hiddenStates = m.TokenEmbeddingNorm.Forward(ctx, hiddenStates, m.eps)
positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
for _, layer := range m.Layers {
hiddenStates = layer.Forward(ctx, hiddenStates, positions, &m.Options)
}
hiddenStates = m.poolingType.Forward(ctx, hiddenStates)
if m.normalize {
hiddenStates = hiddenStates.L2Norm(ctx, 1e-12)
}
return hiddenStates, nil
}
func (e *EncoderLayer) Forward(ctx ml.Context, hiddenStates ml.Tensor, positions ml.Tensor, opts *Options) ml.Tensor {
residual := hiddenStates
hiddenStates = e.Attention.Forward(ctx, hiddenStates, positions, opts)
hiddenStates = hiddenStates.Add(ctx, residual)
hiddenStates = e.AttentionNorm.Forward(ctx, hiddenStates, opts.eps)
residual = hiddenStates
hiddenStates = e.MLP.Forward(ctx, hiddenStates)
hiddenStates = hiddenStates.Add(ctx, residual)
hiddenStates = e.MLPNorm.Forward(ctx, hiddenStates, opts.eps)
return hiddenStates
}
func (a *Attention) Forward(ctx ml.Context, hiddenStates ml.Tensor, positions ml.Tensor, opts *Options) ml.Tensor {
batchSize := hiddenStates.Dim(1)
qkv := a.QKV.Forward(ctx, hiddenStates)
qkv = qkv.Reshape(ctx, opts.headDim, opts.numHeads*3, batchSize)
chunks := qkv.Chunk(ctx, 1, opts.numHeads)
query, key, value := chunks[0], chunks[1], chunks[2]
query = fast.RoPE(ctx, query, positions, opts.headDim, opts.ropeFreqBase, 1.0, rope.WithTypeNeoX())
key = fast.RoPE(ctx, key, positions, opts.headDim, opts.ropeFreqBase, 1.0, rope.WithTypeNeoX())
attention := nn.Attention(ctx, query, key, value, 1.0/math.Sqrt(float64(opts.headDim)), nil)
attention = attention.Reshape(ctx, opts.hiddenSize, batchSize)
return a.Output.Forward(ctx, attention)
}
func (m *MLP) Forward(ctx ml.Context, hiddenStates ml.Tensor) ml.Tensor {
hidden := m.Gate.Forward(ctx, hiddenStates).SILU(ctx, m.Up.Forward(ctx, hiddenStates))
return m.Down.Forward(ctx, hidden)
}
func New(c fs.Config) (model.Model, error) {
hiddenSize := int(c.Uint("embedding_length"))
numHeads := int(c.Uint("attention.head_count"))
headDim := hiddenSize / numHeads
processor := model.NewWordPiece(
&model.Vocabulary{
Values: c.Strings("tokenizer.ggml.tokens"),
Scores: c.Floats("tokenizer.ggml.scores"),
Types: c.Ints("tokenizer.ggml.token_type"),
AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
BOS: []int32{
int32(cmp.Or(
c.Uint("tokenizer.ggml.cls_token_id"),
c.Uint("tokenizer.ggml.bos_token_id"),
)),
},
AddEOS: c.Bool("tokenizer.ggml.add_eos_token", true),
EOS: []int32{
int32(cmp.Or(
c.Uint("tokenizer.ggml.separator_token_id"),
c.Uint("tokenizer.ggml.eos_token_id"),
)),
},
},
false,
)
return &Model{
TextProcessor: processor,
Layers: make([]EncoderLayer, c.Uint("block_count")),
Options: Options{
hiddenSize: hiddenSize,
numHeads: numHeads,
headDim: headDim,
eps: c.Float("attention.layer_norm_epsilon"),
poolingType: pooling.Type(c.Uint("pooling_type")),
normalize: c.Bool("normalize_embeddings", false),
ropeFreqBase: c.Float("rope.freq_base", 1000.0),
},
}, nil
}
func init() {
model.Register("nomic-bert", New)
model.Register("nomic-bert_embed", New)
}
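
The cmp.Or calls in New pick the first non-zero token id, so a configured cls/separator id wins over the bos/eos id and the latter serves as a fallback. A minimal illustration:

package main

import (
	"cmp"
	"fmt"
)

func main() {
	// cls_token_id set (101): it wins.
	fmt.Println(cmp.Or(uint32(101), uint32(1))) // 101
	// cls_token_id absent (zero value): fall back to bos_token_id.
	fmt.Println(cmp.Or(uint32(0), uint32(1))) // 1
}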

model/renderers/cogito.go Normal file
View File

@@ -0,0 +1,129 @@
package renderers
import (
"encoding/json"
"strings"
"github.com/ollama/ollama/api"
)
type CogitoRenderer struct {
isThinking bool
}
func (r *CogitoRenderer) Render(messages []api.Message, tools []api.Tool, thinkValue *api.ThinkValue) (string, error) {
var sb strings.Builder
defaultPrompt := "You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco."
// thinking is enabled: model must support it AND user must request it (true)
enableThinking := r.isThinking && (thinkValue != nil && thinkValue.Bool())
var systemPrompt string
var conversationMessages []api.Message
if len(messages) > 0 && messages[0].Role == "system" {
systemPrompt = messages[0].Content
conversationMessages = messages[1:]
} else {
conversationMessages = messages
}
var finalSystemPrompt string
if enableThinking {
finalSystemPrompt = "Enable deep thinking subroutine.\n\n" + defaultPrompt
if systemPrompt != "" {
finalSystemPrompt += "\n\n" + systemPrompt + "\n\n"
}
} else {
finalSystemPrompt = defaultPrompt
if systemPrompt != "" {
finalSystemPrompt += "\n\n" + systemPrompt
}
}
if len(tools) > 0 {
if finalSystemPrompt != "" {
finalSystemPrompt += "\nYou have the following functions available:\n"
} else {
finalSystemPrompt = "You have the following functions available:\n"
}
for _, tool := range tools {
toolJSON, _ := json.MarshalIndent(tool, "", " ") // TODO(gguo): double check json format
finalSystemPrompt += "```json\n" + string(toolJSON) + "\n```\n"
}
}
sb.WriteString("<begin▁of▁sentence>" + finalSystemPrompt)
outputsOpen := false
isLastUser := false
for i, message := range conversationMessages {
switch message.Role {
case "user":
isLastUser = true
sb.WriteString("<User>" + message.Content + "<Assistant>")
case "assistant":
isLastUser = false
if len(message.ToolCalls) > 0 {
if message.Content != "" {
sb.WriteString(message.Content)
}
sb.WriteString("<tool▁calls▁begin>")
for j, toolCall := range message.ToolCalls {
sb.WriteString("<tool▁call▁begin>function<tool▁sep>" + toolCall.Function.Name)
argsJSON, _ := json.Marshal(toolCall.Function.Arguments)
sb.WriteString("\n```json\n" + string(argsJSON) + "\n```")
sb.WriteString("<tool▁call▁end>")
if j < len(message.ToolCalls)-1 {
sb.WriteString("\n")
}
}
sb.WriteString("<tool▁calls▁end><end▁of▁sentence>")
} else {
sb.WriteString(message.Content + "<end▁of▁sentence>")
}
case "tool":
isLastUser = false
if !outputsOpen {
sb.WriteString("<tool▁outputs▁begin>")
outputsOpen = true
}
sb.WriteString("<tool▁output▁begin>" + message.Content + "<tool▁output▁end>")
hasNextTool := i+1 < len(conversationMessages) && conversationMessages[i+1].Role == "tool"
if hasNextTool {
sb.WriteString("\n")
} else {
sb.WriteString("<tool▁outputs▁end>")
outputsOpen = false
}
}
}
if outputsOpen {
sb.WriteString("<tool▁outputs▁end>")
}
if !isLastUser {
sb.WriteString("<Assistant>")
}
if enableThinking {
sb.WriteString("<think>\n")
}
return sb.String(), nil
}
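
A minimal usage sketch, assuming it runs inside the renderers package; the expected prefix and suffix below match the "single_turn_thinking_true" case in the tests that follow:

r := &CogitoRenderer{isThinking: true}
prompt, err := r.Render(
	[]api.Message{{Role: "user", Content: "Hello"}},
	nil, // no tools
	&api.ThinkValue{Value: true},
)
// err == nil; prompt begins with "<begin▁of▁sentence>Enable deep thinking subroutine."
// and ends with "<Assistant><think>\n".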

View File

@@ -0,0 +1,491 @@
package renderers
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
func TestCogitoRenderer(t *testing.T) {
tests := []struct {
name string
messages []api.Message
tools []api.Tool
thinkValue *api.ThinkValue
expected string
}{
{
name: "basic user message",
messages: []api.Message{
{Role: "user", Content: "Hello, how are you?"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>Hello, how are you?<Assistant>`,
},
{
name: "basic with system message",
messages: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Hello, how are you?"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.
You are a helpful assistant.<User>Hello, how are you?<Assistant>`,
},
{
name: "conversation with assistant response",
messages: []api.Message{
{Role: "user", Content: "What is the capital of France?"},
{Role: "assistant", Content: "The capital of France is Paris."},
{Role: "user", Content: "Fantastic!"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>What is the capital of France?<Assistant>The capital of France is Paris.<end▁of▁sentence><User>Fantastic!<Assistant>`,
},
{
name: "thinking enabled without system",
messages: []api.Message{
{Role: "user", Content: "Hello, how are you?"},
},
thinkValue: &api.ThinkValue{Value: true},
expected: `<begin▁of▁sentence>Enable deep thinking subroutine.
You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>Hello, how are you?<Assistant><think>
`,
},
{
name: "thinking enabled with system",
messages: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Hello, how are you?"},
},
thinkValue: &api.ThinkValue{Value: true},
expected: `<begin▁of▁sentence>Enable deep thinking subroutine.
You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.
You are a helpful assistant.
<User>Hello, how are you?<Assistant><think>
`,
},
{
name: "thinking disabled",
messages: []api.Message{
{Role: "user", Content: "Hello, how are you?"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>Hello, how are you?<Assistant>`,
},
{
name: "with tools",
messages: []api.Message{
{Role: "user", Content: "What's the weather like?"},
},
thinkValue: &api.ThinkValue{Value: false},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get current weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"location": {
Type: api.PropertyType{"string"},
Description: "City name",
},
},
Required: []string{"location"},
},
},
},
},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.
You have the following functions available:
` + "```json\n" + `{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather",
"parameters": {
"type": "object",
"required": [
"location"
],
"properties": {
"location": {
"type": "string",
"description": "City name"
}
}
}
}
}
` + "```\n" + `<User>What's the weather like?<Assistant>`,
},
{
name: "assistant with tool calls",
messages: []api.Message{
{Role: "user", Content: "What's the weather in Paris?"},
{
Role: "assistant",
Content: "I'll check the weather in Paris for you.",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "Paris",
},
},
},
},
},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>What's the weather in Paris?<Assistant>I'll check the weather in Paris for you.<tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>get_weather
` + "```json\n" + `{"location":"Paris"}
` + "```" + `<tool▁call▁end><tool▁calls▁end><end▁of▁sentence><Assistant>`,
},
{
name: "tool response",
messages: []api.Message{
{Role: "user", Content: "What's the weather in Paris?"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "Paris",
},
},
},
},
},
{Role: "tool", Content: "Temperature: 22°C, Sunny"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>What's the weather in Paris?<Assistant><tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>get_weather
` + "```json\n" + `{"location":"Paris"}
` + "```" + `<tool▁call▁end><tool▁calls▁end><end▁of▁sentence><tool▁outputs▁begin><tool▁output▁begin>Temperature: 22°C, Sunny<tool▁output▁end><tool▁outputs▁end><Assistant>`,
},
{
name: "multiple tool responses",
messages: []api.Message{
{Role: "user", Content: "Get weather for Paris and London"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "Paris",
},
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "London",
},
},
},
},
},
{Role: "tool", Content: "Paris: 22°C, Sunny"},
{Role: "tool", Content: "London: 18°C, Cloudy"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>Get weather for Paris and London<Assistant><tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>get_weather
` + "```json\n" + `{"location":"Paris"}
` + "```" + `<tool▁call▁end>
<tool▁call▁begin>function<tool▁sep>get_weather
` + "```json\n" + `{"location":"London"}
` + "```" + `<tool▁call▁end><tool▁calls▁end><end▁of▁sentence><tool▁outputs▁begin><tool▁output▁begin>Paris: 22°C, Sunny<tool▁output▁end>
<tool▁output▁begin>London: 18°C, Cloudy<tool▁output▁end><tool▁outputs▁end><Assistant>`,
},
{
name: "thinking with tools",
messages: []api.Message{
{Role: "user", Content: "What's the weather like?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get current weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"location": {
Type: api.PropertyType{"string"},
Description: "City name",
},
},
Required: []string{"location"},
},
},
},
},
thinkValue: &api.ThinkValue{Value: true},
expected: `<begin▁of▁sentence>Enable deep thinking subroutine.
You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.
You have the following functions available:
` + "```json\n" + `{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather",
"parameters": {
"type": "object",
"required": [
"location"
],
"properties": {
"location": {
"type": "string",
"description": "City name"
}
}
}
}
}
` + "```\n" + `<User>What's the weather like?<Assistant><think>
`,
},
// test cases based on the reference Cogito chat template
{
name: "single_turn_thinking_false",
messages: []api.Message{
{Role: "user", Content: "Hello"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>Hello<Assistant>`,
},
{
name: "single_turn_thinking_true",
messages: []api.Message{
{Role: "user", Content: "Hello"},
},
thinkValue: &api.ThinkValue{Value: true},
expected: `<begin▁of▁sentence>Enable deep thinking subroutine.
You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>Hello<Assistant><think>
`,
},
{
name: "multi_turn_thinking_false",
messages: []api.Message{
{Role: "user", Content: "Hello"},
{Role: "assistant", Content: "Hi there!"},
{Role: "user", Content: "How are you?"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>Hello<Assistant>Hi there!<end▁of▁sentence><User>How are you?<Assistant>`,
},
{
name: "multi_turn_thinking_true",
messages: []api.Message{
{Role: "user", Content: "Hello"},
{Role: "assistant", Content: "Hi there!"},
{Role: "user", Content: "How are you?"},
},
thinkValue: &api.ThinkValue{Value: true},
expected: `<begin▁of▁sentence>Enable deep thinking subroutine.
You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>Hello<Assistant>Hi there!<end▁of▁sentence><User>How are you?<Assistant><think>
`,
},
{
name: "multi_with_system_thinking_false",
messages: []api.Message{
{Role: "system", Content: "You are a helpful assistant"},
{Role: "user", Content: "Start"},
{Role: "assistant", Content: "Okay"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.
You are a helpful assistant<User>Start<Assistant>Okay<end▁of▁sentence><Assistant>`,
},
{
name: "multi_with_system_thinking_true",
messages: []api.Message{
{Role: "system", Content: "You are a helpful assistant"},
{Role: "user", Content: "Start"},
{Role: "assistant", Content: "Okay"},
},
thinkValue: &api.ThinkValue{Value: true},
expected: `<begin▁of▁sentence>Enable deep thinking subroutine.
You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.
You are a helpful assistant
<User>Start<Assistant>Okay<end▁of▁sentence><Assistant><think>
`,
},
{
name: "multi_with_system2_thinking_false",
messages: []api.Message{
{Role: "system", Content: "You are a pirate chatbot who always responds in pirate speak!"},
{Role: "user", Content: "Give me a short introduction to LLMs."},
{Role: "assistant", Content: "Arrr! I'm a pirate"},
{Role: "user", Content: "Tell me more about LLMs."},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.
You are a pirate chatbot who always responds in pirate speak!<User>Give me a short introduction to LLMs.<Assistant>Arrr! I'm a pirate<end▁of▁sentence><User>Tell me more about LLMs.<Assistant>`,
},
{
name: "multi_with_system2_thinking_true",
messages: []api.Message{
{Role: "system", Content: "You are a pirate chatbot who always responds in pirate speak!"},
{Role: "user", Content: "Give me a short introduction to LLMs."},
{Role: "assistant", Content: "Arrr! I'm a pirate"},
{Role: "user", Content: "Tell me more about LLMs."},
},
thinkValue: &api.ThinkValue{Value: true},
expected: `<begin▁of▁sentence>Enable deep thinking subroutine.
You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.
You are a pirate chatbot who always responds in pirate speak!
<User>Give me a short introduction to LLMs.<Assistant>Arrr! I'm a pirate<end▁of▁sentence><User>Tell me more about LLMs.<Assistant><think>
`,
},
// tools
{
name: "tool_calls_only_no_content",
messages: []api.Message{
{Role: "user", Content: "Get weather for Paris"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "Paris",
},
},
},
},
},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>Get weather for Paris<Assistant><tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>get_weather
` + "```json\n" + `{"location":"Paris"}
` + "```" + `<tool▁call▁end><tool▁calls▁end><end▁of▁sentence><Assistant>`,
},
{
name: "complex_tool_arguments",
messages: []api.Message{
{Role: "user", Content: "Process complex data"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "process_data",
Arguments: api.ToolCallFunctionArguments{
"items": []any{"item1", "item2", "item3"},
"config": map[string]any{
"enabled": true,
"threshold": 0.95,
"tags": []string{"important", "urgent"},
},
},
},
},
},
},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>Process complex data<Assistant><tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>process_data
` + "```json\n" + `{"config":{"enabled":true,"tags":["important","urgent"],"threshold":0.95},"items":["item1","item2","item3"]}
` + "```" + `<tool▁call▁end><tool▁calls▁end><end▁of▁sentence><Assistant>`,
},
{
name: "empty_messages",
messages: []api.Message{
{Role: "system", Content: ""},
{Role: "user", Content: "Hello"},
{Role: "assistant", Content: ""},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>Hello<Assistant><end▁of▁sentence><Assistant>`,
},
{
name: "thinking_with_empty_assistant_content",
messages: []api.Message{
{Role: "user", Content: "Think about this"},
{Role: "assistant", Content: ""},
},
thinkValue: &api.ThinkValue{Value: true},
expected: `<begin▁of▁sentence>Enable deep thinking subroutine.
You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>Think about this<Assistant><end▁of▁sentence><Assistant><think>
`,
},
{
name: "multiple_system_messages",
messages: []api.Message{
{Role: "system", Content: "First instruction"},
{Role: "system", Content: "Second instruction"},
{Role: "user", Content: "Hello"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.
First instruction<User>Hello<Assistant>`,
},
{
name: "special_characters_in_content",
messages: []api.Message{
{Role: "user", Content: "What about <|special|> tokens and \"quotes\"?"},
{Role: "assistant", Content: "They're handled normally in content."},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>What about <|special|> tokens and "quotes"?<Assistant>They're handled normally in content.<end▁of▁sentence><Assistant>`,
},
{
name: "long_conversation_multiple_rounds",
messages: []api.Message{
{Role: "user", Content: "Hi"},
{Role: "assistant", Content: "Hello!"},
{Role: "user", Content: "How are you?"},
{Role: "assistant", Content: "Good, thanks!"},
{Role: "user", Content: "What's the weather?"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<begin▁of▁sentence>You are Cogito, an AI assistant created by Deep Cogito, which is an AI research lab based in San Francisco.<User>Hi<Assistant>Hello!<end▁of▁sentence><User>How are you?<Assistant>Good, thanks!<end▁of▁sentence><User>What's the weather?<Assistant>`,
},
}
renderer := &CogitoRenderer{isThinking: true}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rendered, err := renderer.Render(tt.messages, tt.tools, tt.thinkValue)
if err != nil {
t.Fatalf("Render() error = %v", err)
}
if diff := cmp.Diff(tt.expected, rendered); diff != "" {
t.Errorf("Render() mismatch (-want +got):\n%s", diff)
}
})
}
}

View File

@@ -56,6 +56,9 @@ func rendererForName(name string) Renderer {
case "qwen3-vl-thinking":
renderer := &Qwen3VLRenderer{isThinking: true, useImgTags: RenderImgTags}
return renderer
case "cogito":
renderer := &CogitoRenderer{isThinking: true}
return renderer
default:
return nil
}

View File

@@ -10,7 +10,8 @@ import (
)
type WordPiece struct {
vocab *Vocabulary
lowercase bool
}
// ggmlPrefix is the prefix used by GGML vocabularies to indicate word boundaries.
@@ -114,8 +115,10 @@ func (wpm WordPiece) Encode(s string, addSpecial bool) ([]int32, error) {
subword = ggmlPrefix + subword
}
if wpm.lowercase {
subword = strings.ToLower(subword)
}
piece = wpm.vocab.Encode(subword)
if piece >= 0 {
break
}
@@ -160,8 +163,9 @@ func (wpm WordPiece) Vocabulary() *Vocabulary {
var _ TextProcessor = (*WordPiece)(nil)
func NewWordPiece(vocab *Vocabulary, lowercase bool) WordPiece {
return WordPiece{
vocab: vocab,
lowercase: lowercase,
}
}
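
The new lowercase flag gates case folding per vocabulary: nomic-bert passes false for its cased vocabulary, while the test below passes true for the classic uncased behavior. A small self-contained sketch of the gating ("lookup" is a hypothetical stand-in for vocab.Encode):

package main

import (
	"fmt"
	"strings"
)

// encodeSubword mirrors the gating added above: fold case only when the
// vocabulary is uncased, then look the subword up.
func encodeSubword(subword string, lowercase bool, lookup func(string) int) int {
	if lowercase {
		subword = strings.ToLower(subword)
	}
	return lookup(subword)
}

func main() {
	vocab := map[string]int{"hello": 7, "Hello": 42}
	lookup := func(s string) int { return vocab[s] }
	fmt.Println(encodeSubword("Hello", true, lookup))  // 7  (uncased)
	fmt.Println(encodeSubword("Hello", false, lookup)) // 42 (cased)
}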

View File

@@ -15,7 +15,9 @@ func TestWordPiece(t *testing.T) {
AddEOS: true,
BOS: []int32{1},
EOS: []int32{2},
},
true, // lowercase
)
ids, err := wpm.Encode("Hello world!", true)
if err != nil {

parser/expandpath_test.go Normal file
View File

@@ -0,0 +1,123 @@
package parser
import (
"os"
"os/user"
"path/filepath"
"runtime"
"testing"
)
func TestExpandPath(t *testing.T) {
mockCurrentUser := func() (*user.User, error) {
return &user.User{
Username: "testuser",
HomeDir: func() string {
if os.PathSeparator == '\\' {
return filepath.FromSlash("D:/home/testuser")
}
return "/home/testuser"
}(),
}, nil
}
mockLookupUser := func(username string) (*user.User, error) {
fakeUsers := map[string]string{
"testuser": func() string {
if os.PathSeparator == '\\' {
return filepath.FromSlash("D:/home/testuser")
}
return "/home/testuser"
}(),
"anotheruser": func() string {
if os.PathSeparator == '\\' {
return filepath.FromSlash("D:/home/anotheruser")
}
return "/home/anotheruser"
}(),
}
if homeDir, ok := fakeUsers[username]; ok {
return &user.User{
Username: username,
HomeDir: homeDir,
}, nil
}
return nil, os.ErrNotExist
}
pwd, err := os.Getwd()
if err != nil {
t.Fatal(err)
}
t.Run("unix tests", func(t *testing.T) {
if runtime.GOOS == "windows" {
return
}
tests := []struct {
path string
relativeDir string
expected string
shouldErr bool
}{
{"~", "", "/home/testuser", false},
{"~/myfolder/myfile.txt", "", "/home/testuser/myfolder/myfile.txt", false},
{"~anotheruser/docs/file.txt", "", "/home/anotheruser/docs/file.txt", false},
{"~nonexistentuser/file.txt", "", "", true},
{"relative/path/to/file", "", filepath.Join(pwd, "relative/path/to/file"), false},
{"/absolute/path/to/file", "", "/absolute/path/to/file", false},
{"/absolute/path/to/file", "someotherdir/", "/absolute/path/to/file", false},
{".", pwd, pwd, false},
{".", "", pwd, false},
{"somefile", "somedir", filepath.Join(pwd, "somedir", "somefile"), false},
}
for _, test := range tests {
result, err := expandPathImpl(test.path, test.relativeDir, mockCurrentUser, mockLookupUser)
if (err != nil) != test.shouldErr {
t.Errorf("expandPathImpl(%q) returned error: %v, expected error: %v", test.path, err != nil, test.shouldErr)
}
if result != test.expected && !test.shouldErr {
t.Errorf("expandPathImpl(%q) = %q, want %q", test.path, result, test.expected)
}
}
})
t.Run("windows tests", func(t *testing.T) {
if runtime.GOOS != "windows" {
return
}
tests := []struct {
path string
relativeDir string
expected string
shouldErr bool
}{
{"~", "", "D:\\home\\testuser", false},
{"~/myfolder/myfile.txt", "", "D:\\home\\testuser\\myfolder\\myfile.txt", false},
{"~anotheruser/docs/file.txt", "", "D:\\home\\anotheruser\\docs\\file.txt", false},
{"~nonexistentuser/file.txt", "", "", true},
{"relative\\path\\to\\file", "", filepath.Join(pwd, "relative\\path\\to\\file"), false},
{"D:\\absolute\\path\\to\\file", "", "D:\\absolute\\path\\to\\file", false},
{"D:\\absolute\\path\\to\\file", "someotherdir/", "D:\\absolute\\path\\to\\file", false},
{".", pwd, pwd, false},
{".", "", pwd, false},
{"somefile", "somedir", filepath.Join(pwd, "somedir", "somefile"), false},
}
for _, test := range tests {
result, err := expandPathImpl(test.path, test.relativeDir, mockCurrentUser, mockLookupUser)
if (err != nil) != test.shouldErr {
t.Errorf("expandPathImpl(%q) returned error: %v, expected error: %v", test.path, err != nil, test.shouldErr)
}
if result != test.expected && !test.shouldErr {
t.Errorf("expandPathImpl(%q) = %q, want %q", test.path, result, test.expected)
}
}
})
}

View File

@@ -620,43 +620,43 @@ func isValidCommand(cmd string) bool {
}
}
func expandPathImpl(path, relativeDir string, currentUserFunc func() (*user.User, error), lookupUserFunc func(string) (*user.User, error)) (string, error) {
if filepath.IsAbs(path) || strings.HasPrefix(path, "\\") || strings.HasPrefix(path, "/") {
return filepath.Abs(path)
} else if strings.HasPrefix(path, "~") {
var homeDir string
if path == "~" || strings.HasPrefix(path, "~/") {
// Current user's home directory
currentUser, err := currentUserFunc()
if err != nil {
return "", fmt.Errorf("failed to get current user: %w", err)
}
homeDir = currentUser.HomeDir
path = strings.TrimPrefix(path, "~")
} else {
// Specific user's home directory
parts := strings.SplitN(path[1:], "/", 2)
userInfo, err := lookupUserFunc(parts[0])
if err != nil {
return "", fmt.Errorf("failed to find user '%s': %w", parts[0], err)
}
homeDir = userInfo.HomeDir
if len(parts) > 1 {
path = "/" + parts[1]
} else {
path = ""
}
}
path = filepath.Join(homeDir, path)
} else {
path = filepath.Join(relativeDir, path)
}
return filepath.Abs(path)
}
func expandPath(path, relativeDir string) (string, error) {
return expandPathImpl(path, relativeDir, user.Current, user.Lookup)
}
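
The injectable user-lookup functions make the tilde expansion easy to pin down without touching the real user database. A sketch mirroring the mocks in expandpath_test.go above (the path is illustrative):

mockCurrent := func() (*user.User, error) {
	return &user.User{Username: "testuser", HomeDir: "/home/testuser"}, nil
}
got, err := expandPathImpl("~/models/Modelfile", "", mockCurrent, user.Lookup)
// err == nil; on unix, got == "/home/testuser/models/Modelfile"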

View File

@@ -9,9 +9,7 @@ import (
"io"
"maps"
"os"
"os/user"
"path/filepath"
"runtime"
"strings"
"testing"
"unicode/utf16"
@@ -1128,62 +1126,3 @@ func TestFilesForModel(t *testing.T) {
})
}
}
func TestExpandPath(t *testing.T) {
home := t.TempDir()
t.Setenv("HOME", home)
t.Setenv("USERPROFILE", home)
cwd, err := os.Getwd()
if err != nil {
t.Fatal(err)
}
u, err := user.Current()
if err != nil {
t.Fatal(err)
}
volume := ""
if runtime.GOOS == "windows" {
volume = "D:"
}
cases := []struct {
input,
dir,
want string
err error
}{
{"~", "", home, nil},
{"~/path/to/file", "", filepath.Join(home, filepath.ToSlash("path/to/file")), nil},
{"~" + u.Username + "/path/to/file", "", filepath.Join(u.HomeDir, filepath.ToSlash("path/to/file")), nil},
{"~nonexistentuser/path/to/file", "", "", user.UnknownUserError("nonexistentuser")},
{"relative/path/to/file", "", filepath.Join(cwd, filepath.ToSlash("relative/path/to/file")), nil},
{volume + "/absolute/path/to/file", "", filepath.ToSlash(volume + "/absolute/path/to/file"), nil},
{volume + "/absolute/path/to/file", filepath.ToSlash("another/path"), filepath.ToSlash(volume + "/absolute/path/to/file"), nil},
{".", cwd, cwd, nil},
{".", "", cwd, nil},
{"", cwd, cwd, nil},
{"", "", cwd, nil},
{"file", "path/to", filepath.Join(cwd, filepath.ToSlash("path/to/file")), nil},
}
for _, tt := range cases {
t.Run(tt.input, func(t *testing.T) {
got, err := expandPath(tt.input, tt.dir)
// On Windows, user.Lookup does not map syscall errors to user.UnknownUserError
// so we special case the test to just check for an error.
// See https://cs.opensource.google/go/go/+/refs/tags/go1.25.1:src/os/user/lookup_windows.go;l=455
if runtime.GOOS != "windows" && !errors.Is(err, tt.err) {
t.Fatalf("expandPath(%q) error = %v, wantErr %v", tt.input, err, tt.err)
} else if tt.err != nil && err == nil {
t.Fatal("test case expected to fail on windows")
}
if got != tt.want {
t.Errorf("expandPath(%q) = %v, want %v", tt.input, got, tt.want)
}
})
}
}