diff --git a/api/types.go b/api/types.go
index 249ad0692..a27d9dc13 100644
--- a/api/types.go
+++ b/api/types.go
@@ -1018,7 +1018,7 @@ func (d Duration) MarshalJSON() ([]byte, error) {
if d.Duration < 0 {
return []byte("-1"), nil
}
- return []byte("\"" + d.Duration.String() + "\""), nil
+ return []byte("\"" + d.String() + "\""), nil
}
func (d *Duration) UnmarshalJSON(b []byte) (err error) {
@@ -1045,7 +1045,7 @@ func (d *Duration) UnmarshalJSON(b []byte) (err error) {
d.Duration = time.Duration(math.MaxInt64)
}
default:
- return fmt.Errorf("Unsupported type: '%s'", reflect.TypeOf(v))
+ return fmt.Errorf("unsupported type: '%s'", reflect.TypeOf(v))
}
return nil
diff --git a/app/tools/web_fetch.go b/app/tools/web_fetch.go
index ce019ddea..aa837930c 100644
--- a/app/tools/web_fetch.go
+++ b/app/tools/web_fetch.go
@@ -37,7 +37,7 @@ func (w *WebFetch) Description() string {
return "Crawl and extract text content from web pages"
}
-func (g *WebFetch) Schema() map[string]any {
+func (w *WebFetch) Schema() map[string]any {
schemaBytes := []byte(`{
"type": "object",
"properties": {
diff --git a/app/tools/web_search.go b/app/tools/web_search.go
index 0e79fe332..731d15935 100644
--- a/app/tools/web_search.go
+++ b/app/tools/web_search.go
@@ -46,7 +46,7 @@ func (w *WebSearch) Prompt() string {
return ""
}
-func (g *WebSearch) Schema() map[string]any {
+func (w *WebSearch) Schema() map[string]any {
schemaBytes := []byte(`{
"type": "object",
"properties": {
diff --git a/app/types/not/found.go b/app/types/not/found.go
index 9294e0155..84ebabfe4 100644
--- a/app/types/not/found.go
+++ b/app/types/not/found.go
@@ -19,10 +19,12 @@ import (
// Errors wrapping Found should provide additional context, e.g.
// fmt.Errorf("%w: %s", not.Found, key)
//
+//nolint:staticcheck
//lint:ignore ST1012 This is a sentinel error intended to be read like not.Found.
var Found = errors.New("not found")
// Available is an error that indicates that a value is not available.
//
+//nolint:staticcheck
//lint:ignore ST1012 This is a sentinel error intended to be read like not.Available.
var Available = errors.New("not available")
diff --git a/app/ui/ui.go b/app/ui/ui.go
index 86f26180f..1dafb362c 100644
--- a/app/ui/ui.go
+++ b/app/ui/ui.go
@@ -942,7 +942,7 @@ func (s *Server) chat(w http.ResponseWriter, r *http.Request) error {
} else {
onlyStandalone := true
for _, tc := range res.Message.ToolCalls {
- if !(tc.Function.Name == "web_search" || tc.Function.Name == "web_fetch") {
+ if tc.Function.Name != "web_search" && tc.Function.Name != "web_fetch" {
onlyStandalone = false
break
}
diff --git a/app/updater/updater_test.go b/app/updater/updater_test.go
index dea820c28..f635b3c7b 100644
--- a/app/updater/updater_test.go
+++ b/app/updater/updater_test.go
@@ -22,9 +22,7 @@ func TestIsNewReleaseAvailable(t *testing.T) {
var server *httptest.Server
server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/update.json" {
- w.Write([]byte(
- fmt.Sprintf(`{"version": "9.9.9", "url": "%s"}`,
- server.URL+"/9.9.9/"+Installer)))
+ fmt.Fprintf(w, `{"version": "9.9.9", "url": "%s"}`, server.URL+"/9.9.9/"+Installer)
// TODO - wire up the redirects to mimic real behavior
} else {
slog.Debug("unexpected request", "url", r.URL)
@@ -67,17 +65,16 @@ func TestBackgoundChecker(t *testing.T) {
var server *httptest.Server
server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- if r.URL.Path == "/update.json" {
- w.Write([]byte(
- fmt.Sprintf(`{"version": "9.9.9", "url": "%s"}`,
- server.URL+"/9.9.9/"+Installer)))
+ switch r.URL.Path {
+ case "/update.json":
+ fmt.Fprintf(w, `{"version": "9.9.9", "url": "%s"}`, server.URL+"/9.9.9/"+Installer)
// TODO - wire up the redirects to mimic real behavior
- } else if r.URL.Path == "/9.9.9/"+Installer {
+ case "/9.9.9/" + Installer:
buf := &bytes.Buffer{}
zw := zip.NewWriter(buf)
zw.Close()
io.Copy(w, buf)
- } else {
+ default:
slog.Debug("unexpected request", "url", r.URL)
}
}))
diff --git a/cmd/cmd.go b/cmd/cmd.go
index 079b60411..6b222c024 100644
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -1329,12 +1329,12 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
cancel()
}()
- var state *displayResponseState = &displayResponseState{}
+ var state = &displayResponseState{}
var thinkingContent strings.Builder
var latest api.ChatResponse
var fullResponse strings.Builder
- var thinkTagOpened bool = false
- var thinkTagClosed bool = false
+ var thinkTagOpened = false
+ var thinkTagClosed = false
role := "assistant"
@@ -1462,10 +1462,10 @@ func generate(cmd *cobra.Command, opts runOptions) error {
cancel()
}()
- var state *displayResponseState = &displayResponseState{}
+ var state = &displayResponseState{}
var thinkingContent strings.Builder
- var thinkTagOpened bool = false
- var thinkTagClosed bool = false
+ var thinkTagOpened = false
+ var thinkTagClosed = false
plainText := !term.IsTerminal(int(os.Stdout.Fd()))
@@ -1633,7 +1633,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
return err
}
if err := client.Heartbeat(cmd.Context()); err != nil {
- if !(strings.Contains(err.Error(), " refused") || strings.Contains(err.Error(), "could not connect")) {
+ if !strings.Contains(err.Error(), " refused") && !strings.Contains(err.Error(), "could not connect") {
return err
}
if err := startApp(cmd.Context(), client); err != nil {
diff --git a/cmd/cmd_test.go b/cmd/cmd_test.go
index 241f02b10..b604d8ce6 100644
--- a/cmd/cmd_test.go
+++ b/cmd/cmd_test.go
@@ -307,7 +307,7 @@ func TestDeleteHandler(t *testing.T) {
} else {
w.WriteHeader(http.StatusNotFound)
errPayload := `{"error":"model '%s' not found"}`
- w.Write([]byte(fmt.Sprintf(errPayload, req.Name)))
+ fmt.Fprintf(w, errPayload, req.Name)
}
return
}
diff --git a/cmd/interactive.go b/cmd/interactive.go
index d933d255f..37d3afce5 100644
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@@ -130,7 +130,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
var sb strings.Builder
var multiline MultilineState
- var thinkExplicitlySet bool = opts.Think != nil
+ var thinkExplicitlySet = opts.Think != nil
for {
line, err := scanner.Readline()
diff --git a/convert/convert.go b/convert/convert.go
index f6afd8a32..836d7309b 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -38,10 +38,10 @@ func (ModelParameters) KV(t *Tokenizer) ggml.KV {
"general.file_type": uint32(1),
"general.quantization_version": uint32(2),
"tokenizer.ggml.pre": t.Pre,
- "tokenizer.ggml.model": t.Vocabulary.Model,
- "tokenizer.ggml.tokens": t.Vocabulary.Tokens,
- "tokenizer.ggml.scores": t.Vocabulary.Scores,
- "tokenizer.ggml.token_type": t.Vocabulary.Types,
+ "tokenizer.ggml.model": t.Model,
+ "tokenizer.ggml.tokens": t.Tokens,
+ "tokenizer.ggml.scores": t.Scores,
+ "tokenizer.ggml.token_type": t.Types,
}
if len(t.Merges) > 0 {
@@ -231,20 +231,20 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
switch {
case vocabSize == 0:
- slog.Debug("vocabulary size was not explicitly set by the model", "default size", len(t.Vocabulary.Tokens))
- case vocabSize > len(t.Vocabulary.Tokens):
- slog.Debug("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
- for i := range vocabSize - len(t.Vocabulary.Tokens) {
- t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
- t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
- t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
+ slog.Debug("vocabulary size was not explicitly set by the model", "default size", len(t.Tokens))
+ case vocabSize > len(t.Tokens):
+ slog.Debug("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Tokens))
+ for i := range vocabSize - len(t.Tokens) {
+ t.Tokens = append(t.Tokens, fmt.Sprintf("[PAD%d]", i))
+ t.Scores = append(t.Scores, -1)
+ t.Types = append(t.Types, tokenTypeUserDefined)
}
- case vocabSize < len(t.Vocabulary.Tokens):
- slog.Debug("vocabulary is larger than expected", "want", vocabSize, "got", len(t.Vocabulary.Tokens))
- p.VocabSize = uint32(len(t.Vocabulary.Tokens))
- p.TextModel.VocabSize = uint32(len(t.Vocabulary.Tokens))
+ case vocabSize < len(t.Tokens):
+ slog.Debug("vocabulary is larger than expected", "want", vocabSize, "got", len(t.Tokens))
+ p.VocabSize = uint32(len(t.Tokens))
+ p.TextModel.VocabSize = uint32(len(t.Tokens))
default:
- slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
+ slog.Debug("vocabulary", "size", len(t.Tokens))
}
ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
diff --git a/convert/convert_qwen2.go b/convert/convert_qwen2.go
index f3347795f..911223e1d 100644
--- a/convert/convert_qwen2.go
+++ b/convert/convert_qwen2.go
@@ -62,7 +62,7 @@ func (q *qwen2Model) Tensors(ts []Tensor) []*ggml.Tensor {
return out
}
-func (p *qwen2Model) Replacements() []string {
+func (q *qwen2Model) Replacements() []string {
return []string{
"lm_head", "output",
"model.embed_tokens", "token_embd",
diff --git a/convert/convert_qwen25vl.go b/convert/convert_qwen25vl.go
index 6e4c96408..23de7550e 100644
--- a/convert/convert_qwen25vl.go
+++ b/convert/convert_qwen25vl.go
@@ -90,9 +90,9 @@ func (q *qwen25VLModel) Tensors(ts []Tensor) []*ggml.Tensor {
return out
}
-func (p *qwen25VLModel) Replacements() []string {
+func (q *qwen25VLModel) Replacements() []string {
return append(
- p.qwen2Model.Replacements(),
+ q.qwen2Model.Replacements(),
"visual", "v",
"blocks", "blk",
"attn.proj", "attn_out",
diff --git a/convert/reader_torch.go b/convert/reader_torch.go
index 7f6d6c872..4ce83fb1b 100644
--- a/convert/reader_torch.go
+++ b/convert/reader_torch.go
@@ -54,6 +54,6 @@ func (t torch) Clone() Tensor {
}
}
-func (pt torch) WriteTo(w io.Writer) (int64, error) {
+func (t torch) WriteTo(w io.Writer) (int64, error) {
return 0, nil
}
diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index 6ce9724f2..3d8672bb3 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -300,9 +300,9 @@ func (s Tensors) Items(prefix ...string) []*Tensor {
return items
}
-func (ts Tensors) GroupLayers() map[string]Layer {
+func (s Tensors) GroupLayers() map[string]Layer {
layers := make(map[string]Layer)
- for _, t := range ts.items {
+ for _, t := range s.items {
parts := strings.Split(t.Name, ".")
if index := slices.IndexFunc(parts, func(s string) bool { return s == "blk" || s == "mm" }); index != -1 {
if len(parts) > index+2 {
diff --git a/fs/ggml/type.go b/fs/ggml/type.go
index fb69352b6..8fcf719e2 100644
--- a/fs/ggml/type.go
+++ b/fs/ggml/type.go
@@ -136,8 +136,8 @@ func (t FileType) Value() uint32 {
return uint32(t)
}
-func (ftype FileType) ToTensorType() TensorType {
- switch ftype {
+func (t FileType) ToTensorType() TensorType {
+ switch t {
case FileTypeF32:
return TensorTypeF32
case FileTypeF16:
@@ -177,7 +177,7 @@ func (ftype FileType) ToTensorType() TensorType {
case fileTypeMXFP4:
return TensorTypeMXFP4
default:
- slog.Warn("unsupported file type", "type", ftype)
+ slog.Warn("unsupported file type", "type", t)
return 0 // F32
}
}
diff --git a/fs/gguf/keyvalue.go b/fs/gguf/keyvalue.go
index 5843326c1..e20eb56b6 100644
--- a/fs/gguf/keyvalue.go
+++ b/fs/gguf/keyvalue.go
@@ -11,7 +11,7 @@ type KeyValue struct {
}
func (kv KeyValue) Valid() bool {
- return kv.Key != "" && kv.Value.value != nil
+ return kv.Key != "" && kv.value != nil
}
type Value struct {
diff --git a/harmony/harmonyparser.go b/harmony/harmonyparser.go
index 4f405dc35..902729b14 100644
--- a/harmony/harmonyparser.go
+++ b/harmony/harmonyparser.go
@@ -319,11 +319,12 @@ func (h *HarmonyMessageHandler) AddContent(content string, toolParser *HarmonyTo
}
case HarmonyEventContentEmitted:
logutil.Trace("harmony event content", "content", event.Content, "state", h.state)
- if h.state == harmonyMessageState_Normal {
+ switch h.state {
+ case harmonyMessageState_Normal:
contentSb.WriteString(event.Content)
- } else if h.state == harmonyMessageState_Thinking {
+ case harmonyMessageState_Thinking:
thinkingSb.WriteString(event.Content)
- } else if h.state == harmonyMessageState_ToolCalling {
+ case harmonyMessageState_ToolCalling:
toolContentSb.WriteString(event.Content)
}
case HarmonyEventMessageEnd:
diff --git a/middleware/openai.go b/middleware/openai.go
index b2e43f165..06f5774df 100644
--- a/middleware/openai.go
+++ b/middleware/openai.go
@@ -84,7 +84,7 @@ func (w *ChatWriter) writeResponse(data []byte) (int, error) {
}
w.ResponseWriter.Header().Set("Content-Type", "text/event-stream")
- _, err = w.ResponseWriter.Write([]byte(fmt.Sprintf("data: %s\n\n", d)))
+ _, err = fmt.Fprintf(w.ResponseWriter, "data: %s\n\n", d)
if err != nil {
return 0, err
}
@@ -98,7 +98,7 @@ func (w *ChatWriter) writeResponse(data []byte) (int, error) {
if err != nil {
return 0, err
}
- _, err = w.ResponseWriter.Write([]byte(fmt.Sprintf("data: %s\n\n", d)))
+ _, err = fmt.Fprintf(w.ResponseWriter, "data: %s\n\n", d)
if err != nil {
return 0, err
}
@@ -123,7 +123,7 @@ func (w *ChatWriter) writeResponse(data []byte) (int, error) {
}
func (w *ChatWriter) Write(data []byte) (int, error) {
- code := w.ResponseWriter.Status()
+ code := w.Status()
if code != http.StatusOK {
return w.writeError(data)
}
@@ -150,7 +150,7 @@ func (w *CompleteWriter) writeResponse(data []byte) (int, error) {
}
w.ResponseWriter.Header().Set("Content-Type", "text/event-stream")
- _, err = w.ResponseWriter.Write([]byte(fmt.Sprintf("data: %s\n\n", d)))
+ _, err = fmt.Fprintf(w.ResponseWriter, "data: %s\n\n", d)
if err != nil {
return 0, err
}
@@ -164,7 +164,7 @@ func (w *CompleteWriter) writeResponse(data []byte) (int, error) {
if err != nil {
return 0, err
}
- _, err = w.ResponseWriter.Write([]byte(fmt.Sprintf("data: %s\n\n", d)))
+ _, err = fmt.Fprintf(w.ResponseWriter, "data: %s\n\n", d)
if err != nil {
return 0, err
}
@@ -189,7 +189,7 @@ func (w *CompleteWriter) writeResponse(data []byte) (int, error) {
}
func (w *CompleteWriter) Write(data []byte) (int, error) {
- code := w.ResponseWriter.Status()
+ code := w.Status()
if code != http.StatusOK {
return w.writeError(data)
}
@@ -214,7 +214,7 @@ func (w *ListWriter) writeResponse(data []byte) (int, error) {
}
func (w *ListWriter) Write(data []byte) (int, error) {
- code := w.ResponseWriter.Status()
+ code := w.Status()
if code != http.StatusOK {
return w.writeError(data)
}
@@ -240,7 +240,7 @@ func (w *RetrieveWriter) writeResponse(data []byte) (int, error) {
}
func (w *RetrieveWriter) Write(data []byte) (int, error) {
- code := w.ResponseWriter.Status()
+ code := w.Status()
if code != http.StatusOK {
return w.writeError(data)
}
@@ -265,7 +265,7 @@ func (w *EmbedWriter) writeResponse(data []byte) (int, error) {
}
func (w *EmbedWriter) Write(data []byte) (int, error) {
- code := w.ResponseWriter.Status()
+ code := w.Status()
if code != http.StatusOK {
return w.writeError(data)
}
diff --git a/ml/device.go b/ml/device.go
index a672245b3..7b3398b8d 100644
--- a/ml/device.go
+++ b/ml/device.go
@@ -431,15 +431,15 @@ const (
DuplicateDevice // The same physical device but different library/backend (overlapping device)
)
-func (a DeviceInfo) Compare(b DeviceInfo) DeviceComparison {
- if a.PCIID != b.PCIID {
+func (d DeviceInfo) Compare(b DeviceInfo) DeviceComparison {
+ if d.PCIID != b.PCIID {
return UniqueDevice
}
// If PCIID is empty, we have to use ID + library for uniqueness
- if a.PCIID == "" && a.DeviceID != b.DeviceID {
+ if d.PCIID == "" && d.DeviceID != b.DeviceID {
return UniqueDevice
}
- if a.Library == b.Library {
+ if d.Library == b.Library {
return SameBackendDevice
}
return DuplicateDevice
@@ -447,8 +447,8 @@ func (a DeviceInfo) Compare(b DeviceInfo) DeviceComparison {
// For a SameBackendDevice, return true if b is better than a
// e.g. newer GPU library version
-func (a DeviceInfo) IsBetter(b DeviceInfo) bool {
- aLib := a.LibraryPath[len(a.LibraryPath)-1]
+func (d DeviceInfo) IsBetter(b DeviceInfo) bool {
+ aLib := d.LibraryPath[len(d.LibraryPath)-1]
bLib := b.LibraryPath[len(b.LibraryPath)-1]
if aLib == bLib {
return false
@@ -475,7 +475,7 @@ func FlashAttentionSupported(l []DeviceInfo) bool {
for _, gpu := range l {
supportsFA := gpu.Library == "cpu" ||
gpu.Name == "Metal" || gpu.Library == "Metal" ||
- (gpu.Library == "CUDA" && gpu.DriverMajor >= 7 && !(gpu.ComputeMajor == 7 && gpu.ComputeMinor == 2)) ||
+ (gpu.Library == "CUDA" && gpu.DriverMajor >= 7 && (gpu.ComputeMajor != 7 || gpu.ComputeMinor != 2)) ||
gpu.Library == "ROCm" ||
gpu.Library == "Vulkan"
diff --git a/model/models/gemma2/model.go b/model/models/gemma2/model.go
index 06c71fc3b..fb92ebc93 100644
--- a/model/models/gemma2/model.go
+++ b/model/models/gemma2/model.go
@@ -128,7 +128,7 @@ func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Ten
}
func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
- return fast.RoPE(ctx, key, shift, m.Options.attnKeyLen, m.Options.ropeBase, 1/m.Options.ropeScale, rope.WithTypeNeoX()), nil
+ return fast.RoPE(ctx, key, shift, m.attnKeyLen, m.ropeBase, 1/m.ropeScale, rope.WithTypeNeoX()), nil
}
type MLP struct {
@@ -178,10 +178,10 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
hiddenState := m.TokenEmbedding.Forward(ctx, batch.Inputs)
- hiddenState = hiddenState.Scale(ctx, math.Sqrt(float64(m.Options.hiddenSize)))
+ hiddenState = hiddenState.Scale(ctx, math.Sqrt(float64(m.hiddenSize)))
if len(m.Layers) == gemma27BLayerCount {
- m.Options.largeModelScaling = true
+ m.largeModelScaling = true
}
for i, layer := range m.Layers {
@@ -202,9 +202,9 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
hiddenState = m.Output.Forward(ctx, hiddenState)
// final logit softcap
- hiddenState = hiddenState.Scale(ctx, 1.0/float64(m.Options.finalLogitSoftcap))
+ hiddenState = hiddenState.Scale(ctx, 1.0/float64(m.finalLogitSoftcap))
hiddenState = hiddenState.Tanh(ctx)
- return hiddenState.Scale(ctx, float64(m.Options.finalLogitSoftcap)), nil
+ return hiddenState.Scale(ctx, float64(m.finalLogitSoftcap)), nil
}
func init() {
diff --git a/model/models/gemma3/model.go b/model/models/gemma3/model.go
index 62f51074a..538264597 100644
--- a/model/models/gemma3/model.go
+++ b/model/models/gemma3/model.go
@@ -96,15 +96,15 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) ([]input
return nil, err
}
- f32s, err := m.ImageProcessor.ProcessImage(image)
+ f32s, err := m.ProcessImage(image)
if err != nil {
return nil, err
}
pixelValues := ctx.Input().FromFloats(f32s,
- m.ImageProcessor.imageSize,
- m.ImageProcessor.imageSize,
- m.ImageProcessor.numChannels,
+ m.imageSize,
+ m.imageSize,
+ m.numChannels,
)
visionOutputs := m.VisionModel.Forward(ctx, pixelValues)
diff --git a/model/models/gemma3/model_text.go b/model/models/gemma3/model_text.go
index 8d1a1be6a..141687b49 100644
--- a/model/models/gemma3/model_text.go
+++ b/model/models/gemma3/model_text.go
@@ -111,12 +111,12 @@ func (sa *TextSelfAttention) Forward(ctx ml.Context, layer int, hiddenState, pos
}
func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
- ropeBase := m.TextConfig.ropeLocalBase
+ ropeBase := m.ropeLocalBase
if (layer+1)%gemmaGlobalCacheCount == 0 {
- ropeBase = m.TextConfig.ropeGlobalBase
+ ropeBase = m.ropeGlobalBase
}
- return fast.RoPE(ctx, key, shift, m.TextConfig.attnKeyLen, ropeBase, 1/m.TextConfig.ropeScale, rope.WithTypeNeoX()), nil
+ return fast.RoPE(ctx, key, shift, m.attnKeyLen, ropeBase, 1/m.ropeScale, rope.WithTypeNeoX()), nil
}
type TextMLP struct {
@@ -166,7 +166,7 @@ func (m *TextModel) Forward(ctx ml.Context, batch input.Batch, cache kvcache.Cac
positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
hiddenState := m.TokenEmbedding.Forward(ctx, batch.Inputs)
- hiddenState = hiddenState.Scale(ctx, math.Sqrt(float64(m.TextConfig.hiddenSize)))
+ hiddenState = hiddenState.Scale(ctx, math.Sqrt(float64(m.hiddenSize)))
// set image embeddings
var except []int
diff --git a/model/models/mistral3/model.go b/model/models/mistral3/model.go
index e071d71a8..ec8da315d 100644
--- a/model/models/mistral3/model.go
+++ b/model/models/mistral3/model.go
@@ -53,7 +53,7 @@ func New(c fs.Config) (model.Model, error) {
MultiModalProjector: newMultiModalProjector(c),
}
- m.Cache = kvcache.NewCausalCache(m.TextModel.Shift)
+ m.Cache = kvcache.NewCausalCache(m.Shift)
return m, nil
}
@@ -109,12 +109,12 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) ([]input
return nil, err
}
- f32s, size, err := m.ImageProcessor.ProcessImage(image)
+ f32s, size, err := m.ProcessImage(image)
if err != nil {
return nil, err
}
- pixelValues := ctx.Input().FromFloats(f32s, size.X, size.Y, m.ImageProcessor.numChannels)
+ pixelValues := ctx.Input().FromFloats(f32s, size.X, size.Y, m.numChannels)
visionOutputs := m.VisionModel.Forward(ctx, pixelValues)
features, size := m.MultiModalProjector.Forward(ctx, visionOutputs, size)
diff --git a/model/models/mistral3/model_vision.go b/model/models/mistral3/model_vision.go
index d763df7a0..be725c496 100644
--- a/model/models/mistral3/model_vision.go
+++ b/model/models/mistral3/model_vision.go
@@ -133,7 +133,7 @@ func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor) ml.Tensor {
hiddenStates := m.PatchEmbedding.Forward(ctx, pixelValues, m.patchSize, m.patchSize, 0, 0, 1, 1)
hiddenStates = hiddenStates.Reshape(ctx, numPatches, m.hiddenSize)
hiddenStates = hiddenStates.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)
- hiddenStates = m.EncoderNorm.Forward(ctx, hiddenStates, m.VisionModelOptions.eps)
+ hiddenStates = m.EncoderNorm.Forward(ctx, hiddenStates, m.eps)
// Prepare position IDs for 2D rope
positions := make([]int32, numPatches)
diff --git a/model/models/mllama/model.go b/model/models/mllama/model.go
index 58fd5adcf..f983f3c33 100644
--- a/model/models/mllama/model.go
+++ b/model/models/mllama/model.go
@@ -54,7 +54,7 @@ func New(c fs.Config) (model.Model, error) {
encoderCache := kvcache.NewEncoderCache()
encoderCache.SetConfig(ml.CacheConfig{})
- m.Cache = kvcache.NewWrapperCache(encoderCache, kvcache.NewCausalCache(m.TextModel.Shift))
+ m.Cache = kvcache.NewWrapperCache(encoderCache, kvcache.NewCausalCache(m.Shift))
return &m, nil
}
@@ -69,7 +69,7 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) ([]input
return nil, err
}
- f32s, ratio, err := m.ImageProcessor.ProcessImage(image)
+ f32s, ratio, err := m.ProcessImage(image)
if err != nil {
return nil, err
}
diff --git a/model/models/qwen25vl/model.go b/model/models/qwen25vl/model.go
index 13fa3fee1..b71eb3db8 100644
--- a/model/models/qwen25vl/model.go
+++ b/model/models/qwen25vl/model.go
@@ -48,7 +48,7 @@ func New(c fs.Config) (model.Model, error) {
ImageProcessor: newImageProcessor(c),
}
- m.Cache = kvcache.NewCausalCache(m.TextModel.Shift)
+ m.Cache = kvcache.NewCausalCache(m.Shift)
return m, nil
}
@@ -59,14 +59,13 @@ func (m *Model) PixelValues(ctx ml.Context, multimodalData []byte) (ml.Tensor, *
return nil, nil, err
}
- f32s, grid, err := m.ImageProcessor.ProcessImage(image)
+ f32s, grid, err := m.ProcessImage(image)
if err != nil {
return nil, nil, err
}
// Calculate tensor dimensions
- patchDim := m.ImageProcessor.numChannels * m.ImageProcessor.temporalPatchSize *
- m.ImageProcessor.patchSize * m.ImageProcessor.patchSize
+ patchDim := m.numChannels * m.temporalPatchSize * m.patchSize * m.patchSize
numPatches := grid.Temporal * grid.Height * grid.Width
pixelValues := ctx.Input().FromFloats(f32s, patchDim, numPatches)
diff --git a/model/models/qwen25vl/model_vision.go b/model/models/qwen25vl/model_vision.go
index 5cbb01f7e..4f85a8a57 100644
--- a/model/models/qwen25vl/model_vision.go
+++ b/model/models/qwen25vl/model_vision.go
@@ -228,7 +228,7 @@ func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor, grid *Grid)
cos = cos.Reshape(ctx, cos.Dim(0), 1, cos.Dim(1))
sin = sin.Reshape(ctx, sin.Dim(0), 1, sin.Dim(1))
- mask := blockDiagonalMask(ctx, hiddenStates.Dim(1), bounds, m.VisionModelOptions.numHeads)
+ mask := blockDiagonalMask(ctx, hiddenStates.Dim(1), bounds, m.numHeads)
// Apply encoder layers
for i, layer := range m.Layers {
if slices.Contains(m.fullAttnBlocks, int32(i)) {
diff --git a/model/models/qwen3/model.go b/model/models/qwen3/model.go
index 483439ac4..78c09d567 100644
--- a/model/models/qwen3/model.go
+++ b/model/models/qwen3/model.go
@@ -203,7 +203,7 @@ func (m *Model) forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
}
func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
- return m.Options.applyRotaryPositionEmbeddings(ctx, key, shift), nil
+ return m.applyRotaryPositionEmbeddings(ctx, key, shift), nil
}
var _ model.Model = (*Model)(nil)
diff --git a/model/renderers/qwen3vl.go b/model/renderers/qwen3vl.go
index 8ea4abbbe..b91cb17b5 100644
--- a/model/renderers/qwen3vl.go
+++ b/model/renderers/qwen3vl.go
@@ -98,7 +98,7 @@ func (r *Qwen3VLRenderer) Render(messages []api.Message, tools []api.Tool, _ *ap
if multiStepTool && message.Role == "user" {
// Check if content starts with
[NOTE(review): the extraction collapsed the following diffs onto a few unbroken
lines. Reconstructed below on a best-effort basis. The tail of the
model/renderers/qwen3vl.go hunk and the header plus leading hunks of the next
test file were lost and could not be recovered. Indentation inside hunks is
approximate (originals used tabs), so this section is readable but will not
pass `git apply` byte-for-byte — regenerate from the source commits to apply.]
-	if diff := cmp.Diff(mock.CompletionRequest.Prompt, " def add(return c "); diff != "" {
+	if diff := cmp.Diff(mock.Prompt, " def add(return c "); diff != "" {
 		t.Errorf("mismatch (-got +want):\n%s", diff)
 	}
 })
diff --git a/server/routes_generate_test.go b/server/routes_generate_test.go
index 0a84267ff..2ef274f49 100644
--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -378,7 +378,7 @@ func TestGenerateChat(t *testing.T) {
 		}
 	}
 
-	mock.CompletionResponse.Content = "Hi!"
+	mock.Content = "Hi!"
 	t.Run("messages", func(t *testing.T) {
 		w := createRequest(t, s.ChatHandler, api.ChatRequest{
 			Model: "test",
@@ -392,7 +392,7 @@ func TestGenerateChat(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}
 
-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "user: Hello!\n"); diff != "" {
+		if diff := cmp.Diff(mock.Prompt, "user: Hello!\n"); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}
 
@@ -422,14 +422,14 @@ func TestGenerateChat(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}
 
-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "system: You are a helpful assistant.\nuser: Hello!\n"); diff != "" {
+		if diff := cmp.Diff(mock.Prompt, "system: You are a helpful assistant.\nuser: Hello!\n"); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}
 
 		checkChatResponse(t, w.Body, "test-system", "Hi!")
 	})
 
-	mock.CompletionResponse.Content = "Abra kadabra!"
+	mock.Content = "Abra kadabra!"
 	t.Run("messages with system", func(t *testing.T) {
 		w := createRequest(t, s.ChatHandler, api.ChatRequest{
 			Model: "test-system",
@@ -444,7 +444,7 @@ func TestGenerateChat(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}
 
-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "system: You can perform magic tricks.\nuser: Hello!\n"); diff != "" {
+		if diff := cmp.Diff(mock.Prompt, "system: You can perform magic tricks.\nuser: Hello!\n"); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}
 
@@ -467,7 +467,7 @@ func TestGenerateChat(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}
 
-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "system: You are a helpful assistant.\nuser: Hello!\nassistant: I can help you with that.\nsystem: You can perform magic tricks.\nuser: Help me write tests.\n"); diff != "" {
+		if diff := cmp.Diff(mock.Prompt, "system: You are a helpful assistant.\nuser: Hello!\nassistant: I can help you with that.\nsystem: You can perform magic tricks.\nuser: Help me write tests.\n"); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}
 
@@ -985,7 +985,7 @@ func TestGenerate(t *testing.T) {
 		}
 	}
 
-	mock.CompletionResponse.Content = "Hi!"
+	mock.Content = "Hi!"
 	t.Run("prompt", func(t *testing.T) {
 		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
 			Model: "test",
@@ -997,7 +997,7 @@ func TestGenerate(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}
 
-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "User: Hello! "); diff != "" {
+		if diff := cmp.Diff(mock.Prompt, "User: Hello! "); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}
 
@@ -1025,14 +1025,14 @@ func TestGenerate(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}
 
-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "System: You are a helpful assistant. User: Hello! "); diff != "" {
+		if diff := cmp.Diff(mock.Prompt, "System: You are a helpful assistant. User: Hello! "); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}
 
 		checkGenerateResponse(t, w.Body, "test-system", "Hi!")
 	})
 
-	mock.CompletionResponse.Content = "Abra kadabra!"
+	mock.Content = "Abra kadabra!"
 	t.Run("prompt with system", func(t *testing.T) {
 		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
 			Model: "test-system",
@@ -1045,7 +1045,7 @@ func TestGenerate(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}
 
-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "System: You can perform magic tricks. User: Hello! "); diff != "" {
+		if diff := cmp.Diff(mock.Prompt, "System: You can perform magic tricks. User: Hello! "); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}
 
@@ -1067,7 +1067,7 @@ func TestGenerate(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}
 
-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "You can perform magic tricks. ### USER Help me write tests. "); diff != "" {
+		if diff := cmp.Diff(mock.Prompt, "You can perform magic tricks. ### USER Help me write tests. "); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}
 
@@ -1097,7 +1097,7 @@ func TestGenerate(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}
 
-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, " def add(return c "); diff != "" {
+		if diff := cmp.Diff(mock.Prompt, " def add(return c "); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}
 	})
@@ -1112,7 +1112,7 @@ func TestGenerate(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}
 
-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "def add("); diff != "" {
+		if diff := cmp.Diff(mock.Prompt, "def add("); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}
 	})
@@ -1129,7 +1129,7 @@ func TestGenerate(t *testing.T) {
 			t.Errorf("expected status 200, got %d", w.Code)
 		}
 
-		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "Help me write tests."); diff != "" {
+		if diff := cmp.Diff(mock.Prompt, "Help me write tests."); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
 		}
 	})
diff --git a/server/sched.go b/server/sched.go
index c5bc6692d..84ca18dd5 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -637,7 +637,7 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
 	}
 
 	// Don't reload runner if num_gpu=-1 was provided
-	optsExisting := runner.Options.Runner
+	optsExisting := runner.Runner
 	optsNew := req.opts.Runner
 	if optsNew.NumGPU < 0 {
 		optsExisting.NumGPU = -1
@@ -745,7 +745,7 @@ func (runner *runnerRef) LogValue() slog.Value {
 		slog.String("model", runner.modelPath),
 	)
 	if runner.Options != nil {
-		attrs = append(attrs, slog.Int("num_ctx", runner.Options.NumCtx))
+		attrs = append(attrs, slog.Int("num_ctx", runner.NumCtx))
 	}
 	return slog.GroupValue(attrs...)
 }
diff --git a/server/sched_test.go b/server/sched_test.go
index 678be954f..1861e381b 100644
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -666,7 +666,7 @@ func TestSchedNeedsReload(t *testing.T) {
 	req.opts.NumBatch = 1234
 	resp = runner.needsReload(ctx, req)
 	require.True(t, resp)
-	req.opts.NumBatch = runner.Options.NumBatch
+	req.opts.NumBatch = runner.NumBatch
 	llm.pingResp = errors.New("foo")
 	resp = runner.needsReload(ctx, req)
 	require.True(t, resp)
diff --git a/template/template.go b/template/template.go
index c90190d7a..fe683f7c6 100644
--- a/template/template.go
+++ b/template/template.go
@@ -155,7 +155,7 @@ func Parse(s string) (*Template, error) {
 	if !slices.Contains(vars, "messages") && !slices.Contains(vars, "response") {
 		// touch up the template and append {{ .Response }}
-		tmpl.Tree.Root.Nodes = append(tmpl.Tree.Root.Nodes, &response)
+		tmpl.Root.Nodes = append(tmpl.Root.Nodes, &response)
 	}
 
 	return &t, nil
@@ -238,7 +238,7 @@ func (t *Template) Subtree(fn func(parse.Node) bool) *template.Template {
 		return nil
 	}
 
-	if n := walk(t.Tree.Root); n != nil {
+	if n := walk(t.Root); n != nil {
 		return (&template.Template{
 			Tree: &parse.Tree{
 				Root: &parse.ListNode{
@@ -321,7 +321,7 @@ func (t *Template) Execute(w io.Writer, v Values) error {
 	}
 
 	var cut bool
-	nodes := deleteNode(t.Template.Root.Copy(), func(n parse.Node) bool {
+	nodes := deleteNode(t.Root.Copy(), func(n parse.Node) bool {
 		if field, ok := n.(*parse.FieldNode); ok && slices.Contains(field.Ident, "Response") {
 			cut = true
 			return false
diff --git a/template/template_test.go b/template/template_test.go
index 74388d6ef..3436b075b 100644
--- a/template/template_test.go
+++ b/template/template_test.go
@@ -54,7 +54,7 @@ func TestNamed(t *testing.T) {
 				t.Fatal(err)
 			}
 
-			if tmpl.Tree.Root.String() == "" {
+			if tmpl.Root.String() == "" {
 				t.Errorf("empty %s template", k)
 			}
 		})
diff --git a/thinking/template.go b/thinking/template.go
index 20bd65ec1..798fa34c7 100644
--- a/thinking/template.go
+++ b/thinking/template.go
@@ -129,6 +129,6 @@ func rangeUsesField(rangeNode *parse.RangeNode, field string) bool {
 		}
 		return true
 	}
-	templateVisit(rangeNode.BranchNode.Pipe, enterFn, nil)
+	templateVisit(rangeNode.Pipe, enterFn, nil)
 	return found
 }
diff --git a/tools/template.go b/tools/template.go
index e22f06754..7ae990bf4 100644
--- a/tools/template.go
+++ b/tools/template.go
@@ -20,7 +20,7 @@ func parseTag(tmpl *template.Template) string {
 		return "{"
 	}
 
-	tc := findToolCallNode(tmpl.Tree.Root.Nodes)
+	tc := findToolCallNode(tmpl.Root.Nodes)
 	if tc == nil {
 		return "{"
 	}