Merge pull request #5780 from ollama/mxyng/tools

fix parsing tool calls: break on unexpected eofs
fix parsing tool calls
2024-07-18 12:14:10 -07:00 · 2024-07-18 12:08:11 -07:00 · 2024-07-18 11:44:57 -07:00 · 2024-07-18 11:28:19 -07:00 · 2024-07-18 08:50:23 -07:00 · 2024-07-17 15:35:11 -07:00
18 changed files with 251 additions and 592 deletions
--- a/README.md
+++ b/README.md
@@ -295,6 +295,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
 - [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
 - [AI Studio](https://github.com/MindWorkAI/AI-Studio)
+- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)

 ### Terminal

--- a/api/types.go
+++ b/api/types.go
@@ -101,46 +101,29 @@ type ChatRequest struct {
 	KeepAlive *Duration `json:"keep_alive,omitempty"`

 	// Tools is an optional list of tools the model has access to.
-	Tools []Tool `json:"tools,omitempty"`
+	Tools Tools `json:"tools,omitempty"` // named field: embedding Tools would promote Tools.String onto ChatRequest

 	// Options lists model-specific options.
 	Options map[string]interface{} `json:"options"`
 }

+type Tools []Tool // Tools is a list of tool definitions; ChatRequest carries it under the "tools" JSON key.
+
+func (t Tools) String() string { // String returns t encoded as JSON.
+	bts, _ := json.Marshal(t) // Marshal error is discarded: on failure bts is nil and "" is returned
+	return string(bts)
+}
+
 // Message is a single message in a chat sequence. The message contains the
 // role ("system", "user", or "assistant"), the content and an optional list
 // of images.
 type Message struct {
 	Role      string      `json:"role"`
-	Content   string      `json:"content,omitempty"`
+	Content   string      `json:"content"`
 	Images    []ImageData `json:"images,omitempty"`
 	ToolCalls []ToolCall  `json:"tool_calls,omitempty"`
 }

-type ToolCall struct {
-	Function struct {
-		Name      string         `json:"name"`
-		Arguments map[string]any `json:"arguments"`
-	} `json:"function"`
-}
-
-type Tool struct {
-	Type     string `json:"type"`
-	Function struct {
-		Name        string `json:"name"`
-		Description string `json:"description"`
-		Parameters  struct {
-			Type       string   `json:"type"`
-			Required   []string `json:"required"`
-			Properties map[string]struct {
-				Type        string   `json:"type"`
-				Description string   `json:"description"`
-				Enum        []string `json:"enum,omitempty"`
-			} `json:"properties"`
-		} `json:"parameters"`
-	} `json:"function"`
-}
-
 func (m *Message) UnmarshalJSON(b []byte) error {
 	type Alias Message
 	var a Alias
@@ -153,6 +136,46 @@ func (m *Message) UnmarshalJSON(b []byte) error {
 	return nil
 }

+type ToolCall struct { // ToolCall is one tool invocation carried in Message.ToolCalls.
+	Function ToolCallFunction `json:"function"` // function to invoke and the arguments to pass
+}
+
+type ToolCallFunction struct { // ToolCallFunction names the function of a ToolCall and holds its arguments.
+	Name      string                    `json:"name"`      // name of the function to call
+	Arguments ToolCallFunctionArguments `json:"arguments"` // arguments, encoded as a JSON object
+}
+
+type ToolCallFunctionArguments map[string]any // ToolCallFunctionArguments maps argument names to arbitrary values.
+
+func (t *ToolCallFunctionArguments) String() string { // String returns the arguments encoded as JSON; note the pointer receiver.
+	bts, _ := json.Marshal(t) // Marshal error is discarded: on failure bts is nil and "" is returned
+	return string(bts)
+}
+
+type Tool struct { // Tool is one entry of Tools: a tool definition the model has access to.
+	Type     string       `json:"type"`     // kind of tool; presumably "function" (confirm against callers)
+	Function ToolFunction `json:"function"` // the function this tool exposes
+}
+
+type ToolFunction struct { // ToolFunction describes a function: its name, description and parameters.
+	Name        string `json:"name"`        // function name
+	Description string `json:"description"` // description of the function
+	Parameters  struct { // parameter description; shape resembles a JSON Schema object (TODO confirm)
+		Type       string   `json:"type"`     // type of the parameters value
+		Required   []string `json:"required"` // required parameter names; presumably keys of Properties
+		Properties map[string]struct { // per-parameter description; presumably keyed by parameter name
+			Type        string   `json:"type"`           // parameter type
+			Description string   `json:"description"`    // parameter description
+			Enum        []string `json:"enum,omitempty"` // enum values; omitted from JSON when empty
+		} `json:"properties"`
+	} `json:"parameters"`
+}
+
+func (t *ToolFunction) String() string { // String returns the function definition encoded as JSON; note the pointer receiver.
+	bts, _ := json.Marshal(t) // Marshal error is discarded: on failure bts is nil and "" is returned
+	return string(bts)
+}
+
 // ChatResponse is the response returned by [Client.Chat]. Its fields are
 // similar to [GenerateResponse].
 type ChatResponse struct {
@@ -405,9 +428,6 @@ type GenerateResponse struct {
 	// Response is the textual response itself.
 	Response string `json:"response"`

-	// ToolCalls is the list of tools the model wants to call
-	ToolCalls []ToolCall `json:"tool_calls,omitempty"`
-
 	// Done specifies if the response is complete.
 	Done bool `json:"done"`

--- a/docs/openai.md
+++ b/docs/openai.md
@@ -103,10 +103,6 @@ curl http://localhost:11434/v1/chat/completions \
 - [ ] `user`
 - [ ] `n`

-#### Notes
-
- `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached
-
 ## Models

 Before using a model, pull it locally `ollama pull`:
--- a/llm/ggla.go
+++ b/llm/ggla.go
@@ -36,7 +36,6 @@ type ggla struct {

 	kv      KV
 	tensors []*Tensor
-	offset  int64
 }

 func newGGLA(container *containerGGLA) *ggla {
@@ -51,10 +50,7 @@ func (llm *ggla) KV() KV {
 }

 func (llm *ggla) Tensors() Tensors {
-	return Tensors{
-		Items:  llm.tensors,
-		Offset: llm.offset,
-	}
+	return llm.tensors
 }

 func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -112,38 +112,11 @@ func (kv KV) ChatTemplate() string {
 	return s
 }

-// Tensors type as a slice of pointers to Tensor
-// type Tensors []*Tensor
-
-type Tensors struct {
-	Items  []*Tensor
-	Offset int64
-}
-
-// Implement the Len method
-func (ts Tensors) Len() int {
-	return len(ts.Items)
-}
-
-// Implement the Swap method
-func (ts Tensors) Swap(i, j int) {
-	ts.Items[i], ts.Items[j] = ts.Items[j], ts.Items[i]
-}
-
-// Implement the Less method
-func (ts Tensors) Less(i, j int) bool {
-	var x, y int
-	if n, err := fmt.Sscanf(ts.Items[i].Name, "blk.%d", &x); err != nil || n != 1 {
-		return ts.Items[i].Name < ts.Items[j].Name
-	} else if n, err := fmt.Sscanf(ts.Items[j].Name, "blk.%d", &y); err != nil || n != 1 {
-		return ts.Items[i].Name < ts.Items[j].Name
-	}
-	return x < y
-}
+type Tensors []*Tensor

 func (ts Tensors) Layers() map[string]Layer {
 	layers := make(map[string]Layer)
-	for _, t := range ts.Items {
+	for _, t := range ts {
 		parts := strings.Split(t.Name, ".")
 		if parts[0] == "blk" {
 			// join first and second part, e.g. blk.%d
@@ -481,11 +454,3 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui

 	return
 }
-
-type TensorWriter struct {
-	io.Reader
-}
-
-func (tw TensorWriter) WriteTo(w io.Writer) (int64, error) {
-	return io.Copy(w, tw.Reader)
-}
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -6,12 +6,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
-	"log/slog"
-	"slices"
-	"sort"
 	"strings"
-
-	"golang.org/x/exp/maps"
 )

 type containerGGUF struct {
@@ -92,7 +87,6 @@ type gguf struct {

 	kv      KV
 	tensors []*Tensor
-	offset  int64

 	parameters uint64

@@ -115,10 +109,7 @@ func (llm *gguf) KV() KV {
 }

 func (llm *gguf) Tensors() Tensors {
-	return Tensors{
-		Items:  llm.tensors,
-		Offset: llm.offset,
-	}
+	return llm.tensors
 }

 func (llm *gguf) numTensor() uint64 {
@@ -208,13 +199,12 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 			return fmt.Errorf("failed to read tensor dimensions: %w", err)
 		}

-		shape := []uint64{}
+		shape := [4]uint64{1, 1, 1, 1}
 		for i := 0; uint32(i) < dims; i++ {
-			shapeVal, err := readGGUF[uint64](llm, rs)
+			shape[i], err = readGGUF[uint64](llm, rs)
 			if err != nil {
 				return fmt.Errorf("failed to read tensor shape: %w", err)
 			}
-			shape = append(shape, shapeVal)
 		}

 		kind, err := readGGUF[uint32](llm, rs)
@@ -231,7 +221,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 			Name:   name,
 			Kind:   kind,
 			Offset: offset,
-			Shape:  shape,
+			Shape:  shape[:],
 		}

 		llm.tensors = append(llm.tensors, &tensor)
@@ -246,14 +236,6 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 		alignment = 32
 	}

-	offset, err := rs.Seek(0, io.SeekCurrent)
-	if err != nil {
-		return fmt.Errorf("failed to get current offset: %w", err)
-	}
-
-	// align to next 32-byte boundary
-	llm.offset = offset + llm.padding(offset, int64(alignment))
-
 	for _, tensor := range llm.tensors {
 		offset, err := rs.Seek(0, io.SeekCurrent)
 		if err != nil {
@@ -279,12 +261,12 @@ func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
 	return t, err
 }

-func writeGGUF[V any](w io.Writer, t uint32, v V) error {
-	if err := binary.Write(w, binary.LittleEndian, t); err != nil {
+func writeGGUF[V any](llm *gguf, w io.Writer, t uint32, v V) error {
+	if err := binary.Write(w, llm.ByteOrder, t); err != nil {
 		return err
 	}

-	return binary.Write(w, binary.LittleEndian, v)
+	return binary.Write(w, llm.ByteOrder, v)
 }

 func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
@@ -348,12 +330,12 @@ func readGGUFString(llm *gguf, r io.Reader) (string, error) {
 	return string(buf), nil
 }

-func writeGGUFString(w io.Writer, s string) error {
-	if err := binary.Write(w, binary.LittleEndian, ggufTypeString); err != nil {
+func writeGGUFString(llm *gguf, w io.Writer, s string) error {
+	if err := binary.Write(w, llm.ByteOrder, ggufTypeString); err != nil {
 		return err
 	}

-	if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
+	if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
 		return err
 	}

@@ -362,9 +344,8 @@ func writeGGUFString(w io.Writer, s string) error {
 }

 type array struct {
-	size     int
-	values   []any
-	datatype uint32
+	size   int
+	values []any
 }

 func (a *array) MarshalJSON() ([]byte, error) {
@@ -444,7 +425,7 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
 		return nil, err
 	}

-	a := &array{size: int(n), datatype: t}
+	a := &array{size: int(n)}
 	if llm.canCollectArray(int(n)) {
 		a.values = make([]any, int(n))
 	}
@@ -495,21 +476,21 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
 	return a, nil
 }

-func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
-	if err := binary.Write(w, binary.LittleEndian, ggufTypeArray); err != nil {
+func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
+	if err := binary.Write(w, llm.ByteOrder, ggufTypeArray); err != nil {
 		return err
 	}

-	if err := binary.Write(w, binary.LittleEndian, t); err != nil {
+	if err := binary.Write(w, llm.ByteOrder, t); err != nil {
 		return err
 	}

-	if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
+	if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
 		return err
 	}

 	for _, e := range s {
-		if err := binary.Write(w, binary.LittleEndian, e); err != nil {
+		if err := binary.Write(w, llm.ByteOrder, e); err != nil {
 			return err
 		}
 	}
@@ -608,19 +589,19 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
 		var err error
 		switch v := v.(type) {
 		case uint32:
-			err = writeGGUF(ws, ggufTypeUint32, v)
+			err = writeGGUF(llm, ws, ggufTypeUint32, v)
 		case float32:
-			err = writeGGUF(ws, ggufTypeFloat32, v)
+			err = writeGGUF(llm, ws, ggufTypeFloat32, v)
 		case bool:
-			err = writeGGUF(ws, ggufTypeBool, v)
+			err = writeGGUF(llm, ws, ggufTypeBool, v)
 		case string:
-			err = writeGGUFString(ws, v)
+			err = writeGGUFString(llm, ws, v)
 		case []int32:
-			err = writeGGUFArray(ws, ggufTypeInt32, v)
+			err = writeGGUFArray(llm, ws, ggufTypeInt32, v)
 		case []uint32:
-			err = writeGGUFArray(ws, ggufTypeUint32, v)
+			err = writeGGUFArray(llm, ws, ggufTypeUint32, v)
 		case []float32:
-			err = writeGGUFArray(ws, ggufTypeFloat32, v)
+			err = writeGGUFArray(llm, ws, ggufTypeFloat32, v)
 		case []string:
 			if err := binary.Write(ws, llm.ByteOrder, ggufTypeArray); err != nil {
 				return err
@@ -653,7 +634,7 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {

 	for k, v := range kvCheck {
 		if !v {
-			return fmt.Errorf("didn't know how to write kv %s", k)
+			return fmt.Errorf("didn't know how to write kv %s", k) // error strings stay lowercase (Go convention)
 		}
 	}

@@ -715,208 +696,3 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
 func (gguf) padding(offset, align int64) int64 {
 	return (align - offset%align) % align
 }
-
-// Reader and WriterTof
-type GGUFWriter struct {
-	KV
-	Tensors
-}
-
-type writeOffset struct {
-	io.Writer
-	offset int
-}
-
-func (wo *writeOffset) Write(p []byte) (int, error) {
-	n, err := wo.Writer.Write(p)
-	wo.offset += n
-	return n, err
-}
-
-var _ io.Reader = (*GGUFWriter)(nil)
-
-var _ io.WriterTo = (*GGUFWriter)(nil)
-
-func (GGUFWriter) Read([]byte) (int, error) {
-	panic("not implemeneted")
-}
-
-func (gguf GGUFWriter) WriteTo(w io.Writer) (int64, error) {
-	wo := &writeOffset{Writer: w}
-
-	if err := binary.Write(wo, binary.LittleEndian, []byte("GGUF")); err != nil {
-		return 0, err
-	}
-
-	if err := binary.Write(wo, binary.LittleEndian, uint32(3)); err != nil {
-		return 0, err
-	}
-
-	if err := binary.Write(wo, binary.LittleEndian, uint64(len(gguf.Tensors.Items))); err != nil {
-		return 0, err
-	}
-
-	if err := binary.Write(wo, binary.LittleEndian, uint64(len(gguf.KV)-1)); err != nil {
-		return 0, err
-	}
-
-	keys := maps.Keys(gguf.KV)
-	slices.Sort(keys)
-
-	for _, key := range keys {
-		switch key {
-		case "general.parameter_count":
-			// don't write general param count as its added in by us
-			continue
-		default:
-			if err := ggufWriteKV(wo, key, gguf.KV[key]); err != nil {
-				return 0, err
-			}
-		}
-	}
-	sort.Sort(gguf.Tensors)
-
-	var s uint64
-	for _, t := range gguf.Tensors.Items {
-		t.Offset = s
-		if err := ggufWriteTensorInfo(wo, t); err != nil {
-			return 0, err
-		}
-		s += t.Size()
-	}
-	tensorOffset := wo.offset
-
-	for _, t := range gguf.Tensors.Items {
-		if err := ggufWriteTensor(wo, t, wo.offset); err != nil {
-			return 0, err
-		}
-	}
-
-	return int64(tensorOffset), nil
-}
-
-func ggufWriteTensorInfo(ws io.Writer, t *Tensor) error {
-	if err := binary.Write(ws, binary.LittleEndian, uint64(len(t.Name))); err != nil {
-		return err
-	}
-
-	if err := binary.Write(ws, binary.LittleEndian, []byte(t.Name)); err != nil {
-		return err
-	}
-
-	if err := binary.Write(ws, binary.LittleEndian, uint32(len(t.Shape))); err != nil {
-		return err
-	}
-
-	for i := range len(t.Shape) {
-		if err := binary.Write(ws, binary.LittleEndian, t.Shape[len(t.Shape)-i-1]); err != nil {
-			return err
-		}
-	}
-
-	if err := binary.Write(ws, binary.LittleEndian, t.Kind); err != nil {
-		return err
-	}
-
-	return binary.Write(ws, binary.LittleEndian, t.Offset)
-}
-
-func ggufWriteTensor(ws io.Writer, t *Tensor, offset int) error {
-	slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset)
-	if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(int64(offset), 32)))); err != nil {
-		return err
-	}
-
-	_, err := t.WriteTo(ws)
-	return err
-}
-
-func ggufWriteKV(ws io.Writer, k string, v any) error {
-	slog.Debug(k, "type", fmt.Sprintf("%T", v))
-	if err := binary.Write(ws, binary.LittleEndian, uint64(len(k))); err != nil {
-		return err
-	}
-
-	if err := binary.Write(ws, binary.LittleEndian, []byte(k)); err != nil {
-		return err
-	}
-
-	var err error
-	switch v := v.(type) {
-	case uint32:
-		err = writeGGUF(ws, ggufTypeUint32, v)
-	case float32:
-		err = writeGGUF(ws, ggufTypeFloat32, v)
-	case bool:
-		err = writeGGUF(ws, ggufTypeBool, v)
-	case string:
-		err = writeGGUFString(ws, v)
-	case []int32:
-		err = writeGGUFArray(ws, ggufTypeInt32, v)
-	case []uint32:
-		err = writeGGUFArray(ws, ggufTypeUint32, v)
-	case []float32:
-		err = writeGGUFArray(ws, ggufTypeFloat32, v)
-	case []string:
-		if err := binary.Write(ws, binary.LittleEndian, ggufTypeArray); err != nil {
-			return err
-		}
-
-		if err := binary.Write(ws, binary.LittleEndian, ggufTypeString); err != nil {
-			return err
-		}
-
-		if err := binary.Write(ws, binary.LittleEndian, uint64(len(v))); err != nil {
-			return err
-		}
-
-		for _, e := range v {
-			if err := binary.Write(ws, binary.LittleEndian, uint64(len(e))); err != nil {
-				return err
-			}
-
-			if err := binary.Write(ws, binary.LittleEndian, []byte(e)); err != nil {
-				return err
-			}
-		}
-	case *array:
-		if v.size > 0 {
-			switch v.values[0].(type) {
-			case string:
-				if err := binary.Write(ws, binary.LittleEndian, ggufTypeArray); err != nil {
-					return err
-				}
-
-				if err := binary.Write(ws, binary.LittleEndian, ggufTypeString); err != nil {
-					return err
-				}
-
-				if err := binary.Write(ws, binary.LittleEndian, uint64(v.size)); err != nil {
-					return err
-				}
-
-				for _, e := range v.values {
-					if err := binary.Write(ws, binary.LittleEndian, uint64(len(e.(string)))); err != nil {
-						return err
-					}
-
-					if err := binary.Write(ws, binary.LittleEndian, []byte(e.(string))); err != nil {
-						return err
-					}
-				}
-			default:
-				err = writeGGUFArray(ws, v.datatype, v.values)
-			}
-		}
-
-	default:
-		return fmt.Errorf("improper type for '%s'", k)
-	}
-
-	return err
-}
-
-func ggufPadding(offset, align int64) int64 {
-	// we mod twice in the case offset%align = 0
-	return (align - offset%align) % align
-}
--- a/llm/gguf_test.go
+++ b/llm/gguf_test.go
@@ -1,187 +0,0 @@
-package llm
-
-import (
-	"crypto/sha256"
-	"fmt"
-	"io"
-	"math"
-	"os"
-	"path/filepath"
-	"testing"
-
-	"github.com/google/go-cmp/cmp"
-)
-
-// TestGGUFDecode tests the decoding and rewriting of (unsorted) GGUF files
-// To run, add GGUF files to /llm/testdata and add the name of the file to the tests slice
-// This creates a temporary file in /llm/testdata that will deleted only if the test passes
-// Note: map[Tensor.Name + " offset"] is commented since sorting will reorder the tensors
-// Comment out sort.Sort(gguf.Tensors) in gguf.go to test offsets
-func TestGGUFRewrite(t *testing.T) {
-	tests := []string{
-		"phi3.gguf",
-	}
-
-	for i := range tests {
-		tt := tests[i]
-		t.Run(tt, func(t *testing.T) {
-			t.Parallel()
-			p := filepath.Join("testdata", tt)
-
-			if _, err := os.Stat(p); err != nil {
-				t.Skip("file not found", p)
-			}
-
-			wantFile, err := os.Open(p)
-			if err != nil {
-				t.Fatal(err)
-			}
-			defer wantFile.Close()
-
-			// decode original gguf
-			_, wantGGML, err := decodeGGML(t, wantFile)
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			gotFile, err := os.CreateTemp("testdata", tt)
-			if err != nil {
-				t.Fatal(err)
-			}
-			defer func() {
-				gotFile.Close()
-				if !t.Failed() {
-					os.Remove(gotFile.Name())
-				}
-			}()
-
-			_, gotGGML, err := rewriteGGML(t, wantGGML, gotFile, wantFile)
-
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			diff, diff2 := compareGGML(t, gotGGML, wantGGML, gotFile, wantFile) 
-			if cmp.Diff(diff, diff2) != "" {
-				t.Fatalf("diff: \n%s", cmp.Diff(diff, diff2))
-			}
-		})
-	}
-}
-
-func compareGGML(t *testing.T, gotGGML, wantGGML *GGML, f *os.File, f2 *os.File) (map[string]string, map[string]string) {
-	got := make(map[string]string)
-	want := make(map[string]string)
-
-	gotKV := gotGGML.KV()
-	wantKV := wantGGML.KV()
-
-	if len(gotKV) != len(wantKV) {
-		t.Fatalf("got length: %d != want length: %d", len(gotKV), len(wantKV))
-	}
-
-	for k, v := range gotKV {
-		switch t := v.(type) {
-		case *array:
-			if diffy := cmp.Diff(t.values, wantKV[k].(*array).values); diffy != "" {
-				got[k] = diffy
-			}
-		default:
-			if v != wantKV[k] {
-				got[k] = fmt.Sprintf("kv1: %v, kv2: %v", v, want[k])
-			}
-		}
-	}
-
-	gotTensors := gotGGML.Tensors().Items
-	gotOffset := gotGGML.Tensors().Offset
-	wantTensors := wantGGML.Tensors().Items
-	wantOffset := wantGGML.Tensors().Offset
-
-	if len(gotTensors) != len(wantTensors) {
-		got["lenTensors"] = fmt.Sprintf("t1: %d, t2: %d", len(gotTensors), len(wantTensors))
-	}
-
-	for _, tensor := range gotTensors {
-		sha256sum := sha256.New()
-		sr := io.NewSectionReader(f, gotOffset+int64(tensor.Offset), int64(tensor.Size()))
-		var s int64
-		s, err := io.Copy(sha256sum, sr)
-		if err != nil {
-			t.Fatalf("error: %v", err)
-		}
-
-		got[tensor.Name] = fmt.Sprintf("%x", sha256sum.Sum(nil))
-		got[tensor.Name+" size"] = fmt.Sprintf("%d", s)
-		// got[tensor.Name+" offset"] = fmt.Sprintf("%v", tensor.Offset)
-	}
-
-	for _, tensor := range wantTensors {
-		sha256sum := sha256.New()
-		var s int64
-		sr := io.NewSectionReader(f2, wantOffset +int64(tensor.Offset), int64(tensor.Size()))
-		s, err := io.Copy(sha256sum, sr)
-		if err != nil {
-			t.Fatalf("error: %v", err)
-		}
-
-		want[tensor.Name] = fmt.Sprintf("%x", sha256sum.Sum(nil))
-		want[tensor.Name+" size"] = fmt.Sprintf("%d", s)
-		// want[tensor.Name+" offset"] = fmt.Sprintf("%v", tensor.Offset)
-	}
-	return got, want
-}
-
-func decodeGGML(t *testing.T, f *os.File) (int64, *GGML, error) {
-	ggml, n, err := DecodeGGML(f, math.MaxInt)
-	if err != nil {
-		t.Fatal(err)
-	}
-	return n, ggml, nil
-}
-
-func rewriteGGML(t *testing.T, ggml *GGML, gotFile *os.File, wantFile *os.File) (int64, *GGML, error) {
-	var tensors []*Tensor
-
-	for _, tensor := range ggml.Tensors().Items {
-		shape := make([]uint64, len(tensor.Shape))
-		for i := range len(tensor.Shape) {
-			shape[i] = tensor.Shape[len(tensor.Shape)-i-1]
-		}
-
-		tensors = append(tensors, &Tensor{
-			Name:  tensor.Name,
-			Kind:  tensor.Kind,
-			Shape: shape,
-
-			WriterTo: TensorWriter{
-				Reader: io.NewSectionReader(wantFile, ggml.Tensors().Offset+int64(tensor.Offset), int64(tensor.Size())),
-			},
-		})
-	}
-
-	reader := &GGUFWriter{
-		KV: ggml.KV(),
-		Tensors: Tensors{
-			Items:  tensors,
-			Offset: ggml.Tensors().Offset,
-		},
-	}
-
-	n, err := io.Copy(gotFile, reader)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	file, err := os.Open(gotFile.Name())
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	ggml2, _, err := DecodeGGML(file, math.MaxInt)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	return n, ggml2, nil
-}
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -7,6 +7,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"log/slog"
 	"math/rand"
 	"net/http"
 	"strings"
@@ -29,8 +30,9 @@ type ErrorResponse struct {
 }

 type Message struct {
-	Role    string `json:"role"`
-	Content any    `json:"content"`
+	Role      string     `json:"role"`
+	Content   any        `json:"content"`
+	ToolCalls []ToolCall `json:"tool_calls,omitempty"`
 }

 type Choice struct {
@@ -78,6 +80,7 @@ type ChatCompletionRequest struct {
 	PresencePenalty  *float64        `json:"presence_penalty_penalty"`
 	TopP             *float64        `json:"top_p"`
 	ResponseFormat   *ResponseFormat `json:"response_format"`
+	Tools            []api.Tool      `json:"tools"`
 }

 type ChatCompletion struct {
@@ -111,6 +114,7 @@ type CompletionRequest struct {
 	Stream           bool     `json:"stream"`
 	Temperature      *float32 `json:"temperature"`
 	TopP             float32  `json:"top_p"`
+	Suffix           string   `json:"suffix"`
 }

 type Completion struct {
@@ -132,6 +136,15 @@ type CompletionChunk struct {
 	SystemFingerprint string                `json:"system_fingerprint"`
 }

+type ToolCall struct { // ToolCall is a tool call as carried in Message.ToolCalls of this package.
+	ID       string `json:"id"`   // identifier; toChatCompletion fills it from toolCallId
+	Type     string `json:"type"` // toChatCompletion sets this to "function"
+	Function struct {
+		Name      string `json:"name"`      // name of the function to call
+		Arguments string `json:"arguments"` // arguments as a JSON-encoded string
+	} `json:"function"`
+}
+
 type Model struct {
 	Id      string `json:"id"`
 	Object  string `json:"object"`
@@ -170,7 +183,31 @@ func NewError(code int, message string) ErrorResponse {
 	return ErrorResponse{Error{Type: etype, Message: message}}
 }

+func toolCallId() string { // toolCallId returns "call_" followed by 8 random characters from [a-z0-9].
+	const letterBytes = "abcdefghijklmnopqrstuvwxyz0123456789"
+	b := make([]byte, 8)
+	for i := range b {
+		b[i] = letterBytes[rand.Intn(len(letterBytes))] // math/rand: IDs are not cryptographically unpredictable
+	}
+	return "call_" + string(b) // letterBytes is already lowercase, so no case folding is needed
+}
+
 func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
+	toolCalls := make([]ToolCall, len(r.Message.ToolCalls)) // one entry per tool call in the api response
+	for i, tc := range r.Message.ToolCalls {
+		toolCalls[i].ID = toolCallId()
+		toolCalls[i].Type = "function"
+		toolCalls[i].Function.Name = tc.Function.Name
+
+		args, err := json.Marshal(tc.Function.Arguments) // ToolCall.Function.Arguments is a string, so encode the map as JSON
+		if err != nil {
+			slog.Error("could not marshal function arguments to json", "error", err)
+			continue // the entry is kept, with Arguments left empty
+		}
+
+		toolCalls[i].Function.Arguments = string(args)
+	}
+
 	return ChatCompletion{
 		Id:                id,
 		Object:            "chat.completion",
@@ -179,7 +216,7 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
 		SystemFingerprint: "fp_ollama",
 		Choices: []Choice{{
 			Index:   0,
-			Message: Message{Role: r.Message.Role, Content: r.Message.Content},
+			Message: Message{Role: r.Message.Role, Content: r.Message.Content, ToolCalls: toolCalls},
 			FinishReason: func(reason string) *string {
 				if len(reason) > 0 {
 					return &reason
@@ -188,7 +225,6 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
 			}(r.DoneReason),
 		}},
 		Usage: Usage{
-			// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
 			PromptTokens:     r.PromptEvalCount,
 			CompletionTokens: r.EvalCount,
 			TotalTokens:      r.PromptEvalCount + r.EvalCount,
@@ -234,7 +270,6 @@ func toCompletion(id string, r api.GenerateResponse) Completion {
 			}(r.DoneReason),
 		}},
 		Usage: Usage{
-			// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
 			PromptTokens:     r.PromptEvalCount,
 			CompletionTokens: r.EvalCount,
 			TotalTokens:      r.PromptEvalCount + r.EvalCount,
@@ -367,7 +402,19 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 			}
 			messages = append(messages, message)
 		default:
-			return nil, fmt.Errorf("invalid message content type: %T", content)
+			if msg.ToolCalls == nil { // content of an unhandled type is only accepted when tool calls are present
+				return nil, fmt.Errorf("invalid message content type: %T", content)
+			}
+
+			toolCalls := make([]api.ToolCall, len(msg.ToolCalls))
+			for i, tc := range msg.ToolCalls {
+				toolCalls[i].Function.Name = tc.Function.Name
+				err := json.Unmarshal([]byte(tc.Function.Arguments), &toolCalls[i].Function.Arguments) // arguments arrive as a JSON string
+				if err != nil {
+					return nil, fmt.Errorf("invalid tool call arguments: %w", err) // keep the decode error for the caller
+				}
+			}
+			messages = append(messages, api.Message{Role: msg.Role, ToolCalls: toolCalls})
 		}
 	}

@@ -425,6 +472,7 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 		Format:   format,
 		Options:  options,
 		Stream:   &r.Stream,
+		Tools:    r.Tools,
 	}, nil
 }

@@ -475,6 +523,7 @@ func fromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) {
 		Prompt:  r.Prompt,
 		Options: options,
 		Stream:  &r.Stream,
+		Suffix:  r.Suffix,
 	}, nil
 }

--- a/openai/openai_test.go
+++ b/openai/openai_test.go
@@ -85,6 +85,7 @@ func TestMiddlewareRequests(t *testing.T) {
 					Prompt:      "Hello",
 					Temperature: &temp,
 					Stop:        []string{"\n", "stop"},
+					Suffix:      "suffix",
 				}

 				bodyBytes, _ := json.Marshal(body)
@@ -115,6 +116,10 @@ func TestMiddlewareRequests(t *testing.T) {
 				if stopTokens[0] != "\n" || stopTokens[1] != "stop" {
 					t.Fatalf("expected ['\\n', 'stop'], got %v", stopTokens)
 				}
+
+				if genReq.Suffix != "suffix" {
+					t.Fatalf("expected 'suffix', got %s", genReq.Suffix)
+				}
 			},
 		},
 		{
--- a/server/model.go
+++ b/server/model.go
@@ -13,7 +13,6 @@ import (
 	"os"
 	"path/filepath"
 	"slices"
-	"sort"
 	"strings"
 	"text/template/parse"

@@ -232,7 +231,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap

 	var offset int64
 	for offset < stat.Size() {
-		ggml, n, err := llm.DecodeGGML(file, -1)
+		ggml, n, err := llm.DecodeGGML(file, 0)
 		if errors.Is(err, io.EOF) {
 			break
 		} else if err != nil {
@@ -246,39 +245,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
 			mediatype = "application/vnd.ollama.image.projector"
 		}

-		var reader io.Reader = io.NewSectionReader(file, offset, n)
-		if !sort.IsSorted(ggml.Tensors()) {
-			// create a new Tensors containing Tensors that have a writeTo
-			var tensors []*llm.Tensor
-			ggmlTensors := ggml.Tensors()
-
-			for _, tensor := range ggmlTensors.Items {
-				shape := make([]uint64, len(tensor.Shape))
-				for i := range len(tensor.Shape) {
-					shape[i] = tensor.Shape[len(tensor.Shape)-i-1]
-				}
-
-				tensors = append(tensors, &llm.Tensor{
-					Name:  tensor.Name,
-					Kind:  tensor.Kind,
-					Shape: shape,
-
-					WriterTo: &llm.TensorWriter{
-						Reader: io.NewSectionReader(file, offset+ggmlTensors.Offset+int64(tensor.Offset), int64(tensor.Size())),
-					},
-				})
-			}
-
-			reader = &llm.GGUFWriter{
-				KV: ggml.KV(),
-				Tensors: llm.Tensors{
-					Items:  tensors,
-					Offset: ggmlTensors.Offset,
-				},
-			}
-		}
-
-		layer, err := NewLayer(reader, mediatype)
+		layer, err := NewLayer(io.NewSectionReader(file, offset, n), mediatype)
 		if err != nil {
 			return nil, err
 		}
@@ -344,12 +311,14 @@ func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
 	}

 	var b bytes.Buffer
-	if err := tmpl.Execute(&b, map[string][]map[string]any{
+	if err := tmpl.Execute(&b, map[string][]api.ToolCall{
 		"ToolCalls": {
 			{
-				"Function": map[string]any{
-					"Name":      "@@name@@",
-					"Arguments": "@@arguments@@",
+				Function: api.ToolCallFunction{
+					Name: "@@name@@",
+					Arguments: api.ToolCallFunctionArguments{
+						"@@argument@@": 1,
+					},
 				},
 			},
 		},
@@ -357,7 +326,7 @@ func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
 		return nil, false
 	}

-	var kv map[string]string
+	var kv map[string]any
 	// execute the subtree with placeholders to identify the keys
 	// trim any commands that might exist in the template
 	if err := json.Unmarshal(bytes.TrimSuffix(b.Bytes(), []byte(",")), &kv); err != nil {
@@ -367,17 +336,19 @@ func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
 	// find the keys that correspond to the name and arguments fields
 	var name, arguments string
 	for k, v := range kv {
-		switch v {
-		case "@@name@@":
+		switch v.(type) {
+		case string:
 			name = k
-		case "@@arguments@@":
+		case map[string]any:
 			arguments = k
 		}
 	}

 	var objs []map[string]any
 	for offset := 0; offset < len(s); {
-		if err := json.NewDecoder(strings.NewReader(s[offset:])).Decode(&objs); errors.Is(err, io.EOF) {
+		var obj map[string]any
+		decoder := json.NewDecoder(strings.NewReader(s[offset:]))
+		if err := decoder.Decode(&obj); errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
 			break
 		} else if syntax := &(json.SyntaxError{}); errors.As(err, &syntax) {
 			// skip over any syntax errors
@@ -386,10 +357,11 @@ func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
 			// skip over any unmarshalable types
 			offset += int(unmarshalType.Offset)
 		} else if err != nil {
+			slog.Error("parseToolCalls", "error", err)
 			return nil, false
 		} else {
-			// break when an object is decoded
-			break
+			offset += int(decoder.InputOffset())
+			objs = append(objs, obj)
 		}
 	}

--- a/server/model_test.go
+++ b/server/model_test.go
@@ -115,11 +115,6 @@ func TestExtractFromZipFile(t *testing.T) {
 	}
 }

-type function struct {
-	Name      string         `json:"name"`
-	Arguments map[string]any `json:"arguments"`
-}
-
 func readFile(t *testing.T, base, name string) *bytes.Buffer {
 	t.Helper()

@@ -167,6 +162,10 @@ The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`,
 		{"command-r-plus", " The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.", false},
 		{"firefunction", ` functools[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`, true},
 		{"firefunction", " The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.", false},
+		{"llama3-groq-tool-use", `<tool_call>
+{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}
+{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}
+</tool_call>`, true},
 	}

 	var tools []api.Tool
@@ -181,18 +180,18 @@ The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`,

 	calls := []api.ToolCall{
 		{
-			Function: function{
+			Function: api.ToolCallFunction{
 				Name: "get_current_weather",
-				Arguments: map[string]any{
+				Arguments: api.ToolCallFunctionArguments{
 					"format":   "fahrenheit",
 					"location": "San Francisco, CA",
 				},
 			},
 		},
 		{
-			Function: function{
+			Function: api.ToolCallFunction{
 				Name: "get_current_weather",
-				Arguments: map[string]any{
+				Arguments: api.ToolCallFunctionArguments{
 					"format":   "celsius",
 					"location": "Toronto, Canada",
 				},
--- a/server/routes.go
+++ b/server/routes.go
@@ -275,11 +275,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 		}

 		r.Response = sb.String()
-		if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
-			r.ToolCalls = toolCalls
-			r.Response = ""
-		}
-
 		c.JSON(http.StatusOK, r)
 		return
 	}
@@ -1295,7 +1290,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	}

 	caps := []Capability{CapabilityCompletion}
-	if req.Tools != nil {
+	if len(req.Tools) > 0 {
 		caps = append(caps, CapabilityTools)
 	}

@@ -1390,9 +1385,12 @@ func (s *Server) ChatHandler(c *gin.Context) {
 		}

 		resp.Message.Content = sb.String()
-		if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
-			resp.Message.ToolCalls = toolCalls
-			resp.Message.Content = ""
+
+		if len(req.Tools) > 0 {
+			if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
+				resp.Message.ToolCalls = toolCalls
+				resp.Message.Content = ""
+			}
 		}

 		c.JSON(http.StatusOK, resp)
--- a/server/testdata/tools/command-r-plus.gotmpl
+++ b/server/testdata/tools/command-r-plus.gotmpl
@@ -46,7 +46,7 @@ Action: ```json
 {{- range .ToolCalls }}
    {
        "tool_name": "{{ .Function.Name }}",
-        "parameters": {{ json .Function.Arguments }}
+        "parameters": {{ .Function.Arguments }}
    }
 {{- end }}
 ]```
--- a/server/testdata/tools/firefunction.gotmpl
+++ b/server/testdata/tools/firefunction.gotmpl
@@ -17,7 +17,7 @@ If you decide to call functions:

 Available functions as JSON spec:
 {{- if .Tools }}
-{{ json .Tools }}
+{{ .Tools }}
 {{- end }}<|eot_id|>
 {{- end }}
 {{- range .Messages }}<|start_header_id|>
@@ -25,7 +25,7 @@ Available functions as JSON spec:
 {{- end }}<|end_header_id|>
 {{- if .Content }}{{ .Content }}
 {{- else if .ToolCalls }} functools[
-{{- range .ToolCalls }}{{ "{" }}"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}{{ "}" }}
+{{- range .ToolCalls }}{{ "{" }}"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}{{ "}" }}
 {{- end }}]
 {{- end }}<|eot_id|>
 {{- end }}<|start_header_id|>assistant<|end_header_id|>
--- a/server/testdata/tools/llama3-groq-tool-use.gotmpl
+++ b/server/testdata/tools/llama3-groq-tool-use.gotmpl
@@ -0,0 +1,43 @@
+{{- if .Messages }}
+{{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|>
+
+{{ .System }}
+{{- if .Tools }} You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+<tool_call>
+{"name": <function-name>,"arguments": <args-dict>}
+</tool_call>
+
+Here are the available tools:
+<tools>
+{{- range .Tools }} {{ .Function }}
+{{- end }} </tools>
+{{- end }}
+{{- end }}<|eot_id|>
+{{- range .Messages }}
+{{- if ne .Role "system" }}<|start_header_id|>{{ .Role }}<|end_header_id|>
+
+{{ if eq .Role "user" }}{{ .Content }}
+{{- else if eq .Role "assistant" }}
+{{- if .Content }}{{ .Content }}
+{{- else if .ToolCalls }}<tool_call>
+{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
+{{- end }}
+</tool_call>
+{{- end }}
+{{- else if eq .Role "tool" }}<tool_response>
+{{ .Content }}
+</tool_response>
+{{- end }}<|eot_id|>
+{{- end }}
+{{- end }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ else }}
+{{ if .System }}<|start_header_id|>system<|end_header_id|>
+
+{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
+
+{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ end }}{{ .Response }}
+{{- if .Response }}<|eot_id|>
+{{- end }}
--- a/server/testdata/tools/llama3-groq-tool-use.out
+++ b/server/testdata/tools/llama3-groq-tool-use.out
@@ -0,0 +1,24 @@
+<|start_header_id|>system<|end_header_id|>
+
+You are a knowledgable assistant. You can answer questions and perform tasks. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+<tool_call>
+{"name": <function-name>,"arguments": <args-dict>}
+</tool_call>
+
+Here are the available tools:
+<tools> {"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the users location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}} </tools><|eot_id|><|start_header_id|>user<|end_header_id|>
+
+What's the weather like today in Paris?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+<tool_call>
+{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}
+</tool_call><|eot_id|><|start_header_id|>tool<|end_header_id|>
+
+<tool_response>
+22
+</tool_response><|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+The current temperature in Paris, France is 22 degrees Celsius.<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+What's the weather like today in San Francisco and Toronto?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
--- a/server/testdata/tools/mistral.gotmpl
+++ b/server/testdata/tools/mistral.gotmpl
@@ -1,13 +1,13 @@
 {{- range $index, $_ := .Messages }}
 {{- if eq .Role "user" }}
-{{- if and (eq (len (slice $.Messages $index)) 1) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS]
+{{- if and (eq (len (slice $.Messages $index)) 1) $.Tools }}[AVAILABLE_TOOLS] {{ $.Tools }}[/AVAILABLE_TOOLS]
 {{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}

 {{ end }}{{ .Content }}[/INST]
 {{- else if eq .Role "assistant" }}
 {{- if .Content }} {{ .Content }}</s>
 {{- else if .ToolCalls }}[TOOL_CALLS] [
-{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}}
+{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
 {{- end }}]</s>
 {{- end }}
 {{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
--- a/template/template.go
+++ b/template/template.go
@@ -150,9 +150,9 @@ func (t *Template) Vars() []string {

 type Values struct {
 	Messages []api.Message
-	Tools    []api.Tool
-	Prompt   string
-	Suffix   string
+	api.Tools
+	Prompt string
+	Suffix string

 	// forceLegacy is a flag used to test compatibility with legacy templates
 	forceLegacy bool
@@ -217,6 +217,7 @@ func (t *Template) Execute(w io.Writer, v Values) error {
 			"System":   system,
 			"Messages": messages,
 			"Tools":    v.Tools,
+			"Response": "",
 		})
 	}

@@ -270,8 +271,9 @@ func (t *Template) Execute(w io.Writer, v Values) error {

 	tree := parse.Tree{Root: nodes.(*parse.ListNode)}
 	if err := template.Must(template.New("").AddParseTree("", &tree)).Execute(&b, map[string]any{
-		"System": system,
-		"Prompt": prompt,
+		"System":   system,
+		"Prompt":   prompt,
+		"Response": "",
 	}); err != nil {
 		return err
 	}
Author	SHA1	Message	Date
Michael Yang	d0634b1596	Merge pull request #5780 from ollama/mxyng/tools fix parsing tool calls: break on unexpected eofs	2024-07-18 12:14:10 -07:00
Michael Yang	43606d6d6a	fix parsing tool calls	2024-07-18 12:08:11 -07:00
Jeffrey Morgan	70b1010fa5	server: check for empty tools array too (#5779)	2024-07-18 11:44:57 -07:00
Jeffrey Morgan	84e5721f3a	always provide content even if empty (#5778)	2024-07-18 11:28:19 -07:00
Jeffrey Morgan	319fb1ce03	server: only parse tool calls if tools are provided (#5771) * server: only parse tool calls if tools are provided * still set `resp.Message.Content`	2024-07-18 08:50:23 -07:00
Michael Yang	b255445557	marshal json automatically for some template values (#5758)	2024-07-17 15:35:11 -07:00
Michael Yang	b23424bb3c	Merge pull request #5753 from ollama/mxyng/parse-tool-call parse tool call as individual objects	2024-07-17 11:47:53 -07:00
Michael Yang	5fd6988126	parse tool call as individual objects	2024-07-17 11:19:04 -07:00
Michael Yang	5b82960df8	stub response (#5750)	2024-07-17 10:39:22 -07:00
Michael Yang	cc9a252d8c	Merge pull request #5732 from ollama/mxyng/cleanup remove ToolCall from GenerateResponse	2024-07-17 10:26:54 -07:00
Pákozdi György	d281a6e603	add sidellama link (#5702)	2024-07-17 10:24:44 -07:00
royjhan	154f6f45d4	OpenAI: Support Tools (#5614) * reopen pr * tools * remove tc from stream for now * ID and Function * openai expects arguments to be a string (#5739) * mutually exclusive content and tool calls * clean up --------- Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>	2024-07-16 20:52:59 -07:00
royjhan	0d41623b52	OpenAI: Add Suffix to `v1/completions` (#5611) * add suffix * remove todo * remove TODO * add to test * rm outdated prompt tokens info md * fix test * fix test	2024-07-16 20:50:14 -07:00
Michael Yang	c279f96371	remove ToolCall from GenerateResponse	2024-07-16 15:22:49 -07:00