Compare commits
14 Commits
jyan/reord
...
v0.2.7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d0634b1596 | ||
|
|
43606d6d6a | ||
|
|
70b1010fa5 | ||
|
|
84e5721f3a | ||
|
|
319fb1ce03 | ||
|
|
b255445557 | ||
|
|
b23424bb3c | ||
|
|
5fd6988126 | ||
|
|
5b82960df8 | ||
|
|
cc9a252d8c | ||
|
|
d281a6e603 | ||
|
|
154f6f45d4 | ||
|
|
0d41623b52 | ||
|
|
c279f96371 |
@@ -295,6 +295,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
|
||||
- [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
|
||||
- [AI Studio](https://github.com/MindWorkAI/AI-Studio)
|
||||
- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
|
||||
|
||||
### Terminal
|
||||
|
||||
|
||||
78
api/types.go
78
api/types.go
@@ -101,46 +101,29 @@ type ChatRequest struct {
|
||||
KeepAlive *Duration `json:"keep_alive,omitempty"`
|
||||
|
||||
// Tools is an optional list of tools the model has access to.
|
||||
Tools []Tool `json:"tools,omitempty"`
|
||||
Tools `json:"tools,omitempty"`
|
||||
|
||||
// Options lists model-specific options.
|
||||
Options map[string]interface{} `json:"options"`
|
||||
}
|
||||
|
||||
type Tools []Tool
|
||||
|
||||
func (t Tools) String() string {
|
||||
bts, _ := json.Marshal(t)
|
||||
return string(bts)
|
||||
}
|
||||
|
||||
// Message is a single message in a chat sequence. The message contains the
|
||||
// role ("system", "user", or "assistant"), the content and an optional list
|
||||
// of images.
|
||||
type Message struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content,omitempty"`
|
||||
Content string `json:"content"`
|
||||
Images []ImageData `json:"images,omitempty"`
|
||||
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
||||
}
|
||||
|
||||
type ToolCall struct {
|
||||
Function struct {
|
||||
Name string `json:"name"`
|
||||
Arguments map[string]any `json:"arguments"`
|
||||
} `json:"function"`
|
||||
}
|
||||
|
||||
type Tool struct {
|
||||
Type string `json:"type"`
|
||||
Function struct {
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
Parameters struct {
|
||||
Type string `json:"type"`
|
||||
Required []string `json:"required"`
|
||||
Properties map[string]struct {
|
||||
Type string `json:"type"`
|
||||
Description string `json:"description"`
|
||||
Enum []string `json:"enum,omitempty"`
|
||||
} `json:"properties"`
|
||||
} `json:"parameters"`
|
||||
} `json:"function"`
|
||||
}
|
||||
|
||||
func (m *Message) UnmarshalJSON(b []byte) error {
|
||||
type Alias Message
|
||||
var a Alias
|
||||
@@ -153,6 +136,46 @@ func (m *Message) UnmarshalJSON(b []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ToolCall describes one tool the model wants to call. It wraps the function
// invocation under the "function" JSON key.
type ToolCall struct {
	Function ToolCallFunction `json:"function"`
}

// ToolCallFunction carries the name of the function to call together with
// the arguments to call it with.
type ToolCallFunction struct {
	Name      string                    `json:"name"`
	Arguments ToolCallFunctionArguments `json:"arguments"`
}

// ToolCallFunctionArguments maps argument names to arbitrary values.
type ToolCallFunctionArguments map[string]any

// String renders the arguments as JSON text. A marshalling error is ignored
// on purpose; in that case the result is the empty string.
func (t *ToolCallFunctionArguments) String() string {
	encoded, _ := json.Marshal(t)
	return string(encoded)
}
|
||||
|
||||
// Tool is one entry of a request's tool list: a type tag plus the function
// definition it refers to.
type Tool struct {
	Type     string       `json:"type"`
	Function ToolFunction `json:"function"`
}

// ToolFunction is the definition of a callable function: its name, a
// description, and the parameters it takes.
type ToolFunction struct {
	Name        string `json:"name"`
	Description string `json:"description"`

	// Parameters lists the parameter set: an overall type, the names of the
	// required parameters, and a per-parameter type, description and
	// optional set of enumerated values.
	Parameters struct {
		Type       string   `json:"type"`
		Required   []string `json:"required"`
		Properties map[string]struct {
			Type        string   `json:"type"`
			Description string   `json:"description"`
			Enum        []string `json:"enum,omitempty"`
		} `json:"properties"`
	} `json:"parameters"`
}

// String renders the function definition as JSON text. A marshalling error
// is ignored on purpose; in that case the result is the empty string.
func (t *ToolFunction) String() string {
	encoded, _ := json.Marshal(t)
	return string(encoded)
}
|
||||
|
||||
// ChatResponse is the response returned by [Client.Chat]. Its fields are
|
||||
// similar to [GenerateResponse].
|
||||
type ChatResponse struct {
|
||||
@@ -405,9 +428,6 @@ type GenerateResponse struct {
|
||||
// Response is the textual response itself.
|
||||
Response string `json:"response"`
|
||||
|
||||
// ToolCalls is the list of tools the model wants to call
|
||||
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
||||
|
||||
// Done specifies if the response is complete.
|
||||
Done bool `json:"done"`
|
||||
|
||||
|
||||
@@ -103,10 +103,6 @@ curl http://localhost:11434/v1/chat/completions \
|
||||
- [ ] `user`
|
||||
- [ ] `n`
|
||||
|
||||
#### Notes
|
||||
|
||||
- `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached
|
||||
|
||||
## Models
|
||||
|
||||
Before using a model, pull it locally `ollama pull`:
|
||||
|
||||
@@ -36,7 +36,6 @@ type ggla struct {
|
||||
|
||||
kv KV
|
||||
tensors []*Tensor
|
||||
offset int64
|
||||
}
|
||||
|
||||
func newGGLA(container *containerGGLA) *ggla {
|
||||
@@ -51,10 +50,7 @@ func (llm *ggla) KV() KV {
|
||||
}
|
||||
|
||||
func (llm *ggla) Tensors() Tensors {
|
||||
return Tensors{
|
||||
Items: llm.tensors,
|
||||
Offset: llm.offset,
|
||||
}
|
||||
return llm.tensors
|
||||
}
|
||||
|
||||
func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
|
||||
|
||||
39
llm/ggml.go
39
llm/ggml.go
@@ -112,38 +112,11 @@ func (kv KV) ChatTemplate() string {
|
||||
return s
|
||||
}
|
||||
|
||||
// Tensors type as a slice of pointers to Tensor
|
||||
// type Tensors []*Tensor
|
||||
|
||||
type Tensors struct {
|
||||
Items []*Tensor
|
||||
Offset int64
|
||||
}
|
||||
|
||||
// Implement the Len method
|
||||
func (ts Tensors) Len() int {
|
||||
return len(ts.Items)
|
||||
}
|
||||
|
||||
// Implement the Swap method
|
||||
func (ts Tensors) Swap(i, j int) {
|
||||
ts.Items[i], ts.Items[j] = ts.Items[j], ts.Items[i]
|
||||
}
|
||||
|
||||
// Implement the Less method
|
||||
func (ts Tensors) Less(i, j int) bool {
|
||||
var x, y int
|
||||
if n, err := fmt.Sscanf(ts.Items[i].Name, "blk.%d", &x); err != nil || n != 1 {
|
||||
return ts.Items[i].Name < ts.Items[j].Name
|
||||
} else if n, err := fmt.Sscanf(ts.Items[j].Name, "blk.%d", &y); err != nil || n != 1 {
|
||||
return ts.Items[i].Name < ts.Items[j].Name
|
||||
}
|
||||
return x < y
|
||||
}
|
||||
type Tensors []*Tensor
|
||||
|
||||
func (ts Tensors) Layers() map[string]Layer {
|
||||
layers := make(map[string]Layer)
|
||||
for _, t := range ts.Items {
|
||||
for _, t := range ts {
|
||||
parts := strings.Split(t.Name, ".")
|
||||
if parts[0] == "blk" {
|
||||
// join first and second part, e.g. blk.%d
|
||||
@@ -481,11 +454,3 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// TensorWriter wraps an io.Reader so that it can be used where an
// io.WriterTo is expected.
type TensorWriter struct {
	io.Reader
}

// WriteTo copies the wrapped reader into w and returns the number of bytes
// copied along with any error reported by the copy.
func (tw TensorWriter) WriteTo(w io.Writer) (int64, error) {
	copied, err := io.Copy(w, tw.Reader)
	return copied, err
}
|
||||
|
||||
276
llm/gguf.go
276
llm/gguf.go
@@ -6,12 +6,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/exp/maps"
|
||||
)
|
||||
|
||||
type containerGGUF struct {
|
||||
@@ -92,7 +87,6 @@ type gguf struct {
|
||||
|
||||
kv KV
|
||||
tensors []*Tensor
|
||||
offset int64
|
||||
|
||||
parameters uint64
|
||||
|
||||
@@ -115,10 +109,7 @@ func (llm *gguf) KV() KV {
|
||||
}
|
||||
|
||||
func (llm *gguf) Tensors() Tensors {
|
||||
return Tensors{
|
||||
Items: llm.tensors,
|
||||
Offset: llm.offset,
|
||||
}
|
||||
return llm.tensors
|
||||
}
|
||||
|
||||
func (llm *gguf) numTensor() uint64 {
|
||||
@@ -208,13 +199,12 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
return fmt.Errorf("failed to read tensor dimensions: %w", err)
|
||||
}
|
||||
|
||||
shape := []uint64{}
|
||||
shape := [4]uint64{1, 1, 1, 1}
|
||||
for i := 0; uint32(i) < dims; i++ {
|
||||
shapeVal, err := readGGUF[uint64](llm, rs)
|
||||
shape[i], err = readGGUF[uint64](llm, rs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read tensor shape: %w", err)
|
||||
}
|
||||
shape = append(shape, shapeVal)
|
||||
}
|
||||
|
||||
kind, err := readGGUF[uint32](llm, rs)
|
||||
@@ -231,7 +221,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
Name: name,
|
||||
Kind: kind,
|
||||
Offset: offset,
|
||||
Shape: shape,
|
||||
Shape: shape[:],
|
||||
}
|
||||
|
||||
llm.tensors = append(llm.tensors, &tensor)
|
||||
@@ -246,14 +236,6 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
alignment = 32
|
||||
}
|
||||
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get current offset: %w", err)
|
||||
}
|
||||
|
||||
// align to next 32-byte boundary
|
||||
llm.offset = offset + llm.padding(offset, int64(alignment))
|
||||
|
||||
for _, tensor := range llm.tensors {
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
@@ -279,12 +261,12 @@ func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
|
||||
return t, err
|
||||
}
|
||||
|
||||
func writeGGUF[V any](w io.Writer, t uint32, v V) error {
|
||||
if err := binary.Write(w, binary.LittleEndian, t); err != nil {
|
||||
func writeGGUF[V any](llm *gguf, w io.Writer, t uint32, v V) error {
|
||||
if err := binary.Write(w, llm.ByteOrder, t); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return binary.Write(w, binary.LittleEndian, v)
|
||||
return binary.Write(w, llm.ByteOrder, v)
|
||||
}
|
||||
|
||||
func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
|
||||
@@ -348,12 +330,12 @@ func readGGUFString(llm *gguf, r io.Reader) (string, error) {
|
||||
return string(buf), nil
|
||||
}
|
||||
|
||||
func writeGGUFString(w io.Writer, s string) error {
|
||||
if err := binary.Write(w, binary.LittleEndian, ggufTypeString); err != nil {
|
||||
func writeGGUFString(llm *gguf, w io.Writer, s string) error {
|
||||
if err := binary.Write(w, llm.ByteOrder, ggufTypeString); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
|
||||
if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -362,9 +344,8 @@ func writeGGUFString(w io.Writer, s string) error {
|
||||
}
|
||||
|
||||
type array struct {
|
||||
size int
|
||||
values []any
|
||||
datatype uint32
|
||||
size int
|
||||
values []any
|
||||
}
|
||||
|
||||
func (a *array) MarshalJSON() ([]byte, error) {
|
||||
@@ -444,7 +425,7 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
a := &array{size: int(n), datatype: t}
|
||||
a := &array{size: int(n)}
|
||||
if llm.canCollectArray(int(n)) {
|
||||
a.values = make([]any, int(n))
|
||||
}
|
||||
@@ -495,21 +476,21 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
|
||||
return a, nil
|
||||
}
|
||||
|
||||
func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
|
||||
if err := binary.Write(w, binary.LittleEndian, ggufTypeArray); err != nil {
|
||||
func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
|
||||
if err := binary.Write(w, llm.ByteOrder, ggufTypeArray); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(w, binary.LittleEndian, t); err != nil {
|
||||
if err := binary.Write(w, llm.ByteOrder, t); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
|
||||
if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, e := range s {
|
||||
if err := binary.Write(w, binary.LittleEndian, e); err != nil {
|
||||
if err := binary.Write(w, llm.ByteOrder, e); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -608,19 +589,19 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
|
||||
var err error
|
||||
switch v := v.(type) {
|
||||
case uint32:
|
||||
err = writeGGUF(ws, ggufTypeUint32, v)
|
||||
err = writeGGUF(llm, ws, ggufTypeUint32, v)
|
||||
case float32:
|
||||
err = writeGGUF(ws, ggufTypeFloat32, v)
|
||||
err = writeGGUF(llm, ws, ggufTypeFloat32, v)
|
||||
case bool:
|
||||
err = writeGGUF(ws, ggufTypeBool, v)
|
||||
err = writeGGUF(llm, ws, ggufTypeBool, v)
|
||||
case string:
|
||||
err = writeGGUFString(ws, v)
|
||||
err = writeGGUFString(llm, ws, v)
|
||||
case []int32:
|
||||
err = writeGGUFArray(ws, ggufTypeInt32, v)
|
||||
err = writeGGUFArray(llm, ws, ggufTypeInt32, v)
|
||||
case []uint32:
|
||||
err = writeGGUFArray(ws, ggufTypeUint32, v)
|
||||
err = writeGGUFArray(llm, ws, ggufTypeUint32, v)
|
||||
case []float32:
|
||||
err = writeGGUFArray(ws, ggufTypeFloat32, v)
|
||||
err = writeGGUFArray(llm, ws, ggufTypeFloat32, v)
|
||||
case []string:
|
||||
if err := binary.Write(ws, llm.ByteOrder, ggufTypeArray); err != nil {
|
||||
return err
|
||||
@@ -653,7 +634,7 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
|
||||
|
||||
for k, v := range kvCheck {
|
||||
if !v {
|
||||
return fmt.Errorf("didn't know how to write kv %s", k)
|
||||
return fmt.Errorf("Didn't know how to write kv %s", k)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -715,208 +696,3 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
|
||||
func (gguf) padding(offset, align int64) int64 {
|
||||
return (align - offset%align) % align
|
||||
}
|
||||
|
||||
// Reader and WriterTo
|
||||
type GGUFWriter struct {
|
||||
KV
|
||||
Tensors
|
||||
}
|
||||
|
||||
// writeOffset wraps an io.Writer and keeps a running total, in offset, of the
// bytes the wrapped writer reported as written.
type writeOffset struct {
	io.Writer
	offset int
}

// Write forwards p to the wrapped writer and adds the reported byte count to
// offset, even when the write returns an error.
func (wo *writeOffset) Write(p []byte) (int, error) {
	written, err := wo.Writer.Write(p)
	wo.offset += written
	return written, err
}
|
||||
|
||||
var _ io.Reader = (*GGUFWriter)(nil)
|
||||
|
||||
var _ io.WriterTo = (*GGUFWriter)(nil)
|
||||
|
||||
func (GGUFWriter) Read([]byte) (int, error) {
|
||||
panic("not implemeneted")
|
||||
}
|
||||
|
||||
func (gguf GGUFWriter) WriteTo(w io.Writer) (int64, error) {
|
||||
wo := &writeOffset{Writer: w}
|
||||
|
||||
if err := binary.Write(wo, binary.LittleEndian, []byte("GGUF")); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if err := binary.Write(wo, binary.LittleEndian, uint32(3)); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if err := binary.Write(wo, binary.LittleEndian, uint64(len(gguf.Tensors.Items))); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if err := binary.Write(wo, binary.LittleEndian, uint64(len(gguf.KV)-1)); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
keys := maps.Keys(gguf.KV)
|
||||
slices.Sort(keys)
|
||||
|
||||
for _, key := range keys {
|
||||
switch key {
|
||||
case "general.parameter_count":
|
||||
// don't write general param count as its added in by us
|
||||
continue
|
||||
default:
|
||||
if err := ggufWriteKV(wo, key, gguf.KV[key]); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
}
|
||||
sort.Sort(gguf.Tensors)
|
||||
|
||||
var s uint64
|
||||
for _, t := range gguf.Tensors.Items {
|
||||
t.Offset = s
|
||||
if err := ggufWriteTensorInfo(wo, t); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
s += t.Size()
|
||||
}
|
||||
tensorOffset := wo.offset
|
||||
|
||||
for _, t := range gguf.Tensors.Items {
|
||||
if err := ggufWriteTensor(wo, t, wo.offset); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
return int64(tensorOffset), nil
|
||||
}
|
||||
|
||||
func ggufWriteTensorInfo(ws io.Writer, t *Tensor) error {
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(t.Name))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, []byte(t.Name)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint32(len(t.Shape))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for i := range len(t.Shape) {
|
||||
if err := binary.Write(ws, binary.LittleEndian, t.Shape[len(t.Shape)-i-1]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, t.Kind); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return binary.Write(ws, binary.LittleEndian, t.Offset)
|
||||
}
|
||||
|
||||
func ggufWriteTensor(ws io.Writer, t *Tensor, offset int) error {
|
||||
slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset)
|
||||
if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(int64(offset), 32)))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err := t.WriteTo(ws)
|
||||
return err
|
||||
}
|
||||
|
||||
func ggufWriteKV(ws io.Writer, k string, v any) error {
|
||||
slog.Debug(k, "type", fmt.Sprintf("%T", v))
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(k))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, []byte(k)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var err error
|
||||
switch v := v.(type) {
|
||||
case uint32:
|
||||
err = writeGGUF(ws, ggufTypeUint32, v)
|
||||
case float32:
|
||||
err = writeGGUF(ws, ggufTypeFloat32, v)
|
||||
case bool:
|
||||
err = writeGGUF(ws, ggufTypeBool, v)
|
||||
case string:
|
||||
err = writeGGUFString(ws, v)
|
||||
case []int32:
|
||||
err = writeGGUFArray(ws, ggufTypeInt32, v)
|
||||
case []uint32:
|
||||
err = writeGGUFArray(ws, ggufTypeUint32, v)
|
||||
case []float32:
|
||||
err = writeGGUFArray(ws, ggufTypeFloat32, v)
|
||||
case []string:
|
||||
if err := binary.Write(ws, binary.LittleEndian, ggufTypeArray); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, ggufTypeString); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(v))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, e := range v {
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(e))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, []byte(e)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
case *array:
|
||||
if v.size > 0 {
|
||||
switch v.values[0].(type) {
|
||||
case string:
|
||||
if err := binary.Write(ws, binary.LittleEndian, ggufTypeArray); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, ggufTypeString); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(v.size)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, e := range v.values {
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(e.(string)))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, []byte(e.(string))); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
default:
|
||||
err = writeGGUFArray(ws, v.datatype, v.values)
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
return fmt.Errorf("improper type for '%s'", k)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// ggufPadding returns how many bytes must be added to offset to reach the
// next multiple of align; the result is 0 when offset is already a multiple.
func ggufPadding(offset, align int64) int64 {
	gap := align - offset%align
	// gap equals align when offset%align is 0; the second modulo folds it to 0.
	return gap % align
}
|
||||
|
||||
187
llm/gguf_test.go
187
llm/gguf_test.go
@@ -1,187 +0,0 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
// TestGGUFDecode tests the decoding and rewriting of (unsorted) GGUF files
|
||||
// To run, add GGUF files to /llm/testdata and add the name of the file to the tests slice
|
||||
// This creates a temporary file in /llm/testdata that will deleted only if the test passes
|
||||
// Note: map[Tensor.Name + " offset"] is commented since sorting will reorder the tensors
|
||||
// Comment out sort.Sort(gguf.Tensors) in gguf.go to test offsets
|
||||
func TestGGUFRewrite(t *testing.T) {
|
||||
tests := []string{
|
||||
"phi3.gguf",
|
||||
}
|
||||
|
||||
for i := range tests {
|
||||
tt := tests[i]
|
||||
t.Run(tt, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
p := filepath.Join("testdata", tt)
|
||||
|
||||
if _, err := os.Stat(p); err != nil {
|
||||
t.Skip("file not found", p)
|
||||
}
|
||||
|
||||
wantFile, err := os.Open(p)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer wantFile.Close()
|
||||
|
||||
// decode original gguf
|
||||
_, wantGGML, err := decodeGGML(t, wantFile)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
gotFile, err := os.CreateTemp("testdata", tt)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
gotFile.Close()
|
||||
if !t.Failed() {
|
||||
os.Remove(gotFile.Name())
|
||||
}
|
||||
}()
|
||||
|
||||
_, gotGGML, err := rewriteGGML(t, wantGGML, gotFile, wantFile)
|
||||
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
diff, diff2 := compareGGML(t, gotGGML, wantGGML, gotFile, wantFile)
|
||||
if cmp.Diff(diff, diff2) != "" {
|
||||
t.Fatalf("diff: \n%s", cmp.Diff(diff, diff2))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func compareGGML(t *testing.T, gotGGML, wantGGML *GGML, f *os.File, f2 *os.File) (map[string]string, map[string]string) {
|
||||
got := make(map[string]string)
|
||||
want := make(map[string]string)
|
||||
|
||||
gotKV := gotGGML.KV()
|
||||
wantKV := wantGGML.KV()
|
||||
|
||||
if len(gotKV) != len(wantKV) {
|
||||
t.Fatalf("got length: %d != want length: %d", len(gotKV), len(wantKV))
|
||||
}
|
||||
|
||||
for k, v := range gotKV {
|
||||
switch t := v.(type) {
|
||||
case *array:
|
||||
if diffy := cmp.Diff(t.values, wantKV[k].(*array).values); diffy != "" {
|
||||
got[k] = diffy
|
||||
}
|
||||
default:
|
||||
if v != wantKV[k] {
|
||||
got[k] = fmt.Sprintf("kv1: %v, kv2: %v", v, want[k])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
gotTensors := gotGGML.Tensors().Items
|
||||
gotOffset := gotGGML.Tensors().Offset
|
||||
wantTensors := wantGGML.Tensors().Items
|
||||
wantOffset := wantGGML.Tensors().Offset
|
||||
|
||||
if len(gotTensors) != len(wantTensors) {
|
||||
got["lenTensors"] = fmt.Sprintf("t1: %d, t2: %d", len(gotTensors), len(wantTensors))
|
||||
}
|
||||
|
||||
for _, tensor := range gotTensors {
|
||||
sha256sum := sha256.New()
|
||||
sr := io.NewSectionReader(f, gotOffset+int64(tensor.Offset), int64(tensor.Size()))
|
||||
var s int64
|
||||
s, err := io.Copy(sha256sum, sr)
|
||||
if err != nil {
|
||||
t.Fatalf("error: %v", err)
|
||||
}
|
||||
|
||||
got[tensor.Name] = fmt.Sprintf("%x", sha256sum.Sum(nil))
|
||||
got[tensor.Name+" size"] = fmt.Sprintf("%d", s)
|
||||
// got[tensor.Name+" offset"] = fmt.Sprintf("%v", tensor.Offset)
|
||||
}
|
||||
|
||||
for _, tensor := range wantTensors {
|
||||
sha256sum := sha256.New()
|
||||
var s int64
|
||||
sr := io.NewSectionReader(f2, wantOffset +int64(tensor.Offset), int64(tensor.Size()))
|
||||
s, err := io.Copy(sha256sum, sr)
|
||||
if err != nil {
|
||||
t.Fatalf("error: %v", err)
|
||||
}
|
||||
|
||||
want[tensor.Name] = fmt.Sprintf("%x", sha256sum.Sum(nil))
|
||||
want[tensor.Name+" size"] = fmt.Sprintf("%d", s)
|
||||
// want[tensor.Name+" offset"] = fmt.Sprintf("%v", tensor.Offset)
|
||||
}
|
||||
return got, want
|
||||
}
|
||||
|
||||
func decodeGGML(t *testing.T, f *os.File) (int64, *GGML, error) {
|
||||
ggml, n, err := DecodeGGML(f, math.MaxInt)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return n, ggml, nil
|
||||
}
|
||||
|
||||
func rewriteGGML(t *testing.T, ggml *GGML, gotFile *os.File, wantFile *os.File) (int64, *GGML, error) {
|
||||
var tensors []*Tensor
|
||||
|
||||
for _, tensor := range ggml.Tensors().Items {
|
||||
shape := make([]uint64, len(tensor.Shape))
|
||||
for i := range len(tensor.Shape) {
|
||||
shape[i] = tensor.Shape[len(tensor.Shape)-i-1]
|
||||
}
|
||||
|
||||
tensors = append(tensors, &Tensor{
|
||||
Name: tensor.Name,
|
||||
Kind: tensor.Kind,
|
||||
Shape: shape,
|
||||
|
||||
WriterTo: TensorWriter{
|
||||
Reader: io.NewSectionReader(wantFile, ggml.Tensors().Offset+int64(tensor.Offset), int64(tensor.Size())),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
reader := &GGUFWriter{
|
||||
KV: ggml.KV(),
|
||||
Tensors: Tensors{
|
||||
Items: tensors,
|
||||
Offset: ggml.Tensors().Offset,
|
||||
},
|
||||
}
|
||||
|
||||
n, err := io.Copy(gotFile, reader)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
file, err := os.Open(gotFile.Name())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
ggml2, _, err := DecodeGGML(file, math.MaxInt)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
return n, ggml2, nil
|
||||
}
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"strings"
|
||||
@@ -29,8 +30,9 @@ type ErrorResponse struct {
|
||||
}
|
||||
|
||||
type Message struct {
|
||||
Role string `json:"role"`
|
||||
Content any `json:"content"`
|
||||
Role string `json:"role"`
|
||||
Content any `json:"content"`
|
||||
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
||||
}
|
||||
|
||||
type Choice struct {
|
||||
@@ -78,6 +80,7 @@ type ChatCompletionRequest struct {
|
||||
PresencePenalty *float64 `json:"presence_penalty_penalty"`
|
||||
TopP *float64 `json:"top_p"`
|
||||
ResponseFormat *ResponseFormat `json:"response_format"`
|
||||
Tools []api.Tool `json:"tools"`
|
||||
}
|
||||
|
||||
type ChatCompletion struct {
|
||||
@@ -111,6 +114,7 @@ type CompletionRequest struct {
|
||||
Stream bool `json:"stream"`
|
||||
Temperature *float32 `json:"temperature"`
|
||||
TopP float32 `json:"top_p"`
|
||||
Suffix string `json:"suffix"`
|
||||
}
|
||||
|
||||
type Completion struct {
|
||||
@@ -132,6 +136,15 @@ type CompletionChunk struct {
|
||||
SystemFingerprint string `json:"system_fingerprint"`
|
||||
}
|
||||
|
||||
// ToolCall is one tool call attached to a message: an identifier, a type
// tag, and the function being called.
type ToolCall struct {
	ID   string `json:"id"`
	Type string `json:"type"`

	// Function holds the function name and its arguments. Arguments is a
	// string; the conversion code in this file fills it with JSON text.
	Function struct {
		Name      string `json:"name"`
		Arguments string `json:"arguments"`
	} `json:"function"`
}
|
||||
|
||||
type Model struct {
|
||||
Id string `json:"id"`
|
||||
Object string `json:"object"`
|
||||
@@ -170,7 +183,31 @@ func NewError(code int, message string) ErrorResponse {
|
||||
return ErrorResponse{Error{Type: etype, Message: message}}
|
||||
}
|
||||
|
||||
// toolCallId returns a random tool call identifier: the prefix "call_"
// followed by 8 characters drawn from lowercase letters and digits.
func toolCallId() string {
	const letterBytes = "abcdefghijklmnopqrstuvwxyz0123456789"

	b := make([]byte, 8)
	for i := range b {
		b[i] = letterBytes[rand.Intn(len(letterBytes))]
	}

	// The alphabet is already lowercase, so no case folding is needed.
	return "call_" + string(b)
}
|
||||
|
||||
func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
|
||||
toolCalls := make([]ToolCall, len(r.Message.ToolCalls))
|
||||
for i, tc := range r.Message.ToolCalls {
|
||||
toolCalls[i].ID = toolCallId()
|
||||
toolCalls[i].Type = "function"
|
||||
toolCalls[i].Function.Name = tc.Function.Name
|
||||
|
||||
args, err := json.Marshal(tc.Function.Arguments)
|
||||
if err != nil {
|
||||
slog.Error("could not marshall function arguments to json", "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
toolCalls[i].Function.Arguments = string(args)
|
||||
}
|
||||
|
||||
return ChatCompletion{
|
||||
Id: id,
|
||||
Object: "chat.completion",
|
||||
@@ -179,7 +216,7 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
|
||||
SystemFingerprint: "fp_ollama",
|
||||
Choices: []Choice{{
|
||||
Index: 0,
|
||||
Message: Message{Role: r.Message.Role, Content: r.Message.Content},
|
||||
Message: Message{Role: r.Message.Role, Content: r.Message.Content, ToolCalls: toolCalls},
|
||||
FinishReason: func(reason string) *string {
|
||||
if len(reason) > 0 {
|
||||
return &reason
|
||||
@@ -188,7 +225,6 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
|
||||
}(r.DoneReason),
|
||||
}},
|
||||
Usage: Usage{
|
||||
// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
|
||||
PromptTokens: r.PromptEvalCount,
|
||||
CompletionTokens: r.EvalCount,
|
||||
TotalTokens: r.PromptEvalCount + r.EvalCount,
|
||||
@@ -234,7 +270,6 @@ func toCompletion(id string, r api.GenerateResponse) Completion {
|
||||
}(r.DoneReason),
|
||||
}},
|
||||
Usage: Usage{
|
||||
// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
|
||||
PromptTokens: r.PromptEvalCount,
|
||||
CompletionTokens: r.EvalCount,
|
||||
TotalTokens: r.PromptEvalCount + r.EvalCount,
|
||||
@@ -367,7 +402,19 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
|
||||
}
|
||||
messages = append(messages, message)
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid message content type: %T", content)
|
||||
if msg.ToolCalls == nil {
|
||||
return nil, fmt.Errorf("invalid message content type: %T", content)
|
||||
}
|
||||
|
||||
toolCalls := make([]api.ToolCall, len(msg.ToolCalls))
|
||||
for i, tc := range msg.ToolCalls {
|
||||
toolCalls[i].Function.Name = tc.Function.Name
|
||||
err := json.Unmarshal([]byte(tc.Function.Arguments), &toolCalls[i].Function.Arguments)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid tool call arguments")
|
||||
}
|
||||
}
|
||||
messages = append(messages, api.Message{Role: msg.Role, ToolCalls: toolCalls})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -425,6 +472,7 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
|
||||
Format: format,
|
||||
Options: options,
|
||||
Stream: &r.Stream,
|
||||
Tools: r.Tools,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -475,6 +523,7 @@ func fromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) {
|
||||
Prompt: r.Prompt,
|
||||
Options: options,
|
||||
Stream: &r.Stream,
|
||||
Suffix: r.Suffix,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -85,6 +85,7 @@ func TestMiddlewareRequests(t *testing.T) {
|
||||
Prompt: "Hello",
|
||||
Temperature: &temp,
|
||||
Stop: []string{"\n", "stop"},
|
||||
Suffix: "suffix",
|
||||
}
|
||||
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
@@ -115,6 +116,10 @@ func TestMiddlewareRequests(t *testing.T) {
|
||||
if stopTokens[0] != "\n" || stopTokens[1] != "stop" {
|
||||
t.Fatalf("expected ['\\n', 'stop'], got %v", stopTokens)
|
||||
}
|
||||
|
||||
if genReq.Suffix != "suffix" {
|
||||
t.Fatalf("expected 'suffix', got %s", genReq.Suffix)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
|
||||
@@ -13,7 +13,6 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
"text/template/parse"
|
||||
|
||||
@@ -232,7 +231,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
|
||||
|
||||
var offset int64
|
||||
for offset < stat.Size() {
|
||||
ggml, n, err := llm.DecodeGGML(file, -1)
|
||||
ggml, n, err := llm.DecodeGGML(file, 0)
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
} else if err != nil {
|
||||
@@ -246,39 +245,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
|
||||
mediatype = "application/vnd.ollama.image.projector"
|
||||
}
|
||||
|
||||
var reader io.Reader = io.NewSectionReader(file, offset, n)
|
||||
if !sort.IsSorted(ggml.Tensors()) {
|
||||
// create a new Tensors containing Tensors that have a writeTo
|
||||
var tensors []*llm.Tensor
|
||||
ggmlTensors := ggml.Tensors()
|
||||
|
||||
for _, tensor := range ggmlTensors.Items {
|
||||
shape := make([]uint64, len(tensor.Shape))
|
||||
for i := range len(tensor.Shape) {
|
||||
shape[i] = tensor.Shape[len(tensor.Shape)-i-1]
|
||||
}
|
||||
|
||||
tensors = append(tensors, &llm.Tensor{
|
||||
Name: tensor.Name,
|
||||
Kind: tensor.Kind,
|
||||
Shape: shape,
|
||||
|
||||
WriterTo: &llm.TensorWriter{
|
||||
Reader: io.NewSectionReader(file, offset+ggmlTensors.Offset+int64(tensor.Offset), int64(tensor.Size())),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
reader = &llm.GGUFWriter{
|
||||
KV: ggml.KV(),
|
||||
Tensors: llm.Tensors{
|
||||
Items: tensors,
|
||||
Offset: ggmlTensors.Offset,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
layer, err := NewLayer(reader, mediatype)
|
||||
layer, err := NewLayer(io.NewSectionReader(file, offset, n), mediatype)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -344,12 +311,14 @@ func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
|
||||
}
|
||||
|
||||
var b bytes.Buffer
|
||||
if err := tmpl.Execute(&b, map[string][]map[string]any{
|
||||
if err := tmpl.Execute(&b, map[string][]api.ToolCall{
|
||||
"ToolCalls": {
|
||||
{
|
||||
"Function": map[string]any{
|
||||
"Name": "@@name@@",
|
||||
"Arguments": "@@arguments@@",
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "@@name@@",
|
||||
Arguments: api.ToolCallFunctionArguments{
|
||||
"@@argument@@": 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -357,7 +326,7 @@ func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
var kv map[string]string
|
||||
var kv map[string]any
|
||||
// execute the subtree with placeholders to identify the keys
|
||||
// trim any trailing comma that might exist in the template
|
||||
if err := json.Unmarshal(bytes.TrimSuffix(b.Bytes(), []byte(",")), &kv); err != nil {
|
||||
@@ -367,17 +336,19 @@ func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
|
||||
// find the keys that correspond to the name and arguments fields
|
||||
var name, arguments string
|
||||
for k, v := range kv {
|
||||
switch v {
|
||||
case "@@name@@":
|
||||
switch v.(type) {
|
||||
case string:
|
||||
name = k
|
||||
case "@@arguments@@":
|
||||
case map[string]any:
|
||||
arguments = k
|
||||
}
|
||||
}
|
||||
|
||||
var objs []map[string]any
|
||||
for offset := 0; offset < len(s); {
|
||||
if err := json.NewDecoder(strings.NewReader(s[offset:])).Decode(&objs); errors.Is(err, io.EOF) {
|
||||
var obj map[string]any
|
||||
decoder := json.NewDecoder(strings.NewReader(s[offset:]))
|
||||
if err := decoder.Decode(&obj); errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
break
|
||||
} else if syntax := &(json.SyntaxError{}); errors.As(err, &syntax) {
|
||||
// skip over any syntax errors
|
||||
@@ -386,10 +357,11 @@ func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
|
||||
// skip over any unmarshalable types
|
||||
offset += int(unmarshalType.Offset)
|
||||
} else if err != nil {
|
||||
slog.Error("parseToolCalls", "error", err)
|
||||
return nil, false
|
||||
} else {
|
||||
// break when an object is decoded
|
||||
break
|
||||
offset += int(decoder.InputOffset())
|
||||
objs = append(objs, obj)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -115,11 +115,6 @@ func TestExtractFromZipFile(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
type function struct {
|
||||
Name string `json:"name"`
|
||||
Arguments map[string]any `json:"arguments"`
|
||||
}
|
||||
|
||||
func readFile(t *testing.T, base, name string) *bytes.Buffer {
|
||||
t.Helper()
|
||||
|
||||
@@ -167,6 +162,10 @@ The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`,
|
||||
{"command-r-plus", " The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.", false},
|
||||
{"firefunction", ` functools[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`, true},
|
||||
{"firefunction", " The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.", false},
|
||||
{"llama3-groq-tool-use", `<tool_call>
|
||||
{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}
|
||||
{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}
|
||||
</tool_call>`, true},
|
||||
}
|
||||
|
||||
var tools []api.Tool
|
||||
@@ -181,18 +180,18 @@ The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`,
|
||||
|
||||
calls := []api.ToolCall{
|
||||
{
|
||||
Function: function{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get_current_weather",
|
||||
Arguments: map[string]any{
|
||||
Arguments: api.ToolCallFunctionArguments{
|
||||
"format": "fahrenheit",
|
||||
"location": "San Francisco, CA",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Function: function{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get_current_weather",
|
||||
Arguments: map[string]any{
|
||||
Arguments: api.ToolCallFunctionArguments{
|
||||
"format": "celsius",
|
||||
"location": "Toronto, Canada",
|
||||
},
|
||||
|
||||
@@ -275,11 +275,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
||||
}
|
||||
|
||||
r.Response = sb.String()
|
||||
if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
|
||||
r.ToolCalls = toolCalls
|
||||
r.Response = ""
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, r)
|
||||
return
|
||||
}
|
||||
@@ -1295,7 +1290,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
||||
}
|
||||
|
||||
caps := []Capability{CapabilityCompletion}
|
||||
if req.Tools != nil {
|
||||
if len(req.Tools) > 0 {
|
||||
caps = append(caps, CapabilityTools)
|
||||
}
|
||||
|
||||
@@ -1390,9 +1385,12 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
||||
}
|
||||
|
||||
resp.Message.Content = sb.String()
|
||||
if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
|
||||
resp.Message.ToolCalls = toolCalls
|
||||
resp.Message.Content = ""
|
||||
|
||||
if len(req.Tools) > 0 {
|
||||
if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
|
||||
resp.Message.ToolCalls = toolCalls
|
||||
resp.Message.Content = ""
|
||||
}
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, resp)
|
||||
|
||||
2
server/testdata/tools/command-r-plus.gotmpl
vendored
2
server/testdata/tools/command-r-plus.gotmpl
vendored
@@ -46,7 +46,7 @@ Action: ```json
|
||||
{{- range .ToolCalls }}
|
||||
{
|
||||
"tool_name": "{{ .Function.Name }}",
|
||||
"parameters": {{ json .Function.Arguments }}
|
||||
"parameters": {{ .Function.Arguments }}
|
||||
}
|
||||
{{- end }}
|
||||
]```
|
||||
|
||||
4
server/testdata/tools/firefunction.gotmpl
vendored
4
server/testdata/tools/firefunction.gotmpl
vendored
@@ -17,7 +17,7 @@ If you decide to call functions:
|
||||
|
||||
Available functions as JSON spec:
|
||||
{{- if .Tools }}
|
||||
{{ json .Tools }}
|
||||
{{ .Tools }}
|
||||
{{- end }}<|eot_id|>
|
||||
{{- end }}
|
||||
{{- range .Messages }}<|start_header_id|>
|
||||
@@ -25,7 +25,7 @@ Available functions as JSON spec:
|
||||
{{- end }}<|end_header_id|>
|
||||
{{- if .Content }}{{ .Content }}
|
||||
{{- else if .ToolCalls }} functools[
|
||||
{{- range .ToolCalls }}{{ "{" }}"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}{{ "}" }}
|
||||
{{- range .ToolCalls }}{{ "{" }}"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}{{ "}" }}
|
||||
{{- end }}]
|
||||
{{- end }}<|eot_id|>
|
||||
{{- end }}<|start_header_id|>assistant<|end_header_id|>
|
||||
43
server/testdata/tools/llama3-groq-tool-use.gotmpl
vendored
Normal file
43
server/testdata/tools/llama3-groq-tool-use.gotmpl
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
{{- if .Messages }}
|
||||
{{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|>
|
||||
|
||||
{{ .System }}
|
||||
{{- if .Tools }} You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||
<tool_call>
|
||||
{"name": <function-name>,"arguments": <args-dict>}
|
||||
</tool_call>
|
||||
|
||||
Here are the available tools:
|
||||
<tools>
|
||||
{{- range .Tools }} {{ .Function }}
|
||||
{{- end }} </tools>
|
||||
{{- end }}
|
||||
{{- end }}<|eot_id|>
|
||||
{{- range .Messages }}
|
||||
{{- if ne .Role "system" }}<|start_header_id|>{{ .Role }}<|end_header_id|>
|
||||
|
||||
{{ if eq .Role "user" }}{{ .Content }}
|
||||
{{- else if eq .Role "assistant" }}
|
||||
{{- if .Content }}{{ .Content }}
|
||||
{{- else if .ToolCalls }}<tool_call>
|
||||
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
|
||||
{{- end }}
|
||||
</tool_call>
|
||||
{{- end }}
|
||||
{{- else if eq .Role "tool" }}<tool_response>
|
||||
{{ .Content }}
|
||||
</tool_response>
|
||||
{{- end }}<|eot_id|>
|
||||
{{- end }}
|
||||
{{- end }}<|start_header_id|>assistant<|end_header_id|>
|
||||
|
||||
{{ else }}
|
||||
{{ if .System }}<|start_header_id|>system<|end_header_id|>
|
||||
|
||||
{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
|
||||
|
||||
{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
|
||||
|
||||
{{ end }}{{ .Response }}
|
||||
{{- if .Response }}<|eot_id|>
|
||||
{{- end }}
|
||||
24
server/testdata/tools/llama3-groq-tool-use.out
vendored
Normal file
24
server/testdata/tools/llama3-groq-tool-use.out
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
<|start_header_id|>system<|end_header_id|>
|
||||
|
||||
You are a knowledgable assistant. You can answer questions and perform tasks. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||
<tool_call>
|
||||
{"name": <function-name>,"arguments": <args-dict>}
|
||||
</tool_call>
|
||||
|
||||
Here are the available tools:
|
||||
<tools> {"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the users location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}} </tools><|eot_id|><|start_header_id|>user<|end_header_id|>
|
||||
|
||||
What's the weather like today in Paris?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
||||
|
||||
<tool_call>
|
||||
{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}
|
||||
</tool_call><|eot_id|><|start_header_id|>tool<|end_header_id|>
|
||||
|
||||
<tool_response>
|
||||
22
|
||||
</tool_response><|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
||||
|
||||
The current temperature in Paris, France is 22 degrees Celsius.<|eot_id|><|start_header_id|>user<|end_header_id|>
|
||||
|
||||
What's the weather like today in San Francisco and Toronto?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
||||
|
||||
4
server/testdata/tools/mistral.gotmpl
vendored
4
server/testdata/tools/mistral.gotmpl
vendored
@@ -1,13 +1,13 @@
|
||||
{{- range $index, $_ := .Messages }}
|
||||
{{- if eq .Role "user" }}
|
||||
{{- if and (eq (len (slice $.Messages $index)) 1) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS]
|
||||
{{- if and (eq (len (slice $.Messages $index)) 1) $.Tools }}[AVAILABLE_TOOLS] {{ $.Tools }}[/AVAILABLE_TOOLS]
|
||||
{{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}
|
||||
|
||||
{{ end }}{{ .Content }}[/INST]
|
||||
{{- else if eq .Role "assistant" }}
|
||||
{{- if .Content }} {{ .Content }}</s>
|
||||
{{- else if .ToolCalls }}[TOOL_CALLS] [
|
||||
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}}
|
||||
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
|
||||
{{- end }}]</s>
|
||||
{{- end }}
|
||||
{{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
|
||||
|
||||
@@ -150,9 +150,9 @@ func (t *Template) Vars() []string {
|
||||
|
||||
type Values struct {
|
||||
Messages []api.Message
|
||||
Tools []api.Tool
|
||||
Prompt string
|
||||
Suffix string
|
||||
api.Tools
|
||||
Prompt string
|
||||
Suffix string
|
||||
|
||||
// forceLegacy is a flag used to test compatibility with legacy templates
|
||||
forceLegacy bool
|
||||
@@ -217,6 +217,7 @@ func (t *Template) Execute(w io.Writer, v Values) error {
|
||||
"System": system,
|
||||
"Messages": messages,
|
||||
"Tools": v.Tools,
|
||||
"Response": "",
|
||||
})
|
||||
}
|
||||
|
||||
@@ -270,8 +271,9 @@ func (t *Template) Execute(w io.Writer, v Values) error {
|
||||
|
||||
tree := parse.Tree{Root: nodes.(*parse.ListNode)}
|
||||
if err := template.Must(template.New("").AddParseTree("", &tree)).Execute(&b, map[string]any{
|
||||
"System": system,
|
||||
"Prompt": prompt,
|
||||
"System": system,
|
||||
"Prompt": prompt,
|
||||
"Response": "",
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user