Compare commits

..

8 Commits

Author    SHA1        Message              Date
Josh Yan  4da5d5beaa  lint                 2024-08-28 10:23:41 -07:00
Josh Yan  cc17b02b23  update               2024-08-28 09:58:23 -07:00
Josh Yan  73d69bc90b  remove types         2024-08-27 16:45:07 -07:00
Josh Yan  9bc42f532b  rmv api type         2024-08-27 16:45:07 -07:00
Josh Yan  07c0f66f5e  rm print             2024-08-27 16:45:04 -07:00
Josh Yan  4a7bfca902  change progress msg  2024-08-27 16:44:38 -07:00
Josh Yan  04f2154505  fixed cgo            2024-08-27 16:44:38 -07:00
Josh Yan  de9b21b472  quantize progress    2024-08-27 16:44:32 -07:00
41 changed files with 1482 additions and 571 deletions

.gitattributes vendored (1 line changed)
View File

@@ -1,3 +1,4 @@
llm/ext_server/* linguist-vendored
llm/*.h linguist-vendored
* text=auto
*.go text eol=lf

View File

@@ -32,10 +32,6 @@ linters:
linters-settings:
gci:
sections: [standard, default, localmodule]
staticcheck:
checks:
- all
- -SA1019 # omit Deprecated check
severity:
default-severity: error
rules:

View File

@@ -337,8 +337,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
- [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
- [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
- [Nix package](https://search.nixos.org/packages?channel=24.05&show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
- [Flox](https://flox.dev/blog/ollama-part-one)
### Libraries

View File

@@ -296,17 +296,15 @@ type EmbeddingResponse struct {
// CreateRequest is the request passed to [Client.Create].
type CreateRequest struct {
Model string `json:"model"`
Path string `json:"path"`
Modelfile string `json:"modelfile"`
Stream *bool `json:"stream,omitempty"`
Quantize string `json:"quantize,omitempty"`
// Deprecated: set the model name with Model instead
// Name is deprecated, see Model
Name string `json:"name"`
// Deprecated: set the file content with Modelfile instead
Path string `json:"path"`
// Deprecated: use Quantize instead
// Quantization is deprecated, see Quantize
Quantization string `json:"quantization,omitempty"`
}
@@ -314,7 +312,7 @@ type CreateRequest struct {
type DeleteRequest struct {
Model string `json:"model"`
// Deprecated: set the model name with Model instead
// Name is deprecated, see Model
Name string `json:"name"`
}
@@ -329,7 +327,7 @@ type ShowRequest struct {
Options map[string]interface{} `json:"options"`
// Deprecated: set the model name with Model instead
// Name is deprecated, see Model
Name string `json:"name"`
}
@@ -361,7 +359,7 @@ type PullRequest struct {
Password string `json:"password"`
Stream *bool `json:"stream,omitempty"`
// Deprecated: set the model name with Model instead
// Name is deprecated, see Model
Name string `json:"name"`
}
@@ -382,7 +380,7 @@ type PushRequest struct {
Password string `json:"password"`
Stream *bool `json:"stream,omitempty"`
// Deprecated: set the model name with Model instead
// Name is deprecated, see Model
Name string `json:"name"`
}
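
The CreateRequest hunk above keeps Name, Path, and Quantization only as deprecated aliases for Model, Modelfile, and Quantize. A minimal sketch of a request built with the preferred fields (values are illustrative; assumes the github.com/ollama/ollama/api package from this diff):

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/api"
)

func main() {
	// Prefer the non-deprecated fields: Model replaces Name, Modelfile
	// replaces Path, and Quantize replaces Quantization.
	req := api.CreateRequest{
		Model:     "mymodel",     // illustrative model name
		Modelfile: "FROM llama3", // inline Modelfile content
		Quantize:  "q4_K_M",      // target quantization type
	}
	fmt.Printf("%+v\n", req)
}
```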

View File

@@ -124,6 +124,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
}
bars := make(map[string]*progress.Bar)
var quantizeSpin *progress.Spinner
fn := func(resp api.ProgressResponse) error {
if resp.Digest != "" {
spinner.Stop()
@@ -136,6 +137,15 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
}
bar.Set(resp.Completed)
} else if strings.Contains(resp.Status, "quantizing") {
spinner.Stop()
if quantizeSpin != nil {
quantizeSpin.SetMessage(resp.Status)
} else {
quantizeSpin = progress.NewSpinner(resp.Status)
p.Add("quantize", quantizeSpin)
}
} else if status != resp.Status {
spinner.Stop()

View File

@@ -89,7 +89,7 @@ func TestMain(m *testing.M) {
os.Exit(m.Run())
}
func TestConvertModel(t *testing.T) {
func TestConvertFull(t *testing.T) {
cases := []string{
"Meta-Llama-3-8B-Instruct",
"Meta-Llama-3.1-8B-Instruct",
@@ -140,107 +140,6 @@ func TestConvertModel(t *testing.T) {
}
}
func TestConvertInvalidDatatype(t *testing.T) {
f, err := os.CreateTemp(t.TempDir(), "testmodel")
if err != nil {
t.Fatal(err)
}
defer f.Close()
tempDir := t.TempDir()
generateSafetensorTestData(t, tempDir)
err = ConvertModel(os.DirFS(tempDir), f)
if err == nil || err.Error() != "unsupported safetensors model" {
t.Errorf("expected error but didn't get one")
}
}
func generateSafetensorTestData(t *testing.T, tempDir string) {
type tensorData struct {
Offsets []int `json:"data_offsets"`
Type string `json:"dtype"`
Shape []int `json:"shape"`
}
offset := 4096 * 14336
td := map[string]*tensorData{}
td["model.layers.0.mlp.down_proj.weight"] = &tensorData{
Offsets: []int{0, offset},
Type: "I8",
Shape: []int{4096, 14336},
}
td["model.layers.0.mlp.down_proj.weight_format"] = &tensorData{
Offsets: []int{offset, offset},
Type: "U8",
Shape: []int{},
}
data, err := json.Marshal(td)
if err != nil {
t.Fatal(err)
}
var buf bytes.Buffer
l := int64(len(data))
err = binary.Write(&buf, binary.LittleEndian, l)
if err != nil {
t.Fatal(err)
}
_, err = buf.Write(data)
if err != nil {
t.Fatal(err)
}
fdata, err := os.Create(filepath.Join(tempDir, "model-00001-of-00001.safetensors"))
if err != nil {
t.Fatal(err)
}
defer fdata.Close()
_, err = fdata.Write(buf.Bytes())
if err != nil {
t.Fatal(err)
}
configData := `
{
"architectures": [
"LlamaForCausalLM"
]
}
`
f, err := os.Create(filepath.Join(tempDir, "config.json"))
if err != nil {
t.Fatal(err)
}
defer f.Close()
_, err = f.WriteString(configData)
if err != nil {
t.Fatal(err)
}
tokenizerData := `
{
}
`
f, err = os.Create(filepath.Join(tempDir, "tokenizer.json"))
if err != nil {
t.Fatal(err)
}
defer f.Close()
_, err = f.WriteString(tokenizerData)
if err != nil {
t.Fatal(err)
}
}
func TestConvertAdapter(t *testing.T) {
type AdapterCase struct {
Name string

View File

@@ -4,7 +4,6 @@ import (
"bytes"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"io"
"io/fs"
@@ -51,10 +50,6 @@ func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]T
for _, key := range keys {
if value := headers[key]; value.Type != "" {
// bitsandbytes quantized models are unsupported
if len(value.Shape) == 0 {
return nil, errors.New("unsupported safetensors model")
}
ts = append(ts, safetensor{
fs: fsys,
path: p,

View File

@@ -100,21 +100,8 @@ func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error)
}
if template, ok := p["chat_template"]; ok {
var s []struct {
Name string `json:"name"`
Template string `json:"template"`
}
if err := json.Unmarshal(template, &t.Template); err == nil {
// noop
} else if err := json.Unmarshal(template, &s); err == nil {
for _, e := range s {
if e.Name == "default" {
t.Template = e.Template
break
}
}
} else {
return nil, fmt.Errorf("invalid chat_template: %w", err)
if err := json.Unmarshal(template, &t.Template); err != nil {
return nil, err
}
}
@@ -154,6 +141,7 @@ func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error)
}
type tokenizer struct {
Version string `json:"version"`
AddedTokens []token `json:"added_tokens"`
Model struct {
Type string `json:"type"`
@@ -251,7 +239,7 @@ func parseVocabulary(fsys fs.FS) (*Vocabulary, error) {
return pattern.Func(fsys)
}
return nil, errors.New("unknown tokenizer format")
return nil, errors.New("unknown tensor format")
}
type SpecialVocabulary struct {
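
Earlier in this file's diff, the fallback that accepted chat_template as a list of named templates (keeping the entry named "default") is dropped in favor of decoding only the plain-string form. A standalone sketch of the two-shape decode, mirroring the removed branch (types here are local to the example, not the package's API):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// pickTemplate accepts the two chat_template shapes seen in
// tokenizer_config.json: a plain string, or a list of {name, template}
// objects, in which case the entry named "default" wins.
func pickTemplate(raw json.RawMessage) (string, error) {
	var s string
	if err := json.Unmarshal(raw, &s); err == nil {
		return s, nil
	}
	var list []struct {
		Name     string `json:"name"`
		Template string `json:"template"`
	}
	if err := json.Unmarshal(raw, &list); err != nil {
		return "", fmt.Errorf("invalid chat_template: %w", err)
	}
	for _, e := range list {
		if e.Name == "default" {
			return e.Template, nil
		}
	}
	return "", nil
}

func main() {
	tpl, err := pickTemplate(json.RawMessage(`[{"name":"default","template":"<default template>"}]`))
	fmt.Println(tpl, err)
}
```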

View File

@@ -1,208 +0,0 @@
package convert
import (
"io"
"io/fs"
"os"
"path/filepath"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
)
func createTokenizerFS(t *testing.T, dir string, files map[string]io.Reader) fs.FS {
t.Helper()
for k, v := range files {
if err := func() error {
f, err := os.Create(filepath.Join(dir, k))
if err != nil {
return err
}
defer f.Close()
if _, err := io.Copy(f, v); err != nil {
return err
}
return nil
}(); err != nil {
t.Fatalf("unexpected error: %v", err)
}
}
return os.DirFS(dir)
}
func TestParseTokenizer(t *testing.T) {
cases := []struct {
name string
fsys fs.FS
specialTokenTypes []string
want *Tokenizer
}{
{
name: "string chat template",
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
"tokenizer.json": strings.NewReader(`{}`),
"tokenizer_config.json": strings.NewReader(`{
"chat_template": "<default template>"
}`),
}),
want: &Tokenizer{
Vocabulary: &Vocabulary{Model: "gpt2"},
Pre: "default",
Template: "<default template>",
},
},
{
name: "list chat template",
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
"tokenizer.json": strings.NewReader(`{}`),
"tokenizer_config.json": strings.NewReader(`{
"chat_template": [
{
"name": "default",
"template": "<default template>"
},
{
"name": "tools",
"template": "<tools template>"
}
]
}`),
}),
want: &Tokenizer{
Vocabulary: &Vocabulary{Model: "gpt2"},
Pre: "default",
Template: "<default template>",
},
},
{
name: "added tokens",
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
"tokenizer.json": strings.NewReader(`{
"added_tokens": [
{
"id": 999,
"content": "<unused999>",
"special": false
}
]
}`),
}),
want: &Tokenizer{
Vocabulary: &Vocabulary{
Model: "gpt2",
Tokens: []string{"<unused999>"},
Scores: []float32{999},
Types: []int32{4},
},
Pre: "default",
},
},
{
name: "added tokens overlap vocab",
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
"tokenizer.json": strings.NewReader(`{
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"special": true
}
],
"model": {
"vocab": {
"<pad>": 0
}
}
}`),
}),
want: &Tokenizer{
Vocabulary: &Vocabulary{
Model: "gpt2",
Tokens: []string{"<pad>"},
Scores: []float32{0},
Types: []int32{3},
},
Pre: "default",
},
},
{
name: "special token types",
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
"tokenizer.json": strings.NewReader(`{
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"special": true
},
{
"id": 1,
"content": "<eos>",
"special": true
},
{
"id": 2,
"content": "<bos>",
"special": true
},
{
"id": 3,
"content": "<unk>",
"special": true
}
],
"model": {
"vocab": {
"<pad>": 0,
"<eos>": 1,
"<bos>": 2,
"<unk>": 3
}
}
}`),
"tokenizer_config.json": strings.NewReader(`{
"add_bos_token": true,
"add_eos_token": false,
"bos_token": "<bos>",
"eos_token": "<eos>",
"pad_token": "<pad>",
"unk_token": "<unk>"
}`),
}),
specialTokenTypes: []string{"pad", "eos", "bos", "unk"},
want: &Tokenizer{
Vocabulary: &Vocabulary{
Model: "gpt2",
Tokens: []string{"<pad>", "<eos>", "<bos>", "<unk>"},
Scores: []float32{0, 1, 2, 3},
Types: []int32{3, 3, 3, 3},
},
SpecialVocabulary: []*SpecialVocabulary{
{Type: "pad", Content: "<pad>", ID: 0, AddToken: false},
{Type: "eos", Content: "<eos>", ID: 1, AddToken: false},
{Type: "bos", Content: "<bos>", ID: 2, AddToken: true},
{Type: "unk", Content: "<unk>", ID: 3, AddToken: false},
},
Pre: "default",
},
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
tokenizer, err := parseTokenizer(tt.fsys, tt.specialTokenTypes)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if diff := cmp.Diff(tt.want, tokenizer); diff != "" {
t.Errorf("unexpected tokenizer (-want +got):\n%s", diff)
}
})
}
}

View File

@@ -300,28 +300,3 @@ curl http://localhost:11434/v1/chat/completions \
]
}'
```
### Setting the context size
The OpenAI API does not have a way of setting the context size for a model. If you need to change the context size, create a `Modelfile` which looks like:
```modelfile
FROM <some model>
PARAMETER num_ctx <context size>
```
Use the `ollama create mymodel` command to create a new model with the updated context size. Call the API with the updated model name:
```shell
curl http://localhost:11434/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "mymodel",
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
}'
```

View File

@@ -30,7 +30,9 @@ func Host() *url.URL {
defaultPort = "443"
}
hostport, path, _ := strings.Cut(hostport, "/")
// trim trailing slashes
hostport = strings.TrimRight(hostport, "/")
host, port, err := net.SplitHostPort(hostport)
if err != nil {
host, port = "127.0.0.1", defaultPort
@@ -43,13 +45,15 @@ func Host() *url.URL {
if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
port = defaultPort
return &url.URL{
Scheme: scheme,
Host: net.JoinHostPort(host, defaultPort),
}
}
return &url.URL{
Scheme: scheme,
Host: net.JoinHostPort(host, port),
Path: path,
}
}
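
The updated Host() trims trailing slashes from the host part and falls back to the default port when parsing fails or the port is out of range. A simplified, self-contained sketch of the host/port fallback that the test table below exercises (scheme and path handling omitted; the bare-host recovery here is an approximation of the real function):

```go
package main

import (
	"fmt"
	"net"
	"strings"
)

// splitHostPort trims trailing slashes, then falls back to the default
// port when the input has no port of its own.
func splitHostPort(hostport, defaultPort string) (string, string) {
	hostport = strings.TrimRight(hostport, "/")
	host, port, err := net.SplitHostPort(hostport)
	if err != nil {
		// No usable port: keep the host (brackets stripped so JoinHostPort
		// can re-add them) and use the default port.
		host, port = strings.Trim(hostport, "[]"), defaultPort
		if host == "" {
			host = "127.0.0.1"
		}
	}
	return host, port
}

func main() {
	for _, in := range []string{"", "1.2.3.4", "example.com:1234", "::1"} {
		host, port := splitHostPort(in, "11434")
		fmt.Println(net.JoinHostPort(host, port))
	}
}
```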

View File

@@ -13,35 +13,34 @@ func TestHost(t *testing.T) {
value string
expect string
}{
"empty": {"", "http://127.0.0.1:11434"},
"only address": {"1.2.3.4", "http://1.2.3.4:11434"},
"only port": {":1234", "http://:1234"},
"address and port": {"1.2.3.4:1234", "http://1.2.3.4:1234"},
"hostname": {"example.com", "http://example.com:11434"},
"hostname and port": {"example.com:1234", "http://example.com:1234"},
"zero port": {":0", "http://:0"},
"too large port": {":66000", "http://:11434"},
"too small port": {":-1", "http://:11434"},
"ipv6 localhost": {"[::1]", "http://[::1]:11434"},
"ipv6 world open": {"[::]", "http://[::]:11434"},
"ipv6 no brackets": {"::1", "http://[::1]:11434"},
"ipv6 + port": {"[::1]:1337", "http://[::1]:1337"},
"extra space": {" 1.2.3.4 ", "http://1.2.3.4:11434"},
"extra quotes": {"\"1.2.3.4\"", "http://1.2.3.4:11434"},
"extra space+quotes": {" \" 1.2.3.4 \" ", "http://1.2.3.4:11434"},
"extra single quotes": {"'1.2.3.4'", "http://1.2.3.4:11434"},
"http": {"http://1.2.3.4", "http://1.2.3.4:80"},
"http port": {"http://1.2.3.4:4321", "http://1.2.3.4:4321"},
"https": {"https://1.2.3.4", "https://1.2.3.4:443"},
"https port": {"https://1.2.3.4:4321", "https://1.2.3.4:4321"},
"proxy path": {"https://example.com/ollama", "https://example.com:443/ollama"},
"empty": {"", "127.0.0.1:11434"},
"only address": {"1.2.3.4", "1.2.3.4:11434"},
"only port": {":1234", ":1234"},
"address and port": {"1.2.3.4:1234", "1.2.3.4:1234"},
"hostname": {"example.com", "example.com:11434"},
"hostname and port": {"example.com:1234", "example.com:1234"},
"zero port": {":0", ":0"},
"too large port": {":66000", ":11434"},
"too small port": {":-1", ":11434"},
"ipv6 localhost": {"[::1]", "[::1]:11434"},
"ipv6 world open": {"[::]", "[::]:11434"},
"ipv6 no brackets": {"::1", "[::1]:11434"},
"ipv6 + port": {"[::1]:1337", "[::1]:1337"},
"extra space": {" 1.2.3.4 ", "1.2.3.4:11434"},
"extra quotes": {"\"1.2.3.4\"", "1.2.3.4:11434"},
"extra space+quotes": {" \" 1.2.3.4 \" ", "1.2.3.4:11434"},
"extra single quotes": {"'1.2.3.4'", "1.2.3.4:11434"},
"http": {"http://1.2.3.4", "1.2.3.4:80"},
"http port": {"http://1.2.3.4:4321", "1.2.3.4:4321"},
"https": {"https://1.2.3.4", "1.2.3.4:443"},
"https port": {"https://1.2.3.4:4321", "1.2.3.4:4321"},
}
for name, tt := range cases {
t.Run(name, func(t *testing.T) {
t.Setenv("OLLAMA_HOST", tt.value)
if host := Host(); host.String() != tt.expect {
t.Errorf("%s: expected %s, got %s", name, tt.expect, host.String())
if host := Host(); host.Host != tt.expect {
t.Errorf("%s: expected %s, got %s", name, tt.expect, host.Host)
}
})
}

View File

@@ -87,8 +87,6 @@ apply_patches() {
build() {
cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
# remove unnecessary build artifacts
rm -f ${BUILD_DIR}/bin/ggml-common.h ${BUILD_DIR}/bin/ggml-metal.metal
}
compress() {

llm/llama.h vendored Normal file (1227 additions)

File diff suppressed because it is too large

View File

@@ -1,6 +1,6 @@
package llm
// #cgo CFLAGS: -Illama.cpp -Illama.cpp/include -Illama.cpp/ggml/include
// #cgo CPPFLAGS: -Illama.cpp/ggml/include
// #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread
// #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal
// #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src
@@ -9,12 +9,24 @@ package llm
// #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/linux/x86_64_static -L${SRCDIR}/build/linux/x86_64_static/src -L${SRCDIR}/build/linux/x86_64_static/ggml/src
// #cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/linux/arm64_static -L${SRCDIR}/build/linux/arm64_static/src -L${SRCDIR}/build/linux/arm64_static/ggml/src
// #include <stdlib.h>
// #include <stdatomic.h>
// #include "llama.h"
// bool update_quantize_progress(float progress, void* data) {
// atomic_int* atomicData = (atomic_int*)data;
// int intProgress = *((int*)&progress);
// atomic_store(atomicData, intProgress);
// return true;
// }
import "C"
import (
"errors"
"fmt"
"sync/atomic"
"time"
"unsafe"
"github.com/ollama/ollama/api"
)
// SystemInfo is an unused example of calling llama.cpp functions using CGo
@@ -22,17 +34,49 @@ func SystemInfo() string {
return C.GoString(C.llama_print_system_info())
}
func Quantize(infile, outfile string, ftype fileType) error {
func Quantize(infile, outfile string, ftype fileType, fn func(resp api.ProgressResponse), tensorCount int) error {
cinfile := C.CString(infile)
defer C.free(unsafe.Pointer(cinfile))
coutfile := C.CString(outfile)
defer C.free(unsafe.Pointer(coutfile))
params := C.llama_model_quantize_default_params()
params.nthread = -1
params.ftype = ftype.Value()
// Initialize "global" to store progress
store := (*int32)(C.malloc(C.sizeof_int))
defer C.free(unsafe.Pointer(store))
// Initialize store value, e.g., setting initial progress to 0
atomic.StoreInt32(store, 0)
params.quantize_callback_data = unsafe.Pointer(store)
params.quantize_callback = (C.llama_progress_callback)(C.update_quantize_progress)
ticker := time.NewTicker(30 * time.Millisecond)
done := make(chan struct{})
defer close(done)
go func() {
defer ticker.Stop()
for {
select {
case <-ticker.C:
progressInt := atomic.LoadInt32(store)
progress := *(*float32)(unsafe.Pointer(&progressInt))
fn(api.ProgressResponse{
Status: fmt.Sprintf("quantizing model %d%%", 100*int(progress)/tensorCount),
})
case <-done:
fn(api.ProgressResponse{
Status: fmt.Sprintf("quantizing model 100%%"),
})
return
}
}
}()
if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
return errors.New("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
}
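
The C callback above smuggles a float32 progress value through an atomic int by reinterpreting its bits, and the Go ticker loop reverses the cast with an unsafe pointer conversion. The same round-trip can be sketched without unsafe using math.Float32bits (illustrative only, not the code path in this diff):

```go
package main

import (
	"fmt"
	"math"
	"sync/atomic"
)

func main() {
	var store atomic.Int32

	// Writer side: reinterpret the float32's bit pattern as an int32 and
	// store it atomically.
	progress := float32(42)
	store.Store(int32(math.Float32bits(progress)))

	// Reader side: load the bits and reinterpret them back into a float32.
	got := math.Float32frombits(uint32(store.Load()))
	fmt.Println(got) // 42
}
```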

View File

@@ -0,0 +1,52 @@
From ed941590d59fc07b1ad21d6aa458588e47d1e446 Mon Sep 17 00:00:00 2001
From: Josh Yan <jyan00017@gmail.com>
Date: Wed, 10 Jul 2024 13:39:39 -0700
Subject: [PATCH] quantize progress
---
include/llama.h | 3 +++
src/llama.cpp | 8 ++++++++
2 files changed, 11 insertions(+)
diff --git a/include/llama.h b/include/llama.h
index bb4b05ba..613db68e 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -349,6 +349,9 @@ extern "C" {
bool keep_split; // quantize to the same number of shards
void * imatrix; // pointer to importance matrix data
void * kv_overrides; // pointer to vector containing overrides
+
+ llama_progress_callback quantize_callback; // callback to report quantization progress
+ void * quantize_callback_data; // user data for the callback
} llama_model_quantize_params;
// grammar types
diff --git a/src/llama.cpp b/src/llama.cpp
index 2b9ace28..ac640c02 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -18252,6 +18252,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
const auto tn = LLM_TN(model.arch);
new_ofstream(0);
for (int i = 0; i < ml.n_tensors; ++i) {
+ if (params->quantize_callback){
+ if (!params->quantize_callback(i, params->quantize_callback_data)) {
+ return;
+ }
+ }
+
auto weight = ml.get_weight(i);
struct ggml_tensor * tensor = weight->tensor;
if (weight->idx != cur_split && params->keep_split) {
@@ -18789,6 +18795,8 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
/*.keep_split =*/ false,
/*.imatrix =*/ nullptr,
/*.kv_overrides =*/ nullptr,
+ /*.quantize_callback =*/ nullptr,
+ /*.quantize_callback_data =*/ nullptr,
};
return result;
--
2.39.3 (Apple Git-146)

View File

@@ -30,7 +30,7 @@ if grep -i "centos" /etc/system-release >/dev/null; then
dnf install -y rh-git227-git
ln -s /opt/rh/rh-git227/root/usr/bin/git /usr/local/bin/git
fi
dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ pigz findutils
dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ pigz
elif grep -i "rocky" /etc/system-release >/dev/null; then
# Temporary workaround until rocky 8 AppStream ships GCC 10.4 (10.3 is incompatible with NVCC)
cat << EOF > /etc/yum.repos.d/Rocky-Vault.repo
@@ -45,7 +45,6 @@ EOF
dnf install -y git \
gcc-toolset-10-gcc-10.2.1-8.2.el8 \
gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 \
findutils \
pigz
else
echo "ERROR Unexpected distro"

View File

@@ -435,11 +435,14 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
return err
}
tensorCount := len(baseLayer.GGML.Tensors().Items)
ft := baseLayer.GGML.KV().FileType()
if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
return errors.New("quantization is only supported for F16 and F32 models")
} else if want != ft {
fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantization)})
fn(api.ProgressResponse{
Status: "quantizing model tensors",
})
blob, err := GetBlobsPath(baseLayer.Digest)
if err != nil {
@@ -453,7 +456,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
defer temp.Close()
defer os.Remove(temp.Name())
if err := llm.Quantize(blob, temp.Name(), want); err != nil {
if err := llm.Quantize(blob, temp.Name(), want, fn, tensorCount); err != nil {
return err
}

View File

@@ -139,7 +139,6 @@ The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`,
func TestParseFromFileFromLayer(t *testing.T) {
tempModels := t.TempDir()
t.Setenv("OLLAMA_MODELS", tempModels)
file, err := os.CreateTemp(tempModels, "")
if err != nil {
@@ -190,7 +189,6 @@ func TestParseFromFileFromLayer(t *testing.T) {
func TestParseLayerFromCopy(t *testing.T) {
tempModels := t.TempDir()
t.Setenv("OLLAMA_MODELS", tempModels)
file2, err := os.CreateTemp(tempModels, "")
if err != nil {

View File

@@ -73,6 +73,18 @@ func ParseModelPath(name string) ModelPath {
var errModelPathInvalid = errors.New("invalid model path")
func (mp ModelPath) Validate() error {
if mp.Repository == "" {
return fmt.Errorf("%w: model repository name is required", errModelPathInvalid)
}
if strings.Contains(mp.Tag, ":") {
return fmt.Errorf("%w: ':' (colon) is not allowed in tag names", errModelPathInvalid)
}
return nil
}
func (mp ModelPath) GetNamespaceRepository() string {
return fmt.Sprintf("%s/%s", mp.Namespace, mp.Repository)
}
@@ -93,11 +105,7 @@ func (mp ModelPath) GetShortTagname() string {
// GetManifestPath returns the path to the manifest file for the given model path, it is up to the caller to create the directory if it does not exist.
func (mp ModelPath) GetManifestPath() (string, error) {
if p := filepath.Join(mp.Registry, mp.Namespace, mp.Repository, mp.Tag); filepath.IsLocal(p) {
return filepath.Join(envconfig.Models(), "manifests", p), nil
}
return "", errModelPathInvalid
return filepath.Join(envconfig.Models(), "manifests", mp.Registry, mp.Namespace, mp.Repository, mp.Tag), nil
}
func (mp ModelPath) BaseURL() *url.URL {
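
One side of the GetManifestPath hunk guards the joined manifest path with filepath.IsLocal, which rejects absolute paths and ".." traversal, exactly the `../../..:something` case covered by the TestInsecureModelpath test removed later in this compare. A small self-contained sketch of what that check accepts and rejects:

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// filepath.IsLocal (Go 1.20+) reports whether a path stays inside the
	// current directory when joined: traversal escapes return false.
	fmt.Println(filepath.IsLocal(filepath.Join("registry.ollama.ai", "library", "llama3", "latest"))) // true
	fmt.Println(filepath.IsLocal(filepath.Join("..", "..", "..", "something")))                       // false
}
```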

View File

@@ -1,7 +1,6 @@
package server
import (
"errors"
"os"
"path/filepath"
"testing"
@@ -155,10 +154,3 @@ func TestParseModelPath(t *testing.T) {
})
}
}
func TestInsecureModelpath(t *testing.T) {
mp := ParseModelPath("../../..:something")
if _, err := mp.GetManifestPath(); !errors.Is(err, errModelPathInvalid) {
t.Errorf("expected error: %v", err)
}
}

View File

@@ -593,9 +593,9 @@ func TestCreateDetectTemplate(t *testing.T) {
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-0d79f567714c62c048378f2107fb332dabee0135d080c302d884317da9433cc5"),
filepath.Join(p, "blobs", "sha256-35360843d0c84fb1506952a131bbef13cd2bb4a541251f22535170c05b56e672"),
filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
filepath.Join(p, "blobs", "sha256-de3959f841e9ef6b4b6255fa41cb9e0a45da89c3066aa72bdd07a4747f848990"),
filepath.Join(p, "blobs", "sha256-c608dc615584cd20d9d830363dabf8a4783ae5d34245c3d8c115edb3bc7b28e4"),
filepath.Join(p, "blobs", "sha256-ea34c57ba5b78b740aafe2aeb74dc6507fc3ad14170b64c26a04fb9e36c88d75"),
})
})

View File

@@ -1,2 +1 @@
{{- range .Messages }}<start_{{ .Role }}>{{ .Content }}<end_message>
{{- end }}<start_assistant>
{{ if .System }}<start_system>{{ .System }}<end_message>{{ end }}{{ if .Prompt }}<start_user>{{ .Prompt }}<end_message>{{ end }}<start_assistant>{{ .Response }}<end_message>

View File

@@ -1,18 +1,8 @@
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- else if eq .Role "user" }}
{{- if $system }}{{ $system }}
{{ if .System }}{{ .System }}
{{ $system = "" }}
{{- end }}### Instruction:
{{ .Content }}
{{ end }}{{ if .Prompt }}### Instruction:
{{ .Prompt }}
{{ else if eq .Role "assistant" }}### Response:
{{ .Content }}
{{ end }}### Response:
{{ .Response }}
{{ end }}
{{- end }}### Response:

View File

@@ -1,3 +1,6 @@
{{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>
{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ .Response }}<|im_end|>
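
This and the surrounding template diffs trade between a multi-turn `range .Messages` loop and single-turn `.System`/`.Prompt`/`.Response` substitution. As a sanity check, the single-turn ChatML form above renders with Go's text/template like so (the struct and its values are illustrative stand-ins for what the server substitutes):

```go
package main

import (
	"os"
	"text/template"
)

func main() {
	// The single-turn ChatML template from this diff, inlined verbatim.
	const chatml = "{{ if .System }}<|im_start|>system\n{{ .System }}<|im_end|>\n{{ end }}" +
		"{{ if .Prompt }}<|im_start|>user\n{{ .Prompt }}<|im_end|>\n{{ end }}" +
		"<|im_start|>assistant\n{{ .Response }}<|im_end|>\n"

	tmpl := template.Must(template.New("chatml").Parse(chatml))
	_ = tmpl.Execute(os.Stdout, struct{ System, Prompt, Response string }{
		System: "You are a helpful assistant.",
		Prompt: "Hello!",
	})
}
```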

View File

@@ -1,7 +1,6 @@
{{- range .Messages }}
{{- if eq .Role "system" }}System:
{{- else if eq .Role "user" }}User:
{{- else if eq .Role "assistant" }}Assistant:
{{- end }} {{ .Content }}
{{ if .System }}System: {{ .System }}
{{ end }}{{ if .Prompt }}User: {{ .Prompt }}
{{ end }}Assistant: {{ .Response }}
{{ end }}Assistant:

View File

@@ -1,10 +1,10 @@
{{- range .Messages }}Source:
{{- if eq .Role "system" }} system
{{- else if eq .Role "user" }} user
{{- else if eq .Role "assistant" }} assistant
{{ if .System }}Source: system
{{ .System }} <step> {{ end }}Source: user
{{ .Prompt }} <step> Source: assistant
{{- if not .Response }}
Destination: user
{{- end }}
{{ .Content }} <step> {{ end }}Source: assistant
Destination: user
{{ .Response }} <step>

View File

@@ -1,8 +1,5 @@
{{- range .Messages }}
{{- if eq .Role "system" }}System: {{ .Content }}
{{ continue }}
{{- else if eq .Role "user" }}User:
{{- else if eq .Role "assistant" }}Falcon:
{{- end }}
{{ .Content }}
{{ if .System }}System: {{ .System }}
{{ end }}{{ if .Prompt }}User:
{{ .Prompt }}
{{ end }}Falcon:
{{ .Response }}

View File

@@ -1,16 +1,5 @@
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- continue }}
{{- else if eq .Role "user" }}<start_of_turn>user
{{- if $system }}
{{ $system }}
{{- $system = "" }}
{{- end }}
{{- else if eq .Role "assistant" }}<start_of_turn>model
{{- end }}
{{ .Content }}<end_of_turn>
{{ end }}<start_of_turn>model
<start_of_turn>user
{{ if .System }}{{ .System }}
{{ end }}{{ .Prompt }}<end_of_turn>
<start_of_turn>model
{{ .Response }}<end_of_turn>

View File

@@ -1,8 +1,9 @@
{{- range .Messages }}
{{- if eq .Role "system" }}System:
{{- else if eq .Role "user" }}Question:
{{- else if eq .Role "assistant" }}Answer:
{{- end }}
{{ .Content }}
{{ if .System }}System:
{{ .System }}
{{ end }}{{ if .Prompt }}Question:
{{ .Prompt }}
{{ end }}Answer:
{{ .Response }}

View File

@@ -91,10 +91,6 @@
"template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
"name": "llama3-instruct"
},
{
"template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
"name": "llama3-instruct"
},
{
"template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ 'Question:\n' + message['content'] + '\n\n' }}{% elif message['role'] == 'system' %}\n{{ 'System:\n' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Answer:\n' + message['content'] + '\n\n' }}{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ 'Answer:\n' }}{% endif %}{% endfor %}",
"name": "granite-instruct"

View File

@@ -1,14 +1,6 @@
{{- $system := "" }}[INST] {{ range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- else if eq .Role "user" }}<<SYS>>
{{- if $system }}
{{ $system }}
{{ $system = "" }}
{{- end }}<</SYS>>
[INST] <<SYS>>
{{- if .System }}
{{ .System }}
{{ end }}<</SYS>>
{{ .Content }} [/INST]
{{- else if eq .Role "assistant" }} {{ .Content }}</s><s>[INST] {{ end }}
{{- end }}
{{ .Prompt }} [/INST] {{ .Response }}</s><s>

View File

@@ -1,5 +1,7 @@
{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
{{ if .System }}<|start_header_id|>system<|end_header_id|>
{{ .Content }}<|eot_id|>
{{- end }}<|start_header_id|>assistant<|end_header_id|>
{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
{{ .Response }}<|eot_id|>

View File

@@ -1,17 +1,8 @@
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- continue }}
{{- else if eq .Role "user" }}
{{- if $system }}{{ $system }}
{{ if .System }}{{ .System }}
{{ $system = "" }}
{{- end }}@@ Instruction
{{- else if eq .Role "assistant" }}@@ Response
{{- end }}
{{ .Content }}
{{ end }}{{ if .Prompt }}@@ Instruction
{{ .Prompt }}
{{ end }}@@ Response
{{ .Response }}

View File

@@ -1,6 +1,3 @@
[INST] {{ range $index, $_ := .Messages }}
{{- if eq .Role "system" }}{{ .Content }}
[INST] {{ if .System }}{{ .System }}
{{ else if eq .Role "user" }}{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }} {{ .Content }}</s>[INST] {{ end }}
{{- end }}
{{ end }}{{ .Prompt }}[/INST] {{ .Response }}</s>

View File

@@ -1,6 +1 @@
{{- range .Messages }}GPT4 Correct
{{- if eq .Role "system" }} System:
{{- else if eq .Role "user" }} User:
{{- else if eq .Role "assistant" }} Assistant:
{{- end }} {{ .Content }}<|end_of_turn|>
{{- end }}GPT4 Correct Assistant:
{{ if .System }}GPT4 Correct System: {{ .System }}<|end_of_turn|>{{ end }}GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|>

View File

@@ -1,3 +1,6 @@
{{- range .Messages }}<|{{ .Role }}|>
{{ .Content }}<|end|>
{{ if .System }}<|system|>
{{ .System }}<|end|>
{{ end }}{{ if .Prompt }}<|user|>
{{ .Prompt }}<|end|>
{{ end }}<|assistant|>
{{ .Response }}<|end|>

View File

@@ -1,11 +1,9 @@
{{- range .Messages }}
{{- if eq .Role "system" }}### System:
{{- else if eq .Role "user" }}### User:
{{- else if eq .Role "assistant" }}### Assistant:
{{ .Content }}</s>
{{ if .System }}### System:
{{ .System }}
{{ continue }}
{{- end }}
{{ .Content }}
{{ end }}{{ if .Prompt }}### User:
{{ .Prompt }}
{{ end }}### Assistant:
{{ .Response }}</s>

View File

@@ -1,18 +1,8 @@
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- else if eq .Role "user" }}
{{- if $system }}{{ $system }}
{{ if .System }}{{ .System }}
{{ $system = "" }}
{{- end }}### Instruction
{{ .Content }}
{{ end }}{{ if .Prompt }}### Instruction
{{ .Prompt }}
{{ else if eq .Role "assistant" }}### Response
{{ .Content }}<|endoftext|>
{{ end }}### Response
{{ .Response }}<|endoftext|>
{{ end }}
{{- end }}### Response

View File

@@ -1,14 +1,4 @@
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- else if eq .Role "user" }}
{{- if $system }}{{ $system }}
{{ if .System }}{{ .System }}
{{ $system = "" }}
{{- end }}USER: {{ .Content }}
{{ else if eq .Role "assistant" }}ASSISTANT: {{ .Content }}</s>
{{ end }}
{{- end }}ASSISTANT:
{{ end }}{{ if .Prompt }}USER: {{ .Prompt }}
{{ end }}ASSISTANT: {{ .Response }}</s>

View File

@@ -1,3 +1,6 @@
{{- range .Messages }}<|{{ .Role }}|>
{{ .Content }}</s>
{{ if .System }}<|system|>
{{ .System }}</s>
{{ end }}{{ if .Prompt }}<|user|>
{{ .Prompt }}</s>
{{ end }}<|assistant|>
{{ .Response }}</s>