server: don't error on missing tokenizer.chat_template

2024-06-07 09:12:08 -07:00
12 changed files with 48 additions and 76 deletions
--- a/README.md
+++ b/README.md
@@ -326,7 +326,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
 - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
@@ -382,7 +381,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
 - [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)

 ### Supported backends

--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -90,7 +90,6 @@ func init() {
 	NumParallel = 1
 	MaxRunners = 1
 	MaxQueuedRequests = 512
-	FlashAttention = true

 	LoadConfig()
 }
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -835,7 +835,7 @@ struct llama_server_context
        system_tokens.clear();

        if (!system_prompt.empty()) {
-            system_tokens = ::llama_tokenize(ctx, system_prompt, true);
+            system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);

            llama_batch_clear(batch);

@@ -1656,7 +1656,7 @@ struct llama_server_context
                    slot.t_start_process_prompt = ggml_time_us();
                    slot.t_start_genereration = 0;

-                    prompt_tokens = tokenize(slot.prompt, system_prompt.empty());  // add BOS if there isn't system prompt
+                    prompt_tokens = tokenize(slot.prompt, system_prompt.empty() && add_bos_token);  // add BOS if there isn't system prompt

                    slot.n_prompt_tokens = prompt_tokens.size();

--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -211,7 +211,7 @@ if [ -z "${ONEAPI_ROOT}" ]; then
    ONEAPI_ROOT=/opt/intel/oneapi
 fi

-if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
+if [ -d "${ONEAPI_ROOT}" ]; then
    echo "OneAPI libraries detected - building dynamic OneAPI library"
    init_vars
    source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -290,7 +290,7 @@ function build_cuda() {
 }

 function build_oneapi() {
-  if ((-not "${env:OLLAMA_SKIP_ONEAPI_GENERATE}") -and ("${env:ONEAPI_ROOT}"))  {
+  if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${env:ONEAPI_ROOT}"))  {
    # Get oneAPI version
    $script:ONEAPI_VERSION = icpx --version
    $script:ONEAPI_VERSION = [regex]::Match($script:ONEAPI_VERSION, '(?<=oneAPI DPC\+\+/C\+\+ Compiler )(?<version>\d+\.\d+\.\d+)').Value
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -618,8 +618,22 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
 		}
 	}

+	offset, err := ws.Seek(0, io.SeekCurrent)
+	if err != nil {
+		return err
+	}
+
 	var alignment int64 = 32
+	padding := llm.padding(offset, alignment)
+	if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
+		return err
+	}
+
 	for _, tensor := range tensors {
+		if _, err := tensor.WriteTo(ws); err != nil {
+			return err
+		}
+
 		offset, err := ws.Seek(0, io.SeekCurrent)
 		if err != nil {
 			return err
@@ -629,10 +643,6 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
 		if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
 			return err
 		}
-
-		if _, err := tensor.WriteTo(ws); err != nil {
-			return err
-		}
 	}

 	return nil
--- a/server/images.go
+++ b/server/images.go
@@ -438,8 +438,6 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio

 					if s := baseLayer.GGML.KV().ChatTemplate(); s != "" {
 						if t, err := templates.NamedTemplate(s); err != nil {
-							slog.Debug("template detection", "error", err)
-						} else {
 							layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
 							if err != nil {
 								return err
--- a/server/routes_create_test.go
+++ b/server/routes_create_test.go
@@ -15,12 +15,11 @@ import (

 	"github.com/gin-gonic/gin"
 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/llm"
 )

 var stream bool = false

-func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string {
+func createBinFile(t *testing.T) string {
 	t.Helper()

 	f, err := os.CreateTemp(t.TempDir(), "")
@@ -29,7 +28,19 @@ func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string {
 	}
 	defer f.Close()

-	if err := llm.NewGGUFV3(binary.LittleEndian).Encode(f, kv, ti); err != nil {
+	if err := binary.Write(f, binary.LittleEndian, []byte("GGUF")); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := binary.Write(f, binary.LittleEndian, uint32(3)); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := binary.Write(f, binary.LittleEndian, uint64(0)); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := binary.Write(f, binary.LittleEndian, uint64(0)); err != nil {
 		t.Fatal(err)
 	}

@@ -90,7 +101,7 @@ func TestCreateFromBin(t *testing.T) {
 	var s Server
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
+		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
 		Stream:    &stream,
 	})

@@ -115,7 +126,7 @@ func TestCreateFromModel(t *testing.T) {

 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
+		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
 		Stream:    &stream,
 	})

@@ -155,7 +166,7 @@ func TestCreateRemovesLayers(t *testing.T) {

 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}", createBinFile(t, nil, nil)),
+		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}", createBinFile(t)),
 		Stream:    &stream,
 	})

@@ -175,7 +186,7 @@ func TestCreateRemovesLayers(t *testing.T) {

 	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)),
+		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t)),
 		Stream:    &stream,
 	})

@@ -201,7 +212,7 @@ func TestCreateUnsetsSystem(t *testing.T) {

 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nSYSTEM Say hi!", createBinFile(t, nil, nil)),
+		Modelfile: fmt.Sprintf("FROM %s\nSYSTEM Say hi!", createBinFile(t)),
 		Stream:    &stream,
 	})

@@ -221,7 +232,7 @@ func TestCreateUnsetsSystem(t *testing.T) {

 	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nSYSTEM \"\"", createBinFile(t, nil, nil)),
+		Modelfile: fmt.Sprintf("FROM %s\nSYSTEM \"\"", createBinFile(t)),
 		Stream:    &stream,
 	})

@@ -256,7 +267,7 @@ func TestCreateMergeParameters(t *testing.T) {

 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nPARAMETER temperature 1\nPARAMETER top_k 10\nPARAMETER stop USER:\nPARAMETER stop ASSISTANT:", createBinFile(t, nil, nil)),
+		Modelfile: fmt.Sprintf("FROM %s\nPARAMETER temperature 1\nPARAMETER top_k 10\nPARAMETER stop USER:\nPARAMETER stop ASSISTANT:", createBinFile(t)),
 		Stream:    &stream,
 	})

@@ -358,7 +369,7 @@ func TestCreateReplacesMessages(t *testing.T) {

 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nMESSAGE assistant \"What is my purpose?\"\nMESSAGE user \"You run tests.\"\nMESSAGE assistant \"Oh, my god.\"", createBinFile(t, nil, nil)),
+		Modelfile: fmt.Sprintf("FROM %s\nMESSAGE assistant \"What is my purpose?\"\nMESSAGE user \"You run tests.\"\nMESSAGE assistant \"Oh, my god.\"", createBinFile(t)),
 		Stream:    &stream,
 	})

@@ -433,7 +444,7 @@ func TestCreateTemplateSystem(t *testing.T) {

 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}\nSYSTEM Say hello!\nTEMPLATE {{ .System }} {{ .Prompt }}\nSYSTEM Say bye!", createBinFile(t, nil, nil)),
+		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}\nSYSTEM Say hello!\nTEMPLATE {{ .System }} {{ .Prompt }}\nSYSTEM Say bye!", createBinFile(t)),
 		Stream:    &stream,
 	})

@@ -478,7 +489,7 @@ func TestCreateLicenses(t *testing.T) {

 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nLICENSE MIT\nLICENSE Apache-2.0", createBinFile(t, nil, nil)),
+		Modelfile: fmt.Sprintf("FROM %s\nLICENSE MIT\nLICENSE Apache-2.0", createBinFile(t)),
 		Stream:    &stream,
 	})

@@ -515,46 +526,3 @@ func TestCreateLicenses(t *testing.T) {
 		t.Errorf("expected Apache-2.0, actual %s", apache)
 	}
 }
-
-func TestCreateDetectTemplate(t *testing.T) {
-	p := t.TempDir()
-	t.Setenv("OLLAMA_MODELS", p)
-	var s Server
-
-	t.Run("matched", func(t *testing.T) {
-		w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
-			Name: "test",
-			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
-				"tokenizer.chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
-			}, nil)),
-			Stream: &stream,
-		})
-
-		if w.Code != http.StatusOK {
-			t.Fatalf("expected status code 200, actual %d", w.Code)
-		}
-
-		checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
-			filepath.Join(p, "blobs", "sha256-06cd2687a518d624073f125f1db1c5c727f77c75e84a138fe745186dbbbb4cd7"),
-			filepath.Join(p, "blobs", "sha256-542b217f179c7825eeb5bca3c77d2b75ed05bafbd3451d9188891a60a85337c6"),
-			filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
-		})
-	})
-
-	t.Run("unmatched", func(t *testing.T) {
-		w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
-			Name:      "test",
-			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
-			Stream:    &stream,
-		})
-
-		if w.Code != http.StatusOK {
-			t.Fatalf("expected status code 200, actual %d", w.Code)
-		}
-
-		checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
-			filepath.Join(p, "blobs", "sha256-a4e5e156ddec27e286f75328784d7106b60a4eb1d246e950a001a3f944fbda99"),
-			filepath.Join(p, "blobs", "sha256-ca239d7bd8ea90e4a5d2e6bf88f8d74a47b14336e73eb4e18bed4dd325018116"),
-		})
-	})
-}
--- a/server/routes_delete_test.go
+++ b/server/routes_delete_test.go
@@ -16,7 +16,7 @@ func TestDelete(t *testing.T) {

 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
+		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
 	})

 	if w.Code != http.StatusOK {
@@ -25,7 +25,7 @@ func TestDelete(t *testing.T) {

 	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test2",
-		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)),
+		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t)),
 	})

 	if w.Code != http.StatusOK {
--- a/server/routes_list_test.go
+++ b/server/routes_list_test.go
@@ -29,7 +29,7 @@ func TestList(t *testing.T) {
 	for _, n := range expectNames {
 		createRequest(t, s.CreateModelHandler, api.CreateRequest{
 			Name:      n,
-			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
+			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
 		})
 	}

--- a/server/routes_test.go
+++ b/server/routes_test.go
@@ -261,7 +261,7 @@ func TestCase(t *testing.T) {
 		t.Run(tt, func(t *testing.T) {
 			w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 				Name:      tt,
-				Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
+				Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
 				Stream:    &stream,
 			})

@@ -277,7 +277,7 @@ func TestCase(t *testing.T) {
 			t.Run("create", func(t *testing.T) {
 				w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 					Name:      strings.ToUpper(tt),
-					Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
+					Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
 					Stream:    &stream,
 				})

--- a/templates/template.go
+++ b/templates/template.go
@@ -30,8 +30,7 @@ var templatesOnce = sync.OnceValues(func() ([]*Template, error) {
 			return nil, err
 		}

-		// normalize line endings
-		t.Bytes = bytes.ReplaceAll(bts, []byte("\r\n"), []byte("\n"))
+		t.Bytes = bts
 	}

 	return templates, nil