merge conflicts

Merge branch 'main' into royh-precision
Merge pull request #5653 from ollama/mxyng/collect-system
2024-07-12 15:09:05 -07:00 · 2024-07-12 15:07:36 -07:00 · 2024-07-12 12:32:34 -07:00 · 2024-07-12 12:29:23 -07:00 · 2024-07-12 12:26:43 -07:00 · 2024-07-11 22:53:46 -07:00
36 changed files with 174 additions and 281 deletions
--- a/app/ollama.iss
+++ b/app/ollama.iss
@@ -127,6 +127,10 @@ Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\models"
 Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\history"
 ; NOTE: if the user has a custom OLLAMA_MODELS it will be preserved

+[InstallDelete]
+Type: filesandordirs; Name: "{%TEMP}\ollama*"
+Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
+
 [Messages]
 WizardReady=Ollama Windows Preview
 ReadyLabel1=%nLet's get you up and running with your own large language models.
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -657,7 +657,7 @@ func showInfo(resp *api.ShowResponse) {

 	modelData := [][]string{
 		{"arch", arch},
-		{"parameters", resp.Details.ParameterSize},
+		{"parameters", format.Parameters(uint64(resp.ModelInfo["general.parameter_count"].(float64)))},
 		{"quantization", resp.Details.QuantizationLevel},
 		{"context length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)].(float64))},
 		{"embedding length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.embedding_length", arch)].(float64))},
@@ -671,7 +671,7 @@ func showInfo(resp *api.ShowResponse) {
 	if resp.ProjectorInfo != nil {
 		projectorData := [][]string{
 			{"arch", "clip"},
-			{"parameters", format.HumanNumber(uint64(resp.ProjectorInfo["general.parameter_count"].(float64)))},
+			{"parameters", format.Parameters(uint64(resp.ProjectorInfo["general.parameter_count"].(float64)))},
 		}

 		if projectorType, ok := resp.ProjectorInfo["clip.projector_type"]; ok {
--- a/format/format.go
+++ b/format/format.go
@@ -9,9 +9,10 @@ const (
 	Thousand = 1000
 	Million  = Thousand * 1000
 	Billion  = Million * 1000
+	Trillion = Billion * 1000
 )

-func HumanNumber(b uint64) string {
+func RoundedParameter(b uint64) string {
 	switch {
 	case b >= Billion:
 		number := float64(b) / Billion
@@ -31,3 +32,33 @@ func HumanNumber(b uint64) string {
 		return fmt.Sprintf("%d", b)
 	}
 }
+
+func Parameters(b uint64) string {
+	switch {
+	case b >= Trillion:
+		number := float64(b) / Trillion
+		return fmt.Sprintf("%sT", decimalPlace(number))
+	case b >= Billion:
+		number := float64(b) / Billion
+		return fmt.Sprintf("%sB", decimalPlace(number))
+	case b >= Million:
+		number := float64(b) / Million
+		return fmt.Sprintf("%sM", decimalPlace(number))
+	case b >= Thousand:
+		number := float64(b) / Thousand
+		return fmt.Sprintf("%sK", decimalPlace(number))
+	default:
+		return fmt.Sprintf("%d", b)
+	}
+}
+
+func decimalPlace(number float64) string {
+	switch {
+	case number >= 100:
+		return fmt.Sprintf("%.0f", number)
+	case number >= 10:
+		return fmt.Sprintf("%.1f", number)
+	default:
+		return fmt.Sprintf("%.2f", number)
+	}
+}
--- a/format/format_test.go
+++ b/format/format_test.go
@@ -4,7 +4,7 @@ import (
 	"testing"
 )

-func TestHumanNumber(t *testing.T) {
+func TestRoundedParameter(t *testing.T) {
 	type testCase struct {
 		input    uint64
 		expected string
@@ -24,7 +24,34 @@ func TestHumanNumber(t *testing.T) {

 	for _, tc := range testCases {
 		t.Run(tc.expected, func(t *testing.T) {
-			result := HumanNumber(tc.input)
+			result := RoundedParameter(tc.input)
+			if result != tc.expected {
+				t.Errorf("Expected %s, got %s", tc.expected, result)
+			}
+		})
+	}
+}
+
+func TestParameters(t *testing.T) {
+	type testCase struct {
+		input    uint64
+		expected string
+	}
+
+	testCases := []testCase{
+		{26000000, "26.0M"},
+		{26000000000, "26.0B"},
+		{1000, "1.00K"},
+		{1000000, "1.00M"},
+		{1000000000, "1.00B"},
+		{1000000000000, "1.00T"},
+		{100, "100"},
+		{206000000, "206M"},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.expected, func(t *testing.T) {
+			result := Parameters(tc.input)
 			if result != tc.expected {
 				t.Errorf("Expected %s, got %s", tc.expected, result)
 			}
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -360,14 +360,17 @@ func GetGPUInfo() GpuInfoList {
 					"before",
 					"total", format.HumanBytes2(cpus[0].TotalMemory),
 					"free", format.HumanBytes2(cpus[0].FreeMemory),
+					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
 				),
 				slog.Group(
 					"now",
 					"total", format.HumanBytes2(mem.TotalMemory),
 					"free", format.HumanBytes2(mem.FreeMemory),
+					"free_swap", format.HumanBytes2(mem.FreeSwap),
 				),
 			)
 			cpus[0].FreeMemory = mem.FreeMemory
+			cpus[0].FreeSwap = mem.FreeSwap
 		}

 		var memInfo C.mem_info_t
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -57,6 +57,7 @@ func GetCPUMem() (memInfo, error) {
 	return memInfo{
 		TotalMemory: uint64(C.getPhysicalMemory()),
 		FreeMemory:  uint64(C.getFreeMemory()),
+		// FreeSwap omitted as Darwin uses dynamic paging
 	}, nil
 }

--- a/gpu/gpu_linux.go
+++ b/gpu/gpu_linux.go
@@ -50,7 +50,7 @@ var OneapiMgmtName = "libze_intel_gpu.so"

 func GetCPUMem() (memInfo, error) {
 	var mem memInfo
-	var total, available, free, buffers, cached uint64
+	var total, available, free, buffers, cached, freeSwap uint64
 	f, err := os.Open("/proc/meminfo")
 	if err != nil {
 		return mem, err
@@ -70,20 +70,21 @@ func GetCPUMem() (memInfo, error) {
 			_, err = fmt.Sscanf(line, "Buffers:%d", &buffers)
 		case strings.HasPrefix(line, "Cached:"):
 			_, err = fmt.Sscanf(line, "Cached:%d", &cached)
+		case strings.HasPrefix(line, "SwapFree:"):
+			_, err = fmt.Sscanf(line, "SwapFree:%d", &freeSwap)
 		default:
 			continue
 		}
 		if err != nil {
 			return mem, err
 		}
-
-		if total > 0 && available > 0 {
-			mem.TotalMemory = total * format.KibiByte
-			mem.FreeMemory = available * format.KibiByte
-			return mem, nil
-		}
 	}
 	mem.TotalMemory = total * format.KibiByte
-	mem.FreeMemory = (free + buffers + cached) * format.KibiByte
+	mem.FreeSwap = freeSwap * format.KibiByte
+	if available > 0 {
+		mem.FreeMemory = available * format.KibiByte
+	} else {
+		mem.FreeMemory = (free + buffers + cached) * format.KibiByte
+	}
 	return mem, nil
 }
--- a/gpu/gpu_windows.go
+++ b/gpu/gpu_windows.go
@@ -51,5 +51,5 @@ func GetCPUMem() (memInfo, error) {
 	if r1 == 0 {
 		return memInfo{}, fmt.Errorf("GlobalMemoryStatusEx failed: %w", err)
 	}
-	return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys}, nil
+	return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
 }
--- a/gpu/types.go
+++ b/gpu/types.go
@@ -10,6 +10,7 @@ import (
 type memInfo struct {
 	TotalMemory uint64 `json:"total_memory,omitempty"`
 	FreeMemory  uint64 `json:"free_memory,omitempty"`
+	FreeSwap    uint64 `json:"free_swap,omitempty"`
 }

 // Beginning of an `ollama info` command
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -33,7 +33,7 @@ func Quantize(infile, outfile string, ftype fileType) error {
 	params.ftype = ftype.Value()

 	if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
-		return fmt.Errorf("llama_model_quantize: %d", rc)
+		return fmt.Errorf("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
 	}

 	return nil
--- a/llm/server.go
+++ b/llm/server.go
@@ -88,6 +88,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	var estimate MemoryEstimate
 	var systemTotalMemory uint64
 	var systemFreeMemory uint64
+	var systemSwapFreeMemory uint64

 	systemMemInfo, err := gpu.GetCPUMem()
 	if err != nil {
@@ -95,7 +96,8 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	} else {
 		systemTotalMemory = systemMemInfo.TotalMemory
 		systemFreeMemory = systemMemInfo.FreeMemory
-		slog.Debug("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", systemFreeMemory)
+		systemSwapFreeMemory = systemMemInfo.FreeSwap
+		slog.Debug("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "free_swap", format.HumanBytes2(systemSwapFreeMemory))
 	}

 	// If the user wants zero GPU layers, reset the gpu list to be CPU/system ram info
@@ -125,9 +127,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	// On linux, over-allocating CPU memory will almost always result in an error
 	if runtime.GOOS == "linux" {
 		systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
-		if systemMemoryRequired > systemTotalMemory {
-			slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "system", format.HumanBytes2(systemTotalMemory))
-			return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(systemTotalMemory))
+		available := min(systemTotalMemory, systemFreeMemory+systemSwapFreeMemory)
+		if systemMemoryRequired > available {
+			slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", available, "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory))
+			return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available))
 		}
 	}

--- a/server/images.go
+++ b/server/images.go
@@ -466,7 +466,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
 				if baseLayer.GGML != nil {
 					config.ModelFormat = cmp.Or(config.ModelFormat, baseLayer.GGML.Name())
 					config.ModelFamily = cmp.Or(config.ModelFamily, baseLayer.GGML.KV().Architecture())
-					config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
+					config.ModelType = cmp.Or(config.ModelType, format.RoundedParameter(baseLayer.GGML.KV().ParameterCount()))
 					config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType().String())
 					config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
 				}
--- a/server/routes_create_test.go
+++ b/server/routes_create_test.go
@@ -546,8 +546,8 @@ func TestCreateDetectTemplate(t *testing.T) {

 		checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
 			filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
-			filepath.Join(p, "blobs", "sha256-68b0323b2f21572bc09ba07554b16b379a5713ee48ef8c25a7661a1f71cfce77"),
-			filepath.Join(p, "blobs", "sha256-eb72fb7c550ee1f1dec4039bd65382acecf5f7536a30fb7ccace39a8d0cb590b"),
+			filepath.Join(p, "blobs", "sha256-c608dc615584cd20d9d830363dabf8a4783ae5d34245c3d8c115edb3bc7b28e4"),
+			filepath.Join(p, "blobs", "sha256-f836ee110db21567f826332e4cedd746c06d10664fd5a9ea3659e3683a944510"),
 		})
 	})

--- a/template/alfred.gotmpl
+++ b/template/alfred.gotmpl
@@ -1,8 +1 @@
-{{- if .Messages }}
-{{- if .System }}<start_system>{{ .System }}<end_message>
-{{- end }}
-{{- range .Messages }}<start_{{ .Role }}>{{ .Content }}<end_message>
-{{- end }}<start_assistant>
-{{- else -}}
-{{ if .System }}<start_system>{{ .System }}<end_message>{{ end }}{{ if .Prompt }}<start_user>{{ .Prompt }}<end_message>{{ end }}<start_assistant>{{ .Response }}<end_message>
-{{- end -}}
+{{ if .System }}<start_system>{{ .System }}<end_message>{{ end }}{{ if .Prompt }}<start_user>{{ .Prompt }}<end_message>{{ end }}<start_assistant>{{ .Response }}<end_message>
--- a/template/alpaca.gotmpl
+++ b/template/alpaca.gotmpl
@@ -1,15 +1,3 @@
-{{- if .Messages }}
-{{- if .System }}{{ .System }}
-
-{{ end }}
-{{- range .Messages }}
-{{- if eq .Role "user" }}### Instruction:
-{{- else if eq .Role "assistant" }}### Response:
-{{- end }}
-{{ .Content }}
-
-{{ end }}### Response:
-{{ else -}}
 {{ if .System }}{{ .System }}

 {{ end }}{{ if .Prompt }}### Instruction:
@@ -18,4 +6,3 @@
 {{ end }}### Response:
 {{ .Response }}

-{{ end -}}
--- a/template/chatml.gotmpl
+++ b/template/chatml.gotmpl
@@ -1,15 +1,6 @@
-{{- if .Messages }}
-{{- if .System }}<|im_start|>system
-{{ .System }}<|im_end|>
-{{ end }}
-{{- range .Messages }}<|im_start|>{{ .Role }}
-{{ .Content }}<|im_end|>
-{{ end }}<|im_start|>assistant
-{{ else -}}
 {{ if .System }}<|im_start|>system
 {{ .System }}<|im_end|>
 {{ end }}{{ if .Prompt }}<|im_start|>user
 {{ .Prompt }}<|im_end|>
 {{ end }}<|im_start|>assistant
 {{ .Response }}<|im_end|>
-{{ end -}}
--- a/template/chatqa.gotmpl
+++ b/template/chatqa.gotmpl
@@ -1,18 +1,6 @@
-{{- if .Messages }}
-{{- if .System }}System: {{ .System }}
-
-{{ end }}
-{{- range .Messages }}
-{{- if eq .Role "user" }}User:
-{{- else if eq .Role "assistant" }}Assistant:
-{{- end }} {{ .Content }}
-
-{{ end }}Assistant:
-{{- else -}}
 {{ if .System }}System: {{ .System }}

 {{ end }}{{ if .Prompt }}User: {{ .Prompt }}

 {{ end }}Assistant: {{ .Response }}

-{{ end -}}
--- a/template/codellama-70b-instruct.gotmpl
+++ b/template/codellama-70b-instruct.gotmpl
@@ -1,19 +1,10 @@
-{{- if .Messages }}
-{{- if .System }}Source: system
-
- {{ .System }} <step> {{ end }}
-{{- range .Messages }}Source: {{ .Role }}
-
- {{ .Content }} <step> {{ end }}Source: assistant
-Destination: user
-
- {{ else -}}
 {{ if .System }}Source: system

 {{ .System }} <step> {{ end }}Source: user

 {{ .Prompt }} <step> Source: assistant
+{{- if not .Response }}
 Destination: user
+{{- end }}

- {{ .Response }} <step>
-{{- end -}}
+ {{ .Response }} <step> 
--- a/template/falcon-instruct.gotmpl
+++ b/template/falcon-instruct.gotmpl
@@ -1,15 +1,5 @@
-{{- if .Messages }}
-{{- if .System }}System: {{ .System }}
-{{ end }}
-{{- range .Messages }}
-{{- if eq .Role "user" }}User:
-{{ else if eq .Role "assistant" }}Falcon:
-{{ end }}{{ .Content }}
-{{ end }}Falcon:
-{{ else -}}
 {{ if .System }}System: {{ .System }}
 {{ end }}{{ if .Prompt }}User:
 {{ .Prompt }}
 {{ end }}Falcon:
 {{ .Response }}
-{{ end -}}
--- a/template/gemma-instruct.gotmpl
+++ b/template/gemma-instruct.gotmpl
@@ -1,17 +1,5 @@
-{{- if .Messages }}
-{{- range $index, $_ := .Messages }}<start_of_turn>
-{{- if eq .Role "user" }}user
-{{- if and $.System (eq $index 0) }}
-{{ $.System }}
-{{- end }}
-{{- else if eq .Role "assistant" }}model
-{{- end }}
-{{ .Content }}<end_of_turn>
-{{ end }}<start_of_turn>model
-{{ else -}}
 <start_of_turn>user
 {{ if .System }}{{ .System }}
 {{ end }}{{ .Prompt }}<end_of_turn>
 <start_of_turn>model
 {{ .Response }}<end_of_turn>
-{{ end -}}
--- a/template/granite-instruct.gotmpl
+++ b/template/granite-instruct.gotmpl
@@ -1,16 +1,3 @@
-{{- if .Messages }}
-{{- if .System }}System:
-{{ .System }}
-
-{{ end }}
-{{- range .Messages }}
-{{- if eq .Role "user" }}Question:
-{{- else if eq .Role "assistant" }}Answer:
-{{- end }}
-{{ .Content }}
-
-{{ end }}Answer:
-{{ else -}}
 {{ if .System }}System:
 {{ .System }}

@@ -20,4 +7,3 @@
 {{ end }}Answer:
 {{ .Response }}

-{{ end -}}
--- a/template/llama2-chat.gotmpl
+++ b/template/llama2-chat.gotmpl
@@ -1,16 +1,6 @@
-{{- if .Messages }}
-{{- range $index, $_ := .Messages }}
-{{- if eq .Role "user" }}[INST] {{ if eq $index 0 }}<<SYS>>
-{{- if $.System }}
-{{ $.System }}
+[INST] <<SYS>>
+{{- if .System }}
+{{ .System }}
 {{ end }}<</SYS>>

-{{ end }}{{ .Content }}
-{{- else }} [/INST] {{ .Content }}</s><s>
-{{- end }}
-{{- end }} [/INST]
-{{- else -}}
-[INST] <<SYS>>{{ if .System }}{{ .System }}{{ end }}<</SYS>>
-
-{{ .Prompt }} [/INST] {{ .Response }}</s>
-{{- end -}}
+{{ .Prompt }} [/INST] {{ .Response }}</s><s>
--- a/template/llama3-instruct.gotmpl
+++ b/template/llama3-instruct.gotmpl
@@ -1,19 +1,7 @@
-{{- if .Messages }}
-{{- if .System }}<|start_header_id|>system<|end_header_id|>
-
-{{ .System }}<|eot_id|>
-{{- end }}
-{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
-
-{{ .Content }}<|eot_id|>
-{{- end }}<|start_header_id|>assistant<|end_header_id|>
-
-{{ else -}}
 {{ if .System }}<|start_header_id|>system<|end_header_id|>

 {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>

 {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>

-{{ .Response }}<|eot_id|>
-{{- end -}}
+{{ .Response }}<|eot_id|>
--- a/template/magicoder.gotmpl
+++ b/template/magicoder.gotmpl
@@ -1,15 +1,3 @@
-{{- if .Messages }}
-{{- if .System }}{{ .System }}
-
-{{ end }}
-{{- range .Messages }}
-{{- if eq .Role "user" }}@@ Instruction
-{{- else if eq .Role "assistant" }}@@ Response
-{{- end }}
-{{ .Content }}
-
-{{ end }}@@ Response
-{{ else -}}
 {{ if .System }}{{ .System }}

 {{ end }}{{ if .Prompt }}@@ Instruction
@@ -18,4 +6,3 @@
 {{ end }}@@ Response
 {{ .Response }}

-{{ end -}}
--- a/template/mistral-instruct.gotmpl
+++ b/template/mistral-instruct.gotmpl
@@ -1,10 +1,3 @@
-{{- if .Messages }}
-{{- range $index, $_ := .Messages }}
-{{- if eq .Role "user" }}[INST] {{ if and $.System (eq (len (slice $.Messages $index)) 1) }}{{ $.System }}
-{{ end }}{{ .Content }}
-{{- else if eq .Role "assistant" }}[/INST] {{ .Content }}</s>
-{{- end }}
-{{- end }}[/INST]
-{{- else -}}
-[INST] {{ if .System }}{{ .System }} {{ end }}{{ .Prompt }}[/INST] {{ .Response }}</s>
-{{- end -}}
+[INST] {{ if .System }}{{ .System }}
+
+{{ end }}{{ .Prompt }}[/INST] {{ .Response }}</s>
--- a/template/openchat.gotmpl
+++ b/template/openchat.gotmpl
@@ -1,11 +1 @@
-{{- if .Messages }}
-{{- if .System }}GPT4 Correct System: {{ .System }}<|end_of_turn|>
-{{- end }}
-{{- range .Messages }}GPT4 Correct
-{{- if eq .Role "user" }} User:
-{{- else if eq .Role "assistant" }} Assistant:
-{{- end }} {{ .Content }}<|end_of_turn|>
-{{- end }}GPT4 Correct Assistant:
-{{- else -}}
-{{ if .System }}GPT4 Correct System: {{ .System }}<|end_of_turn|>{{ end }}GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|>
-{{- end -}}
+{{ if .System }}GPT4 Correct System: {{ .System }}<|end_of_turn|>{{ end }}GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|>
--- a/template/phi-3.gotmpl
+++ b/template/phi-3.gotmpl
@@ -1,15 +1,6 @@
-{{- if .Messages }}
-{{- if .System }}<|system|>
-{{ .System }}<|end|>
-{{ end }}
-{{- range .Messages }}<|{{ .Role }}|>
-{{ .Content }}<|end|>
-{{ end }}<|assistant|>
-{{ else -}}
 {{ if .System }}<|system|>
 {{ .System }}<|end|>
 {{ end }}{{ if .Prompt }}<|user|>
 {{ .Prompt }}<|end|>
 {{ end }}<|assistant|>
 {{ .Response }}<|end|>
-{{ end -}}
--- a/template/solar-instruct.gotmpl
+++ b/template/solar-instruct.gotmpl
@@ -1,16 +1,3 @@
-{{- if .Messages }}
-{{- if .System }}### System:
-{{ .System }}
-
-{{ end }}
-{{- range .Messages }}
-{{- if eq .Role "user" }}### User:
-{{ .Content }}
-{{ else if eq .Role "assistant" }}### Assistant:
-{{ .Content }}</s>
-{{ end }}
-{{ end }}### Assistant:
-{{ else -}}
 {{ if .System }}### System:
 {{ .System }}

@@ -20,4 +7,3 @@
 {{ end }}### Assistant:
 {{ .Response }}</s>

-{{ end -}}
--- a/template/starcoder2-instruct.gotmpl
+++ b/template/starcoder2-instruct.gotmpl
@@ -1,17 +1,3 @@
-{{- if .Messages }}
-{{- if .System }}{{ .System }}
-
-{{ end }}
-{{- range .Messages }}
-{{- if eq .Role "user" }}### Instruction
-{{ .Content }}
-
-{{ else if eq .Role "assistant" }}### Response
-{{ .Content }}<|endoftext|>
-
-{{ end }}
-{{- end }}### Response
-{{ else -}}
 {{ if .System }}{{ .System }}

 {{ end }}{{ if .Prompt }}### Instruction
@@ -20,4 +6,3 @@
 {{ end }}### Response
 {{ .Response }}<|endoftext|>

-{{ end -}}
--- a/template/template.go
+++ b/template/template.go
@@ -161,11 +161,10 @@ func (t *Template) Execute(w io.Writer, v Values) error {
 	var prompt, response string
 	for i, m := range collated {
 		switch m.Role {
+		case "system":
+			system = m.Content
 		case "user":
 			prompt = m.Content
-			if i != 0 {
-				system = ""
-			}
 		case "assistant":
 			response = m.Content
 		}
@@ -179,6 +178,7 @@ func (t *Template) Execute(w io.Writer, v Values) error {
 				return err
 			}

+			system = ""
 			prompt = ""
 			response = ""
 		}
@@ -209,25 +209,16 @@ func (t *Template) Execute(w io.Writer, v Values) error {
 	return err
 }

-type messages []*api.Message
-
 // collate messages based on role. consecutive messages of the same role are merged
-// into a single message. collate also pulls out and merges messages with Role == "system"
-// which are templated separately. As a side effect, it mangles message content adding image
-// tags ([img-%d]) as needed
-func collate(msgs []api.Message) (system string, collated messages) {
+// into a single message. collate also collects and returns all system messages.
+// collate mutates message content adding image tags ([img-%d]) as needed
+func collate(msgs []api.Message) (string, []*api.Message) {
 	var n int
+
+	var system []string
+	var collated []*api.Message
 	for i := range msgs {
 		msg := msgs[i]
-		if msg.Role == "system" {
-			if system != "" {
-				system += "\n\n"
-			}
-
-			system += msg.Content
-			continue
-		}
-
 		for range msg.Images {
 			imageTag := fmt.Sprintf("[img-%d]", n)
 			if !strings.Contains(msg.Content, "[img]") {
@@ -238,6 +229,10 @@ func collate(msgs []api.Message) (system string, collated messages) {
 			n++
 		}

+		if msg.Role == "system" {
+			system = append(system, msg.Content)
+		}
+
 		if len(collated) > 0 && collated[len(collated)-1].Role == msg.Role {
 			collated[len(collated)-1].Content += "\n\n" + msg.Content
 		} else {
@@ -245,7 +240,7 @@ func collate(msgs []api.Message) (system string, collated messages) {
 		}
 	}

-	return
+	return strings.Join(system, "\n\n"), collated
 }

 func parseNode(n parse.Node) []string {
--- a/template/template_test.go
+++ b/template/template_test.go
@@ -116,12 +116,20 @@ func TestTemplate(t *testing.T) {
 						t.Fatal(err)
 					}

-					if diff := cmp.Diff(actual.Bytes(), expect); diff != "" {
+					bts := actual.Bytes()
+
+					if slices.Contains([]string{"chatqa.gotmpl", "llama2-chat.gotmpl", "mistral-instruct.gotmpl", "openchat.gotmpl", "vicuna.gotmpl"}, match) && bts[len(bts)-1] == ' ' {
+						t.Log("removing trailing space from output")
+						bts = bts[:len(bts)-1]
+					}
+
+					if diff := cmp.Diff(bts, expect); diff != "" {
 						t.Errorf("mismatch (-got +want):\n%s", diff)
 					}
 				})

 				t.Run("legacy", func(t *testing.T) {
+					t.Skip("legacy outputs are currently default outputs")
 					var legacy bytes.Buffer
 					if err := tmpl.Execute(&legacy, Values{Messages: tt, forceLegacy: true}); err != nil {
 						t.Fatal(err)
@@ -154,11 +162,13 @@ func TestParse(t *testing.T) {
 		{"{{ .System }} {{ .Prompt }} {{ .Response }}", []string{"prompt", "response", "system"}},
 		{"{{ with .Tools }}{{ . }}{{ end }} {{ .System }} {{ .Prompt }}", []string{"prompt", "response", "system", "tools"}},
 		{"{{ range .Messages }}{{ .Role }} {{ .Content }}{{ end }}", []string{"content", "messages", "role"}},
-		{"{{ range .Messages }}{{ if eq .Role \"system\" }}SYSTEM: {{ .Content }}{{ else if eq .Role \"user\" }}USER: {{ .Content }}{{ else if eq .Role \"assistant\" }}ASSISTANT: {{ .Content }}{{ end }}{{ end }}", []string{"content", "messages", "role"}},
+		{`{{- range .Messages }}
+{{- if eq .Role "system" }}SYSTEM:
+{{- else if eq .Role "user" }}USER:
+{{- else if eq .Role "assistant" }}ASSISTANT:
+{{- end }} {{ .Content }}
+{{- end }}`, []string{"content", "messages", "role"}},
 		{`{{- if .Messages }}
-{{- if .System }}<|im_start|>system
-{{ .System }}<|im_end|>
-{{ end }}
 {{- range .Messages }}<|im_start|>{{ .Role }}
 {{ .Content }}<|im_end|>
 {{ end }}<|im_start|>assistant
@@ -200,12 +210,17 @@ func TestExecuteWithMessages(t *testing.T) {
 		{
 			"mistral",
 			[]template{
-				{"no response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `},
-				{"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
-				{"messages", `{{- range $index, $_ := .Messages }}
-{{- if eq .Role "user" }}[INST] {{ if and (eq $index 0) $.System }}{{ $.System }}{{ "\n\n" }}
-{{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}
-{{- end }}
+				{"no response", `[INST] {{ if .System }}{{ .System }}
+
+{{ end }}{{ .Prompt }}[/INST] `},
+				{"response", `[INST] {{ if .System }}{{ .System }}
+
+{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
+				{"messages", `[INST] {{ if .System }}{{ .System }}
+
+{{ end }}
+{{- range .Messages }}
+{{- if eq .Role "user" }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}[INST] {{ end }}
 {{- end }}`},
 			},
 			Values{
@@ -220,13 +235,17 @@ func TestExecuteWithMessages(t *testing.T) {
 		{
 			"mistral system",
 			[]template{
-				{"no response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `},
-				{"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
-				{"messages", `
-{{- range $index, $_ := .Messages }}
-{{- if eq .Role "user" }}[INST] {{ if and (eq $index 0) $.System }}{{ $.System }}{{ "\n\n" }}
-{{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}
-{{- end }}
+				{"no response", `[INST] {{ if .System }}{{ .System }}
+
+{{ end }}{{ .Prompt }}[/INST] `},
+				{"response", `[INST] {{ if .System }}{{ .System }}
+
+{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
+				{"messages", `[INST] {{ if .System }}{{ .System }}
+
+{{ end }}
+{{- range .Messages }}
+{{- if eq .Role "user" }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}[INST] {{ end }}
 {{- end }}`},
 			},
 			Values{
@@ -253,12 +272,9 @@ Hello friend![/INST] Hello human![INST] What is your name?[/INST] `,
 {{ .Response }}<|im_end|>
 `},
 				{"messages", `
-{{- range $index, $_ := .Messages }}
-{{- if and (eq .Role "user") (eq $index 0) $.System }}<|im_start|>system
-{{ $.System }}<|im_end|>{{ "\n" }}
-{{- end }}<|im_start|>{{ .Role }}
-{{ .Content }}<|im_end|>{{ "\n" }}
-{{- end }}<|im_start|>assistant
+{{- range $index, $_ := .Messages }}<|im_start|>{{ .Role }}
+{{ .Content }}<|im_end|>
+{{ end }}<|im_start|>assistant
 `},
 			},
 			Values{
@@ -291,9 +307,11 @@ What is your name?<|im_end|>
 `},
 				{"messages", `
 {{- range .Messages }}
-{{- if eq .Role "user" }}Question: {{ .Content }}{{ "\n\n" }}
-{{- else if eq .Role "assistant" }}Answer: {{ .Content }}{{ "\n\n" }}
-{{- end }}
+{{- if eq .Role "user" }}Question: {{ .Content }}
+
+{{ else if eq .Role "assistant" }}Answer: {{ .Content }}
+
+{{ end }}
 {{- end }}Answer: `},
 			},
 			Values{
--- a/template/testdata/llama2-chat.gotmpl/system-user-assistant-user
+++ b/template/testdata/llama2-chat.gotmpl/system-user-assistant-user
@@ -2,4 +2,6 @@
 You are a helpful assistant.
 <</SYS>>

-Hello, how are you? [/INST] I'm doing great. How can I help you today?</s><s>[INST] I'd like to show off how chat templating works! [/INST]
+Hello, how are you? [/INST] I'm doing great. How can I help you today?</s><s>[INST] <<SYS>><</SYS>>
+
+I'd like to show off how chat templating works! [/INST]
--- a/template/testdata/llama2-chat.gotmpl/user-assistant-user
+++ b/template/testdata/llama2-chat.gotmpl/user-assistant-user
@@ -1,3 +1,5 @@
 [INST] <<SYS>><</SYS>>

-Hello, how are you? [/INST] I'm doing great. How can I help you today?</s><s>[INST] I'd like to show off how chat templating works! [/INST]
+Hello, how are you? [/INST] I'm doing great. How can I help you today?</s><s>[INST] <<SYS>><</SYS>>
+
+I'd like to show off how chat templating works! [/INST]
--- a/template/testdata/mistral-instruct.gotmpl/system-user-assistant-user
+++ b/template/testdata/mistral-instruct.gotmpl/system-user-assistant-user
@@ -1,2 +1,3 @@
-[INST] Hello, how are you?[/INST] I'm doing great. How can I help you today?</s>[INST] You are a helpful assistant.
-I'd like to show off how chat templating works![/INST]
+[INST] You are a helpful assistant.
+
+Hello, how are you?[/INST] I'm doing great. How can I help you today?</s>[INST] I'd like to show off how chat templating works![/INST]
--- a/template/vicuna.gotmpl
+++ b/template/vicuna.gotmpl
@@ -1,15 +1,4 @@
-{{- if .Messages }}
-{{- if .System }}{{ .System }}
-
-{{ end }}
-{{- range .Messages }}
-{{- if eq .Role "user" }}USER: {{ .Content }}
-{{ else if eq .Role "assistant" }}ASSISTANT: {{ .Content }}</s>
-{{ end }}
-{{- end }}ASSISTANT:
-{{- else -}}
 {{ if .System }}{{ .System }}

 {{ end }}{{ if .Prompt }}USER: {{ .Prompt }}
 {{ end }}ASSISTANT: {{ .Response }}</s>
-{{ end -}}
--- a/template/zephyr.gotmpl
+++ b/template/zephyr.gotmpl
@@ -1,15 +1,6 @@
-{{- if .Messages }}
-{{- if .System }}<|system|>
-{{ .System }}</s>
-{{ end }}
-{{- range .Messages }}<|{{ .Role }}|>
-{{ .Content }}</s>
-{{ end }}<|assistant|>
-{{ else -}}
 {{ if .System }}<|system|>
 {{ .System }}</s>
 {{ end }}{{ if .Prompt }}<|user|>
 {{ .Prompt }}</s>
 {{ end }}<|assistant|>
 {{ .Response }}</s>
-{{ end -}}
Author	SHA1	Message	Date
Roy Han	e210f8763f	merge conflicts	2024-07-12 15:09:05 -07:00
royjhan	3971c2333f	Merge branch 'main' into royh-precision	2024-07-12 15:07:36 -07:00
Michael Yang	e5c65a85df	Merge pull request #5653 from ollama/mxyng/collect-system template: preprocess message and collect system	2024-07-12 12:32:34 -07:00
Jeffrey Morgan	33627331a3	app: also clean up tempdir runners on install (#5646)	2024-07-12 12:29:23 -07:00
Michael Yang	36c87c433b	template: preprocess message and collect system	2024-07-12 12:26:43 -07:00
Jeffrey Morgan	179737feb7	Clean up old files when installing on Windows (#5645) * app: always clean up install dir; force close applications * remove wildcard * revert `CloseApplications` * whitespace * update `LOCALAPPDATA` var	2024-07-11 22:53:46 -07:00
Michael Yang	47353f5ee4	Merge pull request #5639 from ollama/mxyng/unaggregated-system	2024-07-11 17:48:50 -07:00
Josh	10e768826c	fix: quant err message (#5616)	2024-07-11 17:24:29 -07:00
Michael Yang	5056bb9c01	rename aggregate to contents	2024-07-11 17:00:26 -07:00
Jeffrey Morgan	c4cf8ad559	llm: avoid loading model if system memory is too small (#5637) * llm: avoid loading model if system memory is too small * update log * Instrument swap free space On linux and windows, expose how much swap space is available so we can take that into consideration when scheduling models * use `systemSwapFreeMemory` in check --------- Co-authored-by: Daniel Hiltgen <daniel@ollama.com>	2024-07-11 16:42:57 -07:00
Michael Yang	57ec6901eb	revert embedded templates to use prompt/response This reverts commit `19753c18c0`. for compat. messages will be added at a later date	2024-07-11 14:49:35 -07:00
Michael Yang	e64f9ebb44	do not automatically aggregate system messages	2024-07-11 14:49:35 -07:00
Roy Han	c71698426c	Separate Rounding Functions	2024-06-24 11:09:08 -07:00
Roy Han	f93cdfdfae	Standardize with ollama.com	2024-06-24 10:53:15 -07:00
Roy Han	af370ac178	Parameter Precision	2024-06-20 10:38:31 -07:00