Compare commits
1 commit: bruce/iq-q ... bmizerany/

| Author | SHA1 | Date |
|---|---|---|
|  | 61b287cf25 |  |
19  cmd/cmd.go
@@ -34,6 +34,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/auth"
 	"github.com/ollama/ollama/format"
+	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/progress"
 	"github.com/ollama/ollama/server"
 	"github.com/ollama/ollama/types/errtypes"
@@ -56,13 +57,13 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	p := progress.NewProgress(os.Stderr)
 	defer p.Stop()

-	f, err := os.Open(filename)
+	modelfile, err := os.Open(filename)
 	if err != nil {
 		return err
 	}
-	defer f.Close()
+	defer modelfile.Close()

-	modelfile, err := model.ParseFile(f)
+	commands, err := parser.Parse(modelfile)
 	if err != nil {
 		return err
 	}
@@ -76,10 +77,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	spinner := progress.NewSpinner(status)
 	p.Add(status, spinner)

-	for i := range modelfile.Commands {
-		switch modelfile.Commands[i].Name {
+	for i := range commands {
+		switch commands[i].Name {
 		case "model", "adapter":
-			path := modelfile.Commands[i].Args
+			path := commands[i].Args
 			if path == "~" {
 				path = home
 			} else if strings.HasPrefix(path, "~/") {
@@ -91,7 +92,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 			}

 			fi, err := os.Stat(path)
-			if errors.Is(err, os.ErrNotExist) && modelfile.Commands[i].Name == "model" {
+			if errors.Is(err, os.ErrNotExist) && commands[i].Name == "model" {
 				continue
 			} else if err != nil {
 				return err
@@ -114,7 +115,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 			return err
 		}

-		modelfile.Commands[i].Args = "@" + digest
+		commands[i].Args = "@" + digest
 	}
 }

@@ -144,7 +145,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {

 	quantization, _ := cmd.Flags().GetString("quantization")

-	request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantization: quantization}
+	request := api.CreateRequest{Name: args[0], Modelfile: parser.Format(commands), Quantization: quantization}
 	if err := client.Create(cmd.Context(), &request, fn); err != nil {
 		return err
 	}

@@ -5,7 +5,6 @@ import (
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
-	"io"
 	"log/slog"
 	"os"
 	"path/filepath"
@@ -48,7 +47,7 @@ type ByteOrder interface {
 type ModelArch interface {
 	GetTensors() error
 	LoadVocab() error
-	WriteGGUF(io.WriteSeeker) error
+	WriteGGUF() (string, error)
 }

 type ModelFormat interface {

@@ -94,7 +94,7 @@ func (m *GemmaModel) LoadVocab() error {
 	return nil
 }

-func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
+func (m *GemmaModel) WriteGGUF() (string, error) {
 	kv := llm.KV{
 		"general.architecture": "gemma",
 		"general.name": m.Name,
@@ -122,5 +122,16 @@ func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
 		"tokenizer.ggml.add_eos_token": false,
 	}

-	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
+	f, err := os.CreateTemp("", "ollama-gguf")
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	mod := llm.NewGGUFV3(m.Params.ByteOrder)
+	if err := mod.Encode(f, kv, m.Tensors); err != nil {
+		return "", err
+	}
+
+	return f.Name(), nil
 }

@@ -132,7 +132,7 @@ func (m *LlamaModel) LoadVocab() error {
 	return nil
 }

-func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
+func (m *LlamaModel) WriteGGUF() (string, error) {
 	kv := llm.KV{
 		"general.architecture": "llama",
 		"general.name": m.Name,
@@ -161,9 +161,16 @@ func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {

 	f, err := os.CreateTemp("", "ollama-gguf")
 	if err != nil {
-		return err
+		return "", err
 	}
 	defer f.Close()

-	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(f, kv, m.Tensors)
+	mod := llm.NewGGUFV3(m.Params.ByteOrder)
+	if err := mod.Encode(f, kv, m.Tensors); err != nil {
+		return "", err
+	}
+
+	slog.Debug(fmt.Sprintf("gguf file = %s", f.Name()))
+
+	return f.Name(), nil
 }

@@ -132,7 +132,7 @@ func (m *MistralModel) LoadVocab() error {
 	return nil
 }

-func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
+func (m *MistralModel) WriteGGUF() (string, error) {
 	kv := llm.KV{
 		"general.architecture": "llama",
 		"general.name": m.Name,
@@ -158,5 +158,16 @@ func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
 		"tokenizer.ggml.unknown_token_id": uint32(0),
 	}

-	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
+	f, err := os.CreateTemp("", "ollama-gguf")
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	mod := llm.NewGGUFV3(m.Params.ByteOrder)
+	if err := mod.Encode(f, kv, m.Tensors); err != nil {
+		return "", err
+	}
+
+	return f.Name(), nil
 }

@@ -1,7 +1,7 @@
 package convert

 import (
-	"io"
+	"os"
 	"regexp"

 	"github.com/ollama/ollama/llm"
@@ -47,7 +47,7 @@ func (m *MixtralModel) LoadVocab() error {
 	return nil
 }

-func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
+func (m *MixtralModel) WriteGGUF() (string, error) {
 	kv := llm.KV{
 		"general.architecture": "llama",
 		"general.name": m.Name,
@@ -81,5 +81,16 @@ func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
 		"tokenizer.ggml.add_eos_token": false,
 	}

-	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
+	f, err := os.CreateTemp("", "ollama-gguf")
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	mod := llm.NewGGUFV3(m.Params.ByteOrder)
+	if err := mod.Encode(f, kv, m.Tensors); err != nil {
+		return "", err
+	}
+
+	return f.Name(), nil
 }

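As an aside — a minimal, self-contained sketch (mine, not part of this diff) of how a caller consumes the revised `WriteGGUF` contract: each model now creates its own temp file and returns its path, so ownership and cleanup of the file move to the caller. The `ggufWriter` and `stubModel` names here are hypothetical stand-ins for the converted model types.

```go
package main

import (
	"fmt"
	"os"
)

// ggufWriter mirrors the revised ModelArch contract: WriteGGUF creates a
// temp file itself and returns its path, rather than writing into a
// caller-supplied io.WriteSeeker.
type ggufWriter interface {
	WriteGGUF() (string, error)
}

// stubModel stands in for GemmaModel/LlamaModel/etc.; it writes a
// placeholder temp file the same way the converted models do.
type stubModel struct{}

func (stubModel) WriteGGUF() (string, error) {
	f, err := os.CreateTemp("", "ollama-gguf")
	if err != nil {
		return "", err
	}
	defer f.Close()
	return f.Name(), nil
}

func main() {
	var m ggufWriter = stubModel{}
	path, err := m.WriteGGUF()
	if err != nil {
		panic(err)
	}
	defer os.Remove(path) // the caller now owns, and must clean up, the temp file
	fmt.Println("gguf written to", path)
}
```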
58  docs/api.md
@@ -17,7 +17,7 @@

 ### Model names

-Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
+Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama2:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.

 ### Durations

@@ -66,7 +66,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur

 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3",
+  "model": "llama2",
   "prompt": "Why is the sky blue?"
 }'
 ```
@@ -77,7 +77,7 @@ A stream of JSON objects is returned:

 ```json
 {
-  "model": "llama3",
+  "model": "llama2",
   "created_at": "2023-08-04T08:52:19.385406455-07:00",
   "response": "The",
   "done": false
@@ -99,7 +99,7 @@ To calculate how fast the response is generated in tokens per second (token/s),

 ```json
 {
-  "model": "llama3",
+  "model": "llama2",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "response": "",
   "done": true,
@@ -121,7 +121,7 @@ A response can be received in one reply when streaming is off.

 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3",
+  "model": "llama2",
   "prompt": "Why is the sky blue?",
   "stream": false
 }'
@@ -133,7 +133,7 @@ If `stream` is set to `false`, the response will be a single JSON object:

 ```json
 {
-  "model": "llama3",
+  "model": "llama2",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "response": "The sky is blue because it is the color of the sky.",
   "done": true,
@@ -155,7 +155,7 @@ If `stream` is set to `false`, the response will be a single JSON object:

 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3",
+  "model": "llama2",
   "prompt": "What color is the sky at different times of the day? Respond using JSON",
   "format": "json",
   "stream": false
@@ -166,7 +166,7 @@ curl http://localhost:11434/api/generate -d '{

 ```json
 {
-  "model": "llama3",
+  "model": "llama2",
   "created_at": "2023-11-09T21:07:55.186497Z",
   "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
   "done": true,
@@ -289,7 +289,7 @@ If you want to set custom options for the model at runtime rather than in the Mo

 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3",
+  "model": "llama2",
   "prompt": "Why is the sky blue?",
   "stream": false,
   "options": {
@@ -332,7 +332,7 @@ curl http://localhost:11434/api/generate -d '{

 ```json
 {
-  "model": "llama3",
+  "model": "llama2",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "response": "The sky is blue because it is the color of the sky.",
   "done": true,
@@ -354,7 +354,7 @@ If an empty prompt is provided, the model will be loaded into memory.

 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3"
+  "model": "llama2"
 }'
 ```

@@ -364,7 +364,7 @@ A single JSON object is returned:

 ```json
 {
-  "model": "llama3",
+  "model": "llama2",
   "created_at": "2023-12-18T19:52:07.071755Z",
   "response": "",
   "done": true
@@ -407,7 +407,7 @@ Send a chat message with a streaming response.

 ```shell
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3",
+  "model": "llama2",
   "messages": [
     {
       "role": "user",
@@ -423,7 +423,7 @@ A stream of JSON objects is returned:

 ```json
 {
-  "model": "llama3",
+  "model": "llama2",
   "created_at": "2023-08-04T08:52:19.385406455-07:00",
   "message": {
     "role": "assistant",
@@ -438,7 +438,7 @@ Final response:

 ```json
 {
-  "model": "llama3",
+  "model": "llama2",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "done": true,
   "total_duration": 4883583458,
@@ -456,7 +456,7 @@ Final response:

 ```shell
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3",
+  "model": "llama2",
   "messages": [
     {
       "role": "user",
@@ -471,7 +471,7 @@ curl http://localhost:11434/api/chat -d '{

 ```json
 {
-  "model": "registry.ollama.ai/library/llama3:latest",
+  "model": "registry.ollama.ai/library/llama2:latest",
   "created_at": "2023-12-12T14:13:43.416799Z",
   "message": {
     "role": "assistant",
@@ -495,7 +495,7 @@ Send a chat message with a conversation history. You can use this same approach

 ```shell
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3",
+  "model": "llama2",
   "messages": [
     {
       "role": "user",
@@ -519,7 +519,7 @@ A stream of JSON objects is returned:

 ```json
 {
-  "model": "llama3",
+  "model": "llama2",
   "created_at": "2023-08-04T08:52:19.385406455-07:00",
   "message": {
     "role": "assistant",
@@ -533,7 +533,7 @@ Final response:

 ```json
 {
-  "model": "llama3",
+  "model": "llama2",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "done": true,
   "total_duration": 8113331500,
@@ -591,7 +591,7 @@ curl http://localhost:11434/api/chat -d '{

 ```shell
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3",
+  "model": "llama2",
   "messages": [
     {
       "role": "user",
@@ -609,7 +609,7 @@ curl http://localhost:11434/api/chat -d '{

 ```json
 {
-  "model": "registry.ollama.ai/library/llama3:latest",
+  "model": "registry.ollama.ai/library/llama2:latest",
   "created_at": "2023-12-12T14:13:43.416799Z",
   "message": {
     "role": "assistant",
@@ -651,7 +651,7 @@ Create a new model from a `Modelfile`.

 ```shell
 curl http://localhost:11434/api/create -d '{
   "name": "mario",
-  "modelfile": "FROM llama3\nSYSTEM You are mario from Super Mario Bros."
+  "modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros."
 }'
 ```

@@ -758,7 +758,7 @@ A single JSON object will be returned.
       }
     },
     {
-      "name": "llama3:latest",
+      "name": "llama2:latest",
       "modified_at": "2023-12-07T09:32:18.757212583-08:00",
       "size": 3825819519,
       "digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e",
@@ -792,7 +792,7 @@ Show information about a model including details, modelfile, template, parameter

 ```shell
 curl http://localhost:11434/api/show -d '{
-  "name": "llama3"
+  "name": "llama2"
 }'
 ```

@@ -827,8 +827,8 @@ Copy a model. Creates a model with another name from an existing model.

 ```shell
 curl http://localhost:11434/api/copy -d '{
-  "source": "llama3",
-  "destination": "llama3-backup"
+  "source": "llama2",
+  "destination": "llama2-backup"
 }'
 ```

@@ -854,7 +854,7 @@ Delete a model and its data.

 ```shell
 curl -X DELETE http://localhost:11434/api/delete -d '{
-  "name": "llama3:13b"
+  "name": "llama2:13b"
 }'
 ```

@@ -882,7 +882,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where

 ```shell
 curl http://localhost:11434/api/pull -d '{
-  "name": "llama3"
+  "name": "llama2"
 }'
 ```

10  docs/faq.md
@@ -32,7 +32,7 @@ When using the API, specify the `num_ctx` parameter:

 ```
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3",
+  "model": "llama2",
   "prompt": "Why is the sky blue?",
   "options": {
     "num_ctx": 4096
@@ -88,9 +88,9 @@ On windows, Ollama inherits your user and system environment variables.

 3. Edit or create New variable(s) for your user account for `OLLAMA_HOST`, `OLLAMA_MODELS`, etc.

-4. Click OK/Apply to save
+4. Click OK/Apply to save

-5. Run `ollama` from a new terminal window
+5. Run `ollama` from a new terminal window

 ## How can I expose Ollama on my network?

@@ -221,12 +221,12 @@ The `keep_alive` parameter can be set to:

 For example, to preload a model and leave it in memory use:

 ```shell
-curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": -1}'
+curl http://localhost:11434/api/generate -d '{"model": "llama2", "keep_alive": -1}'
 ```

 To unload the model and free up memory use:

 ```shell
-curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": 0}'
+curl http://localhost:11434/api/generate -d '{"model": "llama2", "keep_alive": 0}'
 ```

 Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable.

@@ -10,7 +10,7 @@ A model file is the blueprint to create and share models with Ollama.

 - [Examples](#examples)
 - [Instructions](#instructions)
   - [FROM (Required)](#from-required)
-    - [Build from llama3](#build-from-llama3)
+    - [Build from llama2](#build-from-llama2)
     - [Build from a bin file](#build-from-a-bin-file)
   - [PARAMETER](#parameter)
     - [Valid Parameters and Values](#valid-parameters-and-values)
@@ -48,7 +48,7 @@ INSTRUCTION arguments
 An example of a `Modelfile` creating a mario blueprint:

 ```modelfile
-FROM llama3
+FROM llama2
 # sets the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1
 # sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
@@ -67,25 +67,33 @@ To use this:

 More examples are available in the [examples directory](../examples).

-To view the Modelfile of a given model, use the `ollama show --modelfile` command.
+### `Modelfile`s in [ollama.com/library][1]
+
+There are two ways to view `Modelfile`s underlying the models in [ollama.com/library][1]:
+
+- Option 1: view a details page from a model's tags page:
+  1. Go to a particular model's tags (e.g. https://ollama.com/library/llama2/tags)
+  2. Click on a tag (e.g. https://ollama.com/library/llama2:13b)
+  3. Scroll down to "Layers"
+    - Note: if the [`FROM` instruction](#from-required) is not present,
+      it means the model was created from a local file
+- Option 2: use `ollama show` to print the `Modelfile` for any local models like so:

 ```bash
-> ollama show --modelfile llama3
+> ollama show --modelfile llama2:13b
 # Modelfile generated by "ollama show"
 # To build a new Modelfile based on this one, replace the FROM line with:
-# FROM llama3:latest
-FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29
-TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
-
-{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
-
-{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
-
-{{ .Response }}<|eot_id|>"""
-PARAMETER stop "<|start_header_id|>"
-PARAMETER stop "<|end_header_id|>"
-PARAMETER stop "<|eot_id|>"
-PARAMETER stop "<|reserved_special_token"
+# FROM llama2:13b
+
+FROM /root/.ollama/models/blobs/sha256:123abc
+TEMPLATE """[INST] {{ if .System }}<<SYS>>{{ .System }}<</SYS>>
+
+{{ end }}{{ .Prompt }} [/INST] """
+SYSTEM """"""
+PARAMETER stop [INST]
+PARAMETER stop [/INST]
+PARAMETER stop <<SYS>>
+PARAMETER stop <</SYS>>
 ```

 ## Instructions

@@ -98,10 +106,10 @@ The `FROM` instruction defines the base model to use when creating a model.

 ```modelfile
 FROM <model name>:<tag>
 ```

-#### Build from llama3
+#### Build from llama2

 ```modelfile
-FROM llama3
+FROM llama2
 ```

 A list of available base models:

@@ -25,7 +25,7 @@ chat_completion = client.chat.completions.create(
             'content': 'Say this is a test',
         }
     ],
-    model='llama3',
+    model='llama2',
 )
 ```

@@ -43,7 +43,7 @@ const openai = new OpenAI({

 const chatCompletion = await openai.chat.completions.create({
     messages: [{ role: 'user', content: 'Say this is a test' }],
-    model: 'llama3',
+    model: 'llama2',
 })
 ```

@@ -53,7 +53,7 @@ const chatCompletion = await openai.chat.completions.create({
 curl http://localhost:11434/v1/chat/completions \
     -H "Content-Type: application/json" \
     -d '{
-        "model": "llama3",
+        "model": "llama2",
         "messages": [
             {
                 "role": "system",
@@ -113,7 +113,7 @@ curl http://localhost:11434/v1/chat/completions \
 Before using a model, pull it locally `ollama pull`:

 ```shell
-ollama pull llama3
+ollama pull llama2
 ```

 ### Default model names

@@ -121,7 +121,7 @@ ollama pull llama3
 For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name:

 ```
-ollama cp llama3 gpt-3.5-turbo
+ollama cp llama2 gpt-3.5-turbo
 ```

 Afterwards, this new model name can be specified the `model` field:

@@ -15,7 +15,7 @@ import { Ollama } from "langchain/llms/ollama";

 const ollama = new Ollama({
   baseUrl: "http://localhost:11434",
-  model: "llama3",
+  model: "llama2",
 });

 const answer = await ollama.invoke(`why is the sky blue?`);
@@ -23,10 +23,10 @@ const answer = await ollama.invoke(`why is the sky blue?`);
 console.log(answer);
 ```

-That will get us the same thing as if we ran `ollama run llama3 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
+That will get us the same thing as if we ran `ollama run llama2 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.

 ```bash
-npm install cheerio
+npm install cheerio
 ```

 ```javascript

@@ -1,4 +1,3 @@
-<<<<<<< HEAD
 # Ollama Windows Preview

 Welcome to the Ollama Windows preview.
@@ -28,7 +27,7 @@ Logs will often be helpful in diagnosing the problem (see

 Here's a quick example showing API access from `powershell`
 ```powershell
-(Invoke-WebRequest -method POST -Body '{"model":"llama3", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
+(Invoke-WebRequest -method POST -Body '{"model":"llama2", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
 ```

 ## Troubleshooting

@@ -2,7 +2,7 @@

 When calling `ollama`, you can pass it a file to run all the prompts in the file, one after the other:

-`ollama run llama3 < sourcequestions.txt`
+`ollama run llama2 < sourcequestions.txt`

 This concept is used in the following example.

@@ -35,7 +35,7 @@ func main() {

 	ctx := context.Background()
 	req := &api.ChatRequest{
-		Model:    "llama3",
+		Model:    "llama2",
 		Messages: messages,
 	}

@@ -40,9 +40,9 @@ while True:
         continue

     # Prompt
-    template = """Use the following pieces of context to answer the question at the end.
-    If you don't know the answer, just say that you don't know, don't try to make up an answer.
-    Use three sentences maximum and keep the answer as concise as possible.
+    template = """Use the following pieces of context to answer the question at the end.
+    If you don't know the answer, just say that you don't know, don't try to make up an answer.
+    Use three sentences maximum and keep the answer as concise as possible.
     {context}
     Question: {question}
     Helpful Answer:"""
@@ -51,11 +51,11 @@ while True:
         template=template,
     )

-    llm = Ollama(model="llama3:8b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+    llm = Ollama(model="llama2:13b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
     qa_chain = RetrievalQA.from_chain_type(
         llm,
         retriever=vectorstore.as_retriever(),
         chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
     )

-    result = qa_chain({"query": query})
+    result = qa_chain({"query": query})
@@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama.

 ## Running the Example

-1. Ensure you have the `llama3` model installed:
+1. Ensure you have the `llama2` model installed:

    ```bash
-   ollama pull llama3
+   ollama pull llama2
    ```

 2. Install the Python Requirements.

@@ -21,3 +21,4 @@ This example is a basic "hello world" of using LangChain with Ollama.
 ```bash
 python main.py
 ```

@@ -1,6 +1,6 @@
 from langchain.llms import Ollama

 input = input("What is your question?")
-llm = Ollama(model="llama3")
+llm = Ollama(model="llama2")
 res = llm.predict(input)
 print (res)

@@ -1,4 +1,4 @@
-FROM llama3
+FROM llama2
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from super mario bros, acting as an assistant.

@@ -2,12 +2,12 @@

 # Example character: Mario

-This example shows how to create a basic character using Llama3 as the base model.
+This example shows how to create a basic character using Llama2 as the base model.

 To run this example:

 1. Download the Modelfile
-2. `ollama pull llama3` to get the base model used in the model file.
+2. `ollama pull llama2` to get the base model used in the model file.
 3. `ollama create NAME -f ./Modelfile`
 4. `ollama run NAME`

@@ -18,7 +18,7 @@ Ask it some questions like "Who are you?" or "Is Peach in trouble again?"
 What the model file looks like:

 ```
-FROM llama3
+FROM llama2
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from Super Mario Bros, acting as an assistant.

@@ -2,16 +2,16 @@ import requests
 import json
 import random

-model = "llama3"
+model = "llama2"
 template = {
-  "firstName": "",
-  "lastName": "",
+  "firstName": "",
+  "lastName": "",
   "address": {
-    "street": "",
-    "city": "",
-    "state": "",
+    "street": "",
+    "city": "",
+    "state": "",
     "zipCode": ""
-  },
+  },
   "phoneNumber": ""
 }

@@ -12,7 +12,7 @@ countries = [
     "France",
 ]
 country = random.choice(countries)
-model = "llama3"
+model = "llama2"

 prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."

@@ -6,10 +6,10 @@ There are two python scripts in this example. `randomaddresses.py` generates ran

 ## Running the Example

-1. Ensure you have the `llama3` model installed:
+1. Ensure you have the `llama2` model installed:

    ```bash
-   ollama pull llama3
+   ollama pull llama2
    ```

 2. Install the Python Requirements.

@@ -2,7 +2,7 @@ import json
 import requests

 # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
-model = "llama3" # TODO: update this for whatever model you wish to use
+model = "llama2" # TODO: update this for whatever model you wish to use


 def chat(messages):

@@ -4,10 +4,10 @@ The **chat** endpoint is one of two ways to generate text from an LLM with Ollam

 ## Running the Example

-1. Ensure you have the `llama3` model installed:
+1. Ensure you have the `llama2` model installed:

    ```bash
-   ollama pull llama3
+   ollama pull llama2
    ```

 2. Install the Python Requirements.

@@ -4,10 +4,10 @@ This example demonstrates how one would create a set of 'mentors' you can have a

 ## Usage

-1. Add llama3 to have the mentors ask your questions:
+1. Add llama2 to have the mentors ask your questions:

    ```bash
-   ollama pull llama3
+   ollama pull llama2
    ```

 2. Install prerequisites:

@@ -15,7 +15,7 @@ async function characterGenerator() {
   ollama.setModel("stablebeluga2:70b-q4_K_M");
   const bio = await ollama.generate(`create a bio of ${character} in a single long paragraph. Instead of saying '${character} is...' or '${character} was...' use language like 'You are...' or 'You were...'. Then create a paragraph describing the speaking mannerisms and style of ${character}. Don't include anything about how ${character} looked or what they sounded like, just focus on the words they said. Instead of saying '${character} would say...' use language like 'You should say...'. If you use quotes, always use single quotes instead of double quotes. If there are any specific words or phrases you used a lot, show how you used them. `);

-  const thecontents = `FROM llama3\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`;
+  const thecontents = `FROM llama2\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`;

   fs.writeFile(path.join(directory, 'Modelfile'), thecontents, (err: any) => {
     if (err) throw err;
@@ -23,4 +23,4 @@ async function characterGenerator() {
   });
 }

-characterGenerator();
+characterGenerator();
@@ -1,6 +1,6 @@
 import * as readline from "readline";

-const model = "llama3";
+const model = "llama2";
 type Message = {
   role: "assistant" | "user" | "system";
   content: string;
@@ -74,4 +74,4 @@ async function main() {

 }

-main();
+main();
@@ -107,7 +107,7 @@ func startServer(ctx context.Context, ollamaHost string) error {

 	if tmp := os.Getenv("OLLAMA_HOST"); tmp != ollamaHost {
 		slog.Info("setting env", "OLLAMA_HOST", ollamaHost)
-		t.Setenv("OLLAMA_HOST", ollamaHost)
+		os.Setenv("OLLAMA_HOST", ollamaHost)
 	}

 	slog.Info("starting server", "url", ollamaHost)

165  llm/filetype.go
@@ -1,165 +0,0 @@
-package llm
-
-import "fmt"
-
-type fileType uint32
-
-const (
-	fileTypeF32 fileType = iota
-	fileTypeF16
-	fileTypeQ4_0
-	fileTypeQ4_1
-	fileTypeQ4_1_F16
-	fileTypeQ4_2 // unused
-	fileTypeQ4_3 // unused
-	fileTypeQ8_0
-	fileTypeQ5_0
-	fileTypeQ5_1
-	fileTypeQ2_K
-	fileTypeQ3_K_S
-	fileTypeQ3_K_M
-	fileTypeQ3_K_L
-	fileTypeQ4_K_S
-	fileTypeQ4_K_M
-	fileTypeQ5_K_S
-	fileTypeQ5_K_M
-	fileTypeQ6_K
-	fileTypeIQ2_XXS
-	fileTypeIQ2_XS
-	fileTypeQ2_K_S
-	fileTypeQ3_K_XS
-	fileTypeIQ3_XXS
-	fileTypeIQ1_S
-	fileTypeIQ4_NL
-	fileTypeIQ3_S
-	fileTypeIQ2_S
-	fileTypeIQ4_XS
-
-	fileTypeUnknown
-)
-
-func ParseFileType(s string) (fileType, error) {
-	switch s {
-	case "F32":
-		return fileTypeF32, nil
-	case "F16":
-		return fileTypeF16, nil
-	case "Q4_0":
-		return fileTypeQ4_0, nil
-	case "Q4_1":
-		return fileTypeQ4_1, nil
-	case "Q4_1_F16":
-		return fileTypeQ4_1_F16, nil
-	case "Q8_0":
-		return fileTypeQ8_0, nil
-	case "Q5_0":
-		return fileTypeQ5_0, nil
-	case "Q5_1":
-		return fileTypeQ5_1, nil
-	case "Q2_K":
-		return fileTypeQ2_K, nil
-	case "Q3_K_S":
-		return fileTypeQ3_K_S, nil
-	case "Q3_K_M":
-		return fileTypeQ3_K_M, nil
-	case "Q3_K_L":
-		return fileTypeQ3_K_L, nil
-	case "Q4_K_S":
-		return fileTypeQ4_K_S, nil
-	case "Q4_K_M":
-		return fileTypeQ4_K_M, nil
-	case "Q5_K_S":
-		return fileTypeQ5_K_S, nil
-	case "Q5_K_M":
-		return fileTypeQ5_K_M, nil
-	case "Q6_K":
-		return fileTypeQ6_K, nil
-	case "IQ2_XXS":
-		return fileTypeIQ2_XXS, nil
-	case "IQ2_XS":
-		return fileTypeIQ2_XS, nil
-	case "Q2_K_S":
-		return fileTypeQ2_K_S, nil
-	case "Q3_K_XS":
-		return fileTypeQ3_K_XS, nil
-	case "IQ1_S":
-		return fileTypeIQ1_S, nil
-	case "IQ4_NL":
-		return fileTypeIQ4_NL, nil
-	case "IQ3_S":
-		return fileTypeIQ3_S, nil
-	case "IQ2_S":
-		return fileTypeIQ2_S, nil
-	case "IQ4_XS":
-		return fileTypeIQ4_XS, nil
-	case "IQ3_XXS":
-		return fileTypeIQ3_XXS, nil
-	default:
-		return fileTypeUnknown, fmt.Errorf("unknown fileType: %s", s)
-	}
-}
-
-func (t fileType) String() string {
-	switch t {
-	case fileTypeF32:
-		return "F32"
-	case fileTypeF16:
-		return "F16"
-	case fileTypeQ4_0:
-		return "Q4_0"
-	case fileTypeQ4_1:
-		return "Q4_1"
-	case fileTypeQ4_1_F16:
-		return "Q4_1_F16"
-	case fileTypeQ8_0:
-		return "Q8_0"
-	case fileTypeQ5_0:
-		return "Q5_0"
-	case fileTypeQ5_1:
-		return "Q5_1"
-	case fileTypeQ2_K:
-		return "Q2_K"
-	case fileTypeQ3_K_S:
-		return "Q3_K_S"
-	case fileTypeQ3_K_M:
-		return "Q3_K_M"
-	case fileTypeQ3_K_L:
-		return "Q3_K_L"
-	case fileTypeQ4_K_S:
-		return "Q4_K_S"
-	case fileTypeQ4_K_M:
-		return "Q4_K_M"
-	case fileTypeQ5_K_S:
-		return "Q5_K_S"
-	case fileTypeQ5_K_M:
-		return "Q5_K_M"
-	case fileTypeQ6_K:
-		return "Q6_K"
-	case fileTypeIQ2_XXS:
-		return "IQ2_XXS"
-	case fileTypeIQ2_XS:
-		return "IQ2_XS"
-	case fileTypeQ2_K_S:
-		return "Q2_K_S"
-	case fileTypeQ3_K_XS:
-		return "Q3_K_XS"
-	case fileTypeIQ1_S:
-		return "IQ1_S"
-	case fileTypeIQ4_NL:
-		return "IQ4_NL"
-	case fileTypeIQ3_S:
-		return "IQ3_S"
-	case fileTypeIQ2_S:
-		return "IQ2_S"
-	case fileTypeIQ4_XS:
-		return "IQ4_XS"
-	case fileTypeIQ3_XXS:
-		return "IQ3_XXS"
-	default:
-		return "unknown"
-	}
-}
-
-func (t fileType) Value() uint32 {
-	return uint32(t)
-}
109  llm/ggml.go
@@ -13,6 +13,82 @@ type GGML struct {
 	model
 }

+const (
+	fileTypeF32 uint32 = iota
+	fileTypeF16
+	fileTypeQ4_0
+	fileTypeQ4_1
+	fileTypeQ4_1_F16
+	fileTypeQ8_0 uint32 = iota + 2
+	fileTypeQ5_0
+	fileTypeQ5_1
+	fileTypeQ2_K
+	fileTypeQ3_K_S
+	fileTypeQ3_K_M
+	fileTypeQ3_K_L
+	fileTypeQ4_K_S
+	fileTypeQ4_K_M
+	fileTypeQ5_K_S
+	fileTypeQ5_K_M
+	fileTypeQ6_K
+	fileTypeIQ2_XXS
+	fileTypeIQ2_XS
+	fileTypeQ2_K_S
+	fileTypeQ3_K_XS
+	fileTypeIQ3_XXS
+)
+
+func fileType(fileType uint32) string {
+	switch fileType {
+	case fileTypeF32:
+		return "F32"
+	case fileTypeF16:
+		return "F16"
+	case fileTypeQ4_0:
+		return "Q4_0"
+	case fileTypeQ4_1:
+		return "Q4_1"
+	case fileTypeQ4_1_F16:
+		return "Q4_1_F16"
+	case fileTypeQ8_0:
+		return "Q8_0"
+	case fileTypeQ5_0:
+		return "Q5_0"
+	case fileTypeQ5_1:
+		return "Q5_1"
+	case fileTypeQ2_K:
+		return "Q2_K"
+	case fileTypeQ3_K_S:
+		return "Q3_K_S"
+	case fileTypeQ3_K_M:
+		return "Q3_K_M"
+	case fileTypeQ3_K_L:
+		return "Q3_K_L"
+	case fileTypeQ4_K_S:
+		return "Q4_K_S"
+	case fileTypeQ4_K_M:
+		return "Q4_K_M"
+	case fileTypeQ5_K_S:
+		return "Q5_K_S"
+	case fileTypeQ5_K_M:
+		return "Q5_K_M"
+	case fileTypeQ6_K:
+		return "Q6_K"
+	case fileTypeIQ2_XXS:
+		return "IQ2_XXS"
+	case fileTypeIQ2_XS:
+		return "IQ2_XS"
+	case fileTypeQ2_K_S:
+		return "Q2_K_S"
+	case fileTypeQ3_K_XS:
+		return "Q3_K_XS"
+	case fileTypeIQ3_XXS:
+		return "IQ3_XXS"
+	default:
+		return "unknown"
+	}
+}
+
 type model interface {
 	KV() KV
 	Tensors() Tensors
@@ -47,7 +123,7 @@ func (kv KV) ParameterCount() uint64 {

 func (kv KV) FileType() string {
 	if u64 := kv.u64("general.file_type"); u64 > 0 {
-		return fileType(uint32(u64)).String()
+		return fileType(uint32(u64))
 	}

 	return "unknown"
@@ -125,9 +201,9 @@ type Tensor struct {

 func (t Tensor) blockSize() uint64 {
 	switch {
-	case t.Kind < 2 || (t.Kind > 23 && t.Kind < 29):
+	case t.Kind < 2:
 		return 1
-	case t.Kind < 10 || t.Kind == 20:
+	case t.Kind < 10:
 		return 32
 	default:
 		return 256
@@ -172,16 +248,6 @@ func (t Tensor) typeSize() uint64 {
 		return 2 + 2*blockSize/8 + blockSize/32
 	case 18: // IQ3_XXS
 		return 2 + 3*blockSize/8
-	case 19: // IQ1_S
-		return 2 + blockSize/8 + blockSize/16
-	case 20: // IQ4_NL
-		return 2 + blockSize/2
-	case 21: // IQ3_S
-		return 2 + 2*blockSize/8 + blockSize/8 + blockSize/32 + 4
-	case 22: // IQ2_S
-		return 2 + blockSize/4 + blockSize/16
-	case 23: // IQ4_XS
-		return 4 + blockSize/2 + blockSize/64
 	default:
 		return 0
 	}
@@ -220,23 +286,6 @@ const (

 var ErrUnsupportedFormat = errors.New("unsupported model format")

-func DetectGGMLType(b []byte) string {
-	switch binary.LittleEndian.Uint32(b[:4]) {
-	case FILE_MAGIC_GGML:
-		return "ggml"
-	case FILE_MAGIC_GGMF:
-		return "ggmf"
-	case FILE_MAGIC_GGJT:
-		return "ggjt"
-	case FILE_MAGIC_GGLA:
-		return "ggla"
-	case FILE_MAGIC_GGUF_LE, FILE_MAGIC_GGUF_BE:
-		return "gguf"
-	default:
-		return ""
-	}
-}
-
 func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) {
 	var magic uint32
 	if err := binary.Read(rs, binary.LittleEndian, &magic); err != nil {

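One detail in the new const block is easy to misread: `fileTypeQ8_0 uint32 = iota + 2` deliberately skips two values. A tiny self-contained check (my sketch, not from the diff) — the skipped slots correspond to the retired Q4_2/Q4_3 file types that the deleted llm/filetype.go marked as unused, so `Q8_0` keeps its on-disk value of 7:

```go
package main

import "fmt"

const (
	fileTypeF32 uint32 = iota // 0
	fileTypeF16               // 1
	fileTypeQ4_0              // 2
	fileTypeQ4_1              // 3
	fileTypeQ4_1_F16          // 4
	// `iota + 2` skips the retired Q4_2 (5) and Q4_3 (6) slots.
	fileTypeQ8_0 uint32 = iota + 2 // 5 + 2 = 7
	fileTypeQ5_0                   // 6 + 2 = 8
)

func main() {
	fmt.Println(fileTypeQ8_0, fileTypeQ5_0) // prints: 7 8
}
```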
56  llm/llm.go
@@ -20,7 +20,7 @@ func SystemInfo() string {
 	return C.GoString(C.llama_print_system_info())
 }

-func Quantize(infile, outfile string, ftype fileType) error {
+func Quantize(infile, outfile, filetype string) error {
 	cinfile := C.CString(infile)
 	defer C.free(unsafe.Pointer(cinfile))

@@ -29,10 +29,58 @@ func Quantize(infile, outfile string, ftype fileType) error {

 	params := C.llama_model_quantize_default_params()
 	params.nthread = -1
-	params.ftype = ftype.Value()

-	if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
-		return fmt.Errorf("llama_model_quantize: %d", rc)
+	switch filetype {
+	case "F32":
+		params.ftype = fileTypeF32
+	case "F16":
+		params.ftype = fileTypeF16
+	case "Q4_0":
+		params.ftype = fileTypeQ4_0
+	case "Q4_1":
+		params.ftype = fileTypeQ4_1
+	case "Q4_1_F16":
+		params.ftype = fileTypeQ4_1_F16
+	case "Q8_0":
+		params.ftype = fileTypeQ8_0
+	case "Q5_0":
+		params.ftype = fileTypeQ5_0
+	case "Q5_1":
+		params.ftype = fileTypeQ5_1
+	case "Q2_K":
+		params.ftype = fileTypeQ2_K
+	case "Q3_K_S":
+		params.ftype = fileTypeQ3_K_S
+	case "Q3_K_M":
+		params.ftype = fileTypeQ3_K_M
+	case "Q3_K_L":
+		params.ftype = fileTypeQ3_K_L
+	case "Q4_K_S":
+		params.ftype = fileTypeQ4_K_S
+	case "Q4_K_M":
+		params.ftype = fileTypeQ4_K_M
+	case "Q5_K_S":
+		params.ftype = fileTypeQ5_K_S
+	case "Q5_K_M":
+		params.ftype = fileTypeQ5_K_M
+	case "Q6_K":
+		params.ftype = fileTypeQ6_K
+	case "IQ2_XXS":
+		params.ftype = fileTypeIQ2_XXS
+	case "IQ2_XS":
+		params.ftype = fileTypeIQ2_XS
+	case "Q2_K_S":
+		params.ftype = fileTypeQ2_K_S
+	case "Q3_K_XS":
+		params.ftype = fileTypeQ3_K_XS
+	case "IQ3_XXS":
+		params.ftype = fileTypeIQ3_XXS
+	default:
+		return fmt.Errorf("unknown filetype: %s", filetype)
+	}
+
+	if retval := C.llama_model_quantize(cinfile, coutfile, &params); retval != 0 {
+		return fmt.Errorf("llama_model_quantize: %d", retval)
 	}

 	return nil

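For orientation, a sketch of the new `Quantize` call shape (mine, not from the diff; the file names are hypothetical, and the import path matches the one used in cmd/cmd.go above). The quantization type now travels as a plain string — e.g. the value of the `--quantization` flag — and an unrecognized name fails at runtime with the `unknown filetype` error from the switch:

```go
package main

import (
	"log"

	"github.com/ollama/ollama/llm"
)

func main() {
	// Quantize takes the quantization by name; hypothetical input/output paths.
	if err := llm.Quantize("model-f16.gguf", "model-q4_K_M.gguf", "Q4_K_M"); err != nil {
		log.Fatal(err)
	}
}
```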
@@ -1,4 +1,4 @@
-package model
+package parser

 import (
 	"bufio"
@@ -10,41 +10,11 @@ import (
 	"strings"
 )

-type File struct {
-	Commands []Command
-}
-
-func (f File) String() string {
-	var sb strings.Builder
-	for _, cmd := range f.Commands {
-		fmt.Fprintln(&sb, cmd.String())
-	}
-
-	return sb.String()
-}
-
 type Command struct {
 	Name string
 	Args string
 }

-func (c Command) String() string {
-	var sb strings.Builder
-	switch c.Name {
-	case "model":
-		fmt.Fprintf(&sb, "FROM %s", c.Args)
-	case "license", "template", "system", "adapter":
-		fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args))
-	case "message":
-		role, message, _ := strings.Cut(c.Args, ": ")
-		fmt.Fprintf(&sb, "MESSAGE %s %s", role, quote(message))
-	default:
-		fmt.Fprintf(&sb, "PARAMETER %s %s", c.Name, quote(c.Args))
-	}
-
-	return sb.String()
-}
-
 type state int

 const (
@@ -62,14 +32,38 @@ var (
 	errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
 )

-func ParseFile(r io.Reader) (*File, error) {
+func Format(cmds []Command) string {
+	var sb strings.Builder
+	for _, cmd := range cmds {
+		name := cmd.Name
+		args := cmd.Args
+
+		switch cmd.Name {
+		case "model":
+			name = "from"
+			args = cmd.Args
+		case "license", "template", "system", "adapter":
+			args = quote(args)
+		case "message":
+			role, message, _ := strings.Cut(cmd.Args, ": ")
+			args = role + " " + quote(message)
+		default:
+			name = "parameter"
+			args = cmd.Name + " " + quote(cmd.Args)
+		}
+
+		fmt.Fprintln(&sb, strings.ToUpper(name), args)
+	}
+
+	return sb.String()
+}
+
+func Parse(r io.Reader) (cmds []Command, err error) {
 	var cmd Command
 	var curr state
 	var b bytes.Buffer
 	var role string

-	var f File
-
 	br := bufio.NewReader(r)
 	for {
 		r, _, err := br.ReadRune()
@@ -134,7 +128,7 @@ func ParseFile(r io.Reader) (*File, error) {
 			}

 			cmd.Args = s
-			f.Commands = append(f.Commands, cmd)
+			cmds = append(cmds, cmd)
 		}

 		b.Reset()
@@ -163,14 +157,14 @@ func ParseFile(r io.Reader) (*File, error) {
 		}

 		cmd.Args = s
-		f.Commands = append(f.Commands, cmd)
+		cmds = append(cmds, cmd)
 	default:
 		return nil, io.ErrUnexpectedEOF
 	}

-	for _, cmd := range f.Commands {
+	for _, cmd := range cmds {
 		if cmd.Name == "model" {
-			return &f, nil
+			return cmds, nil
 		}
 	}

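A minimal round-trip sketch of the reshaped API (mine, not part of the diff): `Parse` now returns a flat `[]parser.Command` instead of a `*File`, and the new `Format` is its inverse, rendering commands back to Modelfile text. This assumes `quote` leaves single-word arguments unquoted, which the PARAMETER test cases below suggest.

```go
package main

import (
	"fmt"
	"strings"

	"github.com/ollama/ollama/parser"
)

func main() {
	// Parse a Modelfile into commands, then render it back with Format.
	commands, err := parser.Parse(strings.NewReader("FROM llama2\nPARAMETER temperature 1\n"))
	if err != nil {
		panic(err)
	}
	fmt.Print(parser.Format(commands))
	// FROM llama2
	// PARAMETER temperature 1
}
```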
@@ -1,4 +1,4 @@
-package model
+package parser

 import (
 	"bytes"
@@ -10,7 +10,7 @@ import (
 	"github.com/stretchr/testify/assert"
 )

-func TestParseFileFile(t *testing.T) {
+func TestParser(t *testing.T) {
 	input := `
FROM model1
ADAPTER adapter1
@@ -22,8 +22,8 @@ TEMPLATE template1

 	reader := strings.NewReader(input)

-	modelfile, err := ParseFile(reader)
-	assert.NoError(t, err)
+	commands, err := Parse(reader)
+	assert.Nil(t, err)

 	expectedCommands := []Command{
 		{Name: "model", Args: "model1"},
@@ -34,10 +34,10 @@ TEMPLATE template1
 		{Name: "template", Args: "template1"},
 	}

-	assert.Equal(t, expectedCommands, modelfile.Commands)
+	assert.Equal(t, expectedCommands, commands)
 }

-func TestParseFileFrom(t *testing.T) {
+func TestParserFrom(t *testing.T) {
 	var cases = []struct {
 		input    string
 		expected []Command
@@ -85,16 +85,14 @@ func TestParseFileFrom(t *testing.T) {

 	for _, c := range cases {
 		t.Run("", func(t *testing.T) {
-			modelfile, err := ParseFile(strings.NewReader(c.input))
+			commands, err := Parse(strings.NewReader(c.input))
 			assert.ErrorIs(t, err, c.err)
-			if modelfile != nil {
-				assert.Equal(t, c.expected, modelfile.Commands)
-			}
+			assert.Equal(t, c.expected, commands)
 		})
 	}
 }

-func TestParseFileParametersMissingValue(t *testing.T) {
+func TestParserParametersMissingValue(t *testing.T) {
 	input := `
FROM foo
PARAMETER param1
@@ -102,21 +100,21 @@ PARAMETER param1

 	reader := strings.NewReader(input)

-	_, err := ParseFile(reader)
+	_, err := Parse(reader)
 	assert.ErrorIs(t, err, io.ErrUnexpectedEOF)
 }

-func TestParseFileBadCommand(t *testing.T) {
+func TestParserBadCommand(t *testing.T) {
 	input := `
FROM foo
BADCOMMAND param1 value1
`
-	_, err := ParseFile(strings.NewReader(input))
+	_, err := Parse(strings.NewReader(input))
 	assert.ErrorIs(t, err, errInvalidCommand)

 }

-func TestParseFileMessages(t *testing.T) {
+func TestParserMessages(t *testing.T) {
 	var cases = []struct {
 		input    string
 		expected []Command
@@ -125,34 +123,34 @@ func TestParseFileMessages(t *testing.T) {
 		{
 			`
FROM foo
-MESSAGE system You are a file parser. Always parse things.
+MESSAGE system You are a Parser. Always Parse things.
`,
 			[]Command{
 				{Name: "model", Args: "foo"},
-				{Name: "message", Args: "system: You are a file parser. Always parse things."},
+				{Name: "message", Args: "system: You are a Parser. Always Parse things."},
 			},
 			nil,
 		},
 		{
 			`
FROM foo
-MESSAGE system You are a file parser. Always parse things.`,
+MESSAGE system You are a Parser. Always Parse things.`,
 			[]Command{
 				{Name: "model", Args: "foo"},
-				{Name: "message", Args: "system: You are a file parser. Always parse things."},
+				{Name: "message", Args: "system: You are a Parser. Always Parse things."},
 			},
 			nil,
 		},
 		{
 			`
FROM foo
-MESSAGE system You are a file parser. Always parse things.
+MESSAGE system You are a Parser. Always Parse things.
MESSAGE user Hey there!
MESSAGE assistant Hello, I want to parse all the things!
`,
 			[]Command{
 				{Name: "model", Args: "foo"},
-				{Name: "message", Args: "system: You are a file parser. Always parse things."},
+				{Name: "message", Args: "system: You are a Parser. Always Parse things."},
 				{Name: "message", Args: "user: Hey there!"},
 				{Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
 			},
@@ -162,12 +160,12 @@ MESSAGE assistant Hello, I want to parse all the things!
 			`
FROM foo
MESSAGE system """
-You are a multiline file parser. Always parse things.
+You are a multiline Parser. Always Parse things.
"""
`,
 			[]Command{
 				{Name: "model", Args: "foo"},
-				{Name: "message", Args: "system: \nYou are a multiline file parser. Always parse things.\n"},
+				{Name: "message", Args: "system: \nYou are a multiline Parser. Always Parse things.\n"},
 			},
 			nil,
 		},
@@ -198,16 +196,14 @@ MESSAGE system`,

 	for _, c := range cases {
 		t.Run("", func(t *testing.T) {
-			modelfile, err := ParseFile(strings.NewReader(c.input))
+			commands, err := Parse(strings.NewReader(c.input))
 			assert.ErrorIs(t, err, c.err)
-			if modelfile != nil {
-				assert.Equal(t, c.expected, modelfile.Commands)
-			}
+			assert.Equal(t, c.expected, commands)
 		})
 	}
 }

-func TestParseFileQuoted(t *testing.T) {
+func TestParserQuoted(t *testing.T) {
 	var cases = []struct {
 		multiline string
 		expected  []Command
@@ -352,16 +348,14 @@ TEMPLATE """

 	for _, c := range cases {
 		t.Run("", func(t *testing.T) {
-			modelfile, err := ParseFile(strings.NewReader(c.multiline))
+			commands, err := Parse(strings.NewReader(c.multiline))
 			assert.ErrorIs(t, err, c.err)
-			if modelfile != nil {
-				assert.Equal(t, c.expected, modelfile.Commands)
-			}
+			assert.Equal(t, c.expected, commands)
 		})
 	}
 }

-func TestParseFileParameters(t *testing.T) {
+func TestParserParameters(t *testing.T) {
 	var cases = map[string]struct {
 		name, value string
 	}{
@@ -410,18 +404,18 @@ func TestParseFileParameters(t *testing.T) {
 			var b bytes.Buffer
 			fmt.Fprintln(&b, "FROM foo")
 			fmt.Fprintln(&b, "PARAMETER", k)
-			modelfile, err := ParseFile(&b)
-			assert.NoError(t, err)
+			commands, err := Parse(&b)
+			assert.Nil(t, err)

 			assert.Equal(t, []Command{
 				{Name: "model", Args: "foo"},
 				{Name: v.name, Args: v.value},
-			}, modelfile.Commands)
+			}, commands)
 		})
 	}
 }

-func TestParseFileComments(t *testing.T) {
+func TestParserComments(t *testing.T) {
 	var cases = []struct {
 		input    string
 		expected []Command
@@ -439,14 +433,14 @@ FROM foo

 	for _, c := range cases {
 		t.Run("", func(t *testing.T) {
-			modelfile, err := ParseFile(strings.NewReader(c.input))
-			assert.NoError(t, err)
-			assert.Equal(t, c.expected, modelfile.Commands)
+			commands, err := Parse(strings.NewReader(c.input))
+			assert.Nil(t, err)
+			assert.Equal(t, c.expected, commands)
 		})
 	}
 }

-func TestParseFileFormatParseFile(t *testing.T) {
+func TestParseFormatParse(t *testing.T) {
 	var cases = []string{
 		`
FROM foo
@@ -455,7 +449,7 @@ LICENSE MIT
PARAMETER param1 value1
PARAMETER param2 value2
TEMPLATE template1
-MESSAGE system You are a file parser. Always parse things.
+MESSAGE system You are a Parser. Always Parse things.
MESSAGE user Hey there!
MESSAGE assistant Hello, I want to parse all the things!
`,
@@ -494,13 +488,13 @@ MESSAGE assistant Hello, I want to parse all the things!

 	for _, c := range cases {
 		t.Run("", func(t *testing.T) {
-			modelfile, err := ParseFile(strings.NewReader(c))
+			commands, err := Parse(strings.NewReader(c))
 			assert.NoError(t, err)

-			modelfile2, err := ParseFile(strings.NewReader(modelfile.String()))
+			commands2, err := Parse(strings.NewReader(Format(commands)))
 			assert.NoError(t, err)

-			assert.Equal(t, modelfile, modelfile2)
+			assert.Equal(t, commands, commands2)
 		})
 	}

@@ -3,6 +3,7 @@ package server
 import (
 	"os"
 	"path/filepath"
+	"runtime"
 	"strings"
 )

@@ -24,3 +25,44 @@ func fixBlobs(dir string) error {
 		return nil
 	})
 }
+
+// fixManifests walks the provided dir and replaces (":") to ("%") for all
+// manifest files on non-Windows systems.
+func fixManifests(dir string) error {
+	if runtime.GOOS == "windows" {
+		return nil
+	}
+	return filepath.Walk(dir, func(oldPath string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if info.IsDir() {
+			return nil
+		}
+
+		var partNum int
+		newPath := []byte(oldPath)
+		for i := len(newPath) - 1; i >= 0; i-- {
+			if partNum > 3 {
+				break
+			}
+			if partNum == 3 {
+				if newPath[i] == ':' {
+					newPath[i] = '%'
+					break
+				}
+				continue
+			}
+			if newPath[i] == '/' {
+				partNum++
+			}
+		}
+
+		newDir, _ := filepath.Split(string(newPath))
+		if err := os.MkdirAll(newDir, 0o755); err != nil {
+			return err
+		}
+
+		return os.Rename(oldPath, string(newPath))
+	})
+}

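In plain terms (my reading of the loop above, not text from the diff): a manifest path ends in `<host>/<namespace>/<model>/<tag>`, so scanning from the end and counting three `/` separators lands in the host segment. Only a `:` there — e.g. a registry port — is rewritten to `%`; anything earlier in the path, including a `:` in the root directory itself, is left alone. This is exactly the `x:y/h:p/n/m/t` → `x:y/h%p/n/m/t` case covered by the test below.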
@@ -64,6 +64,55 @@ func TestFixBlobs(t *testing.T) {
 		})
 	}
 }

+func TestFixManifests(t *testing.T) {
+	cases := []struct {
+		path []string
+		want []string
+	}{
+		{path: []string{}, want: []string{}},
+		{path: []string{"h/n/m/t"}, want: []string{"h/n/m/t"}},
+		{path: []string{"h:p/n/m/t"}, want: []string{"h%p/n/m/t"}},
+		{path: []string{"x:y/h:p/n/m/t"}, want: []string{"x:y/h%p/n/m/t"}},
+	}
+
+	for _, tt := range cases {
+		t.Run(strings.Join(tt.path, "|"), func(t *testing.T) {
+			hasColon := slices.ContainsFunc(tt.path, func(s string) bool { return strings.Contains(s, ":") })
+			if hasColon && runtime.GOOS == "windows" {
+				t.Skip("skipping test on windows")
+			}
+
+			rootDir := t.TempDir()
+			for _, path := range tt.path {
+				fullPath := filepath.Join(rootDir, path)
+				fullDir, _ := filepath.Split(fullPath)
+
+				t.Logf("creating dir %s", fullDir)
+				if err := os.MkdirAll(fullDir, 0o755); err != nil {
+					t.Fatal(err)
+				}
+
+				t.Logf("writing file %s", fullPath)
+				if err := os.WriteFile(fullPath, nil, 0o644); err != nil {
+					t.Fatal(err)
+				}
+			}
+
+			if err := fixManifests(rootDir); err != nil {
+				t.Fatal(err)
+			}
+
+			got := slurpFiles(os.DirFS(rootDir))
+
+			slices.Sort(tt.want)
+			slices.Sort(got)
+			if !slices.Equal(got, tt.want) {
+				t.Fatalf("got = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
 func slurpFiles(fsys fs.FS) []string {
 	var sfs []string
 	fn := func(path string, d fs.DirEntry, err error) error {

599  server/images.go
@@ -1,8 +1,8 @@
package server

import (
    "archive/zip"
    "bytes"
    "cmp"
    "context"
    "crypto/sha256"
    "encoding/base64"
@@ -11,6 +11,7 @@ import (
    "errors"
    "fmt"
    "io"
    "io/fs"
    "log"
    "log/slog"
    "net/http"
@@ -25,8 +26,10 @@ import (

    "github.com/ollama/ollama/api"
    "github.com/ollama/ollama/auth"
    "github.com/ollama/ollama/convert"
    "github.com/ollama/ollama/format"
    "github.com/ollama/ollama/llm"
    "github.com/ollama/ollama/parser"
    "github.com/ollama/ollama/types/errtypes"
    "github.com/ollama/ollama/types/model"
    "github.com/ollama/ollama/version"
@@ -60,74 +63,46 @@ func (m *Model) IsEmbedding() bool {
    return slices.Contains(m.Config.ModelFamilies, "bert") || slices.Contains(m.Config.ModelFamilies, "nomic-bert")
}

func (m *Model) String() string {
    var modelfile model.File

    modelfile.Commands = append(modelfile.Commands, model.Command{
        Name: "model",
        Args: m.ModelPath,
    })
func (m *Model) Commands() (cmds []parser.Command) {
    cmds = append(cmds, parser.Command{Name: "model", Args: m.ModelPath})

    if m.Template != "" {
        modelfile.Commands = append(modelfile.Commands, model.Command{
            Name: "template",
            Args: m.Template,
        })
        cmds = append(cmds, parser.Command{Name: "template", Args: m.Template})
    }

    if m.System != "" {
        modelfile.Commands = append(modelfile.Commands, model.Command{
            Name: "system",
            Args: m.System,
        })
        cmds = append(cmds, parser.Command{Name: "system", Args: m.System})
    }

    for _, adapter := range m.AdapterPaths {
        modelfile.Commands = append(modelfile.Commands, model.Command{
            Name: "adapter",
            Args: adapter,
        })
        cmds = append(cmds, parser.Command{Name: "adapter", Args: adapter})
    }

    for _, projector := range m.ProjectorPaths {
        modelfile.Commands = append(modelfile.Commands, model.Command{
            Name: "projector",
            Args: projector,
        })
        cmds = append(cmds, parser.Command{Name: "projector", Args: projector})
    }

    for k, v := range m.Options {
        switch v := v.(type) {
        case []any:
            for _, s := range v {
                modelfile.Commands = append(modelfile.Commands, model.Command{
                    Name: k,
                    Args: fmt.Sprintf("%v", s),
                })
                cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", s)})
            }
        default:
            modelfile.Commands = append(modelfile.Commands, model.Command{
                Name: k,
                Args: fmt.Sprintf("%v", v),
            })
            cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", v)})
        }
    }

    for _, license := range m.License {
        modelfile.Commands = append(modelfile.Commands, model.Command{
            Name: "license",
            Args: license,
        })
        cmds = append(cmds, parser.Command{Name: "license", Args: license})
    }

    for _, msg := range m.Messages {
        modelfile.Commands = append(modelfile.Commands, model.Command{
            Name: "message",
            Args: fmt.Sprintf("%s %s", msg.Role, msg.Content),
        })
        cmds = append(cmds, parser.Command{Name: "message", Args: fmt.Sprintf("%s %s", msg.Role, msg.Content)})
    }

    return modelfile.String()
    return cmds
}
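As an aside, a small sketch (illustration only, assuming the parser package behaves as it is used in this change) of how Commands() pairs with parser.Format and parser.Parse to round-trip a Modelfile:

    package main

    import (
        "fmt"
        "strings"

        "github.com/ollama/ollama/parser"
    )

    func main() {
        // FROM is parsed into a command named "model", matching the switch in
        // CreateModel below.
        commands, err := parser.Parse(strings.NewReader("FROM llama2\nSYSTEM You are a pirate."))
        if err != nil {
            panic(err)
        }

        for _, c := range commands {
            fmt.Printf("%s -> %q\n", c.Name, c.Args)
        }

        // Format renders the commands back out as Modelfile text, which is
        // what GetModelInfo does with model.Commands().
        fmt.Print(parser.Format(commands))
    }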
type Message struct {
@@ -155,6 +130,36 @@ type ConfigV2 struct {
    RootFS RootFS `json:"rootfs"`
}

func (c *ConfigV2) SetModelFormat(format string) {
    if c.ModelFormat == "" {
        c.ModelFormat = format
    }
}

func (c *ConfigV2) SetModelFamily(families ...string) {
    for _, family := range families {
        if c.ModelFamily == "" {
            c.ModelFamily = family
        }

        if !slices.Contains(c.ModelFamilies, family) {
            c.ModelFamilies = append(c.ModelFamilies, family)
        }
    }
}

func (c *ConfigV2) SetModelType(modelType string) {
    if c.ModelType == "" {
        c.ModelType = modelType
    }
}

func (c *ConfigV2) SetFileType(fileType string) {
    if c.FileType == "" {
        c.FileType = fileType
    }
}
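A hypothetical test (not part of the change) pinning down the first-write-wins behavior of these setters and the deduplication in SetModelFamily:

    package server

    import "testing"

    func TestConfigV2Setters(t *testing.T) {
        var c ConfigV2

        c.SetModelFormat("gguf")
        c.SetModelFormat("ggla") // ignored: ModelFormat is already set
        if c.ModelFormat != "gguf" {
            t.Fatalf("ModelFormat = %q, want gguf", c.ModelFormat)
        }

        c.SetModelFamily("llama", "llama", "clip")
        if c.ModelFamily != "llama" {
            t.Fatalf("ModelFamily = %q, want llama", c.ModelFamily)
        }
        if len(c.ModelFamilies) != 2 { // deduplicated to ["llama", "clip"]
            t.Fatalf("ModelFamilies = %v, want 2 entries", c.ModelFamilies)
        }
    }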
type RootFS struct {
    Type    string   `json:"type"`
    DiffIDs []string `json:"diff_ids"`
@@ -299,7 +304,7 @@ func GetModel(name string) (*Model, error) {
    return model, nil
}

func realpath(rel, from string) string {
func realpath(mfDir, from string) string {
    abspath, err := filepath.Abs(from)
    if err != nil {
        return from
@@ -316,15 +321,22 @@ func realpath(rel, from string) string {
        return filepath.Join(home, from[2:])
    }

    if _, err := os.Stat(filepath.Join(rel, from)); err == nil {
    if _, err := os.Stat(filepath.Join(mfDir, from)); err == nil {
        // this is a file relative to the Modelfile
        return filepath.Join(rel, from)
        return filepath.Join(mfDir, from)
    }

    return abspath
}
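A simplified sketch (illustration only; the "~" home-directory expansion handled above is omitted) of the resolution order realpath applies: a path that exists relative to the Modelfile's directory wins, otherwise the absolute form of the argument is used.

    package main

    import (
        "fmt"
        "os"
        "path/filepath"
    )

    // resolve mirrors the tail of realpath above, with mfDir standing in for
    // the directory containing the Modelfile.
    func resolve(mfDir, from string) string {
        abspath, err := filepath.Abs(from)
        if err != nil {
            return from
        }

        if _, err := os.Stat(filepath.Join(mfDir, from)); err == nil {
            // a file relative to the Modelfile wins
            return filepath.Join(mfDir, from)
        }

        return abspath
    }

    func main() {
        fmt.Println(resolve(os.TempDir(), "weights.gguf"))
    }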
func CreateModel(ctx context.Context, name, modelFileDir, quantization string, modelfile *model.File, fn func(resp api.ProgressResponse)) (err error) {
func CreateModel(ctx context.Context, name, modelFileDir, quantization string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
    deleteMap := make(map[string]struct{})
    if manifest, _, err := GetManifest(ParseModelPath(name)); err == nil {
        for _, layer := range append(manifest.Layers, manifest.Config) {
            deleteMap[layer.Digest] = struct{}{}
        }
    }

    config := ConfigV2{
        OS:           "linux",
        Architecture: "amd64",
@@ -333,181 +345,250 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m
        },
    }

    var messages []*api.Message
    parameters := make(map[string]any)
    var layers Layers
    messages := []string{}

    var layers []*Layer
    for _, c := range modelfile.Commands {
    params := make(map[string][]string)
    fromParams := make(map[string]any)

    for _, c := range commands {
        mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)

        switch c.Name {
        case "model", "adapter":
            var baseLayers []*layerWithGGML
            if name := model.ParseName(c.Args); name.IsValid() {
                baseLayers, err = parseFromModel(ctx, name, fn)
                if err != nil {
                    return err
                }
            } else if strings.HasPrefix(c.Args, "@") {
                blobpath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))
        case "model":
            if strings.HasPrefix(c.Args, "@") {
                blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))
                if err != nil {
                    return err
                }

                blob, err := os.Open(blobpath)
                if err != nil {
                    return err
                }
                defer blob.Close()

                baseLayers, err = parseFromFile(ctx, blob, fn)
                if err != nil {
                    return err
                }
            } else if file, err := os.Open(realpath(modelFileDir, c.Args)); err == nil {
                defer file.Close()

                baseLayers, err = parseFromFile(ctx, file, fn)
                if err != nil {
                    return err
                }
            } else {
                return fmt.Errorf("invalid model reference: %s", c.Args)
                c.Args = blobPath
            }

            for _, baseLayer := range baseLayers {
                if quantization != "" &&
                    baseLayer.MediaType == "application/vnd.ollama.image.model" &&
                    baseLayer.GGML != nil &&
                    baseLayer.GGML.Name() == "gguf" {
                    ftype, err := llm.ParseFileType(quantization)
                    if err != nil {
                        return err
                    }
            pathName := realpath(modelFileDir, c.Args)

                    filetype := baseLayer.GGML.KV().FileType()
                    if !slices.Contains([]string{"F16", "F32"}, filetype) {
                        return errors.New("quantization is only supported for F16 and F32 models")
                    }

                    fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", filetype, quantization)})

                    blob, err := GetBlobsPath(baseLayer.Digest)
                    if err != nil {
                        return err
                    }

                    temp, err := os.CreateTemp(filepath.Dir(blob), quantization)
                    if err != nil {
                        return err
                    }
                    defer temp.Close()
                    defer os.Remove(temp.Name())

                    if err := llm.Quantize(blob, temp.Name(), ftype); err != nil {
                        return err
                    }

                    baseLayer.Layer, err = NewLayer(temp, baseLayer.Layer.MediaType)
                    if err != nil {
                        return err
                    }
            ggufName, err := convertModel(name, pathName, fn)
            if err != nil {
                var pathErr *fs.PathError
                switch {
                case errors.Is(err, zip.ErrFormat):
                    // it's not a safetensor archive
                case errors.As(err, &pathErr):
                    // it's not a file on disk, could be a model reference
                default:
                    return err
                }

                if baseLayer.GGML != nil {
                    config.ModelFormat = cmp.Or(config.ModelFormat, baseLayer.GGML.Name())
                    config.ModelFamily = cmp.Or(config.ModelFamily, baseLayer.GGML.KV().Architecture())
                    config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
                    config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType())
                    config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
                }

                layers = append(layers, baseLayer.Layer)
            }
        case "license", "template", "system":
            blob := strings.NewReader(c.Args)
            layer, err := NewLayer(blob, mediatype)

            if ggufName != "" {
                pathName = ggufName
                defer os.RemoveAll(ggufName)

                if quantization != "" {
                    quantization = strings.ToUpper(quantization)
                    fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", "F16", quantization)})
                    tempfile, err := os.CreateTemp(filepath.Dir(ggufName), quantization)
                    if err != nil {
                        return err
                    }
                    defer os.RemoveAll(tempfile.Name())

                    if err := llm.Quantize(ggufName, tempfile.Name(), quantization); err != nil {
                        return err
                    }

                    if err := tempfile.Close(); err != nil {
                        return err
                    }

                    pathName = tempfile.Name()
                }
            }

            bin, err := os.Open(pathName)
            if err != nil {
                // not a file on disk so must be a model reference
                modelpath := ParseModelPath(c.Args)
                manifest, _, err := GetManifest(modelpath)
                switch {
                case errors.Is(err, os.ErrNotExist):
                    fn(api.ProgressResponse{Status: "pulling model"})
                    if err := PullModel(ctx, c.Args, &registryOptions{}, fn); err != nil {
                        return err
                    }

                    manifest, _, err = GetManifest(modelpath)
                    if err != nil {
                        return err
                    }
                case err != nil:
                    return err
                }

                fn(api.ProgressResponse{Status: "reading model metadata"})
                fromConfigPath, err := GetBlobsPath(manifest.Config.Digest)
                if err != nil {
                    return err
                }

                fromConfigFile, err := os.Open(fromConfigPath)
                if err != nil {
                    return err
                }
                defer fromConfigFile.Close()

                var fromConfig ConfigV2
                if err := json.NewDecoder(fromConfigFile).Decode(&fromConfig); err != nil {
                    return err
                }

                // if the model is still not in gguf format, error out
                if fromConfig.ModelFormat != "gguf" {
                    return fmt.Errorf("%s is not in gguf format, this base model is not compatible with this version of ollama", c.Args)
                }

                config.SetModelFormat(fromConfig.ModelFormat)
                config.SetModelFamily(append(fromConfig.ModelFamilies, fromConfig.ModelFamily)...)
                config.SetModelType(fromConfig.ModelType)
                config.SetFileType(fromConfig.FileType)

                for _, layer := range manifest.Layers {
                    deleteMap[layer.Digest] = struct{}{}
                    if layer.MediaType == "application/vnd.ollama.image.params" {
                        fromParamsPath, err := GetBlobsPath(layer.Digest)
                        if err != nil {
                            return err
                        }

                        fromParamsFile, err := os.Open(fromParamsPath)
                        if err != nil {
                            return err
                        }
                        defer fromParamsFile.Close()

                        if err := json.NewDecoder(fromParamsFile).Decode(&fromParams); err != nil {
                            return err
                        }
                    }

                    layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, modelpath.GetShortTagname())
                    if err != nil {
                        return err
                    }

                    layers.Add(layer)
                }

                deleteMap[manifest.Config.Digest] = struct{}{}
                continue
            }
            defer bin.Close()

            var offset int64
            for {
                fn(api.ProgressResponse{Status: "creating model layer"})
                if _, err := bin.Seek(offset, io.SeekStart); err != nil {
                    return err
                }

                ggml, size, err := llm.DecodeGGML(bin)
                if errors.Is(err, io.EOF) {
                    break
                } else if errors.Is(err, llm.ErrUnsupportedFormat) {
                    return fmt.Errorf("model binary specified in FROM field is not a valid gguf format model, %w", err)
                } else if err != nil {
                    return err
                }

                config.SetModelFormat(ggml.Name())
                config.SetModelFamily(ggml.KV().Architecture())
                config.SetModelType(format.HumanNumber(ggml.KV().ParameterCount()))
                config.SetFileType(ggml.KV().FileType())

                mediatype := mediatype
                if ggml.KV().Architecture() == "clip" {
                    mediatype = "application/vnd.ollama.image.projector"
                }

                sr := io.NewSectionReader(bin, offset, size)
                layer, err := NewLayer(sr, mediatype)
                if err != nil {
                    return err
                }

                layers.Add(layer)

                offset += size
            }
        case "adapter":
            if strings.HasPrefix(c.Args, "@") {
                blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))
                if err != nil {
                    return err
                }

                c.Args = blobPath
            }

            fn(api.ProgressResponse{Status: "creating adapter layer"})
            bin, err := os.Open(realpath(modelFileDir, c.Args))
            if err != nil {
                return err
            }
            defer bin.Close()

            _, size, err := llm.DecodeGGML(bin)
            if err != nil {
                return err
            }

            if c.Name != "license" {
                // replace
                layers = slices.DeleteFunc(layers, func(layer *Layer) bool {
                    return layer.MediaType == mediatype
                })
            sr := io.NewSectionReader(bin, 0, size)
            layer, err := NewLayer(sr, mediatype)
            if err != nil {
                return err
            }

            layers = append(layers, layer)
            layers.Add(layer)
        case "license":
            fn(api.ProgressResponse{Status: "creating license layer"})

            bin := strings.NewReader(c.Args)
            layer, err := NewLayer(bin, mediatype)
            if err != nil {
                return err
            }

            layers.Add(layer)
        case "template", "system":
            fn(api.ProgressResponse{Status: fmt.Sprintf("creating %s layer", c.Name)})

            bin := strings.NewReader(c.Args)
            layer, err := NewLayer(bin, mediatype)
            if err != nil {
                return err
            }

            layers.Replace(layer)
        case "message":
            role, content, ok := strings.Cut(c.Args, ": ")
            if !ok {
                return fmt.Errorf("invalid message: %s", c.Args)
            }

            messages = append(messages, &api.Message{Role: role, Content: content})
            messages = append(messages, c.Args)
        default:
            ps, err := api.FormatParams(map[string][]string{c.Name: {c.Args}})
            if err != nil {
                return err
            }

            for k, v := range ps {
                if ks, ok := parameters[k].([]string); ok {
                    parameters[k] = append(ks, v.([]string)...)
                } else if vs, ok := v.([]string); ok {
                    parameters[k] = vs
                } else {
                    parameters[k] = v
                }
            }
            params[c.Name] = append(params[c.Name], c.Args)
        }
    }

    var err2 error
    layers = slices.DeleteFunc(layers, func(layer *Layer) bool {
        switch layer.MediaType {
        case "application/vnd.ollama.image.message":
            // if there are new messages, remove the inherited ones
            if len(messages) > 0 {
                return true
            }

            return false
        case "application/vnd.ollama.image.params":
            // merge inherited parameters with new ones
            r, err := layer.Open()
            if err != nil {
                err2 = err
                return false
            }
            defer r.Close()

            var ps map[string]any
            if err := json.NewDecoder(r).Decode(&ps); err != nil {
                err2 = err
                return false
            }

            for k, v := range ps {
                if _, ok := parameters[k]; !ok {
                    parameters[k] = v
                }
            }

            return true
        default:
            return false
        }
    })

    if err2 != nil {
        return err2
    }

    if len(messages) > 0 {
        fn(api.ProgressResponse{Status: "creating parameters layer"})

        msgs := make([]api.Message, 0)

        for _, m := range messages {
            // todo: handle images
            msg := strings.SplitN(m, ": ", 2)
            msgs = append(msgs, api.Message{Role: msg[0], Content: msg[1]})
        }

        var b bytes.Buffer
        if err := json.NewEncoder(&b).Encode(messages); err != nil {
        if err := json.NewEncoder(&b).Encode(msgs); err != nil {
            return err
        }
@@ -516,25 +597,39 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m
        return err
    }

    layers = append(layers, layer)
    layers.Replace(layer)
}

if len(parameters) > 0 {
    var b bytes.Buffer
    if err := json.NewEncoder(&b).Encode(parameters); err != nil {
    if len(params) > 0 {
        fn(api.ProgressResponse{Status: "creating parameters layer"})

        formattedParams, err := api.FormatParams(params)
        if err != nil {
            return err
        }

        for k, v := range fromParams {
            if _, ok := formattedParams[k]; !ok {
                formattedParams[k] = v
            }
        }

        var b bytes.Buffer
        if err := json.NewEncoder(&b).Encode(formattedParams); err != nil {
            return err
        }

        fn(api.ProgressResponse{Status: "creating config layer"})
        layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
        if err != nil {
            return err
        }

        layers = append(layers, layer)
        layers.Replace(layer)
    }

    digests := make([]string, len(layers))
    for i, layer := range layers {
    digests := make([]string, len(layers.items))
    for i, layer := range layers.items {
        digests[i] = layer.Digest
    }
@@ -545,38 +640,36 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m
        return err
    }

    layer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
    configLayer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
    if err != nil {
        return err
    }

    for _, layer := range append(layers, layer) {
        if layer.status != "" {
            fn(api.ProgressResponse{Status: layer.status})
        }
    }
    delete(deleteMap, configLayer.Digest)

    unref := make(map[string]struct{})
    if manifest, _, err := GetManifest(ParseModelPath(name)); err == nil {
        for _, layer := range manifest.Layers {
            if !slices.Contains(digests, layer.Digest) {
                unref[layer.Digest] = struct{}{}
            }
    for _, layer := range append(layers.items, configLayer) {
        committed, err := layer.Commit()
        if err != nil {
            return err
        }

        if manifest.Config.Digest != layer.Digest {
            unref[manifest.Config.Digest] = struct{}{}
        status := "writing layer"
        if !committed {
            status = "using already created layer"
        }

        fn(api.ProgressResponse{Status: fmt.Sprintf("%s %s", status, layer.Digest)})

        delete(deleteMap, layer.Digest)
    }

    fn(api.ProgressResponse{Status: "writing manifest"})
    if err := WriteManifest(name, layer, layers); err != nil {
    if err := WriteManifest(name, configLayer, layers.items); err != nil {
        return err
    }

    if os.Getenv("OLLAMA_NOPRUNE") == "" && len(unref) > 0 {
        fn(api.ProgressResponse{Status: "removing unused layers"})
        if err := deleteUnusedLayers(nil, unref, false); err != nil {
    if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
        if err := deleteUnusedLayers(nil, deleteMap, false); err != nil {
            return err
        }
    }
@@ -585,6 +678,74 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m
    return nil
}
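For reference, a tiny standalone sketch (illustration only) of the MESSAGE convention CreateModel relies on above: a message's Args are stored as "role: content" and split on the first ": ".

    package main

    import (
        "fmt"
        "strings"
    )

    func main() {
        // Mirrors the strings.SplitN call in CreateModel; assumes well-formed
        // "role: content" input, as validated when the command is parsed.
        m := "user: why is the sky blue?"
        msg := strings.SplitN(m, ": ", 2)
        fmt.Printf("role=%q content=%q\n", msg[0], msg[1])
    }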
func convertModel(name, path string, fn func(resp api.ProgressResponse)) (string, error) {
    r, err := zip.OpenReader(path)
    if err != nil {
        return "", err
    }
    defer r.Close()

    tempDir, err := os.MkdirTemp("", "ollama-convert")
    if err != nil {
        return "", err
    }
    defer os.RemoveAll(tempDir)

    fn(api.ProgressResponse{Status: "unpacking model metadata"})
    for _, f := range r.File {
        fpath := filepath.Join(tempDir, f.Name)
        outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
        if err != nil {
            return "", err
        }

        rc, err := f.Open()
        if err != nil {
            return "", err
        }

        _, err = io.Copy(outFile, rc)
        if err != nil {
            return "", err
        }

        outFile.Close()
        rc.Close()
    }

    mf, err := convert.GetModelFormat(tempDir)
    if err != nil {
        return "", err
    }

    params, err := mf.GetParams(tempDir)
    if err != nil {
        return "", err
    }

    mArch, err := mf.GetModelArch(name, tempDir, params)
    if err != nil {
        return "", err
    }

    fn(api.ProgressResponse{Status: "processing tensors"})
    if err := mArch.GetTensors(); err != nil {
        return "", err
    }

    if err := mArch.LoadVocab(); err != nil {
        return "", err
    }

    fn(api.ProgressResponse{Status: "converting model"})
    path, err = mArch.WriteGGUF()
    if err != nil {
        return "", err
    }

    return path, nil
}

func CopyModel(src, dst model.Name) error {
    if !dst.IsFullyQualified() {
        return model.Unqualified(dst)
@@ -5,14 +5,39 @@ import (
    "fmt"
    "io"
    "os"
    "strings"

    "golang.org/x/exp/slices"
)

type Layers struct {
    items []*Layer
}

func (ls *Layers) Add(layer *Layer) {
    if layer.Size > 0 {
        ls.items = append(ls.items, layer)
    }
}

func (ls *Layers) Replace(layer *Layer) {
    if layer.Size > 0 {
        mediatype := layer.MediaType
        layers := slices.DeleteFunc(ls.items, func(l *Layer) bool {
            return l.MediaType == mediatype
        })

        ls.items = append(layers, layer)
    }
}
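A hypothetical test (not part of the change) showing the intended Layers semantics: Add appends, Replace keeps at most one layer per media type, and both ignore zero-size layers.

    package server

    import "testing"

    func TestLayersAddReplace(t *testing.T) {
        var ls Layers

        ls.Add(&Layer{MediaType: "application/vnd.ollama.image.system", Size: 1})
        ls.Replace(&Layer{MediaType: "application/vnd.ollama.image.system", Size: 2})
        if len(ls.items) != 1 || ls.items[0].Size != 2 {
            t.Fatalf("want one replaced system layer, got %v", ls.items)
        }

        // zero-size layers are dropped by both Add and Replace
        ls.Add(&Layer{MediaType: "application/vnd.ollama.image.license", Size: 0})
        if len(ls.items) != 1 {
            t.Fatalf("zero-size layer should be ignored, got %v", ls.items)
        }
    }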
type Layer struct {
    MediaType string `json:"mediaType"`
    Digest    string `json:"digest"`
    Size      int64  `json:"size"`
    From      string `json:"from,omitempty"`
    status    string

    tempFileName string
}

func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
@@ -21,12 +46,14 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
        return nil, err
    }

    temp, err := os.CreateTemp(blobs, "sha256-")
    const delimiter = "-"

    pattern := strings.Join([]string{"sha256", "*-partial"}, delimiter)
    temp, err := os.CreateTemp(blobs, pattern)
    if err != nil {
        return nil, err
    }
    defer temp.Close()
    defer os.Remove(temp.Name())

    sha256sum := sha256.New()
    n, err := io.Copy(io.MultiWriter(temp, sha256sum), r)
@@ -34,29 +61,11 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
        return nil, err
    }

    if err := temp.Close(); err != nil {
        return nil, err
    }

    digest := fmt.Sprintf("sha256:%x", sha256sum.Sum(nil))
    blob, err := GetBlobsPath(digest)
    if err != nil {
        return nil, err
    }

    status := "using existing layer"
    if _, err := os.Stat(blob); err != nil {
        status = "creating new layer"
        if err := os.Rename(temp.Name(), blob); err != nil {
            return nil, err
        }
    }

    return &Layer{
        MediaType: mediatype,
        Digest:    digest,
        Size:      n,
        status:    fmt.Sprintf("%s %s", status, digest),
        MediaType:    mediatype,
        Digest:       fmt.Sprintf("sha256:%x", sha256sum.Sum(nil)),
        Size:         n,
        tempFileName: temp.Name(),
    }, nil
}

@@ -76,15 +85,21 @@ func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) {
        Digest: digest,
        Size:   fi.Size(),
        From:   from,
        status: fmt.Sprintf("using existing layer %s", digest),
    }, nil
}

func (l *Layer) Open() (io.ReadCloser, error) {
func (l *Layer) Commit() (bool, error) {
    // always remove temp
    defer os.Remove(l.tempFileName)

    blob, err := GetBlobsPath(l.Digest)
    if err != nil {
        return nil, err
        return false, err
    }

    return os.Open(blob)
    if _, err := os.Stat(blob); err != nil {
        return true, os.Rename(l.tempFileName, blob)
    }

    return false, nil
}
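A hypothetical test (not part of the change; it assumes GetBlobsPath resolves under OLLAMA_MODELS, as the routes tests do) of the new two-step lifecycle: NewLayer hashes into a sha256-*-partial temp file, and Commit reports whether it actually moved the blob into place.

    package server

    import (
        "strings"
        "testing"
    )

    func TestLayerCommit(t *testing.T) {
        // assumption: blob paths resolve under OLLAMA_MODELS
        t.Setenv("OLLAMA_MODELS", t.TempDir())

        layer, err := NewLayer(strings.NewReader("hello"), "application/vnd.ollama.image.system")
        if err != nil {
            t.Fatal(err)
        }

        committed, err := layer.Commit()
        if err != nil {
            t.Fatal(err)
        }
        if !committed {
            t.Fatal("first Commit should rename the partial file into the blob store")
        }

        // same content, same digest: the second Commit finds the existing
        // blob and discards its temp file instead
        layer2, err := NewLayer(strings.NewReader("hello"), "application/vnd.ollama.image.system")
        if err != nil {
            t.Fatal(err)
        }
        if committed, err := layer2.Commit(); err != nil || committed {
            t.Fatalf("second Commit should reuse the existing blob (committed=%v, err=%v)", committed, err)
        }
    }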
259
server/model.go
@@ -1,259 +0,0 @@
package server

import (
    "archive/zip"
    "bytes"
    "context"
    "errors"
    "fmt"
    "io"
    "net/http"
    "os"
    "path/filepath"

    "github.com/ollama/ollama/api"
    "github.com/ollama/ollama/convert"
    "github.com/ollama/ollama/llm"
    "github.com/ollama/ollama/types/model"
)

type layerWithGGML struct {
    *Layer
    *llm.GGML
}

func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
    modelpath := ParseModelPath(name.String())
    manifest, _, err := GetManifest(modelpath)
    switch {
    case errors.Is(err, os.ErrNotExist):
        if err := PullModel(ctx, name.String(), &registryOptions{}, fn); err != nil {
            return nil, err
        }

        modelpath = ParseModelPath(name.String())
        manifest, _, err = GetManifest(modelpath)
        if err != nil {
            return nil, err
        }
    case err != nil:
        return nil, err
    }

    for _, layer := range manifest.Layers {
        layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, modelpath.GetShortTagname())
        if err != nil {
            return nil, err
        }

        switch layer.MediaType {
        case "application/vnd.ollama.image.model",
            "application/vnd.ollama.image.projector",
            "application/vnd.ollama.image.adapter":
            blobpath, err := GetBlobsPath(layer.Digest)
            if err != nil {
                return nil, err
            }

            blob, err := os.Open(blobpath)
            if err != nil {
                return nil, err
            }
            defer blob.Close()

            ggml, _, err := llm.DecodeGGML(blob)
            if err != nil {
                return nil, err
            }

            layers = append(layers, &layerWithGGML{layer, ggml})
        default:
            layers = append(layers, &layerWithGGML{layer, nil})
        }

    }

    return layers, nil
}

func parseFromZipFile(_ context.Context, file *os.File, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
    stat, err := file.Stat()
    if err != nil {
        return nil, err
    }

    r, err := zip.NewReader(file, stat.Size())
    if err != nil {
        return nil, err
    }

    tempdir, err := os.MkdirTemp(filepath.Dir(file.Name()), "")
    if err != nil {
        return nil, err
    }
    defer os.RemoveAll(tempdir)

    fn(api.ProgressResponse{Status: "unpacking model metadata"})
    for _, f := range r.File {
        // TODO(mxyng): this should not write out all files to disk
        outfile, err := os.Create(filepath.Join(tempdir, f.Name))
        if err != nil {
            return nil, err
        }

        infile, err := f.Open()
        if err != nil {
            return nil, err
        }

        if _, err = io.Copy(outfile, infile); err != nil {
            return nil, err
        }

        if err := outfile.Close(); err != nil {
            return nil, err
        }

        if err := infile.Close(); err != nil {
            return nil, err
        }
    }

    mf, err := convert.GetModelFormat(tempdir)
    if err != nil {
        return nil, err
    }

    params, err := mf.GetParams(tempdir)
    if err != nil {
        return nil, err
    }

    mArch, err := mf.GetModelArch("", tempdir, params)
    if err != nil {
        return nil, err
    }

    fn(api.ProgressResponse{Status: "processing tensors"})
    if err := mArch.GetTensors(); err != nil {
        return nil, err
    }

    if err := mArch.LoadVocab(); err != nil {
        return nil, err
    }

    fn(api.ProgressResponse{Status: "converting model"})

    // TODO(mxyng): this should write directly into a layer
    // e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
    temp, err := os.CreateTemp(tempdir, "fp16")
    if err != nil {
        return nil, err
    }
    defer temp.Close()
    defer os.Remove(temp.Name())

    if err = mArch.WriteGGUF(temp); err != nil {
        return nil, err
    }

    if _, err := temp.Seek(0, io.SeekStart); err != nil {
        return nil, err
    }

    layer, err := NewLayer(temp, "application/vnd.ollama.image.model")
    if err != nil {
        return nil, fmt.Errorf("aaa: %w", err)
    }

    blobpath, err := GetBlobsPath(layer.Digest)
    if err != nil {
        return nil, err
    }

    bin, err := os.Open(blobpath)
    if err != nil {
        return nil, err
    }
    defer bin.Close()

    ggml, _, err := llm.DecodeGGML(bin)
    if err != nil {
        return nil, err
    }

    layer, err = NewLayerFromLayer(layer.Digest, layer.MediaType, "")
    if err != nil {
        return nil, err
    }

    layers = append(layers, &layerWithGGML{layer, ggml})
    return layers, nil
}

func parseFromFile(ctx context.Context, file *os.File, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
    sr := io.NewSectionReader(file, 0, 512)
    contentType, err := detectContentType(sr)
    if err != nil {
        return nil, err
    }

    switch contentType {
    case "gguf", "ggla":
        // noop
    case "application/zip":
        return parseFromZipFile(ctx, file, fn)
    default:
        return nil, fmt.Errorf("unsupported content type: %s", contentType)
    }

    stat, err := file.Stat()
    if err != nil {
        return nil, err
    }

    var offset int64
    for offset < stat.Size() {
        ggml, n, err := llm.DecodeGGML(file)
        if errors.Is(err, io.EOF) {
            break
        } else if err != nil {
            return nil, err
        }

        mediatype := "application/vnd.ollama.image.model"
        if ggml.Name() == "ggla" {
            mediatype = "application/vnd.ollama.image.adapter"
        } else if ggml.KV().Architecture() == "clip" {
            mediatype = "application/vnd.ollama.image.projector"
        }

        layer, err := NewLayer(io.NewSectionReader(file, offset, n), mediatype)
        if err != nil {
            return nil, err
        }

        layers = append(layers, &layerWithGGML{layer, ggml})
        offset = n
    }

    return layers, nil
}

func detectContentType(r io.Reader) (string, error) {
    var b bytes.Buffer
    if _, err := io.Copy(&b, r); err != nil {
        return "", err
    }

    if contentType := llm.DetectGGMLType(b.Bytes()); contentType != "" {
        return contentType, nil
    }

    if contentType := http.DetectContentType(b.Bytes()); contentType != "application/octet-stream" {
        return contentType, nil
    }

    return "unknown", nil
}
@@ -1,7 +1,6 @@
package server

import (
    "cmp"
    "context"
    "encoding/json"
    "errors"
@@ -29,6 +28,7 @@ import (
    "github.com/ollama/ollama/gpu"
    "github.com/ollama/ollama/llm"
    "github.com/ollama/ollama/openai"
    "github.com/ollama/ollama/parser"
    "github.com/ollama/ollama/types/model"
    "github.com/ollama/ollama/version"
)

@@ -522,17 +522,28 @@ func (s *Server) PushModelHandler(c *gin.Context) {

func (s *Server) CreateModelHandler(c *gin.Context) {
    var req api.CreateRequest
    if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
    err := c.ShouldBindJSON(&req)
    switch {
    case errors.Is(err, io.EOF):
        c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
        return
    } else if err != nil {
    case err != nil:
        c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
        return
    }

    name := model.ParseName(cmp.Or(req.Model, req.Name))
    if !name.IsValid() {
        c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
    var model string
    if req.Model != "" {
        model = req.Model
    } else if req.Name != "" {
        model = req.Name
    } else {
        c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
        return
    }

    if err := ParseModelPath(model).Validate(); err != nil {
        c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
        return
    }

@@ -541,19 +552,19 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
        return
    }

    var r io.Reader = strings.NewReader(req.Modelfile)
    var modelfile io.Reader = strings.NewReader(req.Modelfile)
    if req.Path != "" && req.Modelfile == "" {
        f, err := os.Open(req.Path)
        mf, err := os.Open(req.Path)
        if err != nil {
            c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
            return
        }
        defer f.Close()
        defer mf.Close()

        r = f
        modelfile = mf
    }

    modelfile, err := model.ParseFile(r)
    commands, err := parser.Parse(modelfile)
    if err != nil {
        c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
        return
@@ -569,7 +580,7 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
    ctx, cancel := context.WithCancel(c.Request.Context())
    defer cancel()

    if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), strings.ToUpper(req.Quantization), modelfile, fn); err != nil {
    if err := CreateModel(ctx, model, filepath.Dir(req.Path), req.Quantization, commands, fn); err != nil {
        ch <- gin.H{"error": err.Error()}
    }
}()
@@ -721,7 +732,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
    fmt.Fprintln(&sb, "# Modelfile generated by \"ollama show\"")
    fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
    fmt.Fprintf(&sb, "# FROM %s\n\n", model.ShortName)
    fmt.Fprint(&sb, model.String())
    fmt.Fprint(&sb, parser.Format(model.Commands()))
    resp.Modelfile = sb.String()

    return resp, nil
@@ -861,6 +872,11 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
        return
    }

    if _, err := layer.Commit(); err != nil {
        c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
        return
    }

    c.Status(http.StatusCreated)
}

@@ -1030,6 +1046,13 @@ func Serve(ln net.Listener) error {
    if err := fixBlobs(blobsDir); err != nil {
        return err
    }
    manifestsDir, err := GetManifestPath()
    if err != nil {
        return err
    }
    if err := fixManifests(manifestsDir); err != nil {
        return err
    }

    if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
        // clean up unused layers and manifests
@@ -17,7 +17,7 @@ import (
    "github.com/stretchr/testify/assert"

    "github.com/ollama/ollama/api"
    "github.com/ollama/ollama/types/model"
    "github.com/ollama/ollama/parser"
    "github.com/ollama/ollama/version"
)

@@ -55,13 +55,13 @@ func Test_Routes(t *testing.T) {
    createTestModel := func(t *testing.T, name string) {
        fname := createTestFile(t, "ollama-model")

        r := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname))
        modelfile, err := model.ParseFile(r)
        modelfile := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname))
        commands, err := parser.Parse(modelfile)
        assert.Nil(t, err)
        fn := func(resp api.ProgressResponse) {
            t.Logf("Status: %s", resp.Status)
        }
        err = CreateModel(context.TODO(), name, "", "", modelfile, fn)
        err = CreateModel(context.TODO(), name, "", "", commands, fn)
        assert.Nil(t, err)
    }

@@ -124,12 +124,14 @@ func Test_Routes(t *testing.T) {
    Method: http.MethodPost,
    Path:   "/api/create",
    Setup: func(t *testing.T, req *http.Request) {
        fname := createTestFile(t, "ollama-model")
        f, err := os.CreateTemp(t.TempDir(), "ollama-model")
        assert.Nil(t, err)
        defer f.Close()

        stream := false
        createReq := api.CreateRequest{
            Name:      "t-bone",
            Modelfile: fmt.Sprintf("FROM %s", fname),
            Modelfile: fmt.Sprintf("FROM %s", f.Name()),
            Stream:    &stream,
        }
        jsonData, err := json.Marshal(createReq)
@@ -214,25 +216,27 @@ func Test_Routes(t *testing.T) {
    httpSrv := httptest.NewServer(router)
    t.Cleanup(httpSrv.Close)

    t.Setenv("OLLAMA_MODELS", t.TempDir())
    workDir, err := os.MkdirTemp("", "ollama-test")
    assert.Nil(t, err)
    defer os.RemoveAll(workDir)
    os.Setenv("OLLAMA_MODELS", workDir)

    for _, tc := range testCases {
        t.Run(tc.Name, func(t *testing.T) {
            u := httpSrv.URL + tc.Path
            req, err := http.NewRequestWithContext(context.TODO(), tc.Method, u, nil)
            assert.Nil(t, err)
        t.Logf("Running Test: [%s]", tc.Name)
        u := httpSrv.URL + tc.Path
        req, err := http.NewRequestWithContext(context.TODO(), tc.Method, u, nil)
        assert.Nil(t, err)

            if tc.Setup != nil {
                tc.Setup(t, req)
            }
        if tc.Setup != nil {
            tc.Setup(t, req)
        }

            resp, err := httpSrv.Client().Do(req)
            assert.Nil(t, err)
            defer resp.Body.Close()
        resp, err := httpSrv.Client().Do(req)
        assert.Nil(t, err)
        defer resp.Body.Close()

            if tc.Expected != nil {
                tc.Expected(t, resp)
            }
        })
        if tc.Expected != nil {
            tc.Expected(t, resp)
        }
    }
}
@@ -58,7 +58,7 @@ func TestInitScheduler(t *testing.T) {
}

func TestLoad(t *testing.T) {
    ctx, done := context.WithTimeout(context.Background(), 20*time.Millisecond)
    ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
    defer done()
    s := InitScheduler(ctx)
    var ggml *llm.GGML // value not used in tests
@@ -174,7 +174,7 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
}

func TestRequests(t *testing.T) {
    ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond)
    ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
    defer done()

    // Same model, same request
@@ -329,7 +329,7 @@ func TestRequests(t *testing.T) {
}

func TestGetRunner(t *testing.T) {
    ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
    ctx, done := context.WithTimeout(context.Background(), 20*time.Millisecond)
    defer done()

    // Same model, same request
@@ -391,7 +391,7 @@ func TestGetRunner(t *testing.T) {

// TODO - add one scenario that triggers the bogus finished event with positive ref count
func TestPrematureExpired(t *testing.T) {
    ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond)
    ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
    defer done()

    // Same model, same request
@@ -436,7 +436,7 @@ func TestPrematureExpired(t *testing.T) {
}

func TestUseLoadedRunner(t *testing.T) {
    ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
    ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
    req := &LlmRequest{
        ctx:  ctx,
        opts: api.DefaultOptions(),
@@ -461,7 +461,7 @@ func TestUseLoadedRunner(t *testing.T) {
}

func TestUpdateFreeSpace(t *testing.T) {
    ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
    ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
    defer done()
    gpus := gpu.GpuInfoList{
        {
@@ -494,7 +494,7 @@ func TestUpdateFreeSpace(t *testing.T) {
}

func TestFindRunnerToUnload(t *testing.T) {
    ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
    ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
    defer done()
    req := &LlmRequest{
        ctx: ctx,
@@ -518,7 +518,7 @@ func TestFindRunnerToUnload(t *testing.T) {
}

func TestNeedsReload(t *testing.T) {
    ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
    ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
    defer done()

    llm := &mockLlm{}
@@ -562,7 +562,7 @@ func TestNeedsReload(t *testing.T) {
}

func TestUnloadAllRunners(t *testing.T) {
    ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
    ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
    defer done()

    llm1 := &mockLlm{}
@@ -161,7 +161,7 @@ func ParseNameBare(s string) Name {
    }

    scheme, host, ok := strings.Cut(s, "://")
    if ! ok {
    if !ok {
        host = scheme
    }
    n.Host = host
@@ -243,7 +243,7 @@ func (n Name) Filepath() string {
        panic("illegal attempt to get filepath of invalid name")
    }
    return strings.ToLower(filepath.Join(
        n.Host,
        strings.Replace(n.Host, ":", "%", 1),
        n.Namespace,
        n.Model,
        n.Tag,

@@ -27,7 +27,7 @@ func TestParseNameParts(t *testing.T) {
            Model:     "model",
            Tag:       "tag",
        },
        wantFilepath: filepath.Join("host:port", "namespace", "model", "tag"),
        wantFilepath: filepath.Join("host%port", "namespace", "model", "tag"),
    },
    {
        in: "host/namespace/model:tag",
@@ -47,7 +47,7 @@ func TestParseNameParts(t *testing.T) {
            Model:     "model",
            Tag:       "tag",
        },
        wantFilepath: filepath.Join("host:port", "namespace", "model", "tag"),
        wantFilepath: filepath.Join("host%port", "namespace", "model", "tag"),
    },
    {
        in: "host/namespace/model",
@@ -65,7 +65,7 @@ func TestParseNameParts(t *testing.T) {
            Namespace: "namespace",
            Model:     "model",
        },
        wantFilepath: filepath.Join("host:port", "namespace", "model", "latest"),
        wantFilepath: filepath.Join("host%port", "namespace", "model", "latest"),
    },
    {
        in: "namespace/model",
@@ -127,6 +127,15 @@ func TestParseNameParts(t *testing.T) {
        },
        wantValidDigest: true,
    },
    {
        in: "y.com:443/n/model",
        want: Name{
            Host:      "y.com:443",
            Namespace: "n",
            Model:     "model",
        },
        wantFilepath: filepath.Join("y.com%443", "n", "model", "latest"),
    },
}

for _, tt := range cases {