Compare commits
1 Commits
v0.1.15
...
mattw/nopr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0d4fa34aee |
11
README.md
11
README.md
@@ -104,7 +104,7 @@ FROM llama2
|
||||
# set the temperature to 1 [higher is more creative, lower is more coherent]
|
||||
PARAMETER temperature 1
|
||||
|
||||
# set the system message
|
||||
# set the system prompt
|
||||
SYSTEM """
|
||||
You are Mario from Super Mario Bros. Answer as Mario, the assistant, only.
|
||||
"""
|
||||
@@ -205,8 +205,7 @@ Finally, in a separate shell, run a model:
|
||||
## REST API
|
||||
|
||||
Ollama has a REST API for running and managing models.
|
||||
|
||||
### Generate a response
|
||||
For example, to generate text from a model:
|
||||
|
||||
```
|
||||
curl http://localhost:11434/api/generate -d '{
|
||||
@@ -215,7 +214,7 @@ curl http://localhost:11434/api/generate -d '{
|
||||
}'
|
||||
```
|
||||
|
||||
### Chat with a model
|
||||
Or send a chat message (coming in 0.1.14):
|
||||
|
||||
```
|
||||
curl http://localhost:11434/api/chat -d '{
|
||||
@@ -254,10 +253,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [gptel Emacs client](https://github.com/karthink/gptel)
|
||||
- [Oatmeal](https://github.com/dustinblackman/oatmeal)
|
||||
|
||||
### Database
|
||||
|
||||
- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md)
|
||||
|
||||
### Package managers
|
||||
|
||||
- [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
|
||||
|
||||
58
api/types.go
58
api/types.go
@@ -31,18 +31,15 @@ func (e StatusError) Error() string {
|
||||
}
|
||||
}
|
||||
|
||||
type ImageData []byte
|
||||
|
||||
type GenerateRequest struct {
|
||||
Model string `json:"model"`
|
||||
Prompt string `json:"prompt"`
|
||||
System string `json:"system"`
|
||||
Template string `json:"template"`
|
||||
Context []int `json:"context,omitempty"`
|
||||
Stream *bool `json:"stream,omitempty"`
|
||||
Raw bool `json:"raw,omitempty"`
|
||||
Format string `json:"format"`
|
||||
Images []ImageData `json:"images,omitempty"`
|
||||
Model string `json:"model"`
|
||||
Prompt string `json:"prompt"`
|
||||
System string `json:"system"`
|
||||
Template string `json:"template"`
|
||||
Context []int `json:"context,omitempty"`
|
||||
Stream *bool `json:"stream,omitempty"`
|
||||
Raw bool `json:"raw,omitempty"`
|
||||
Format string `json:"format"`
|
||||
|
||||
Options map[string]interface{} `json:"options"`
|
||||
}
|
||||
@@ -57,9 +54,8 @@ type ChatRequest struct {
|
||||
}
|
||||
|
||||
type Message struct {
|
||||
Role string `json:"role"` // one of ["system", "user", "assistant"]
|
||||
Content string `json:"content"`
|
||||
Images []ImageData `json:"images, omitempty"`
|
||||
Role string `json:"role"` // one of ["system", "user", "assistant"]
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
type ChatResponse struct {
|
||||
@@ -152,12 +148,11 @@ type ShowRequest struct {
|
||||
}
|
||||
|
||||
type ShowResponse struct {
|
||||
License string `json:"license,omitempty"`
|
||||
Modelfile string `json:"modelfile,omitempty"`
|
||||
Parameters string `json:"parameters,omitempty"`
|
||||
Template string `json:"template,omitempty"`
|
||||
System string `json:"system,omitempty"`
|
||||
Details ModelDetails `json:"details,omitempty"`
|
||||
License string `json:"license,omitempty"`
|
||||
Modelfile string `json:"modelfile,omitempty"`
|
||||
Parameters string `json:"parameters,omitempty"`
|
||||
Template string `json:"template,omitempty"`
|
||||
System string `json:"system,omitempty"`
|
||||
}
|
||||
|
||||
type CopyRequest struct {
|
||||
@@ -193,11 +188,10 @@ type ListResponse struct {
|
||||
}
|
||||
|
||||
type ModelResponse struct {
|
||||
Name string `json:"name"`
|
||||
ModifiedAt time.Time `json:"modified_at"`
|
||||
Size int64 `json:"size"`
|
||||
Digest string `json:"digest"`
|
||||
Details ModelDetails `json:"details,omitempty"`
|
||||
Name string `json:"name"`
|
||||
ModifiedAt time.Time `json:"modified_at"`
|
||||
Size int64 `json:"size"`
|
||||
Digest string `json:"digest"`
|
||||
}
|
||||
|
||||
type TokenResponse struct {
|
||||
@@ -209,18 +203,20 @@ type GenerateResponse struct {
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
Response string `json:"response"`
|
||||
|
||||
ModelConfiguration ModelConfiguration `json:"model_configuration"`
|
||||
|
||||
Done bool `json:"done"`
|
||||
Context []int `json:"context,omitempty"`
|
||||
|
||||
Metrics
|
||||
}
|
||||
|
||||
type ModelDetails struct {
|
||||
Format string `json:"format"`
|
||||
Family string `json:"family"`
|
||||
Families []string `json:"families"`
|
||||
ParameterSize string `json:"parameter_size"`
|
||||
QuantizationLevel string `json:"quantization_level"`
|
||||
type ModelConfiguration struct {
|
||||
ModelFormat string `json:"model_format"`
|
||||
ModelFamily string `json:"model_family"`
|
||||
ModelFamilies []string `json:"model_families"`
|
||||
ModelType string `json:"model_type"`
|
||||
FileType string `json:"file_type"`
|
||||
}
|
||||
|
||||
func (m *Metrics) Summary() {
|
||||
|
||||
177
cmd/cmd.go
177
cmd/cmd.go
@@ -17,9 +17,7 @@ import (
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"slices"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
@@ -38,8 +36,6 @@ import (
|
||||
"github.com/jmorganca/ollama/version"
|
||||
)
|
||||
|
||||
type ImageData []byte
|
||||
|
||||
func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
filename, _ := cmd.Flags().GetString("file")
|
||||
filename, err := filepath.Abs(filename)
|
||||
@@ -422,7 +418,6 @@ func RunGenerate(cmd *cobra.Command, args []string) error {
|
||||
Model: args[0],
|
||||
WordWrap: os.Getenv("TERM") == "xterm-256color",
|
||||
Options: map[string]interface{}{},
|
||||
Images: []ImageData{},
|
||||
}
|
||||
|
||||
format, err := cmd.Flags().GetString("format")
|
||||
@@ -432,6 +427,7 @@ func RunGenerate(cmd *cobra.Command, args []string) error {
|
||||
opts.Format = format
|
||||
|
||||
prompts := args[1:]
|
||||
|
||||
// prepend stdin to the prompt if provided
|
||||
if !term.IsTerminal(int(os.Stdin.Fd())) {
|
||||
in, err := io.ReadAll(os.Stdin)
|
||||
@@ -470,7 +466,6 @@ type generateOptions struct {
|
||||
Format string
|
||||
System string
|
||||
Template string
|
||||
Images []ImageData
|
||||
Options map[string]interface{}
|
||||
}
|
||||
|
||||
@@ -556,10 +551,6 @@ func generate(cmd *cobra.Command, opts generateOptions) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
images := make([]api.ImageData, 0)
|
||||
for _, i := range opts.Images {
|
||||
images = append(images, api.ImageData(i))
|
||||
}
|
||||
request := api.GenerateRequest{
|
||||
Model: opts.Model,
|
||||
Prompt: opts.Prompt,
|
||||
@@ -568,7 +559,6 @@ func generate(cmd *cobra.Command, opts generateOptions) error {
|
||||
System: opts.System,
|
||||
Template: opts.Template,
|
||||
Options: opts.Options,
|
||||
Images: images,
|
||||
}
|
||||
|
||||
if err := client.Generate(ctx, &request, fn); err != nil {
|
||||
@@ -595,9 +585,7 @@ func generate(cmd *cobra.Command, opts generateOptions) error {
|
||||
latest.Summary()
|
||||
}
|
||||
|
||||
ctx = context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context)
|
||||
cmd.SetContext(ctx)
|
||||
|
||||
cmd.SetContext(context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -610,31 +598,11 @@ const (
|
||||
MultilineTemplate
|
||||
)
|
||||
|
||||
func modelIsMultiModal(cmd *cobra.Command, name string) bool {
|
||||
// get model details
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't connect to ollama server")
|
||||
return false
|
||||
}
|
||||
|
||||
req := api.ShowRequest{Name: name}
|
||||
resp, err := client.Show(cmd.Context(), &req)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return slices.Contains(resp.Details.Families, "clip")
|
||||
}
|
||||
|
||||
func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
multiModal := modelIsMultiModal(cmd, opts.Model)
|
||||
|
||||
// load the model
|
||||
loadOpts := generateOptions{
|
||||
Model: opts.Model,
|
||||
Prompt: "",
|
||||
Images: []ImageData{},
|
||||
}
|
||||
if err := generate(cmd, loadOpts); err != nil {
|
||||
return err
|
||||
@@ -654,7 +622,7 @@ func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
usageSet := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter ... Set a parameter")
|
||||
fmt.Fprintln(os.Stderr, " /set system <string> Set system message")
|
||||
fmt.Fprintln(os.Stderr, " /set system <string> Set system prompt")
|
||||
fmt.Fprintln(os.Stderr, " /set template <string> Set prompt template")
|
||||
fmt.Fprintln(os.Stderr, " /set history Enable history")
|
||||
fmt.Fprintln(os.Stderr, " /set nohistory Disable history")
|
||||
@@ -672,7 +640,7 @@ func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
fmt.Fprintln(os.Stderr, " /show license Show model license")
|
||||
fmt.Fprintln(os.Stderr, " /show modelfile Show Modelfile for this model")
|
||||
fmt.Fprintln(os.Stderr, " /show parameters Show parameters for this model")
|
||||
fmt.Fprintln(os.Stderr, " /show system Show system message")
|
||||
fmt.Fprintln(os.Stderr, " /show system Show system prompt")
|
||||
fmt.Fprintln(os.Stderr, " /show template Show prompt template")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
@@ -733,10 +701,9 @@ func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
// if the prompt so far starts with """ then we're in multiline mode
|
||||
// and we need to keep reading until we find a line that ends with """
|
||||
cut, found := strings.CutSuffix(line, `"""`)
|
||||
prompt += cut
|
||||
prompt += cut + "\n"
|
||||
|
||||
if !found {
|
||||
prompt += "\n"
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -747,11 +714,11 @@ func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
case MultilineSystem:
|
||||
opts.System = prompt
|
||||
prompt = ""
|
||||
fmt.Println("Set system message.")
|
||||
fmt.Println("Set system template.")
|
||||
case MultilineTemplate:
|
||||
opts.Template = prompt
|
||||
prompt = ""
|
||||
fmt.Println("Set prompt template.")
|
||||
fmt.Println("Set model template.")
|
||||
}
|
||||
multiline = MultilineNone
|
||||
case strings.HasPrefix(line, `"""`) && len(prompt) == 0:
|
||||
@@ -822,18 +789,17 @@ func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
line = strings.TrimPrefix(line, `"""`)
|
||||
if strings.HasPrefix(args[2], `"""`) {
|
||||
cut, found := strings.CutSuffix(line, `"""`)
|
||||
prompt += cut
|
||||
prompt += cut + "\n"
|
||||
if found {
|
||||
opts.System = prompt
|
||||
if args[1] == "system" {
|
||||
opts.System = prompt
|
||||
fmt.Println("Set system message.")
|
||||
fmt.Println("Set system template.")
|
||||
} else {
|
||||
opts.Template = prompt
|
||||
fmt.Println("Set prompt template.")
|
||||
}
|
||||
prompt = ""
|
||||
} else {
|
||||
prompt = `"""` + prompt + "\n"
|
||||
prompt = `"""` + prompt
|
||||
if args[1] == "system" {
|
||||
multiline = MultilineSystem
|
||||
} else {
|
||||
@@ -843,7 +809,7 @@ func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
}
|
||||
} else {
|
||||
opts.System = line
|
||||
fmt.Println("Set system message.")
|
||||
fmt.Println("Set system template.")
|
||||
}
|
||||
default:
|
||||
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
|
||||
@@ -895,7 +861,7 @@ func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
case resp.System != "":
|
||||
fmt.Println(resp.System + "\n")
|
||||
default:
|
||||
fmt.Print("No system message was specified for this model.\n\n")
|
||||
fmt.Print("No system prompt was specified for this model.\n\n")
|
||||
}
|
||||
case "template":
|
||||
switch {
|
||||
@@ -936,26 +902,6 @@ func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
|
||||
if len(prompt) > 0 && multiline == MultilineNone {
|
||||
opts.Prompt = prompt
|
||||
if multiModal {
|
||||
newPrompt, images, err := extractFileNames(prompt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts.Prompt = newPrompt
|
||||
|
||||
// reset the context if we find another image
|
||||
if len(images) > 0 {
|
||||
opts.Images = images
|
||||
ctx := cmd.Context()
|
||||
ctx = context.WithValue(ctx, generateContextKey("context"), []int{})
|
||||
cmd.SetContext(ctx)
|
||||
}
|
||||
if len(opts.Images) == 0 {
|
||||
fmt.Println("This model requires you to add a jpeg, png, or svg image.\n")
|
||||
prompt = ""
|
||||
continue
|
||||
}
|
||||
}
|
||||
if err := generate(cmd, opts); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -965,57 +911,6 @@ func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
}
|
||||
}
|
||||
|
||||
func normalizeFilePath(fp string) string {
|
||||
// Define a map of escaped characters and their replacements
|
||||
replacements := map[string]string{
|
||||
"\\ ": " ", // Escaped space
|
||||
"\\(": "(", // Escaped left parenthesis
|
||||
"\\)": ")", // Escaped right parenthesis
|
||||
"\\[": "[", // Escaped left square bracket
|
||||
"\\]": "]", // Escaped right square bracket
|
||||
"\\{": "{", // Escaped left curly brace
|
||||
"\\}": "}", // Escaped right curly brace
|
||||
"\\$": "$", // Escaped dollar sign
|
||||
"\\&": "&", // Escaped ampersand
|
||||
"\\;": ";", // Escaped semicolon
|
||||
"\\'": "'", // Escaped single quote
|
||||
"\\\\": "\\", // Escaped backslash
|
||||
"\\*": "*", // Escaped asterisk
|
||||
"\\?": "?", // Escaped question mark
|
||||
}
|
||||
|
||||
for escaped, actual := range replacements {
|
||||
fp = strings.ReplaceAll(fp, escaped, actual)
|
||||
}
|
||||
return fp
|
||||
}
|
||||
|
||||
func extractFileNames(input string) (string, []ImageData, error) {
|
||||
// Regex to match file paths starting with / or ./ and include escaped spaces (\ or %20)
|
||||
// and followed by more characters and a file extension
|
||||
regexPattern := `(?:\./|/)[\S\\ ]+?\.(?i:jpg|jpeg|png|svg)\b`
|
||||
re := regexp.MustCompile(regexPattern)
|
||||
|
||||
filePaths := re.FindAllString(input, -1)
|
||||
var imgs []ImageData
|
||||
|
||||
for _, fp := range filePaths {
|
||||
nfp := normalizeFilePath(fp)
|
||||
data, err := getImageData(nfp)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("Couldn't process image: %q\n", err)
|
||||
return "", imgs, err
|
||||
}
|
||||
fmt.Printf("Added image '%s'\n", nfp)
|
||||
input = strings.ReplaceAll(input, fp, "")
|
||||
imgs = append(imgs, data)
|
||||
}
|
||||
return input, imgs, nil
|
||||
}
|
||||
|
||||
func RunServer(cmd *cobra.Command, _ []string) error {
|
||||
host, port, err := net.SplitHostPort(os.Getenv("OLLAMA_HOST"))
|
||||
if err != nil {
|
||||
@@ -1042,50 +937,6 @@ func RunServer(cmd *cobra.Command, _ []string) error {
|
||||
return server.Serve(ln, origins)
|
||||
}
|
||||
|
||||
func getImageData(filePath string) ([]byte, error) {
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
buf := make([]byte, 512)
|
||||
_, err = file.Read(buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
contentType := http.DetectContentType(buf)
|
||||
allowedTypes := []string{"image/jpeg", "image/jpg", "image/svg+xml", "image/png"}
|
||||
if !slices.Contains(allowedTypes, contentType) {
|
||||
return nil, fmt.Errorf("invalid image type: %s", contentType)
|
||||
}
|
||||
|
||||
info, err := file.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Check if the file size exceeds 100MB
|
||||
var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
|
||||
if info.Size() > maxSize {
|
||||
return nil, fmt.Errorf("file size exceeds maximum limit (100MB).")
|
||||
}
|
||||
|
||||
buf = make([]byte, info.Size())
|
||||
_, err = file.Seek(0, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
_, err = io.ReadFull(file, buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
func initializeKeypair() error {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
@@ -1252,7 +1103,7 @@ func NewCLI() *cobra.Command {
|
||||
showCmd.Flags().Bool("modelfile", false, "Show Modelfile of a model")
|
||||
showCmd.Flags().Bool("parameters", false, "Show parameters of a model")
|
||||
showCmd.Flags().Bool("template", false, "Show template of a model")
|
||||
showCmd.Flags().Bool("system", false, "Show system message of a model")
|
||||
showCmd.Flags().Bool("system", false, "Show system prompt of a model")
|
||||
|
||||
runCmd := &cobra.Command{
|
||||
Use: "run MODEL [PROMPT]",
|
||||
|
||||
11
docs/api.md
11
docs/api.md
@@ -3,7 +3,6 @@
|
||||
## Endpoints
|
||||
|
||||
- [Generate a completion](#generate-a-completion)
|
||||
- [Generate a chat completion](#generate-a-chat-completion)
|
||||
- [Create a Model](#create-a-model)
|
||||
- [List Local Models](#list-local-models)
|
||||
- [Show Model Information](#show-model-information)
|
||||
@@ -44,7 +43,7 @@ Advanced parameters (optional):
|
||||
|
||||
- `format`: the format to return a response in. Currently the only accepted value is `json`
|
||||
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
|
||||
- `system`: system message to (overrides what is defined in the `Modelfile`)
|
||||
- `system`: system prompt to (overrides what is defined in the `Modelfile`)
|
||||
- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
|
||||
- `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
|
||||
- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
|
||||
@@ -253,7 +252,7 @@ curl http://localhost:11434/api/generate -d '{
|
||||
"penalize_newline": true,
|
||||
"stop": ["\n", "user:"],
|
||||
"numa": false,
|
||||
"num_ctx": 1024,
|
||||
"num_ctx": 4,
|
||||
"num_batch": 2,
|
||||
"num_gqa": 1,
|
||||
"num_gpu": 1,
|
||||
@@ -268,7 +267,7 @@ curl http://localhost:11434/api/generate -d '{
|
||||
"rope_frequency_base": 1.1,
|
||||
"rope_frequency_scale": 0.8,
|
||||
"num_thread": 8
|
||||
}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
@@ -291,7 +290,7 @@ curl http://localhost:11434/api/generate -d '{
|
||||
}
|
||||
```
|
||||
|
||||
## Generate a chat completion
|
||||
## Send Chat Messages (coming in 0.1.14)
|
||||
|
||||
```shell
|
||||
POST /api/chat
|
||||
@@ -548,7 +547,7 @@ A single JSON object will be returned.
|
||||
POST /api/show
|
||||
```
|
||||
|
||||
Show details about a model including modelfile, template, parameters, license, and system message.
|
||||
Show details about a model including modelfile, template, parameters, license, and system prompt.
|
||||
|
||||
### Parameters
|
||||
|
||||
|
||||
@@ -95,6 +95,10 @@ The manifest lists all the layers used in this model. You will see a `media type
|
||||
|
||||
To modify where models are stored, you can use the `OLLAMA_MODELS` environment variable. Note that on Linux this means defining `OLLAMA_MODELS` in a drop-in `/etc/systemd/system/ollama.service.d` service file, reloading systemd, and restarting the ollama service.
|
||||
|
||||
### I downloaded most of a model yesterday, but it's gone today. What happened?
|
||||
|
||||
When the Ollama server starts, it looks for fragments of models that still exist on the system and cleans them out. If you have an Internet connection that can't complete a model download all at once, this can be frustrating. Adding the OLLAMA_NOPRUNE environment variable will prevent the server from pruning incomplete files.
|
||||
|
||||
## Does Ollama send my prompts and answers back to Ollama.ai to use in any way?
|
||||
|
||||
No. Anything you do with Ollama, such as generate a response from the model, stays with you. We don't collect any data about how you use the model. You are always in control of your own data.
|
||||
|
||||
@@ -30,14 +30,14 @@ The format of the `Modelfile`:
|
||||
INSTRUCTION arguments
|
||||
```
|
||||
|
||||
| Instruction | Description |
|
||||
| ----------------------------------- | -------------------------------------------------------------- |
|
||||
| [`FROM`](#from-required) (required) | Defines the base model to use. |
|
||||
| [`PARAMETER`](#parameter) | Sets the parameters for how Ollama will run the model. |
|
||||
| [`TEMPLATE`](#template) | The full prompt template to be sent to the model. |
|
||||
| [`SYSTEM`](#system) | Specifies the system message that will be set in the template. |
|
||||
| [`ADAPTER`](#adapter) | Defines the (Q)LoRA adapters to apply to the model. |
|
||||
| [`LICENSE`](#license) | Specifies the legal license. |
|
||||
| Instruction | Description |
|
||||
| ----------------------------------- | ------------------------------------------------------------- |
|
||||
| [`FROM`](#from-required) (required) | Defines the base model to use. |
|
||||
| [`PARAMETER`](#parameter) | Sets the parameters for how Ollama will run the model. |
|
||||
| [`TEMPLATE`](#template) | The full prompt template to be sent to the model. |
|
||||
| [`SYSTEM`](#system) | Specifies the system prompt that will be set in the template. |
|
||||
| [`ADAPTER`](#adapter) | Defines the (Q)LoRA adapters to apply to the model. |
|
||||
| [`LICENSE`](#license) | Specifies the legal license. |
|
||||
|
||||
## Examples
|
||||
|
||||
@@ -52,7 +52,7 @@ PARAMETER temperature 1
|
||||
# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
|
||||
PARAMETER num_ctx 4096
|
||||
|
||||
# sets a custom system message to specify the behavior of the chat assistant
|
||||
# sets a custom system prompt to specify the behavior of the chat assistant
|
||||
SYSTEM You are Mario from super mario bros, acting as an assistant.
|
||||
```
|
||||
|
||||
@@ -70,9 +70,9 @@ More examples are available in the [examples directory](../examples).
|
||||
There are two ways to view `Modelfile`s underlying the models in [ollama.ai/library][1]:
|
||||
|
||||
- Option 1: view a details page from a model's tags page:
|
||||
1. Go to a particular model's tags (e.g. https://ollama.ai/library/llama2/tags)
|
||||
2. Click on a tag (e.g. https://ollama.ai/library/llama2:13b)
|
||||
3. Scroll down to "Layers"
|
||||
1. Go to a particular model's tags (e.g. https://ollama.ai/library/llama2/tags)
|
||||
2. Click on a tag (e.g. https://ollama.ai/library/llama2:13b)
|
||||
3. Scroll down to "Layers"
|
||||
- Note: if the [`FROM` instruction](#from-required) is not present,
|
||||
it means the model was created from a local file
|
||||
- Option 2: use `ollama show` to print the `Modelfile` like so:
|
||||
@@ -152,15 +152,15 @@ PARAMETER <parameter> <parametervalue>
|
||||
|
||||
### TEMPLATE
|
||||
|
||||
`TEMPLATE` of the full prompt template to be passed into the model. It may include (optionally) a system message and a user's prompt. This is used to create a full custom prompt, and syntax may be model specific. You can usually find the template for a given model in the readme for that model.
|
||||
`TEMPLATE` of the full prompt template to be passed into the model. It may include (optionally) a system prompt and a user's prompt. This is used to create a full custom prompt, and syntax may be model specific. You can usually find the template for a given model in the readme for that model.
|
||||
|
||||
#### Template Variables
|
||||
|
||||
| Variable | Description |
|
||||
| --------------- | ------------------------------------------------------------------------------------------------------------- |
|
||||
| `{{ .System }}` | The system message used to specify custom behavior, this must also be set in the Modelfile as an instruction. |
|
||||
| `{{ .Prompt }}` | The incoming prompt, this is not specified in the model file and will be set based on input. |
|
||||
| `{{ .First }}` | A boolean value used to render specific template information for the first generation of a session. |
|
||||
| Variable | Description |
|
||||
| --------------- | ------------------------------------------------------------------------------------------------------------ |
|
||||
| `{{ .System }}` | The system prompt used to specify custom behavior, this must also be set in the Modelfile as an instruction. |
|
||||
| `{{ .Prompt }}` | The incoming prompt, this is not specified in the model file and will be set based on input. |
|
||||
| `{{ .First }}` | A boolean value used to render specific template information for the first generation of a session. |
|
||||
|
||||
```modelfile
|
||||
TEMPLATE """
|
||||
@@ -180,7 +180,7 @@ SYSTEM """<system message>"""
|
||||
|
||||
### SYSTEM
|
||||
|
||||
The `SYSTEM` instruction specifies the system message to be used in the template, if applicable.
|
||||
The `SYSTEM` instruction specifies the system prompt to be used in the template, if applicable.
|
||||
|
||||
```modelfile
|
||||
SYSTEM """<system message>"""
|
||||
|
||||
20
llm/falcon.go
Normal file
20
llm/falcon.go
Normal file
@@ -0,0 +1,20 @@
|
||||
package llm
|
||||
|
||||
const (
|
||||
falconModelType7B = 32
|
||||
falconModelType40B = 60
|
||||
falconModelType180B = 80
|
||||
)
|
||||
|
||||
func falconModelType(numLayer uint32) string {
|
||||
switch numLayer {
|
||||
case 32:
|
||||
return "7B"
|
||||
case 60:
|
||||
return "40B"
|
||||
case 80:
|
||||
return "180B"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
14
llm/ggml.go
14
llm/ggml.go
@@ -93,8 +93,6 @@ func (c *containerGGML) Name() string {
|
||||
}
|
||||
|
||||
func (c *containerGGML) Decode(ro *readSeekOffset) (model, error) {
|
||||
// file contents aren't decoded
|
||||
ro.Seek(0, io.SeekEnd)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -117,10 +115,6 @@ func (c *containerGGMF) Decode(ro *readSeekOffset) (model, error) {
|
||||
}
|
||||
|
||||
c.version = version
|
||||
|
||||
// remaining file contents aren't decoded
|
||||
ro.Seek(0, io.SeekEnd)
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -147,10 +141,6 @@ func (c *containerGGJT) Decode(ro *readSeekOffset) (model, error) {
|
||||
// different model types may have different layouts for hyperparameters
|
||||
var llama llamaModel
|
||||
binary.Read(ro, binary.LittleEndian, &llama.hyperparameters)
|
||||
|
||||
// remaining file contents aren't decoded
|
||||
ro.Seek(0, io.SeekEnd)
|
||||
|
||||
return &llama, nil
|
||||
}
|
||||
|
||||
@@ -173,10 +163,6 @@ func (c *containerLORA) Decode(ro *readSeekOffset) (model, error) {
|
||||
}
|
||||
|
||||
c.version = version
|
||||
|
||||
// remaining file contents aren't decoded
|
||||
ro.Seek(0, io.SeekEnd)
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
|
||||
21
llm/gguf.go
21
llm/gguf.go
@@ -120,6 +120,27 @@ func (llm *ggufModel) ModelType() string {
|
||||
return format.HumanNumber(llm.parameters)
|
||||
}
|
||||
|
||||
switch llm.ModelFamily() {
|
||||
case "llama":
|
||||
if blocks, ok := llm.kv["llama.block_count"].(uint32); ok {
|
||||
heads, headsOK := llm.kv["llama.head_count"].(uint32)
|
||||
headKVs, headsKVsOK := llm.kv["llama.head_count_kv"].(uint32)
|
||||
if headsOK && headsKVsOK && heads/headKVs == 8 {
|
||||
return "70B"
|
||||
}
|
||||
|
||||
return llamaModelType(blocks)
|
||||
}
|
||||
case "falcon":
|
||||
if blocks, ok := llm.kv["falcon.block_count"].(uint32); ok {
|
||||
return falconModelType(blocks)
|
||||
}
|
||||
case "starcoder":
|
||||
if blocks, ok := llm.kv["starcoder.block_count"].(uint32); ok {
|
||||
return starCoderModelType(blocks)
|
||||
}
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
|
||||
173
llm/llama.go
173
llm/llama.go
@@ -59,7 +59,6 @@ ws ::= ([ \t\n] ws)?
|
||||
var llamaCppEmbed embed.FS
|
||||
|
||||
type ModelRunner struct {
|
||||
Type string // "gguf" or "ggml"
|
||||
Path string // path to the model runner executable
|
||||
Accelerated bool
|
||||
}
|
||||
@@ -73,25 +72,25 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
if runtime.GOARCH == "arm64" {
|
||||
runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
|
||||
runners = []ModelRunner{{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
|
||||
} else {
|
||||
runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
|
||||
runners = []ModelRunner{{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
|
||||
}
|
||||
case "linux":
|
||||
runners = []ModelRunner{
|
||||
{Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
|
||||
{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
|
||||
{Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
|
||||
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
|
||||
}
|
||||
case "windows":
|
||||
// TODO: select windows GPU runner here when available
|
||||
runners = []ModelRunner{
|
||||
{Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
|
||||
{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
|
||||
{Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
|
||||
{Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
|
||||
}
|
||||
default:
|
||||
log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
|
||||
runners = []ModelRunner{
|
||||
{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
|
||||
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -149,7 +148,6 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
|
||||
for _, r := range runners {
|
||||
// clean the ModelRunner paths so that they match the OS we are running on
|
||||
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{
|
||||
Type: r.Type,
|
||||
Path: filepath.Clean(path.Join(workDir, r.Path)),
|
||||
Accelerated: r.Accelerated,
|
||||
})
|
||||
@@ -223,14 +221,8 @@ type Running struct {
|
||||
*StatusWriter // captures error messages from the llama runner process
|
||||
}
|
||||
|
||||
type ImageData struct {
|
||||
Data []byte `json:"data"`
|
||||
ID int `json:"id"`
|
||||
}
|
||||
|
||||
type llama struct {
|
||||
api.Options
|
||||
ImageData []ImageData
|
||||
Running
|
||||
}
|
||||
|
||||
@@ -410,13 +402,11 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner
|
||||
}
|
||||
|
||||
port := rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
|
||||
params := append(params, "--port", strconv.Itoa(port))
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cmd := exec.CommandContext(
|
||||
ctx,
|
||||
runner.Path,
|
||||
params...,
|
||||
append(params, "--port", strconv.Itoa(port))...,
|
||||
)
|
||||
|
||||
var libraryPaths []string
|
||||
@@ -545,17 +535,17 @@ type prediction struct {
|
||||
}
|
||||
|
||||
const maxBufferSize = 512 * format.KiloByte
|
||||
const maxRetries = 6
|
||||
|
||||
type PredictOpts struct {
|
||||
Model string
|
||||
Prompt string
|
||||
Format string
|
||||
Images []api.ImageData
|
||||
CheckpointStart time.Time
|
||||
CheckpointLoaded time.Time
|
||||
}
|
||||
|
||||
type PredictResult struct {
|
||||
Model string
|
||||
CreatedAt time.Time
|
||||
TotalDuration time.Duration
|
||||
LoadDuration time.Duration
|
||||
@@ -567,20 +557,7 @@ type PredictResult struct {
|
||||
EvalDuration time.Duration
|
||||
}
|
||||
|
||||
// IsRetryable checks if the line matches a condition that can be retried
|
||||
func isRetryable(line []byte) bool {
|
||||
return bytes.Contains(line, []byte("slot unavailable"))
|
||||
}
|
||||
|
||||
func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(PredictResult)) error {
|
||||
imageData := llm.ImageData
|
||||
if len(predict.Images) > 0 {
|
||||
for cnt, i := range predict.Images {
|
||||
imageData = append(imageData, ImageData{Data: i, ID: cnt})
|
||||
}
|
||||
}
|
||||
log.Printf("loaded %d images", len(imageData))
|
||||
|
||||
request := map[string]any{
|
||||
"prompt": predict.Prompt,
|
||||
"stream": true,
|
||||
@@ -602,78 +579,59 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
|
||||
"penalize_nl": llm.PenalizeNewline,
|
||||
"seed": llm.Seed,
|
||||
"stop": llm.Stop,
|
||||
"image_data": imageData,
|
||||
}
|
||||
|
||||
if predict.Format == "json" {
|
||||
request["grammar"] = jsonGrammar
|
||||
}
|
||||
|
||||
retryDelay := 100 * time.Microsecond
|
||||
for retries := 0; retries < maxRetries; retries++ {
|
||||
if retries > 0 {
|
||||
time.Sleep(retryDelay) // wait before retrying
|
||||
retryDelay *= 2 // exponential backoff
|
||||
}
|
||||
// Handling JSON marshaling with special characters unescaped.
|
||||
buffer := &bytes.Buffer{}
|
||||
enc := json.NewEncoder(buffer)
|
||||
enc.SetEscapeHTML(false)
|
||||
|
||||
// Handling JSON marshaling with special characters unescaped.
|
||||
buffer := &bytes.Buffer{}
|
||||
enc := json.NewEncoder(buffer)
|
||||
enc.SetEscapeHTML(false)
|
||||
if err := enc.Encode(request); err != nil {
|
||||
return fmt.Errorf("failed to marshal data: %v", err)
|
||||
}
|
||||
|
||||
if err := enc.Encode(request); err != nil {
|
||||
return fmt.Errorf("failed to marshal data: %v", err)
|
||||
}
|
||||
endpoint := fmt.Sprintf("http://127.0.0.1:%d/completion", llm.Port)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, buffer)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating POST request: %v", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
endpoint := fmt.Sprintf("http://127.0.0.1:%d/completion", llm.Port)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, buffer)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("POST predict: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
bodyBytes, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating POST request: %v", err)
|
||||
return fmt.Errorf("failed reading llm error response: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
log.Printf("llm predict error: %s", bodyBytes)
|
||||
return fmt.Errorf("%s", bodyBytes)
|
||||
}
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("POST predict: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
bodyBytes, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading llm error response: %w", err)
|
||||
scanner := bufio.NewScanner(resp.Body)
|
||||
// increase the buffer size to avoid running out of space
|
||||
buf := make([]byte, 0, maxBufferSize)
|
||||
scanner.Buffer(buf, maxBufferSize)
|
||||
for scanner.Scan() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
// This handles the request cancellation
|
||||
return ctx.Err()
|
||||
default:
|
||||
line := scanner.Bytes()
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
log.Printf("llm predict error: %s", bodyBytes)
|
||||
return fmt.Errorf("%s", bodyBytes)
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(resp.Body)
|
||||
// increase the buffer size to avoid running out of space
|
||||
buf := make([]byte, 0, maxBufferSize)
|
||||
scanner.Buffer(buf, maxBufferSize)
|
||||
|
||||
retryNeeded := false
|
||||
for scanner.Scan() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
// This handles the request cancellation
|
||||
return ctx.Err()
|
||||
default:
|
||||
line := scanner.Bytes()
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if isRetryable(line) {
|
||||
retryNeeded = true
|
||||
break
|
||||
}
|
||||
|
||||
evt, ok := bytes.CutPrefix(line, []byte("data: "))
|
||||
if !ok {
|
||||
return fmt.Errorf("error parsing llm response stream: %s", line)
|
||||
}
|
||||
|
||||
if evt, ok := bytes.CutPrefix(line, []byte("data: ")); ok {
|
||||
var p prediction
|
||||
if err := json.Unmarshal(evt, &p); err != nil {
|
||||
return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
|
||||
@@ -681,6 +639,7 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
|
||||
|
||||
if p.Content != "" {
|
||||
fn(PredictResult{
|
||||
Model: predict.Model,
|
||||
CreatedAt: time.Now().UTC(),
|
||||
Content: p.Content,
|
||||
})
|
||||
@@ -688,6 +647,7 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
|
||||
|
||||
if p.Stop {
|
||||
fn(PredictResult{
|
||||
Model: predict.Model,
|
||||
CreatedAt: time.Now().UTC(),
|
||||
TotalDuration: time.Since(predict.CheckpointStart),
|
||||
|
||||
@@ -701,26 +661,21 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
if strings.Contains(err.Error(), "unexpected EOF") {
|
||||
// this means the llama runner subprocess crashed
|
||||
llm.Close()
|
||||
if llm.StatusWriter != nil && llm.StatusWriter.LastErrMsg != "" {
|
||||
return fmt.Errorf("llama runner exited: %v", llm.StatusWriter.LastErrMsg)
|
||||
}
|
||||
return fmt.Errorf("llama runner exited, you may not have enough available memory to run this model")
|
||||
}
|
||||
return fmt.Errorf("error reading llm response: %v", err)
|
||||
}
|
||||
|
||||
if !retryNeeded {
|
||||
return nil // success
|
||||
}
|
||||
}
|
||||
|
||||
// should never reach here ideally
|
||||
return fmt.Errorf("max retries exceeded")
|
||||
if err := scanner.Err(); err != nil {
|
||||
if strings.Contains(err.Error(), "unexpected EOF") {
|
||||
// this means the llama runner subprocess crashed
|
||||
llm.Close()
|
||||
if llm.StatusWriter != nil && llm.StatusWriter.LastErrMsg != "" {
|
||||
return fmt.Errorf("llama runner exited: %v", llm.StatusWriter.LastErrMsg)
|
||||
}
|
||||
return fmt.Errorf("llama runner exited, you may not have enough available memory to run this model")
|
||||
}
|
||||
return fmt.Errorf("error reading llm response: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type TokenizeRequest struct {
|
||||
|
||||
23
llm/starcoder.go
Normal file
23
llm/starcoder.go
Normal file
@@ -0,0 +1,23 @@
|
||||
package llm
|
||||
|
||||
const (
|
||||
starCoderModelType1B = 24
|
||||
starCoderModelType3B = 36
|
||||
starCoderModelType7B = 42
|
||||
starCoderModelType15B = 40
|
||||
)
|
||||
|
||||
func starCoderModelType(numLayer uint32) string {
|
||||
switch numLayer {
|
||||
case 24:
|
||||
return "1B"
|
||||
case 36:
|
||||
return "3B"
|
||||
case 42:
|
||||
return "7B"
|
||||
case 40:
|
||||
return "15B"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
@@ -192,7 +192,14 @@ func (i *Instance) Readline() (string, error) {
|
||||
case CharCtrlW:
|
||||
buf.DeleteWord()
|
||||
case CharCtrlZ:
|
||||
return handleCharCtrlZ(fd, termios)
|
||||
if err := UnsetRawMode(fd, termios); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
syscall.Kill(0, syscall.SIGSTOP)
|
||||
|
||||
// on resume...
|
||||
return "", nil
|
||||
case CharEnter:
|
||||
output := buf.String()
|
||||
if output != "" {
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
//go:build !windows
|
||||
|
||||
package readline
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
func handleCharCtrlZ(fd int, termios *Termios) (string, error) {
|
||||
if err := UnsetRawMode(fd, termios); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
syscall.Kill(0, syscall.SIGSTOP)
|
||||
|
||||
// on resume...
|
||||
return "", nil
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
package readline
|
||||
|
||||
func handleCharCtrlZ(fd int, state *State) (string, error) {
|
||||
// not supported
|
||||
return "", nil
|
||||
}
|
||||
@@ -46,7 +46,6 @@ type Model struct {
|
||||
System string
|
||||
License []string
|
||||
Digest string
|
||||
Size int64
|
||||
Options map[string]interface{}
|
||||
}
|
||||
|
||||
@@ -66,7 +65,7 @@ func (m *Model) Prompt(p PromptVars) (string, error) {
|
||||
}
|
||||
|
||||
if p.System == "" {
|
||||
// use the default system message for this model if one is not specified
|
||||
// use the default system prompt for this model if one is not specified
|
||||
p.System = m.System
|
||||
}
|
||||
|
||||
@@ -86,10 +85,9 @@ func (m *Model) Prompt(p PromptVars) (string, error) {
|
||||
return prompt.String(), nil
|
||||
}
|
||||
|
||||
func (m *Model) ChatPrompt(msgs []api.Message) (string, []api.ImageData, error) {
|
||||
func (m *Model) ChatPrompt(msgs []api.Message) (string, error) {
|
||||
// build the prompt from the list of messages
|
||||
var prompt strings.Builder
|
||||
var currentImages []api.ImageData
|
||||
currentVars := PromptVars{
|
||||
First: true,
|
||||
}
|
||||
@@ -109,36 +107,35 @@ func (m *Model) ChatPrompt(msgs []api.Message) (string, []api.ImageData, error)
|
||||
case "system":
|
||||
if currentVars.System != "" {
|
||||
if err := writePrompt(); err != nil {
|
||||
return "", nil, err
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
currentVars.System = msg.Content
|
||||
case "user":
|
||||
if currentVars.Prompt != "" {
|
||||
if err := writePrompt(); err != nil {
|
||||
return "", nil, err
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
currentVars.Prompt = msg.Content
|
||||
currentImages = msg.Images
|
||||
case "assistant":
|
||||
currentVars.Response = msg.Content
|
||||
if err := writePrompt(); err != nil {
|
||||
return "", nil, err
|
||||
return "", err
|
||||
}
|
||||
default:
|
||||
return "", nil, fmt.Errorf("invalid role: %s, role must be one of [system, user, assistant]", msg.Role)
|
||||
return "", fmt.Errorf("invalid role: %s, role must be one of [system, user, assistant]", msg.Role)
|
||||
}
|
||||
}
|
||||
|
||||
// Append the last set of vars if they are non-empty
|
||||
if currentVars.Prompt != "" || currentVars.System != "" {
|
||||
if err := writePrompt(); err != nil {
|
||||
return "", nil, err
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
return prompt.String(), currentImages, nil
|
||||
return prompt.String(), nil
|
||||
}
|
||||
|
||||
type ManifestV2 struct {
|
||||
@@ -149,16 +146,12 @@ type ManifestV2 struct {
|
||||
}
|
||||
|
||||
type ConfigV2 struct {
|
||||
ModelFormat string `json:"model_format"`
|
||||
ModelFamily string `json:"model_family"`
|
||||
ModelFamilies []string `json:"model_families"`
|
||||
ModelType string `json:"model_type"`
|
||||
FileType string `json:"file_type"`
|
||||
|
||||
// required by spec
|
||||
Architecture string `json:"architecture"`
|
||||
OS string `json:"os"`
|
||||
RootFS RootFS `json:"rootfs"`
|
||||
|
||||
api.ModelConfiguration
|
||||
}
|
||||
|
||||
func (c *ConfigV2) SetModelFormat(format string) {
|
||||
@@ -245,7 +238,6 @@ func GetModel(name string) (*Model, error) {
|
||||
Digest: digest,
|
||||
Template: "{{ .Prompt }}",
|
||||
License: []string{},
|
||||
Size: manifest.GetTotalSize(),
|
||||
}
|
||||
|
||||
filename, err := GetBlobsPath(manifest.Config.Digest)
|
||||
@@ -549,7 +541,6 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
|
||||
}
|
||||
}
|
||||
|
||||
// xxx - can this be removed?
|
||||
if config.ModelType == "65B" {
|
||||
if gqa, ok := formattedParams["gqa"].(int); ok && gqa == 8 {
|
||||
config.ModelType = "70B"
|
||||
|
||||
129
server/routes.go
129
server/routes.go
@@ -156,9 +156,9 @@ func GenerateHandler(c *gin.Context) {
|
||||
defer loaded.mu.Unlock()
|
||||
|
||||
checkpointStart := time.Now()
|
||||
|
||||
var req api.GenerateRequest
|
||||
err := c.ShouldBindJSON(&req)
|
||||
|
||||
switch {
|
||||
case errors.Is(err, io.EOF):
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
|
||||
@@ -199,9 +199,10 @@ func GenerateHandler(c *gin.Context) {
|
||||
// an empty request loads the model
|
||||
if req.Prompt == "" && req.Template == "" && req.System == "" {
|
||||
c.JSON(http.StatusOK, api.GenerateResponse{
|
||||
CreatedAt: time.Now().UTC(),
|
||||
Model: req.Model,
|
||||
Done: true})
|
||||
CreatedAt: time.Now().UTC(),
|
||||
Model: req.Model,
|
||||
ModelConfiguration: model.Config.ModelConfiguration,
|
||||
Done: true})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -260,10 +261,11 @@ func GenerateHandler(c *gin.Context) {
|
||||
}
|
||||
|
||||
resp := api.GenerateResponse{
|
||||
Model: req.Model,
|
||||
CreatedAt: r.CreatedAt,
|
||||
Done: r.Done,
|
||||
Response: r.Content,
|
||||
Model: r.Model,
|
||||
ModelConfiguration: model.Config.ModelConfiguration,
|
||||
CreatedAt: r.CreatedAt,
|
||||
Done: r.Done,
|
||||
Response: r.Content,
|
||||
Metrics: api.Metrics{
|
||||
TotalDuration: r.TotalDuration,
|
||||
LoadDuration: r.LoadDuration,
|
||||
@@ -288,11 +290,11 @@ func GenerateHandler(c *gin.Context) {
|
||||
|
||||
// Start prediction
|
||||
predictReq := llm.PredictOpts{
|
||||
Model: model.Name,
|
||||
Prompt: prompt,
|
||||
Format: req.Format,
|
||||
CheckpointStart: checkpointStart,
|
||||
CheckpointLoaded: checkpointLoaded,
|
||||
Images: req.Images,
|
||||
}
|
||||
if err := loaded.runner.Predict(c.Request.Context(), predictReq, fn); err != nil {
|
||||
ch <- gin.H{"error": err.Error()}
|
||||
@@ -300,30 +302,19 @@ func GenerateHandler(c *gin.Context) {
|
||||
}()
|
||||
|
||||
if req.Stream != nil && !*req.Stream {
|
||||
// Accumulate responses into the final response
|
||||
var final api.GenerateResponse
|
||||
// Wait for the channel to close
|
||||
var r api.GenerateResponse
|
||||
var sb strings.Builder
|
||||
for resp := range ch {
|
||||
switch r := resp.(type) {
|
||||
case api.GenerateResponse:
|
||||
sb.WriteString(r.Response)
|
||||
final = r
|
||||
case gin.H:
|
||||
if errorMsg, ok := r["error"].(string); ok {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
|
||||
return
|
||||
} else {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
|
||||
return
|
||||
}
|
||||
default:
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
|
||||
var ok bool
|
||||
if r, ok = resp.(api.GenerateResponse); !ok {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
sb.WriteString(r.Response)
|
||||
}
|
||||
|
||||
final.Response = sb.String()
|
||||
c.JSON(http.StatusOK, final)
|
||||
r.Response = sb.String()
|
||||
c.JSON(http.StatusOK, r)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -615,19 +606,10 @@ func GetModelInfo(name string) (*api.ShowResponse, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
modelDetails := api.ModelDetails{
|
||||
Format: model.Config.ModelFormat,
|
||||
Family: model.Config.ModelFamily,
|
||||
Families: model.Config.ModelFamilies,
|
||||
ParameterSize: model.Config.ModelType,
|
||||
QuantizationLevel: model.Config.FileType,
|
||||
}
|
||||
|
||||
resp := &api.ShowResponse{
|
||||
License: strings.Join(model.License, "\n"),
|
||||
System: model.System,
|
||||
Template: model.Template,
|
||||
Details: modelDetails,
|
||||
}
|
||||
|
||||
mf, err := ShowModelfile(model)
|
||||
@@ -677,42 +659,25 @@ func ListModelsHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
modelResponse := func(modelName string) (api.ModelResponse, error) {
|
||||
model, err := GetModel(modelName)
|
||||
if err != nil {
|
||||
return api.ModelResponse{}, err
|
||||
}
|
||||
|
||||
modelDetails := api.ModelDetails{
|
||||
Format: model.Config.ModelFormat,
|
||||
Family: model.Config.ModelFamily,
|
||||
Families: model.Config.ModelFamilies,
|
||||
ParameterSize: model.Config.ModelType,
|
||||
QuantizationLevel: model.Config.FileType,
|
||||
}
|
||||
|
||||
return api.ModelResponse{
|
||||
Name: model.ShortName,
|
||||
Size: model.Size,
|
||||
Digest: model.Digest,
|
||||
Details: modelDetails,
|
||||
}, nil
|
||||
}
|
||||
|
||||
walkFunc := func(path string, info os.FileInfo, _ error) error {
|
||||
if !info.IsDir() {
|
||||
dir, file := filepath.Split(path)
|
||||
dir = strings.Trim(strings.TrimPrefix(dir, fp), string(os.PathSeparator))
|
||||
tag := strings.Join([]string{dir, file}, ":")
|
||||
|
||||
resp, err := modelResponse(tag)
|
||||
mp := ParseModelPath(tag)
|
||||
manifest, digest, err := GetManifest(mp)
|
||||
if err != nil {
|
||||
log.Printf("skipping file: %s", fp)
|
||||
return nil
|
||||
}
|
||||
|
||||
resp.ModifiedAt = info.ModTime()
|
||||
models = append(models, resp)
|
||||
models = append(models, api.ModelResponse{
|
||||
Name: mp.GetShortTagname(),
|
||||
Size: manifest.GetTotalSize(),
|
||||
Digest: digest,
|
||||
ModifiedAt: info.ModTime(),
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -887,7 +852,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {
|
||||
if runtime.GOOS == "linux" {
|
||||
// check compatibility to log warnings
|
||||
if _, err := llm.CheckVRAM(); err != nil {
|
||||
log.Print(err.Error())
|
||||
log.Printf(err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -994,7 +959,7 @@ func ChatHandler(c *gin.Context) {
|
||||
|
||||
checkpointLoaded := time.Now()
|
||||
|
||||
prompt, images, err := model.ChatPrompt(req.Messages)
|
||||
prompt, err := model.ChatPrompt(req.Messages)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
@@ -1011,7 +976,7 @@ func ChatHandler(c *gin.Context) {
|
||||
loaded.expireTimer.Reset(sessionDuration)
|
||||
|
||||
resp := api.ChatResponse{
|
||||
Model: req.Model,
|
||||
Model: r.Model,
|
||||
CreatedAt: r.CreatedAt,
|
||||
Done: r.Done,
|
||||
Metrics: api.Metrics{
|
||||
@@ -1033,11 +998,11 @@ func ChatHandler(c *gin.Context) {
|
||||
|
||||
// Start prediction
|
||||
predictReq := llm.PredictOpts{
|
||||
Model: model.Name,
|
||||
Prompt: prompt,
|
||||
Format: req.Format,
|
||||
CheckpointStart: checkpointStart,
|
||||
CheckpointLoaded: checkpointLoaded,
|
||||
Images: images,
|
||||
}
|
||||
if err := loaded.runner.Predict(c.Request.Context(), predictReq, fn); err != nil {
|
||||
ch <- gin.H{"error": err.Error()}
|
||||
@@ -1045,33 +1010,21 @@ func ChatHandler(c *gin.Context) {
|
||||
}()
|
||||
|
||||
if req.Stream != nil && !*req.Stream {
|
||||
// Accumulate responses into the final response
|
||||
var final api.ChatResponse
|
||||
// Wait for the channel to close
|
||||
var r api.ChatResponse
|
||||
var sb strings.Builder
|
||||
for resp := range ch {
|
||||
switch r := resp.(type) {
|
||||
case api.ChatResponse:
|
||||
if r.Message != nil {
|
||||
sb.WriteString(r.Message.Content)
|
||||
}
|
||||
|
||||
final = r
|
||||
case gin.H:
|
||||
if errorMsg, ok := r["error"].(string); ok {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
|
||||
return
|
||||
} else {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
|
||||
return
|
||||
}
|
||||
default:
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
|
||||
var ok bool
|
||||
if r, ok = resp.(api.ChatResponse); !ok {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
if r.Message != nil {
|
||||
sb.WriteString(r.Message.Content)
|
||||
}
|
||||
}
|
||||
|
||||
final.Message = &api.Message{Role: "assistant", Content: sb.String()}
|
||||
c.JSON(http.StatusOK, final)
|
||||
r.Message = &api.Message{Role: "assistant", Content: sb.String()}
|
||||
c.JSON(http.StatusOK, r)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user