Compare commits

..

6 Commits

Author SHA1 Message Date
ParthSareen
6556540655 User interface prototype 2024-12-19 16:43:36 -08:00
ParthSareen
3f60fd57e3 Remove /template API 2024-12-19 14:47:51 -08:00
ParthSareen
38cd80d52c Add dry run option for chat request 2024-12-19 14:17:29 -08:00
ParthSareen
c9a46140e6 Warn user on truncation - ollama logs 2024-12-19 13:48:25 -08:00
ParthSareen
1d529d8b7b Add /template endpoint 2024-12-18 15:23:27 -08:00
Jeffrey Morgan
a72f2dce45 scripts: sign renamed macOS binary (#8131) 2024-12-17 18:03:49 -08:00
12 changed files with 84 additions and 193 deletions

View File

@@ -103,10 +103,18 @@ type ChatRequest struct {
// Tools is an optional list of tools the model has access to.
Tools `json:"tools,omitempty"`
Debug *Debug `json:"debug,omitempty"`
Dry bool `json:"dry,omitempty"`
// Options lists model-specific options.
Options map[string]interface{} `json:"options"`
}
type Debug struct {
Include []string `json:"include,omitempty"`
}
type Tools []Tool
func (t Tools) String() string {
@@ -190,6 +198,8 @@ type ChatResponse struct {
Message Message `json:"message"`
DoneReason string `json:"done_reason,omitempty"`
Debug map[string]any `json:"debug,omitempty"`
Done bool `json:"done"`
Metrics

View File

@@ -8,7 +8,6 @@ import (
"crypto/ed25519"
"crypto/rand"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"encoding/pem"
"errors"
@@ -18,11 +17,9 @@ import (
"math"
"net"
"net/http"
"net/url"
"os"
"os/signal"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
@@ -33,13 +30,11 @@ import (
"github.com/containerd/console"
"github.com/mattn/go-runewidth"
"github.com/olekukonko/tablewriter"
"github.com/pkg/browser"
"github.com/spf13/cobra"
"golang.org/x/crypto/ssh"
"golang.org/x/term"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/auth"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/llama"
@@ -47,7 +42,6 @@ import (
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/progress"
"github.com/ollama/ollama/server"
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
)
@@ -522,64 +516,6 @@ func RunHandler(cmd *cobra.Command, args []string) error {
return generate(cmd, opts)
}
// unknownKey handles key validation when a connection fails due to an unknown key.
// It attempts to open the browser for interactive sessions to let users connect their key,
// falling back to command-line instructions for non-interactive sessions.
// Returns nil if browser flow succeeds, or an error with connection instructions otherwise.
func unknownKey(unknownKeyErr error) error {
// find SSH public key in the error message
// TODO (brucemacd): the API should return structured errors so that this message parsing isn't needed
sshKeyPattern := `ssh-\w+ [^\s"]+`
re := regexp.MustCompile(sshKeyPattern)
matches := re.FindStringSubmatch(unknownKeyErr.Error())
if len(matches) > 0 {
serverPubKey := matches[0]
localPubKey, err := auth.GetPublicKey()
if err != nil {
return unknownKeyErr
}
if runtime.GOOS == "linux" && serverPubKey != localPubKey {
// try the ollama service public key
svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
if err != nil {
return unknownKeyErr
}
localPubKey = strings.TrimSpace(string(svcPubKey))
}
// check if the returned public key matches the local public key, this prevents adding a remote key to the user's account
if serverPubKey != localPubKey {
return unknownKeyErr
}
if term.IsTerminal(int(os.Stdout.Fd())) && !envconfig.Noninteractive() {
// URL encode the key and device name for the browser URL
encodedKey := base64.RawURLEncoding.EncodeToString([]byte(localPubKey))
d, _ := os.Hostname()
encodedDevice := url.QueryEscape(d)
browserURL := fmt.Sprintf("https://ollama.com/connect?host=%s&key=%s", encodedDevice, encodedKey)
if err := browser.OpenURL(browserURL); err == nil {
fmt.Println("Opening browser to connect your device...")
return nil
}
}
var msg strings.Builder
msg.WriteString(unknownKeyErr.Error())
msg.WriteString("\n\nYour ollama key is:\n")
msg.WriteString(localPubKey)
msg.WriteString("\nAdd your key at:\n")
msg.WriteString("https://ollama.com/settings/keys")
return errors.New(msg.String())
}
return unknownKeyErr
}
func PushHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
@@ -628,19 +564,10 @@ func PushHandler(cmd *cobra.Command, args []string) error {
request := api.PushRequest{Name: args[0], Insecure: insecure}
n := model.ParseName(args[0])
isOllamaHost := strings.HasSuffix(n.Host, ".ollama.ai") || strings.HasSuffix(n.Host, ".ollama.com")
if err := client.Push(cmd.Context(), &request, fn); err != nil {
if spinner != nil {
spinner.Stop()
}
if p != nil {
p.Stop()
}
if strings.Contains(err.Error(), errtypes.UnknownOllamaKeyErrMsg) && isOllamaHost {
// the user has not added their ollama key to ollama.com
// return an error with a more user-friendly message
return unknownKey(err)
}
if strings.Contains(err.Error(), "access denied") {
return errors.New("you are not authorized to push to this namespace, create the model under a namespace you own")
}
@@ -651,7 +578,7 @@ func PushHandler(cmd *cobra.Command, args []string) error {
spinner.Stop()
destination := n.String()
if isOllamaHost {
if strings.HasSuffix(n.Host, ".ollama.ai") || strings.HasSuffix(n.Host, ".ollama.com") {
destination = "https://ollama.com/" + strings.TrimSuffix(n.DisplayShortest(), ":latest")
}
fmt.Printf("\nYou can find your model at:\n\n")
@@ -1547,8 +1474,6 @@ func NewCLI() *cobra.Command {
envVars["OLLAMA_GPU_OVERHEAD"],
envVars["OLLAMA_LOAD_TIMEOUT"],
})
case pushCmd:
appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_NONINTERACTIVE"]})
default:
appendEnvDocs(cmd, envs)
}

View File

@@ -15,7 +15,6 @@ import (
"github.com/spf13/cobra"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/types/errtypes"
)
func TestShowInfo(t *testing.T) {
@@ -369,13 +368,15 @@ func TestGetModelfileName(t *testing.T) {
func TestPushHandler(t *testing.T) {
tests := []struct {
name string
modelName string
serverResponse map[string]func(w http.ResponseWriter, r *http.Request)
expectedError string
expectedOutput string
}{
{
modelName: "successful-push",
name: "successful push",
modelName: "test-model",
serverResponse: map[string]func(w http.ResponseWriter, r *http.Request){
"/api/push": func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost {
@@ -388,8 +389,8 @@ func TestPushHandler(t *testing.T) {
return
}
if req.Name != "successful-push" {
t.Errorf("expected model name 'successful-push', got %s", req.Name)
if req.Name != "test-model" {
t.Errorf("expected model name 'test-model', got %s", req.Name)
}
// Simulate progress updates
@@ -408,10 +409,11 @@ func TestPushHandler(t *testing.T) {
}
},
},
expectedOutput: "\nYou can find your model at:\n\n\thttps://ollama.com/successful-push\n",
expectedOutput: "\nYou can find your model at:\n\n\thttps://ollama.com/test-model\n",
},
{
modelName: "unauthorized-push",
name: "unauthorized push",
modelName: "unauthorized-model",
serverResponse: map[string]func(w http.ResponseWriter, r *http.Request){
"/api/push": func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
@@ -426,29 +428,10 @@ func TestPushHandler(t *testing.T) {
},
expectedError: "you are not authorized to push to this namespace, create the model under a namespace you own",
},
{
modelName: "unknown-key-err",
serverResponse: map[string]func(w http.ResponseWriter, r *http.Request){
"/api/push": func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusUnauthorized)
uerr := errtypes.UnknownOllamaKey{
Key: "aaa",
}
err := json.NewEncoder(w).Encode(map[string]string{
"error": uerr.Error(),
})
if err != nil {
t.Fatal(err)
}
},
},
expectedError: "unauthorized: unknown ollama key \"aaa\"",
},
}
for _, tt := range tests {
t.Run(tt.modelName, func(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if handler, ok := tt.serverResponse[r.URL.Path]; ok {
handler(w, r)

View File

@@ -165,9 +165,6 @@ var (
IntelGPU = Bool("OLLAMA_INTEL_GPU")
// MultiUserCache optimizes prompt caching for multi-user scenarios
MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
// Noninteractive is true when CLI interactive features should be disabled.
// This affects features like automatic browser opening.
Noninteractive = Bool("OLLAMA_NONINTERACTIVE")
)
func String(s string) func() string {
@@ -253,7 +250,6 @@ func AsMap() map[string]EnvVar {
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
"OLLAMA_NONINTERACTIVE": {"OLLAMA_NONINTERACTIVE", Noninteractive(), "Disable interactive CLI features, such as automatically opening the browser"},
// Informational
"HTTP_PROXY": {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},

1
go.mod
View File

@@ -36,7 +36,6 @@ require (
github.com/gogo/protobuf v1.3.2 // indirect
github.com/google/flatbuffers v24.3.25+incompatible // indirect
github.com/kr/text v0.2.0 // indirect
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rivo/uniseg v0.2.0 // indirect

3
go.sum
View File

@@ -159,8 +159,6 @@ github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2
github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI=
github.com/pierrec/lz4/v4 v4.1.8 h1:ieHkV+i2BRzngO4Wd/3HGowuZStgq6QkPsD1eolNAO4=
github.com/pierrec/lz4/v4 v4.1.8/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@@ -283,7 +281,6 @@ golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=

View File

@@ -15,28 +15,36 @@ export CGO_CXXFLAGS=-mmacosx-version-min=11.3
export CGO_LDFLAGS=-mmacosx-version-min=11.3
rm -rf llama/build dist/darwin-*
# Generate the universal ollama binary for stand-alone usage: metal + avx
echo "Building binary"
echo "Building darwin arm64"
GOOS=darwin ARCH=arm64 GOARCH=arm64 make -j 8 dist
echo "Building darwin amd64 with AVX enabled"
GOOS=darwin ARCH=amd64 GOARCH=amd64 CUSTOM_CPU_FLAGS="avx" make -j 8 dist_exe
# Generate the universal ollama binary for stand-alone usage: metal + avx
lipo -create -output dist/ollama-darwin dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
# sign the binary and rename it
if [ -n "$APPLE_IDENTITY" ]; then
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama-darwin
else
echo "WARNING: Skipping code signing - set APPLE_IDENTITY"
fi
ditto -c -k --keepParent dist/ollama-darwin dist/temp.zip
if [ -n "$APPLE_IDENTITY" ]; then
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
fi
rm -f dist/temp.zip
# Build the app bundle
echo "Building app"
echo "Building darwin amd64 with runners"
rm dist/darwin-amd64/bin/ollama
GOOS=darwin ARCH=amd64 GOARCH=amd64 make -j 8 dist
# Generate the universal ollama binary for the app bundle: metal + no-avx
lipo -create -output dist/ollama dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
if [ -n "$APPLE_IDENTITY" ]; then
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
else
echo "Skipping code signing - set APPLE_IDENTITY"
fi
chmod +x dist/ollama
# build and optionally sign the mac app
npm install --prefix macapp
if [ -n "$APPLE_IDENTITY" ]; then
@@ -46,14 +54,3 @@ else
fi
cp macapp/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip
# sign the binary and rename it
if [ -n "$APPLE_IDENTITY" ]; then
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama
else
echo "WARNING: Skipping code signing - set APPLE_IDENTITY"
fi
ditto -c -k --keepParent dist/ollama dist/temp.zip
if [ -n "$APPLE_IDENTITY" ]; then
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
fi
rm -f dist/temp.zip

View File

@@ -23,16 +23,13 @@ import (
"strings"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/auth"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/llama"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/template"
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/types/registry"
"github.com/ollama/ollama/version"
)
@@ -987,6 +984,8 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
return fmt.Sprintf("sha256:%x", h.Sum(nil)), n
}
var errUnauthorized = errors.New("unauthorized: access denied")
func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *registryOptions) (*http.Response, error) {
for range 2 {
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
@@ -1024,33 +1023,13 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
if err != nil {
return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
}
var re registry.Errs
if err := json.Unmarshal(responseBody, &re); err == nil && len(re.Errors) > 0 {
if re.HasCode(registry.ErrCodeAnonymous) {
// if the error is due to anonymous access return a custom error
// this error is used by the CLI to direct a user to add their key to an account
pubKey, nestedErr := auth.GetPublicKey()
if nestedErr != nil {
slog.Error(fmt.Sprintf("couldn't get public key: %v", nestedErr))
return nil, re
}
return nil, errtypes.UnknownOllamaKey{
Key: pubKey,
}
}
return nil, re
}
// Fallback to returning the raw response if parsing fails
return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody)
default:
return resp, nil
}
}
// should never be reached
return nil, fmt.Errorf("failed to make upload request")
return nil, errUnauthorized
}
// testMakeRequestDialContext specifies the dial function for the http client in

View File

@@ -82,6 +82,10 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
}
currMsgIdx := n
// Warn user if messages are truncated from the input
if numTruncatedMessages := len(msgs[0:currMsgIdx]); numTruncatedMessages > 0 {
slog.Warn("truncated first messages from input", "num_truncated", numTruncatedMessages)
}
for cnt, msg := range msgs[currMsgIdx:] {
prefix := ""

View File

@@ -1539,6 +1539,34 @@ func (s *Server) ChatHandler(c *gin.Context) {
return
}
if req.Dry {
var debug map[string]any
if req.Debug != nil && req.Debug.Include != nil && slices.Contains(req.Debug.Include, "prompt") {
debug = map[string]any{"prompt": prompt}
}
tokens, err := r.Tokenize(c.Request.Context(), prompt)
if err != nil {
slog.Error("tokenize error", "error", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
c.JSON(http.StatusOK, api.ChatResponse{
Model: req.Model,
CreatedAt: time.Now().UTC(),
Message: api.Message{Role: "assistant", Content: ""},
Done: true,
DoneReason: "dry_run",
Debug: debug,
Metrics: api.Metrics{
PromptEvalCount: len(tokens),
PromptEvalDuration: 0,
EvalCount: 0,
EvalDuration: 0,
},
})
return
}
slog.Debug("chat request", "images", len(images), "prompt", prompt)
ch := make(chan any)
@@ -1571,6 +1599,16 @@ func (s *Server) ChatHandler(c *gin.Context) {
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
}
if req.Debug != nil && req.Debug.Include != nil && slices.Contains(req.Debug.Include, "prompt") {
res.Debug = map[string]any{"prompt": prompt}
if req.Stream != nil && !*req.Stream {
tempMsg := res.Message
res.Message = api.Message{Role: "assistant", Content: ""}
ch <- res
res.Message = tempMsg
}
}
// TODO: tool call checking and filtering should be moved outside of this callback once streaming
// however this was a simple change for now without reworking streaming logic of this (and other)
// handlers

View File

@@ -16,6 +16,6 @@ type UnknownOllamaKey struct {
Key string
}
func (e UnknownOllamaKey) Error() string {
func (e *UnknownOllamaKey) Error() string {
return fmt.Sprintf("unauthorized: %s %q", UnknownOllamaKeyErrMsg, strings.TrimSpace(e.Key))
}

View File

@@ -1,37 +0,0 @@
package registry
import (
"fmt"
"slices"
"strings"
)
const ErrCodeAnonymous = "ANONYMOUS_ACCESS_DENIED"
type Err struct {
Code string `json:"code"`
Message string `json:"message"`
}
// Errs represents the structure of error responses from the registry
// TODO (brucemacd): this struct should be imported from some shared package that is used between the registry and ollama
type Errs struct {
Errors []Err `json:"errors"`
}
func (e Errs) Error() string {
if len(e.Errors) == 0 {
return "unknown registry error"
}
var msgs []string
for _, err := range e.Errors {
msgs = append(msgs, fmt.Sprintf("%s: %s", err.Code, err.Message))
}
return strings.Join(msgs, "; ")
}
func (e Errs) HasCode(code string) bool {
return slices.ContainsFunc(e.Errors, func(err Err) bool {
return err.Code == code
})
}