Compare commits
1 Commits
jmorganca/
...
parth/fix-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fc3c398ca2 |
@@ -8,6 +8,8 @@ linters:
|
|||||||
- containedctx
|
- containedctx
|
||||||
- contextcheck
|
- contextcheck
|
||||||
- errcheck
|
- errcheck
|
||||||
|
- exportloopref
|
||||||
|
- gci
|
||||||
- gocheckcompilerdirectives
|
- gocheckcompilerdirectives
|
||||||
- gofmt
|
- gofmt
|
||||||
- gofumpt
|
- gofumpt
|
||||||
@@ -28,6 +30,8 @@ linters:
|
|||||||
- wastedassign
|
- wastedassign
|
||||||
- whitespace
|
- whitespace
|
||||||
linters-settings:
|
linters-settings:
|
||||||
|
gci:
|
||||||
|
sections: [standard, default, localmodule]
|
||||||
staticcheck:
|
staticcheck:
|
||||||
checks:
|
checks:
|
||||||
- all
|
- all
|
||||||
|
|||||||
2
Makefile
2
Makefile
@@ -8,9 +8,11 @@ include make/cuda-v12-defs.make
|
|||||||
include make/rocm-defs.make
|
include make/rocm-defs.make
|
||||||
|
|
||||||
ifeq ($(CUSTOM_CPU_FLAGS),)
|
ifeq ($(CUSTOM_CPU_FLAGS),)
|
||||||
|
ifneq ($(OS),darwin)
|
||||||
ifeq ($(ARCH),amd64)
|
ifeq ($(ARCH),amd64)
|
||||||
RUNNER_TARGETS=cpu
|
RUNNER_TARGETS=cpu
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
|
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
|
||||||
ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
|
ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
|
||||||
ifneq ($(CUDA_11_COMPILER),)
|
ifneq ($(CUDA_11_COMPILER),)
|
||||||
|
|||||||
@@ -407,8 +407,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
|
|
||||||
### Database
|
### Database
|
||||||
|
|
||||||
- [pgai](https://github.com/timescale/pgai) - PostgreSQL as a vector database (Create and search embeddings from Ollama models using pgvector)
|
- [PostgreSQL extension pgai](https://github.com/timescale/pgai) (Create and search embeddings from Ollama models using pgvector)
|
||||||
- [Get started guide](https://github.com/timescale/pgai/blob/main/docs/vectorizer-quick-start.md)
|
- [Get started guide](https://github.com/timescale/pgai/blob/main/docs/ollama.md)
|
||||||
- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps)
|
- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps)
|
||||||
- [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama)
|
- [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama)
|
||||||
- [Kangaroo](https://github.com/dbkangaroo/kangaroo) (AI-powered SQL client and admin tool for popular databases)
|
- [Kangaroo](https://github.com/dbkangaroo/kangaroo) (AI-powered SQL client and admin tool for popular databases)
|
||||||
|
|||||||
@@ -674,6 +674,21 @@ type CompletionResponse struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
|
func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
|
||||||
|
if err := s.sem.Acquire(ctx, 1); err != nil {
|
||||||
|
if errors.Is(err, context.Canceled) {
|
||||||
|
slog.Info("aborting completion request due to client closing the connection")
|
||||||
|
} else {
|
||||||
|
slog.Error("Failed to acquire semaphore", "error", err)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer s.sem.Release(1)
|
||||||
|
|
||||||
|
// put an upper limit on num_predict to avoid the model running on forever
|
||||||
|
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
|
||||||
|
req.Options.NumPredict = 10 * s.options.NumCtx
|
||||||
|
}
|
||||||
|
|
||||||
request := map[string]any{
|
request := map[string]any{
|
||||||
"prompt": req.Prompt,
|
"prompt": req.Prompt,
|
||||||
"stream": true,
|
"stream": true,
|
||||||
@@ -699,10 +714,16 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
|
|||||||
"cache_prompt": true,
|
"cache_prompt": true,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Make sure the server is ready
|
||||||
|
status, err := s.getServerStatusRetry(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
} else if status != ServerStatusReady {
|
||||||
|
return fmt.Errorf("unexpected server status: %s", status.ToString())
|
||||||
|
}
|
||||||
|
|
||||||
if len(req.Format) > 0 {
|
if len(req.Format) > 0 {
|
||||||
switch {
|
switch {
|
||||||
case bytes.Equal(req.Format, []byte(`""`)) || bytes.Equal(req.Format, []byte(`null`)):
|
|
||||||
// fallthrough
|
|
||||||
case bytes.Equal(req.Format, []byte(`"json"`)):
|
case bytes.Equal(req.Format, []byte(`"json"`)):
|
||||||
request["grammar"] = grammarJSON
|
request["grammar"] = grammarJSON
|
||||||
case bytes.HasPrefix(req.Format, []byte("{")):
|
case bytes.HasPrefix(req.Format, []byte("{")):
|
||||||
@@ -713,33 +734,10 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
|
|||||||
}
|
}
|
||||||
request["grammar"] = string(g)
|
request["grammar"] = string(g)
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("invalid format: %q; expected \"json\" or a valid JSON Schema", req.Format)
|
slog.Warn("invalid format: expected \"json\" or a JSON schema")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := s.sem.Acquire(ctx, 1); err != nil {
|
|
||||||
if errors.Is(err, context.Canceled) {
|
|
||||||
slog.Info("aborting completion request due to client closing the connection")
|
|
||||||
} else {
|
|
||||||
slog.Error("Failed to acquire semaphore", "error", err)
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer s.sem.Release(1)
|
|
||||||
|
|
||||||
// put an upper limit on num_predict to avoid the model running on forever
|
|
||||||
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
|
|
||||||
req.Options.NumPredict = 10 * s.options.NumCtx
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure the server is ready
|
|
||||||
status, err := s.getServerStatusRetry(ctx)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
} else if status != ServerStatusReady {
|
|
||||||
return fmt.Errorf("unexpected server status: %s", status.ToString())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handling JSON marshaling with special characters unescaped.
|
// Handling JSON marshaling with special characters unescaped.
|
||||||
buffer := &bytes.Buffer{}
|
buffer := &bytes.Buffer{}
|
||||||
enc := json.NewEncoder(buffer)
|
enc := json.NewEncoder(buffer)
|
||||||
|
|||||||
@@ -1,63 +0,0 @@
|
|||||||
package llm
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
|
||||||
"golang.org/x/sync/semaphore"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestLLMServerCompletionFormat(t *testing.T) {
|
|
||||||
// This test was written to fix an already deployed issue. It is a bit
|
|
||||||
// of a mess, and but it's good enough, until we can refactoring the
|
|
||||||
// Completion method to be more testable.
|
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
|
||||||
s := &llmServer{
|
|
||||||
sem: semaphore.NewWeighted(1), // required to prevent nil panic
|
|
||||||
}
|
|
||||||
|
|
||||||
checkInvalid := func(format string) {
|
|
||||||
t.Helper()
|
|
||||||
err := s.Completion(ctx, CompletionRequest{
|
|
||||||
Options: new(api.Options),
|
|
||||||
Format: []byte(format),
|
|
||||||
}, nil)
|
|
||||||
|
|
||||||
want := fmt.Sprintf("invalid format: %q; expected \"json\" or a valid JSON Schema", format)
|
|
||||||
if err == nil || !strings.Contains(err.Error(), want) {
|
|
||||||
t.Fatalf("err = %v; want %q", err, want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
checkInvalid("X") // invalid format
|
|
||||||
checkInvalid(`"X"`) // invalid JSON Schema
|
|
||||||
|
|
||||||
cancel() // prevent further processing if request makes it past the format check
|
|
||||||
|
|
||||||
checkCanceled := func(err error) {
|
|
||||||
t.Helper()
|
|
||||||
if !errors.Is(err, context.Canceled) {
|
|
||||||
t.Fatalf("Completion: err = %v; expected context.Canceled", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
valids := []string{`"json"`, `{"type":"object"}`, ``, `""`, `null`}
|
|
||||||
for _, valid := range valids {
|
|
||||||
err := s.Completion(ctx, CompletionRequest{
|
|
||||||
Options: new(api.Options),
|
|
||||||
Format: []byte(valid),
|
|
||||||
}, nil)
|
|
||||||
checkCanceled(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
err := s.Completion(ctx, CompletionRequest{
|
|
||||||
Options: new(api.Options),
|
|
||||||
Format: nil, // missing format
|
|
||||||
}, nil)
|
|
||||||
checkCanceled(err)
|
|
||||||
}
|
|
||||||
@@ -19,7 +19,6 @@ const config: ForgeConfig = {
|
|||||||
icon: './assets/icon.icns',
|
icon: './assets/icon.icns',
|
||||||
extraResource: [
|
extraResource: [
|
||||||
'../dist/ollama',
|
'../dist/ollama',
|
||||||
'../dist/darwin-amd64/lib',
|
|
||||||
path.join(__dirname, './assets/iconTemplate.png'),
|
path.join(__dirname, './assets/iconTemplate.png'),
|
||||||
path.join(__dirname, './assets/iconTemplate@2x.png'),
|
path.join(__dirname, './assets/iconTemplate@2x.png'),
|
||||||
path.join(__dirname, './assets/iconUpdateTemplate.png'),
|
path.join(__dirname, './assets/iconUpdateTemplate.png'),
|
||||||
@@ -43,7 +42,7 @@ const config: ForgeConfig = {
|
|||||||
}
|
}
|
||||||
: {}),
|
: {}),
|
||||||
osxUniversal: {
|
osxUniversal: {
|
||||||
x64ArchFiles: '**/ollama*',
|
x64ArchFiles: '**/ollama',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
rebuildConfig: {},
|
rebuildConfig: {},
|
||||||
|
|||||||
@@ -72,7 +72,6 @@ func locateRunnersOnce() {
|
|||||||
paths := []string{
|
paths := []string{
|
||||||
filepath.Join(filepath.Dir(exe), "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"),
|
filepath.Join(filepath.Dir(exe), "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"),
|
||||||
filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama", "runners"),
|
filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama", "runners"),
|
||||||
filepath.Join(filepath.Dir(exe), "lib", "ollama", "runners"),
|
|
||||||
}
|
}
|
||||||
for _, path := range paths {
|
for _, path := range paths {
|
||||||
if _, err := os.Stat(path); err == nil {
|
if _, err := os.Stat(path); err == nil {
|
||||||
|
|||||||
@@ -18,18 +18,10 @@ rm -rf llama/build dist/darwin-*
|
|||||||
echo "Building darwin arm64"
|
echo "Building darwin arm64"
|
||||||
GOOS=darwin ARCH=arm64 GOARCH=arm64 make -j 8 dist
|
GOOS=darwin ARCH=arm64 GOARCH=arm64 make -j 8 dist
|
||||||
echo "Building darwin amd64 with AVX enabled"
|
echo "Building darwin amd64 with AVX enabled"
|
||||||
GOOS=darwin ARCH=amd64 GOARCH=amd64 CUSTOM_CPU_FLAGS="avx" make -j 8 dist_exe
|
GOOS=darwin ARCH=amd64 GOARCH=amd64 CUSTOM_CPU_FLAGS="avx" make -j 8 dist
|
||||||
|
|
||||||
# Generate the universal ollama binary for stand-alone usage: metal + avx
|
|
||||||
lipo -create -output dist/ollama-darwin dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
|
|
||||||
|
|
||||||
echo "Building darwin amd64 with runners"
|
|
||||||
rm dist/darwin-amd64/bin/ollama
|
|
||||||
GOOS=darwin ARCH=amd64 GOARCH=amd64 make -j 8 dist
|
|
||||||
# Generate the universal ollama binary for the app bundle: metal + no-avx
|
|
||||||
lipo -create -output dist/ollama dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
|
lipo -create -output dist/ollama dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
|
||||||
|
|
||||||
|
|
||||||
if [ -n "$APPLE_IDENTITY" ]; then
|
if [ -n "$APPLE_IDENTITY" ]; then
|
||||||
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
|
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
|
||||||
else
|
else
|
||||||
@@ -56,4 +48,5 @@ ditto -c -k --keepParent dist/ollama dist/temp.zip
|
|||||||
if [ -n "$APPLE_IDENTITY" ]; then
|
if [ -n "$APPLE_IDENTITY" ]; then
|
||||||
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
|
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
|
||||||
fi
|
fi
|
||||||
|
mv dist/ollama dist/ollama-darwin
|
||||||
rm -f dist/temp.zip
|
rm -f dist/temp.zip
|
||||||
|
|||||||
Reference in New Issue
Block a user