Compare commits

..

5 Commits

Author SHA1 Message Date
jmorganca
04314765f2 llm: consider null format same as empty value 2024-12-17 09:16:01 -08:00
Jascha Beste
2cde4b8817 readme: change getting started guide link for pgai (#8119) 2024-12-16 22:13:23 -08:00
Blake Mizerany
87f0a49fe6 llm: do not silently fail for supplied, but invalid formats (#8130)
Changes in #8002 introduced fixes for bugs with mangling JSON Schemas.
It also fixed a bug where the server would silently fail when clients
requested invalid formats. It also, unfortunately, introduced a bug
where the server would reject requests with an empty format, which
should be allowed.

The change in #8127 updated the code to allow the empty format, but also
reintroduced the regression where the server would silently fail when
the format was set, but invalid.

This commit fixes both regressions. The server does not reject the empty
format, but it does reject invalid formats. It also adds tests to help
us catch regressions in the future.

Also, the updated code provides a more detailed error message when a
client sends a non-empty, but invalid format, echoing the invalid format
in the response.

This commits also takes the opportunity to remove superfluous linter
checks.
2024-12-16 21:57:49 -08:00
Jeffrey Morgan
0f06a6daa7 llm: loosen format check to default to no format (#8127) 2024-12-16 18:45:46 -08:00
Daniel Hiltgen
8f805dd74b darwin: restore multiple runners for x86 (#8125)
In 0.5.2 we simplified packaging to have avx only for macos x86.  It looks like
there may still be some non-AVX systems out there, so this puts back the prior
logic of building no-AVX for the primary binary, and now 2 runners for avx and avx2.
These will be packaged in the App bundle only, so the stand-alone binary will now be
without AVX support on macos.  On arm, we'll also see these runners reported
as available in the log, but they're dormant and will never be used at runtime.
2024-12-16 18:45:02 -08:00
8 changed files with 103 additions and 35 deletions

View File

@@ -8,8 +8,6 @@ linters:
- containedctx
- contextcheck
- errcheck
- exportloopref
- gci
- gocheckcompilerdirectives
- gofmt
- gofumpt
@@ -30,8 +28,6 @@ linters:
- wastedassign
- whitespace
linters-settings:
gci:
sections: [standard, default, localmodule]
staticcheck:
checks:
- all

View File

@@ -8,11 +8,9 @@ include make/cuda-v12-defs.make
include make/rocm-defs.make
ifeq ($(CUSTOM_CPU_FLAGS),)
ifneq ($(OS),darwin)
ifeq ($(ARCH),amd64)
RUNNER_TARGETS=cpu
endif
endif
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
ifneq ($(CUDA_11_COMPILER),)

View File

@@ -407,8 +407,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
### Database
- [PostgreSQL extension pgai](https://github.com/timescale/pgai) (Create and search embeddings from Ollama models using pgvector)
- [Get started guide](https://github.com/timescale/pgai/blob/main/docs/ollama.md)
- [pgai](https://github.com/timescale/pgai) - PostgreSQL as a vector database (Create and search embeddings from Ollama models using pgvector)
- [Get started guide](https://github.com/timescale/pgai/blob/main/docs/vectorizer-quick-start.md)
- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps)
- [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama)
- [Kangaroo](https://github.com/dbkangaroo/kangaroo) (AI-powered SQL client and admin tool for popular databases)

View File

@@ -674,21 +674,6 @@ type CompletionResponse struct {
}
func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
if err := s.sem.Acquire(ctx, 1); err != nil {
if errors.Is(err, context.Canceled) {
slog.Info("aborting completion request due to client closing the connection")
} else {
slog.Error("Failed to acquire semaphore", "error", err)
}
return err
}
defer s.sem.Release(1)
// put an upper limit on num_predict to avoid the model running on forever
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
req.Options.NumPredict = 10 * s.options.NumCtx
}
request := map[string]any{
"prompt": req.Prompt,
"stream": true,
@@ -714,16 +699,10 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
"cache_prompt": true,
}
// Make sure the server is ready
status, err := s.getServerStatusRetry(ctx)
if err != nil {
return err
} else if status != ServerStatusReady {
return fmt.Errorf("unexpected server status: %s", status.ToString())
}
if len(req.Format) > 0 {
switch {
case bytes.Equal(req.Format, []byte(`""`)) || bytes.Equal(req.Format, []byte(`null`)):
// fallthrough
case bytes.Equal(req.Format, []byte(`"json"`)):
request["grammar"] = grammarJSON
case bytes.HasPrefix(req.Format, []byte("{")):
@@ -734,10 +713,33 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
}
request["grammar"] = string(g)
default:
slog.Warn("invalid format: expected \"json\" or a JSON schema")
return fmt.Errorf("invalid format: %q; expected \"json\" or a valid JSON Schema", req.Format)
}
}
if err := s.sem.Acquire(ctx, 1); err != nil {
if errors.Is(err, context.Canceled) {
slog.Info("aborting completion request due to client closing the connection")
} else {
slog.Error("Failed to acquire semaphore", "error", err)
}
return err
}
defer s.sem.Release(1)
// put an upper limit on num_predict to avoid the model running on forever
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
req.Options.NumPredict = 10 * s.options.NumCtx
}
// Make sure the server is ready
status, err := s.getServerStatusRetry(ctx)
if err != nil {
return err
} else if status != ServerStatusReady {
return fmt.Errorf("unexpected server status: %s", status.ToString())
}
// Handling JSON marshaling with special characters unescaped.
buffer := &bytes.Buffer{}
enc := json.NewEncoder(buffer)

63
llm/server_test.go Normal file
View File

@@ -0,0 +1,63 @@
package llm
import (
"context"
"errors"
"fmt"
"strings"
"testing"
"github.com/ollama/ollama/api"
"golang.org/x/sync/semaphore"
)
func TestLLMServerCompletionFormat(t *testing.T) {
// This test was written to fix an already deployed issue. It is a bit
// of a mess, and but it's good enough, until we can refactoring the
// Completion method to be more testable.
ctx, cancel := context.WithCancel(context.Background())
s := &llmServer{
sem: semaphore.NewWeighted(1), // required to prevent nil panic
}
checkInvalid := func(format string) {
t.Helper()
err := s.Completion(ctx, CompletionRequest{
Options: new(api.Options),
Format: []byte(format),
}, nil)
want := fmt.Sprintf("invalid format: %q; expected \"json\" or a valid JSON Schema", format)
if err == nil || !strings.Contains(err.Error(), want) {
t.Fatalf("err = %v; want %q", err, want)
}
}
checkInvalid("X") // invalid format
checkInvalid(`"X"`) // invalid JSON Schema
cancel() // prevent further processing if request makes it past the format check
checkCanceled := func(err error) {
t.Helper()
if !errors.Is(err, context.Canceled) {
t.Fatalf("Completion: err = %v; expected context.Canceled", err)
}
}
valids := []string{`"json"`, `{"type":"object"}`, ``, `""`, `null`}
for _, valid := range valids {
err := s.Completion(ctx, CompletionRequest{
Options: new(api.Options),
Format: []byte(valid),
}, nil)
checkCanceled(err)
}
err := s.Completion(ctx, CompletionRequest{
Options: new(api.Options),
Format: nil, // missing format
}, nil)
checkCanceled(err)
}

View File

@@ -19,6 +19,7 @@ const config: ForgeConfig = {
icon: './assets/icon.icns',
extraResource: [
'../dist/ollama',
'../dist/darwin-amd64/lib',
path.join(__dirname, './assets/iconTemplate.png'),
path.join(__dirname, './assets/iconTemplate@2x.png'),
path.join(__dirname, './assets/iconUpdateTemplate.png'),
@@ -42,7 +43,7 @@ const config: ForgeConfig = {
}
: {}),
osxUniversal: {
x64ArchFiles: '**/ollama',
x64ArchFiles: '**/ollama*',
},
},
rebuildConfig: {},

View File

@@ -72,6 +72,7 @@ func locateRunnersOnce() {
paths := []string{
filepath.Join(filepath.Dir(exe), "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"),
filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama", "runners"),
filepath.Join(filepath.Dir(exe), "lib", "ollama", "runners"),
}
for _, path := range paths {
if _, err := os.Stat(path); err == nil {

View File

@@ -18,10 +18,18 @@ rm -rf llama/build dist/darwin-*
echo "Building darwin arm64"
GOOS=darwin ARCH=arm64 GOARCH=arm64 make -j 8 dist
echo "Building darwin amd64 with AVX enabled"
GOOS=darwin ARCH=amd64 GOARCH=amd64 CUSTOM_CPU_FLAGS="avx" make -j 8 dist
GOOS=darwin ARCH=amd64 GOARCH=amd64 CUSTOM_CPU_FLAGS="avx" make -j 8 dist_exe
# Generate the universal ollama binary for stand-alone usage: metal + avx
lipo -create -output dist/ollama-darwin dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
echo "Building darwin amd64 with runners"
rm dist/darwin-amd64/bin/ollama
GOOS=darwin ARCH=amd64 GOARCH=amd64 make -j 8 dist
# Generate the universal ollama binary for the app bundle: metal + no-avx
lipo -create -output dist/ollama dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
if [ -n "$APPLE_IDENTITY" ]; then
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
else
@@ -48,5 +56,4 @@ ditto -c -k --keepParent dist/ollama dist/temp.zip
if [ -n "$APPLE_IDENTITY" ]; then
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
fi
mv dist/ollama dist/ollama-darwin
rm -f dist/temp.zip