Compare commits
11 Commits
progress-f
...
v0.5.12-rc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ba9ec3d05e | ||
|
|
7c168b08c9 | ||
|
|
3d4cc7833c | ||
|
|
351a85d9ea | ||
|
|
bda4ef6c56 | ||
|
|
1e438b237c | ||
|
|
d721a02e7d | ||
|
|
778603a818 | ||
|
|
3c874df46e | ||
|
|
d2eb226c91 | ||
|
|
5f8c03189e |
4
.github/workflows/release.yaml
vendored
4
.github/workflows/release.yaml
vendored
@@ -160,6 +160,10 @@ jobs:
|
|||||||
echo "$hipPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
echo "$hipPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||||
echo "CC=$hipPath\bin\clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
|
echo "CC=$hipPath\bin\clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
|
||||||
echo "CXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
|
echo "CXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
|
||||||
|
- if: matrix.preset == 'CPU'
|
||||||
|
run: |
|
||||||
|
echo "CC=clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
|
||||||
|
echo "CXX=clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
|
||||||
- if: ${{ !cancelled() && steps.cache-install.outputs.cache-hit != 'true' }}
|
- if: ${{ !cancelled() && steps.cache-install.outputs.cache-hit != 'true' }}
|
||||||
uses: actions/cache/save@v4
|
uses: actions/cache/save@v4
|
||||||
with:
|
with:
|
||||||
|
|||||||
@@ -382,6 +382,9 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
|
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
|
||||||
- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
|
- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
|
||||||
- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally)
|
- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally)
|
||||||
|
- [AntSK](https://github.com/AIDotNet/AntSK) (Out-of-the-box & Adaptable RAG Chatbot)
|
||||||
|
- [MaxKB](https://github.com/1Panel-dev/MaxKB/) (Ready-to-use & flexible RAG Chatbot)
|
||||||
|
- [yla](https://github.com/danielekp/yla) (Web interface to freely interact with your customized models)
|
||||||
|
|
||||||
### Cloud
|
### Cloud
|
||||||
|
|
||||||
|
|||||||
@@ -126,8 +126,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
return ctx.Err()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const maxBufferSize = 512 * format.KiloByte
|
const maxBufferSize = 512 * format.KiloByte
|
||||||
@@ -190,7 +189,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ctx.Err()
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// GenerateResponseFunc is a function that [Client.Generate] invokes every time
|
// GenerateResponseFunc is a function that [Client.Generate] invokes every time
|
||||||
|
|||||||
38
cmd/cmd.go
38
cmd/cmd.go
@@ -15,11 +15,13 @@ import (
|
|||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"os/signal"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/containerd/console"
|
"github.com/containerd/console"
|
||||||
@@ -328,7 +330,6 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
|||||||
if err := PullHandler(cmd, []string{name}); err != nil {
|
if err := PullHandler(cmd, []string{name}); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
||||||
}
|
}
|
||||||
return info, err
|
return info, err
|
||||||
@@ -857,6 +858,17 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
|
|||||||
spinner := progress.NewSpinner("")
|
spinner := progress.NewSpinner("")
|
||||||
p.Add("", spinner)
|
p.Add("", spinner)
|
||||||
|
|
||||||
|
cancelCtx, cancel := context.WithCancel(cmd.Context())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
sigChan := make(chan os.Signal, 1)
|
||||||
|
signal.Notify(sigChan, syscall.SIGINT)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
<-sigChan
|
||||||
|
cancel()
|
||||||
|
}()
|
||||||
|
|
||||||
var state *displayResponseState = &displayResponseState{}
|
var state *displayResponseState = &displayResponseState{}
|
||||||
var latest api.ChatResponse
|
var latest api.ChatResponse
|
||||||
var fullResponse strings.Builder
|
var fullResponse strings.Builder
|
||||||
@@ -891,7 +903,10 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
|
|||||||
req.KeepAlive = opts.KeepAlive
|
req.KeepAlive = opts.KeepAlive
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := client.Chat(cmd.Context(), req, fn); err != nil {
|
if err := client.Chat(cancelCtx, req, fn); err != nil {
|
||||||
|
if errors.Is(err, context.Canceled) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -931,6 +946,17 @@ func generate(cmd *cobra.Command, opts runOptions) error {
|
|||||||
generateContext = []int{}
|
generateContext = []int{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(cmd.Context())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
sigChan := make(chan os.Signal, 1)
|
||||||
|
signal.Notify(sigChan, syscall.SIGINT)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
<-sigChan
|
||||||
|
cancel()
|
||||||
|
}()
|
||||||
|
|
||||||
var state *displayResponseState = &displayResponseState{}
|
var state *displayResponseState = &displayResponseState{}
|
||||||
|
|
||||||
fn := func(response api.GenerateResponse) error {
|
fn := func(response api.GenerateResponse) error {
|
||||||
@@ -966,7 +992,10 @@ func generate(cmd *cobra.Command, opts runOptions) error {
|
|||||||
KeepAlive: opts.KeepAlive,
|
KeepAlive: opts.KeepAlive,
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := client.Generate(cmd.Context(), &request, fn); err != nil {
|
if err := client.Generate(ctx, &request, fn); err != nil {
|
||||||
|
if errors.Is(err, context.Canceled) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -988,7 +1017,8 @@ func generate(cmd *cobra.Command, opts runOptions) error {
|
|||||||
latest.Summary()
|
latest.Summary()
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd.SetContext(context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context))
|
ctx = context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context)
|
||||||
|
cmd.SetContext(ctx)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/google/go-cmp/cmp"
|
"github.com/google/go-cmp/cmp"
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
@@ -490,6 +491,96 @@ func TestPushHandler(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestListHandler(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
args []string
|
||||||
|
serverResponse []api.ListModelResponse
|
||||||
|
expectedError string
|
||||||
|
expectedOutput string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "list all models",
|
||||||
|
args: []string{},
|
||||||
|
serverResponse: []api.ListModelResponse{
|
||||||
|
{Name: "model1", Digest: "sha256:abc123", Size: 1024, ModifiedAt: time.Now().Add(-24 * time.Hour)},
|
||||||
|
{Name: "model2", Digest: "sha256:def456", Size: 2048, ModifiedAt: time.Now().Add(-48 * time.Hour)},
|
||||||
|
},
|
||||||
|
expectedOutput: "NAME ID SIZE MODIFIED \n" +
|
||||||
|
"model1 sha256:abc12 1.0 KB 24 hours ago \n" +
|
||||||
|
"model2 sha256:def45 2.0 KB 2 days ago \n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "filter models by prefix",
|
||||||
|
args: []string{"model1"},
|
||||||
|
serverResponse: []api.ListModelResponse{
|
||||||
|
{Name: "model1", Digest: "sha256:abc123", Size: 1024, ModifiedAt: time.Now().Add(-24 * time.Hour)},
|
||||||
|
{Name: "model2", Digest: "sha256:def456", Size: 2048, ModifiedAt: time.Now().Add(-24 * time.Hour)},
|
||||||
|
},
|
||||||
|
expectedOutput: "NAME ID SIZE MODIFIED \n" +
|
||||||
|
"model1 sha256:abc12 1.0 KB 24 hours ago \n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "server error",
|
||||||
|
args: []string{},
|
||||||
|
expectedError: "server error",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path != "/api/tags" || r.Method != http.MethodGet {
|
||||||
|
t.Errorf("unexpected request to %s %s", r.Method, r.URL.Path)
|
||||||
|
http.Error(w, "not found", http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if tt.expectedError != "" {
|
||||||
|
http.Error(w, tt.expectedError, http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
response := api.ListResponse{Models: tt.serverResponse}
|
||||||
|
if err := json.NewEncoder(w).Encode(response); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer mockServer.Close()
|
||||||
|
|
||||||
|
t.Setenv("OLLAMA_HOST", mockServer.URL)
|
||||||
|
|
||||||
|
cmd := &cobra.Command{}
|
||||||
|
cmd.SetContext(context.TODO())
|
||||||
|
|
||||||
|
// Capture stdout
|
||||||
|
oldStdout := os.Stdout
|
||||||
|
r, w, _ := os.Pipe()
|
||||||
|
os.Stdout = w
|
||||||
|
|
||||||
|
err := ListHandler(cmd, tt.args)
|
||||||
|
|
||||||
|
// Restore stdout and get output
|
||||||
|
w.Close()
|
||||||
|
os.Stdout = oldStdout
|
||||||
|
output, _ := io.ReadAll(r)
|
||||||
|
|
||||||
|
if tt.expectedError == "" {
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("expected no error, got %v", err)
|
||||||
|
}
|
||||||
|
if got := string(output); got != tt.expectedOutput {
|
||||||
|
t.Errorf("expected output:\n%s\ngot:\n%s", tt.expectedOutput, got)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if err == nil || !strings.Contains(err.Error(), tt.expectedError) {
|
||||||
|
t.Errorf("expected error containing %q, got %v", tt.expectedError, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestCreateHandler(t *testing.T) {
|
func TestCreateHandler(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
|
|||||||
@@ -0,0 +1,285 @@
|
|||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: jmorganca <jmorganca@gmail.com>
|
||||||
|
Date: Sun, 16 Feb 2025 20:00:22 -0500
|
||||||
|
Subject: [PATCH] use std::filesystem::path instead of wstring
|
||||||
|
|
||||||
|
---
|
||||||
|
ggml/src/ggml-backend-reg.cpp | 116 ++++++++++++----------------------
|
||||||
|
1 file changed, 40 insertions(+), 76 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
|
||||||
|
index 84b21dd8..de78feae 100644
|
||||||
|
--- a/ggml/src/ggml-backend-reg.cpp
|
||||||
|
+++ b/ggml/src/ggml-backend-reg.cpp
|
||||||
|
@@ -72,16 +72,6 @@
|
||||||
|
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-static std::wstring utf8_to_utf16(const std::string & str) {
|
||||||
|
- std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
||||||
|
- return converter.from_bytes(str);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static std::string utf16_to_utf8(const std::wstring & str) {
|
||||||
|
- std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
||||||
|
- return converter.to_bytes(str);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
#if defined(__clang__)
|
||||||
|
# pragma clang diagnostic pop
|
||||||
|
#endif
|
||||||
|
@@ -96,12 +86,12 @@ struct dl_handle_deleter {
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
-static dl_handle * dl_load_library(const std::wstring & path) {
|
||||||
|
+static dl_handle * dl_load_library(const std::filesystem::path & path) {
|
||||||
|
// suppress error dialogs for missing DLLs
|
||||||
|
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||||
|
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
||||||
|
|
||||||
|
- HMODULE handle = LoadLibraryW(path.c_str());
|
||||||
|
+ HMODULE handle = LoadLibraryW(path.wstring().c_str());
|
||||||
|
|
||||||
|
SetErrorMode(old_mode);
|
||||||
|
|
||||||
|
@@ -129,8 +119,8 @@ struct dl_handle_deleter {
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
-static void * dl_load_library(const std::wstring & path) {
|
||||||
|
- dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
|
||||||
|
+static void * dl_load_library(const std::filesystem::path & path) {
|
||||||
|
+ dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
|
||||||
|
|
||||||
|
return handle;
|
||||||
|
}
|
||||||
|
@@ -222,11 +212,11 @@ struct ggml_backend_registry {
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
- ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
|
||||||
|
+ ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) {
|
||||||
|
dl_handle_ptr handle { dl_load_library(path) };
|
||||||
|
if (!handle) {
|
||||||
|
if (!silent) {
|
||||||
|
- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
|
||||||
|
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path.string().c_str());
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
@@ -234,7 +224,7 @@ struct ggml_backend_registry {
|
||||||
|
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
||||||
|
if (score_fn && score_fn() == 0) {
|
||||||
|
if (!silent) {
|
||||||
|
- GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
|
||||||
|
+ GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path.string().c_str());
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
@@ -242,7 +232,7 @@ struct ggml_backend_registry {
|
||||||
|
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
|
||||||
|
if (!backend_init_fn) {
|
||||||
|
if (!silent) {
|
||||||
|
- GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
|
||||||
|
+ GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path.string().c_str());
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
@@ -251,16 +241,16 @@ struct ggml_backend_registry {
|
||||||
|
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
|
||||||
|
if (!silent) {
|
||||||
|
if (!reg) {
|
||||||
|
- GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
|
||||||
|
+ GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path.string().c_str());
|
||||||
|
} else {
|
||||||
|
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
||||||
|
- __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
|
||||||
|
+ __func__, path.string().c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
- GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
|
||||||
|
+ GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path.string().c_str());
|
||||||
|
|
||||||
|
register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));
|
||||||
|
|
||||||
|
@@ -396,14 +386,14 @@ ggml_backend_t ggml_backend_init_best(void) {
|
||||||
|
|
||||||
|
// Dynamic loading
|
||||||
|
ggml_backend_reg_t ggml_backend_load(const char * path) {
|
||||||
|
- return get_reg().load_backend(utf8_to_utf16(path), false);
|
||||||
|
+ return get_reg().load_backend(path, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ggml_backend_unload(ggml_backend_reg_t reg) {
|
||||||
|
get_reg().unload_backend(reg, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static std::wstring get_executable_path() {
|
||||||
|
+static std::filesystem::path get_executable_path() {
|
||||||
|
#if defined(__APPLE__)
|
||||||
|
// get executable path
|
||||||
|
std::vector<char> path;
|
||||||
|
@@ -415,15 +405,9 @@ static std::wstring get_executable_path() {
|
||||||
|
}
|
||||||
|
path.resize(size);
|
||||||
|
}
|
||||||
|
- std::string base_path(path.data(), size);
|
||||||
|
- // remove executable name
|
||||||
|
- auto last_slash = base_path.find_last_of('/');
|
||||||
|
- if (last_slash != std::string::npos) {
|
||||||
|
- base_path = base_path.substr(0, last_slash);
|
||||||
|
- }
|
||||||
|
- return utf8_to_utf16(base_path + "/");
|
||||||
|
+
|
||||||
|
+ return std::filesystem::path(path.data()).parent_path();
|
||||||
|
#elif defined(__linux__) || defined(__FreeBSD__)
|
||||||
|
- std::string base_path = ".";
|
||||||
|
std::vector<char> path(1024);
|
||||||
|
while (true) {
|
||||||
|
// get executable path
|
||||||
|
@@ -436,76 +420,56 @@ static std::wstring get_executable_path() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (len < (ssize_t) path.size()) {
|
||||||
|
- base_path = std::string(path.data(), len);
|
||||||
|
- // remove executable name
|
||||||
|
- auto last_slash = base_path.find_last_of('/');
|
||||||
|
- if (last_slash != std::string::npos) {
|
||||||
|
- base_path = base_path.substr(0, last_slash);
|
||||||
|
- }
|
||||||
|
- break;
|
||||||
|
+ return std::filesystem::path(path.data()).parent_path();
|
||||||
|
}
|
||||||
|
path.resize(path.size() * 2);
|
||||||
|
}
|
||||||
|
-
|
||||||
|
- return utf8_to_utf16(base_path + "/");
|
||||||
|
#elif defined(_WIN32)
|
||||||
|
std::vector<wchar_t> path(MAX_PATH);
|
||||||
|
DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
|
||||||
|
if (len == 0) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
- std::wstring base_path(path.data(), len);
|
||||||
|
- // remove executable name
|
||||||
|
- auto last_slash = base_path.find_last_of('\\');
|
||||||
|
- if (last_slash != std::string::npos) {
|
||||||
|
- base_path = base_path.substr(0, last_slash);
|
||||||
|
- }
|
||||||
|
- return base_path + L"\\";
|
||||||
|
-#else
|
||||||
|
- return {};
|
||||||
|
-#endif
|
||||||
|
-}
|
||||||
|
|
||||||
|
-static std::wstring backend_filename_prefix() {
|
||||||
|
-#ifdef _WIN32
|
||||||
|
- return L"ggml-";
|
||||||
|
+ return std::filesystem::path(path.data()).parent_path();
|
||||||
|
#else
|
||||||
|
- return L"libggml-";
|
||||||
|
+ return {};
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
-static std::wstring backend_filename_suffix() {
|
||||||
|
+static std::string backend_filename_prefix() {
|
||||||
|
#ifdef _WIN32
|
||||||
|
- return L".dll";
|
||||||
|
+ return "ggml-";
|
||||||
|
#else
|
||||||
|
- return L".so";
|
||||||
|
+ return "libggml-";
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
-static std::wstring path_separator() {
|
||||||
|
+static std::string backend_filename_suffix() {
|
||||||
|
#ifdef _WIN32
|
||||||
|
- return L"\\";
|
||||||
|
+ return ".dll";
|
||||||
|
#else
|
||||||
|
- return L"/";
|
||||||
|
+ return ".so";
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
|
||||||
|
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
||||||
|
// TODO: search system paths
|
||||||
|
- std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
|
||||||
|
- std::vector<std::wstring> search_paths;
|
||||||
|
+ namespace fs = std::filesystem;
|
||||||
|
+ std::string file_prefix = backend_filename_prefix() + name + "-";
|
||||||
|
+ std::vector<fs::path> search_paths;
|
||||||
|
+
|
||||||
|
if (user_search_path == nullptr) {
|
||||||
|
- search_paths.push_back(L"." + path_separator());
|
||||||
|
+ search_paths.push_back(fs::current_path());
|
||||||
|
search_paths.push_back(get_executable_path());
|
||||||
|
} else {
|
||||||
|
- search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
|
||||||
|
+ search_paths.push_back(fs::u8path(user_search_path));
|
||||||
|
}
|
||||||
|
|
||||||
|
int best_score = 0;
|
||||||
|
- std::wstring best_path;
|
||||||
|
+ fs::path best_path;
|
||||||
|
|
||||||
|
- namespace fs = std::filesystem;
|
||||||
|
for (const auto & search_path : search_paths) {
|
||||||
|
if (!fs::exists(search_path)) {
|
||||||
|
continue;
|
||||||
|
@@ -514,31 +478,31 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
||||||
|
for (const auto & entry : dir_it) {
|
||||||
|
try {
|
||||||
|
if (entry.is_regular_file()) {
|
||||||
|
- std::wstring filename = entry.path().filename().wstring();
|
||||||
|
- std::wstring ext = entry.path().extension().wstring();
|
||||||
|
+ std::string filename = entry.path().filename().string();
|
||||||
|
+ std::string ext = entry.path().extension().string();
|
||||||
|
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
|
||||||
|
- dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
|
||||||
|
+ dl_handle_ptr handle { dl_load_library(entry.path()) };
|
||||||
|
if (!handle) {
|
||||||
|
- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
||||||
|
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
||||||
|
if (!score_fn) {
|
||||||
|
- GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
||||||
|
+ GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
int s = score_fn();
|
||||||
|
- GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
|
||||||
|
+ GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
|
||||||
|
if (s > best_score) {
|
||||||
|
best_score = s;
|
||||||
|
- best_path = entry.path().wstring();
|
||||||
|
+ best_path = entry.path();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (const std::exception & e) {
|
||||||
|
- GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
|
||||||
|
+ GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, entry.path().string().c_str(), e.what());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -546,7 +510,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
||||||
|
if (best_score == 0) {
|
||||||
|
// try to load the base backend
|
||||||
|
for (const auto & search_path : search_paths) {
|
||||||
|
- std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
|
||||||
|
+ fs::path path = fs::path(search_path) / (backend_filename_prefix() + name + backend_filename_suffix());
|
||||||
|
if (fs::exists(path)) {
|
||||||
|
return get_reg().load_backend(path, silent);
|
||||||
|
}
|
||||||
24
llama/patches/0019-remove-amx.patch
Normal file
24
llama/patches/0019-remove-amx.patch
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Michael Yang <mxyng@pm.me>
|
||||||
|
Date: Tue, 18 Feb 2025 14:47:21 -0800
|
||||||
|
Subject: [PATCH] remove amx
|
||||||
|
|
||||||
|
---
|
||||||
|
ggml/src/CMakeLists.txt | 4 ----
|
||||||
|
1 file changed, 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
|
||||||
|
index 72b488dd..50828717 100644
|
||||||
|
--- a/ggml/src/CMakeLists.txt
|
||||||
|
+++ b/ggml/src/CMakeLists.txt
|
||||||
|
@@ -293,10 +293,6 @@ if (GGML_CPU_ALL_VARIANTS)
|
||||||
|
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
|
||||||
|
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
||||||
|
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
|
||||||
|
- if (NOT MSVC)
|
||||||
|
- # MSVC doesn't support AMX
|
||||||
|
- ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
||||||
|
- endif()
|
||||||
|
else ()
|
||||||
|
ggml_add_cpu_backend_variant_impl("")
|
||||||
|
endif()
|
||||||
14
main.go
14
main.go
@@ -2,8 +2,6 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"os"
|
|
||||||
"os/signal"
|
|
||||||
|
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
|
|
||||||
@@ -11,15 +9,5 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
sigChan := make(chan os.Signal, 1)
|
|
||||||
signal.Notify(sigChan, os.Interrupt)
|
|
||||||
go func() {
|
|
||||||
<-sigChan
|
|
||||||
cancel()
|
|
||||||
}()
|
|
||||||
|
|
||||||
cobra.CheckErr(cmd.NewCLI().ExecuteContext(ctx))
|
|
||||||
}
|
}
|
||||||
|
|||||||
4
ml/backend/ggml/ggml/src/CMakeLists.txt
vendored
4
ml/backend/ggml/ggml/src/CMakeLists.txt
vendored
@@ -293,10 +293,6 @@ if (GGML_CPU_ALL_VARIANTS)
|
|||||||
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
|
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
|
||||||
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
||||||
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
|
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
|
||||||
if (NOT MSVC)
|
|
||||||
# MSVC doesn't support AMX
|
|
||||||
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
|
||||||
endif()
|
|
||||||
else ()
|
else ()
|
||||||
ggml_add_cpu_backend_variant_impl("")
|
ggml_add_cpu_backend_variant_impl("")
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
116
ml/backend/ggml/ggml/src/ggml-backend-reg.cpp
vendored
116
ml/backend/ggml/ggml/src/ggml-backend-reg.cpp
vendored
@@ -72,16 +72,6 @@
|
|||||||
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static std::wstring utf8_to_utf16(const std::string & str) {
|
|
||||||
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
|
||||||
return converter.from_bytes(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::string utf16_to_utf8(const std::wstring & str) {
|
|
||||||
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
|
||||||
return converter.to_bytes(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(__clang__)
|
#if defined(__clang__)
|
||||||
# pragma clang diagnostic pop
|
# pragma clang diagnostic pop
|
||||||
#endif
|
#endif
|
||||||
@@ -96,12 +86,12 @@ struct dl_handle_deleter {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static dl_handle * dl_load_library(const std::wstring & path) {
|
static dl_handle * dl_load_library(const std::filesystem::path & path) {
|
||||||
// suppress error dialogs for missing DLLs
|
// suppress error dialogs for missing DLLs
|
||||||
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||||
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
||||||
|
|
||||||
HMODULE handle = LoadLibraryW(path.c_str());
|
HMODULE handle = LoadLibraryW(path.wstring().c_str());
|
||||||
|
|
||||||
SetErrorMode(old_mode);
|
SetErrorMode(old_mode);
|
||||||
|
|
||||||
@@ -129,8 +119,8 @@ struct dl_handle_deleter {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static void * dl_load_library(const std::wstring & path) {
|
static void * dl_load_library(const std::filesystem::path & path) {
|
||||||
dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
|
dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
|
||||||
|
|
||||||
return handle;
|
return handle;
|
||||||
}
|
}
|
||||||
@@ -222,11 +212,11 @@ struct ggml_backend_registry {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
|
ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) {
|
||||||
dl_handle_ptr handle { dl_load_library(path) };
|
dl_handle_ptr handle { dl_load_library(path) };
|
||||||
if (!handle) {
|
if (!handle) {
|
||||||
if (!silent) {
|
if (!silent) {
|
||||||
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
|
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path.string().c_str());
|
||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@@ -234,7 +224,7 @@ struct ggml_backend_registry {
|
|||||||
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
||||||
if (score_fn && score_fn() == 0) {
|
if (score_fn && score_fn() == 0) {
|
||||||
if (!silent) {
|
if (!silent) {
|
||||||
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
|
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path.string().c_str());
|
||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@@ -242,7 +232,7 @@ struct ggml_backend_registry {
|
|||||||
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
|
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
|
||||||
if (!backend_init_fn) {
|
if (!backend_init_fn) {
|
||||||
if (!silent) {
|
if (!silent) {
|
||||||
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
|
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path.string().c_str());
|
||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@@ -251,16 +241,16 @@ struct ggml_backend_registry {
|
|||||||
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
|
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
|
||||||
if (!silent) {
|
if (!silent) {
|
||||||
if (!reg) {
|
if (!reg) {
|
||||||
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
|
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path.string().c_str());
|
||||||
} else {
|
} else {
|
||||||
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
||||||
__func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
|
__func__, path.string().c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
|
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path.string().c_str());
|
||||||
|
|
||||||
register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));
|
register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));
|
||||||
|
|
||||||
@@ -396,14 +386,14 @@ ggml_backend_t ggml_backend_init_best(void) {
|
|||||||
|
|
||||||
// Dynamic loading
|
// Dynamic loading
|
||||||
ggml_backend_reg_t ggml_backend_load(const char * path) {
|
ggml_backend_reg_t ggml_backend_load(const char * path) {
|
||||||
return get_reg().load_backend(utf8_to_utf16(path), false);
|
return get_reg().load_backend(path, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_backend_unload(ggml_backend_reg_t reg) {
|
void ggml_backend_unload(ggml_backend_reg_t reg) {
|
||||||
get_reg().unload_backend(reg, true);
|
get_reg().unload_backend(reg, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::wstring get_executable_path() {
|
static std::filesystem::path get_executable_path() {
|
||||||
#if defined(__APPLE__)
|
#if defined(__APPLE__)
|
||||||
// get executable path
|
// get executable path
|
||||||
std::vector<char> path;
|
std::vector<char> path;
|
||||||
@@ -415,15 +405,9 @@ static std::wstring get_executable_path() {
|
|||||||
}
|
}
|
||||||
path.resize(size);
|
path.resize(size);
|
||||||
}
|
}
|
||||||
std::string base_path(path.data(), size);
|
|
||||||
// remove executable name
|
return std::filesystem::path(path.data()).parent_path();
|
||||||
auto last_slash = base_path.find_last_of('/');
|
|
||||||
if (last_slash != std::string::npos) {
|
|
||||||
base_path = base_path.substr(0, last_slash);
|
|
||||||
}
|
|
||||||
return utf8_to_utf16(base_path + "/");
|
|
||||||
#elif defined(__linux__) || defined(__FreeBSD__)
|
#elif defined(__linux__) || defined(__FreeBSD__)
|
||||||
std::string base_path = ".";
|
|
||||||
std::vector<char> path(1024);
|
std::vector<char> path(1024);
|
||||||
while (true) {
|
while (true) {
|
||||||
// get executable path
|
// get executable path
|
||||||
@@ -436,76 +420,56 @@ static std::wstring get_executable_path() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (len < (ssize_t) path.size()) {
|
if (len < (ssize_t) path.size()) {
|
||||||
base_path = std::string(path.data(), len);
|
return std::filesystem::path(path.data()).parent_path();
|
||||||
// remove executable name
|
|
||||||
auto last_slash = base_path.find_last_of('/');
|
|
||||||
if (last_slash != std::string::npos) {
|
|
||||||
base_path = base_path.substr(0, last_slash);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
path.resize(path.size() * 2);
|
path.resize(path.size() * 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
return utf8_to_utf16(base_path + "/");
|
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
std::vector<wchar_t> path(MAX_PATH);
|
std::vector<wchar_t> path(MAX_PATH);
|
||||||
DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
|
DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
|
||||||
if (len == 0) {
|
if (len == 0) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
std::wstring base_path(path.data(), len);
|
|
||||||
// remove executable name
|
return std::filesystem::path(path.data()).parent_path();
|
||||||
auto last_slash = base_path.find_last_of('\\');
|
|
||||||
if (last_slash != std::string::npos) {
|
|
||||||
base_path = base_path.substr(0, last_slash);
|
|
||||||
}
|
|
||||||
return base_path + L"\\";
|
|
||||||
#else
|
#else
|
||||||
return {};
|
return {};
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::wstring backend_filename_prefix() {
|
static std::string backend_filename_prefix() {
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
return L"ggml-";
|
return "ggml-";
|
||||||
#else
|
#else
|
||||||
return L"libggml-";
|
return "libggml-";
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::wstring backend_filename_suffix() {
|
static std::string backend_filename_suffix() {
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
return L".dll";
|
return ".dll";
|
||||||
#else
|
#else
|
||||||
return L".so";
|
return ".so";
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::wstring path_separator() {
|
|
||||||
#ifdef _WIN32
|
|
||||||
return L"\\";
|
|
||||||
#else
|
|
||||||
return L"/";
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
|
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
|
||||||
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
||||||
// TODO: search system paths
|
// TODO: search system paths
|
||||||
std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
|
namespace fs = std::filesystem;
|
||||||
std::vector<std::wstring> search_paths;
|
std::string file_prefix = backend_filename_prefix() + name + "-";
|
||||||
|
std::vector<fs::path> search_paths;
|
||||||
|
|
||||||
if (user_search_path == nullptr) {
|
if (user_search_path == nullptr) {
|
||||||
search_paths.push_back(L"." + path_separator());
|
search_paths.push_back(fs::current_path());
|
||||||
search_paths.push_back(get_executable_path());
|
search_paths.push_back(get_executable_path());
|
||||||
} else {
|
} else {
|
||||||
search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
|
search_paths.push_back(fs::u8path(user_search_path));
|
||||||
}
|
}
|
||||||
|
|
||||||
int best_score = 0;
|
int best_score = 0;
|
||||||
std::wstring best_path;
|
fs::path best_path;
|
||||||
|
|
||||||
namespace fs = std::filesystem;
|
|
||||||
for (const auto & search_path : search_paths) {
|
for (const auto & search_path : search_paths) {
|
||||||
if (!fs::exists(search_path)) {
|
if (!fs::exists(search_path)) {
|
||||||
continue;
|
continue;
|
||||||
@@ -514,31 +478,31 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
|||||||
for (const auto & entry : dir_it) {
|
for (const auto & entry : dir_it) {
|
||||||
try {
|
try {
|
||||||
if (entry.is_regular_file()) {
|
if (entry.is_regular_file()) {
|
||||||
std::wstring filename = entry.path().filename().wstring();
|
std::string filename = entry.path().filename().string();
|
||||||
std::wstring ext = entry.path().extension().wstring();
|
std::string ext = entry.path().extension().string();
|
||||||
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
|
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
|
||||||
dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
|
dl_handle_ptr handle { dl_load_library(entry.path()) };
|
||||||
if (!handle) {
|
if (!handle) {
|
||||||
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
||||||
if (!score_fn) {
|
if (!score_fn) {
|
||||||
GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
int s = score_fn();
|
int s = score_fn();
|
||||||
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
|
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
|
||||||
if (s > best_score) {
|
if (s > best_score) {
|
||||||
best_score = s;
|
best_score = s;
|
||||||
best_path = entry.path().wstring();
|
best_path = entry.path();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (const std::exception & e) {
|
} catch (const std::exception & e) {
|
||||||
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
|
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, entry.path().string().c_str(), e.what());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -546,7 +510,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
|||||||
if (best_score == 0) {
|
if (best_score == 0) {
|
||||||
// try to load the base backend
|
// try to load the base backend
|
||||||
for (const auto & search_path : search_paths) {
|
for (const auto & search_path : search_paths) {
|
||||||
std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
|
fs::path path = fs::path(search_path) / (backend_filename_prefix() + name + backend_filename_suffix());
|
||||||
if (fs::exists(path)) {
|
if (fs::exists(path)) {
|
||||||
return get_reg().load_backend(path, silent);
|
return get_reg().load_backend(path, silent);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,29 +49,29 @@ func (p *Progress) stop() bool {
|
|||||||
func (p *Progress) Stop() bool {
|
func (p *Progress) Stop() bool {
|
||||||
stopped := p.stop()
|
stopped := p.stop()
|
||||||
if stopped {
|
if stopped {
|
||||||
fmt.Fprintln(p.w)
|
fmt.Fprint(p.w, "\n")
|
||||||
|
p.w.Flush()
|
||||||
}
|
}
|
||||||
|
|
||||||
// show cursor
|
|
||||||
fmt.Fprint(p.w, "\033[?25h")
|
|
||||||
p.w.Flush()
|
|
||||||
return stopped
|
return stopped
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Progress) StopAndClear() bool {
|
func (p *Progress) StopAndClear() bool {
|
||||||
|
defer p.w.Flush()
|
||||||
|
|
||||||
|
fmt.Fprint(p.w, "\033[?25l")
|
||||||
|
defer fmt.Fprint(p.w, "\033[?25h")
|
||||||
|
|
||||||
stopped := p.stop()
|
stopped := p.stop()
|
||||||
if stopped {
|
if stopped {
|
||||||
// clear all progress lines
|
// clear all progress lines
|
||||||
for range p.pos - 1 {
|
for i := range p.pos {
|
||||||
fmt.Fprint(p.w, "\033[A")
|
if i > 0 {
|
||||||
|
fmt.Fprint(p.w, "\033[A")
|
||||||
|
}
|
||||||
|
fmt.Fprint(p.w, "\033[2K\033[1G")
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Fprint(p.w, "\033[2K", "\033[1G")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// show cursor
|
|
||||||
fmt.Fprint(p.w, "\033[?25h")
|
|
||||||
p.w.Flush()
|
|
||||||
return stopped
|
return stopped
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -86,13 +86,19 @@ func (p *Progress) render() {
|
|||||||
p.mu.Lock()
|
p.mu.Lock()
|
||||||
defer p.mu.Unlock()
|
defer p.mu.Unlock()
|
||||||
|
|
||||||
|
defer p.w.Flush()
|
||||||
|
|
||||||
|
// eliminate flickering on terminals that support synchronized output
|
||||||
fmt.Fprint(p.w, "\033[?2026h")
|
fmt.Fprint(p.w, "\033[?2026h")
|
||||||
defer fmt.Fprint(p.w, "\033[?2026l")
|
defer fmt.Fprint(p.w, "\033[?2026l")
|
||||||
|
|
||||||
|
fmt.Fprint(p.w, "\033[?25l")
|
||||||
|
defer fmt.Fprint(p.w, "\033[?25h")
|
||||||
|
|
||||||
|
// move the cursor back to the beginning
|
||||||
for range p.pos - 1 {
|
for range p.pos - 1 {
|
||||||
fmt.Fprint(p.w, "\033[A")
|
fmt.Fprint(p.w, "\033[A")
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Fprint(p.w, "\033[1G")
|
fmt.Fprint(p.w, "\033[1G")
|
||||||
|
|
||||||
// render progress lines
|
// render progress lines
|
||||||
@@ -104,13 +110,10 @@ func (p *Progress) render() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
p.pos = len(p.states)
|
p.pos = len(p.states)
|
||||||
p.w.Flush()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Progress) start() {
|
func (p *Progress) start() {
|
||||||
p.ticker = time.NewTicker(100 * time.Millisecond)
|
p.ticker = time.NewTicker(100 * time.Millisecond)
|
||||||
// hide cursor
|
|
||||||
fmt.Fprint(p.w, "\033[?25l")
|
|
||||||
for range p.ticker.C {
|
for range p.ticker.C {
|
||||||
p.render()
|
p.render()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1131,7 +1131,7 @@ func (s *Server) GenerateRoutes() http.Handler {
|
|||||||
config.AllowWildcard = true
|
config.AllowWildcard = true
|
||||||
config.AllowBrowserExtensions = true
|
config.AllowBrowserExtensions = true
|
||||||
config.AllowHeaders = []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}
|
config.AllowHeaders = []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}
|
||||||
openAIProperties := []string{"lang", "package-version", "os", "arch", "retry-count", "runtime", "runtime-version", "async", "helper-method", "poll-helper", "custom-poll-interval"}
|
openAIProperties := []string{"lang", "package-version", "os", "arch", "retry-count", "runtime", "runtime-version", "async", "helper-method", "poll-helper", "custom-poll-interval", "timeout"}
|
||||||
for _, prop := range openAIProperties {
|
for _, prop := range openAIProperties {
|
||||||
config.AllowHeaders = append(config.AllowHeaders, "x-stainless-"+prop)
|
config.AllowHeaders = append(config.AllowHeaders, "x-stainless-"+prop)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -179,7 +179,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
|||||||
if allReliable {
|
if allReliable {
|
||||||
// HACK
|
// HACK
|
||||||
os.Setenv("OLLAMA_MAX_LOADED_MODELS", strconv.Itoa(defaultModelsPerGPU*len(gpus)))
|
os.Setenv("OLLAMA_MAX_LOADED_MODELS", strconv.Itoa(defaultModelsPerGPU*len(gpus)))
|
||||||
slog.Debug("updating default concurrency", "OLLAMA_MAX_LOADED_MODELS", envconfig.MaxRunners, "gpu_count", len(gpus))
|
slog.Debug("updating default concurrency", "OLLAMA_MAX_LOADED_MODELS", envconfig.MaxRunners(), "gpu_count", len(gpus))
|
||||||
} else {
|
} else {
|
||||||
// HACK
|
// HACK
|
||||||
os.Setenv("OLLAMA_MAX_LOADED_MODELS", strconv.Itoa(len(gpus)))
|
os.Setenv("OLLAMA_MAX_LOADED_MODELS", strconv.Itoa(len(gpus)))
|
||||||
|
|||||||
Reference in New Issue
Block a user