Compare commits
58 Commits
whitespace
...
v0.1.28
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
21347e1ed6 | ||
|
|
3b4bab3dc5 | ||
|
|
cbd6e3b38e | ||
|
|
b830afa716 | ||
|
|
bd1d8b0d14 | ||
|
|
25c2912120 | ||
|
|
0e19476b56 | ||
|
|
fa2f2b3563 | ||
|
|
cbf4970e0f | ||
|
|
74468513bd | ||
|
|
794a916a72 | ||
|
|
76e5d9ec88 | ||
|
|
076237b8ea | ||
|
|
53d694c67f | ||
|
|
5aa6bfea94 | ||
|
|
1cde63dd64 | ||
|
|
98e0b7e94f | ||
|
|
061e8f6abc | ||
|
|
a189810df6 | ||
|
|
e95b896790 | ||
|
|
1f087c4d26 | ||
|
|
5d7ea6616f | ||
|
|
2a4b128ae3 | ||
|
|
fc483274ad | ||
|
|
fd10a2ad4b | ||
|
|
b291f63188 | ||
|
|
f58856bf6f | ||
|
|
275ea01587 | ||
|
|
8782dd5628 | ||
|
|
11bfff8ee1 | ||
|
|
7c0167a8f6 | ||
|
|
74d898e37d | ||
|
|
c6e8b00718 | ||
|
|
be9980ef13 | ||
|
|
646a0dedb9 | ||
|
|
7f964d938c | ||
|
|
e6b8a139ff | ||
|
|
bdc0ea1ba5 | ||
|
|
7fab7918cc | ||
|
|
74c1bdba0d | ||
|
|
f983ef7f5f | ||
|
|
1ae1c33651 | ||
|
|
efe040f8c0 | ||
|
|
2a7553ce09 | ||
|
|
10af6070a9 | ||
|
|
92423b0600 | ||
|
|
b3eac61cac | ||
|
|
287ba11500 | ||
|
|
63861f58cc | ||
|
|
f0425d3de9 | ||
|
|
210b65268e | ||
|
|
949d7b1c48 | ||
|
|
897b213468 | ||
|
|
4613a080e7 | ||
|
|
ace2cdf1c6 | ||
|
|
eed92bc19a | ||
|
|
e0a2f46466 | ||
|
|
01ff2e14db |
@@ -132,6 +132,7 @@ ENV OLLAMA_HOST 0.0.0.0
|
||||
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
|
||||
ENTRYPOINT ["/bin/ollama"]
|
||||
CMD ["serve"]
|
||||
|
||||
14
README.md
14
README.md
@@ -62,6 +62,8 @@ Here are some example models that can be downloaded:
|
||||
| Orca Mini | 3B | 1.9GB | `ollama run orca-mini` |
|
||||
| Vicuna | 7B | 3.8GB | `ollama run vicuna` |
|
||||
| LLaVA | 7B | 4.5GB | `ollama run llava` |
|
||||
| Gemma | 2B | 1.4GB | `ollama run gemma:2b` |
|
||||
| Gemma | 7B | 4.8GB | `ollama run gemma:7b` |
|
||||
|
||||
> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
|
||||
|
||||
@@ -258,19 +260,23 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
### Web & Desktop
|
||||
|
||||
- [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt)
|
||||
- [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
|
||||
- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
|
||||
- [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
|
||||
- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
|
||||
- [Minimalistic React UI for Ollama Models](https://github.com/richawo/minimal-llm-ui)
|
||||
- [Open WebUI](https://github.com/open-webui/open-webui)
|
||||
- [Ollamac](https://github.com/kevinhermawan/Ollamac)
|
||||
- [big-AGI](https://github.com/enricoros/big-agi/blob/main/docs/config-ollama.md)
|
||||
- [big-AGI](https://github.com/enricoros/big-AGI/blob/main/docs/config-local-ollama.md)
|
||||
- [Cheshire Cat assistant framework](https://github.com/cheshire-cat-ai/core)
|
||||
- [Amica](https://github.com/semperai/amica)
|
||||
- [chatd](https://github.com/BruceMacD/chatd)
|
||||
- [Ollama-SwiftUI](https://github.com/kghandour/Ollama-SwiftUI)
|
||||
- [MindMac](https://mindmac.app)
|
||||
- [NextJS Web Interface for Ollama](https://github.com/jakobhoeg/nextjs-ollama-llm-ui)
|
||||
- [Msty](https://msty.app)
|
||||
- [Chatbox](https://github.com/Bin-Huang/Chatbox)
|
||||
- [NextChat](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web) with [Get Started Doc](https://docs.nextchat.dev/models/ollama)
|
||||
|
||||
### Terminal
|
||||
|
||||
@@ -301,6 +307,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
|
||||
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
|
||||
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
|
||||
- [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
|
||||
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
|
||||
- [LangChain4j](https://github.com/langchain4j/langchain4j/tree/main/langchain4j-ollama)
|
||||
- [LiteLLM](https://github.com/BerriAI/litellm)
|
||||
@@ -315,8 +322,10 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [LangChainDart](https://github.com/davidmigloz/langchain_dart)
|
||||
- [Semantic Kernel - Python](https://github.com/microsoft/semantic-kernel/tree/main/python/semantic_kernel/connectors/ai/ollama)
|
||||
- [Haystack](https://github.com/deepset-ai/haystack-integrations/blob/main/integrations/ollama.md)
|
||||
- [Elixir LangChain](https://github.com/brainlid/langchain)
|
||||
- [Ollama for R - rollama](https://github.com/JBGruber/rollama)
|
||||
- [Ollama-ex for Elixir](https://github.com/lebrunel/ollama-ex)
|
||||
- [Ollama Connector for SAP ABAP](https://github.com/b-tocs/abap_btocs_ollama)
|
||||
|
||||
### Mobile
|
||||
|
||||
@@ -337,6 +346,9 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
|
||||
- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
|
||||
- [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)
|
||||
- [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot)
|
||||
- [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
|
||||
- [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
|
||||
- [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
|
||||
- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
|
||||
- [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
|
||||
|
||||
@@ -21,7 +21,7 @@ import (
|
||||
|
||||
type Client struct {
|
||||
base *url.URL
|
||||
http http.Client
|
||||
http *http.Client
|
||||
}
|
||||
|
||||
func checkError(resp *http.Response, body []byte) error {
|
||||
@@ -66,30 +66,13 @@ func ClientFromEnvironment() (*Client, error) {
|
||||
}
|
||||
}
|
||||
|
||||
client := Client{
|
||||
return &Client{
|
||||
base: &url.URL{
|
||||
Scheme: scheme,
|
||||
Host: net.JoinHostPort(host, port),
|
||||
},
|
||||
}
|
||||
|
||||
mockRequest, err := http.NewRequest(http.MethodHead, client.base.String(), nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
proxyURL, err := http.ProxyFromEnvironment(mockRequest)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client.http = http.Client{
|
||||
Transport: &http.Transport{
|
||||
Proxy: http.ProxyURL(proxyURL),
|
||||
},
|
||||
}
|
||||
|
||||
return &client, nil
|
||||
http: http.DefaultClient,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *Client) do(ctx context.Context, method, path string, reqData, respData any) error {
|
||||
|
||||
@@ -83,7 +83,7 @@ type Metrics struct {
|
||||
EvalDuration time.Duration `json:"eval_duration,omitempty"`
|
||||
}
|
||||
|
||||
// Options specfied in GenerateRequest, if you add a new option here add it to the API docs also
|
||||
// Options specified in GenerateRequest, if you add a new option here add it to the API docs also
|
||||
type Options struct {
|
||||
Runner
|
||||
|
||||
@@ -121,7 +121,6 @@ type Runner struct {
|
||||
VocabOnly bool `json:"vocab_only,omitempty"`
|
||||
UseMMap bool `json:"use_mmap,omitempty"`
|
||||
UseMLock bool `json:"use_mlock,omitempty"`
|
||||
EmbeddingOnly bool `json:"embedding_only,omitempty"`
|
||||
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
|
||||
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
|
||||
NumThread int `json:"num_thread,omitempty"`
|
||||
@@ -395,7 +394,6 @@ func DefaultOptions() Options {
|
||||
UseMLock: false,
|
||||
UseMMap: true,
|
||||
UseNUMA: false,
|
||||
EmbeddingOnly: true,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,20 +34,6 @@ type UpdateResponse struct {
|
||||
UpdateVersion string `json:"version"`
|
||||
}
|
||||
|
||||
func getClient(req *http.Request) http.Client {
|
||||
proxyURL, err := http.ProxyFromEnvironment(req)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to handle proxy: %s", err))
|
||||
return http.Client{}
|
||||
}
|
||||
|
||||
return http.Client{
|
||||
Transport: &http.Transport{
|
||||
Proxy: http.ProxyURL(proxyURL),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
|
||||
var updateResp UpdateResponse
|
||||
|
||||
@@ -83,10 +69,9 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
|
||||
}
|
||||
req.Header.Set("Authorization", signature)
|
||||
req.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
|
||||
client := getClient(req)
|
||||
|
||||
slog.Debug("checking for available update", "requestURL", requestURL)
|
||||
resp, err := client.Do(req)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to check for update: %s", err))
|
||||
return false, updateResp
|
||||
@@ -101,6 +86,11 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to read body response: %s", err))
|
||||
}
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
slog.Info(fmt.Sprintf("check update error %d - %.96s", resp.StatusCode, string(body)))
|
||||
return false, updateResp
|
||||
}
|
||||
err = json.Unmarshal(body, &updateResp)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("malformed response checking for update: %s", err))
|
||||
@@ -119,8 +109,8 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
client := getClient(req)
|
||||
resp, err := client.Do(req)
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error checking update: %w", err)
|
||||
}
|
||||
@@ -151,7 +141,7 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
|
||||
cleanupOldDownloads()
|
||||
|
||||
req.Method = http.MethodGet
|
||||
resp, err = client.Do(req)
|
||||
resp, err = http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error checking update: %w", err)
|
||||
}
|
||||
|
||||
@@ -49,9 +49,6 @@ SetupLogging=yes
|
||||
CloseApplications=yes
|
||||
RestartApplications=no
|
||||
|
||||
; Make sure they can at least download llama2 as a minimum
|
||||
ExtraDiskSpaceRequired=3826806784
|
||||
|
||||
; https://jrsoftware.org/ishelp/index.php?topic=setup_wizardimagefile
|
||||
WizardSmallImageFile=.\assets\setup.bmp
|
||||
|
||||
@@ -116,7 +113,8 @@ Filename: "{cmd}"; Parameters: "/c timeout 5"; Flags: runhidden
|
||||
Type: filesandordirs; Name: "{%TEMP}\ollama*"
|
||||
Type: filesandordirs; Name: "{%LOCALAPPDATA}\Ollama"
|
||||
Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
|
||||
Type: filesandordirs; Name: "{%USERPROFILE}\.ollama"
|
||||
Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\models"
|
||||
Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\history"
|
||||
; NOTE: if the user has a custom OLLAMA_MODELS it will be preserved
|
||||
|
||||
[Messages]
|
||||
|
||||
27
cmd/cmd.go
27
cmd/cmd.go
@@ -718,39 +718,36 @@ func initializeKeypair() error {
|
||||
_, err = os.Stat(privKeyPath)
|
||||
if os.IsNotExist(err) {
|
||||
fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)
|
||||
_, privKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
cryptoPublicKey, cryptoPrivateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
privKeyBytes, err := format.OpenSSHPrivateKey(privKey, "")
|
||||
privateKeyBytes, err := ssh.MarshalPrivateKey(cryptoPrivateKey, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = os.MkdirAll(filepath.Dir(privKeyPath), 0o755)
|
||||
if err != nil {
|
||||
if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
|
||||
return fmt.Errorf("could not create directory %w", err)
|
||||
}
|
||||
|
||||
err = os.WriteFile(privKeyPath, pem.EncodeToMemory(privKeyBytes), 0o600)
|
||||
if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privateKeyBytes), 0o600); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sshPublicKey, err := ssh.NewPublicKey(cryptoPublicKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sshPrivateKey, err := ssh.NewSignerFromKey(privKey)
|
||||
if err != nil {
|
||||
publicKeyBytes := ssh.MarshalAuthorizedKey(sshPublicKey)
|
||||
|
||||
if err := os.WriteFile(pubKeyPath, publicKeyBytes, 0o644); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
pubKeyData := ssh.MarshalAuthorizedKey(sshPrivateKey.PublicKey())
|
||||
|
||||
err = os.WriteFile(pubKeyPath, pubKeyData, 0o644)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Printf("Your new public key is: \n\n%s\n", string(pubKeyData))
|
||||
fmt.Printf("Your new public key is: \n\n%s\n", publicKeyBytes)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -321,7 +321,6 @@ curl http://localhost:11434/api/generate -d '{
|
||||
"vocab_only": false,
|
||||
"use_mmap": true,
|
||||
"use_mlock": false,
|
||||
"embedding_only": false,
|
||||
"rope_frequency_base": 1.1,
|
||||
"rope_frequency_scale": 0.8,
|
||||
"num_thread": 8
|
||||
|
||||
@@ -113,9 +113,9 @@ If a different directory needs to be used, set the environment variable `OLLAMA_
|
||||
|
||||
Refer to the section [above](#how-do-i-configure-ollama-server) for how to set environment variables on your platform.
|
||||
|
||||
## Does Ollama send my prompts and answers back to Ollama.ai to use in any way?
|
||||
## Does Ollama send my prompts and answers back to ollama.com?
|
||||
|
||||
No, Ollama runs entirely locally, and conversation data will never leave your machine.
|
||||
No. Ollama runs locally, and conversation data does not leave your machine.
|
||||
|
||||
## How can I use Ollama in Visual Studio Code?
|
||||
|
||||
|
||||
@@ -124,7 +124,10 @@ ollama run example "What is your favourite condiment?"
|
||||
Publishing models is in early alpha. If you'd like to publish your model to share with others, follow these steps:
|
||||
|
||||
1. Create [an account](https://ollama.com/signup)
|
||||
2. Run `cat ~/.ollama/id_ed25519.pub` (or `type %USERPROFILE%\.ollama\id_ed25519.pub` on Windows) to view your Ollama public key. Copy this to the clipboard.
|
||||
2. Copy your Ollama public key:
|
||||
- macOS: `cat ~/.ollama/id_ed25519.pub`
|
||||
- Windows: `type %USERPROFILE%\.ollama\id_ed25519.pub`
|
||||
- Linux: `cat /usr/share/ollama/.ollama/id_ed25519.pub`
|
||||
3. Add your public key to your [Ollama account](https://ollama.com/settings/keys)
|
||||
|
||||
Next, copy your model to your username's namespace:
|
||||
|
||||
@@ -42,12 +42,12 @@ text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
|
||||
all_splits = text_splitter.split_documents(data)
|
||||
```
|
||||
|
||||
It's split up, but we have to find the relevant splits and then submit those to the model. We can do this by creating embeddings and storing them in a vector database. We can use Ollama directly to instantiate an embedding model. We will use ChromaDB in this example for a vector database. `pip install GPT4All chromadb`
|
||||
It's split up, but we have to find the relevant splits and then submit those to the model. We can do this by creating embeddings and storing them in a vector database. We can use Ollama directly to instantiate an embedding model. We will use ChromaDB in this example for a vector database. `pip install chromadb`
|
||||
|
||||
```python
|
||||
from langchain.embeddings import OllamaEmbeddings
|
||||
from langchain.vectorstores import Chroma
|
||||
oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="llama2")
|
||||
oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="nomic-embed-text")
|
||||
vectorstore = Chroma.from_documents(documents=all_splits, embedding=oembed)
|
||||
```
|
||||
|
||||
@@ -66,7 +66,7 @@ The next thing is to send the question and the relevant parts of the docs to the
|
||||
```python
|
||||
from langchain.chains import RetrievalQA
|
||||
qachain=RetrievalQA.from_chain_type(ollama, retriever=vectorstore.as_retriever())
|
||||
qachain({"query": question})
|
||||
qachain.invoke({"query": question})
|
||||
```
|
||||
|
||||
The answer received from this chain was:
|
||||
|
||||
21
examples/python-chat-app/README.md
Normal file
21
examples/python-chat-app/README.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# Ollama Chat App
|
||||
|
||||
Build a Llama2 chat app using Streamlit and Ollama.
|
||||
|
||||
## Running the Example
|
||||
|
||||
1. Ensure you have the `llama2` model installed:
|
||||
|
||||
```bash
|
||||
ollama pull llama2
|
||||
```
|
||||
2. Install the Python Requirements.
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
3. Run the example:
|
||||
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
@@ -1,102 +0,0 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Code originally from https://go-review.googlesource.com/c/crypto/+/218620
|
||||
|
||||
// TODO: replace with upstream once the above change is merged and released.
|
||||
|
||||
package format
|
||||
|
||||
import (
|
||||
"crypto"
|
||||
"crypto/ed25519"
|
||||
"crypto/rand"
|
||||
"encoding/binary"
|
||||
"encoding/pem"
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
const privateKeyAuthMagic = "openssh-key-v1\x00"
|
||||
|
||||
type openSSHEncryptedPrivateKey struct {
|
||||
CipherName string
|
||||
KDFName string
|
||||
KDFOptions string
|
||||
KeysCount uint32
|
||||
PubKey []byte
|
||||
KeyBlocks []byte
|
||||
}
|
||||
|
||||
type openSSHPrivateKey struct {
|
||||
Check1 uint32
|
||||
Check2 uint32
|
||||
Keytype string
|
||||
Rest []byte `ssh:"rest"`
|
||||
}
|
||||
|
||||
type openSSHEd25519PrivateKey struct {
|
||||
Pub []byte
|
||||
Priv []byte
|
||||
Comment string
|
||||
Pad []byte `ssh:"rest"`
|
||||
}
|
||||
|
||||
func OpenSSHPrivateKey(key crypto.PrivateKey, comment string) (*pem.Block, error) {
|
||||
var check uint32
|
||||
if err := binary.Read(rand.Reader, binary.BigEndian, &check); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var pk1 openSSHPrivateKey
|
||||
pk1.Check1 = check
|
||||
pk1.Check2 = check
|
||||
|
||||
var w openSSHEncryptedPrivateKey
|
||||
w.KeysCount = 1
|
||||
|
||||
if k, ok := key.(*ed25519.PrivateKey); ok {
|
||||
key = *k
|
||||
}
|
||||
|
||||
switch k := key.(type) {
|
||||
case ed25519.PrivateKey:
|
||||
pub, priv := k[32:], k
|
||||
key := openSSHEd25519PrivateKey{
|
||||
Pub: pub,
|
||||
Priv: priv,
|
||||
Comment: comment,
|
||||
}
|
||||
|
||||
pk1.Keytype = ssh.KeyAlgoED25519
|
||||
pk1.Rest = ssh.Marshal(key)
|
||||
|
||||
w.PubKey = ssh.Marshal(struct {
|
||||
KeyType string
|
||||
Pub []byte
|
||||
}{
|
||||
ssh.KeyAlgoED25519, pub,
|
||||
})
|
||||
default:
|
||||
return nil, fmt.Errorf("ssh: unknown key type %T", k)
|
||||
}
|
||||
|
||||
w.KeyBlocks = openSSHPadding(ssh.Marshal(pk1), 8)
|
||||
|
||||
w.CipherName, w.KDFName, w.KDFOptions = "none", "none", ""
|
||||
|
||||
return &pem.Block{
|
||||
Type: "OPENSSH PRIVATE KEY",
|
||||
Bytes: append([]byte(privateKeyAuthMagic), ssh.Marshal(w)...),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func openSSHPadding(block []byte, blocksize int) []byte {
|
||||
for i, j := 0, len(block); (j+i)%blocksize != 0; i++ {
|
||||
block = append(block, byte(i+1))
|
||||
}
|
||||
|
||||
return block
|
||||
}
|
||||
16
go.mod
16
go.mod
@@ -3,8 +3,10 @@ module github.com/jmorganca/ollama
|
||||
go 1.21
|
||||
|
||||
require (
|
||||
github.com/containerd/console v1.0.3
|
||||
github.com/emirpasic/gods v1.18.1
|
||||
github.com/gin-gonic/gin v1.9.1
|
||||
github.com/google/uuid v1.0.0
|
||||
github.com/olekukonko/tablewriter v0.0.5
|
||||
github.com/spf13/cobra v1.7.0
|
||||
github.com/stretchr/testify v1.8.4
|
||||
@@ -12,26 +14,12 @@ require (
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/containerd/console v1.0.3 // indirect
|
||||
github.com/cratonica/2goarray v0.0.0-20190331194516-514510793eaa // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/getlantern/context v0.0.0-20190109183933-c447772a6520 // indirect
|
||||
github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7 // indirect
|
||||
github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7 // indirect
|
||||
github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7 // indirect
|
||||
github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55 // indirect
|
||||
github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f // indirect
|
||||
github.com/getlantern/systray v1.2.2 // indirect
|
||||
github.com/go-stack/stack v1.8.0 // indirect
|
||||
github.com/google/uuid v1.0.0 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.14 // indirect
|
||||
github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c // indirect
|
||||
github.com/pborman/uuid v1.2.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
)
|
||||
|
||||
|
||||
require (
|
||||
github.com/bytedance/sonic v1.9.1 // indirect
|
||||
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
|
||||
|
||||
29
go.sum
29
go.sum
@@ -7,8 +7,6 @@ github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583j
|
||||
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
|
||||
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/cratonica/2goarray v0.0.0-20190331194516-514510793eaa h1:Wg+722vs7a2zQH5lR9QWYsVbplKeffaQFIs5FTdfNNo=
|
||||
github.com/cratonica/2goarray v0.0.0-20190331194516-514510793eaa/go.mod h1:6Arca19mRx58CA7OWEd7Wu1NpC1rd3uDnNs6s1pj/DI=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
@@ -17,20 +15,6 @@ github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc
|
||||
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
|
||||
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
|
||||
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
|
||||
github.com/getlantern/context v0.0.0-20190109183933-c447772a6520 h1:NRUJuo3v3WGC/g5YiyF790gut6oQr5f3FBI88Wv0dx4=
|
||||
github.com/getlantern/context v0.0.0-20190109183933-c447772a6520/go.mod h1:L+mq6/vvYHKjCX2oez0CgEAJmbq1fbb/oNJIWQkBybY=
|
||||
github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7 h1:6uJ+sZ/e03gkbqZ0kUG6mfKoqDb4XMAzMIwlajq19So=
|
||||
github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7/go.mod h1:l+xpFBrCtDLpK9qNjxs+cHU6+BAdlBaxHqikB6Lku3A=
|
||||
github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7 h1:guBYzEaLz0Vfc/jv0czrr2z7qyzTOGC9hiQ0VC+hKjk=
|
||||
github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7/go.mod h1:zx/1xUUeYPy3Pcmet8OSXLbF47l+3y6hIPpyLWoR9oc=
|
||||
github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7 h1:micT5vkcr9tOVk1FiH8SWKID8ultN44Z+yzd2y/Vyb0=
|
||||
github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7/go.mod h1:dD3CgOrwlzca8ed61CsZouQS5h5jIzkK9ZWrTcf0s+o=
|
||||
github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55 h1:XYzSdCbkzOC0FDNrgJqGRo8PCMFOBFL9py72DRs7bmc=
|
||||
github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55/go.mod h1:6mmzY2kW1TOOrVy+r41Za2MxXM+hhqTtY3oBKd2AgFA=
|
||||
github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f h1:wrYrQttPS8FHIRSlsrcuKazukx/xqO/PpLZzZXsF+EA=
|
||||
github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f/go.mod h1:D5ao98qkA6pxftxoqzibIBBrLSUli+kYnJqrgBf9cIA=
|
||||
github.com/getlantern/systray v1.2.2 h1:dCEHtfmvkJG7HZ8lS/sLklTH4RKUcIsKrAD9sThoEBE=
|
||||
github.com/getlantern/systray v1.2.2/go.mod h1:pXFOI1wwqwYXEhLPm9ZGjS2u/vVELeIgNMY5HvhHhcE=
|
||||
github.com/gin-contrib/cors v1.4.0 h1:oJ6gwtUl3lqV0WEIwM/LxPF1QZ5qe2lGWdY2+bz7y0g=
|
||||
github.com/gin-contrib/cors v1.4.0/go.mod h1:bs9pNM0x/UsmHPBWT2xZz9ROh8xYjYkiURUfmBoMlcs=
|
||||
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
|
||||
@@ -40,6 +24,7 @@ github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
|
||||
github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
|
||||
github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
||||
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||
github.com/go-playground/locales v0.14.0/go.mod h1:sawfccIbzZTqEDETgFXqTho0QybSa7l++s0DH+LDiLs=
|
||||
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
||||
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
|
||||
@@ -49,8 +34,6 @@ github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91
|
||||
github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos=
|
||||
github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js=
|
||||
github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU=
|
||||
github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk=
|
||||
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
|
||||
github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||
@@ -79,8 +62,6 @@ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY=
|
||||
github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
|
||||
github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
|
||||
github.com/lxn/walk v0.0.0-20210112085537-c389da54e794/go.mod h1:E23UucZGqpuUANJooIbHWCufXvOcT6E7Stq81gU+CSQ=
|
||||
github.com/lxn/win v0.0.0-20210218163916-a377121e959e/go.mod h1:KxxjdtRkfNoYDCUP5ryK7XJJNTnpC8atvtmTheChOtk=
|
||||
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
|
||||
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
|
||||
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
@@ -94,12 +75,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
|
||||
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
|
||||
github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c h1:rp5dCmg/yLR3mgFuSOe4oEnDDmGLROTvMragMUXpTQw=
|
||||
github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c/go.mod h1:X07ZCGwUbLaax7L0S3Tw4hpejzu63ZrrQiUe6W0hcy0=
|
||||
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
|
||||
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
|
||||
github.com/pborman/uuid v1.2.1 h1:+ZZIw58t/ozdjRaXh/3awHfmWRbzYxJoAdNJxe/3pvw=
|
||||
github.com/pborman/uuid v1.2.1/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k=
|
||||
github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo=
|
||||
github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ=
|
||||
github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4=
|
||||
@@ -112,7 +89,6 @@ github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTE
|
||||
github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8=
|
||||
github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966/go.mod h1:sUM3LWHvSMaG192sy56D9F7CNvL7jUJVXoqM1QKLnog=
|
||||
github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I=
|
||||
github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0=
|
||||
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
|
||||
@@ -149,14 +125,12 @@ golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
|
||||
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
|
||||
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
|
||||
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
|
||||
golang.org/x/sys v0.0.0-20201018230417-eeed37f84f13/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
|
||||
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
@@ -173,7 +147,6 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0
|
||||
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
||||
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
|
||||
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
||||
gopkg.in/Knetic/govaluate.v3 v3.0.0/go.mod h1:csKLBORsPbafmSCGTEh3U7Ozmsuq8ZSIlKk1bcqph0E=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
//go:build darwin
|
||||
|
||||
package gpu
|
||||
|
||||
/*
|
||||
#cgo CFLAGS: -x objective-c
|
||||
#cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal
|
||||
#include "gpu_info_darwin.h"
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"runtime"
|
||||
|
||||
"github.com/pbnjay/memory"
|
||||
)
|
||||
|
||||
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
|
||||
@@ -15,19 +17,8 @@ func CheckVRAM() (int64, error) {
|
||||
// gpu not supported, this may not be metal
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// on macOS, there's already buffer for available vram (see below) so just return the total
|
||||
systemMemory := int64(memory.TotalMemory())
|
||||
|
||||
// macOS limits how much memory is available to the GPU based on the amount of system memory
|
||||
// TODO: handle case where iogpu.wired_limit_mb is set to a higher value
|
||||
if systemMemory <= 36*1024*1024*1024 {
|
||||
systemMemory = systemMemory * 2 / 3
|
||||
} else {
|
||||
systemMemory = systemMemory * 3 / 4
|
||||
}
|
||||
|
||||
return systemMemory, nil
|
||||
recommendedMaxVRAM := int64(C.getRecommendedMaxVRAM())
|
||||
return recommendedMaxVRAM, nil
|
||||
}
|
||||
|
||||
func GetGPUInfo() GpuInfo {
|
||||
|
||||
@@ -156,7 +156,7 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
|
||||
}
|
||||
|
||||
LOG(h.verbose, "[%d] CUDA totalMem %ld\n", i, memInfo.total);
|
||||
LOG(h.verbose, "[%d] CUDA usedMem %ld\n", i, memInfo.free);
|
||||
LOG(h.verbose, "[%d] CUDA usedMem %ld\n", i, memInfo.used);
|
||||
|
||||
resp->total += memInfo.total;
|
||||
resp->free += memInfo.free;
|
||||
|
||||
3
gpu/gpu_info_darwin.h
Normal file
3
gpu/gpu_info_darwin.h
Normal file
@@ -0,0 +1,3 @@
|
||||
#import <Metal/Metal.h>
|
||||
#include <stdint.h>
|
||||
uint64_t getRecommendedMaxVRAM();
|
||||
11
gpu/gpu_info_darwin.m
Normal file
11
gpu/gpu_info_darwin.m
Normal file
@@ -0,0 +1,11 @@
|
||||
//go:build darwin
|
||||
#include "gpu_info_darwin.h"
|
||||
|
||||
uint64_t getRecommendedMaxVRAM()
|
||||
{
|
||||
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
|
||||
uint64_t result = device.recommendedMaxWorkingSetSize;
|
||||
CFRelease(device);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -106,7 +106,12 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
|
||||
sparams.memory_f16 = C.bool(opts.F16KV)
|
||||
sparams.use_mlock = C.bool(opts.UseMLock)
|
||||
sparams.use_mmap = C.bool(opts.UseMMap)
|
||||
sparams.numa = C.bool(opts.UseNUMA)
|
||||
|
||||
if opts.UseNUMA {
|
||||
sparams.numa = C.int(1)
|
||||
} else {
|
||||
sparams.numa = C.int(0)
|
||||
}
|
||||
|
||||
sparams.lora_adapters = nil
|
||||
for i := 0; i < len(adapters); i++ {
|
||||
@@ -194,7 +199,6 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
|
||||
request["grammar"] = jsonGrammar
|
||||
}
|
||||
|
||||
var whitespace int
|
||||
retryDelay := 100 * time.Microsecond
|
||||
for retries := 0; retries < maxRetries; retries++ {
|
||||
if retries > 0 {
|
||||
@@ -253,24 +257,6 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
|
||||
break out
|
||||
}
|
||||
|
||||
// detect if p.Content is entirely whitespace
|
||||
if predict.Format == "json" && strings.TrimSpace(p.Content) == "" {
|
||||
whitespace++
|
||||
|
||||
// if we get 100 consecutive whitespace responses, cancel
|
||||
if whitespace > 100 {
|
||||
slog.Debug("cancelling due to excessive whitespace")
|
||||
C.dyn_llama_server_completion_cancel(llm.s, resp.id, &resp)
|
||||
if resp.id < 0 {
|
||||
return extServerResponseToErr(resp)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
whitespace = 0
|
||||
}
|
||||
|
||||
if p.Content != "" {
|
||||
fn(PredictResult{
|
||||
Content: p.Content,
|
||||
|
||||
@@ -80,7 +80,7 @@ void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) {
|
||||
params.main_gpu = sparams->main_gpu;
|
||||
params.use_mlock = sparams->use_mlock;
|
||||
params.use_mmap = sparams->use_mmap;
|
||||
params.numa = sparams->numa;
|
||||
params.numa = (ggml_numa_strategy)sparams->numa;
|
||||
params.embedding = sparams->embedding;
|
||||
if (sparams->model != NULL) {
|
||||
params.model = sparams->model;
|
||||
@@ -111,7 +111,8 @@ void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) {
|
||||
}
|
||||
#endif
|
||||
|
||||
llama_backend_init(params.numa);
|
||||
llama_backend_init();
|
||||
llama_numa_init(params.numa);
|
||||
|
||||
// load the model
|
||||
if (!llama->load_model(params)) {
|
||||
@@ -145,9 +146,9 @@ void llama_server_start() {
|
||||
llama->queue_tasks.on_new_task(std::bind(
|
||||
&llama_server_context::process_single_task, llama, std::placeholders::_1));
|
||||
llama->queue_tasks.on_finish_multitask(std::bind(
|
||||
&llama_server_context::on_finish_multitask, llama, std::placeholders::_1));
|
||||
llama->queue_tasks.on_all_tasks_finished(std::bind(
|
||||
&llama_server_context::run_on_all_tasks_finished, llama));
|
||||
&llama_server_context::on_finish_multitask, llama, std::placeholders::_1));
|
||||
llama->queue_tasks.on_run_slots(std::bind(
|
||||
&llama_server_context::update_slots, llama));
|
||||
llama->queue_results.on_multitask_update(std::bind(
|
||||
&llama_server_queue::update_multitask,
|
||||
&llama->queue_tasks,
|
||||
@@ -208,7 +209,6 @@ void llama_server_completion(const char *json_req, ext_server_resp_t *resp) {
|
||||
void llama_server_completion_next_result(const int task_id,
|
||||
ext_server_task_result_t *resp) {
|
||||
assert(llama != NULL && resp != NULL);
|
||||
std::string msg;
|
||||
resp->id = -1;
|
||||
resp->stop = false;
|
||||
resp->error = false;
|
||||
|
||||
@@ -41,7 +41,7 @@ typedef struct ext_server_params {
|
||||
int32_t main_gpu; // the GPU that is used for scratch and small tensors
|
||||
bool use_mlock; // force system to keep model in RAM
|
||||
bool use_mmap; // use mmap if possible
|
||||
bool numa; // attempt optimizations that help on some NUMA systems
|
||||
int numa; // attempt optimizations that help on some NUMA systems
|
||||
bool embedding; // get only sentence embedding
|
||||
ext_server_lora_adapter_t *lora_adapters;
|
||||
char *mmproj;
|
||||
|
||||
@@ -101,7 +101,7 @@ compress_libs() {
|
||||
pids=""
|
||||
rm -rf ${BUILD_DIR}/lib/*.${LIB_EXT}*.gz
|
||||
for lib in ${BUILD_DIR}/lib/*.${LIB_EXT}* ; do
|
||||
gzip --best -f ${lib} &
|
||||
gzip -n --best -f ${lib} &
|
||||
pids+=" $!"
|
||||
done
|
||||
echo
|
||||
|
||||
@@ -154,8 +154,9 @@ apply_patches
|
||||
# -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
|
||||
# -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
|
||||
|
||||
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on", "-DLLAMA_NATIVE=off")
|
||||
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
|
||||
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu"
|
||||
write-host "Building LCD CPU"
|
||||
@@ -164,6 +165,7 @@ install
|
||||
sign
|
||||
compress_libs
|
||||
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx"
|
||||
write-host "Building AVX CPU"
|
||||
@@ -172,6 +174,7 @@ install
|
||||
sign
|
||||
compress_libs
|
||||
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
|
||||
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx2"
|
||||
write-host "Building AVX2 CPU"
|
||||
|
||||
15
llm/ggml.go
15
llm/ggml.go
@@ -31,6 +31,11 @@ const (
|
||||
fileTypeQ5_K_S
|
||||
fileTypeQ5_K_M
|
||||
fileTypeQ6_K
|
||||
fileTypeIQ2_XXS
|
||||
fileTypeIQ2_XS
|
||||
fileTypeQ2_K_S
|
||||
fileTypeQ3_K_XS
|
||||
fileTypeIQ3_XXS
|
||||
)
|
||||
|
||||
func fileType(fileType uint32) string {
|
||||
@@ -69,6 +74,16 @@ func fileType(fileType uint32) string {
|
||||
return "Q5_K_M"
|
||||
case fileTypeQ6_K:
|
||||
return "Q6_K"
|
||||
case fileTypeIQ2_XXS:
|
||||
return "IQ2_XXS"
|
||||
case fileTypeIQ2_XS:
|
||||
return "IQ2_XS"
|
||||
case fileTypeQ2_K_S:
|
||||
return "Q2_K_S"
|
||||
case fileTypeQ3_K_XS:
|
||||
return "Q3_K_XS"
|
||||
case fileTypeIQ3_XXS:
|
||||
return "IQ3_XXS"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
@@ -115,6 +115,14 @@ func (t tensor) typeSize() uint64 {
|
||||
return 2 + 2 + 12 + blockSize/8 + blockSize/2
|
||||
case 14: // Q6_K
|
||||
return blockSize/2 + blockSize/4 + blockSize/16 + 2
|
||||
case 15: // Q8_K
|
||||
return 2 + blockSize + 2*blockSize/16
|
||||
case 16: // IQ2_XXS
|
||||
return 2 + 2*blockSize/8
|
||||
case 17: // IQ2_XS
|
||||
return 2 + 2*blockSize/8 + blockSize/32
|
||||
case 18: // IQ3_XXS
|
||||
return 2 + 3*blockSize/8
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
|
||||
Submodule llm/llama.cpp updated: 6c00a06692...c29af7e225
@@ -1,9 +1,9 @@
|
||||
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
|
||||
index d86d7e04..2694e92e 100644
|
||||
index 2b2f4a0f..afac49af 100644
|
||||
--- a/examples/server/server.cpp
|
||||
+++ b/examples/server/server.cpp
|
||||
@@ -901,13 +901,15 @@ struct llama_server_context
|
||||
slot.sent_count += result.text_to_send.size();
|
||||
@@ -997,13 +997,15 @@ struct llama_server_context
|
||||
slot.n_sent_text += result.text_to_send.size();
|
||||
// add the token to slot queue and cache
|
||||
}
|
||||
- slot.add_token_string(result);
|
||||
|
||||
@@ -1,30 +1,29 @@
|
||||
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
|
||||
index 3102762c..568ac1d0 100644
|
||||
index 2b2f4a0f..25857bdd 100644
|
||||
--- a/examples/server/server.cpp
|
||||
+++ b/examples/server/server.cpp
|
||||
@@ -307,6 +307,10 @@ struct llama_client_slot
|
||||
}
|
||||
};
|
||||
@@ -31,6 +31,10 @@
|
||||
#include <atomic>
|
||||
#include <signal.h>
|
||||
|
||||
+#ifdef GGML_USE_CUBLAS
|
||||
+extern "C" GGML_CALL void ggml_free_cublas(void);
|
||||
+#endif
|
||||
+
|
||||
struct llama_server_context
|
||||
{
|
||||
llama_model *model = nullptr;
|
||||
@@ -353,6 +357,10 @@ struct llama_server_context
|
||||
using json = nlohmann::json;
|
||||
|
||||
struct server_params {
|
||||
@@ -363,6 +367,9 @@ struct llama_server_context
|
||||
llama_free_model(model);
|
||||
model = nullptr;
|
||||
}
|
||||
+#ifdef GGML_USE_CUBLAS
|
||||
+ ggml_free_cublas();
|
||||
+#endif
|
||||
+
|
||||
}
|
||||
|
||||
bool load_model(const gpt_params ¶ms_)
|
||||
@@ -3093,6 +3101,7 @@ int main(int argc, char **argv)
|
||||
@@ -3494,6 +3501,7 @@ int main(int argc, char **argv)
|
||||
sigemptyset (&sigint_action.sa_mask);
|
||||
sigint_action.sa_flags = 0;
|
||||
sigaction(SIGINT, &sigint_action, NULL);
|
||||
@@ -32,16 +31,11 @@ index 3102762c..568ac1d0 100644
|
||||
#elif defined (_WIN32)
|
||||
auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
|
||||
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
|
||||
@@ -3106,3 +3115,4 @@ int main(int argc, char **argv)
|
||||
llama_backend_free();
|
||||
return 0;
|
||||
}
|
||||
+
|
||||
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
|
||||
index 96976f24..3543920e 100644
|
||||
index 0c6501e9..75c12723 100644
|
||||
--- a/ggml-cuda.cu
|
||||
+++ b/ggml-cuda.cu
|
||||
@@ -39,6 +39,7 @@
|
||||
@@ -43,6 +43,7 @@
|
||||
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
|
||||
#define cublasComputeType_t hipblasDatatype_t //deprecated, new hipblasComputeType_t not in 5.6
|
||||
#define cublasCreate hipblasCreate
|
||||
@@ -49,30 +43,30 @@ index 96976f24..3543920e 100644
|
||||
#define cublasGemmEx hipblasGemmEx
|
||||
#define cublasGemmBatchedEx hipblasGemmBatchedEx
|
||||
#define cublasGemmStridedBatchedEx hipblasGemmStridedBatchedEx
|
||||
@@ -7928,10 +7929,11 @@ GGML_CALL bool ggml_cublas_loaded(void) {
|
||||
@@ -8694,10 +8695,10 @@ GGML_CALL bool ggml_cublas_loaded(void) {
|
||||
return g_cublas_loaded;
|
||||
}
|
||||
|
||||
+static bool g_cublas_initialized = false;
|
||||
+
|
||||
GGML_CALL void ggml_init_cublas() {
|
||||
-GGML_CALL void ggml_init_cublas() {
|
||||
- static bool initialized = false;
|
||||
+static bool g_cublas_initialized = false;
|
||||
|
||||
- if (!initialized) {
|
||||
+GGML_CALL void ggml_init_cublas() {
|
||||
+ if (!g_cublas_initialized) {
|
||||
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
// Workaround for a rocBLAS bug when using multiple graphics cards:
|
||||
@@ -7941,7 +7943,7 @@ GGML_CALL void ggml_init_cublas() {
|
||||
@@ -8707,7 +8708,7 @@ GGML_CALL void ggml_init_cublas() {
|
||||
#endif
|
||||
|
||||
if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
|
||||
- initialized = true;
|
||||
+ g_cublas_initialized = true;
|
||||
g_cublas_loaded = false;
|
||||
fprintf(stderr, "%s: no " GGML_CUDA_NAME " devices found, " GGML_CUDA_NAME " will be disabled\n", __func__);
|
||||
return;
|
||||
}
|
||||
@@ -8011,7 +8013,7 @@ GGML_CALL void ggml_init_cublas() {
|
||||
@@ -8778,7 +8779,7 @@ GGML_CALL void ggml_init_cublas() {
|
||||
// configure logging to stdout
|
||||
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
|
||||
|
||||
@@ -81,7 +75,7 @@ index 96976f24..3543920e 100644
|
||||
g_cublas_loaded = true;
|
||||
}
|
||||
}
|
||||
@@ -11528,3 +11530,17 @@ GGML_CALL int ggml_backend_cuda_reg_devices() {
|
||||
@@ -12345,3 +12346,22 @@ GGML_CALL int ggml_backend_cuda_reg_devices() {
|
||||
}
|
||||
return device_count;
|
||||
}
|
||||
@@ -89,28 +83,32 @@ index 96976f24..3543920e 100644
|
||||
+extern "C" GGML_CALL void ggml_free_cublas(void);
|
||||
+GGML_CALL void ggml_free_cublas(void) {
|
||||
+ for (int id = 0; id < g_device_count; ++id) {
|
||||
+#if !defined(GGML_USE_HIPBLAS)
|
||||
+ CU_CHECK(cuMemUnmap(g_cuda_pool_addr[id], g_cuda_pool_size[id]));
|
||||
+ g_cuda_pool_size[id] = 0;
|
||||
+ g_cuda_pool_addr[id] = 0;
|
||||
+#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__))
|
||||
+ if (g_device_caps[id].vmm) {
|
||||
+ CU_CHECK(cuMemUnmap(g_cuda_pool_addr[id], g_cuda_pool_size[id]));
|
||||
+ g_cuda_pool_size[id] = 0;
|
||||
+ g_cuda_pool_addr[id] = 0;
|
||||
+ }
|
||||
+#endif
|
||||
+ // TODO: free legacy non-vmm memory
|
||||
+ // destroy cublas handle
|
||||
+ CUBLAS_CHECK(cublasDestroy(g_cublas_handles[id]));
|
||||
+ g_cublas_handles[id] = nullptr;
|
||||
+ }
|
||||
+
|
||||
+ g_cublas_initialized = false;
|
||||
+}
|
||||
\ No newline at end of file
|
||||
diff --git a/ggml-cuda.h b/ggml-cuda.h
|
||||
index b1ebd61d..b4c80c2c 100644
|
||||
index b1ebd61d..6dd58ddf 100644
|
||||
--- a/ggml-cuda.h
|
||||
+++ b/ggml-cuda.h
|
||||
@@ -20,6 +20,9 @@ extern "C" {
|
||||
// Always success. To check if CUDA is actually loaded, use `ggml_cublas_loaded`.
|
||||
GGML_API GGML_CALL void ggml_init_cublas(void);
|
||||
@@ -23,6 +23,9 @@ GGML_API GGML_CALL void ggml_init_cublas(void);
|
||||
// Returns `true` if there are available CUDA devices and cublas loads successfully; otherwise, it returns `false`.
|
||||
GGML_API GGML_CALL bool ggml_cublas_loaded(void);
|
||||
|
||||
+// Release CUDA resources
|
||||
+GGML_API GGML_CALL void ggml_free_cublas(void);
|
||||
+
|
||||
// Returns `true` if there are available CUDA devices and cublas loads successfully; otherwise, it returns `false`.
|
||||
GGML_API GGML_CALL bool ggml_cublas_loaded(void);
|
||||
GGML_API GGML_CALL void * ggml_cuda_host_malloc(size_t size);
|
||||
GGML_API GGML_CALL void ggml_cuda_host_free(void * ptr);
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
|
||||
index a0b46970..7800c6e7 100644
|
||||
--- a/examples/server/server.cpp
|
||||
+++ b/examples/server/server.cpp
|
||||
@@ -28,6 +28,7 @@
|
||||
#include <chrono>
|
||||
#include <condition_variable>
|
||||
#include <atomic>
|
||||
+#include <signal.h>
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
@@ -2511,6 +2512,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
|
||||
}
|
||||
}
|
||||
|
||||
+std::function<void(int)> shutdown_handler;
|
||||
+inline void signal_handler(int signal) { shutdown_handler(signal); }
|
||||
+
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
#if SERVER_VERBOSE != 1
|
||||
@@ -3128,8 +3132,25 @@ int main(int argc, char **argv)
|
||||
std::placeholders::_2,
|
||||
std::placeholders::_3
|
||||
));
|
||||
- llama.queue_tasks.start_loop();
|
||||
|
||||
+ shutdown_handler = [&](int) {
|
||||
+ llama.queue_tasks.terminate();
|
||||
+ };
|
||||
+
|
||||
+#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
|
||||
+ struct sigaction sigint_action;
|
||||
+ sigint_action.sa_handler = signal_handler;
|
||||
+ sigemptyset (&sigint_action.sa_mask);
|
||||
+ sigint_action.sa_flags = 0;
|
||||
+ sigaction(SIGINT, &sigint_action, NULL);
|
||||
+#elif defined (_WIN32)
|
||||
+ auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
|
||||
+ return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
|
||||
+ };
|
||||
+ SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
|
||||
+#endif
|
||||
+ llama.queue_tasks.start_loop();
|
||||
+ svr.stop();
|
||||
t.join();
|
||||
|
||||
llama_backend_free();
|
||||
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
|
||||
index 54854896..0ee670db 100644
|
||||
--- a/examples/server/utils.hpp
|
||||
+++ b/examples/server/utils.hpp
|
||||
@@ -220,6 +220,7 @@ inline std::string format_chatml(std::vector<json> messages)
|
||||
struct llama_server_queue {
|
||||
int id = 0;
|
||||
std::mutex mutex_tasks;
|
||||
+ bool running;
|
||||
// queues
|
||||
std::vector<task_server> queue_tasks;
|
||||
std::vector<task_server> queue_tasks_deferred;
|
||||
@@ -278,9 +279,18 @@ struct llama_server_queue {
|
||||
queue_tasks_deferred.clear();
|
||||
}
|
||||
|
||||
- // Start the main loop. This call is blocking
|
||||
- [[noreturn]]
|
||||
+ // end the start_loop routine
|
||||
+ void terminate() {
|
||||
+ {
|
||||
+ std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||
+ running = false;
|
||||
+ }
|
||||
+ condition_tasks.notify_all();
|
||||
+ }
|
||||
+
|
||||
+ // Start the main loop.
|
||||
void start_loop() {
|
||||
+ running = true;
|
||||
while (true) {
|
||||
// new task arrived
|
||||
LOG_VERBOSE("have new task", {});
|
||||
@@ -324,8 +334,12 @@ struct llama_server_queue {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||
if (queue_tasks.empty()) {
|
||||
+ if (!running) {
|
||||
+ LOG_VERBOSE("ending start_loop", {});
|
||||
+ return;
|
||||
+ }
|
||||
condition_tasks.wait(lock, [&]{
|
||||
- return !queue_tasks.empty();
|
||||
+ return (!queue_tasks.empty() || !running);
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -5,13 +5,15 @@ set -eu
|
||||
export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
|
||||
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
|
||||
|
||||
IMAGE_NAME=${IMAGE_NAME:-"ollama/ollama"}
|
||||
BUILD_PLATFORM=${BUILD_PLATFORM:-"linux/arm64,linux/amd64"}
|
||||
docker build \
|
||||
--load \
|
||||
--platform=linux/arm64,linux/amd64 \
|
||||
--platform=${BUILD_PLATFORM} \
|
||||
--build-arg=VERSION \
|
||||
--build-arg=GOFLAGS \
|
||||
-f Dockerfile \
|
||||
-t ollama/ollama:$VERSION \
|
||||
-t ${IMAGE_NAME}:$VERSION \
|
||||
.
|
||||
|
||||
docker build \
|
||||
@@ -21,5 +23,12 @@ docker build \
|
||||
--build-arg=GOFLAGS \
|
||||
--target runtime-rocm \
|
||||
-f Dockerfile \
|
||||
-t ollama/ollama:$VERSION-rocm \
|
||||
-t ${IMAGE_NAME}:$VERSION-rocm \
|
||||
.
|
||||
|
||||
docker tag ${IMAGE_NAME}:$VERSION ${IMAGE_NAME}:latest
|
||||
docker tag ${IMAGE_NAME}:$VERSION-rocm ${IMAGE_NAME}:rocm
|
||||
|
||||
echo "To release, run:"
|
||||
echo " docker push ${IMAGE_NAME}:$VERSION && docker push ${IMAGE_NAME}:latest"
|
||||
echo " docker push ${IMAGE_NAME}:$VERSION-rocm && docker push ${IMAGE_NAME}:rocm"
|
||||
@@ -53,13 +53,14 @@ function buildOllama() {
|
||||
write-host "Building ollama CLI"
|
||||
& go generate ./...
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& go build "-ldflags=-w -s ""-X=github.com/jmorganca/ollama/version.Version=$script:VERSION"" ""-X=github.com/jmorganca/ollama/server.mode=release""" .
|
||||
& go build -ldflags "-s -w -X=github.com/jmorganca/ollama/version.Version=$script:VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
if ("${env:KEY_CONTAINER}") {
|
||||
& "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" `
|
||||
/csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
}
|
||||
New-Item -ItemType Directory -Path .\dist -Force
|
||||
cp .\ollama.exe .\dist\ollama-windows-amd64.exe
|
||||
}
|
||||
|
||||
@@ -67,7 +68,7 @@ function buildApp() {
|
||||
write-host "Building Ollama App"
|
||||
cd "${script:SRC_DIR}\app"
|
||||
& windres -l 0 -o ollama.syso ollama.rc
|
||||
& go build "-ldflags=-H windowsgui -w -s ""-X=github.com/jmorganca/ollama/version.Version=$script:VERSION"" ""-X=github.com/jmorganca/ollama/server.mode=release""" .
|
||||
& go build -ldflags "-s -w -H windowsgui -X=github.com/jmorganca/ollama/version.Version=$script:VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
if ("${env:KEY_CONTAINER}") {
|
||||
& "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" `
|
||||
@@ -129,4 +130,4 @@ try {
|
||||
} finally {
|
||||
set-location $script:SRC_DIR
|
||||
$env:PKG_VERSION=""
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,7 +72,7 @@ $SUDO install -o0 -g0 -m755 -d $BINDIR
|
||||
$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama
|
||||
|
||||
install_success() {
|
||||
status 'The Ollama API is now available at 0.0.0.0:11434.'
|
||||
status 'The Ollama API is now available at 127.0.0.1:11434.'
|
||||
status 'Install complete. Run "ollama" from the command line.'
|
||||
}
|
||||
trap install_success EXIT
|
||||
@@ -88,6 +88,10 @@ configure_systemd() {
|
||||
status "Adding ollama user to render group..."
|
||||
$SUDO usermod -a -G render ollama
|
||||
fi
|
||||
if getent group video >/dev/null 2>&1; then
|
||||
status "Adding ollama user to video group..."
|
||||
$SUDO usermod -a -G video ollama
|
||||
fi
|
||||
|
||||
status "Adding current user to ollama group..."
|
||||
$SUDO usermod -a -G ollama $(whoami)
|
||||
|
||||
@@ -52,6 +52,10 @@ type Model struct {
|
||||
Messages []Message
|
||||
}
|
||||
|
||||
func (m *Model) IsEmbedding() bool {
|
||||
return slices.Contains(m.Config.ModelFamilies, "bert") || slices.Contains(m.Config.ModelFamilies, "nomic-bert")
|
||||
}
|
||||
|
||||
type Message struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content"`
|
||||
@@ -1103,18 +1107,7 @@ func makeRequest(ctx context.Context, method string, requestURL *url.URL, header
|
||||
req.ContentLength = contentLength
|
||||
}
|
||||
|
||||
proxyURL, err := http.ProxyFromEnvironment(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client := http.Client{
|
||||
Transport: &http.Transport{
|
||||
Proxy: http.ProxyURL(proxyURL),
|
||||
},
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -121,13 +121,15 @@ func ChatPrompt(tmpl string, messages []api.Message, window int, encode func(str
|
||||
p = prompt{}
|
||||
}
|
||||
|
||||
p.Prompt = msg.Content
|
||||
|
||||
var sb strings.Builder
|
||||
for range msg.Images {
|
||||
p.Prompt += fmt.Sprintf(" [img-%d]", imgId)
|
||||
fmt.Fprintf(&sb, "[img-%d] ", imgId)
|
||||
p.images = append(p.images, imgId)
|
||||
imgId += 1
|
||||
}
|
||||
|
||||
sb.WriteString(msg.Content)
|
||||
p.Prompt = sb.String()
|
||||
case "assistant":
|
||||
if p.Response != "" {
|
||||
prompts = append(prompts, p)
|
||||
|
||||
@@ -155,7 +155,7 @@ func TestChatPrompt(t *testing.T) {
|
||||
{Role: "user", Content: "Hello", Images: []api.ImageData{[]byte("base64")}},
|
||||
},
|
||||
window: 1024,
|
||||
want: "You are a Wizard. Hello [img-0]",
|
||||
want: "You are a Wizard. [img-0] Hello",
|
||||
},
|
||||
{
|
||||
name: "images truncated",
|
||||
@@ -165,7 +165,7 @@ func TestChatPrompt(t *testing.T) {
|
||||
{Role: "user", Content: "Hello", Images: []api.ImageData{[]byte("img1"), []byte("img2")}},
|
||||
},
|
||||
window: 1024,
|
||||
want: "You are a Wizard. Hello [img-1]",
|
||||
want: "You are a Wizard. [img-0] [img-1] Hello",
|
||||
},
|
||||
{
|
||||
name: "empty list",
|
||||
@@ -198,7 +198,7 @@ func TestChatPrompt(t *testing.T) {
|
||||
}
|
||||
|
||||
if got != tc.want {
|
||||
t.Errorf("got = %v, want %v", got, tc.want)
|
||||
t.Errorf("got: %q, want: %q", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -191,6 +191,11 @@ func GenerateHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if model.IsEmbedding() {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "embedding models do not support generate"})
|
||||
return
|
||||
}
|
||||
|
||||
opts, err := modelOptions(model, req.Options)
|
||||
if err != nil {
|
||||
if errors.Is(err, api.ErrInvalidOpts) {
|
||||
@@ -245,6 +250,19 @@ func GenerateHandler(c *gin.Context) {
|
||||
slog.Debug("generate handler", "system", req.System)
|
||||
|
||||
var sb strings.Builder
|
||||
for i := range req.Images {
|
||||
fmt.Fprintf(&sb, "[img-%d] ", i)
|
||||
}
|
||||
|
||||
sb.WriteString(req.Prompt)
|
||||
|
||||
p, err := Prompt(req.Template, req.System, sb.String(), "", true)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
sb.Reset()
|
||||
if req.Context != nil {
|
||||
prev, err := loaded.runner.Decode(c.Request.Context(), req.Context)
|
||||
if err != nil {
|
||||
@@ -255,18 +273,6 @@ func GenerateHandler(c *gin.Context) {
|
||||
sb.WriteString(prev)
|
||||
}
|
||||
|
||||
// write image tags
|
||||
// TODO: limit the number of images to fit in the context similar to the chat endpoint
|
||||
for i := range req.Images {
|
||||
req.Prompt += fmt.Sprintf(" [img-%d]", i)
|
||||
}
|
||||
|
||||
p, err := Prompt(req.Template, req.System, req.Prompt, "", true)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
sb.WriteString(p)
|
||||
|
||||
prompt = sb.String()
|
||||
@@ -379,7 +385,7 @@ func GenerateHandler(c *gin.Context) {
|
||||
streamResponse(c, ch)
|
||||
}
|
||||
|
||||
func EmbeddingHandler(c *gin.Context) {
|
||||
func EmbeddingsHandler(c *gin.Context) {
|
||||
loaded.mu.Lock()
|
||||
defer loaded.mu.Unlock()
|
||||
|
||||
@@ -432,8 +438,9 @@ func EmbeddingHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if !loaded.Options.EmbeddingOnly {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "embedding option must be set to true"})
|
||||
// an empty request loads the model
|
||||
if req.Prompt == "" {
|
||||
c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: []float64{}})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -942,7 +949,7 @@ func (s *Server) GenerateRoutes() http.Handler {
|
||||
r.POST("/api/pull", PullModelHandler)
|
||||
r.POST("/api/generate", GenerateHandler)
|
||||
r.POST("/api/chat", ChatHandler)
|
||||
r.POST("/api/embeddings", EmbeddingHandler)
|
||||
r.POST("/api/embeddings", EmbeddingsHandler)
|
||||
r.POST("/api/create", CreateModelHandler)
|
||||
r.POST("/api/push", PushModelHandler)
|
||||
r.POST("/api/copy", CopyModelHandler)
|
||||
@@ -1143,6 +1150,11 @@ func ChatHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if model.IsEmbedding() {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "embedding models do not support chat"})
|
||||
return
|
||||
}
|
||||
|
||||
opts, err := modelOptions(model, req.Options)
|
||||
if err != nil {
|
||||
if errors.Is(err, api.ErrInvalidOpts) {
|
||||
|
||||
@@ -12,7 +12,6 @@ import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
@@ -177,16 +176,14 @@ func (b *blobUpload) Run(ctx context.Context, opts *registryOptions) {
|
||||
requestURL := <-b.nextURL
|
||||
|
||||
// calculate md5 checksum and add it to the commit request
|
||||
var sb strings.Builder
|
||||
md5sum := md5.New()
|
||||
for _, part := range b.Parts {
|
||||
sb.Write(part.Sum(nil))
|
||||
md5sum.Write(part.Sum(nil))
|
||||
}
|
||||
|
||||
md5sum := md5.Sum([]byte(sb.String()))
|
||||
|
||||
values := requestURL.Query()
|
||||
values.Add("digest", b.Digest)
|
||||
values.Add("etag", fmt.Sprintf("%x-%d", md5sum, len(b.Parts)))
|
||||
values.Add("etag", fmt.Sprintf("%x-%d", md5sum.Sum(nil), len(b.Parts)))
|
||||
requestURL.RawQuery = values.Encode()
|
||||
|
||||
headers := make(http.Header)
|
||||
|
||||
Reference in New Issue
Block a user