Compare commits
11 Commits
v0.5.8-rc1
...
v0.5.8-rc1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f4711da7bd | ||
|
|
38117fba83 | ||
|
|
1f766c36fb | ||
|
|
484a99e428 | ||
|
|
ec6121c331 | ||
|
|
b86c0a1500 | ||
|
|
7e402ebb8c | ||
|
|
b901a712c6 | ||
|
|
abb8dd57f8 | ||
|
|
a400df48c0 | ||
|
|
6ab4ba4c26 |
2
.github/workflows/release.yaml
vendored
2
.github/workflows/release.yaml
vendored
@@ -242,7 +242,7 @@ jobs:
|
||||
dist\${{ matrix.os }}-${{ matrix.arch }}-app.exe
|
||||
|
||||
windows-sign:
|
||||
runs-on: windows
|
||||
runs-on: windows-2022
|
||||
environment: release
|
||||
needs: [windows-depends, windows-build]
|
||||
steps:
|
||||
|
||||
@@ -85,6 +85,11 @@ if(CMAKE_CUDA_COMPILER)
|
||||
)
|
||||
endif()
|
||||
|
||||
set(WINDOWS_AMDGPU_TARGETS_EXCLUDE_REGEX "^gfx(906|908|90a):xnack[+-]$"
|
||||
CACHE STRING
|
||||
"Regular expression describing AMDGPU_TARGETS not supported on Windows. Override to force building these targets. Default \"^gfx(906|908|90a):xnack[+-]$\"."
|
||||
)
|
||||
|
||||
check_language(HIP)
|
||||
if(CMAKE_HIP_COMPILER)
|
||||
set(HIP_PLATFORM "amd")
|
||||
@@ -92,6 +97,8 @@ if(CMAKE_HIP_COMPILER)
|
||||
find_package(hip REQUIRED)
|
||||
if(NOT AMDGPU_TARGETS)
|
||||
list(FILTER AMDGPU_TARGETS INCLUDE REGEX "^gfx(900|94[012]|101[02]|1030|110[012])$")
|
||||
elseif(WIN32 AND WINDOWS_AMDGPU_TARGETS_EXCLUDE_REGEX)
|
||||
list(FILTER AMDGPU_TARGETS EXCLUDE REGEX ${WINDOWS_AMDGPU_TARGETS_EXCLUDE_REGEX})
|
||||
endif()
|
||||
|
||||
if(AMDGPU_TARGETS)
|
||||
|
||||
@@ -56,7 +56,7 @@
|
||||
"name": "ROCm 6",
|
||||
"inherits": [ "ROCm" ],
|
||||
"cacheVariables": {
|
||||
"AMDGPU_TARGETS": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
|
||||
"AMDGPU_TARGETS": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-"
|
||||
}
|
||||
}
|
||||
],
|
||||
|
||||
51
README.md
51
README.md
@@ -18,7 +18,7 @@ Get up and running with large language models.
|
||||
|
||||
### Linux
|
||||
|
||||
```
|
||||
```shell
|
||||
curl -fsSL https://ollama.com/install.sh | sh
|
||||
```
|
||||
|
||||
@@ -42,7 +42,7 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla
|
||||
|
||||
To run and chat with [Llama 3.2](https://ollama.com/library/llama3.2):
|
||||
|
||||
```
|
||||
```shell
|
||||
ollama run llama3.2
|
||||
```
|
||||
|
||||
@@ -54,6 +54,8 @@ Here are some example models that can be downloaded:
|
||||
|
||||
| Model | Parameters | Size | Download |
|
||||
| ------------------ | ---------- | ----- | -------------------------------- |
|
||||
| DeepSeek-R1 | 7B | 4.7GB | `ollama run deepseek-r1` |
|
||||
| DeepSeek-R1 | 671B | 404GB | `ollama run deepseek-r1:671b` |
|
||||
| Llama 3.3 | 70B | 43GB | `ollama run llama3.3` |
|
||||
| Llama 3.2 | 3B | 2.0GB | `ollama run llama3.2` |
|
||||
| Llama 3.2 | 1B | 1.3GB | `ollama run llama3.2:1b` |
|
||||
@@ -92,13 +94,13 @@ Ollama supports importing GGUF models in the Modelfile:
|
||||
|
||||
2. Create the model in Ollama
|
||||
|
||||
```
|
||||
```shell
|
||||
ollama create example -f Modelfile
|
||||
```
|
||||
|
||||
3. Run the model
|
||||
|
||||
```
|
||||
```shell
|
||||
ollama run example
|
||||
```
|
||||
|
||||
@@ -110,7 +112,7 @@ See the [guide](docs/import.md) on importing models for more information.
|
||||
|
||||
Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.2` model:
|
||||
|
||||
```
|
||||
```shell
|
||||
ollama pull llama3.2
|
||||
```
|
||||
|
||||
@@ -145,13 +147,13 @@ For more information on working with a Modelfile, see the [Modelfile](docs/model
|
||||
|
||||
`ollama create` is used to create a model from a Modelfile.
|
||||
|
||||
```
|
||||
```shell
|
||||
ollama create mymodel -f ./Modelfile
|
||||
```
|
||||
|
||||
### Pull a model
|
||||
|
||||
```
|
||||
```shell
|
||||
ollama pull llama3.2
|
||||
```
|
||||
|
||||
@@ -159,13 +161,13 @@ ollama pull llama3.2
|
||||
|
||||
### Remove a model
|
||||
|
||||
```
|
||||
```shell
|
||||
ollama rm llama3.2
|
||||
```
|
||||
|
||||
### Copy a model
|
||||
|
||||
```
|
||||
```shell
|
||||
ollama cp llama3.2 my-model
|
||||
```
|
||||
|
||||
@@ -184,37 +186,39 @@ I'm a basic program that prints the famous "Hello, world!" message to the consol
|
||||
|
||||
```
|
||||
ollama run llava "What's in this image? /Users/jmorgan/Desktop/smile.png"
|
||||
The image features a yellow smiley face, which is likely the central focus of the picture.
|
||||
```
|
||||
|
||||
> **Output**: The image features a yellow smiley face, which is likely the central focus of the picture.
|
||||
|
||||
### Pass the prompt as an argument
|
||||
|
||||
```shell
|
||||
ollama run llama3.2 "Summarize this file: $(cat README.md)"
|
||||
```
|
||||
$ ollama run llama3.2 "Summarize this file: $(cat README.md)"
|
||||
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
|
||||
```
|
||||
|
||||
> **Output**: Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
|
||||
|
||||
### Show model information
|
||||
|
||||
```
|
||||
```shell
|
||||
ollama show llama3.2
|
||||
```
|
||||
|
||||
### List models on your computer
|
||||
|
||||
```
|
||||
```shell
|
||||
ollama list
|
||||
```
|
||||
|
||||
### List which models are currently loaded
|
||||
|
||||
```
|
||||
```shell
|
||||
ollama ps
|
||||
```
|
||||
|
||||
### Stop a model which is currently running
|
||||
|
||||
```
|
||||
```shell
|
||||
ollama stop llama3.2
|
||||
```
|
||||
|
||||
@@ -230,13 +234,13 @@ See the [developer guide](https://github.com/ollama/ollama/blob/main/docs/develo
|
||||
|
||||
Next, start the server:
|
||||
|
||||
```
|
||||
```shell
|
||||
./ollama serve
|
||||
```
|
||||
|
||||
Finally, in a separate shell, run a model:
|
||||
|
||||
```
|
||||
```shell
|
||||
./ollama run llama3.2
|
||||
```
|
||||
|
||||
@@ -246,7 +250,7 @@ Ollama has a REST API for running and managing models.
|
||||
|
||||
### Generate a response
|
||||
|
||||
```
|
||||
```shell
|
||||
curl http://localhost:11434/api/generate -d '{
|
||||
"model": "llama3.2",
|
||||
"prompt":"Why is the sky blue?"
|
||||
@@ -255,7 +259,7 @@ curl http://localhost:11434/api/generate -d '{
|
||||
|
||||
### Chat with a model
|
||||
|
||||
```
|
||||
```shell
|
||||
curl http://localhost:11434/api/chat -d '{
|
||||
"model": "llama3.2",
|
||||
"messages": [
|
||||
@@ -353,6 +357,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [Web management](https://github.com/lemonit-eric-mao/ollama-web-management) (Web management page)
|
||||
- [Promptery](https://github.com/promptery/promptery) (desktop client for Ollama.)
|
||||
- [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama)
|
||||
- [chat-ollama](https://github.com/annilq/chat-ollama) (a React Native client for Ollama)
|
||||
- [SpaceLlama](https://github.com/tcsenpai/spacellama) (Firefox and Chrome extension to quickly summarize web pages with ollama in a sidebar)
|
||||
- [YouLama](https://github.com/tcsenpai/youlama) (Webapp to quickly summarize any YouTube video, supporting Invidious as well)
|
||||
- [DualMind](https://github.com/tcsenpai/dualmind) (Experimental app allowing two models to talk to each other in the terminal or in a web interface)
|
||||
@@ -374,6 +379,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [MinimalNextOllamaChat](https://github.com/anilkay/MinimalNextOllamaChat) (Minimal Web UI for Chat and Model Control)
|
||||
- [Chipper](https://github.com/TilmanGriesel/chipper) AI interface for tinkerers (Ollama, Haystack RAG, Python)
|
||||
- [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
|
||||
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
|
||||
|
||||
### Cloud
|
||||
|
||||
@@ -487,6 +493,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [Ollama for Haskell](https://github.com/tusharad/ollama-haskell)
|
||||
- [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in unified API)
|
||||
- [LlmTornado](https://github.com/lofcz/llmtornado) (C# library providing a unified interface for major FOSS & Commercial inference APIs)
|
||||
- [Ollama for Zig](https://github.com/dravenk/ollama-zig)
|
||||
|
||||
### Mobile
|
||||
|
||||
@@ -544,7 +551,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
|
||||
|
||||
### Observability
|
||||
|
||||
- [Lunary](https://lunary.ai/docs/integrations/ollama) is the leading open-source LLM observability platform. It provides a variety of enterprise-grade features such as real-time analytics, prompt templates management, PII masking, and comprehensive agent tracing.
|
||||
- [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
|
||||
- [HoneyHive](https://docs.honeyhive.ai/integrations/ollama) is an AI observability and evaluation platform for AI agents. Use HoneyHive to evaluate agent performance, interrogate failures, and monitor quality in production.
|
||||
- [Langfuse](https://langfuse.com/docs/integrations/ollama) is an open source LLM observability platform that enables teams to collaboratively monitor, evaluate and debug AI applications.
|
||||
|
||||
@@ -2,9 +2,10 @@
|
||||
|
||||
Run the examples in this directory with:
|
||||
|
||||
```
|
||||
```shell
|
||||
go run example_name/main.go
|
||||
```
|
||||
|
||||
## Chat - Chat with a model
|
||||
- [chat/main.go](chat/main.go)
|
||||
|
||||
|
||||
@@ -17,6 +17,6 @@ If you want to build the installer, youll need to install
|
||||
In the top directory of this repo, run the following powershell script
|
||||
to build the ollama CLI, ollama app, and ollama installer.
|
||||
|
||||
```
|
||||
```powershell
|
||||
powershell -ExecutionPolicy Bypass -File .\scripts\build_windows.ps1
|
||||
```
|
||||
|
||||
33
docs/api.md
33
docs/api.md
@@ -31,7 +31,7 @@ Certain endpoints stream responses as JSON objects. Streaming can be disabled by
|
||||
|
||||
## Generate a completion
|
||||
|
||||
```shell
|
||||
```
|
||||
POST /api/generate
|
||||
```
|
||||
|
||||
@@ -485,7 +485,7 @@ A single JSON object is returned:
|
||||
|
||||
## Generate a chat completion
|
||||
|
||||
```shell
|
||||
```
|
||||
POST /api/chat
|
||||
```
|
||||
|
||||
@@ -878,6 +878,7 @@ curl http://localhost:11434/api/chat -d '{
|
||||
```
|
||||
|
||||
##### Response
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "llama3.2",
|
||||
@@ -924,7 +925,7 @@ A single JSON object is returned:
|
||||
|
||||
## Create a Model
|
||||
|
||||
```shell
|
||||
```
|
||||
POST /api/create
|
||||
```
|
||||
|
||||
@@ -1020,7 +1021,7 @@ curl http://localhost:11434/api/create -d '{
|
||||
|
||||
A stream of JSON objects is returned:
|
||||
|
||||
```
|
||||
```json
|
||||
{"status":"quantizing F16 model to Q4_K_M"}
|
||||
{"status":"creating new layer sha256:667b0c1932bc6ffc593ed1d03f895bf2dc8dc6df21db3042284a6f4416b06a29"}
|
||||
{"status":"using existing layer sha256:11ce4ee3e170f6adebac9a991c22e22ab3f8530e154ee669954c4bc73061c258"}
|
||||
@@ -1051,7 +1052,7 @@ curl http://localhost:11434/api/create -d '{
|
||||
|
||||
A stream of JSON objects is returned:
|
||||
|
||||
```
|
||||
```json
|
||||
{"status":"parsing GGUF"}
|
||||
{"status":"using existing layer sha256:432f310a77f4650a88d0fd59ecdd7cebed8d684bafea53cbff0473542964f0c3"}
|
||||
{"status":"writing manifest"}
|
||||
@@ -1118,7 +1119,7 @@ Return 200 OK if the blob exists, 404 Not Found if it does not.
|
||||
|
||||
## Push a Blob
|
||||
|
||||
```shell
|
||||
```
|
||||
POST /api/blobs/:digest
|
||||
```
|
||||
|
||||
@@ -1142,7 +1143,7 @@ Return 201 Created if the blob was successfully created, 400 Bad Request if the
|
||||
|
||||
## List Local Models
|
||||
|
||||
```shell
|
||||
```
|
||||
GET /api/tags
|
||||
```
|
||||
|
||||
@@ -1195,7 +1196,7 @@ A single JSON object will be returned.
|
||||
|
||||
## Show Model Information
|
||||
|
||||
```shell
|
||||
```
|
||||
POST /api/show
|
||||
```
|
||||
|
||||
@@ -1261,7 +1262,7 @@ curl http://localhost:11434/api/show -d '{
|
||||
|
||||
## Copy a Model
|
||||
|
||||
```shell
|
||||
```
|
||||
POST /api/copy
|
||||
```
|
||||
|
||||
@@ -1284,7 +1285,7 @@ Returns a 200 OK if successful, or a 404 Not Found if the source model doesn't e
|
||||
|
||||
## Delete a Model
|
||||
|
||||
```shell
|
||||
```
|
||||
DELETE /api/delete
|
||||
```
|
||||
|
||||
@@ -1310,7 +1311,7 @@ Returns a 200 OK if successful, 404 Not Found if the model to be deleted doesn't
|
||||
|
||||
## Pull a Model
|
||||
|
||||
```shell
|
||||
```
|
||||
POST /api/pull
|
||||
```
|
||||
|
||||
@@ -1382,7 +1383,7 @@ if `stream` is set to false, then the response is a single JSON object:
|
||||
|
||||
## Push a Model
|
||||
|
||||
```shell
|
||||
```
|
||||
POST /api/push
|
||||
```
|
||||
|
||||
@@ -1447,7 +1448,7 @@ If `stream` is set to `false`, then the response is a single JSON object:
|
||||
|
||||
## Generate Embeddings
|
||||
|
||||
```shell
|
||||
```
|
||||
POST /api/embed
|
||||
```
|
||||
|
||||
@@ -1515,7 +1516,7 @@ curl http://localhost:11434/api/embed -d '{
|
||||
```
|
||||
|
||||
## List Running Models
|
||||
```shell
|
||||
```
|
||||
GET /api/ps
|
||||
```
|
||||
|
||||
@@ -1562,7 +1563,7 @@ A single JSON object will be returned.
|
||||
|
||||
> Note: this endpoint has been superseded by `/api/embed`
|
||||
|
||||
```shell
|
||||
```
|
||||
POST /api/embeddings
|
||||
```
|
||||
|
||||
@@ -1602,7 +1603,7 @@ curl http://localhost:11434/api/embeddings -d '{
|
||||
|
||||
## Version
|
||||
|
||||
```shell
|
||||
```
|
||||
GET /api/version
|
||||
```
|
||||
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
Install prerequisites:
|
||||
|
||||
- [Go](https://go.dev/doc/install)
|
||||
- C/C++ Compiler e.g. Clang on macOS, [TDM-GCC](https://jmeubank.github.io/tdm-gcc/download/) (Windows amd64) or [llvm-mingw](https://github.com/mstorsjo/llvm-mingw) (Windows arm64), GCC/Clang on Linux.
|
||||
- C/C++ Compiler e.g. Clang on macOS, [TDM-GCC](https://github.com/jmeubank/tdm-gcc/releases/latest) (Windows amd64) or [llvm-mingw](https://github.com/mstorsjo/llvm-mingw) (Windows arm64), GCC/Clang on Linux.
|
||||
|
||||
Then build and run Ollama from the root directory of the repository:
|
||||
|
||||
```
|
||||
```shell
|
||||
go run . serve
|
||||
```
|
||||
|
||||
@@ -23,14 +23,14 @@ Install prerequisites:
|
||||
|
||||
Then, configure and build the project:
|
||||
|
||||
```
|
||||
```shell
|
||||
cmake -B build
|
||||
cmake --build build
|
||||
```
|
||||
|
||||
Lastly, run Ollama:
|
||||
|
||||
```
|
||||
```shell
|
||||
go run . serve
|
||||
```
|
||||
|
||||
@@ -57,14 +57,14 @@ Install prerequisites:
|
||||
|
||||
Then, configure and build the project:
|
||||
|
||||
```
|
||||
```shell
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
```
|
||||
|
||||
Lastly, run Ollama:
|
||||
|
||||
```
|
||||
```shell
|
||||
go run . serve
|
||||
```
|
||||
|
||||
@@ -88,26 +88,26 @@ Install prerequisites:
|
||||
|
||||
Then, configure and build the project:
|
||||
|
||||
```
|
||||
```shell
|
||||
cmake -B build
|
||||
cmake --build build
|
||||
```
|
||||
|
||||
Lastly, run Ollama:
|
||||
|
||||
```
|
||||
```shell
|
||||
go run . serve
|
||||
```
|
||||
|
||||
## Docker
|
||||
|
||||
```
|
||||
```shell
|
||||
docker build .
|
||||
```
|
||||
|
||||
### ROCm
|
||||
|
||||
```
|
||||
```shell
|
||||
docker build --build-arg FLAVOR=rocm .
|
||||
```
|
||||
|
||||
@@ -115,7 +115,7 @@ docker build --build-arg FLAVOR=rocm .
|
||||
|
||||
To run tests, use `go test`:
|
||||
|
||||
```
|
||||
```shell
|
||||
go test ./...
|
||||
```
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
### CPU only
|
||||
|
||||
```bash
|
||||
```shell
|
||||
docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
||||
```
|
||||
|
||||
@@ -11,42 +11,46 @@ Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-
|
||||
|
||||
#### Install with Apt
|
||||
1. Configure the repository
|
||||
```bash
|
||||
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
|
||||
| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
|
||||
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
|
||||
| sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||
sudo apt-get update
|
||||
```
|
||||
|
||||
```shell
|
||||
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
|
||||
| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
|
||||
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
|
||||
| sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||
sudo apt-get update
|
||||
```
|
||||
|
||||
2. Install the NVIDIA Container Toolkit packages
|
||||
```bash
|
||||
sudo apt-get install -y nvidia-container-toolkit
|
||||
```
|
||||
|
||||
```shell
|
||||
sudo apt-get install -y nvidia-container-toolkit
|
||||
```
|
||||
|
||||
#### Install with Yum or Dnf
|
||||
1. Configure the repository
|
||||
|
||||
```bash
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
|
||||
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
```
|
||||
```shell
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
|
||||
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
```
|
||||
|
||||
2. Install the NVIDIA Container Toolkit packages
|
||||
|
||||
```bash
|
||||
sudo yum install -y nvidia-container-toolkit
|
||||
```
|
||||
```shell
|
||||
sudo yum install -y nvidia-container-toolkit
|
||||
```
|
||||
|
||||
#### Configure Docker to use Nvidia driver
|
||||
```
|
||||
|
||||
```shell
|
||||
sudo nvidia-ctk runtime configure --runtime=docker
|
||||
sudo systemctl restart docker
|
||||
```
|
||||
|
||||
#### Start the container
|
||||
|
||||
```bash
|
||||
```shell
|
||||
docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
||||
```
|
||||
|
||||
@@ -57,7 +61,7 @@ docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ol
|
||||
|
||||
To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
|
||||
|
||||
```
|
||||
```shell
|
||||
docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
|
||||
```
|
||||
|
||||
@@ -65,7 +69,7 @@ docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 114
|
||||
|
||||
Now you can run a model:
|
||||
|
||||
```
|
||||
```shell
|
||||
docker exec -it ollama ollama run llama3.2
|
||||
```
|
||||
|
||||
|
||||
22
docs/faq.md
22
docs/faq.md
@@ -24,7 +24,7 @@ By default, Ollama uses a context window size of 2048 tokens.
|
||||
|
||||
To change this when using `ollama run`, use `/set parameter`:
|
||||
|
||||
```
|
||||
```shell
|
||||
/set parameter num_ctx 4096
|
||||
```
|
||||
|
||||
@@ -46,10 +46,15 @@ Use the `ollama ps` command to see what models are currently loaded into memory.
|
||||
|
||||
```shell
|
||||
ollama ps
|
||||
NAME ID SIZE PROCESSOR UNTIL
|
||||
llama3:70b bcfb190ca3a7 42 GB 100% GPU 4 minutes from now
|
||||
```
|
||||
|
||||
> **Output**:
|
||||
>
|
||||
> ```
|
||||
> NAME ID SIZE PROCESSOR UNTIL
|
||||
> llama3:70b bcfb190ca3a7 42 GB 100% GPU 4 minutes from now
|
||||
> ```
|
||||
|
||||
The `Processor` column will show which memory the model was loaded in to:
|
||||
* `100% GPU` means the model was loaded entirely into the GPU
|
||||
* `100% CPU` means the model was loaded entirely in system memory
|
||||
@@ -66,7 +71,7 @@ If Ollama is run as a macOS application, environment variables should be set usi
|
||||
1. For each environment variable, call `launchctl setenv`.
|
||||
|
||||
```bash
|
||||
launchctl setenv OLLAMA_HOST "0.0.0.0"
|
||||
launchctl setenv OLLAMA_HOST "0.0.0.0:11434"
|
||||
```
|
||||
|
||||
2. Restart Ollama application.
|
||||
@@ -81,14 +86,14 @@ If Ollama is run as a systemd service, environment variables should be set using
|
||||
|
||||
```ini
|
||||
[Service]
|
||||
Environment="OLLAMA_HOST=0.0.0.0"
|
||||
Environment="OLLAMA_HOST=0.0.0.0:11434"
|
||||
```
|
||||
|
||||
3. Save and exit.
|
||||
|
||||
4. Reload `systemd` and restart Ollama:
|
||||
|
||||
```bash
|
||||
```shell
|
||||
systemctl daemon-reload
|
||||
systemctl restart ollama
|
||||
```
|
||||
@@ -221,16 +226,19 @@ properties.
|
||||
If you are using the API you can preload a model by sending the Ollama server an empty request. This works with both the `/api/generate` and `/api/chat` API endpoints.
|
||||
|
||||
To preload the mistral model using the generate endpoint, use:
|
||||
|
||||
```shell
|
||||
curl http://localhost:11434/api/generate -d '{"model": "mistral"}'
|
||||
```
|
||||
|
||||
To use the chat completions endpoint, use:
|
||||
|
||||
```shell
|
||||
curl http://localhost:11434/api/chat -d '{"model": "mistral"}'
|
||||
```
|
||||
|
||||
To preload a model using the CLI, use the command:
|
||||
|
||||
```shell
|
||||
ollama run llama3.2 ""
|
||||
```
|
||||
@@ -250,11 +258,13 @@ If you're using the API, use the `keep_alive` parameter with the `/api/generate`
|
||||
* '0' which will unload the model immediately after generating a response
|
||||
|
||||
For example, to preload a model and leave it in memory use:
|
||||
|
||||
```shell
|
||||
curl http://localhost:11434/api/generate -d '{"model": "llama3.2", "keep_alive": -1}'
|
||||
```
|
||||
|
||||
To unload the model and free up memory use:
|
||||
|
||||
```shell
|
||||
curl http://localhost:11434/api/generate -d '{"model": "llama3.2", "keep_alive": 0}'
|
||||
```
|
||||
|
||||
@@ -20,13 +20,13 @@ Make sure that you use the same base model in the `FROM` command as you used to
|
||||
|
||||
Now run `ollama create` from the directory where the `Modelfile` was created:
|
||||
|
||||
```bash
|
||||
```shell
|
||||
ollama create my-model
|
||||
```
|
||||
|
||||
Lastly, test the model:
|
||||
|
||||
```bash
|
||||
```shell
|
||||
ollama run my-model
|
||||
```
|
||||
|
||||
|
||||
@@ -119,7 +119,7 @@ sudo systemctl status ollama
|
||||
|
||||
To customize the installation of Ollama, you can edit the systemd service file or the environment variables by running:
|
||||
|
||||
```
|
||||
```shell
|
||||
sudo systemctl edit ollama
|
||||
```
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ A model file is the blueprint to create and share models with Ollama.
|
||||
|
||||
The format of the `Modelfile`:
|
||||
|
||||
```modelfile
|
||||
```
|
||||
# comment
|
||||
INSTRUCTION arguments
|
||||
```
|
||||
@@ -49,7 +49,7 @@ INSTRUCTION arguments
|
||||
|
||||
An example of a `Modelfile` creating a mario blueprint:
|
||||
|
||||
```modelfile
|
||||
```
|
||||
FROM llama3.2
|
||||
# sets the temperature to 1 [higher is more creative, lower is more coherent]
|
||||
PARAMETER temperature 1
|
||||
@@ -69,24 +69,30 @@ To use this:
|
||||
|
||||
To view the Modelfile of a given model, use the `ollama show --modelfile` command.
|
||||
|
||||
```bash
|
||||
> ollama show --modelfile llama3.2
|
||||
# Modelfile generated by "ollama show"
|
||||
# To build a new Modelfile based on this one, replace the FROM line with:
|
||||
# FROM llama3.2:latest
|
||||
FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29
|
||||
TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
|
||||
```shell
|
||||
ollama show --modelfile llama3.2
|
||||
```
|
||||
|
||||
{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
|
||||
> **Output**:
|
||||
>
|
||||
> ```
|
||||
> # Modelfile generated by "ollama show"
|
||||
> # To build a new Modelfile based on this one, replace the FROM line with:
|
||||
> # FROM llama3.2:latest
|
||||
> FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29
|
||||
> TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
|
||||
>
|
||||
> {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
|
||||
>
|
||||
> {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
|
||||
>
|
||||
> {{ .Response }}<|eot_id|>"""
|
||||
> PARAMETER stop "<|start_header_id|>"
|
||||
> PARAMETER stop "<|end_header_id|>"
|
||||
> PARAMETER stop "<|eot_id|>"
|
||||
> PARAMETER stop "<|reserved_special_token"
|
||||
> ```
|
||||
|
||||
{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
|
||||
|
||||
{{ .Response }}<|eot_id|>"""
|
||||
PARAMETER stop "<|start_header_id|>"
|
||||
PARAMETER stop "<|end_header_id|>"
|
||||
PARAMETER stop "<|eot_id|>"
|
||||
PARAMETER stop "<|reserved_special_token"
|
||||
```
|
||||
|
||||
## Instructions
|
||||
|
||||
@@ -94,13 +100,13 @@ To view the Modelfile of a given model, use the `ollama show --modelfile` comman
|
||||
|
||||
The `FROM` instruction defines the base model to use when creating a model.
|
||||
|
||||
```modelfile
|
||||
```
|
||||
FROM <model name>:<tag>
|
||||
```
|
||||
|
||||
#### Build from existing model
|
||||
|
||||
```modelfile
|
||||
```
|
||||
FROM llama3.2
|
||||
```
|
||||
|
||||
@@ -111,7 +117,7 @@ Additional models can be found at:
|
||||
|
||||
#### Build from a Safetensors model
|
||||
|
||||
```modelfile
|
||||
```
|
||||
FROM <model directory>
|
||||
```
|
||||
|
||||
@@ -125,7 +131,7 @@ Currently supported model architectures:
|
||||
|
||||
#### Build from a GGUF file
|
||||
|
||||
```modelfile
|
||||
```
|
||||
FROM ./ollama-model.gguf
|
||||
```
|
||||
|
||||
@@ -136,7 +142,7 @@ The GGUF file location should be specified as an absolute path or relative to th
|
||||
|
||||
The `PARAMETER` instruction defines a parameter that can be set when the model is run.
|
||||
|
||||
```modelfile
|
||||
```
|
||||
PARAMETER <parameter> <parametervalue>
|
||||
```
|
||||
|
||||
@@ -183,7 +189,7 @@ TEMPLATE """{{ if .System }}<|im_start|>system
|
||||
|
||||
The `SYSTEM` instruction specifies the system message to be used in the template, if applicable.
|
||||
|
||||
```modelfile
|
||||
```
|
||||
SYSTEM """<system message>"""
|
||||
```
|
||||
|
||||
@@ -193,7 +199,7 @@ The `ADAPTER` instruction specifies a fine tuned LoRA adapter that should apply
|
||||
|
||||
#### Safetensor adapter
|
||||
|
||||
```modelfile
|
||||
```
|
||||
ADAPTER <path to safetensor adapter>
|
||||
```
|
||||
|
||||
@@ -204,7 +210,7 @@ Currently supported Safetensor adapters:
|
||||
|
||||
#### GGUF adapter
|
||||
|
||||
```modelfile
|
||||
```
|
||||
ADAPTER ./ollama-lora.gguf
|
||||
```
|
||||
|
||||
@@ -212,7 +218,7 @@ ADAPTER ./ollama-lora.gguf
|
||||
|
||||
The `LICENSE` instruction allows you to specify the legal license under which the model used with this Modelfile is shared or distributed.
|
||||
|
||||
```modelfile
|
||||
```
|
||||
LICENSE """
|
||||
<license text>
|
||||
"""
|
||||
@@ -222,7 +228,7 @@ LICENSE """
|
||||
|
||||
The `MESSAGE` instruction allows you to specify a message history for the model to use when responding. Use multiple iterations of the MESSAGE command to build up a conversation which will guide the model to answer in a similar way.
|
||||
|
||||
```modelfile
|
||||
```
|
||||
MESSAGE <role> <message>
|
||||
```
|
||||
|
||||
@@ -237,7 +243,7 @@ MESSAGE <role> <message>
|
||||
|
||||
#### Example conversation
|
||||
|
||||
```modelfile
|
||||
```
|
||||
MESSAGE user Is Toronto in Canada?
|
||||
MESSAGE assistant yes
|
||||
MESSAGE user Is Sacramento in Canada?
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# OpenAI compatibility
|
||||
|
||||
> **Note:** OpenAI compatibility is experimental and is subject to major adjustments including breaking changes. For fully-featured access to the Ollama API, see the Ollama [Python library](https://github.com/ollama/ollama-python), [JavaScript library](https://github.com/ollama/ollama-js) and [REST API](https://github.com/ollama/ollama/blob/main/docs/api.md).
|
||||
> [!NOTE]
|
||||
> OpenAI compatibility is experimental and is subject to major adjustments including breaking changes. For fully-featured access to the Ollama API, see the Ollama [Python library](https://github.com/ollama/ollama-python), [JavaScript library](https://github.com/ollama/ollama-js) and [REST API](https://github.com/ollama/ollama/blob/main/docs/api.md).
|
||||
|
||||
Ollama provides experimental compatibility with parts of the [OpenAI API](https://platform.openai.com/docs/api-reference) to help connect existing applications to Ollama.
|
||||
|
||||
@@ -59,8 +60,10 @@ embeddings = client.embeddings.create(
|
||||
input=["why is the sky blue?", "why is the grass green?"],
|
||||
)
|
||||
```
|
||||
|
||||
#### Structured outputs
|
||||
```py
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
from openai import OpenAI
|
||||
|
||||
@@ -144,7 +147,7 @@ const embedding = await openai.embeddings.create({
|
||||
|
||||
### `curl`
|
||||
|
||||
``` shell
|
||||
```shell
|
||||
curl http://localhost:11434/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
@@ -319,7 +322,7 @@ ollama pull llama3.2
|
||||
|
||||
For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name:
|
||||
|
||||
```
|
||||
```shell
|
||||
ollama cp llama3.2 gpt-3.5-turbo
|
||||
```
|
||||
|
||||
@@ -343,7 +346,7 @@ curl http://localhost:11434/v1/chat/completions \
|
||||
|
||||
The OpenAI API does not have a way of setting the context size for a model. If you need to change the context size, create a `Modelfile` which looks like:
|
||||
|
||||
```modelfile
|
||||
```
|
||||
FROM <some model>
|
||||
PARAMETER num_ctx <context size>
|
||||
```
|
||||
|
||||
@@ -17,6 +17,7 @@ When you run Ollama in a **container**, the logs go to stdout/stderr in the cont
|
||||
```shell
|
||||
docker logs <container-name>
|
||||
```
|
||||
|
||||
(Use `docker ps` to find the container name)
|
||||
|
||||
If manually running `ollama serve` in a terminal, the logs will be on that terminal.
|
||||
@@ -28,6 +29,7 @@ When you run Ollama on **Windows**, there are a few different locations. You can
|
||||
- `explorer %TEMP%` where temporary executable files are stored in one or more `ollama*` directories
|
||||
|
||||
To enable additional debug logging to help troubleshoot problems, first **Quit the running app from the tray menu** then in a powershell terminal
|
||||
|
||||
```powershell
|
||||
$env:OLLAMA_DEBUG="1"
|
||||
& "ollama app.exe"
|
||||
@@ -49,12 +51,13 @@ Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v11 rocm_v5]
|
||||
|
||||
You can set OLLAMA_LLM_LIBRARY to any of the available LLM libraries to bypass autodetection, so for example, if you have a CUDA card, but want to force the CPU LLM library with AVX2 vector support, use:
|
||||
|
||||
```
|
||||
```shell
|
||||
OLLAMA_LLM_LIBRARY="cpu_avx2" ollama serve
|
||||
```
|
||||
|
||||
You can see what features your CPU has with the following.
|
||||
```
|
||||
|
||||
```shell
|
||||
cat /proc/cpuinfo| grep flags | head -1
|
||||
```
|
||||
|
||||
@@ -62,8 +65,8 @@ cat /proc/cpuinfo| grep flags | head -1
|
||||
|
||||
If you run into problems on Linux and want to install an older version, or you'd like to try out a pre-release before it's officially released, you can tell the install script which version to install.
|
||||
|
||||
```sh
|
||||
curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION="0.1.29" sh
|
||||
```shell
|
||||
curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.5.7 sh
|
||||
```
|
||||
|
||||
## Linux tmp noexec
|
||||
|
||||
@@ -47,6 +47,7 @@ If Ollama is already running, Quit the tray application and relaunch it from the
|
||||
## API Access
|
||||
|
||||
Here's a quick example showing API access from `powershell`
|
||||
|
||||
```powershell
|
||||
(Invoke-WebRequest -method POST -Body '{"model":"llama3.2", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
|
||||
```
|
||||
|
||||
@@ -8,7 +8,7 @@ Ollama vendors [llama.cpp](https://github.com/ggerganov/llama.cpp/) and [ggml](h
|
||||
|
||||
If you update the vendoring code, start by running the following command to establish the tracking llama.cpp repo in the `./vendor/` directory.
|
||||
|
||||
```
|
||||
```shell
|
||||
make -f Makefile.sync apply-patches
|
||||
```
|
||||
|
||||
@@ -22,7 +22,7 @@ When updating to a newer base commit, the existing patches may not apply cleanly
|
||||
|
||||
Start by applying the patches. If any of the patches have conflicts, the `git am` will stop at the first failure.
|
||||
|
||||
```
|
||||
```shell
|
||||
make -f Makefile.sync apply-patches
|
||||
```
|
||||
|
||||
@@ -30,7 +30,7 @@ If there are conflicts, you will see an error message. Resolve the conflicts in
|
||||
|
||||
Once all patches are applied, commit the changes to the tracking repository.
|
||||
|
||||
```
|
||||
```shell
|
||||
make -f Makefile.sync format-patches sync
|
||||
```
|
||||
|
||||
@@ -38,13 +38,13 @@ make -f Makefile.sync format-patches sync
|
||||
|
||||
When working on new fixes or features that impact vendored code, use the following model. First get a clean tracking repo with all current patches applied:
|
||||
|
||||
```
|
||||
```shell
|
||||
make -f Makefile.sync clean apply-patches
|
||||
```
|
||||
|
||||
Iterate until you're ready to submit PRs. Once your code is ready, commit a change in the `./vendor/` directory, then generate the patches for ollama with
|
||||
|
||||
```
|
||||
```shell
|
||||
make -f Makefile.sync format-patches
|
||||
```
|
||||
|
||||
|
||||
55
llama/patches/0016-remove-sgemm-global-variables.patch
Normal file
55
llama/patches/0016-remove-sgemm-global-variables.patch
Normal file
@@ -0,0 +1,55 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: jmorganca <jmorganca@gmail.com>
|
||||
Date: Sun, 9 Feb 2025 17:22:15 -0800
|
||||
Subject: [PATCH] remove sgemm global variables
|
||||
|
||||
removes the 'iq4nlt' global variable in sgemm.cpp that causes
|
||||
a runtime crash when calling dlopen on ggml-cpu libraries as
|
||||
its initialization depends on AVX instructions the host machine
|
||||
may not have
|
||||
---
|
||||
ggml/src/ggml-cpu/llamafile/sgemm.cpp | 17 +++++++++--------
|
||||
1 file changed, 9 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
|
||||
index 8fce576c..3f260ce5 100644
|
||||
--- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp
|
||||
+++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
|
||||
@@ -279,14 +279,6 @@ template <> inline __m256bh load(const float *p) {
|
||||
}
|
||||
#endif
|
||||
|
||||
-////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
-// CONSTANTS
|
||||
-
|
||||
-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
|
||||
-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
|
||||
-static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
|
||||
-#endif
|
||||
-
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// FLOATING POINT MATRIX MULTIPLICATION
|
||||
|
||||
@@ -613,6 +605,14 @@ class tinyBLAS_Q0_AVX {
|
||||
TC *C, int64_t ldc,
|
||||
int ith, int nth)
|
||||
: A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
|
||||
+ const int8_t kvalues_iq4nl[16] = {
|
||||
+ -127, -104, -83, -65,
|
||||
+ -49, -35, -22, -10,
|
||||
+ 1, 13, 25, 38,
|
||||
+ 53, 69, 89, 113
|
||||
+ };
|
||||
+
|
||||
+ iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
|
||||
}
|
||||
|
||||
void matmul(int64_t m, int64_t n) {
|
||||
@@ -1037,6 +1037,7 @@ class tinyBLAS_Q0_AVX {
|
||||
const int64_t ldc;
|
||||
const int ith;
|
||||
const int nth;
|
||||
+ __m128i iq4nlt;
|
||||
};
|
||||
#endif // __AVX__
|
||||
|
||||
@@ -4,18 +4,18 @@
|
||||
|
||||
A minimial runner for loading a model and running inference via a http web server.
|
||||
|
||||
```
|
||||
```shell
|
||||
./runner -model <model binary>
|
||||
```
|
||||
|
||||
### Completion
|
||||
|
||||
```
|
||||
```shell
|
||||
curl -X POST -H "Content-Type: application/json" -d '{"prompt": "hi"}' http://localhost:8080/completion
|
||||
```
|
||||
|
||||
### Embeddings
|
||||
|
||||
```
|
||||
```shell
|
||||
curl -X POST -H "Content-Type: application/json" -d '{"prompt": "turn me into an embedding"}' http://localhost:8080/embedding
|
||||
```
|
||||
|
||||
@@ -6,14 +6,14 @@ This app builds upon Ollama to provide a desktop experience for running models.
|
||||
|
||||
First, build the `ollama` binary:
|
||||
|
||||
```
|
||||
```shell
|
||||
cd ..
|
||||
go build .
|
||||
```
|
||||
|
||||
Then run the desktop app with `npm start`:
|
||||
|
||||
```
|
||||
```shell
|
||||
cd macapp
|
||||
npm install
|
||||
npm start
|
||||
|
||||
@@ -279,14 +279,6 @@ template <> inline __m256bh load(const float *p) {
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// CONSTANTS
|
||||
|
||||
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
|
||||
static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
|
||||
static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// FLOATING POINT MATRIX MULTIPLICATION
|
||||
|
||||
@@ -613,6 +605,14 @@ class tinyBLAS_Q0_AVX {
|
||||
TC *C, int64_t ldc,
|
||||
int ith, int nth)
|
||||
: A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
|
||||
const int8_t kvalues_iq4nl[16] = {
|
||||
-127, -104, -83, -65,
|
||||
-49, -35, -22, -10,
|
||||
1, 13, 25, 38,
|
||||
53, 69, 89, 113
|
||||
};
|
||||
|
||||
iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
|
||||
}
|
||||
|
||||
void matmul(int64_t m, int64_t n) {
|
||||
@@ -1037,6 +1037,7 @@ class tinyBLAS_Q0_AVX {
|
||||
const int64_t ldc;
|
||||
const int ith;
|
||||
const int nth;
|
||||
__m128i iq4nlt;
|
||||
};
|
||||
#endif // __AVX__
|
||||
|
||||
|
||||
@@ -162,8 +162,11 @@ function gatherDependencies() {
|
||||
$depArch=$script:TARGET_ARCH
|
||||
}
|
||||
if ($depArch -eq "x64") {
|
||||
write-host "cp ${env:VCToolsRedistDir}\${depArch}\Microsoft.VC*.CRT\msvcp140*.dll ${script:DIST_DIR}\lib\ollama\"
|
||||
cp "${env:VCToolsRedistDir}\${depArch}\Microsoft.VC*.CRT\msvcp140*.dll" "${script:DIST_DIR}\lib\ollama\"
|
||||
write-host "cp ${env:VCToolsRedistDir}\${depArch}\Microsoft.VC*.CRT\vcruntime140.dll ${script:DIST_DIR}\lib\ollama\"
|
||||
cp "${env:VCToolsRedistDir}\${depArch}\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DIST_DIR}\lib\ollama\"
|
||||
write-host "cp ${env:VCToolsRedistDir}\${depArch}\Microsoft.VC*.CRT\vcruntime140_1.dll ${script:DIST_DIR}\lib\ollama\"
|
||||
cp "${env:VCToolsRedistDir}\${depArch}\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DIST_DIR}\lib\ollama\"
|
||||
$llvmCrtDir="$env:VCToolsRedistDir\..\..\..\Tools\Llvm\${depArch}\bin"
|
||||
foreach ($part in $("runtime", "stdio", "filesystem", "math", "convert", "heap", "string", "time", "locale", "environment")) {
|
||||
|
||||
Reference in New Issue
Block a user