Compare commits

..

29 Commits

Author SHA1 Message Date
Daniel Hiltgen
652c273f0e Merge pull request #5049 from dhiltgen/cuda_v12
Cuda v12
2024-08-19 11:14:24 -07:00
Daniel Hiltgen
88e7705079 Merge pull request #6402 from rick-github/numParallel
Override numParallel in pickBestPartialFitByLibrary() only if unset.
2024-08-19 11:07:22 -07:00
Daniel Hiltgen
f9e31da946 Review comments 2024-08-19 10:36:15 -07:00
Daniel Hiltgen
88bb9e3328 Adjust layout to bin+lib/ollama 2024-08-19 09:38:53 -07:00
Daniel Hiltgen
3b19cdba2a Remove Jetpack 2024-08-19 09:38:53 -07:00
Daniel Hiltgen
927d98a6cd Add windows cuda v12 + v11 support 2024-08-19 09:38:53 -07:00
Daniel Hiltgen
f6c811b320 Enable cuda v12 flags 2024-08-19 09:38:53 -07:00
Daniel Hiltgen
4fe3a556fa Add cuda v12 variant and selection logic
Based on compute capability and driver version, pick
v12 or v11 cuda variants.
2024-08-19 09:38:53 -07:00
Daniel Hiltgen
fc3b4cda89 Report GPU variant in log 2024-08-19 09:38:53 -07:00
Daniel Hiltgen
d470ebe78b Add Jetson cuda variants for arm
This adds new variants for arm64 specific to Jetson platforms
2024-08-19 09:38:53 -07:00
Daniel Hiltgen
c7bcb00319 Wire up ccache and pigz in the docker based build
This should help speed things up a little
2024-08-19 09:38:53 -07:00
Daniel Hiltgen
74d45f0102 Refactor linux packaging
This adjusts linux to follow a similar model to windows with a discrete archive
(zip/tgz) to cary the primary executable, and dependent libraries. Runners are
still carried as payloads inside the main binary

Darwin retain the payload model where the go binary is fully self contained.
2024-08-19 09:38:53 -07:00
Jeffrey Morgan
9fddef3731 server: limit upload parts to 16 (#6411) 2024-08-19 09:20:52 -07:00
Richard Lyons
885cf45087 Fix white space. 2024-08-18 03:07:16 +02:00
Richard Lyons
9352eeb752 Reset NumCtx. 2024-08-18 02:55:01 +02:00
Richard Lyons
0ad0e738cd Override numParallel only if unset. 2024-08-18 01:43:26 +02:00
Daniel Hiltgen
d29cd4c2ed Merge pull request #6381 from eust-w/main
fix: Add tooltip to system tray icon
2024-08-15 15:31:15 -07:00
eust-w
a84c05cf91 fix: Add tooltip to system tray icon
- Updated setIcon method to include tooltip text for the system tray icon.
- Added NIF_TIP flag and set the tooltip text using UTF16 encoding.

Resolves: #6372
2024-08-16 06:00:12 +08:00
Michael Yang
e3d7f32af7 Merge pull request #6363 from ollama/mxyng/fix-noprune
fix: noprune on pull
2024-08-15 12:20:38 -07:00
Michael Yang
3a75e74e34 only skip invalid json manifests 2024-08-15 10:29:14 -07:00
Michael Yang
237dccba1e skip invalid manifest files 2024-08-14 16:55:45 -07:00
Michael Yang
b3f75fc812 fix noprune 2024-08-14 15:48:51 -07:00
Jeffrey Morgan
8200c371ae add CONTRIBUTING.md (#6349) 2024-08-14 15:19:50 -07:00
longtao
0a8d6ea86d Fix typo and improve readability (#5964)
* Fix typo and improve readability

Summary:
* Rename updatAvailableMenuID to updateAvailableMenuID
* Replace unused cmd parameter with _ in RunServer function
* Fix typos in comments

(cherry picked from commit 5b8715f0b04773369e8eb1f9e6737995a0ab3ba7)

* Update api/client.go

Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>

---------

Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2024-08-13 17:54:19 -07:00
Blake Mizerany
8e1050f366 server: reduce max connections used in download (#6347)
The previous value of 64 was WAY too high and unnecessary. It reached
diminishing returns and blew past it. This is a more reasonable number
for _most_ normal cases. For users on cloud servers with excellent
network quality, this will keep screaming for them, without hitting our
CDN limits. For users with relatively poor network quality, this will
keep them from saturating their network and causing other issues.
2024-08-13 16:47:35 -07:00
Bruce MacDonald
eda8a32a09 update chatml template format to latest in docs (#6344) 2024-08-13 16:39:18 -07:00
Michael Yang
a0a40aa20c Merge pull request #6346 from ollama/mxyng/lint 2024-08-13 14:58:35 -07:00
Michael Yang
2697d7f5aa lint
- fixes printf: non-constant format string in call to fmt.Printf
- fixes SA1032: arguments have the wrong order
- disables testifylint
2024-08-13 14:36:33 -07:00
Pamela Fox
1f32276178 Update openai.md to remove extra checkbox (#6345) 2024-08-13 13:36:05 -07:00
41 changed files with 609 additions and 381 deletions

View File

@@ -187,6 +187,13 @@ jobs:
generate-windows-cuda:
environment: release
runs-on: windows
strategy:
matrix:
cuda:
- version: "11"
url: 'https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
- version: "12"
url: 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
env:
KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
steps:
@@ -220,11 +227,11 @@ jobs:
with:
go-version-file: go.mod
cache: true
- name: 'Install CUDA'
- name: 'Install CUDA ${{ matrix.cuda.version }}'
run: |
$ErrorActionPreference = "Stop"
write-host "downloading CUDA Installer"
Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
write-host "Installing CUDA"
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
write-host "Completed CUDA"
@@ -256,7 +263,7 @@ jobs:
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
- uses: actions/upload-artifact@v4
with:
name: generate-windows-cuda
name: generate-windows-cuda-${{ matrix.cuda.version }}
path: |
llm/build/**/bin/*
dist/windows-amd64/**
@@ -265,6 +272,7 @@ jobs:
name: windows-cuda-deps
path: dist/deps/*
# Import the prior generation steps and build the final windows assets
build-windows:
environment: release
@@ -314,7 +322,10 @@ jobs:
name: generate-windows-cpu
- uses: actions/download-artifact@v4
with:
name: generate-windows-cuda
name: generate-windows-cuda-11
- uses: actions/download-artifact@v4
with:
name: generate-windows-cuda-12
- uses: actions/download-artifact@v4
with:
name: windows-cuda-deps
@@ -363,7 +374,6 @@ jobs:
- run: |
./scripts/build_linux.sh
./scripts/build_docker.sh
mv dist/deps/* dist/
- uses: actions/upload-artifact@v4
with:
name: dist-linux-amd64

View File

@@ -24,7 +24,6 @@ linters:
- nosprintfhostport
- staticcheck
- tenv
- testifylint
- unconvert
- unused
- usestdlibvars

37
CONTRIBUTING.md Normal file
View File

@@ -0,0 +1,37 @@
# Contributing to Ollama
Thank you for your interest in contributing to Ollama! Here are a few guidelines to help get you started.
## Set up
See the [development documentation](./docs/development.md) for instructions on how to build and run Ollama locally.
## Pull requests
### Ideal issues
* [Bugs](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Abug): issues where Ollama stops working or where it results in an unexpected error.
* [Performance](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Aperformance): issues to make Ollama faster at model inference, downloading or uploading.
* [Security](https://github.com/ollama/ollama/blob/main/SECURITY.md): issues that could lead to a security vulnerability. As mentioned in [SECURITY.md](https://github.com/ollama/ollama/blob/main/SECURITY.md), please do not disclose security vulnerabilities publicly.
### Issues that are harder to review
* New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run as they cannot be removed without potentially breaking users in the future.
* Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
* Documentation: small updates to fill in or dorrect missing documentation is helpful, however large documentation additions can be hard to maintain over time.
### Issues that may not be accepted
* Changes that break backwards compatibility in Ollama's API (including the OpenAI-compatible API)
* Changes that add significant friction to the user experience
* Changes that create a large future maintenance burden for maintainers and contributors
### Best practices
* Commit messages: please leave both a title and a description in your commit messages. The title should be a short summary of the changes, with a leading word that explains the section of the code being changed (e.g. `api: fix parsing of prompt field`) . In the description, leave a short 2-3 sentences that explain more about the change and its impact.
* Tests: please add test coverage to changes where possible.
* Minimize dependencies: avoid adding new dependencies unless absolutely necessary.
## Need help?
If you need help with anything, feel free to reach out to us on our [Discord server](https://discord.gg/ollama).

View File

@@ -1,7 +1,9 @@
ARG GOLANG_VERSION=1.22.5
ARG CMAKE_VERSION=3.22.1
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
ARG CUDA_VERSION=11.3.1
ARG CUDA_VERSION_11=11.3.1
ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
ARG CUDA_VERSION_12=12.4.0
ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
ARG ROCM_VERSION=6.1.2
# Copy the minimal context we need to run the generate scripts
@@ -10,7 +12,7 @@ COPY .git .git
COPY .gitmodules .gitmodules
COPY llm llm
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_11-devel-centos7 AS cuda-11-build-amd64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@@ -18,9 +20,34 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
ARG CUDA_V11_ARCHITECTURES
ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
CUDA_VARIANT="_v11" \
bash gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V12_ARCHITECTURES
ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
CUDA_VARIANT="_v12" \
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
bash gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@@ -28,7 +55,32 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
ARG CUDA_V11_ARCHITECTURES
ENV GOARCH arm64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
CUDA_VARIANT="_v11" \
bash gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V12_ARCHITECTURES
ENV GOARCH arm64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
CUDA_VARIANT="_v12" \
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
bash gen_linux.sh
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
ARG CMAKE_VERSION
@@ -40,15 +92,11 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG AMDGPU_TARGETS
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
RUN mkdir /tmp/scratch && \
for dep in $(zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm*/bin/deps.txt.gz) ; do \
cp ${dep} /tmp/scratch/ || exit 1 ; \
done && \
(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \
(cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . )
ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
RUN mkdir -p ../../dist/linux-amd64/lib/ollama && \
(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64/lib/ollama && tar xf - )
FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
ARG CMAKE_VERSION
@@ -59,16 +107,21 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
ENV GOARCH amd64
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_CPU_TARGET="static" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" bash gen_linux.sh
FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
ARG CMAKE_VERSION
@@ -79,12 +132,15 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
ENV GOARCH arm64
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_CPU_TARGET="static" bash gen_linux.sh
FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
# Intermediate stage used for ./scripts/build_linux.sh
@@ -95,12 +151,16 @@ COPY . .
COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
ARG GOFLAGS
ARG CGO_CFLAGS
RUN go build -trimpath .
RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-amd64/bin/ollama .
# Intermediate stage used for ./scripts/build_linux.sh
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
@@ -109,23 +169,36 @@ ARG GOLANG_VERSION
WORKDIR /go/src/github.com/ollama/ollama
COPY . .
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
ARG GOFLAGS
ARG CGO_CFLAGS
RUN go build -trimpath .
RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-arm64/bin/ollama .
# Strip out ROCm dependencies to keep the primary image lean
FROM --platform=linux/amd64 ubuntu:22.04 as amd64-libs-without-rocm
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/
RUN cd /scratch/ollama/ && rm -rf rocblas libamd* libdrm* libroc* libhip* libhsa*
# Runtime stages
FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
COPY --from=amd64-libs-without-rocm /scratch/ /lib/
RUN apt-get update && apt-get install -y ca-certificates
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
RUN apt-get update && apt-get install -y ca-certificates
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
RUN update-pciids
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
RUN ln -s /opt/rocm/lib /lib/ollama
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0

View File

@@ -298,7 +298,7 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
return &lr, nil
}
// List running models.
// ListRunning lists running models.
func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
var lr ProcessResponse
if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
@@ -333,7 +333,7 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, err
return &resp, nil
}
// Hearbeat checks if the server has started and is responsive; if yes, it
// Heartbeat checks if the server has started and is responsive; if yes, it
// returns nil, otherwise an error.
func (c *Client) Heartbeat(ctx context.Context) error {
if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {

View File

@@ -87,20 +87,11 @@ DialogFontSize=12
[Files]
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\ollama.exe"; DestDir: "{app}\bin"; Flags: ignoreversion 64bit
Source: "..\dist\windows-{#ARCH}\lib\ollama\runners\*"; DestDir: "{app}\lib\ollama\runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
#if DirExists("..\dist\windows-amd64\cuda")
Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs
#endif
#if DirExists("..\dist\windows-amd64\oneapi")
Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs
#endif
#if DirExists("..\dist\windows-amd64\rocm")
Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
#endif
Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Flags: ignoreversion recursesubdirs
[Icons]
Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
@@ -108,7 +99,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
[Run]
Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
Filename: "{cmd}"; Parameters: "/C set PATH={app}\bin;%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
[UninstallRun]
; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
@@ -143,8 +134,8 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
[Registry]
Root: HKCU; Subkey: "Environment"; \
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \
Check: NeedsAddPath('{app}')
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}\bin"; \
Check: NeedsAddPath('{app}\bin')
[Code]

View File

@@ -11,12 +11,12 @@ import (
)
const (
updatAvailableMenuID = 1
updateMenuID = updatAvailableMenuID + 1
separatorMenuID = updateMenuID + 1
diagLogsMenuID = separatorMenuID + 1
diagSeparatorMenuID = diagLogsMenuID + 1
quitMenuID = diagSeparatorMenuID + 1
updateAvailableMenuID = 1
updateMenuID = updateAvailableMenuID + 1
separatorMenuID = updateMenuID + 1
diagLogsMenuID = separatorMenuID + 1
diagSeparatorMenuID = diagLogsMenuID + 1
quitMenuID = diagSeparatorMenuID + 1
)
func (t *winTray) initMenus() error {
@@ -35,7 +35,7 @@ func (t *winTray) initMenus() error {
func (t *winTray) UpdateAvailable(ver string) error {
if !t.updateNotified {
slog.Debug("updating menu and sending notification for new update")
if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
if err := t.addOrUpdateMenuItem(updateAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
return fmt.Errorf("unable to create menu entries %w", err)
}
if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {

View File

@@ -11,6 +11,7 @@ import (
"path/filepath"
"sort"
"sync"
"syscall"
"unsafe"
"golang.org/x/sys/windows"
@@ -433,7 +434,12 @@ func (t *winTray) setIcon(src string) error {
t.muNID.Lock()
defer t.muNID.Unlock()
t.nid.Icon = h
t.nid.Flags |= NIF_ICON
t.nid.Flags |= NIF_ICON | NIF_TIP
if toolTipUTF16, err := syscall.UTF16FromString(commontray.ToolTip); err == nil {
copy(t.nid.Tip[:], toolTipUTF16)
} else {
return err
}
t.nid.Size = uint32(unsafe.Sizeof(*t.nid))
return t.nid.modify()

View File

@@ -61,6 +61,7 @@ const (
MIIM_SUBMENU = 0x00000004
MIM_APPLYTOSUBMENUS = 0x80000000
NIF_ICON = 0x00000002
NIF_TIP = 0x00000004
NIF_INFO = 0x00000010
NIF_MESSAGE = 0x00000001
SW_HIDE = 0

View File

@@ -1125,7 +1125,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
return nil
}
func RunServer(cmd *cobra.Command, _ []string) error {
func RunServer(_ *cobra.Command, _ []string) error {
if err := initializeKeypair(); err != nil {
return err
}

View File

@@ -20,13 +20,12 @@ GPU.
## Manual install
### Download the `ollama` binary
### Download `ollama`
Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
Download and extract the Linux package:
```bash
sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
sudo chmod +x /usr/bin/ollama
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
```
### Adding Ollama as a startup service (recommended)
@@ -96,8 +95,7 @@ curl -fsSL https://ollama.com/install.sh | sh
Or by downloading the ollama binary:
```bash
sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
sudo chmod +x /usr/bin/ollama
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
```
## Installing specific versions

View File

@@ -182,7 +182,6 @@ curl http://localhost:11434/v1/embeddings \
- [x] Reproducible outputs
- [x] Vision
- [x] Tools (streaming support coming soon)
- [ ] Vision
- [ ] Logprobs
#### Supported request fields

View File

@@ -112,15 +112,9 @@ Keep the following tips and best practices in mind when working with Go template
ChatML is a popular template format. It can be used for models such as Databrick's DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
```gotmpl
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}
{{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ else }}
{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
```
### Example Tools

View File

@@ -174,7 +174,7 @@ func RunnersDir() (p string) {
defer func() {
if p == "" {
slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'")
}
}()
@@ -190,17 +190,17 @@ func RunnersDir() (p string) {
}
var paths []string
for _, root := range []string{filepath.Dir(exe), cwd} {
for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), ".."), cwd} {
paths = append(paths,
root,
filepath.Join(root, "windows-"+runtime.GOARCH),
filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
)
}
// Try a few variations to improve developer experience when building from source in the local tree
for _, path := range paths {
candidate := filepath.Join(path, "ollama_runners")
candidate := filepath.Join(path, "lib", "ollama", "runners")
if _, err := os.Stat(candidate); err == nil {
p = candidate
break

View File

@@ -54,7 +54,7 @@ func commonAMDValidateLibDir() (string, error) {
// Installer payload location if we're running the installed binary
exe, err := os.Executable()
if err == nil {
rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm")
rocmTargetDir := filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
if rocmLibUsable(rocmTargetDir) {
slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
return rocmTargetDir, nil

View File

@@ -153,7 +153,7 @@ func AMDValidateLibDir() (string, error) {
// Installer payload (if we're running from some other location)
localAppData := os.Getenv("LOCALAPPDATA")
appDir := filepath.Join(localAppData, "Programs", "Ollama")
rocmTargetDir := filepath.Join(appDir, "rocm")
rocmTargetDir := filepath.Join(appDir, "..", "lib", "ollama")
if rocmLibUsable(rocmTargetDir) {
slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
return rocmTargetDir, nil

View File

@@ -4,9 +4,17 @@ package gpu
import (
"log/slog"
"os"
"regexp"
"runtime"
"strconv"
"strings"
)
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")
func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
ids := []string{}
for _, info := range gpuInfo {
@@ -19,3 +27,38 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
}
return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
}
func cudaVariant(gpuInfo CudaGPUInfo) string {
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
if CudaTegra != "" {
ver := strings.Split(CudaTegra, ".")
if len(ver) > 0 {
return "jetpack" + ver[0]
}
} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
r := regexp.MustCompile(` R(\d+) `)
m := r.FindSubmatch(data)
if len(m) != 2 {
slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version")
} else {
if l4t, err := strconv.Atoi(string(m[1])); err == nil {
// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
// https://developer.nvidia.com/embedded/jetpack-archive
switch l4t {
case 35:
return "jetpack5"
case 36:
return "jetpack6"
default:
slog.Info("unsupported L4T version", "nv_tegra_release", string(data))
}
}
}
}
}
if gpuInfo.computeMajor < 6 || gpuInfo.DriverMajor < 12 {
return "v11"
}
return "v12"
}

View File

@@ -64,10 +64,6 @@ var RocmComputeMin = 9
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")
// Note: gpuMutex must already be held
func initCudaHandles() *cudaHandles {
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
@@ -215,7 +211,7 @@ func GetGPUInfo() GpuInfoList {
GpuInfo: GpuInfo{
memInfo: mem,
Library: "cpu",
Variant: cpuCapability,
Variant: cpuCapability.String(),
ID: "0",
},
},
@@ -229,11 +225,7 @@ func GetGPUInfo() GpuInfoList {
return GpuInfoList{cpus[0].GpuInfo}
}
// On windows we bundle the nvidia library one level above the runner dir
depPath := ""
if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "cuda")
}
depPath := LibraryDir()
// Load ALL libraries
cHandles = initCudaHandles()
@@ -269,11 +261,23 @@ func GetGPUInfo() GpuInfoList {
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
gpuInfo.computeMajor = int(memInfo.major)
gpuInfo.computeMinor = int(memInfo.minor)
gpuInfo.MinimumMemory = cudaMinimumMemory
gpuInfo.DependencyPath = depPath
variant := cudaVariant(gpuInfo)
if depPath != "" {
gpuInfo.DependencyPath = depPath
// Check for variant specific directory
if variant != "" {
if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant)
}
}
}
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DriverMajor = driverMajor
gpuInfo.DriverMinor = driverMinor
gpuInfo.Variant = variant
// query the management library as well so we can record any skew between the two
// which represents overhead on the GPU we must set aside on subsequent updates
@@ -306,13 +310,6 @@ func GetGPUInfo() GpuInfoList {
if envconfig.IntelGPU() {
oHandles = initOneAPIHandles()
if oHandles != nil && oHandles.oneapi != nil {
// On windows we bundle the oneapi library one level above the runner dir
depPath = ""
if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi")
}
for d := range oHandles.oneapi.num_drivers {
if oHandles.oneapi == nil {
// shouldn't happen
@@ -467,10 +464,12 @@ func GetGPUInfo() GpuInfoList {
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
var ldPaths []string
var patterns []string
gpuLibPaths := []string{}
slog.Debug("Searching for GPU library", "name", baseLibName)
// Start with our bundled libraries
patterns := []string{filepath.Join(LibraryDir(), baseLibName)}
switch runtime.GOOS {
case "windows":
ldPaths = strings.Split(os.Getenv("PATH"), ";")
@@ -479,13 +478,14 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
default:
return gpuLibPaths
}
// Start with whatever we find in the PATH/LD_LIBRARY_PATH
// Then with whatever we find in the PATH/LD_LIBRARY_PATH
for _, ldPath := range ldPaths {
d, err := filepath.Abs(ldPath)
if err != nil {
continue
}
patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
patterns = append(patterns, filepath.Join(d, baseLibName))
}
patterns = append(patterns, defaultPatterns...)
slog.Debug("gpu library search", "globs", patterns)
@@ -641,3 +641,31 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
return "", ""
}
}
func LibraryDir() string {
// On Windows/linux we bundle the dependencies at the same level as the executable
appExe, err := os.Executable()
if err != nil {
slog.Warn("failed to lookup executable path", "error", err)
}
cwd, err := os.Getwd()
if err != nil {
slog.Warn("failed to lookup working directory", "error", err)
}
// Scan for any of our dependeices, and pick first match
for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), ".."), cwd} {
libDep := filepath.Join("lib", "ollama")
if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
return filepath.Join(root, libDep)
}
// Developer mode, local build
if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)
}
if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)
}
}
slog.Warn("unable to locate gpu dependency libraries")
return ""
}

View File

@@ -25,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
return []GpuInfo{
{
Library: "cpu",
Variant: GetCPUCapability(),
Variant: GetCPUCapability().String(),
memInfo: mem,
},
}
@@ -48,7 +48,7 @@ func GetCPUInfo() GpuInfoList {
return []GpuInfo{
{
Library: "cpu",
Variant: GetCPUCapability(),
Variant: GetCPUCapability().String(),
memInfo: mem,
},
}

View File

@@ -47,7 +47,7 @@ var (
CudartMgmtName = "libcudart.so*"
NvcudaMgmtName = "libcuda.so*"
NvmlMgmtName = "" // not currently wired on linux
OneapiMgmtName = "libze_intel_gpu.so"
OneapiMgmtName = "libze_intel_gpu.so*"
)
func GetCPUMem() (memInfo, error) {

View File

@@ -19,7 +19,7 @@ type GpuInfo struct {
Library string `json:"library,omitempty"`
// Optional variant to select (e.g. versions, cpu feature flags)
Variant CPUCapability `json:"variant"`
Variant string `json:"variant"`
// MinimumMemory represents the minimum memory required to use the GPU
MinimumMemory uint64 `json:"-"`
@@ -53,8 +53,10 @@ type CPUInfo struct {
type CudaGPUInfo struct {
GpuInfo
OSOverhead uint64 // Memory overhead between the driver library and management library
index int //nolint:unused,nolintlint
OSOverhead uint64 // Memory overhead between the driver library and management library
index int //nolint:unused,nolintlint
computeMajor int //nolint:unused,nolintlint
computeMinor int //nolint:unused,nolintlint
}
type CudaGPUInfoList []CudaGPUInfo
@@ -81,8 +83,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
for _, info := range l {
found := false
requested := info.Library
if info.Variant != CPUCapabilityNone {
requested += "_" + info.Variant.String()
if info.Variant != CPUCapabilityNone.String() {
requested += "_" + info.Variant
}
for i, lib := range libs {
if lib == requested {
@@ -105,6 +107,7 @@ func (l GpuInfoList) LogDetails() {
slog.Info("inference compute",
"id", g.ID,
"library", g.Library,
"variant", g.Variant,
"compute", g.Compute,
"driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
"name", g.Name,

View File

@@ -1,12 +1,13 @@
set(TARGET ollama_llama_server)
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
set(LLAMA_SERVER_LDFLAGS $ENV{LLAMA_SERVER_LDFLAGS})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
install(TARGETS ${TARGET} RUNTIME)
target_compile_definitions(${TARGET} PRIVATE
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
)
target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT})
target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_SERVER_LDFLAGS})
if (WIN32)
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
endif()

View File

@@ -9,11 +9,14 @@ init_vars() {
ARCH="arm64"
;;
*)
ARCH=$(uname -m | sed -e "s/aarch64/arm64/g")
echo "GOARCH must be set"
echo "this script is meant to be run from within go generate"
exit 1
;;
esac
LLAMACPP_DIR=../llama.cpp
CMAKE_DEFS=""
CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on"
CMAKE_TARGETS="--target ollama_llama_server"
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"
@@ -27,6 +30,7 @@ init_vars() {
WHOLE_ARCHIVE="-Wl,-force_load"
NO_WHOLE_ARCHIVE=""
GCC_ARCH="-arch ${ARCH}"
DIST_BASE=../../dist/darwin-${GOARCH}/
;;
"Linux")
LIB_EXT="so"
@@ -35,6 +39,7 @@ init_vars() {
# Cross compiling not supported on linux - Use docker
GCC_ARCH=""
DIST_BASE=../../dist/linux-${GOARCH}/
;;
*)
;;
@@ -42,6 +47,7 @@ init_vars() {
if [ -z "${CMAKE_CUDA_ARCHITECTURES}" ] ; then
CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
fi
GZIP=$(which pigz 2>/dev/null || echo "gzip")
}
git_module_setup() {
@@ -85,26 +91,36 @@ build() {
compress() {
echo "Compressing payloads to reduce overall binary size..."
pids=""
rm -rf ${BUILD_DIR}/bin/*.gz
for f in ${BUILD_DIR}/bin/* ; do
gzip -n --best -f ${f} &
pids+=" $!"
${GZIP} -n --best -f ${f} &
compress_pids+=" $!"
done
# check for lib directory
if [ -d ${BUILD_DIR}/lib ]; then
for f in ${BUILD_DIR}/lib/* ; do
gzip -n --best -f ${f} &
pids+=" $!"
${GZIP} -n --best -f ${f} &
compress_pids+=" $!"
done
fi
echo
for pid in ${pids}; do
}
wait_for_compress() {
for pid in ${compress_pids}; do
wait $pid
done
echo "Finished compression"
}
install() {
echo "Installing libraries to bin dir ${BUILD_DIR}/bin/"
for lib in $(find ${BUILD_DIR} -name \*.${LIB_EXT}); do
rm -f "${BUILD_DIR}/bin/$(basename ${lib})"
cp -af "${lib}" "${BUILD_DIR}/bin/"
done
}
# Keep the local tree clean after we're done with the build
cleanup() {
(cd ${LLAMACPP_DIR}/ && git checkout CMakeLists.txt)

View File

@@ -6,6 +6,7 @@
set -ex
set -o pipefail
compress_pids=""
echo "Starting darwin generate script"
source $(dirname $0)/gen_common.sh
init_vars
@@ -98,4 +99,5 @@ case "${GOARCH}" in
esac
cleanup
wait_for_compress
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"

View File

@@ -13,6 +13,7 @@
set -ex
set -o pipefail
compress_pids=""
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs() {
@@ -51,7 +52,7 @@ if [ -z "${CUDACXX}" ]; then
export CUDACXX=$(command -v nvcc)
fi
fi
COMMON_CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
source $(dirname $0)/gen_common.sh
init_vars
git_module_setup
@@ -77,10 +78,11 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
init_vars
echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
BUILD_DIR="../build/linux/${ARCH}/cpu"
echo "Building custom CPU"
build
install
compress
else
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
@@ -93,7 +95,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
# -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
@@ -103,6 +105,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR="../build/linux/${ARCH}/cpu"
echo "Building LCD CPU"
build
install
compress
fi
@@ -120,6 +123,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR="../build/linux/${ARCH}/cpu_avx"
echo "Building AVX CPU"
build
install
compress
fi
@@ -133,6 +137,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR="../build/linux/${ARCH}/cpu_avx2"
echo "Building AVX2 CPU"
build
install
compress
fi
fi
@@ -160,7 +165,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
echo "CUDA libraries detected - building dynamic CUDA library"
init_vars
CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
if [ -n "${CUDA_MAJOR}" ]; then
if [ -n "${CUDA_MAJOR}" -a -z "${CUDA_VARIANT}" ]; then
CUDA_VARIANT=_v${CUDA_MAJOR}
fi
if [ "${ARCH}" == "arm64" ]; then
@@ -178,29 +183,19 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
echo "Building custom CUDA GPU"
else
CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_FLAGS=-t8 -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
fi
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS}"
export CUDAFLAGS="-t8"
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
EXTRA_LIBS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/lib/ollama}"
build
# Carry the CUDA libs as payloads to help reduce dependency burden on users
#
# TODO - in the future we may shift to packaging these separately and conditionally
# downloading them in the install script.
DEPS="$(ldd ${BUILD_DIR}/bin/ollama_llama_server )"
for lib in libcudart.so libcublas.so libcublasLt.so ; do
DEP=$(echo "${DEPS}" | grep ${lib} | cut -f1 -d' ' | xargs || true)
if [ -n "${DEP}" -a -e "${CUDA_LIB_DIR}/${DEP}" ]; then
cp "${CUDA_LIB_DIR}/${DEP}" "${BUILD_DIR}/bin/"
elif [ -e "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" ]; then
cp "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" "${BUILD_DIR}/bin/"
elif [ -e "${CUDART_LIB_DIR}/${lib}" ]; then
cp -d ${CUDART_LIB_DIR}/${lib}* "${BUILD_DIR}/bin/"
else
cp -d "${CUDA_LIB_DIR}/${lib}*" "${BUILD_DIR}/bin/"
fi
install
echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}"
mkdir -p "${CUDA_DIST_DIR}"
for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do
cp -a "${lib}" "${CUDA_DIST_DIR}"
done
compress
@@ -218,21 +213,24 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
CC=icx
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
BUILD_DIR="../build/linux/${ARCH}/oneapi"
EXTRA_LIBS="-fsycl -Wl,-rpath,${ONEAPI_ROOT}/compiler/latest/lib,-rpath,${ONEAPI_ROOT}/mkl/latest/lib,-rpath,${ONEAPI_ROOT}/tbb/latest/lib,-rpath,${ONEAPI_ROOT}/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
ONEAPI_DIST_DIR="${DIST_BASE}/lib/ollama"
export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
build
# copy oneAPI dependencies
mkdir -p "${ONEAPI_DIST_DIR}"
for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
cp "${dep}" "${BUILD_DIR}/bin/"
cp -a "${dep}" "${ONEAPI_DIST_DIR}"
done
cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${ONEAPI_DIST_DIR}"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${ONEAPI_DIST_DIR}"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${ONEAPI_DIST_DIR}"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${ONEAPI_DIST_DIR}"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${ONEAPI_DIST_DIR}"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${ONEAPI_DIST_DIR}"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${ONEAPI_DIST_DIR}"
install
compress
fi
@@ -262,23 +260,21 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
echo "Building custom ROCM GPU"
fi
BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}"
EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
ROCM_DIST_DIR="${DIST_BASE}/lib/ollama"
# TODO figure out how to disable runpath (rpath)
# export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work
export LLAMA_SERVER_LDFLAGS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
build
# Record the ROCM dependencies
rm -f "${BUILD_DIR}/bin/deps.txt"
touch "${BUILD_DIR}/bin/deps.txt"
for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo ); do
echo "${dep}" >> "${BUILD_DIR}/bin/deps.txt"
# copy the ROCM dependencies
mkdir -p "${ROCM_DIST_DIR}"
for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo ); do
cp -a "${dep}"* "${ROCM_DIST_DIR}"
done
# bomb out if for some reason we didn't get a few deps
if [ $(cat "${BUILD_DIR}/bin/deps.txt" | wc -l ) -lt 8 ] ; then
cat "${BUILD_DIR}/bin/deps.txt"
echo "ERROR: deps file short"
exit 1
fi
install
compress
fi
cleanup
wait_for_compress
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"

View File

@@ -35,7 +35,7 @@ function init_vars {
)
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
$script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
$script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
$script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\runners"
md "$script:DIST_BASE" -ea 0 > $null
if ($env:CGO_CFLAGS -contains "-g") {
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
@@ -117,7 +117,7 @@ function build {
if ($cmakeDefs -contains "-G") {
$extra=@("-j8")
} else {
$extra= @("--", "/p:CL_MPcount=8")
$extra= @("--", "/maxCpuCount:8")
}
write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra"
& cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra
@@ -261,7 +261,7 @@ function build_cuda() {
if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
# Then build cuda as a dynamically loaded library
$nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
$script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
$script:CUDA_VERSION=((get-item ($nvcc | split-path | split-path)).Basename -Split "\.")[0]
if ($null -ne $script:CUDA_VERSION) {
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
}
@@ -273,9 +273,9 @@ function build_cuda() {
"-DGGML_CUDA=ON",
"-DGGML_AVX=on",
"-DGGML_AVX2=off",
"-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR",
"-DCMAKE_CUDA_FLAGS=-t8",
"-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}"
"-DCMAKE_CUDA_FLAGS=-t6",
"-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}",
"-DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=$env:CUDA_PATH"
)
if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
@@ -286,12 +286,11 @@ function build_cuda() {
sign
install
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" -ea 0 > $null
write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" -ea 0 > $null
write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
} else {
write-host "Skipping CUDA generation step"
}
@@ -325,18 +324,17 @@ function build_oneapi() {
sign
install
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" -ea 0 > $null
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" -ea 0 > $null
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
} else {
Write-Host "Skipping oneAPI generation step"
}
@@ -386,12 +384,11 @@ function build_rocm() {
sign
install
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\rocblas\library\" -ea 0 > $null
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\"
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\rocblas\library\"
} else {
write-host "Skipping ROCm generation step"
}

View File

@@ -82,8 +82,8 @@ func serversForGpu(info gpu.GpuInfo) []string {
// glob workDir for files that start with ollama_
availableServers := getAvailableServers()
requested := info.Library
if info.Variant != gpu.CPUCapabilityNone {
requested += "_" + info.Variant.String()
if info.Variant != gpu.CPUCapabilityNone.String() {
requested += "_" + info.Variant
}
servers := []string{}

View File

@@ -306,20 +306,18 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
if runtime.GOOS == "windows" {
pathEnv = "PATH"
}
// prepend the server directory to LD_LIBRARY_PATH/PATH and the parent dir for common dependencies
libraryPaths := []string{dir, filepath.Dir(dir)}
// Start with the server directory for the LD_LIBRARY_PATH/PATH
libraryPaths := []string{dir}
if libraryPath, ok := os.LookupEnv(pathEnv); ok {
// Append our runner directory to the path
// This will favor system libraries over our bundled library dependencies
// favor our bundled library dependencies over system libraries
libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
}
// Note: we always put the dependency path first
// since this was the exact version we verified for AMD GPUs
// and we favor what the user had in their path
// since this was the exact version we compiled/linked against
if gpus[0].DependencyPath != "" {
// TODO refine for multi-gpu support
// assume gpus from the same library have the same dependency path
libraryPaths = append([]string{gpus[0].DependencyPath}, libraryPaths...)
}

View File

@@ -62,7 +62,7 @@ func (b *Buffer) MoveLeft() {
rLength := runewidth.RuneWidth(r)
if b.DisplayPos%b.LineWidth == 0 {
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
fmt.Print(CursorUp + CursorBOL + CursorRightN(b.Width))
if rLength == 2 {
fmt.Print(CursorLeft)
}
@@ -74,7 +74,7 @@ func (b *Buffer) MoveLeft() {
fmt.Print(CursorLeft)
}
} else {
fmt.Print(cursorLeftN(rLength))
fmt.Print(CursorLeftN(rLength))
}
b.Pos -= 1
@@ -115,15 +115,15 @@ func (b *Buffer) MoveRight() {
b.DisplayPos += rLength
if b.DisplayPos%b.LineWidth == 0 {
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())))
fmt.Print(CursorDown + CursorBOL + CursorRightN(len(b.Prompt.prompt())))
} else if (b.DisplayPos-rLength)%b.LineWidth == b.LineWidth-1 && hasSpace {
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())+rLength))
fmt.Print(CursorDown + CursorBOL + CursorRightN(len(b.Prompt.prompt())+rLength))
b.DisplayPos += 1
} else if b.LineHasSpace.Size() > 0 && b.DisplayPos%b.LineWidth == b.LineWidth-1 && hasSpace {
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())))
fmt.Print(CursorDown + CursorBOL + CursorRightN(len(b.Prompt.prompt())))
b.DisplayPos += 1
} else {
fmt.Print(cursorRightN(rLength))
fmt.Print(CursorRightN(rLength))
}
}
}
@@ -154,7 +154,7 @@ func (b *Buffer) MoveToStart() {
fmt.Print(CursorUp)
}
}
fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt())))
fmt.Print(CursorBOL + CursorRightN(len(b.Prompt.prompt())))
b.Pos = 0
b.DisplayPos = 0
}
@@ -169,9 +169,9 @@ func (b *Buffer) MoveToEnd() {
fmt.Print(CursorDown)
}
remainder := b.DisplaySize() % b.LineWidth
fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt())+remainder))
fmt.Print(CursorBOL + CursorRightN(len(b.Prompt.prompt())+remainder))
} else {
fmt.Print(cursorRightN(b.DisplaySize() - b.DisplayPos))
fmt.Print(CursorRightN(b.DisplaySize() - b.DisplayPos))
}
b.Pos = b.Buf.Size()
@@ -286,8 +286,7 @@ func (b *Buffer) drawRemaining() {
remLength := runewidth.StringWidth(remainingText)
if len(currLine) > 0 {
fmt.Printf(ClearToEOL + currLine)
fmt.Print(cursorLeftN(currLineSpace))
fmt.Print(ClearToEOL + currLine + CursorLeftN(currLineSpace))
} else {
fmt.Print(ClearToEOL)
}
@@ -301,9 +300,9 @@ func (b *Buffer) drawRemaining() {
}
if (b.DisplayPos+currLineSpace)%b.LineWidth == 0 && currLine == remainingText {
fmt.Print(cursorRightN(currLineSpace))
fmt.Print(CursorRightN(currLineSpace))
fmt.Printf("\n%s", b.Prompt.AltPrompt)
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width-currLineSpace))
fmt.Print(CursorUp + CursorBOL + CursorRightN(b.Width-currLineSpace))
}
// render the other lines
@@ -333,9 +332,7 @@ func (b *Buffer) drawRemaining() {
lineLength += runewidth.RuneWidth(c)
fmt.Printf("%c", c)
}
fmt.Print(ClearToEOL)
fmt.Print(cursorUpN(totalLines))
fmt.Printf(CursorBOL + cursorRightN(b.Width-currLineSpace))
fmt.Print(ClearToEOL + CursorUpN(totalLines) + CursorBOL + CursorRightN(b.Width-currLineSpace))
hasSpace := b.GetLineSpacing(b.DisplayPos / b.LineWidth)
@@ -357,8 +354,7 @@ func (b *Buffer) Remove() {
if b.DisplayPos%b.LineWidth == 0 {
// if the user backspaces over the word boundary, do this magic to clear the line
// and move to the end of the previous line
fmt.Printf(CursorBOL + ClearToEOL)
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
fmt.Print(CursorBOL + ClearToEOL + CursorUp + CursorBOL + CursorRightN(b.Width))
if b.DisplaySize()%b.LineWidth < (b.DisplaySize()-rLength)%b.LineWidth {
b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1)
@@ -370,24 +366,23 @@ func (b *Buffer) Remove() {
}
if rLength == 2 {
fmt.Print(CursorLeft + " " + cursorLeftN(2))
fmt.Print(CursorLeft + " " + CursorLeftN(2))
} else {
fmt.Print(" " + CursorLeft)
}
} else if (b.DisplayPos-rLength)%b.LineWidth == 0 && hasSpace {
fmt.Printf(CursorBOL + ClearToEOL)
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
fmt.Print(CursorBOL + ClearToEOL + CursorUp + CursorBOL + CursorRightN(b.Width))
if b.Pos == b.Buf.Size() {
b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1)
}
b.DisplayPos -= 1
} else {
fmt.Print(cursorLeftN(rLength))
fmt.Print(CursorLeftN(rLength))
for range rLength {
fmt.Print(" ")
}
fmt.Print(cursorLeftN(rLength))
fmt.Print(CursorLeftN(rLength))
}
var eraseExtraLine bool
@@ -405,9 +400,9 @@ func (b *Buffer) Remove() {
// are trailing characters which go over the line width boundary
if eraseExtraLine {
remainingLines := (b.DisplaySize() - b.DisplayPos) / b.LineWidth
fmt.Printf(cursorDownN(remainingLines+1) + CursorBOL + ClearToEOL)
fmt.Print(CursorDownN(remainingLines+1) + CursorBOL + ClearToEOL)
place := b.DisplayPos % b.LineWidth
fmt.Printf(cursorUpN(remainingLines+1) + cursorRightN(place+len(b.Prompt.prompt())))
fmt.Print(CursorUpN(remainingLines+1) + CursorRightN(place+len(b.Prompt.prompt())))
}
}
}
@@ -422,9 +417,9 @@ func (b *Buffer) Delete() {
if b.DisplaySize()%b.LineWidth == 0 {
if b.DisplayPos != b.DisplaySize() {
remainingLines := (b.DisplaySize() - b.DisplayPos) / b.LineWidth
fmt.Printf(cursorDownN(remainingLines) + CursorBOL + ClearToEOL)
fmt.Print(CursorDownN(remainingLines) + CursorBOL + ClearToEOL)
place := b.DisplayPos % b.LineWidth
fmt.Printf(cursorUpN(remainingLines) + cursorRightN(place+len(b.Prompt.prompt())))
fmt.Print(CursorUpN(remainingLines) + CursorRightN(place+len(b.Prompt.prompt())))
}
}
}
@@ -471,17 +466,17 @@ func (b *Buffer) DeleteWord() {
}
func (b *Buffer) ClearScreen() {
fmt.Printf(ClearScreen + CursorReset + b.Prompt.prompt())
fmt.Print(ClearScreen + CursorReset + b.Prompt.prompt())
if b.IsEmpty() {
ph := b.Prompt.placeholder()
fmt.Printf(ColorGrey + ph + cursorLeftN(len(ph)) + ColorDefault)
fmt.Print(ColorGrey + ph + CursorLeftN(len(ph)) + ColorDefault)
} else {
currPos := b.DisplayPos
currIndex := b.Pos
b.Pos = 0
b.DisplayPos = 0
b.drawRemaining()
fmt.Printf(CursorReset + cursorRightN(len(b.Prompt.prompt())))
fmt.Print(CursorReset + CursorRightN(len(b.Prompt.prompt())))
if currPos > 0 {
targetLine := currPos / b.LineWidth
if targetLine > 0 {
@@ -491,10 +486,10 @@ func (b *Buffer) ClearScreen() {
}
remainder := currPos % b.LineWidth
if remainder > 0 {
fmt.Print(cursorRightN(remainder))
fmt.Print(CursorRightN(remainder))
}
if currPos%b.LineWidth == 0 {
fmt.Printf(CursorBOL + b.Prompt.AltPrompt)
fmt.Print(CursorBOL + b.Prompt.AltPrompt)
}
}
b.Pos = currIndex
@@ -513,13 +508,13 @@ func (b *Buffer) Replace(r []rune) {
b.Buf.Clear()
fmt.Printf(CursorBOL + ClearToEOL)
fmt.Print(CursorBOL + ClearToEOL)
for range lineNums {
fmt.Print(CursorUp + CursorBOL + ClearToEOL)
}
fmt.Printf(CursorBOL + b.Prompt.prompt())
fmt.Print(CursorBOL + b.Prompt.prompt())
for _, c := range r {
b.Add(c)
@@ -545,19 +540,3 @@ func (b *Buffer) StringNM(n, m int) string {
}
return s
}
func cursorLeftN(n int) string {
return fmt.Sprintf(CursorLeftN, n)
}
func cursorRightN(n int) string {
return fmt.Sprintf(CursorRightN, n)
}
func cursorUpN(n int) string {
return fmt.Sprintf(CursorUpN, n)
}
func cursorDownN(n int) string {
return fmt.Sprintf(CursorDownN, n)
}

View File

@@ -98,7 +98,7 @@ func (i *Instance) Readline() (string, error) {
showPlaceholder := !i.Pasting || i.Prompt.UseAlt
if buf.IsEmpty() && showPlaceholder {
ph := i.Prompt.placeholder()
fmt.Printf(ColorGrey + ph + fmt.Sprintf(CursorLeftN, len(ph)) + ColorDefault)
fmt.Print(ColorGrey + ph + CursorLeftN(len(ph)) + ColorDefault)
}
r, err := i.Terminal.Read()

View File

@@ -1,5 +1,7 @@
package readline
import "strconv"
const (
CharNull = 0
CharLineStart = 1
@@ -41,34 +43,49 @@ const (
)
const (
CursorUp = "\033[1A"
CursorDown = "\033[1B"
CursorRight = "\033[1C"
CursorLeft = "\033[1D"
Esc = "\x1b"
CursorSave = "\033[s"
CursorRestore = "\033[u"
CursorSave = Esc + "[s"
CursorRestore = Esc + "[u"
CursorUpN = "\033[%dA"
CursorDownN = "\033[%dB"
CursorRightN = "\033[%dC"
CursorLeftN = "\033[%dD"
CursorEOL = Esc + "[E"
CursorBOL = Esc + "[1G"
CursorHide = Esc + "[?25l"
CursorShow = Esc + "[?25h"
CursorEOL = "\033[E"
CursorBOL = "\033[1G"
CursorHide = "\033[?25l"
CursorShow = "\033[?25h"
ClearToEOL = Esc + "[K"
ClearLine = Esc + "[2K"
ClearScreen = Esc + "[2J"
CursorReset = Esc + "[0;0f"
ClearToEOL = "\033[K"
ClearLine = "\033[2K"
ClearScreen = "\033[2J"
CursorReset = "\033[0;0f"
ColorGrey = Esc + "[38;5;245m"
ColorDefault = Esc + "[0m"
ColorGrey = "\033[38;5;245m"
ColorDefault = "\033[0m"
StartBracketedPaste = Esc + "[?2004h"
EndBracketedPaste = Esc + "[?2004l"
)
StartBracketedPaste = "\033[?2004h"
EndBracketedPaste = "\033[?2004l"
func CursorUpN(n int) string {
return Esc + "[" + strconv.Itoa(n) + "A"
}
func CursorDownN(n int) string {
return Esc + "[" + strconv.Itoa(n) + "B"
}
func CursorRightN(n int) string {
return Esc + "[" + strconv.Itoa(n) + "C"
}
func CursorLeftN(n int) string {
return Esc + "[" + strconv.Itoa(n) + "D"
}
var (
CursorUp = CursorUpN(1)
CursorDown = CursorDownN(1)
CursorRight = CursorRightN(1)
CursorLeft = CursorLeftN(1)
)
const (

View File

@@ -4,6 +4,7 @@ set -eu
export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
GZIP=$(which pigz 2>/dev/null || echo "gzip")
BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""}
@@ -21,11 +22,10 @@ for TARGETARCH in ${BUILD_ARCH}; do
-t builder:$TARGETARCH \
.
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/ollama ./dist/ollama-linux-$TARGETARCH
if [ "$TARGETARCH" = "amd64" ]; then
docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/deps/ ./dist/
fi
rm -rf ./dist/linux-$TARGETARCH
docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist
docker rm builder-$TARGETARCH
echo "Compressing final linux bundle..."
rm -f ./dist/ollama-linux-$TARGETARCH.tgz
(cd dist/linux-$TARGETARCH && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH.tgz )
done

View File

@@ -7,6 +7,7 @@
$ErrorActionPreference = "Stop"
function checkEnv() {
$script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
$script:TARGET_ARCH=$Env:PROCESSOR_ARCHITECTURE.ToLower()
Write-host "Building for ${script:TARGET_ARCH}"
write-host "Locating required tools and paths"
@@ -15,26 +16,23 @@ function checkEnv() {
$MSVC_INSTALL=(Get-CimInstance MSFT_VSInstance -Namespace root/cimv2/vs)[0].InstallLocation
$env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0]
}
# Try to find the CUDA dir
if ($null -eq $env:NVIDIA_DIR) {
# Locate CUDA versions
# Note: this assumes every version found will be built
$cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
if ($cudaList.length -eq 0) {
$d=(get-command -ea 'silentlycontinue' nvcc).path
if ($d -ne $null) {
$script:NVIDIA_DIR=($d| split-path -parent)
} else {
$cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
if ($cudaList.length > 0) {
$script:NVIDIA_DIR=$cudaList[0]
}
if ($null -ne $d) {
$script:CUDA_DIRS=@($d| split-path -parent)
}
} else {
$script:NVIDIA_DIR=$env:NVIDIA_DIR
$script:CUDA_DIRS=$cudaList
}
$script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0]
$script:DEPS_DIR="${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}"
$env:CGO_ENABLED="1"
echo "Checking version"
Write-Output "Checking version"
if (!$env:VERSION) {
$data=(git describe --tags --first-parent --abbrev=7 --long --dirty --always)
$pattern="v(.+)"
@@ -71,7 +69,48 @@ function checkEnv() {
function buildOllama() {
write-host "Building ollama CLI"
if ($null -eq ${env:OLLAMA_SKIP_GENERATE}) {
& go generate ./...
Remove-Item -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}"
# TODO - consider trying to parallelize this with Start-ThreadJob, but env vars can't be used to toggle
# which targets to build
# Start by skipping CUDA to build everything else
pwsh -Command { $env:OLLAMA_SKIP_CUDA_GENERATE="1"; & go generate ./... }
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# Then skip everyhting else and build all the CUDA variants
foreach ($env:CUDA_LIB_DIR in $script:CUDA_DIRS) {
write-host "Building CUDA ${env:CUDA_LIB_DIR}"
if ($env:CUDA_LIB_DIR.Contains("v12")) {
pwsh -Command {
$env:OLLAMA_SKIP_CUDA_GENERATE=""
$env:OLLAMA_SKIP_STATIC_GENERATE="1"
$env:OLLAMA_SKIP_CPU_GENERATE="1"
$env:OLLAMA_SKIP_ONEAPI_GENERATE="1"
$env:OLLAMA_SKIP_ROCM_GENERATE="1"
$env:CMAKE_CUDA_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
$env:OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on"
$env:CUDA_PATH=split-path -path $env:CUDA_LIB_DIR -parent
$env:PATH="$envs:CUDA_LIB_DIR;$env:PATH"
& go generate ./...
}
} else {
pwsh -Command {
$env:OLLAMA_SKIP_CUDA_GENERATE=""
$env:OLLAMA_SKIP_STATIC_GENERATE="1"
$env:OLLAMA_SKIP_CPU_GENERATE="1"
$env:OLLAMA_SKIP_ONEAPI_GENERATE="1"
$env:OLLAMA_SKIP_ROCM_GENERATE="1"
$env:CMAKE_CUDA_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
$env:OLLAMA_CUSTOM_CUDA_DEFS=""
$env:CUDA_PATH=split-path -path $env:CUDA_LIB_DIR -parent
$env:PATH="$envs:CUDA_LIB_DIR;$env:PATH"
& go generate ./...
}
}
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
} else {
write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set"
@@ -83,8 +122,8 @@ function buildOllama() {
/csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\ -Force
cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\
New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\bin\ -Force
cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\bin\
}
function buildApp() {
@@ -103,22 +142,22 @@ function buildApp() {
function gatherDependencies() {
write-host "Gathering runtime dependencies"
cd "${script:SRC_DIR}"
md "${script:DEPS_DIR}\ollama_runners" -ea 0 > $null
md "${script:DEPS_DIR}\lib\ollama" -ea 0 > $null
# TODO - this varies based on host build system and MSVC version - drive from dumpbin output
# currently works for Win11 + MSVC 2019 + Cuda V11
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\msvcp140*.dll" "${script:DEPS_DIR}\ollama_runners\"
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\ollama_runners\"
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\ollama_runners\"
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\msvcp140*.dll" "${script:DEPS_DIR}\lib\ollama\"
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\lib\ollama\"
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\lib\ollama\"
foreach ($part in $("runtime", "stdio", "filesystem", "math", "convert", "heap", "string", "time", "locale", "environment")) {
cp "$env:VCToolsRedistDir\..\..\..\Tools\Llvm\x64\bin\api-ms-win-crt-${part}*.dll" "${script:DEPS_DIR}\ollama_runners\"
cp "$env:VCToolsRedistDir\..\..\..\Tools\Llvm\x64\bin\api-ms-win-crt-${part}*.dll" "${script:DEPS_DIR}\lib\ollama\"
}
cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\"
if ("${env:KEY_CONTAINER}") {
write-host "about to sign"
foreach ($file in (get-childitem "${script:DEPS_DIR}\cuda\cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){
foreach ($file in (get-childitem "${script:DEPS_DIR}\lib\ollama\cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){
write-host "signing $file"
& "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" `
/csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} $file

View File

@@ -63,16 +63,36 @@ if [ -n "$NEEDS" ]; then
exit 1
fi
status "Downloading ollama..."
curl --fail --show-error --location --progress-bar -o $TEMP_DIR/ollama "https://ollama.com/download/ollama-linux-${ARCH}${VER_PARAM}"
for BINDIR in /usr/local/bin /usr/bin /bin; do
echo $PATH | grep -q $BINDIR && break || continue
done
OLLAMA_INSTALL_DIR=$(dirname ${BINDIR})
status "Installing ollama to $BINDIR..."
status "Installing ollama to $OLLAMA_INSTALL_DIR"
$SUDO install -o0 -g0 -m755 -d $BINDIR
$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama
$SUDO install -o0 -g0 -m755 -d "$OLLAMA_INSTALL_DIR"
if curl -I --silent --fail --location "https://ollama.com/download/ollama-linux-${ARCH}.tgz${VER_PARAM}" >/dev/null ; then
status "Downloading Linux ${ARCH} bundle"
curl --fail --show-error --location --progress-bar \
"https://ollama.com/download/ollama-linux-${ARCH}.tgz${VER_PARAM}" | \
$SUDO tar -xzf - -C "$OLLAMA_INSTALL_DIR"
BUNDLE=1
if [ "$OLLAMA_INSTALL_DIR/bin/ollama" != "$BINDIR/ollama" ] ; then
status "Making ollama accessible in the PATH in $BINDIR"
$SUDO ln -sf "$OLLAMA_INSTALL_DIR/ollama" "$BINDIR/ollama"
fi
else
status "Downloading Linux ${ARCH} CLI"
curl --fail --show-error --location --progress-bar -o "$TEMP_DIR/ollama"\
"https://ollama.com/download/ollama-linux-${ARCH}${VER_PARAM}"
$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $OLLAMA_INSTALL_DIR/ollama
BUNDLE=0
if [ "$OLLAMA_INSTALL_DIR/ollama" != "$BINDIR/ollama" ] ; then
status "Making ollama accessible in the PATH in $BINDIR"
$SUDO ln -sf "$OLLAMA_INSTALL_DIR/ollama" "$BINDIR/ollama"
fi
fi
install_success() {
status 'The Ollama API is now available at 127.0.0.1:11434.'
@@ -178,6 +198,11 @@ if ! check_gpu lspci nvidia && ! check_gpu lshw nvidia && ! check_gpu lspci amdg
fi
if check_gpu lspci amdgpu || check_gpu lshw amdgpu; then
if [ $BUNDLE -ne 0 ]; then
install_success
status "AMD GPU ready."
exit 0
fi
# Look for pre-existing ROCm v6 before downloading the dependencies
for search in "${HIP_PATH:-''}" "${ROCM_PATH:-''}" "/opt/rocm" "/usr/lib64"; do
if [ -n "${search}" ] && [ -e "${search}/libhipblas.so.2" -o -e "${search}/lib/libhipblas.so.2" ]; then

View File

@@ -3,6 +3,7 @@
# Script for common Dockerfile dependency installation in redhat linux based images
set -ex
set -o pipefail
MACHINE=$(uname -m)
if grep -i "centos" /etc/system-release >/dev/null; then
@@ -29,7 +30,7 @@ if grep -i "centos" /etc/system-release >/dev/null; then
dnf install -y rh-git227-git
ln -s /opt/rh/rh-git227/root/usr/bin/git /usr/local/bin/git
fi
dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++
dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ pigz
elif grep -i "rocky" /etc/system-release >/dev/null; then
# Temporary workaround until rocky 8 AppStream ships GCC 10.4 (10.3 is incompatible with NVCC)
cat << EOF > /etc/yum.repos.d/Rocky-Vault.repo
@@ -43,12 +44,21 @@ gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-rockyofficial
EOF
dnf install -y git \
gcc-toolset-10-gcc-10.2.1-8.2.el8 \
gcc-toolset-10-gcc-c++-10.2.1-8.2.el8
gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 \
pigz
else
echo "ERROR Unexpected distro"
exit 1
fi
if [ "${MACHINE}" = "x86_64" ] ; then
curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /tmp --strip-components 1 && \
mv /tmp/ccache /usr/local/bin/
else
yum -y install epel-release
yum install -y ccache
fi
if [ -n "${CMAKE_VERSION}" ]; then
curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
fi

View File

@@ -94,7 +94,7 @@ func (p *blobDownloadPart) UnmarshalJSON(b []byte) error {
}
const (
numDownloadParts = 64
numDownloadParts = 16
minDownloadPartSize int64 = 100 * format.MegaByte
maxDownloadPartSize int64 = 1000 * format.MegaByte
)

View File

@@ -215,25 +215,20 @@ func GetManifest(mp ModelPath) (*Manifest, string, error) {
return nil, "", err
}
if _, err = os.Stat(fp); err != nil {
return nil, "", err
}
var manifest *Manifest
bts, err := os.ReadFile(fp)
f, err := os.Open(fp)
if err != nil {
return nil, "", fmt.Errorf("couldn't open file '%s'", fp)
return nil, "", err
}
defer f.Close()
shaSum := sha256.Sum256(bts)
shaStr := hex.EncodeToString(shaSum[:])
sha256sum := sha256.New()
if err := json.Unmarshal(bts, &manifest); err != nil {
var manifest Manifest
if err := json.NewDecoder(io.TeeReader(f, sha256sum)).Decode(&manifest); err != nil {
return nil, "", err
}
return manifest, shaStr, nil
return &manifest, hex.EncodeToString(sha256sum.Sum(nil)), nil
}
func GetModel(name string) (*Model, error) {
@@ -692,43 +687,18 @@ func CopyModel(src, dst model.Name) error {
return err
}
func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{}) error {
fp, err := GetManifestPath()
func deleteUnusedLayers(deleteMap map[string]struct{}) error {
manifests, err := Manifests()
if err != nil {
return err
}
walkFunc := func(path string, info os.FileInfo, _ error) error {
if info.IsDir() {
return nil
}
dir, file := filepath.Split(path)
dir = strings.Trim(strings.TrimPrefix(dir, fp), string(os.PathSeparator))
tag := strings.Join([]string{dir, file}, ":")
fmp := ParseModelPath(tag)
// skip the manifest we're trying to delete
if skipModelPath != nil && skipModelPath.GetFullTagname() == fmp.GetFullTagname() {
return nil
}
// save (i.e. delete from the deleteMap) any files used in other manifests
manifest, _, err := GetManifest(fmp)
if err != nil {
return err
}
for _, manifest := range manifests {
for _, layer := range manifest.Layers {
delete(deleteMap, layer.Digest)
}
delete(deleteMap, manifest.Config.Digest)
return nil
}
if err := filepath.Walk(fp, walkFunc); err != nil {
return err
}
// only delete the files which are still in the deleteMap
@@ -781,8 +751,7 @@ func PruneLayers() error {
slog.Info(fmt.Sprintf("total blobs: %d", len(deleteMap)))
err = deleteUnusedLayers(nil, deleteMap)
if err != nil {
if err := deleteUnusedLayers(deleteMap); err != nil {
slog.Error(fmt.Sprintf("couldn't remove unused layers: %v", err))
return nil
}
@@ -877,26 +846,19 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
mp := ParseModelPath(name)
var manifest *Manifest
var err error
var noprune string
// build deleteMap to prune unused layers
deleteMap := make(map[string]struct{})
if !envconfig.NoPrune() {
manifest, _, err = GetManifest(mp)
if err != nil && !errors.Is(err, os.ErrNotExist) {
return err
manifest, _, err := GetManifest(mp)
if errors.Is(err, os.ErrNotExist) {
// noop
} else if err != nil && !errors.Is(err, os.ErrNotExist) {
return err
} else {
for _, l := range manifest.Layers {
deleteMap[l.Digest] = struct{}{}
}
if manifest != nil {
for _, l := range manifest.Layers {
deleteMap[l.Digest] = struct{}{}
}
if manifest.Config.Digest != "" {
deleteMap[manifest.Config.Digest] = struct{}{}
}
if manifest.Config.Digest != "" {
deleteMap[manifest.Config.Digest] = struct{}{}
}
}
@@ -975,11 +937,9 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return err
}
if noprune == "" {
fn(api.ProgressResponse{Status: "removing any unused layers"})
err = deleteUnusedLayers(nil, deleteMap)
if err != nil {
slog.Error(fmt.Sprintf("couldn't remove unused layers: %v", err))
if !envconfig.NoPrune() && len(deleteMap) > 0 {
fn(api.ProgressResponse{Status: "removing unused layers"})
if err := deleteUnusedLayers(deleteMap); err != nil {
fn(api.ProgressResponse{Status: fmt.Sprintf("couldn't remove unused layers: %v", err)})
}
}
@@ -1000,12 +960,12 @@ func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *registryOptio
}
defer resp.Body.Close()
var m *Manifest
var m Manifest
if err := json.NewDecoder(resp.Body).Decode(&m); err != nil {
return nil, err
}
return m, err
return &m, err
}
// GetSHA256Digest returns the SHA256 hash of a given buffer and returns it, and the size of buffer

View File

@@ -5,6 +5,7 @@ import (
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
"os"
@@ -150,14 +151,16 @@ func Manifests() (map[model.Name]*Manifest, error) {
n := model.ParseNameFromFilepath(rel)
if !n.IsValid() {
slog.Warn("bad manifest name", "path", rel, "error", err)
slog.Warn("bad manifest name", "path", rel)
continue
}
m, err := ParseNamedManifest(n)
if err != nil {
if syntax := &(json.SyntaxError{}); errors.As(err, &syntax) {
slog.Warn("bad manifest", "name", n, "error", err)
continue
} else if err != nil {
return nil, fmt.Errorf("%s: %w", n, err)
}
ms[n] = m

View File

@@ -418,7 +418,7 @@ func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList,
// some older models are not compatible with newer versions of llama.cpp
// show a generalized compatibility error until there is a better way to
// check for model compatibility
if errors.Is(llm.ErrUnsupportedFormat, err) || strings.Contains(err.Error(), "failed to load model") {
if errors.Is(err, llm.ErrUnsupportedFormat) || strings.Contains(err.Error(), "failed to load model") {
err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, req.model.ShortName)
}
slog.Info("NewLlamaServer failed", "model", req.model.ModelPath, "error", err)
@@ -734,7 +734,10 @@ func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoL
// If multiple Libraries are detected, pick the Library which loads the most layers for the model
func pickBestPartialFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numParallel *int) gpu.GpuInfoList {
*numParallel = 1
if *numParallel <= 0 {
*numParallel = 1
req.opts.NumCtx = req.origNumCtx
}
byLibrary := gpus.ByLibrary()
if len(byLibrary) <= 1 {
return gpus

View File

@@ -45,7 +45,7 @@ type blobUpload struct {
}
const (
numUploadParts = 64
numUploadParts = 16
minUploadPartSize int64 = 100 * format.MegaByte
maxUploadPartSize int64 = 1000 * format.MegaByte
)

View File

@@ -219,7 +219,7 @@ func (n Name) String() string {
return b.String()
}
// DisplayShort returns a short string version of the name.
// DisplayShortest returns a short string version of the name.
func (n Name) DisplayShortest() string {
var sb strings.Builder