Merging the latest stable (#2)
* Applied 00-fix-vulkan-building.patch * Implemented vulkan backend based on the work done by whyvl, Dts0, McBane87 and others Tested on AMD Ryzen 7 8845HS w/ Radeon 780M Graphics with ROCm disabled ``` [GIN-debug] POST /v1/chat/completions --> github.com/ollama/ollama/server.(*Server).ChatHandler-fm (6 handlers) [GIN-debug] POST /v1/completions --> github.com/ollama/ollama/server.(*Server).GenerateHandler-fm (6 handlers) [GIN-debug] POST /v1/embeddings --> github.com/ollama/ollama/server.(*Server).EmbedHandler-fm (6 handlers) [GIN-debug] GET /v1/models --> github.com/ollama/ollama/server.(*Server).ListHandler-fm (6 handlers) [GIN-debug] GET /v1/models/:model --> github.com/ollama/ollama/server.(*Server).ShowHandler-fm (6 handlers) time=2025-03-11T13:00:40.793Z level=INFO source=gpu.go:199 msg="vulkan: load libvulkan and libcap ok" time=2025-03-11T13:00:40.877Z level=INFO source=gpu.go:421 msg="error looking up vulkan GPU memory" error="device is a CPU" time=2025-03-11T13:00:40.878Z level=WARN source=amd_linux.go:443 msg="amdgpu detected, but no compatible rocm library found. Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install" time=2025-03-11T13:00:40.878Z level=WARN source=amd_linux.go:348 msg="unable to verify rocm library: no suitable rocm found, falling back to CPU" time=2025-03-11T13:00:40.879Z level=INFO source=types.go:137 msg="inference compute" id=0 library=vulkan variant="" compute=1.3 driver=1.3 name="AMD Radeon Graphics (RADV GFX1103_R1)" total="15.6 GiB" available="15.6 GiB" ``` ``` # ollama run phi4:14b >>> /set verbose Set 'verbose' mode. >>> how's it going? Hello! I'm here to help you with any questions or tasks you have. How can I assist you today? 😊 total duration: 3.341959745s load duration: 18.165612ms prompt eval count: 15 token(s) prompt eval duration: 475ms prompt eval rate: 31.58 tokens/s eval count: 26 token(s) eval duration: 2.846s eval rate: 9.14 tokens/s >>> ```
This commit is contained in:
parent
31606b2feb
commit
6b1f84e171
217
Dockerfile
217
Dockerfile
|
|
@ -1,98 +1,151 @@
|
|||
FROM --platform=linux/amd64 library/ubuntu:noble as builder
|
||||
# vim: filetype=dockerfile
|
||||
|
||||
ENV DEBIAN_FRONTEND="noninteractive"
|
||||
ARG FLAVOR=${TARGETARCH}
|
||||
|
||||
ENV VULKAN_VER_BASE="1.3.296"
|
||||
ENV VULKAN_VER="${VULKAN_VER_BASE}.0"
|
||||
ENV UBUNTU_VERSION="noble"
|
||||
ARG ROCMVERSION=6.3.3
|
||||
ARG JETPACK5VERSION=r35.4.1
|
||||
ARG JETPACK6VERSION=r36.4.0
|
||||
ARG CMAKEVERSION=3.31.2
|
||||
ARG VULKANVERSION=1.4.304.1
|
||||
|
||||
ENV GOLANG_VERSION="1.22.8"
|
||||
ENV GOARCH="amd64"
|
||||
ENV CGO_ENABLED=1
|
||||
# CUDA v11 requires gcc v10. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
|
||||
FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
|
||||
RUN yum install -y yum-utils \
|
||||
&& yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
|
||||
&& rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
|
||||
&& dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \
|
||||
&& yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
|
||||
ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
|
||||
ARG VULKANVERSION
|
||||
RUN wget https://sdk.lunarg.com/sdk/download/${VULKANVERSION}/linux/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz -O /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
|
||||
&& tar xvf /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
|
||||
&& dnf -y install ninja-build libcap-devel \
|
||||
&& ln -s /usr/bin/python3 /usr/bin/python \
|
||||
&& /${VULKANVERSION}/vulkansdk -j 8 vulkan-headers \
|
||||
&& /${VULKANVERSION}/vulkansdk -j 8 shaderc
|
||||
RUN cp -r /${VULKANVERSION}/x86_64/include/* /usr/local/include/ \
|
||||
&& cp -r /${VULKANVERSION}/x86_64/lib/* /usr/local/lib
|
||||
ENV PATH=/${VULKANVERSION}/x86_64/bin:$PATH
|
||||
|
||||
FROM --platform=linux/arm64 almalinux:8 AS base-arm64
|
||||
# install epel-release for ccache
|
||||
RUN yum install -y yum-utils epel-release \
|
||||
&& dnf install -y clang ccache \
|
||||
&& yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
|
||||
ENV CC=clang CXX=clang++
|
||||
|
||||
FROM base-${TARGETARCH} AS base
|
||||
ARG CMAKEVERSION
|
||||
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
|
||||
COPY CMakeLists.txt CMakePresets.json .
|
||||
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
|
||||
ENV LDFLAGS=-s
|
||||
|
||||
# Default mirror was very slow
|
||||
RUN \
|
||||
sed -i 's/archive.ubuntu.com/gb.archive.ubuntu.com/g' /etc/apt/sources.list.d/ubuntu.sources
|
||||
FROM base AS cpu
|
||||
RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++
|
||||
ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
cmake --preset 'CPU' \
|
||||
&& cmake --build --parallel --preset 'CPU' \
|
||||
&& cmake --install build --component CPU --strip --parallel 8
|
||||
|
||||
RUN \
|
||||
apt-get update && \
|
||||
apt-get install -y ca-certificates build-essential ccache cmake wget git curl rsync xz-utils libcap-dev
|
||||
FROM base AS cuda-11
|
||||
ARG CUDA11VERSION=11.3
|
||||
RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
|
||||
ENV PATH=/usr/local/cuda-11/bin:$PATH
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
cmake --preset 'CUDA 11' \
|
||||
&& cmake --build --parallel --preset 'CUDA 11' \
|
||||
&& cmake --install build --component CUDA --strip --parallel 8
|
||||
|
||||
RUN \
|
||||
mkdir -p /usr/local 2>/dev/null || true && \
|
||||
curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-${GOARCH}.tar.gz | tar -xz -C /usr/local && \
|
||||
ln -s /usr/local/go/bin/go /usr/local/bin/go && \
|
||||
ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt
|
||||
FROM base AS cuda-12
|
||||
ARG CUDA12VERSION=12.8
|
||||
RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
|
||||
ENV PATH=/usr/local/cuda-12/bin:$PATH
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
cmake --preset 'CUDA 12' \
|
||||
&& cmake --build --parallel --preset 'CUDA 12' \
|
||||
&& cmake --install build --component CUDA --strip --parallel 8
|
||||
|
||||
FROM base AS rocm-6
|
||||
ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
cmake --preset 'ROCm 6' \
|
||||
&& cmake --build --parallel --preset 'ROCm 6' \
|
||||
&& cmake --install build --component HIP --strip --parallel 8
|
||||
|
||||
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5
|
||||
ARG CMAKEVERSION
|
||||
RUN apt-get update && apt-get install -y curl ccache \
|
||||
&& curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
|
||||
COPY CMakeLists.txt CMakePresets.json .
|
||||
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
cmake --preset 'JetPack 5' \
|
||||
&& cmake --build --parallel --preset 'JetPack 5' \
|
||||
&& cmake --install build --component CUDA --strip --parallel 8
|
||||
|
||||
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6
|
||||
ARG CMAKEVERSION
|
||||
RUN apt-get update && apt-get install -y curl ccache \
|
||||
&& curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
|
||||
COPY CMakeLists.txt CMakePresets.json .
|
||||
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
cmake --preset 'JetPack 6' \
|
||||
&& cmake --build --parallel --preset 'JetPack 6' \
|
||||
&& cmake --install build --component CUDA --strip --parallel 8
|
||||
|
||||
FROM base AS vulkan
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
cmake --preset 'Vulkan' \
|
||||
&& cmake --build --parallel --preset 'Vulkan' \
|
||||
&& cmake --install build --component Vulkan --strip --parallel 8
|
||||
|
||||
|
||||
RUN \
|
||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | gpg --dearmor -o /etc/apt/trusted.gpg.d/lunarg-signing-key-pub.gpg && \
|
||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-${UBUNTU_VERSION}.list https://packages.lunarg.com/vulkan/${VULKAN_VER_BASE}/lunarg-vulkan-${VULKAN_VER_BASE}-${UBUNTU_VERSION}.list && \
|
||||
apt update && apt install -y vulkan-sdk
|
||||
FROM base AS build
|
||||
WORKDIR /go/src/github.com/ollama/ollama
|
||||
COPY go.mod go.sum .
|
||||
RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
|
||||
ENV PATH=/usr/local/go/bin:$PATH
|
||||
RUN go mod download
|
||||
COPY . .
|
||||
ARG GOFLAGS="'-ldflags=-w -s'"
|
||||
ENV CGO_ENABLED=1
|
||||
RUN --mount=type=cache,target=/root/.cache/go-build \
|
||||
go build -trimpath -buildmode=pie -o /bin/ollama .
|
||||
|
||||
# Last testet ollama-vulkan commit:
|
||||
# 2d443b3dd660a1fd2760d64538512df93648b4bb
|
||||
COPY patches/ /tmp/patches/
|
||||
RUN \
|
||||
git clone https://github.com/pufferffish/ollama-vulkan.git "/tmp/ollama-vulkan-git" && \
|
||||
cd "/tmp/ollama-vulkan-git" && \
|
||||
git checkout 2d443b3dd660a1fd2760d64538512df93648b4bb -b ollama_vulkan_stable && \
|
||||
git config user.name "Builder" && git config user.email "builder@local" && \
|
||||
git remote add ollama_vanilla https://github.com/ollama/ollama.git && \
|
||||
git fetch ollama_vanilla --tags && git checkout v0.5.13 -b ollama_vanilla_stable && \
|
||||
git checkout ollama_vanilla_stable && git merge ollama_vulkan_stable --allow-unrelated-histories --no-edit && \
|
||||
for p in /tmp/patches/*.patch; do patch -p1 < $p; done
|
||||
RUN \
|
||||
cd "/tmp/ollama-vulkan-git" && \
|
||||
make -f Makefile.sync clean sync
|
||||
FROM --platform=linux/amd64 scratch AS amd64
|
||||
COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
|
||||
COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
|
||||
COPY --from=vulkan dist/lib/ollama/vulkan /lib/ollama/vulkan
|
||||
|
||||
FROM --platform=linux/arm64 scratch AS arm64
|
||||
COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
|
||||
COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
|
||||
COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 lib/ollama/cuda_jetpack5
|
||||
COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 lib/ollama/cuda_jetpack6
|
||||
|
||||
FROM builder AS cpu-build
|
||||
RUN \
|
||||
cd "/tmp/ollama-vulkan-git" && \
|
||||
cmake --preset CPU && cmake --build --parallel --preset CPU && \
|
||||
cmake --install build --component CPU --strip
|
||||
FROM scratch AS rocm
|
||||
COPY --from=rocm-6 dist/lib/ollama/rocm /lib/ollama/rocm
|
||||
|
||||
FROM builder AS vulkan-build
|
||||
RUN \
|
||||
cd "/tmp/ollama-vulkan-git" && \
|
||||
cmake --preset Vulkan && \
|
||||
cmake --build --parallel --preset Vulkan && \
|
||||
cmake --install build --component Vulkan --strip
|
||||
|
||||
FROM builder AS binary-build
|
||||
RUN \
|
||||
cd "/tmp/ollama-vulkan-git" && \
|
||||
. scripts/env.sh && \
|
||||
mkdir -p dist/bin && \
|
||||
go build -trimpath -buildmode=pie -o dist/bin/ollama .
|
||||
|
||||
|
||||
FROM --platform=linux/amd64 library/ubuntu:noble
|
||||
RUN \
|
||||
apt-get update && apt -y dist-upgrade && \
|
||||
apt-get install -y ca-certificates libcap2 libvulkan1 && \
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install ROCm
|
||||
RUN \
|
||||
apt update && \
|
||||
apt install -y wget python3-setuptools python3-wheel && \
|
||||
wget https://repo.radeon.com/amdgpu-install/6.3.3/ubuntu/noble/amdgpu-install_6.3.60303-1_all.deb -O /tmp/amdgpu-install_6.3.60303-1_all.deb && \
|
||||
apt install -y /tmp/amdgpu-install_6.3.60303-1_all.deb && \
|
||||
apt update && apt install -y rocm && \
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
COPY --from=cpu-build /tmp/ollama-vulkan-git/dist/lib/ollama/ /lib/ollama/
|
||||
COPY --from=vulkan-build /tmp/ollama-vulkan-git/dist/lib/ollama/vulkan/ /lib/ollama/vulkan/
|
||||
COPY --from=binary-build /tmp/ollama-vulkan-git/dist/bin/ /bin/
|
||||
|
||||
RUN find /lib/ollama && find /bin/ollama
|
||||
FROM ${FLAVOR} AS archive
|
||||
ARG VULKANVERSION
|
||||
COPY --from=cpu dist/lib/ollama /lib/ollama
|
||||
COPY --from=build /bin/ollama /bin/ollama
|
||||
|
||||
FROM ubuntu:24.04
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y ca-certificates libcap2 libvulkan1 \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
COPY --from=archive /bin /usr/bin
|
||||
COPY --from=archive /lib/ollama /usr/lib/ollama
|
||||
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
ENV OLLAMA_HOST=0.0.0.0:11434
|
||||
EXPOSE 11434
|
||||
ENV OLLAMA_HOST 0.0.0.0
|
||||
|
||||
ENTRYPOINT ["/bin/ollama"]
|
||||
CMD ["serve"]
|
||||
CMD ["serve"]
|
||||
Loading…
Reference in New Issue