Compare commits
1 Commits
dhiltgen/r
...
v0.4.0-rc4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b3058e57e1 |
32
.github/workflows/release.yaml
vendored
32
.github/workflows/release.yaml
vendored
@@ -167,12 +167,23 @@ jobs:
|
||||
$cores = (Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores
|
||||
make -j $cores
|
||||
name: make
|
||||
- name: 'gather rocm dependencies'
|
||||
run: |
|
||||
$HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
|
||||
md "dist\deps\bin\rocblas\library"
|
||||
cp "${HIP_PATH}\bin\hipblas.dll" "dist\deps\bin\"
|
||||
cp "${HIP_PATH}\bin\rocblas.dll" "dist\deps\bin\"
|
||||
cp "${HIP_PATH}\bin\rocblas\library\*" "dist\deps\bin\rocblas\library\"
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: generate-windows-rocm
|
||||
path: |
|
||||
build/**/*
|
||||
dist/windows-amd64/**
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: windows-rocm-deps
|
||||
path: dist/deps/*
|
||||
|
||||
# CUDA generation step
|
||||
generate-windows-cuda:
|
||||
@@ -246,12 +257,24 @@ jobs:
|
||||
$env:OLLAMA_SKIP_CPU_GENERATE="1"
|
||||
$cores = (Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores
|
||||
make -j $cores
|
||||
- name: 'gather cuda dependencies'
|
||||
run: |
|
||||
$NVIDIA_DIR=(resolve-path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*\bin\')[0]
|
||||
md "dist\deps"
|
||||
cp "${NVIDIA_DIR}\cudart64_*.dll" "dist\deps\"
|
||||
cp "${NVIDIA_DIR}\cublas64_*.dll" "dist\deps\"
|
||||
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: generate-windows-cuda-${{ matrix.cuda.version }}
|
||||
path: |
|
||||
build/**/*
|
||||
dist/windows-amd64/**
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: windows-cuda-deps-${{ matrix.cuda.version }}
|
||||
path: dist/deps/*
|
||||
|
||||
|
||||
# windows arm64 generate, go build, and zip file (no installer)
|
||||
# Output of this build is aggregated into the final x86 build
|
||||
@@ -457,6 +480,15 @@ jobs:
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: generate-windows-cuda-12
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: windows-cuda-deps-11
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: windows-cuda-deps-12
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: windows-rocm-deps
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: generate-windows-rocm
|
||||
|
||||
@@ -46,8 +46,8 @@ package llama
|
||||
#cgo rocm CFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIPBLAS -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
|
||||
#cgo rocm CXXFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIPBLAS -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
|
||||
#cgo rocm LDFLAGS: -L${SRCDIR} -lggml_rocm -lhipblas -lamdhip64 -lrocblas
|
||||
#cgo windows CFLAGS: -Wno-discarded-qualifiers -D_WIN32_WINNT=0x602
|
||||
#cgo windows CXXFLAGS: -D_WIN32_WINNT=0x602
|
||||
#cgo windows CFLAGS: -Wno-discarded-qualifiers
|
||||
#cgo windows CFLAGS: -Wno-discarded-qualifiers
|
||||
#cgo windows LDFLAGS: -lmsvcrt
|
||||
#cgo windows LDFLAGS: -lmsvcrt -static-libstdc++ -static-libgcc -static
|
||||
#cgo windows,amd64 LDFLAGS: -L${SRCDIR}/build/Windows/amd64
|
||||
|
||||
@@ -17,8 +17,8 @@ else ifeq ($(OS),linux)
|
||||
GPU_LIB_DIR_LINUX := $(HIP_PATH)/lib
|
||||
GPU_COMPILER_LINUX := $(shell X=$$(which hipcc 2>/dev/null) && echo $$X)
|
||||
GPU_COMPILER:=$(GPU_COMPILER_LINUX)
|
||||
ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(GPU_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf))
|
||||
GPU_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL))
|
||||
ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(ROCM_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf))
|
||||
ROCM_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL))
|
||||
endif
|
||||
|
||||
# TODO future multi-variant support for ROCm
|
||||
@@ -38,13 +38,13 @@ GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -fPIC -D_GNU_SOURCE
|
||||
GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) -D_WIN32_WINNT=0x602
|
||||
GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -fPIC -D_GNU_SOURCE
|
||||
|
||||
GPU_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))
|
||||
ROCM_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))
|
||||
ifeq ($(OS),windows)
|
||||
ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH))/lib/ollama
|
||||
else ifeq ($(OS),linux)
|
||||
ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH)-rocm)/lib/ollama
|
||||
endif
|
||||
GPU_DIST_DEPS_LIBS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(GPU_LIBS)) $(notdir $(GPU_TRANSITIVE_LIBS))))
|
||||
ROCM_DIST_DEPS_LIBS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(ROCM_LIBS)) $(notdir $(ROCM_TRANSITIVE_LIBS))))
|
||||
ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt
|
||||
|
||||
ifeq ($(OS),linux)
|
||||
@@ -90,9 +90,12 @@ GPU_COMPILER_CUFLAGS = \
|
||||
include make/gpu.make
|
||||
|
||||
# Adjust the rules from gpu.make to handle the ROCm dependencies properly
|
||||
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(ROCBLAS_DIST_DEP_MANIFEST)
|
||||
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(ROCBLAS_DIST_DEP_MANIFEST) $(ROCM_DIST_DEPS_LIBS)
|
||||
$(ROCBLAS_DIST_DEP_MANIFEST):
|
||||
@-mkdir -p $(dir $@)
|
||||
@echo "Copying rocblas library..."
|
||||
cd $(GPU_LIB_DIR)/rocblas/library/ && tar cf - . | (cd $(dir $@) && tar xf - )
|
||||
@echo "rocblas library copy complete"
|
||||
$(ROCM_DIST_DEPS_LIBS):
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CP) $(dir $(filter %$(notdir $@),$(ROCM_LIBS) $(ROCM_TRANSITIVE_LIBS)))/$(notdir $@) $(dir $@)
|
||||
|
||||
@@ -19,9 +19,6 @@ GPU_COMPILER_CFLAGS_WIN = $(CFLAGS) -D_WIN32_WINNT=0x602
|
||||
GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -Xcompiler -fPIC -D_GNU_SOURCE
|
||||
GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) -D_WIN32_WINNT=0x602
|
||||
GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -Xcompiler -fPIC -D_GNU_SOURCE
|
||||
GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT)*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))))
|
||||
GPU_DIST_DEPS_LIBS= $(sort $(addprefix $(DIST_LIB_DIR)/,$(notdir $(GPU_LIBS))))
|
||||
|
||||
ifeq ($(OS),linux)
|
||||
CUDA_PATH?=/usr/local/cuda
|
||||
GPU_COMPILER_FPIC = -fPIC -Wno-unused-function -std=c++11
|
||||
|
||||
@@ -98,16 +98,13 @@ $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).
|
||||
$(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CP) $< $@
|
||||
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) $(GPU_DIST_DEPS_LIBS)
|
||||
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
|
||||
$(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CP) $< $@
|
||||
$(DIST_GPU_RUNNER_LIB_DEPS):
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CP) $(GPU_LIB_DIR)/$(notdir $@) $(dir $@)
|
||||
$(GPU_DIST_DEPS_LIBS):
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CP) $(dir $(filter %$(notdir $@),$(GPU_LIBS) $(GPU_TRANSITIVE_LIBS)))/$(notdir $@) $(dir $@)
|
||||
|
||||
# Payload targets
|
||||
$(RUNNERS_PAYLOAD_DIR)/%/ollama_llama_server.gz: $(RUNNERS_BUILD_DIR)/%/ollama_llama_server
|
||||
|
||||
Reference in New Issue
Block a user