Compare commits

Comparing parth/cons...v0.5.8-rc1 (50 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | e8d4eb3e68 |  |
|  | ae7e368f75 |  |
|  | 31acd1ebf9 |  |
|  | 9a4757ae66 |  |
|  | 7814019708 |  |
|  | b698f9a0d8 |  |
|  | 32285a6d19 |  |
|  | 1c198977ec |  |
|  | 330b6c50b0 |  |
|  | 928911bc68 |  |
|  | 5b446cc815 |  |
|  | 451c1596af |  |
|  | 932bded12f |  |
|  | 070ad913ac |  |
|  | 8d8b9f83ae |  |
|  | f00d359a67 |  |
|  | 291def6adb |  |
|  | cd3fbf1c49 |  |
|  | c852b8e021 |  |
|  | d8932c55e7 |  |
|  | 63f0269f7f |  |
|  | 4759ecae19 |  |
|  | 65b7ecac7b |  |
|  | f9d2d89135 |  |
|  | 669dc31cf3 |  |
|  | d4d338c224 |  |
|  | bfdeffc375 |  |
|  | e806184023 |  |
|  | 50566113ac |  |
|  | ad22ace439 |  |
|  | f4321a421c |  |
|  | 475333d533 |  |
|  | 39fd89308c |  |
|  | 548a9f56a6 |  |
|  | 3f0cb36bdb |  |
|  | bea1f1fac6 |  |
|  | 5d75d837ef |  |
|  | 711648c9bb |  |
|  | dcfb7a105c |  |
|  | 2ef3c803a1 |  |
|  | 453e4d090b |  |
|  | ca2f9843c8 |  |
|  | 294b6f5a22 |  |
|  | 7bb356c680 |  |
|  | 021817e59a |  |
|  | a420a453b4 |  |
|  | 42cf4db601 |  |
|  | 93a8daf285 |  |
|  | a041b4df7c |  |
|  | 2539f2dbf9 |  |
```diff
@@ -3,7 +3,9 @@ ollama
 app
 macapp
 dist
+build
 .env
 .cache
 test_data
-llama/build
+.git
+
```
**.gitattributes** (vendored) — 4 lines changed

```diff
@@ -15,6 +15,10 @@ ml/backend/**/*.cu linguist-vendored
 ml/backend/**/*.cuh linguist-vendored
 ml/backend/**/*.m linguist-vendored
 ml/backend/**/*.metal linguist-vendored
+ml/backend/**/CMakeLists.txt linguist-vendored
+
+llama/build-info.cpp linguist-generated
+ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s linguist-generated
 
 * text=auto
 *.go text eol=lf
```
**.github/ISSUE_TEMPLATE/10_bug_report.yml** (vendored) — 8 lines changed

```diff
@@ -9,6 +9,14 @@ body:
       description: What happened? What did you expect to happen?
     validations:
       required: true
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: Please copy and paste any relevant log output. See [Troubleshooting Guide](https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md#how-to-troubleshoot-issues) for details.
+      render: shell
+    validations:
+      required: false
   - type: dropdown
     id: os
     attributes:
```
**.github/workflows/release.yaml** (vendored) — 1021 lines changed

File diff suppressed because it is too large.
**.github/workflows/test.yaml** (vendored) — 98 lines changed

```diff
@@ -40,28 +40,106 @@ jobs:
 
   linux:
     needs: [changes]
-    if: ${{ needs.changes.outputs.changed == 'True' }}
+    if: needs.changes.outputs.changed == 'True'
     strategy:
       matrix:
         include:
-          - container: nvidia/cuda:11.8.0-devel-ubuntu22.04
-            preset: CUDA
-          - container: rocm/dev-ubuntu-22.04:6.1.2
-            preset: ROCm
+          - preset: CPU
+          - preset: CUDA
+            container: nvidia/cuda:11.8.0-devel-ubuntu22.04
+            flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
+          - preset: ROCm
+            container: rocm/dev-ubuntu-22.04:6.1.2
             extra-packages: rocm-libs
-    runs-on: ubuntu-latest
+            flags: '-DAMDGPU_TARGETS=gfx1010 -DCMAKE_PREFIX_PATH=/opt/rocm'
+    runs-on: linux
     container: ${{ matrix.container }}
     steps:
       - uses: actions/checkout@v4
       - run: |
-          apt-get update
-          apt-get install -y cmake pkg-config ${{ matrix.extra-packages }}
+          [ -n "${{ matrix.container }}" ] || sudo=sudo
+          $sudo apt-get update
+          $sudo apt-get install -y cmake ccache ${{ matrix.extra-packages }}
         env:
           DEBIAN_FRONTEND: noninteractive
+      - uses: actions/cache@v4
+        with:
+          path: /github/home/.cache/ccache
+          key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
       - run: |
-          cmake --preset ${{ matrix.preset }}
+          cmake --preset ${{ matrix.preset }} ${{ matrix.flags }}
           cmake --build --preset ${{ matrix.preset }} --parallel
 
+  windows:
+    needs: [changes]
+    if: needs.changes.outputs.changed == 'True'
+    strategy:
+      matrix:
+        include:
+          - preset: CPU
+          - preset: CUDA
+            install: https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe
+            flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
+          - preset: ROCm
+            install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe
+            flags: '-DAMDGPU_TARGETS=gfx1010'
+    runs-on: windows
+    steps:
+      - run: |
+          choco install -y --no-progress ccache ninja
+          ccache -o cache_dir=${{ github.workspace }}\.ccache
+      - if: matrix.preset == 'CUDA' || matrix.preset == 'ROCm'
+        id: cache-install
+        uses: actions/cache/restore@v4
+        with:
+          path: |
+            C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
+            C:\Program Files\AMD\ROCm
+          key: ${{ matrix.install }}
+      - if: matrix.preset == 'CUDA'
+        name: Install CUDA ${{ matrix.cuda-version }}
+        run: |
+          $ErrorActionPreference = "Stop"
+          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
+            Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
+            Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_11.8", "nvcc_11.8", "cublas_11.8", "cublas_dev_11.8")) -NoNewWindow -Wait
+          }
+
+          $cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path
+          echo "$cudaPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+      - if: matrix.preset == 'ROCm'
+        name: Install ROCm ${{ matrix.rocm-version }}
+        run: |
+          $ErrorActionPreference = "Stop"
+          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
+            Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
+            Start-Process -FilePath .\install.exe -ArgumentList '-install' -NoNewWindow -Wait
+          }
+
+          $hipPath = (Resolve-Path "C:\Program Files\AMD\ROCm\*").path
+          echo "$hipPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "CC=$hipPath\bin\clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "CXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+      - if: ${{ !cancelled() && steps.cache-install.outputs.cache-hit != 'true' }}
+        uses: actions/cache/save@v4
+        with:
+          path: |
+            C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
+            C:\Program Files\AMD\ROCm
+          key: ${{ matrix.install }}
+      - uses: actions/checkout@v4
+      - uses: actions/cache@v4
+        with:
+          path: ${{ github.workspace }}\.ccache
+          key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
+      - run: |
+          Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
+          Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
+          cmake --preset "${{ matrix.preset }}" ${{ matrix.flags }}
+          cmake --build --parallel --preset "${{ matrix.preset }}"
+        env:
+          CMAKE_GENERATOR: Ninja
+
   test:
     strategy:
       matrix:
@@ -85,5 +163,5 @@ jobs:
       - uses: actions/checkout@v4
       - name: Verify patches apply cleanly and do not change files
         run: |
-          make -f Makefile2 clean checkout sync
+          make -f Makefile.sync clean sync
           git diff --compact-summary --exit-code
```
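A note on the new install step above: the linux job now runs both inside GPU containers (where the user is root) and, for the CPU preset, directly on the runner, so the workflow only prefixes `sudo` when no container is set. A standalone sketch of that pattern — here `$CONTAINER` is a hypothetical stand-in for the workflow's `${{ matrix.container }}` expression:

```sh
# Hypothetical stand-in for ${{ matrix.container }}: empty when no container is used.
CONTAINER=""

# Inside a container we are already root, so $sudo stays empty;
# on the bare runner it expands to "sudo".
[ -n "$CONTAINER" ] || sudo=sudo
$sudo apt-get update
$sudo apt-get install -y cmake ccache
```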
**.gitignore** (vendored) — 5 lines changed

```diff
@@ -4,12 +4,13 @@
 .venv
 .swp
 dist
+build
 ollama
 .cache
 *.exe
 .idea
 test_data
 *.crt
-llama/build
 __debug_bin*
+llama/build
 llama/vendor
```
```diff
@@ -19,11 +19,30 @@ set(GGML_CCACHE ON)
 set(GGML_BACKEND_DL ON)
 set(GGML_BACKEND_SHARED ON)
 set(GGML_SCHED_MAX_COPIES 4)
-set(GGML_CPU_ALL_VARIANTS ON)
-set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
-set(GGML_LLAMAFILE ON)
 
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+set(GGML_LLAMAFILE ON)
+set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
+set(GGML_CUDA_GRAPHS ON)
+
+if((NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+    OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+"))
+    set(GGML_CPU_ALL_VARIANTS ON)
+endif()
+
+if (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
+    set(CMAKE_BUILD_RPATH "@loader_path")
+    set(CMAKE_INSTALL_RPATH "@loader_path")
+endif()
+
+set(OLLAMA_BUILD_DIR ${CMAKE_BINARY_DIR}/lib/ollama)
+set(OLLAMA_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/lib/ollama)
+
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OLLAMA_BUILD_DIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${OLLAMA_BUILD_DIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${OLLAMA_BUILD_DIR})
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OLLAMA_BUILD_DIR})
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${OLLAMA_BUILD_DIR})
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${OLLAMA_BUILD_DIR})
 
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include)
@@ -34,12 +53,66 @@ set(GGML_CPU ON)
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
 set_property(TARGET ggml PROPERTY EXCLUDE_FROM_ALL TRUE)
 
+get_target_property(CPU_VARIANTS ggml-cpu MANUALLY_ADDED_DEPENDENCIES)
+if(NOT CPU_VARIANTS)
+    set(CPU_VARIANTS "ggml-cpu")
+endif()
+
+install(TARGETS ggml-base ${CPU_VARIANTS}
+    RUNTIME_DEPENDENCIES
+        PRE_EXCLUDE_REGEXES ".*"
+    RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CPU
+    LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CPU
+    FRAMEWORK DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CPU
+)
+
 check_language(CUDA)
 if(CMAKE_CUDA_COMPILER)
+    if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24" AND NOT CMAKE_CUDA_ARCHITECTURES)
+        set(CMAKE_CUDA_ARCHITECTURES "native")
+    endif()
+
+    find_package(CUDAToolkit)
     add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)
+    set(OLLAMA_CUDA_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/cuda_v${CUDAToolkit_VERSION_MAJOR})
+    install(TARGETS ggml-cuda
+        RUNTIME_DEPENDENCIES
+            DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_LIBRARY_DIR}
+            PRE_INCLUDE_REGEXES cublas cublasLt cudart
+            PRE_EXCLUDE_REGEXES ".*"
+        RUNTIME DESTINATION ${OLLAMA_CUDA_INSTALL_DIR} COMPONENT CUDA
+        LIBRARY DESTINATION ${OLLAMA_CUDA_INSTALL_DIR} COMPONENT CUDA
+    )
 endif()
 
 check_language(HIP)
 if(CMAKE_HIP_COMPILER)
-    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)
+    set(HIP_PLATFORM "amd")
+
+    find_package(hip REQUIRED)
+    if(NOT AMDGPU_TARGETS)
+        list(FILTER AMDGPU_TARGETS INCLUDE REGEX "^gfx(900|94[012]|101[02]|1030|110[012])$")
+    endif()
+
+    if(AMDGPU_TARGETS)
+        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)
+
+        set(OLLAMA_HIP_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/rocm)
+        install(TARGETS ggml-hip
+            RUNTIME_DEPENDENCIES
+                DIRECTORIES ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR}
+                PRE_INCLUDE_REGEXES hipblas rocblas amdhip64 rocsolver amd_comgr hsa-runtime64 rocsparse tinfo rocprofiler-register drm drm_amdgpu numa elf
+                PRE_EXCLUDE_REGEXES ".*"
+                POST_EXCLUDE_REGEXES "system32"
+            RUNTIME DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP
+            LIBRARY DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP
+        )
+
+        foreach(HIP_LIB_BIN_INSTALL_DIR IN ITEMS ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR})
+            if(EXISTS ${HIP_LIB_BIN_INSTALL_DIR}/rocblas)
+                install(DIRECTORY ${HIP_LIB_BIN_INSTALL_DIR}/rocblas DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP)
+                break()
+            endif()
+        endforeach()
+    endif()
 endif()
```
```diff
@@ -4,10 +4,15 @@
     {
       "name": "Default",
       "binaryDir": "${sourceDir}/build",
+      "installDir": "${sourceDir}/dist",
       "cacheVariables": {
         "CMAKE_BUILD_TYPE": "Release"
       }
     },
+    {
+      "name": "CPU",
+      "inherits": [ "Default" ]
+    },
     {
       "name": "CUDA",
       "inherits": [ "Default" ]
@@ -42,20 +47,29 @@
     },
     {
       "name": "ROCm",
-      "inherits": [ "Default" ]
+      "inherits": [ "Default" ],
+      "cacheVariables": {
+        "CMAKE_HIP_PLATFORM": "amd"
+      }
     },
     {
       "name": "ROCm 6",
       "inherits": [ "ROCm" ],
       "cacheVariables": {
-        "CMAKE_HIP_ARCHITECTURES": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
+        "AMDGPU_TARGETS": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
       }
     }
   ],
   "buildPresets": [
     {
       "name": "Default",
-      "configurePreset": "Default"
+      "configurePreset": "Default",
+      "configuration": "Release"
+    },
+    {
+      "name": "CPU",
+      "configurePreset": "Default",
+      "targets": [ "ggml-cpu" ]
     },
     {
       "name": "CUDA",
```
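The Dockerfile and CI changes elsewhere in this comparison drive builds through these presets: configure with a named preset, build it, then install a single component. A minimal sketch using invocations that appear verbatim in the new Dockerfile below:

```sh
# Configure and build the "ROCm 6" preset defined in CMakePresets.json.
cmake --preset 'ROCm 6'
cmake --build --parallel --preset 'ROCm 6'

# Install only the HIP component; CPU and CUDA builds use their own component names.
cmake --install build --component HIP --strip --parallel 8
```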
**Dockerfile** — 281 lines changed

```diff
@@ -1,201 +1,128 @@
-ARG GOLANG_VERSION=1.22.8
-ARG CUDA_VERSION_11=11.3.1
-ARG CUDA_VERSION_12=12.4.0
-ARG ROCM_VERSION=6.1.2
-ARG JETPACK_6=r36.2.0
-ARG JETPACK_5=r35.4.1
+# vim: filetype=dockerfile
 
-### To create a local image for building linux binaries on mac or windows with efficient incremental builds
-#
-# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
-# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
-#
-### Then incremental builds will be much faster in this container
-#
-# make -j 10 dist
-#
-FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
-ARG GOLANG_VERSION
-ARG CUDA_VERSION_11
-ARG CUDA_VERSION_12
-COPY ./scripts/rh_linux_deps.sh /
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
-    dnf clean all && \
-    dnf install -y \
-    zsh \
-    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
-    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
-# TODO intel oneapi goes here...
-ENV GOARCH amd64
-ENV CGO_ENABLED 1
-WORKDIR /go/src/github.com/ollama/ollama/
-ENTRYPOINT [ "zsh" ]
+ARG FLAVOR=${TARGETARCH}
 
-### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
-# Note: this does not contain jetson variants
-#
-# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
-# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
-#
-FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
-ARG GOLANG_VERSION
-ARG CUDA_VERSION_11
-ARG CUDA_VERSION_12
-COPY ./scripts/rh_linux_deps.sh /
-RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
-    dnf config-manager --set-enabled appstream && \
-    dnf clean all && \
-    dnf install -y \
-    zsh \
-    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
-    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
-ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
-ENV GOARCH arm64
-ENV CGO_ENABLED 1
-WORKDIR /go/src/github.com/ollama/ollama/
-ENTRYPOINT [ "zsh" ]
+ARG ROCMVERSION=6.1.2
+ARG JETPACK5VERSION=r35.4.1
+ARG JETPACK6VERSION=r36.2.0
+ARG CMAKEVERSION=3.31.2
 
-FROM --platform=linux/amd64 unified-builder-amd64 AS build-amd64
-COPY . .
-ARG OLLAMA_SKIP_CUDA_GENERATE
-ARG OLLAMA_SKIP_ROCM_GENERATE
-ARG OLLAMA_FAST_BUILD
-ARG VERSION
-ARG CUSTOM_CPU_FLAGS
+FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCMVERSION}-complete AS base-amd64
+RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \
+    && yum install -y yum-utils devtoolset-10-gcc devtoolset-10-gcc-c++ \
+    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo \
+    && curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /usr/local/bin --strip-components 1
+ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:/opt/rh/devtoolset-11/root/usr/bin:$PATH
+
+FROM --platform=linux/arm64 rockylinux:8 AS base-arm64
+# install epel-release for ccache
+RUN yum install -y yum-utils epel-release \
+    && yum install -y clang ccache \
+    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
+ENV CC=clang CXX=clang++
+
+FROM base-${TARGETARCH} AS base
+ARG CMAKEVERSION
+RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
+ENV LDFLAGS=-s
+
+FROM base AS cpu
+# amd64 uses gcc which requires devtoolset-11 for AVX extensions while arm64 uses clang
+RUN if [ "$(uname -m)" = "x86_64" ]; then yum install -y devtoolset-11-gcc devtoolset-11-gcc-c++; fi
+ENV PATH=/opt/rh/devtoolset-11/root/usr/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
-    if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
-        make -j $(nproc) dist ; \
-    else \
-        make -j 5 dist ; \
-    fi
-RUN cd dist/linux-$GOARCH && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
-RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
-    cd dist/linux-$GOARCH-rocm && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
-    fi
+    cmake --preset 'CPU' \
+        && cmake --build --parallel --preset 'CPU' \
+        && cmake --install build --component CPU --strip --parallel 8
 
-# Jetsons need to be built in discrete stages
-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64
-ARG GOLANG_VERSION
-RUN apt-get update && apt-get install -y git curl ccache && \
-    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
-    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
-    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-WORKDIR /go/src/github.com/ollama/ollama/
-COPY . .
-ARG CGO_CFLAGS
-ENV GOARCH arm64
-ARG VERSION
+FROM base AS cuda-11
+ARG CUDA11VERSION=11.3
+RUN yum install -y cuda-toolkit-${CUDA11VERSION//./-}
+ENV PATH=/usr/local/cuda-11/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 dist_cuda_v11 \
-        CUDA_ARCHITECTURES="72;87" \
-        GPU_RUNNER_VARIANT=_jetpack5 \
-        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \
-        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5
+    cmake --preset 'CUDA 11' \
+        && cmake --build --parallel --preset 'CUDA 11' \
+        && cmake --install build --component CUDA --strip --parallel 8
 
-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64
-ARG GOLANG_VERSION
-RUN apt-get update && apt-get install -y git curl ccache && \
-    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
-    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
-    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-WORKDIR /go/src/github.com/ollama/ollama/
-COPY . .
-ARG CGO_CFLAGS
-ENV GOARCH arm64
-ARG VERSION
+FROM base AS cuda-12
+ARG CUDA12VERSION=12.4
+RUN yum install -y cuda-toolkit-${CUDA12VERSION//./-}
+ENV PATH=/usr/local/cuda-12/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 dist_cuda_v12 \
-        CUDA_ARCHITECTURES="87" \
-        GPU_RUNNER_VARIANT=_jetpack6 \
-        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \
-        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6
+    cmake --preset 'CUDA 12' \
+        && cmake --build --parallel --preset 'CUDA 12' \
+        && cmake --install build --component CUDA --strip --parallel 8
 
-FROM --platform=linux/arm64 unified-builder-arm64 AS build-arm64
-COPY . .
-ARG OLLAMA_SKIP_CUDA_GENERATE
-ARG OLLAMA_FAST_BUILD
-ARG VERSION
+FROM base AS rocm-6
 RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 dist
-COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-RUN cd dist/linux-$GOARCH && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
-RUN cd dist/linux-$GOARCH-jetpack5 && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz
-RUN cd dist/linux-$GOARCH-jetpack6 && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz
+    cmake --preset 'ROCm 6' \
+        && cmake --build --parallel --preset 'ROCm 6' \
+        && cmake --install build --component HIP --strip --parallel 8
 
-FROM --platform=linux/amd64 scratch AS dist-amd64
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
-FROM --platform=linux/arm64 scratch AS dist-arm64
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
-FROM dist-$TARGETARCH AS dist
-
-# For amd64 container images, filter out cuda/rocm to minimize size
-FROM build-amd64 AS runners-cuda-amd64
-RUN rm -rf \
-    ./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
-    ./dist/linux-amd64/lib/ollama/runners/rocm*
-
-FROM build-amd64 AS runners-rocm-amd64
-RUN rm -rf \
-    ./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
-    ./dist/linux-amd64/lib/ollama/libcu*.so* \
-    ./dist/linux-amd64/lib/ollama/runners/cuda*
-
-FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
-COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
-
-FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
-COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/
-
-# ROCm libraries larger so we keep it distinct from the CPU/CUDA image
-FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
-# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer
-# across releases
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
-COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
-
-EXPOSE 11434
-ENV OLLAMA_HOST 0.0.0.0
-
-ENTRYPOINT ["/bin/ollama"]
-CMD ["serve"]
-
-FROM runtime-$TARGETARCH
-EXPOSE 11434
-ENV OLLAMA_HOST 0.0.0.0
+FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5
+ARG CMAKEVERSION
+RUN apt-get update && apt-get install -y curl ccache \
+    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'JetPack 5' \
+        && cmake --build --parallel --preset 'JetPack 5' \
+        && cmake --install build --component CUDA --strip --parallel 8
+
+FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6
+ARG CMAKEVERSION
+RUN apt-get update && apt-get install -y curl ccache \
+    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'JetPack 6' \
+        && cmake --build --parallel --preset 'JetPack 6' \
+        && cmake --install build --component CUDA --strip --parallel 8
+
+FROM base AS build
+ARG GOVERSION=1.23.4
+RUN curl -fsSL https://golang.org/dl/go${GOVERSION}.linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
+ENV PATH=/usr/local/go/bin:$PATH
+WORKDIR /go/src/github.com/ollama/ollama
+COPY . .
+ARG GOFLAGS="'-ldflags=-w -s'"
+ENV CGO_ENABLED=1
+RUN --mount=type=cache,target=/root/.cache/go-build \
+    go build -trimpath -buildmode=pie -o /bin/ollama .
+
+FROM --platform=linux/amd64 scratch AS amd64
+COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
+COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
+
+FROM --platform=linux/arm64 scratch AS arm64
+COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
+COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
+COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 lib/ollama/cuda_jetpack5
+COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 lib/ollama/cuda_jetpack6
+
+FROM --platform=linux/arm64 scratch AS rocm
+COPY --from=rocm-6 dist/lib/ollama/rocm /lib/ollama/rocm
+
+FROM ${FLAVOR} AS archive
+COPY --from=cpu dist/lib/ollama /lib/ollama
+COPY --from=build /bin/ollama /bin/ollama
+
+FROM ubuntu:20.04
+RUN apt-get update \
+    && apt-get install -y ca-certificates \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+COPY --from=archive /bin /usr/bin
 ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+COPY --from=archive /lib/ollama /usr/lib/ollama
 ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_VISIBLE_DEVICES=all
+ENV OLLAMA_HOST=0.0.0.0:11434
+EXPOSE 11434
 ENTRYPOINT ["/bin/ollama"]
 CMD ["serve"]
```
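The rewritten Dockerfile routes everything through `FROM ${FLAVOR} AS archive`, where `FLAVOR` defaults to `TARGETARCH`; overriding it selects the `rocm` payload stage instead. A hedged sketch of how such a build might be invoked — the image tags here are illustrative, not from this diff:

```sh
# Default build: FLAVOR falls back to TARGETARCH, yielding the CPU/CUDA image.
docker build --platform linux/amd64 -t ollama:custom .

# ROCm build: override FLAVOR so the archive stage pulls from the rocm stage.
docker build --platform linux/amd64 --build-arg FLAVOR=rocm -t ollama:custom-rocm .
```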
**Dockerfile2** — 66 lines changed (deleted)

```diff
@@ -1,66 +0,0 @@
-ARG CUDA_11_VERSION=11.3
-ARG CUDA_12_VERSION=12.4
-ARG ROCM_VERSION=6.1.2
-ARG JETPACK_5_VERSION=r35.4.1
-ARG JETPACK_6_VERSION=r36.2.0
-ARG CMAKE_VERSION=3.31.2
-
-FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS base
-ARG CMAKE_VERSION
-RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz | tar xz -C /usr --strip-components 1
-RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \
-    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
-
-# FROM --platform=linux/arm64 rockylinux:8 AS base
-# ARG CMAKE_VERSION
-# RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
-# RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
-
-FROM base AS amd64
-ARG CUDA_11_VERSION
-ARG CUDA_12_VERSION
-RUN yum install -y cuda-toolkit-${CUDA_11_VERSION//./-} \
-    && yum install -y cuda-toolkit-${CUDA_12_VERSION//./-}
-COPY CMakeLists.txt CMakeLists.txt
-COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
-
-FROM --platform=linux/amd64 amd64 AS cuda_11
-ENV PATH=/usr/local/cuda-${CUDA_11_VERSION}/bin:$PATH
-RUN cmake -S . -B build -DCMAKE_CUDA_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
-RUN cmake --build build --target ggml-cuda -j
-
-FROM --platform=linux/amd64 amd64 AS cuda_12
-ENV PATH=/usr/local/cuda-${CUDA_12_VERSION}/bin:$PATH
-RUN cmake -S . -B build -DCMAKE_CUDA_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
-RUN cmake --build build --target ggml-cuda -j
-
-FROM --platform=linux/amd64 amd64 AS rocm
-RUN cmake -S . -B build -DCMAKE_HIP_ARCHITECTURES="gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
-RUN cmake --build build --target ggml-hip -j
-
-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5_VERSION} AS jetpack_5
-ARG CMAKE_VERSION
-RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
-COPY CMakeLists.txt .
-COPY ml/backend/ggml/ggml .
-RUN cmake -S . -B build \
-    -DCMAKE_CUDA_ARCHITECTURES="72;87"
-RUN cmake --build build --target ggml-cuda
-
-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6_VERSION} AS jetpack_6
-ARG CMAKE_VERSION
-RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
-COPY CMakeLists.txt .
-COPY ml/backend/ggml/ggml .
-RUN cmake -S . -B build \
-    -DCMAKE_CUDA_ARCHITECTURES="87"
-RUN cmake --build build --target ggml-cuda
-
-FROM --platform=linux/amd64 golang:1.23
-COPY --from=cuda_11 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-11.so
-COPY --from=cuda_12 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-12.so
-COPY --from=rocm build/ml/backend/ggml/ggml/src/ggml-hip/libggml-hip.so libggml-hip.so
-
-# FROM --platform=linux/arm64 golang:1.23
-# COPY --from=jetpack_5 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-jetpack-5.so
-# COPY --from=jetpack_6 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-jetpack-6.so
```
**Makefile.sync** — 60 lines (new file)

```diff
@@ -0,0 +1,60 @@
+UPSTREAM=https://github.com/ggerganov/llama.cpp.git
+WORKDIR=llama/vendor
+FETCH_HEAD=46e3556e01b824e52395fb050b29804b6cff2a7c
+
+.PHONY: help
+help:
+	@echo "Available targets:"
+	@echo "    sync                 Sync with upstream repositories"
+	@echo "    checkout             Checkout upstream repository"
+	@echo "    apply-patches        Apply patches to local repository"
+	@echo "    format-patches       Format patches from local repository"
+	@echo "    clean                Clean local repository"
+	@echo
+	@echo "Example:"
+	@echo "    make -f $(lastword $(MAKEFILE_LIST)) clean sync"
+
+.PHONY: sync
+sync: llama/build-info.cpp llama/llama.cpp ml/backend/ggml/ggml apply-patches
+
+.PHONY: llama/build-info.cpp
+llama/build-info.cpp: llama/build-info.cpp.in
+	sed -e 's|@FETCH_HEAD@|$(FETCH_HEAD)|' $< > $@
+
+.PHONY: llama/llama.cpp
+llama/llama.cpp: llama/vendor/ apply-patches
+	rsync -arvzc -f "merge $@/.rsync-filter" $< $@
+
+.PHONY: ml/backend/ggml/ggml apply-patches
+ml/backend/ggml/ggml: llama/vendor/ggml/ apply-patches
+	rsync -arvzc -f "merge $@/.rsync-filter" $< $@
+
+PATCHES=$(wildcard llama/patches/*.patch)
+
+.PHONY: apply-patches
+.NOTPARALLEL:
+apply-patches: $(addsuffix ed, $(PATCHES))
+
+%.patched: %.patch
+	@if git -c user.name=nobody -c 'user.email=<>' -C $(WORKDIR) am -3 $(realpath $<); then touch $@; else git -C $(WORKDIR) am --abort; exit 1; fi
+
+.PHONY: checkout
+checkout: $(WORKDIR)
+	git -C $(WORKDIR) fetch
+	git -C $(WORKDIR) checkout -f $(FETCH_HEAD)
+
+$(WORKDIR):
+	git clone $(UPSTREAM) $(WORKDIR)
+
+.PHONE: format-patches
+format-patches: llama/patches
+	git -C $(WORKDIR) format-patch \
+		--no-signature \
+		--no-numbered \
+		--zero-commit \
+		-o $(realpath $<) \
+		$(FETCH_HEAD)
+
+.PHONE: clean
+clean: checkout
+	$(RM) $(addsuffix ed, $(PATCHES))
```
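Makefile.sync replaces Makefile2 (deleted below) as the driver for vendoring llama.cpp, renaming `apply_patches`/`format_patches` to `apply-patches`/`format-patches` and adding `llama/build-info.cpp` generation. The CI job above exercises it exactly as the help text suggests:

```sh
# Reset the vendored checkout, then re-sync llama.cpp and ggml with patches applied.
make -f Makefile.sync clean sync

# Verify the sync did not change any tracked files (as the test workflow does).
git diff --compact-summary --exit-code
```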
**Makefile2** — 46 lines changed (deleted)

```diff
@@ -1,46 +0,0 @@
-UPSTREAM=https://github.com/ggerganov/llama.cpp.git
-WORKDIR=llama/vendor
-FETCH_HEAD=46e3556e01b824e52395fb050b29804b6cff2a7c
-
-all: sync
-
-.PHONY: sync
-sync: llama/llama.cpp ml/backend/ggml/ggml
-
-.PHONY: llama/llama.cpp
-llama/llama.cpp: llama/vendor/ apply_patches
-	rsync -arvzc -f "merge $@/.rsync-filter" $< $@
-
-.PHONY: ml/backend/ggml/ggml apply_patches
-ml/backend/ggml/ggml: llama/vendor/ggml/ apply_patches
-	rsync -arvzc -f "merge $@/.rsync-filter" $< $@
-
-PATCHES=$(wildcard llama/patches/*.patch)
-
-.PHONY: apply_patches
-.NOTPARALLEL:
-apply_patches: $(addsuffix ed, $(PATCHES))
-
-%.patched: %.patch
-	@if git -c user.name=nobody -c 'user.email=<>' -C $(WORKDIR) am -3 $(realpath $<); then touch $@; else git -C $(WORKDIR) am --abort; exit 1; fi
-
-.PHONY: checkout
-checkout: $(WORKDIR)
-	git -C $(WORKDIR) fetch
-	git -C $(WORKDIR) checkout -f $(FETCH_HEAD)
-
-$(WORKDIR):
-	git clone $(UPSTREAM) $(WORKDIR)
-
-.PHONE: format_patches
-format_patches: llama/patches
-	git -C $(WORKDIR) format-patch \
-		--no-signature \
-		--no-numbered \
-		--zero-commit \
-		-o $(realpath $<) \
-		$(FETCH_HEAD)
-
-.PHONE: clean
-clean: checkout
-	$(RM) $(addsuffix ed, $(PATCHES))
```
**README.md** — 11 lines changed

```diff
@@ -369,6 +369,11 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Minima](https://github.com/dmayboroda/minima) (RAG with on-premises or fully local workflow)
 - [aidful-ollama-model-delete](https://github.com/AidfulAI/aidful-ollama-model-delete) (User interface for simplified model cleanup)
 - [Perplexica](https://github.com/ItzCrazyKns/Perplexica) (An AI-powered search engine & an open-source alternative to Perplexity AI)
+- [Ollama Chat WebUI for Docker](https://github.com/oslook/ollama-webui) (Support for local docker deployment, lightweight ollama webui)
+- [AI Toolkit for Visual Studio Code](https://aka.ms/ai-tooklit/ollama-docs) (Microsoft-official VSCode extension to chat, test, evaluate models with Ollama support, and use them in your AI applications.)
+- [MinimalNextOllamaChat](https://github.com/anilkay/MinimalNextOllamaChat) (Minimal Web UI for Chat and Model Control)
+- [Chipper](https://github.com/TilmanGriesel/chipper) AI interface for tinkerers (Ollama, Haystack RAG, Python)
+- [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
 
 ### Cloud
 
@@ -481,6 +486,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [GoLamify](https://github.com/prasad89/golamify)
 - [Ollama for Haskell](https://github.com/tusharad/ollama-haskell)
 - [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in unified API)
+- [LlmTornado](https://github.com/lofcz/llmtornado) (C# library providing a unified interface for major FOSS & Commercial inference APIs)
 
 ### Mobile
 
@@ -531,6 +537,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [TextCraft](https://github.com/suncloudsmoon/TextCraft) (Copilot in Word alternative using Ollama)
 - [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow)
 - [TextLLaMA](https://github.com/adarshM84/TextLLaMA) A Chrome Extension that helps you write emails, correct grammar, and translate into any language
+- [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)
 
 ### Supported backends
 
@@ -539,4 +546,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Observability
 
 - [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
 - [HoneyHive](https://docs.honeyhive.ai/integrations/ollama) is an AI observability and evaluation platform for AI agents. Use HoneyHive to evaluate agent performance, interrogate failures, and monitor quality in production.
+- [Langfuse](https://langfuse.com/docs/integrations/ollama) is an open source LLM observability platform that enables teams to collaboratively monitor, evaluate and debug AI applications.
+- [MLflow Tracing](https://mlflow.org/docs/latest/llms/tracing/index.html#automatic-tracing) is an open source LLM observability tool with a convenient API to log and visualize traces, making it easy to debug and evaluate GenAI applications.
```
**cache/cache.go** (vendored) — 63 lines changed (deleted)

```diff
@@ -1,63 +0,0 @@
-package cache
-
-import (
-	"github.com/ollama/ollama/ml"
-)
-
-type Options struct {
-	Position int
-}
-
-type Cache interface {
-	Sub(i int) Cache
-	Put(ctx ml.Context, key, value ml.Tensor, opts Options) (ml.Tensor, ml.Tensor)
-}
-
-type Simple struct {
-	DType    ml.DType
-	Capacity int
-
-	keys, values []ml.Tensor
-}
-
-func (c *Simple) Sub(i int) Cache {
-	if i >= len(c.keys) {
-		c.keys = append(c.keys, make([]ml.Tensor, i-len(c.keys)+1)...)
-		c.values = append(c.values, make([]ml.Tensor, i-len(c.values)+1)...)
-	}
-
-	return &Simple{
-		keys:     c.keys[i : i+1],
-		values:   c.values[i : i+1],
-		Capacity: c.Capacity,
-		DType:    c.DType,
-	}
-}
-
-func (c *Simple) Put(ctx ml.Context, key, value ml.Tensor, opts Options) (ml.Tensor, ml.Tensor) {
-	if c.keys[0] == nil || c.values[0] == nil {
-		c.keys[0] = ctx.Zeros(c.DType, int(key.Dim(0)*key.Dim(1))*c.Capacity)
-		c.values[0] = ctx.Zeros(c.DType, int(value.Dim(0)*value.Dim(1))*c.Capacity)
-	}
-
-	ctx.Forward(key.Copy(ctx, c.keys[0].View(ctx, int(key.Stride(2))*opts.Position, int(key.Dim(0)*key.Dim(1)*key.Dim(2)))))
-	ctx.Forward(value.Copy(ctx, c.values[0].View(ctx, int(value.Stride(2))*opts.Position, int(value.Dim(0)*value.Dim(1)*value.Dim(2)))))
-
-	n := min(c.Capacity, int(key.Dim(2))+opts.Position)
-
-	key = c.keys[0].View(ctx, 0,
-		int(key.Dim(0)), int(key.Stride(1)),
-		int(key.Dim(1)), int(key.Stride(2)),
-		n,
-	)
-
-	value = c.values[0].View(ctx, 0,
-		int(value.Dim(0)), int(value.Stride(1)),
-		int(value.Dim(1)), int(value.Stride(2)),
-		n,
-	)
-
-	// TODO shift context if necessary
-
-	return key, value
-}
```
```diff
@@ -59,7 +59,7 @@ func getModelfileName(cmd *cobra.Command) (string, error) {
 
 	_, err = os.Stat(absName)
 	if err != nil {
-		return filename, err
+		return "", err
 	}
 
 	return absName, nil
```

```diff
@@ -279,7 +279,7 @@ func TestGetModelfileName(t *testing.T) {
 			name:          "no modelfile specified, no modelfile exists",
 			modelfileName: "",
 			fileExists:    false,
-			expectedName:  "Modelfile",
+			expectedName:  "",
 			expectedErr:   os.ErrNotExist,
 		},
 		{
@@ -293,7 +293,7 @@ func TestGetModelfileName(t *testing.T) {
 			name:          "modelfile specified, no modelfile exists",
 			modelfileName: "crazyfile",
 			fileExists:    false,
-			expectedName:  "crazyfile",
+			expectedName:  "",
 			expectedErr:   os.ErrNotExist,
 		},
 		{
```
```diff
@@ -9,7 +9,7 @@ import (
 	"log/slog"
 	"strings"
 
-	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/llm"
 )
 
 type ModelParameters struct {
@@ -27,8 +27,8 @@ type AdapterParameters struct {
 	} `json:"lora_parameters"`
 }
 
-func (ModelParameters) KV(t *Tokenizer) ggml.KV {
-	kv := ggml.KV{
+func (ModelParameters) KV(t *Tokenizer) llm.KV {
+	kv := llm.KV{
 		"general.file_type":            uint32(1),
 		"general.quantization_version": uint32(2),
 		"tokenizer.ggml.pre":           t.Pre,
@@ -54,7 +54,7 @@ func (ModelParameters) KV(t *Tokenizer) ggml.KV {
 	return kv
 }
 
-func (p AdapterParameters) KV() ggml.KV {
+func (p AdapterParameters) KV() llm.KV {
 	var alpha float32
 	if p.LoraParameters.Alpha == 0 {
 		alpha = float32(p.Alpha)
@@ -62,7 +62,7 @@ func (p AdapterParameters) KV() ggml.KV {
 		alpha = p.LoraParameters.Alpha
 	}
 
-	kv := ggml.KV{
+	kv := llm.KV{
 		"adapter.lora.alpha": alpha,
 		"adapter.type":       "lora",
 		"general.file_type":  uint32(1),
@@ -79,19 +79,19 @@ func (ModelParameters) specialTokenTypes() []string {
 	}
 }
 
-func (ModelParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
-	return ggml.WriteGGUF(ws, kv, ts)
+func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
+	return llm.WriteGGUF(ws, kv, ts)
 }
 
-func (AdapterParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
-	return ggml.WriteGGUF(ws, kv, ts)
+func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
+	return llm.WriteGGUF(ws, kv, ts)
 }
 
 type ModelConverter interface {
 	// KV maps parameters to LLM key-values
-	KV(*Tokenizer) ggml.KV
+	KV(*Tokenizer) llm.KV
 	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
-	Tensors([]Tensor) []ggml.Tensor
+	Tensors([]Tensor) []llm.Tensor
 	// Replacements returns a list of string pairs to replace in tensor names.
 	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
 	Replacements() []string
@@ -99,7 +99,7 @@ type ModelConverter interface {
 	// specialTokenTypes returns any special token types the model uses
 	specialTokenTypes() []string
 	// writeFile writes the model to the provided io.WriteSeeker
-	writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
+	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
 }
 
 type moreParser interface {
@@ -108,17 +108,17 @@ type moreParser interface {
 
 type AdapterConverter interface {
 	// KV maps parameters to LLM key-values
-	KV(ggml.KV) ggml.KV
+	KV(llm.KV) llm.KV
 	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
-	Tensors([]Tensor) []ggml.Tensor
+	Tensors([]Tensor) []llm.Tensor
 	// Replacements returns a list of string pairs to replace in tensor names.
 	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
 	Replacements() []string
 
-	writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
+	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
 }
 
-func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
+func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
 	bts, err := fs.ReadFile(fsys, "adapter_config.json")
 	if err != nil {
 		return err
@@ -191,6 +191,8 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
 		conv = &qwen2Model{}
 	case "BertModel":
 		conv = &bertModel{}
+	case "CohereForCausalLM":
+		conv = &commandrModel{}
 	default:
 		return errors.New("unsupported architecture")
 	}
```
```diff
@@ -8,7 +8,7 @@ import (
 	"slices"
 	"strings"
 
-	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/llm"
 )
 
 type bertModel struct {
@@ -85,7 +85,7 @@ func (p *bertModel) parseMore(fsys fs.FS) error {
 	return nil
 }
 
-func (p *bertModel) KV(t *Tokenizer) ggml.KV {
+func (p *bertModel) KV(t *Tokenizer) llm.KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "bert"
 	kv["bert.attention.causal"] = false
@@ -132,8 +132,8 @@ func (p *bertModel) KV(t *Tokenizer) ggml.KV {
 	return kv
 }
 
-func (p *bertModel) Tensors(ts []Tensor) []ggml.Tensor {
-	var out []ggml.Tensor
+func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor
 	for _, t := range ts {
 		if slices.Contains([]string{
 			"embeddings.position_ids",
@@ -143,7 +143,7 @@ func (p *bertModel) Tensors(ts []Tensor) []ggml.Tensor {
 			continue
 		}
 
-		out = append(out, ggml.Tensor{
+		out = append(out, llm.Tensor{
 			Name:  t.Name(),
 			Kind:  t.Kind(),
 			Shape: t.Shape(),
```
76 convert/convert_commandr.go Normal file
@@ -0,0 +1,76 @@
+package convert
+
+import (
+	"cmp"
+
+	"github.com/ollama/ollama/llm"
+)
+
+type commandrModel struct {
+	ModelParameters
+	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
+	HiddenSize            uint32  `json:"hidden_size"`
+	HiddenLayers          uint32  `json:"num_hidden_layers"`
+	IntermediateSize      uint32  `json:"intermediate_size"`
+	NumAttentionHeads     uint32  `json:"num_attention_heads"`
+	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
+	LayerNormEPS          float32 `json:"layer_norm_eps"`
+	RopeTheta             float32 `json:"rope_theta"`
+	UseQKNorm             bool    `json:"use_qk_norm"`
+	MaxLength             uint32  `json:"model_max_length"`
+	LogitScale            float32 `json:"logit_scale"`
+	NCtx                  uint32  `json:"n_ctx"`
+}
+
+var _ ModelConverter = (*commandrModel)(nil)
+
+func (p *commandrModel) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
+	kv["general.architecture"] = "command-r"
+	kv["general.name"] = "command-r"
+	kv["command-r.context_length"] = cmp.Or(p.MaxLength, p.MaxPositionEmbeddings, p.NCtx)
+	kv["command-r.embedding_length"] = p.HiddenSize
+	kv["command-r.block_count"] = p.HiddenLayers
+	kv["command-r.feed_forward_length"] = p.IntermediateSize
+	kv["command-r.attention.head_count"] = p.NumAttentionHeads
+	kv["command-r.attention.head_count_kv"] = p.NumKeyValueHeads
+	kv["command-r.attention.layer_norm_epsilon"] = p.LayerNormEPS
+	kv["command-r.rope.freq_base"] = p.RopeTheta
+	kv["command-r.max_position_embeddings"] = cmp.Or(p.MaxLength, p.MaxPositionEmbeddings)
+	kv["command-r.logit_scale"] = p.LogitScale
+	kv["command-r.rope.scaling.type"] = "none"
+
+	return kv
+}
+
+func (p *commandrModel) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor
+	for _, t := range ts {
+		out = append(out, llm.Tensor{
+			Name:     t.Name(),
+			Kind:     t.Kind(),
+			Shape:    t.Shape(),
+			WriterTo: t,
+		})
+	}
+
+	return out
+}
+
+func (p *commandrModel) Replacements() []string {
+	return []string{
+		"self_attn.q_norm", "attn_q_norm",
+		"self_attn.k_norm", "attn_k_norm",
+		"model.layers", "blk",
+		"input_layernorm", "attn_norm",
+		"mlp.down_proj", "ffn_down",
+		"mlp.gate_proj", "ffn_gate",
+		"mlp.up_proj", "ffn_up",
+		"self_attn.k_proj", "attn_k",
+		"self_attn.o_proj", "attn_output",
+		"self_attn.q_proj", "attn_q",
+		"self_attn.v_proj", "attn_v",
+		"model.norm", "output_norm",
+		"model.embed_tokens", "token_embd",
+	}
+}
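The context_length line above relies on cmp.Or from the standard library, which returns the first non-zero value among its arguments, so model_max_length wins when present, then max_position_embeddings, then n_ctx. A standalone sketch of that fallback (the concrete numbers are illustrative):

package main

import (
	"cmp"
	"fmt"
)

func main() {
	var maxLength, nCtx uint32 = 0, 4096
	maxPos := uint32(131072)
	// cmp.Or returns its first non-zero argument, so an unset (zero)
	// model_max_length falls through to max_position_embeddings.
	fmt.Println(cmp.Or(maxLength, maxPos, nCtx)) // 131072
}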
@@ -6,7 +6,7 @@ import (
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"

-	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/llm"
 )

 type gemmaModel struct {
@@ -23,7 +23,7 @@ type gemmaModel struct {

 var _ ModelConverter = (*gemmaModel)(nil)

-func (p *gemmaModel) KV(t *Tokenizer) ggml.KV {
+func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma"
 	kv["gemma.context_length"] = p.MaxPositionEmbeddings
@@ -42,14 +42,14 @@ func (p *gemmaModel) KV(t *Tokenizer) ggml.KV {
 	return kv
 }

-func (p *gemmaModel) Tensors(ts []Tensor) []ggml.Tensor {
-	var out []ggml.Tensor
+func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor
 	for _, t := range ts {
 		if strings.HasSuffix(t.Name(), "_norm.weight") {
 			t.SetRepacker(p.addOne)
 		}

-		out = append(out, ggml.Tensor{
+		out = append(out, llm.Tensor{
 			Name:  t.Name(),
 			Kind:  t.Kind(),
 			Shape: t.Shape(),
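The SetRepacker(p.addOne) call above exists because gemma checkpoints store their norm weights as an offset from one; the converter adds the one back while writing. The real addOne goes through the pdevine/tensor package; this is only a plain-slice sketch of the same adjustment:

// addOneSketch mirrors what a repacker like p.addOne has to do for gemma
// norm weights: shift each stored value by +1 before it is written out.
func addOneSketch(data []float32) []float32 {
	out := make([]float32, len(data))
	for i, v := range data {
		out[i] = v + 1
	}
	return out
}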
@@ -1,6 +1,8 @@
 package convert

-import "github.com/ollama/ollama/fs/ggml"
+import (
+	"github.com/ollama/ollama/llm"
+)

 type gemma2Model struct {
 	gemmaModel
@@ -9,7 +11,7 @@ type gemma2Model struct {
 	FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
 }

-func (p *gemma2Model) KV(t *Tokenizer) ggml.KV {
+func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma2"
 	kv["gemma2.context_length"] = p.MaxPositionEmbeddings
@@ -6,7 +6,7 @@ import (
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"

-	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/llm"
 )

 type gemma2Adapter struct {
@@ -15,14 +15,14 @@ type gemma2Adapter struct {

 var _ AdapterConverter = (*gemma2Adapter)(nil)

-func (p *gemma2Adapter) KV(baseKV ggml.KV) ggml.KV {
+func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
 	kv := p.AdapterParameters.KV()
 	kv["general.architecture"] = "gemma2"
 	return kv
 }

-func (p *gemma2Adapter) Tensors(ts []Tensor) []ggml.Tensor {
-	var out []ggml.Tensor
+func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor
 	for _, t := range ts {
 		shape := t.Shape()
 		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
@@ -31,7 +31,7 @@ func (p *gemma2Adapter) Tensors(ts []Tensor) []ggml.Tensor {
 			t.SetRepacker(p.repack)
 		}

-		out = append(out, ggml.Tensor{
+		out = append(out, llm.Tensor{
 			Name:  t.Name(),
 			Kind:  t.Kind(),
 			Shape: t.Shape(),
@@ -9,7 +9,7 @@ import (
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"

-	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/llm"
 )

 type llamaModel struct {
@@ -46,7 +46,7 @@ type llamaModel struct {

 var _ ModelConverter = (*llamaModel)(nil)

-func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
+func (p *llamaModel) KV(t *Tokenizer) llm.KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "llama"
 	kv["llama.vocab_size"] = p.VocabSize
@@ -120,11 +120,11 @@ func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
 	return kv
 }

-func (p *llamaModel) Tensors(ts []Tensor) []ggml.Tensor {
-	var out []ggml.Tensor
+func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor

 	if p.RopeScaling.factors != nil {
-		out = append(out, ggml.Tensor{
+		out = append(out, llm.Tensor{
 			Name:  "rope_freqs.weight",
 			Kind:  0,
 			Shape: []uint64{uint64(len(p.RopeScaling.factors))},
@@ -138,7 +138,7 @@ func (p *llamaModel) Tensors(ts []Tensor) []ggml.Tensor {
 			t.SetRepacker(p.repack)
 		}

-		out = append(out, ggml.Tensor{
+		out = append(out, llm.Tensor{
 			Name:  t.Name(),
 			Kind:  t.Kind(),
 			Shape: t.Shape(),
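The rope_freqs.weight tensor above is synthesized by the converter rather than read from the input: its payload is the parsed rope scaling factors, which can serve as a tensor body because they implement io.WriterTo. A rough sketch of that trick; the repository uses its own internal type for this, so ropeFactorsSketch here is hypothetical:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
)

// ropeFactorsSketch lets a plain []float32 serve as a tensor payload by
// writing itself out in little-endian order, the way GGUF stores F32 data.
type ropeFactorsSketch []float32

func (r ropeFactorsSketch) WriteTo(w io.Writer) (int64, error) {
	if err := binary.Write(w, binary.LittleEndian, []float32(r)); err != nil {
		return 0, err
	}
	return int64(len(r)) * 4, nil
}

var _ io.WriterTo = ropeFactorsSketch(nil)

func main() {
	var buf bytes.Buffer
	n, _ := ropeFactorsSketch{1, 2, 3}.WriteTo(&buf)
	fmt.Println(n, buf.Len()) // 12 12
}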
@@ -7,7 +7,7 @@ import (
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"

-	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/llm"
 )

 type llamaAdapter struct {
@@ -18,7 +18,7 @@ type llamaAdapter struct {

 var _ AdapterConverter = (*llamaAdapter)(nil)

-func (p *llamaAdapter) KV(baseKV ggml.KV) ggml.KV {
+func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
 	kv := p.AdapterParameters.KV()
 	kv["general.architecture"] = "llama"
 	kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
@@ -29,8 +29,8 @@ func (p *llamaAdapter) KV(baseKV ggml.KV) ggml.KV {
 	return kv
 }

-func (p *llamaAdapter) Tensors(ts []Tensor) []ggml.Tensor {
-	var out []ggml.Tensor
+func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor
 	for _, t := range ts {
 		shape := t.Shape()
 		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
@@ -41,7 +41,7 @@ func (p *llamaAdapter) Tensors(ts []Tensor) []ggml.Tensor {
 			t.SetRepacker(p.repack)
 		}

-		out = append(out, ggml.Tensor{
+		out = append(out, llm.Tensor{
 			Name:  t.Name(),
 			Kind:  t.Kind(),
 			Shape: shape,
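Both LoRA adapters (this one and the gemma2 adapter earlier) repack lora_a/lora_b matrices that arrive transposed from the safetensors export; the shape[0] > shape[1] comparison is the detection heuristic. A toy version of the check, with hypothetical dimensions:

package main

import (
	"fmt"
	"strings"
)

// needsTranspose mirrors the adapters' heuristic: a lora_a matrix whose
// first dimension is the larger one was stored transposed and must be
// repacked. The 4096x16 shape below is made up for illustration.
func needsTranspose(name string, shape []uint64) bool {
	return strings.HasSuffix(name, "weight.lora_a") && shape[0] > shape[1]
}

func main() {
	fmt.Println(needsTranspose("blk.0.attn_q.weight.lora_a", []uint64{4096, 16})) // true
	fmt.Println(needsTranspose("blk.0.attn_q.weight.lora_a", []uint64{16, 4096})) // false
}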
@@ -6,7 +6,7 @@ import (
 	"slices"
 	"strings"

-	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/llm"
 )

 type mixtralModel struct {
@@ -15,7 +15,7 @@ type mixtralModel struct {
 	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
 }

-func (p *mixtralModel) KV(t *Tokenizer) ggml.KV {
+func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
 	kv := p.llamaModel.KV(t)

 	if p.NumLocalExperts > 0 {
@@ -29,7 +29,7 @@ func (p *mixtralModel) KV(t *Tokenizer) ggml.KV {
 	return kv
 }

-func (p *mixtralModel) Tensors(ts []Tensor) []ggml.Tensor {
+func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
 	oldnew := []string{
 		"model.layers", "blk",
 		"w1", "ffn_gate_exps",
@@ -56,10 +56,10 @@ func (p *mixtralModel) Tensors(ts []Tensor) []ggml.Tensor {
 		return true
 	})

-	var out []ggml.Tensor
+	var out []llm.Tensor
 	for n, e := range experts {
 		// TODO(mxyng): sanity check experts
-		out = append(out, ggml.Tensor{
+		out = append(out, llm.Tensor{
 			Name:  n,
 			Kind:  e[0].Kind(),
 			Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...),
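The Shape expression above fuses the per-expert tensors into one stacked tensor by prepending the expert count to the shape of a single expert. An illustration with made-up dimensions, not values taken from this diff:

package main

import "fmt"

func main() {
	// Stacking 8 experts, each of shape [14336, 4096], yields one fused
	// tensor of shape [8, 14336, 4096], matching the
	// append([]uint64{uint64(len(e))}, e[0].Shape()...) expression above.
	expertShape := []uint64{14336, 4096}
	fused := append([]uint64{8}, expertShape...)
	fmt.Println(fused) // [8 14336 4096]
}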
@@ -8,7 +8,7 @@ import (
 	"strings"
 	"sync"

-	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/llm"
 )

 type phi3Model struct {
@@ -37,7 +37,7 @@ type phi3Model struct {

 var _ ModelConverter = (*phi3Model)(nil)

-func (p *phi3Model) KV(t *Tokenizer) ggml.KV {
+func (p *phi3Model) KV(t *Tokenizer) llm.KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "phi3"
 	kv["phi3.context_length"] = p.MaxPositionEmbeddings
@@ -68,19 +68,19 @@ func (p *phi3Model) KV(t *Tokenizer) ggml.KV {
 	return kv
 }

-func (p *phi3Model) Tensors(ts []Tensor) []ggml.Tensor {
+func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
 	var addRopeFactors sync.Once

-	out := make([]ggml.Tensor, 0, len(ts)+2)
+	out := make([]llm.Tensor, 0, len(ts)+2)
 	for _, t := range ts {
 		if strings.HasPrefix(t.Name(), "blk.0.") {
 			addRopeFactors.Do(func() {
-				out = append(out, ggml.Tensor{
+				out = append(out, llm.Tensor{
 					Name:     "rope_factors_long.weight",
 					Kind:     0,
 					Shape:    []uint64{uint64(len(p.RopeScaling.LongFactor))},
 					WriterTo: p.RopeScaling.LongFactor,
-				}, ggml.Tensor{
+				}, llm.Tensor{
 					Name:     "rope_factors_short.weight",
 					Kind:     0,
 					Shape:    []uint64{uint64(len(p.RopeScaling.ShortFactor))},
@@ -89,7 +89,7 @@ func (p *phi3Model) Tensors(ts []Tensor) []ggml.Tensor {
 			})
 		}

-		out = append(out, ggml.Tensor{
+		out = append(out, llm.Tensor{
 			Name:  t.Name(),
 			Kind:  t.Kind(),
 			Shape: t.Shape(),
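The sync.Once above guarantees the two rope-factor tensors are appended exactly once, the first time any blk.0.* tensor is seen, no matter how many tensors share that prefix. A minimal demonstration of the pattern with placeholder tensor names:

package main

import (
	"fmt"
	"strings"
	"sync"
)

func main() {
	var once sync.Once
	names := []string{"blk.0.attn_q.weight", "blk.0.attn_k.weight", "blk.1.attn_q.weight"}
	for _, n := range names {
		if strings.HasPrefix(n, "blk.0.") {
			// The callback fires only on the first matching tensor.
			once.Do(func() { fmt.Println("appending rope factor tensors") })
		}
	}
}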
@@ -1,7 +1,6 @@
 package convert

-import "github.com/ollama/ollama/fs/ggml"
-
+import "github.com/ollama/ollama/llm"

 type qwen2Model struct {
 	ModelParameters
@@ -22,7 +21,7 @@ type qwen2Model struct {

 var _ ModelConverter = (*qwen2Model)(nil)

-func (q *qwen2Model) KV(t *Tokenizer) ggml.KV {
+func (q *qwen2Model) KV(t *Tokenizer) llm.KV {
 	kv := q.ModelParameters.KV(t)
 	kv["general.architecture"] = "qwen2"
 	kv["qwen2.block_count"] = q.HiddenLayers
@@ -46,10 +45,10 @@ func (q *qwen2Model) KV(t *Tokenizer) ggml.KV {
 	return kv
 }

-func (q *qwen2Model) Tensors(ts []Tensor) []ggml.Tensor {
-	var out []ggml.Tensor
+func (q *qwen2Model) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor
 	for _, t := range ts {
-		out = append(out, ggml.Tensor{
+		out = append(out, llm.Tensor{
 			Name:  t.Name(),
 			Kind:  t.Kind(),
 			Shape: t.Shape(),
@@ -20,7 +20,7 @@ import (

 	"golang.org/x/exp/maps"

-	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/llm"
 )

 type tensorData struct {
@@ -29,7 +29,7 @@ type tensorData struct {
 	Shape  []int  `json:"shape"`
 }

-func convertFull(t *testing.T, fsys fs.FS) (*os.File, ggml.KV, ggml.Tensors) {
+func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
 	t.Helper()

 	f, err := os.CreateTemp(t.TempDir(), "f16")
@@ -48,7 +48,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, ggml.KV, ggml.Tensors) {
 	}
 	t.Cleanup(func() { r.Close() })

-	m, _, err := ggml.Decode(r, math.MaxInt)
+	m, _, err := llm.DecodeGGML(r, math.MaxInt)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -60,7 +60,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, ggml.KV, ggml.Tensors) {
 	return r, m.KV(), m.Tensors()
 }

-func generateResultsJSON(t *testing.T, f *os.File, kv ggml.KV, tensors ggml.Tensors) map[string]string {
+func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors *llm.Tensors) map[string]string {
 	actual := make(map[string]string)
 	for k, v := range kv {
 		if s, ok := v.(json.Marshaler); !ok {
@@ -75,7 +75,7 @@ func generateResultsJSON(t *testing.T, f *os.File, kv ggml.KV, tensors ggml.Tens
 		}
 	}

-	for _, tensor := range tensors.Items() {
+	for _, tensor := range tensors.Items {
 		sha256sum := sha256.New()
 		sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
 		if _, err := io.Copy(sha256sum, sr); err != nil {
@@ -109,6 +109,7 @@ func TestConvertModel(t *testing.T) {
 		"all-MiniLM-L6-v2",
 		"gemma-2-9b-it",
 		"Qwen2.5-0.5B-Instruct",
+		"c4ai-command-r-v01",
 	}

 	for i := range cases {
@@ -331,7 +332,7 @@ func TestConvertAdapter(t *testing.T) {
 	}
 	defer r.Close()

-	m, _, err := ggml.Decode(r, math.MaxInt)
+	m, _, err := llm.DecodeGGML(r, math.MaxInt)
 	if err != nil {
 		t.Fatal(err)
 	}
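The fixture that follows was produced by exactly this path: every tensor is identified by the sha256 of its byte range inside the converted file, read through an io.NewSectionReader so nothing else is hashed. A cut-down sketch of that step, using a stand-in file and hypothetical offsets rather than real GGUF metadata:

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"os"
)

func main() {
	// Stand-in for the converted output; real offsets come from the decoded
	// tensor metadata (tensors.Offset + tensor.Offset, tensor.Size()).
	f, err := os.CreateTemp("", "tensor-hash")
	if err != nil {
		panic(err)
	}
	defer os.Remove(f.Name())
	f.WriteString("header----TENSORDATA-----tail")

	var offset, size int64 = 10, 10 // hypothetical placement of one tensor
	h := sha256.New()
	if _, err := io.Copy(h, io.NewSectionReader(f, offset, size)); err != nil {
		panic(err)
	}
	fmt.Println(hex.EncodeToString(h.Sum(nil))) // hash of "TENSORDATA" only
}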
344 convert/testdata/c4ai-command-r-v01.json vendored Normal file
@@ -0,0 +1,344 @@
+{
+  "general.architecture": "command-r",
+  "general.name": "command-r",
+  "command-r.attention.head_count": "64",
+  "command-r.attention.head_count_kv": "64",
+  "command-r.attention.layer_norm_epsilon": "1e-05",
+  "command-r.block_count": "40",
+  "command-r.context_length": "131072",
+  "command-r.embedding_length": "8192",
+  "command-r.feed_forward_length": "22528",
+  "command-r.logit_scale": "0.0625",
+  "command-r.rope.freq_base": "8e+06",
+  "command-r.rope.scaling.type": "none",
+  "tokenizer.ggml.add_bos_token": "true",
+  "tokenizer.ggml.add_eos_token": "false",
+  "tokenizer.ggml.bos_token_id": "5",
+  "tokenizer.ggml.eos_token_id": "255001",
+  "tokenizer.ggml.merges": "902a060cac8884a5793d2a857dd2e53a259de46c8d08c4deb243c239671e1350",
+  "tokenizer.ggml.model": "gpt2",
+  "tokenizer.ggml.padding_token_id": "0",
+  "tokenizer.ggml.token_type": "b7a352ccd1c99d4413bcf452c2db707b0526d0e1216616b865560fab80296462",
+  "tokenizer.ggml.tokens": "815ac90ff23565081522d7258f46648c8a0619eb847a9c7c31b238a9b984e4ae",
+  "blk.0.attn_k.weight": "6fcfdb466f9ceb1229404ce4ec4e480751b8d00da12707a11783dad7256cb864",
+  "blk.0.attn_norm.weight": "6063317f731371864049c7704a70772f1eb632194201ebdc2ed0f8e483507c72",
+  "blk.0.attn_output.weight": "920f49716a1e2fc73b6794ec777947f1c122701e63ed302422ac89e90f06e9da",
+  "blk.0.attn_q.weight": "ddbcd7cde197e632564ac58e4f25d9e3a8ca52917329eeb6081eb41a797932ab",
+  "blk.0.attn_v.weight": "318fc02a189d87420f0cbf57f47f11e00c21ec1ed472ce0a2a895b44f7fa0fca",
+  "blk.0.ffn_down.weight": "aa71975b6eb1f4c77b03d2ac4a194cf8d95718efac741bb12f0f3ff79a27f9bc",
+  "blk.0.ffn_gate.weight": "42967702fa0bc738b88dc50007ace26dbe74a5a9e0978124dd093f818241a9e1",
+  "blk.0.ffn_up.weight": "5282c8788b086bd30f46525e7995a17464882a72703fd27165491afdd8bfd4af",
+  "blk.1.attn_k.weight": "cd248882e64fd2c3402c44790ebe12440133dc671b6893fdad0564c461973adc",
+  "blk.1.attn_norm.weight": "ba84e1c8fd30af6ec94208db4078befac8c921aad3acb887812887f3282ea2be",
+  "blk.1.attn_output.weight": "2efa3ef7c5666ccceb05e339b83ad680cc0d2c3ec78203f5da5959f23a80e14f",
+  "blk.1.attn_q.weight": "5106f2e255358a1303c22e8b5f0ec044852bb30a866c52cabefd30017a7a6b7d",
+  "blk.1.attn_v.weight": "a211a634a1a5df1d5f973645438be0461dd922210f9747c6b04e386c7f1ebe95",
+  "blk.1.ffn_down.weight": "37093afe48d32c578ec956c9ed85242cd000d6aa979e60526aafa10c822dbb10",
+  "blk.1.ffn_gate.weight": "469860819e9159caefb1aad0bc66db790f3393f05fd87b08e52256a7ed256543",
+  "blk.1.ffn_up.weight": "736742c97d35d1a011f9cafd3c0ce947ad559bb2fba6da73c816f6bfd0fa9aeb",
+  "blk.2.attn_k.weight": "92c219d92804d832ab404bd6dc7339c90877bb7cf405dd030c121f8b27757739",
+  "blk.2.attn_norm.weight": "61e4466069474b76b6d1e702566420eb669faf3556b00ff7b824784aca13a2d6",
+  "blk.2.attn_output.weight": "d2fb38a2b2171fd91caf037faa585a62225819aa232d86fd4f7f9d2c3c8a45e9",
+  "blk.2.attn_q.weight": "f6faf5cc6844e3daa4f9f68d90f5458c64879de68a7728860e38374e30c3429d",
+  "blk.2.attn_v.weight": "f340ef8f7341d987a6f37c0e9afe0aef5be67be00c0ce5f57612daf73319cce1",
+  "blk.2.ffn_down.weight": "c7be61a701d779860b621b143fb6365b607bf99ec7c0f153b07908ac8120885a",
+  "blk.2.ffn_gate.weight": "b64f0878187bd3392abfa4c3e8ad2f8b4c133903e54246747ff8f3b4639ad83e",
+  "blk.2.ffn_up.weight": "50b11c712652e90ee7428dbb45cffebb80662ac982bc72bd9eafff361b5eb5a8",
+  "blk.3.attn_k.weight": "2b7bcbe9ee5c9c630c8c8d7483887e78b73581016f4cbb6933db2a147a25f431",
+  "blk.3.attn_norm.weight": "0181dac7f4eee7252980323e8032cf339bef2046ce0a16c0fd72af7c98a8a37b",
+  "blk.3.attn_output.weight": "aef8843b636ce231da9e7c9acbee197883cc15df0e2887709324c6a50f16da7b",
+  "blk.3.attn_q.weight": "55404130fa10e81322d33eb378aa0de31a92990ce7730f1338c0ace0406bb1b1",
+  "blk.3.attn_v.weight": "76f7fb8040d82b957d689ce34fea2302a6640ad5bbaa0052ad2b7ebce270c33d",
+  "blk.3.ffn_down.weight": "648628933eff3b357c3729c33c5b1ae51c28e59b9c19acd1601a2ff7c5d5d9a5",
+  "blk.3.ffn_gate.weight": "6a588885d16e98d5f50ebed05af089154f680085ca9c97691e5b489088630a4a",
+  "blk.3.ffn_up.weight": "e12455a1d702f4986e1a663493e3d5102b367af74d45557522002a35d63ecac2",
+  "blk.4.attn_k.weight": "40d943380a8a85e4eab147934bf6e16f23cc8ab753f6636526382c074d182288",
+  "blk.4.attn_norm.weight": "4ab2c098983d4599fe540eef624c4df954adb7473faebda7471ef0ba4134814c",
+  "blk.4.attn_output.weight": "d14b91e40f58bf4a3c8c2eca0b12bb541de406574af39027d56f6c588a147082",
+  "blk.4.attn_q.weight": "e1224960a3562107488589f883fa32414bae41712fa8dbd47c5f3e3a7801452f",
+  "blk.4.attn_v.weight": "063f297bc4aa6e709fc32c4c32e35af7d07d80e83cb939b76adbba858006c03d",
+  "blk.4.ffn_down.weight": "f88a18020c5e1caaa29596895eb348e76ee5bfad27ed57651a86cd8cd1f9b5aa",
+  "blk.4.ffn_gate.weight": "48e7e1eed3fb52e92e61d3557dd0ec002418327090e034ce4322fd68542266f8",
+  "blk.4.ffn_up.weight": "1ca8a7aa17355b6ce0d9ad5539fdad3899fa47fd359c285fbfb31f19f47bf073",
+  "blk.5.attn_k.weight": "2bdf15f8e73d068d972380f25d207004cf0bf3b5bfa46946803ba6fba07d9175",
+  "blk.5.attn_norm.weight": "60448d7cde6e1b6467aa31bdea012e39cdb08c88081cee7d102dca4f93f766ef",
+  "blk.5.attn_output.weight": "f9f687d7c457537f9fca8a4087a59f1c3bebfaf5537b94e42c831a13224f7799",
+  "blk.5.attn_q.weight": "987db7a2ad68657a92625e1980effbb1f79697c2183f2b9f3b3a0570c51b0ab9",
+  "blk.5.attn_v.weight": "cf696891148f3e4783ad1d20f93462ae091eb8651c656bba9b662253b6263e02",
+  "blk.5.ffn_down.weight": "c0662b0bd0929136005fb9d691fdd9b2c33867d9ce9622339a6a456b720b059a",
+  "blk.5.ffn_gate.weight": "200bbdfab615d7a3a84719b6ced7751e3ce52757ef212d96f87798bc1de5e987",
+  "blk.5.ffn_up.weight": "df5d23e7e035fb1b9d163da7ddfdfe38da6a37e86e96534dc02ad20f011b55b3",
+  "blk.6.attn_k.weight": "c0dae2d272a7c5a2fa004bbb8475dbab362fc1f6d008e73d5a4434a9382ac6ba",
+  "blk.6.attn_norm.weight": "51c57ac8b55e04354d5dca6bb9c0cf4177639d3b038e80209e33036209688f64",
+  "blk.6.attn_output.weight": "229d97892c62f85bcdf431675250e01c976ad69ffa450b01fb543bf88f14a2fb",
+  "blk.6.attn_q.weight": "c20e49621821bd46ed156e6823864a5bda4f317750e71ab8dc54e44eb48cf7c2",
+  "blk.6.attn_v.weight": "53ceb1a2ee43fce3c7b5b33c58a9fc5ee7f44dc1c6f29bc9dbefc37582102dc9",
+  "blk.6.ffn_down.weight": "7923c943b7629d560a032d1efa210d1d75c6692140f1be94464ee7ed24f44ed0",
+  "blk.6.ffn_gate.weight": "57593d350361af753a6a39f53b066282634c0fb44f396f6f2966a574b01d8f8c",
+  "blk.6.ffn_up.weight": "327b6a7a387098b8899d3ded04a4d4e7c658ca61b80d4e7b17594be232721602",
+  "blk.7.attn_k.weight": "9ca48b87a10116fd8868e62b76f211d4bb91f166096be9061439ee2e1c3a5c20",
+  "blk.7.attn_norm.weight": "cd56cfcc4e2ad6b96e23ea7b0d32b4caf236107d99a0b22c56760b62e63c8cfd",
+  "blk.7.attn_output.weight": "7352b509a03cae2491ffc060e577d189341a0f861233f18c96f9d275dc4234bf",
+  "blk.7.attn_q.weight": "2b3791c8c008c33ddbe12bedba8191322ceea2dcce5cf0eb7a93d40ad254e672",
+  "blk.7.attn_v.weight": "3ae721d52466487a3d48150581e57f6d64ea1e83ab929f23b28c3d777422eeb6",
+  "blk.7.ffn_down.weight": "3b6fa8ececdb3c34af3a5363863d6f94289c1c95bf47fce3a3ddcf184c5f0848",
+  "blk.7.ffn_gate.weight": "dbd7df6c5ae5eb4adb859f0d36453813a4e289a359a1ba8f72d67fcbf21c3e22",
+  "blk.7.ffn_up.weight": "de68380a334b4c5cfd4c318b0e9854aec59bd79aa0f0c30af3f56414f83482b0",
+  "blk.8.attn_k.weight": "7303c4e4480abc72a7ee271811311199245fb5c2ea27a2bd3b8cad3a53a03c27",
+  "blk.8.attn_norm.weight": "2e3d1921898d1b943ce1a1b6818546c8b471d6d542da24f51a8b514b8c3dd4ef",
+  "blk.8.attn_output.weight": "30421520887b66bf97a18dbcdc283bc8d0b60590b612fd638a319a6eae923227",
+  "blk.8.attn_q.weight": "73e064d5433c9b500068a1c31744dbd53f4ade298fb450a0e8c97f62cf1f8a8d",
+  "blk.8.attn_v.weight": "27e21f8b9a9a8533e8178ca34a72aa1d786393d57302b7806dcdf3e51de511a8",
+  "blk.8.ffn_down.weight": "bf694bd8e00047982108000e7b3dee7b225db8b19abc595e5697b6bbefd92e7c",
+  "blk.8.ffn_gate.weight": "d55fdbf8606d9141b774b0500c58944fd1253b9e69d1f765eaa9a680b9f2ca40",
+  "blk.8.ffn_up.weight": "1ae3f580655e7c8e8dd6c34fa4ac574fdfc5e3f1a8536da0c5442d3a2976f0e7",
+  "blk.9.attn_k.weight": "b18080626012d8aabcf78542d6c7bf31c712bf55a70172fbfe173fcf34481036",
+  "blk.9.attn_norm.weight": "2e3620620dc09998c6d3063a7d5de5433fbbae8c11e5b00d13f145d39140e162",
+  "blk.9.attn_output.weight": "69c3c0e27ef1c0fc933eeb7b612b70909f18cde238873c0d576a2ba9714ef174",
+  "blk.9.attn_q.weight": "68330e5aa28a28873c9a6e67f032186ef651df2df5844e0f27094ba349fbe4ab",
+  "blk.9.attn_v.weight": "3df8d45a102be082d0793a51cb82aa62a43cd0e9d047ba4115ca0f2414b39325",
+  "blk.9.ffn_down.weight": "1d6cc162b73745b135b4f040a0aac3c06d5135a3dc5b2421e7ee2af48662fd7f",
+  "blk.9.ffn_gate.weight": "034a9d40fb1e32b534b45f4bccd65cbe43c4a6a3f5d01132bd245ca0005de5fc",
+  "blk.9.ffn_up.weight": "c838c38d0e1a0ac0da17eb2a66023ed31929f07d8fcfe1cc546df26096c91f0c",
+  "blk.10.attn_k.weight": "a78507cb72f744b86ceaa032596e74e5571c822d0226d334881169addb32cbd5",
+  "blk.10.attn_norm.weight": "35f48d0b28ee0e6b4cad4e983925737562d64824be5b168b3e26df3d6b260cf1",
+  "blk.10.attn_output.weight": "53712db06796de39b131323e7abf9a58551b6d52da6db66a471580386d396252",
+  "blk.10.attn_q.weight": "efe08429ba196026b81cd1c471e1c7418afd9e966659feb3936b674aa0803b58",
+  "blk.10.attn_v.weight": "7ec6055e134f89da0cbe79ec9f13ef2e442ac584b1f03c3e13e7d0cdad0078bd",
+  "blk.10.ffn_down.weight": "37e66af4bcd1f3079e841e892255b8255070655901864ea3a8c602a7f681a640",
+  "blk.10.ffn_gate.weight": "1825282bc34830d371c6edcc3c1e73e6ecc1e10f4aea0122dbb7acc1d6f7b1bc",
+  "blk.10.ffn_up.weight": "819b3b276a4d4c14a35ed6682d5ef18a5e8ed468e5ce3f12e8c75ec18ac20ec4",
+  "blk.11.attn_k.weight": "5327e6a2af82dfff0619a14971f5864a15553c36fead84e1af42c7630f2729c6",
+  "blk.11.attn_norm.weight": "fec363b3c4a43036d2c635fb8aa9e122dd87ee79811839f2f6cd955be3373e7b",
+  "blk.11.attn_output.weight": "ccf7b38f18ee8798b8a6a35018e2df3eb3e007de62876befb68025dd66c79763",
+  "blk.11.attn_q.weight": "da8c4a1c824ffe174e39f126cd72f7ef83c56aff1259d452a1212de80f98f5e9",
+  "blk.11.attn_v.weight": "d17ae6bb77f03982b55d341eb67acb5969e9ad3da5994b96eafc09793dcfe3a0",
+  "blk.11.ffn_down.weight": "a6bac521e2791345f22c57205fa1c2f2f687794dfd24d0e98d50ae0d0eb6088a",
+  "blk.11.ffn_gate.weight": "5ed902c488cb51ba5635f3df08258c5f84f31a679a00211ea5f9d8b824ef6d9d",
+  "blk.11.ffn_up.weight": "ee9f1437eb890d2cf9df2574afa1cecf20aafdd847cd75b152d7eb74419afd34",
+  "blk.12.attn_k.weight": "5a069c06e1019b0f889088e67458f7a11ec77fa190ada6069e46211f62219947",
+  "blk.12.attn_norm.weight": "194d7e5fcc8c49aea62daf1940532419cf3c505afdce6be377286b677db5db8f",
+  "blk.12.attn_output.weight": "6534995fd4d6fecb55e317add4b1723aba4d825e1e9471d0b08813dfdc247176",
+  "blk.12.attn_q.weight": "4ab51ca519b5995581fa34f846276feca3b907ef2b51f192f6cc0b3263c3f5a2",
+  "blk.12.attn_v.weight": "5652ca3fa81ef9a1ac1543d71fc6813f8517f8ec54b25c701f6f98061614830f",
+  "blk.12.ffn_down.weight": "4b2c263f54c88516b8eb273bb8d9615b01c5c8b484dc70358adb91b50b300edd",
+  "blk.12.ffn_gate.weight": "8f50c3c3e3e8568991d6c1b0e74b500cf4f208e7700bbb8e87c3f6a6d359b6b5",
+  "blk.12.ffn_up.weight": "1c1a581fec1fbe959e1427fa513f400100b5e1ee9d83932630be9905fb49c231",
+  "blk.13.attn_k.weight": "efd7a38c46f08d8376d82974f33c644e3a02220e142d63b1704718699a8a884c",
+  "blk.13.attn_norm.weight": "d28fa4f1bd75abbd063b0e622e08f579c89cd0c0c5ce63c1952ec9f944f8ee13",
+  "blk.13.attn_output.weight": "71e0068a639288718bdb70a6cfdefd50bc8b3ec3993347a65129e70001ca5827",
+  "blk.13.attn_q.weight": "b97077adc92cff07a2e07d80ee38f214ad8713571c69cd5c70ebd43dc501ac87",
+  "blk.13.attn_v.weight": "79b3e2749ab4b459c81e96e322b215f1e8af645eb346e176c326bd00cf6ed2fd",
+  "blk.13.ffn_down.weight": "9f8687d11effa1db7cfecf7bec5631734bcf2962aad74a9f519144491e08ec85",
+  "blk.13.ffn_gate.weight": "7d14dfa0543852e7777fe8fff29ca533744cbcf1ebcf10067e5adfc4eb345e65",
+  "blk.13.ffn_up.weight": "852b9527b97fdab211ff3f832a660ee1d93ccb56906144c50f01319a6e8ee615",
+  "blk.14.attn_k.weight": "79e926b20f36f66d58226cb358881f2f68ae7b468787d33cafae5110287a14a0",
+  "blk.14.attn_norm.weight": "97d481b63deb0df6142c2c6cd23043720c62eb609e390f47a7113751c79974ec",
+  "blk.14.attn_output.weight": "aa6e94d7176d5c79fbb89b96e5f13ce75702ce3dd23ee52986446da436a6c3d6",
+  "blk.14.attn_q.weight": "214becb6d1bb460da9fb8ace0f99b9a5afa9edf7aa7acc19606c7401b11d6305",
+  "blk.14.attn_v.weight": "488b0e6d7f1a7a2ed0972aaa6d10ef9c775ee5373460324efcf5b3e3da9311df",
+  "blk.14.ffn_down.weight": "29c7ad16cf9542e30996a1a01ab95b844533b28051f04cc7949c371afb796471",
+  "blk.14.ffn_gate.weight": "b7ef208f2b054803665b377f5a5980c122c026841809cf855c6ba06d1c3a885a",
+  "blk.14.ffn_up.weight": "76a5cc28100748d79c4398ce7b9176aab4d661548b6293a82f99144812e5b70e",
+  "blk.15.attn_k.weight": "a6b8f9e98ab878fa7ebc5d080978ebf2d050acc2ab2fa8ea9188eb10e27702c8",
+  "blk.15.attn_norm.weight": "a26d07a9752d6dccb68e3a8a2a49fd0752cdd0a415e05547819bc37d9ba63d5e",
+  "blk.15.attn_output.weight": "c63616c69048ccbee801e05be4f56d21fda21aa0cc470f41d57c31b4d9283a4d",
+  "blk.15.attn_q.weight": "fd595a67bf96c6ba16eb148a9d02fa52fa3c1d33ed10be28a08f851409fd6e64",
+  "blk.15.attn_v.weight": "1c5c9d33fa07c05d5f4ed0032c6c4aa83d863f0d31c94a66109d239dcd03cea3",
+  "blk.15.ffn_down.weight": "585ea62ab8aff7d7d212ea5c1a03226fda6b68370c890b776834af70c948dcbc",
+  "blk.15.ffn_gate.weight": "a13c63f86f879b03a573d5dd2a25cfd1f4dc73e8132e6454ecc23e538b4cdf6f",
+  "blk.15.ffn_up.weight": "f7112450f57c12fcd511f049e0dc0b541625a107a7901c3261ed9e984299f65c",
+  "blk.16.attn_k.weight": "2d2c8b11dd71fba6d1c106aa1673c113a5448653cca7eab897c8739212ed5003",
+  "blk.16.attn_norm.weight": "95c2ec7be9469690e18a9a1779684acb3e9da44b13e263a0da840305646fbf8a",
+  "blk.16.attn_output.weight": "31a65046e677f54dae654ded4e733479fcc0f7283d83076b7dc7cbcae8528230",
+  "blk.16.attn_q.weight": "bfc6292b9c6d49b7118d08060242a138182eb182d136ba5dfaf469437c16081d",
+  "blk.16.attn_v.weight": "68f81d037340217d87c7853ff4d6edfbc46d9e827ee6d5bff7c3f6238e3a95ad",
+  "blk.16.ffn_down.weight": "bbd6629691950cef4d5113e1c6670e91b216a9b872cb92cee02dfda4d6c4f7b8",
+  "blk.16.ffn_gate.weight": "63cb56f282b7401ed6c76e5bb6fdf1bf68a64f9af0c82c014209b55bcb5191d0",
+  "blk.16.ffn_up.weight": "b54f39a2541063cbfb6f713aa81c3b69a04100e999aa2ebbeec195dc382eceec",
+  "blk.17.attn_k.weight": "3d9ba49799cc56664ec30a002bcad61eb651294212a68c3ddb573eb042aef5a4",
+  "blk.17.attn_norm.weight": "42ee0db4b9d63257bca0012a30b12737ead1caafeb5ed3d93c8f48ffec4b46de",
+  "blk.17.attn_output.weight": "a38fd100f05c9041c592bc739e287de0b10d08ef2bda41a879225bdca9002f71",
+  "blk.17.attn_q.weight": "8a3bee285b0180a9eb35662e449ee4cbe16d992bdd48fb3a94bc4a347728cfa2",
+  "blk.17.attn_v.weight": "d7f8f1b8b863494ed4392a1656775912e9b264ad36016547b12e832a1d6757d6",
+  "blk.17.ffn_down.weight": "bb7ee58f61da8630972e25b621996fbe8ec06f4dc9ab1e268ab5b120c526ca28",
+  "blk.17.ffn_gate.weight": "6b652dbf167fee09a45ebfd78d500ff6548fb2756dbe5343ffec3f7e6207179f",
+  "blk.17.ffn_up.weight": "3b67f727e55e742715de978fab80457781e7a3762bc48f79d13b45dcb8de664c",
+  "blk.18.attn_k.weight": "ff7fe57c57b90c6fcc0aefc39ec24593c3a7d1ea1c23770480075a015450e0f5",
+  "blk.18.attn_norm.weight": "1d40faca082d2633ef0ccf19e121870dd6c7c3e2154607c7f3543fa96e99cb2d",
+  "blk.18.attn_output.weight": "9adfecaaa397a92db4687efd5fcabfa0daef9e6b0493763b7ff5ebc185c43a6c",
+  "blk.18.attn_q.weight": "ad1803eb9b291948639277afe981e666b07167eb3fcae903ba5b73bf86d8f50b",
+  "blk.18.attn_v.weight": "308cf23399adccf27401a4ab60d74dac6fb9d4cd4b9c5940d9145118d1881b34",
+  "blk.18.ffn_down.weight": "7de4ac9a561fb580619b745687dfd7ca8a69ef70471dee978741b80e9ff7bead",
+  "blk.18.ffn_gate.weight": "0c66970f696b33bd5ee8f1f2fbcb41fd78fa5ccabdc927e11a4d5a4089f19c69",
+  "blk.18.ffn_up.weight": "66a42e988e8a1f468fabf976c48e9e4bb045eaac6916ef16555ac101cd674abc",
+  "blk.19.attn_k.weight": "a928ab50390bacbcebe2e4b66922498134ce22d7b93beaa87d6cf4ab52eb7174",
+  "blk.19.attn_norm.weight": "b4a02c55b46c2a96aec9c64a254087cf48e6c1d4b6f31782c77a46fc4daebad1",
+  "blk.19.attn_output.weight": "b768319c641dff1eac5d1f8ceb960c9899c795bf2b24c1d6bf70aa24fda45f77",
+  "blk.19.attn_q.weight": "79ef3f57d187d3954a26362096e1b6c222d76f537dff73e034d6e9999935b8bc",
+  "blk.19.attn_v.weight": "ce13d6b13e24fcb2d5bc6a2662e5bd295b31b12db10a6d0307f86cf29b8d5001",
+  "blk.19.ffn_down.weight": "cf90d7e2137482cfd50934a8223ad774621d08554969da80a9712df5e6227eb0",
+  "blk.19.ffn_gate.weight": "71ce30150f003b6eeb3bf7464e05b6ae615f135110d8e47f0a47fd973e537c0f",
+  "blk.19.ffn_up.weight": "7f92aca0cc29866633feec701ec01a85a8ee2fd4e2b9630173a6cffb1d9d50ee",
+  "blk.20.attn_k.weight": "a2df23159d6fb74ef28e14b61028fe8b00a693a2fc9234a980be74f20b958682",
+  "blk.20.attn_norm.weight": "c6cd5f1b096fc5efa4eb59ca1c8c4bd28730f3dcedd59a63601663eccc6724ed",
+  "blk.20.attn_output.weight": "896a8a166d0f006d4b09867ae4345426303cbc3fb13a18d3d4e1bde00f16dbdf",
+  "blk.20.attn_q.weight": "01eb79588fe61baea0da43e99f4dc5939590e1bafd01e12dadb8326f102bfea2",
+  "blk.20.attn_v.weight": "bd39630fdd5a7c859ac1addaf53e63faf524c3f32f5f4896d86b6e746b1d5c06",
+  "blk.20.ffn_down.weight": "0304a5d39957a0e3f031c4bcc4549a135d396c8d97c8d276fd1c823ce86560c2",
+  "blk.20.ffn_gate.weight": "117b79d595b1dca0c8b37586beaecc4d84411507276212dc286cde7fc36c9bef",
+  "blk.20.ffn_up.weight": "6e799346db145c125f01783539749d3828fcc451cd4f10c5352f047a47e28714",
+  "blk.21.attn_k.weight": "1c37e4c0664147e775bb006b226b9553e3421140cd96288ea755f81731ab80ba",
+  "blk.21.attn_norm.weight": "00ae783a29000ccda5e4bdbff03df0752fb82805dc3f9b987500ebd80714476e",
+  "blk.21.attn_output.weight": "7588b84f9fb19f15095b5265c60b4a4e7ae74bcc47d4607dfa5d0bfab6f136cb",
+  "blk.21.attn_q.weight": "a65f1c0dd06d45bb97532d3e932689c1eecfe7359089b39174a96a149335cbc1",
+  "blk.21.attn_v.weight": "4220b77e7d5e8709b4eef33a679b5dad11f297085ef44c9977f9e54ef08f7a2d",
+  "blk.21.ffn_down.weight": "b8c082a0530d4b5328e67db0df84c5498f2af956de23c639fa0198ffea853950",
+  "blk.21.ffn_gate.weight": "cd1b656ee72d00e9835ef667c19ef89a88de261eb8eb7c0e936e0f9ddf83ef9f",
+  "blk.21.ffn_up.weight": "dc445f73e36ec7a3bd86884186b728f8e0187f32848c3b8b69d4d41f8571bf31",
+  "blk.22.attn_k.weight": "e37cf0b893ec8b9ee8c78dd139b8d9c45cb997a3bc0c3d93a70ca1c3f6af8859",
+  "blk.22.attn_norm.weight": "248a27838d3c46cc03a5c312facc84e2e0e2c990ef8401e93da25918497f88d1",
+  "blk.22.attn_output.weight": "fc191a18f6d18332c66761f7ab28008bfe295dd1f5c8741a2488442f9e00d0f5",
+  "blk.22.attn_q.weight": "4b193a2ab8bc2b085db18f2bf3eeba26e02b537b2cdd738160c8f14b165d0f5a",
+  "blk.22.attn_v.weight": "7a60ce5ccac7e045e55ba1e1e85bd2a0f93f8c781daee96c5223665e22f0c666",
+  "blk.22.ffn_down.weight": "e0a34fb4244e2c7168f3dbaa1904c15d339ec39999cdf27128bbaf619ee0a237",
+  "blk.22.ffn_gate.weight": "8bac872d4b8549c8812f927efa309f1792b524f33601095fff61b826de5a5615",
+  "blk.22.ffn_up.weight": "b67fa2b94dd901b6ec64c0853ce8ca2d86fe9cb1cc6d2f15fbbbe0e691c0c648",
+  "blk.23.attn_k.weight": "2c32e66ad01942b819ac09a197c71579fe66f02226a264fdd72ad1e02c67a27e",
+  "blk.23.attn_norm.weight": "825fdc94deb439cb93c713eeb077c1052b90ed658d6d464fc4ad3d611e911d48",
+  "blk.23.attn_output.weight": "95ca6707a95b8750b0c7c5d379d368f0f2e7ebef631954e7d4d8ec0f41f13a3a",
+  "blk.23.attn_q.weight": "6eccc84faca5fac015d1b26e2854501edcfd292a302228fe14cf99f5eb59a34b",
+  "blk.23.attn_v.weight": "b343ac3d226040f1033ee049668aa1d89b1774bc18431965682e5dbdce78ccdc",
+  "blk.23.ffn_down.weight": "9fc599befea8d3b1e342d564a110074f66d2542df406c4b90b6bdc5828fbb2b2",
+  "blk.23.ffn_gate.weight": "488556c1b0c9f0b20b0c99b4bac2e0f4046b81edb601d7b91e7e5b3bab47d667",
+  "blk.23.ffn_up.weight": "1088e291d7008dd9c7c2dd6830af686a8a84b724d123a016209bd5156d6898f1",
+  "blk.24.attn_k.weight": "a923fbe35e61e009a53927d7828818e0592bb737d6a1106c4b0b5a1efc367e07",
+  "blk.24.attn_norm.weight": "9b51aaaa939cefafdd9b13a7e5b74ac7fa2d603427e55a16a909d6f3f353750a",
+  "blk.24.attn_output.weight": "1beb2baba56f8409466434b037771248c2f620ec5f53e15f44c271d5a2d9ecf4",
+  "blk.24.attn_q.weight": "4b0194fe5bfae0c6bf6131dcf8cb6e2b994f6ea10b27cb03574f0f4f8cc0c950",
+  "blk.24.attn_v.weight": "6ac34b1ab0f66226d85bca1194a7c212cd93d384ecbc8b8395de48aec0970a61",
+  "blk.24.ffn_down.weight": "5508f74cb732a662c2936b32ac5e90742d172b9f961a747b0e5cba0e5906a89d",
+  "blk.24.ffn_gate.weight": "095e39b8584403835f9bb1ac33e0e81f54175575e4800273d281b845bff381e7",
+  "blk.24.ffn_up.weight": "2d43ec21637dda12973de367b0113ee9840b0d815bf6fce042f7c3f270b0b530",
+  "blk.25.attn_k.weight": "9e2aee029f3d2c7f67dfc7926e72c8228fb978382c8e5a4701bbf82c93801419",
+  "blk.25.attn_norm.weight": "220cd7164fb4cdbe22d26058e4153b26c27c7b5ce2bec8e95bf2c0ea08d23103",
+  "blk.25.attn_output.weight": "a17f4a5dc6aa51f03dbd75602d98e9491767c205cdc2c3a5f8667fc54bbf7c64",
+  "blk.25.attn_q.weight": "f60827496835c440c794bf57ce9780704d10a59d8229886bf75ebb18900ba4ef",
+  "blk.25.attn_v.weight": "9cac217e9e9f4f4c85f14ee51165a77c580165bd4a34b202389169bbe61a1ced",
+  "blk.25.ffn_down.weight": "a0f36949b663e80849581dfb71e7babcc73580793bbcb0c80ab26d5a6e000359",
+  "blk.25.ffn_gate.weight": "df4d1be4d50d6afe5ad3ef0d0e0fac76a33e85c963dea769641d612dd53e7d13",
+  "blk.25.ffn_up.weight": "992da76be762632e25ebc5ef4d03728eece1b43f7c4e31827df19ca724aea694",
+  "blk.26.attn_k.weight": "34199ff856ac32a500c754539d070258574192a34ecba87a182897cb59fdff52",
+  "blk.26.attn_norm.weight": "a8e9dfb2dae5d22b5c0aec5f3675991c0e3c3e6a44153db2579136b73f456e00",
+  "blk.26.attn_output.weight": "1c4f257ffb0d7db0f11cfb275e38b4af736917b43ad82de1badce3f1d227da4d",
+  "blk.26.attn_q.weight": "33d55786274c2e718cf61e8fbecf3dfa5ee0c208f0b716d42b061f55459acb3c",
+  "blk.26.attn_v.weight": "684b636939cd4ffcfec5a6238a0790ffa43d853c95783af9b9e8275e74071a7a",
+  "blk.26.ffn_down.weight": "89d0bf066db154e6d312b5433aed1714f6a28b40f4c52e3e1530ee07703303c8",
+  "blk.26.ffn_gate.weight": "393d649bebe5e2940e1b043649f6c860b4b8b9f380f30e9da1744a830f358156",
+  "blk.26.ffn_up.weight": "179edc85ababd9d8440cc6093eecd1004290aa1cb96434b26ecf7585b6cca17b",
+  "blk.27.attn_k.weight": "334841445a7f1e14731b08f56eb0b1f0938c63823d28bc6d078c4c5f05b36f19",
+  "blk.27.attn_norm.weight": "57344471bbda2e9deffdfdb2dd05a07aa47f8761e24de53525588639145bf551",
+  "blk.27.attn_output.weight": "506126af9ee54b535d49f97e36f630e74834f480329f098d6d62e96246d8d65a",
+  "blk.27.attn_q.weight": "dd984df1acb4783849e25ba7ae378bfd385cd9efc540fb798cd5bdd873f0118f",
+  "blk.27.attn_v.weight": "b4b3fe9a4455d34c297ff20a2f537b647cef424741d840a747b265f23d320ac0",
+  "blk.27.ffn_down.weight": "621fdb185ba0d35ba5476dae73d2c81ec1482a0e878d5bfd5c3b29fe837af013",
+  "blk.27.ffn_gate.weight": "e4fbab45f2ec506fa374103251a0bdb7baa6f576080bdd796f3e9db92098e08f",
+  "blk.27.ffn_up.weight": "a0c57e463e988002bbd6a6c6792baa21a65e6f89ae303a2c301951b0ae6e4bbe",
+  "blk.28.attn_k.weight": "bac36cbd52ec5056841663865e1291ddab4b47ef9a2544dd285d4503bfb0e4a0",
+  "blk.28.attn_norm.weight": "5774a9df2bbb2e86d1f70179c7b92d81e1f401160148b3328fb64db6646a5425",
+  "blk.28.attn_output.weight": "e8712622d1569557000c75f26c3f55fad267fd300463c2c2cfe3afbfa1c8f908",
+  "blk.28.attn_q.weight": "11677751fddee52cc739699c02836f7be54d96038be4240be5d4f53d00161608",
+  "blk.28.attn_v.weight": "e5ee459b8958d65e1445997b9aa1e90e2f5d17761ebcf5357313119a45322507",
+  "blk.28.ffn_down.weight": "3934518f9f85292da8475fe38a8edcbfc4e24ac56c351b472d6351f98750871e",
+  "blk.28.ffn_gate.weight": "6ba735d57e98d0847e487f25ffaa25256deaa8abec76f428cb70bd9774279d83",
+  "blk.28.ffn_up.weight": "977fae6e1e5353114fc645dd98429464749758765cbc6e6457593d596e57850c",
+  "blk.29.attn_k.weight": "8122a457307d580ad6f1e0acea09a2f593d97f595ba0d6737f5fea16d2433642",
+  "blk.29.attn_norm.weight": "d626f721e05aa1202439b01027031d4caf1adace61ed37870a277cb6297c77cc",
+  "blk.29.attn_output.weight": "7fb7122ab1b6b1e6615ca746897da27bc52c92cb70d3147183cdde61795b72b3",
+  "blk.29.attn_q.weight": "be43e94ff6b6e391024dc824101efa0ddf4005d5b002ac26cb03765c0c73c2fa",
+  "blk.29.attn_v.weight": "af93c85ebff908f74f9935b81bde0516ca487c84139868a1ce079c3ae20036b1",
+  "blk.29.ffn_down.weight": "39dae12340ed3120bd19c495fe0872b559613641e41fde69d02d8631900b84c0",
+  "blk.29.ffn_gate.weight": "36fd482439840ef197c9f3b8905d86acfcea49bcf018544106ca465d4bf8d5c7",
+  "blk.29.ffn_up.weight": "5243fbdfdc1e2a1dd84b6210a9869d18a014db9088897e345240cdc99990bd5d",
+  "blk.30.attn_k.weight": "948f263616bd3788b2b968baafd69b9c5bd1b77578665f096c4b7e247b4cea42",
+  "blk.30.attn_norm.weight": "e168df981e744874ff303faf2eb470e5f6868c2040ba5f383f6c5148669975e7",
+  "blk.30.attn_output.weight": "4cf0ccca04b792573b756655a24fc89cfb1f272da8305633f0bc66ef14990b93",
+  "blk.30.attn_q.weight": "21e07d6cba6c50d65350289258209717174a13c42be57e8141d69712cbaf32c1",
+  "blk.30.attn_v.weight": "65a8ca29c7237b3182ccf03e2fc94e84f9a53d0e160fb679ab401c853170dd9c",
+  "blk.30.ffn_down.weight": "8b00500a6d00d84058f6658ee1d6f06fb4fcae2f90d4341792259362923b3c13",
+  "blk.30.ffn_gate.weight": "5bc0e19ab7a31b50ac2118ad1b36e31055271a322cd8ff661d47c3ac0210703c",
+  "blk.30.ffn_up.weight": "f37a0561955725bd59ee2d064fa9f4e00a12a1b620b624db3bc3add5330bc321",
+  "blk.31.attn_k.weight": "9a5663edda227f5d87533897146764f8e8a7481b9e71fae197c39204f8463221",
+  "blk.31.attn_norm.weight": "060a4f438a1ee5e220b5b5278ad2f5c085a428bf38c515766781815597c87529",
+  "blk.31.attn_output.weight": "6ada5d3cad9dea4780ffbb43302bb6ccc2f24eddd0fc4f5f84c9ce0fc0c6e5dd",
+  "blk.31.attn_q.weight": "bb5d08c08603907981ad388d5d8b70fcc9b98034ba264b8474c8890cc0297af0",
+  "blk.31.attn_v.weight": "e01b4252ea9c6a889c32b21144b441a347464d04536ef4f6572425be55759796",
+  "blk.31.ffn_down.weight": "8ba4d679c36e93ba65ba03180385ef35ea86b3b7cdf2fded9df59369f1c09630",
+  "blk.31.ffn_gate.weight": "e5b41dc93645f8b5e8eebae3ada3ea43a18f97ce2654228655170b07b463ccb0",
+  "blk.31.ffn_up.weight": "25b88cdddc8b547af294ed107d3d1312e90b983cae87936fa6062ecd8ea02539",
+  "blk.32.attn_k.weight": "4bcf86dc0858c8ca2fbdf6aa76674d43eb698f78979fdc1a38f556a7af1facc4",
+  "blk.32.attn_norm.weight": "cdcc12f3b8b9773c6722736bfb748a2729230b21478cbcc4104859d3148df815",
+  "blk.32.attn_output.weight": "d43f1196822995ed89a9365c97054753a8b30ce20b6e273c8edcc42673a1e141",
+  "blk.32.attn_q.weight": "ebf2972bb3865cbc5be4840113a322089752038344beab2a0122c7cb4fb399b6",
+  "blk.32.attn_v.weight": "714db81704ff34fa137512903c1013acee7877467473e46600728b9240582eb7",
+  "blk.32.ffn_down.weight": "2cde3da1258bb170a79d5d3cdfe10c86a71eb34b77da46b74c5ed71e7f4fe274",
+  "blk.32.ffn_gate.weight": "c7e1ed792532613ff9d4e5834b6536e2e0f47df2303bc0fdaa90aac0c1f4e8db",
+  "blk.32.ffn_up.weight": "d8d6f13fe66a716e28f79101a29817f0c0d6f99969a6f017d51bafd1a16c600c",
+  "blk.33.attn_k.weight": "a0a28f6cbca88da00cab2ca37094d9b0503bf9defdae77b91895b911c408cbb6",
+  "blk.33.attn_norm.weight": "0251200c24cc8445607ace6dc8c5aa0566567997262b7cca53a11ac23cc564b2",
+  "blk.33.attn_output.weight": "b2423205bdf6a1096d43c44d8d12f1a84fcd4e1bb70fcf6dc8542b8b8a71a13c",
+  "blk.33.attn_q.weight": "00b425c3ef71065ce5e0234e702bf38143b4952da78a85f52ab2c2e3073d97ab",
+  "blk.33.attn_v.weight": "035edd2335df816c42c765a5e66b9d9b9e15a822a8dc1863508145499c942c14",
+  "blk.33.ffn_down.weight": "4894a923a3db75bae4496ba3ce5f28796ad31fe33996a066271fb8654964310e",
+  "blk.33.ffn_gate.weight": "8f6c819b8bbfbe3357fae89e1ac5a3d58be85b3b04be3bacf7b62775869046ff",
+  "blk.33.ffn_up.weight": "257c3544b5b544fd5d839665bf5caf107a329b59dbc3751efcaa24ae63c56179",
+  "blk.34.attn_k.weight": "b6cd8bba892e38dac4a2ebc3ba1bce49e71b967fc436fde30c6d76f54a18935f",
+  "blk.34.attn_norm.weight": "2b3c8e60a064cba9955752bbbbdd92c71ba5c2f1bd721097bdbe88b5abc68787",
+  "blk.34.attn_output.weight": "8cc272551c9aaca9db5a660c6927bab94a0243d74a30b2bc165f06bd577714ea",
+  "blk.34.attn_q.weight": "74b561eb4792484e6a94b58fe2583848c3ae28ff2f1bf3d02939a0cfdfa49990",
+  "blk.34.attn_v.weight": "dba19e24ff05154dc5a1f55c023729303a583d13d68732ce22ea74d4410dc8f0",
+  "blk.34.ffn_down.weight": "76eca5dfeb274c35774e0bf9f22ee420ed9085c8e99aa2cd5a236e4918b44c61",
+  "blk.34.ffn_gate.weight": "9af0862d5fcbc24732846488e653db8242a467765c0cdbc00332b3a40256b4a6",
+  "blk.34.ffn_up.weight": "2a03126bf73587eaba99ece2066103d12e47bcd4ce30ff6c17b2f383b81d40df",
+  "blk.35.attn_k.weight": "52513fc0cd4e997a842729af7d21dd09399bce0a339558374738be266d0fa2f0",
+  "blk.35.attn_norm.weight": "e5281fa911964263ccf1630b14762edbd41d0b9472d6ec695fc600fed4892c35",
+  "blk.35.attn_output.weight": "b391d6705d5dc6f48326b5fd16573f679edf64109d86fb729a498819676590ca",
+  "blk.35.attn_q.weight": "d16446921966db9b0e0539626ad22a2511ace780e59379d6a4162d8c5441440b",
+  "blk.35.attn_v.weight": "9d8cdf23ffdb0c5c74106843390b94b24c9f33ef0eb9998d39f78c73390101ea",
+  "blk.35.ffn_down.weight": "938eb6301f7bbf162d7dd965682a5ed11d0a4a530c6fedd7e5469ce80012fc17",
+  "blk.35.ffn_gate.weight": "5ad84f5a0c8edcfea1ecf1a3e3d21d85ceda0c4ad9e3c6ca68885eeff8ed3c2f",
+  "blk.35.ffn_up.weight": "1c4330d9dc71bf4c98812c34356c51f520f47610a534152aa6d29284b758090d",
+  "blk.36.attn_k.weight": "ef720655e5ca2465f13db2dfc4732fb4ef2c9d53acde52f514fd4f301e974081",
+  "blk.36.attn_norm.weight": "88f4b9310b3c8c2644e3029160cd35678c79dfa59280430e03f5c29a6fe84a58",
+  "blk.36.attn_output.weight": "aec6f915fffd7bb72cd783273e871b4f09605950089d45e72059d1316b6c4b01",
+  "blk.36.attn_q.weight": "72f9408a2405d42f8db6ce5fcf1d26a3660b6f225fc60e77d0277109cfcb82ed",
+  "blk.36.attn_v.weight": "0f3b3d851dc44b3893ef53f6cca5b4acc9658bacfe1cc2d13c3d704ddd409b67",
+  "blk.36.ffn_down.weight": "470aec48ce8c5129a6654d9fd26fcae72776f9fc1429a8bb05818072a876475d",
+  "blk.36.ffn_gate.weight": "7f5f296d09cf55679767b5d15de3eff489c456782119f25204be4b1647f18dcf",
+  "blk.36.ffn_up.weight": "b7ef74a1f7ffb4982711d93f1787be3a70edc3d2358d5203c41d8900508037d4",
+  "blk.37.attn_k.weight": "c4ffa5412e4ff2dcfe1aed991c1f54169fd171a4c7638e4b9f21a1ca64c5e1d6",
+  "blk.37.attn_norm.weight": "4eb6c888d841cccfacf5b963f8611120f6ff24b84af0b5714fd9ab36dcda422f",
+  "blk.37.attn_output.weight": "db2a7bbf9682f9f6eea672dae8e150738f1bf74dbc80edc7022017a3f040c8ac",
+  "blk.37.attn_q.weight": "e38c0462aff139afcbab289189823527e453abc9e541154adde5e7af88cacf0b",
+  "blk.37.attn_v.weight": "952eb2492ed452a72f96bcc12d4b2affad9dfdf46ee39ce4a5d7b57a5dc301e5",
+  "blk.37.ffn_down.weight": "25f23a8fbc44febf6dc4848fd7fe03a580e2822bd3b3b5a51f4990826bfe3e4e",
+  "blk.37.ffn_gate.weight": "707da5eb40118b035305d3262444382351f170a20a537386a70e90c5a83a7817",
+  "blk.37.ffn_up.weight": "d2d2ba5cfc4ef47338dd7384219e22bf030a5a2209e0354d88f5bbaaafd20e87",
+  "blk.38.attn_k.weight": "abc4bb189dedf7ce661e79028427623a4f91ac091c2cd60e31b58bc62b1cda71",
+  "blk.38.attn_norm.weight": "9f4803a7d03fd40fcb83d85f84eb1d5682ea4e5bb084f210c02850675d804c3d",
+  "blk.38.attn_output.weight": "77cb66007f1a41df7135d0e7f900ceb499c2f667dfc3f1a6ac01a3203bbd3ccf",
+  "blk.38.attn_q.weight": "d94a8b26cd375bf2bcaa76597e314aa8268ee50a479d00931e5e0e021feadb5d",
+  "blk.38.attn_v.weight": "660c907888bc5016dc69b7d35fe6f55c7ded697c93be0e2d332a2f17aff88758",
+  "blk.38.ffn_down.weight": "6f06173bae5b00ffaf88ef383619a8b9c6a8d0d5c6494695d17f6c1de1a68a13",
+  "blk.38.ffn_gate.weight": "89f99be149d03f116527bfcabe073c50001c874de40fb6e817f6619027f3cd05",
+  "blk.38.ffn_up.weight": "8d57557c8d5e2d2688b73f01dddf1ce8d5194990cda6358153320aea88aac7f8",
+  "blk.39.attn_k.weight": "21be09c988b46c8393e6c2ec9230f3b5136eb7607dd1953ba92d0811c2f0dd75",
+  "blk.39.attn_norm.weight": "ba7c1912dd1c4e2d16917201f62396fd0600e4a451137eaddff255548c209abd",
+  "blk.39.attn_output.weight": "acfaf4abb3fd27fd899b5563c3877f176b597d8f6cdb2f2fd3f3a0bd4da15ed6",
+  "blk.39.attn_q.weight": "e8adbc140d4c8f0db2a27ca584c5531d5b1e080555fe627e34d80d0814a92bed",
+  "blk.39.attn_v.weight": "92f96b0e1f724e73a0f90a76c145654418844c04a6d4b14c05eb5af8a62bf8dc",
+  "blk.39.ffn_down.weight": "4d9ee7c65fc16fe95d10c47b79ac6a525741947600a64b5fcea5d300a82c50de",
+  "blk.39.ffn_gate.weight": "7e18507989f39b32191133d2657c2ee3b74f42f070579204d727eb72215793d1",
+  "blk.39.ffn_up.weight": "22cda752269c9757ba918abede1df95bb0f83a5c772dea13c8deea3d5f2723d9",
+  "output_norm.weight": "2858cf0e39d32caf52b7861378ace076000241e147f10b9eb21d8a5cd149e3cb"
+}
@@ -9,8 +9,6 @@ import (
 	"path/filepath"
 	"runtime"
 	"strings"
-
-	"github.com/ollama/ollama/envconfig"
 )

 // Determine if the given ROCm lib directory is usable by checking for existence of some glob patterns
@@ -41,13 +39,10 @@ func commonAMDValidateLibDir() (string, error) {
 	// Favor our bundled version

 	// Installer payload location if we're running the installed binary
-	exe, err := os.Executable()
-	if err == nil {
-		rocmTargetDir := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
-		if rocmLibUsable(rocmTargetDir) {
-			slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
-			return rocmTargetDir, nil
-		}
+	rocmTargetDir := filepath.Join(LibOllamaPath, "rocm")
+	if rocmLibUsable(rocmTargetDir) {
+		slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
+		return rocmTargetDir, nil
 	}

 	// Prefer explicit HIP env var
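After this change the common path probes a single LibOllamaPath/rocm directory instead of walking paths relative to the executable. rocmLibUsable, referenced above, reduces to a glob-existence test over expected library names; a sketch of that kind of check, where the pattern is an assumption rather than the exact list the real function uses:

package main

import (
	"fmt"
	"path/filepath"
)

// usable reports whether dir contains anything matching the given glob,
// e.g. "libhipblas.so*" on Linux. The pattern here is illustrative only.
func usable(dir, pattern string) bool {
	matches, err := filepath.Glob(filepath.Join(dir, pattern))
	return err == nil && len(matches) > 0
}

func main() {
	fmt.Println(usable("/usr/lib/ollama/rocm", "libhipblas.so*"))
}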
@@ -77,8 +77,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {

 	gfxOverride := envconfig.HsaOverrideGfxVersion()
 	var supported []string
-	depPaths := LibraryDirs()
-	libDir := ""
+	var libDir string

 	// The amdgpu driver always exposes the host CPU(s) first, but we have to skip them and subtract
 	// from the other IDs to get alignment with the HIP libraries expectations (zero is the first GPU, not the CPU)
@@ -353,9 +352,8 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 			})
 			return nil, err
 		}
-		depPaths = append(depPaths, libDir)
 	}
-	gpuInfo.DependencyPath = depPaths
+	gpuInfo.DependencyPath = []string{libDir}

 	if gfxOverride == "" {
 		// Only load supported list once
@@ -5,7 +5,6 @@ import (
 	"errors"
 	"fmt"
 	"log/slog"
-	"os"
 	"path/filepath"
 	"slices"
 	"strconv"
@@ -50,14 +49,13 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 		slog.Info(err.Error())
 		return nil, err
 	}
-	depPaths := LibraryDirs()
 	libDir, err := AMDValidateLibDir()
 	if err != nil {
 		err = fmt.Errorf("unable to verify rocm library: %w", err)
 		slog.Warn(err.Error())
 		return nil, err
 	}
-	depPaths = append(depPaths, libDir)

 	var supported []string
 	gfxOverride := envconfig.HsaOverrideGfxVersion()
@@ -113,7 +111,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 			UnreliableFreeMemory: true,

 			ID:             strconv.Itoa(i), // TODO this is probably wrong if we specify visible devices
-			DependencyPath: depPaths,
+			DependencyPath: []string{libDir},
 			MinimumMemory:  rocmMinimumMemory,
 			Name:           name,
 			Compute:        gfx,
@@ -164,9 +162,7 @@ func AMDValidateLibDir() (string, error) {
 	}

 	// Installer payload (if we're running from some other location)
-	localAppData := os.Getenv("LOCALAPPDATA")
-	appDir := filepath.Join(localAppData, "Programs", "Ollama")
-	rocmTargetDir := filepath.Join(appDir, envconfig.LibRelativeToExe(), "lib", "ollama")
+	rocmTargetDir := filepath.Join(LibOllamaPath, "rocm")
 	if rocmLibUsable(rocmTargetDir) {
 		slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
 		return rocmTargetDir, nil
@@ -23,7 +23,6 @@ import (

 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/runners"
 )

 type cudaHandles struct {
@@ -101,15 +100,7 @@ func initCudaHandles() *cudaHandles {

 	// Aligned with driver, we can't carry as payloads
 	nvcudaMgmtPatterns := NvcudaGlobs
-	if runtime.GOOS == "windows" {
-		localAppData := os.Getenv("LOCALAPPDATA")
-		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
-	}
-	libDirs := LibraryDirs()
-	for _, d := range libDirs {
-		cudartMgmtPatterns = append(cudartMgmtPatterns, filepath.Join(d, CudartMgmtName))
-	}
+	cudartMgmtPatterns = append(cudartMgmtPatterns, filepath.Join(LibOllamaPath, "cuda_v*", CudartMgmtName))
 	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)

 	if len(NvmlGlobs) > 0 {
@@ -240,7 +231,7 @@ func GetGPUInfo() GpuInfoList {
 	if err != nil {
 		slog.Warn("error looking up system memory", "error", err)
 	}
-	depPaths := LibraryDirs()
+
 	details, err := GetCPUDetails()
 	if err != nil {
 		slog.Warn("failed to lookup CPU details", "error", err)
@@ -248,11 +239,9 @@ func GetGPUInfo() GpuInfoList {
 	cpus = []CPUInfo{
 		{
 			GpuInfo: GpuInfo{
 				memInfo: mem,
 				Library: "cpu",
-				Variant:        runners.GetCPUCapability().String(),
-				ID:             "0",
-				DependencyPath: depPaths,
+				ID:      "0",
 			},
 			CPUs: details,
 		},
@@ -294,17 +283,13 @@ func GetGPUInfo() GpuInfoList {
 			gpuInfo.DriverMajor = driverMajor
 			gpuInfo.DriverMinor = driverMinor
 			variant := cudaVariant(gpuInfo)
-			if depPaths != nil {
-				gpuInfo.DependencyPath = depPaths
-				// Check for variant specific directory
-				if variant != "" {
-					for _, d := range depPaths {
-						if _, err := os.Stat(filepath.Join(d, "cuda_"+variant)); err == nil {
-							// Put the variant directory first in the search path to avoid runtime linking to the wrong library
-							gpuInfo.DependencyPath = append([]string{filepath.Join(d, "cuda_"+variant)}, gpuInfo.DependencyPath...)
-							break
-						}
-					}
+
+			// Start with our bundled libraries
+			if variant != "" {
+				variantPath := filepath.Join(LibOllamaPath, "cuda_"+variant)
+				if _, err := os.Stat(variantPath); err == nil {
+					// Put the variant directory first in the search path to avoid runtime linking to the wrong library
+					gpuInfo.DependencyPath = append([]string{variantPath}, gpuInfo.DependencyPath...)
 				}
 			}
 			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
@@ -376,7 +361,7 @@ func GetGPUInfo() GpuInfoList {
 			gpuInfo.FreeMemory = uint64(memInfo.free)
 			gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
 			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
-			gpuInfo.DependencyPath = depPaths
+			gpuInfo.DependencyPath = []string{LibOllamaPath}
 			oneapiGPUs = append(oneapiGPUs, gpuInfo)
 		}
 	}
@@ -512,33 +497,30 @@ func GetGPUInfo() GpuInfoList {
 }

 func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
 	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
-	var ldPaths []string
 	gpuLibPaths := []string{}
 	slog.Debug("Searching for GPU library", "name", baseLibName)

-	// Start with our bundled libraries
-	patterns := []string{}
-	for _, d := range LibraryDirs() {
-		patterns = append(patterns, filepath.Join(d, baseLibName))
-	}
+	// search our bundled libraries first
+	patterns := []string{filepath.Join(LibOllamaPath, baseLibName)}

+	var ldPaths []string
 	switch runtime.GOOS {
 	case "windows":
-		ldPaths = strings.Split(os.Getenv("PATH"), ";")
+		ldPaths = strings.Split(os.Getenv("PATH"), string(os.PathListSeparator))
 	case "linux":
-		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
-	default:
-		return gpuLibPaths
+		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), string(os.PathListSeparator))
 	}

-	// Then with whatever we find in the PATH/LD_LIBRARY_PATH
-	for _, ldPath := range ldPaths {
-		d, err := filepath.Abs(ldPath)
+	// then search the system's LD_LIBRARY_PATH
+	for _, p := range ldPaths {
+		p, err := filepath.Abs(p)
 		if err != nil {
 			continue
 		}
-		patterns = append(patterns, filepath.Join(d, baseLibName))
+		patterns = append(patterns, filepath.Join(p, baseLibName))
 	}

+	// finally, search the default patterns provided by the caller
 	patterns = append(patterns, defaultPatterns...)
 	slog.Debug("gpu library search", "globs", patterns)
 	for _, pattern := range patterns {
@@ -715,23 +697,6 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
 	}
 }

-func LibraryDirs() []string {
-	// dependencies can exist wherever we found the runners (e.g. build tree for developers) and relative to the executable
-	// This can be simplified once we no longer carry runners as payloads
-	exe, err := os.Executable()
-	if err != nil {
-		slog.Warn("failed to lookup executable path", "error", err)
-		return nil
-	}
-	lib := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
-	if _, err := os.Stat(lib); err != nil {
-		return nil
-	}
-	return []string{lib}
-}
-
 func GetSystemInfo() SystemInfo {
 	gpus := GetGPUInfo()
 	gpuMutex.Lock()
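After this change, `FindGPULibs` builds one ordered glob list: the bundled `LibOllamaPath` first, then each entry of the loader path, then caller-supplied defaults. A standalone sketch of the same search strategy (illustrative names only, not the repo's exact code):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// findLibSketch mirrors the search order in the hunk above: a bundled
// directory first, then each entry of the loader path, then any
// caller-supplied fallback globs.
func findLibSketch(bundledDir, baseLibName string, defaults []string) []string {
	patterns := []string{filepath.Join(bundledDir, baseLibName)}
	for _, p := range strings.Split(os.Getenv("LD_LIBRARY_PATH"), string(os.PathListSeparator)) {
		if p, err := filepath.Abs(p); err == nil {
			patterns = append(patterns, filepath.Join(p, baseLibName))
		}
	}
	patterns = append(patterns, defaults...)

	var found []string
	for _, pattern := range patterns {
		if matches, err := filepath.Glob(pattern); err == nil {
			found = append(found, matches...)
		}
	}
	return found
}

func main() {
	fmt.Println(findLibSketch("/usr/local/lib/ollama", "libcudart.so*", nil))
}
```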
@@ -15,7 +15,6 @@ import (
 	"syscall"

 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/runners"
 )

 const (
@@ -28,7 +27,6 @@ func GetGPUInfo() GpuInfoList {
 	return []GpuInfo{
 		{
 			Library: "cpu",
-			Variant: runners.GetCPUCapability().String(),
 			memInfo: mem,
 		},
 	}
@@ -51,7 +49,6 @@ func GetCPUInfo() GpuInfoList {
 	return []GpuInfo{
 		{
 			Library: "cpu",
-			Variant: runners.GetCPUCapability().String(),
 			memInfo: mem,
 		},
 	}
discover/path.go (new file, 57 lines)
@@ -0,0 +1,57 @@
+package discover
+
+import (
+	"os"
+	"path/filepath"
+	"runtime"
+)
+
+// LibPath is a path to lookup dynamic libraries
+// in development it's usually 'build/lib/ollama'
+// in distribution builds it's 'lib/ollama' on Windows
+// '../lib/ollama' on Linux and the executable's directory on macOS
+// note: distribution builds, additional GPU-specific libraries are
+// found in subdirectories of the returned path, such as
+// 'cuda_v11', 'cuda_v12', 'rocm', etc.
+var LibOllamaPath string = func() string {
+	exe, err := os.Executable()
+	if err != nil {
+		return ""
+	}
+
+	exe, err = filepath.EvalSymlinks(exe)
+	if err != nil {
+		return ""
+	}
+
+	var libPath string
+	switch runtime.GOOS {
+	case "windows":
+		libPath = filepath.Join(filepath.Dir(exe), "lib", "ollama")
+	case "linux":
+		libPath = filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
+	case "darwin":
+		libPath = filepath.Dir(exe)
+	}
+
+	cwd, err := os.Getwd()
+	if err != nil {
+		return ""
+	}
+
+	paths := []string{
+		libPath,
+
+		// build paths for development
+		filepath.Join(filepath.Dir(exe), "build", "lib", "ollama"),
+		filepath.Join(cwd, "build", "lib", "ollama"),
+	}
+
+	for _, p := range paths {
+		if _, err := os.Stat(p); err == nil {
+			return p
+		}
+	}
+
+	return filepath.Dir(exe)
+}()
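Callers treat `LibOllamaPath` as the root for GPU-specific subdirectories. A small usage sketch — the `cuda_v12` subdirectory name is an example; which variants exist depends on how a given distribution was built, and this only compiles inside the repo module:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"

	"github.com/ollama/ollama/discover"
)

func main() {
	// The variant subdirectory ("cuda_v12" here) is illustrative; which
	// ones exist depends on how the distribution was built.
	variantDir := filepath.Join(discover.LibOllamaPath, "cuda_v12")
	if _, err := os.Stat(variantDir); err == nil {
		fmt.Println("bundled CUDA v12 libraries at", variantDir)
	} else {
		fmt.Println("no cuda_v12 subdirectory under", discover.LibOllamaPath)
	}
}
```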
@@ -5,7 +5,6 @@ import (
 	"log/slog"

 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/runners"
 )

 type memInfo struct {
@@ -107,7 +106,7 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
 	for _, info := range l {
 		found := false
 		requested := info.Library
-		if info.Variant != runners.CPUCapabilityNone.String() {
+		if info.Variant != "" {
 			requested += "_" + info.Variant
 		}
 		for i, lib := range libs {
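`ByLibrary` groups devices by a `library[_variant]` key; with the runners package gone, an empty `Variant` now simply means no suffix. A minimal sketch of that grouping rule, using simplified types rather than the repo's structs:

```go
package main

import "fmt"

// device is a simplified stand-in for the repo's GpuInfo: just the two
// fields that the grouping key is built from.
type device struct {
	Library string
	Variant string
}

// deviceKey reproduces the key construction from the hunk above:
// library name plus an optional "_variant" suffix.
func deviceKey(d device) string {
	key := d.Library
	if d.Variant != "" {
		key += "_" + d.Variant
	}
	return key
}

func main() {
	devs := []device{{"cuda", "v12"}, {"cuda", "v11"}, {"cpu", ""}}
	groups := map[string][]device{}
	for _, d := range devs {
		k := deviceKey(d)
		groups[k] = append(groups[k], d)
	}
	fmt.Println(groups) // map[cpu:[{cpu }] cuda_v11:[{cuda v11}] cuda_v12:[{cuda v12}]]
}
```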
@@ -2,7 +2,7 @@

 ### Getting Started
 * [Quickstart](../README.md#quickstart)
-* [Examples](../examples)
+* [Examples](./examples.md)
 * [Importing models](./import.md)
 * [Linux Documentation](./linux.md)
 * [Windows Documentation](./windows.md)
docs/api.md (12 changed lines)
@@ -306,7 +306,7 @@ curl http://localhost:11434/api/generate -d '{

 #### Response

-```
+```json
 {
   "model": "llava",
   "created_at": "2023-11-03T15:36:02.583064Z",
@@ -495,14 +495,14 @@ Generate the next message in a chat with a provided model. This is a streaming e

 - `model`: (required) the [model name](#model-names)
 - `messages`: the messages of the chat, this can be used to keep a chat memory
-- `tools`: tools for the model to use if supported. Requires `stream` to be set to `false`
+- `tools`: list of tools in JSON for the model to use if supported

 The `message` object has the following fields:

 - `role`: the role of the message, either `system`, `user`, `assistant`, or `tool`
 - `content`: the content of the message
 - `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
-- `tool_calls` (optional): a list of tools the model wants to use
+- `tool_calls` (optional): a list of tools in JSON that the model wants to use

 Advanced parameters (optional):

@@ -795,7 +795,7 @@ curl http://localhost:11434/api/chat -d '{

 ##### Request

-```
+```shell
 curl http://localhost:11434/api/chat -d '{
   "model": "llama3.2",
   "messages": [
@@ -870,7 +870,7 @@ If the messages array is empty, the model will be loaded into memory.

 ##### Request

-```
+```shell
 curl http://localhost:11434/api/chat -d '{
   "model": "llama3.2",
   "messages": []
@@ -897,7 +897,7 @@ If the messages array is empty and the `keep_alive` parameter is set to `0`, a m

 ##### Request

-```
+```shell
 curl http://localhost:11434/api/chat -d '{
   "model": "llama3.2",
   "messages": [],
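The reworded `tools` bullet drops the old requirement that `stream` be `false`. A hedged Go sketch of posting a chat request with a `tools` array to a local server — the field names follow the documented API, while the weather tool itself is a made-up example:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Illustrative payload for POST /api/chat with a single made-up tool.
	payload := map[string]any{
		"model": "llama3.2",
		"messages": []map[string]string{
			{"role": "user", "content": "What is the weather in Toronto?"},
		},
		"tools": []map[string]any{{
			"type": "function",
			"function": map[string]any{
				"name":        "get_current_weather",
				"description": "Get the current weather for a city",
				"parameters": map[string]any{
					"type":       "object",
					"properties": map[string]any{"city": map[string]string{"type": "string"}},
					"required":   []string{"city"},
				},
			},
		}},
		"stream": false,
	}
	body, _ := json.Marshal(payload)
	resp, err := http.Post("http://localhost:11434/api/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```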
@@ -1,165 +1,131 @@
 # Development

-Install required tools:
+Install prerequisites:

-- go version 1.22 or higher
-- OS specific C/C++ compiler (see below)
-- GNU Make
+- [Go](https://go.dev/doc/install)
+- C/C++ Compiler e.g. Clang on macOS, [TDM-GCC](https://jmeubank.github.io/tdm-gcc/download/) (Windows amd64) or [llvm-mingw](https://github.com/mstorsjo/llvm-mingw) (Windows arm64), GCC/Clang on Linux.

-## Overview
-
-Ollama uses a mix of Go and C/C++ code to interface with GPUs. The C/C++ code is compiled with both CGO and GPU library specific compilers. A set of GNU Makefiles are used to compile the project. GPU Libraries are auto-detected based on the typical environment variables used by the respective libraries, but can be overridden if necessary. The default make target will build the runners and primary Go Ollama application that will run within the repo directory. Throughout the examples below `-j 5` is suggested for 5 parallel jobs to speed up the build. You can adjust the job count based on your CPU Core count to reduce build times. If you want to relocate the built binaries, use the `dist` target and recursively copy the files in `./dist/$OS-$ARCH/` to your desired location. To learn more about the other make targets use `make help`
-
-Once you have built the GPU/CPU runners, you can compile the main application with `go build .`
-
-### MacOS
-
-[Download Go](https://go.dev/dl/)
-
-```bash
-make -j 5
-```
-
-Now you can run `ollama`:
-
-```bash
-./ollama
-```
-
-#### Xcode 15 warnings
-
-If you are using Xcode newer than version 14, you may see a warning during `go build` about `ld: warning: ignoring duplicate libraries: '-lobjc'` due to Golang issue https://github.com/golang/go/issues/67799 which can be safely ignored. You can suppress the warning with `export CGO_LDFLAGS="-Wl,-no_warn_duplicate_libraries"`
-
-### Linux
-
-#### Linux CUDA (NVIDIA)
-
-_Your operating system distribution may already have packages for NVIDIA CUDA. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!_
-
-Install `make`, `gcc` and `golang` as well as [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads)
-development and runtime packages.
-
-Typically the makefile will auto-detect CUDA, however, if your Linux distro
-or installation approach uses alternative paths, you can specify the location by
-overriding `CUDA_PATH` to the location of the CUDA toolkit. You can customize
-a set of target CUDA architectures by setting `CUDA_ARCHITECTURES` (e.g. `CUDA_ARCHITECTURES=50;60;70`)
+Then build and run Ollama from the root directory of the repository:

 ```
-make -j 5
+go run . serve
 ```

-If both v11 and v12 tookkits are detected, runners for both major versions will be built by default. You can build just v12 with `make cuda_v12`
-
-#### Older Linux CUDA (NVIDIA)
-
-To support older GPUs with Compute Capability 3.5 or 3.7, you will need to use an older version of the Driver from [Unix Driver Archive](https://www.nvidia.com/en-us/drivers/unix/) (tested with 470) and [CUDA Toolkit Archive](https://developer.nvidia.com/cuda-toolkit-archive) (tested with cuda V11). When you build Ollama, you will need to set two make variable to adjust the minimum compute capability Ollama supports via `make -j 5 CUDA_ARCHITECTURES="35;37;50;52" EXTRA_GOLDFLAGS="\"-X=github.com/ollama/ollama/discover.CudaComputeMajorMin=3\" \"-X=github.com/ollama/ollama/discover.CudaComputeMinorMin=5\""`. To find the Compute Capability of your older GPU, refer to [GPU Compute Capability](https://developer.nvidia.com/cuda-gpus).
-
-#### Linux ROCm (AMD)
-
-_Your operating system distribution may already have packages for AMD ROCm. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!_
-
-Install [ROCm](https://rocm.docs.amd.com/en/latest/) development packages first, as well as `make`, `gcc`, and `golang`.
-
-Typically the build scripts will auto-detect ROCm, however, if your Linux distro
-or installation approach uses unusual paths, you can specify the location by
-specifying an environment variable `HIP_PATH` to the location of the ROCm
-install (typically `/opt/rocm`). You can also customize
-the AMD GPU targets by setting HIP_ARCHS (e.g. `HIP_ARCHS=gfx1101;gfx1102`)
+## macOS (Apple Silicon)
+
+macOS Apple Silicon supports Metal which is built-in to the Ollama binary. No additional steps are required.
+
+## macOS (Intel)
+
+Install prerequisites:
+
+- [CMake](https://cmake.org/download/) or `brew install cmake`
+
+Then, configure and build the project:

 ```
-make -j 5
+cmake -B build
+cmake --build build
 ```

-ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
-
-#### Containerized Linux Build
-
-If you have Docker and buildx available, you can build linux binaries with `./scripts/build_linux.sh` which has the CUDA and ROCm dependencies included. The resulting artifacts are placed in `./dist` and by default the script builds both arm64 and amd64 binaries. If you want to build only amd64, you can build with `PLATFORM=linux/amd64 ./scripts/build_linux.sh`
-
-### Windows
-
-The following tools are required as a minimal development environment to build CPU inference support.
-
-- Go version 1.22 or higher
-  - https://go.dev/dl/
-- Git
-  - https://git-scm.com/download/win
-- clang with gcc compat and Make. There are multiple options on how to go about installing these tools on Windows. We have verified the following, but others may work as well:
-  - [MSYS2](https://www.msys2.org/)
-    - After installing, from an MSYS2 terminal, run `pacman -S mingw-w64-clang-x86_64-gcc-compat mingw-w64-clang-x86_64-clang make` to install the required tools
-  - Assuming you used the default install prefix for msys2 above, add `C:\msys64\clang64\bin` and `c:\msys64\usr\bin` to your environment variable `PATH` where you will perform the build steps below (e.g. system-wide, account-level, powershell, cmd, etc.)
-
-> [!NOTE]
-> Due to bugs in the GCC C++ library for unicode support, Ollama should be built with clang on windows.
+Lastly, run Ollama:

 ```
-make -j 5
+go run . serve
 ```

-#### GPU Support
-
-The GPU tools require the Microsoft native build tools. To build either CUDA or ROCm, you must first install MSVC via Visual Studio:
-
-- Make sure to select `Desktop development with C++` as a Workload during the Visual Studio install
-- You must complete the Visual Studio install and run it once **BEFORE** installing CUDA or ROCm for the tools to properly register
-- Add the location of the **64 bit (x64)** compiler (`cl.exe`) to your `PATH`
-- Note: the default Developer Shell may configure the 32 bit (x86) compiler which will lead to build failures. Ollama requires a 64 bit toolchain.
+## Windows
+
+Install prerequisites:
+
+- [CMake](https://cmake.org/download/)
+- [Visual Studio 2022](https://visualstudio.microsoft.com/downloads/) including the Native Desktop Workload
+- (Optional) AMD GPU support
+  - [ROCm](https://rocm.github.io/install.html)
+  - [Ninja](https://github.com/ninja-build/ninja/releases)
+- (Optional) NVIDIA GPU support
+  - [CUDA SDK](https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=11&target_type=exe_network)

-#### Windows CUDA (NVIDIA)
-
-In addition to the common Windows development tools and MSVC described above:
-
-- [NVIDIA CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html)
+> [!IMPORTANT]
+> Ensure prerequisites are in `PATH` before running CMake.
+
+> [!IMPORTANT]
+> ROCm is not compatible with Visual Studio CMake generators. Use `-GNinja` when configuring the project.
+
+> [!IMPORTANT]
+> CUDA is only compatible with Visual Studio CMake generators.

-#### Windows ROCm (AMD Radeon)
-
-In addition to the common Windows development tools and MSVC described above:
-
-- [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html)
+Then, configure and build the project:

-#### Windows arm64
-
-The default `Developer PowerShell for VS 2022` may default to x86 which is not what you want. To ensure you get an arm64 development environment, start a plain PowerShell terminal and run:
-
-```powershell
-import-module 'C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\Common7\\Tools\\Microsoft.VisualStudio.DevShell.dll'
-Enter-VsDevShell -Arch arm64 -vsinstallpath 'C:\\Program Files\\Microsoft Visual Studio\\2022\\Community' -skipautomaticlocation
-```
-
-You can confirm with `write-host $env:VSCMD_ARG_TGT_ARCH`
-
-Follow the instructions at https://www.msys2.org/wiki/arm64/ to set up an arm64 msys2 environment. Ollama requires gcc and mingw32-make to compile, which is not currently available on Windows arm64, but a gcc compatibility adapter is available via `mingw-w64-clang-aarch64-gcc-compat`. At a minimum you will need to install the following:

 ```
-pacman -S mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-gcc-compat mingw-w64-clang-aarch64-make make
+cmake -B build
+cmake --build build --config Release
 ```

-You will need to ensure your PATH includes go, cmake, gcc and clang mingw32-make to build ollama from source. (typically `C:\msys64\clangarm64\bin\`)
-
-## Advanced CPU Vector Settings
-
-On x86, running `make` will compile several CPU runners which can run on different CPU families. At runtime, Ollama will auto-detect the best variation to load. If GPU libraries are present at build time, Ollama also compiles GPU runners with the `AVX` CPU vector feature enabled. This provides a good performance balance when loading large models that split across GPU and CPU with broad compatibility. Some users may prefer no vector extensions (e.g. older Xeon/Celeron processors, or hypervisors that mask the vector features) while other users may prefer turning on many more vector extensions to further improve performance for split model loads.
-
-To customize the set of CPU vector features enabled for a CPU runner and all GPU runners, use CUSTOM_CPU_FLAGS during the build.
-
-To build without any vector flags:
+Lastly, run Ollama:

 ```
-make CUSTOM_CPU_FLAGS=""
+go run . serve
 ```

-To build with both AVX and AVX2:
-```
-make CUSTOM_CPU_FLAGS=avx,avx2
-```
-
-To build with AVX512 features turned on:
+## Windows (ARM)
+
+Windows ARM does not support additional acceleration libraries at this time.
+
+## Linux
+
+Install prerequisites:
+
+- [CMake](https://cmake.org/download/) or `sudo apt install cmake` or `sudo dnf install cmake`
+- (Optional) AMD GPU support
+  - [ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/quick-start.html)
+- (Optional) NVIDIA GPU support
+  - [CUDA SDK](https://developer.nvidia.com/cuda-downloads)
+
+> [!IMPORTANT]
+> Ensure prerequisites are in `PATH` before running CMake.
+
+Then, configure and build the project:

 ```
-make CUSTOM_CPU_FLAGS=avx,avx2,avx512,avx512vbmi,avx512vnni,avx512bf16
+cmake -B build
+cmake --build build
 ```

-> [!NOTE]
-> If you are experimenting with different flags, make sure to do a `make clean` between each change to ensure everything is rebuilt with the new compiler flags
+Lastly, run Ollama:
+
+```
+go run . serve
+```
+
+## Docker
+
+```
+docker build .
+```
+
+### ROCm
+
+```
+docker build --build-arg FLAVOR=rocm .
+```
+
+## Running tests
+
+To run tests, use `go test`:
+
+```
+go test ./...
+```
+
+## Library detection
+
+Ollama looks for acceleration libraries in the following paths relative to the `ollama` executable:
+
+* `./lib/ollama` (Windows)
+* `../lib/ollama` (Linux)
+* `.` (macOS)
+* `build/lib/ollama` (for development)
+
+If the libraries are not found, Ollama will not run with any acceleration libraries.
@@ -38,7 +38,7 @@ Numeric IDs may be used, however ordering may vary, so UUIDs are more reliable.
 You can discover the UUID of your GPUs by running `nvidia-smi -L` If you want to
 ignore the GPUs and force CPU usage, use an invalid GPU ID (e.g., "-1")

-### Laptop Suspend Resume
+### Linux Suspend Resume

 On linux, after a suspend/resume cycle, sometimes Ollama will fail to discover
 your NVIDIA GPU, and fallback to running on the CPU. You can workaround this
@@ -152,7 +152,7 @@ Use `OLLAMA_VERSION` environment variable with the install script to install a s
 For example:

 ```shell
-curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.3.9 sh
+curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.5.7 sh
 ```

 ## Viewing logs
@@ -186,3 +186,9 @@ sudo rm -r /usr/share/ollama
 sudo userdel ollama
 sudo groupdel ollama
 ```
+
+Remove installed libraries:
+
+```shell
+sudo rm -rf /usr/local/lib/ollama
+```
@@ -67,8 +67,6 @@ To use this:
 3. `ollama run choose-a-model-name`
 4. Start using the model!

-More examples are available in the [examples directory](../examples).
-
 To view the Modelfile of a given model, use the `ollama show --modelfile` command.

 ```bash
@@ -155,7 +153,6 @@ PARAMETER <parameter> <parametervalue>
 | temperature | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8) | float | temperature 0.7 |
 | seed | Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0) | int | seed 42 |
 | stop | Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate `stop` parameters in a modelfile. | string | stop "AI assistant:" |
-| tfs_z | Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1) | float | tfs_z 1 |
 | num_predict | Maximum number of tokens to predict when generating text. (Default: -1, infinite generation) | int | num_predict 42 |
 | top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 |
 | top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 |
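These parameters can also be set per-request through the API's `options` field rather than in a Modelfile. A hedged Go sketch — the option names follow the parameter table above, the values are arbitrary:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Options mirror the Modelfile PARAMETER names from the table above.
	payload := map[string]any{
		"model":  "llama3.2",
		"prompt": "Why is the sky blue?",
		"stream": false,
		"options": map[string]any{
			"temperature": 0.7,
			"seed":        42,
			"top_k":       40,
			"top_p":       0.9,
		},
	}
	body, _ := json.Marshal(payload)
	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```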
@@ -288,12 +288,3 @@ func Values() map[string]string {
 func Var(key string) string {
 	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
 }
-
-// On windows, we keep the binary at the top directory, but
-// other platforms use a "bin" directory, so this returns ".."
-func LibRelativeToExe() string {
-	if runtime.GOOS == "windows" {
-		return "."
-	}
-	return ".."
-}
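For context, the `Var` helper kept above trims whitespace and surrounding quotes from environment values before any parsing. A quick usage sketch, reimplementing the one-liner standalone for illustration:

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// varSketch mirrors envconfig.Var: read an env var, drop surrounding
// whitespace, then strip any single or double quotes.
func varSketch(key string) string {
	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
}

func main() {
	os.Setenv("OLLAMA_EXAMPLE", `  "some value"  `)
	fmt.Printf("%q\n", varSketch("OLLAMA_EXAMPLE")) // "some value"
}
```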
@@ -40,8 +40,6 @@ func HumanBytes(b int64) string {
 	}

 	switch {
-	case value >= 100:
-		return fmt.Sprintf("%d %s", int(value), unit)
 	case value >= 10:
 		return fmt.Sprintf("%d %s", int(value), unit)
 	case value != math.Trunc(value):
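The removed `value >= 100` arm was redundant: the `value >= 10` arm produces the identical integer formatting. A quick sketch of the simplified rounding rule — a standalone reimplementation for illustration, not the repo's full `HumanBytes`:

```go
package main

import (
	"fmt"
	"math"
)

// humanish demonstrates the post-change rule: values of two or more
// digits print as integers, fractional single-digit values keep one
// decimal place, and whole single-digit values print as integers.
func humanish(value float64, unit string) string {
	switch {
	case value >= 10:
		return fmt.Sprintf("%d %s", int(value), unit)
	case value != math.Trunc(value):
		return fmt.Sprintf("%.1f %s", value, unit)
	default:
		return fmt.Sprintf("%d %s", int(value), unit)
	}
}

func main() {
	fmt.Println(humanish(999, "MB")) // "999 MB" — covered by the >= 10 case
	fmt.Println(humanish(1.5, "KB")) // "1.5 KB"
	fmt.Println(humanish(1, "B"))    // "1 B"
}
```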
format/bytes_test.go (new file, 91 lines)
@@ -0,0 +1,91 @@
+package format
+
+import (
+	"testing"
+)
+
+func TestHumanBytes(t *testing.T) {
+	type testCase struct {
+		input    int64
+		expected string
+	}
+
+	tests := []testCase{
+		// Test bytes (B)
+		{0, "0 B"},
+		{1, "1 B"},
+		{999, "999 B"},
+
+		// Test kilobytes (KB)
+		{1000, "1 KB"},
+		{1500, "1.5 KB"},
+		{999999, "999 KB"},
+
+		// Test megabytes (MB)
+		{1000000, "1 MB"},
+		{1500000, "1.5 MB"},
+		{999999999, "999 MB"},
+
+		// Test gigabytes (GB)
+		{1000000000, "1 GB"},
+		{1500000000, "1.5 GB"},
+		{999999999999, "999 GB"},
+
+		// Test terabytes (TB)
+		{1000000000000, "1 TB"},
+		{1500000000000, "1.5 TB"},
+		{1999999999999, "2.0 TB"},
+
+		// Test fractional values
+		{1234, "1.2 KB"},
+		{1234567, "1.2 MB"},
+		{1234567890, "1.2 GB"},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.expected, func(t *testing.T) {
+			result := HumanBytes(tc.input)
+			if result != tc.expected {
+				t.Errorf("Expected %s, got %s", tc.expected, result)
+			}
+		})
+	}
+}
+
+func TestHumanBytes2(t *testing.T) {
+	type testCase struct {
+		input    uint64
+		expected string
+	}
+
+	tests := []testCase{
+		// Test bytes (B)
+		{0, "0 B"},
+		{1, "1 B"},
+		{1023, "1023 B"},
+
+		// Test kibibytes (KiB)
+		{1024, "1.0 KiB"},
+		{1536, "1.5 KiB"},
+		{1048575, "1024.0 KiB"},
+
+		// Test mebibytes (MiB)
+		{1048576, "1.0 MiB"},
+		{1572864, "1.5 MiB"},
+		{1073741823, "1024.0 MiB"},
+
+		// Test gibibytes (GiB)
+		{1073741824, "1.0 GiB"},
+		{1610612736, "1.5 GiB"},
+		{2147483648, "2.0 GiB"},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.expected, func(t *testing.T) {
+			result := HumanBytes2(tc.input)
+			if result != tc.expected {
+				t.Errorf("Expected %s, got %s", tc.expected, result)
+			}
+		})
+	}
+}
go.mod (3 changed lines)
@@ -24,7 +24,6 @@ require (
 	github.com/nlpodyssey/gopickle v0.3.0
 	github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
 	golang.org/x/image v0.22.0
-	golang.org/x/tools v0.28.0
 	gonum.org/v1/gonum v0.15.0
 )

@@ -72,7 +71,7 @@ require (
 	golang.org/x/arch v0.8.0 // indirect
 	golang.org/x/crypto v0.31.0
 	golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa
-	golang.org/x/net v0.32.0 // indirect
+	golang.org/x/net v0.25.0 // indirect
 	golang.org/x/sys v0.28.0
 	golang.org/x/term v0.27.0
 	golang.org/x/text v0.21.0
go.sum (6 changed lines)
@@ -257,8 +257,8 @@ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81R
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
 golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
-golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI=
-golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs=
+golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
+golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -309,8 +309,6 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapK
 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
-golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8=
-golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -1,22 +0,0 @@
-//go:build go1.24
-
-package grammar
-
-import "testing"
-
-func BenchmarkFromSchema(b *testing.B) {
-	for tt := range testCases(b) {
-		b.Run("", func(b *testing.B) {
-			s := []byte(tt.schema)
-
-			b.ReportAllocs()
-			for b.Loop() {
-				_, err := FromSchema(nil, s)
-				if err != nil {
-					b.Fatalf("GrammarFromSchema: %v", err)
-				}
-			}
-		})
-		return
-	}
-}
@@ -1,227 +0,0 @@
-package grammar
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"iter"
-	"strconv"
-
-	"github.com/ollama/ollama/grammar/jsonschema"
-)
-
-const jsonTerms = `
-# Unicode
-#
-# Unicode characters can be specified directly in the grammar, for example
-# hiragana ::= [ぁ-ゟ], or with escapes: 8-bit (\xXX), 16-bit (\uXXXX) or 32-bit
-# (\UXXXXXXXX).
-unicode ::= \x{hex}{2} | \u{hex}{4} | \U{hex}{8}
-
-# JSON grammar from RFC 7159
-null ::= "null"
-object ::= "{" (kv ("," kv)*)? "}"
-array ::= "[" (value ("," value)*)? "]"
-kv ::= string ":" value
-integer ::= "0" | [1-9] [0-9]*
-number ::= "-"? integer frac? exp?
-frac ::= "." [0-9]+
-exp ::= ("e" | "E") ("+" | "-") [0-9]+
-string ::= "\"" char* "\""
-escape ::= ["/" | "b" | "f" | "n" | "r" | "t" | unicode]
-char ::= [^"\\] | escape
-space ::= (" " | "\t" | "\n" | "\r")*
-hex ::= [0-9] | [a-f] | [A-F]
-boolean ::= "true" | "false"
-value ::= object | array | string | number | boolean | "null"
-
-# User-defined
-`
-
-// FromSchema generates a grammar from a JSON schema.
-func FromSchema(buf []byte, jsonSchema []byte) ([]byte, error) {
-	var s *jsonschema.Schema
-	if err := json.Unmarshal(jsonSchema, &s); err != nil {
-		return nil, err
-	}
-
-	var g builder
-
-	// "root" is the only rule that is guaranteed to exist, so we start
-	// with its length for padding, and then adjust it as we go.
-	g.pad = len("root")
-	for id := range dependencies("root", s) {
-		g.pad = max(g.pad, len(id))
-	}
-
-	g.b.WriteString(jsonTerms)
-
-	ids := make(map[*jsonschema.Schema]string)
-	for id, s := range dependencies("root", s) {
-		ids[s] = id
-		g.define(id)
-		if err := fromSchema(&g, ids, s); err != nil {
-			return nil, err
-		}
-	}
-	g.define("root")
-	if err := fromSchema(&g, ids, s); err != nil {
-		return nil, err
-	}
-	g.define("") // finalize the last rule
-	return g.b.Bytes(), nil
-}
-
-func fromSchema(g *builder, ids map[*jsonschema.Schema]string, s *jsonschema.Schema) error {
-	switch typ := s.EffectiveType(); typ {
-	case "array":
-		if len(s.PrefixItems) == 0 && s.Items == nil {
-			g.u("array")
-		} else {
-			g.q("[")
-			for i, s := range s.PrefixItems {
-				if i > 0 {
-					g.q(",")
-				}
-				g.u(ids[s])
-			}
-			if s.Items != nil {
-				g.u("(")
-				if len(s.PrefixItems) > 0 {
-					g.q(",")
-				}
-				g.u(ids[s.Items])
-				g.u(")*")
-			}
-			g.q("]")
-		}
-	case "object":
-		if len(s.Properties) == 0 {
-			g.u("object")
-		} else {
-			g.q("{")
-			for i, p := range s.Properties {
-				name := ids[p]
-				if i > 0 {
-					g.q(",")
-				}
-				g.q(p.Name)
-				g.q(":")
-				g.u(name)
-			}
-			g.q("}")
-		}
-	case "number":
-		buildConstrainedNumber(g, s)
-	case "string":
-		if len(s.Enum) == 0 {
-			g.u("string")
-		} else {
-			g.u("(")
-			for i, e := range s.Enum {
-				if i > 0 {
-					g.q("|")
-				}
-				g.q(string(e))
-			}
-			g.u(")")
-		}
-	case "boolean", "value", "null", "integer":
-		g.u(typ)
-	default:
-		return fmt.Errorf("%s: unsupported type %q", s.Name, typ)
-	}
-	return nil
-}
-
-// dependencies returns a sequence of all child dependencies of the schema in
-// post-order.
-//
-// The first value is the id/pointer to the dependency, and the second value
-// is the schema.
-func dependencies(id string, s *jsonschema.Schema) iter.Seq2[string, *jsonschema.Schema] {
-	return func(yield func(string, *jsonschema.Schema) bool) {
-		for i, p := range s.Properties {
-			id := fmt.Sprintf("%s_%d", id, i)
-			for did, d := range dependencies(id, p) {
-				if !yield(did, d) {
-					return
-				}
-			}
-			if !yield(id, p) {
-				return
-			}
-		}
-		for i, p := range s.PrefixItems {
-			id := fmt.Sprintf("tuple_%d", i)
-			for did, d := range dependencies(id, p) {
-				id := fmt.Sprintf("%s_%s", id, did)
-				if !yield(id, d) {
-					return
-				}
-			}
-			if !yield(id, p) {
-				return
-			}
-		}
-		if s.Items != nil {
-			id := fmt.Sprintf("%s_tuple_%d", id, len(s.PrefixItems))
-			for did, d := range dependencies(id, s.Items) {
-				if !yield(did, d) {
-					return
-				}
-			}
-			if !yield(id, s.Items) {
-				return
-			}
-		}
-	}
-}
-
-type builder struct {
-	b     bytes.Buffer
-	pad   int
-	rules int
-	items int
-}
-
-// define terminates the current rule, if any, and then either starts a new
-// rule or does nothing else if the name is empty.
-func (b *builder) define(name string) {
-	if b.rules > 0 {
-		b.b.WriteString(";\n")
-	}
-	if name == "" {
-		return
-	}
-	fmt.Fprintf(&b.b, "% -*s", b.pad, name)
-	b.b.WriteString(" ::=")
-	b.rules++
-	b.items = 0
-}
-
-// quote appends a terminal to the current rule.
-func (b *builder) q(s string) {
-	if b.items > 0 {
-		b.b.WriteString(" ")
-	}
-	b.b.WriteString(" ")
-	b.b.WriteString(strconv.Quote(s))
-}
-
-// u appends a non-terminal to the current rule.
-func (b *builder) u(s string) {
-	if b.items > 0 {
-		b.b.WriteString(" ")
-	}
-	b.b.WriteString(" ")
-	b.b.WriteString(s)
-}
-
-func buildConstrainedNumber(b *builder, s *jsonschema.Schema) {
-	if s.Minimum == 0 && s.Maximum == 0 {
-		b.u("TODO")
-	} else {
-		b.u("number")
-	}
-}
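The deleted `FromSchema` API converted a JSON schema into a GBNF-style grammar rooted at `root`. A hedged usage sketch of how the removed function was called — this import refers to the package deleted in this diff, so the example only illustrates the pre-removal API and will not compile against the new tree:

```go
package main

import (
	"fmt"

	// This package is deleted by this diff; the sketch shows how
	// FromSchema was invoked before the removal.
	"github.com/ollama/ollama/grammar"
)

func main() {
	schema := []byte(`{
		"type": "object",
		"properties": {
			"name": {"type": "string"},
			"age":  {"type": "integer"}
		}
	}`)

	// FromSchema appended the generated grammar to the supplied buffer
	// (nil here) and returned the combined bytes.
	g, err := grammar.FromSchema(nil, schema)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(g))
}
```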
@@ -1,75 +0,0 @@
-package grammar
-
-import (
-	"bufio"
-	"cmp"
-	"iter"
-	"strings"
-	"testing"
-
-	_ "embed"
-
-	"github.com/ollama/ollama/grammar/internal/diff"
-)
-
-func TestFromSchema(t *testing.T) {
-	for tt := range testCases(t) {
-		t.Run(tt.name, func(t *testing.T) {
-			g, err := FromSchema(nil, []byte(tt.schema))
-			if err != nil {
-				t.Fatalf("FromSchema: %v", err)
-			}
-			got := string(g)
-			got = strings.TrimPrefix(got, jsonTerms)
-			if got != tt.want {
-				t.Logf("schema:\n%s", tt.schema)
-				t.Fatal(string(diff.Diff("got", []byte(got), "want", []byte(tt.want))))
-			}
-		})
-	}
-}
-
-type testCase struct {
-	name   string
-	schema string
-	want   string
-}
-
-//go:embed testdata/schemas.txt
-var tests string
-
-func testCases(t testing.TB) iter.Seq[testCase] {
-	t.Helper()
-	return func(yield func(testCase) bool) {
-		t.Helper()
-		sc := bufio.NewScanner(strings.NewReader(tests))
-		name := ""
-		for sc.Scan() {
-			line := strings.TrimSpace(sc.Text())
-			if line == "" {
-				name = ""
-				continue
-			}
-			if line[0] == '#' {
-				name = cmp.Or(name, strings.TrimSpace(line[1:]))
-				continue
-			}
-			s := sc.Text()
-			g := ""
-			for sc.Scan() {
-				line = strings.TrimSpace(sc.Text())
-				if line == "" || line[0] == '#' {
-					break
-				}
-				g += sc.Text() + "\n"
-			}
-			if !yield(testCase{name, s, g}) {
-				return
-			}
-			name = strings.TrimSpace(strings.TrimPrefix(line, "#"))
-		}
-		if err := sc.Err(); err != nil {
-			t.Fatalf("error reading tests: %v", err)
-		}
-	}
-}
@@ -1,261 +0,0 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package diff

import (
	"bytes"
	"fmt"
	"sort"
	"strings"
)

// A pair is a pair of values tracked for both the x and y side of a diff.
// It is typically a pair of line indexes.
type pair struct{ x, y int }

// Diff returns an anchored diff of the two texts old and new
// in the “unified diff” format. If old and new are identical,
// Diff returns a nil slice (no output).
//
// Unix diff implementations typically look for a diff with
// the smallest number of lines inserted and removed,
// which can in the worst case take time quadratic in the
// number of lines in the texts. As a result, many implementations
// either can be made to run for a long time or cut off the search
// after a predetermined amount of work.
//
// In contrast, this implementation looks for a diff with the
// smallest number of “unique” lines inserted and removed,
// where unique means a line that appears just once in both old and new.
// We call this an “anchored diff” because the unique lines anchor
// the chosen matching regions. An anchored diff is usually clearer
// than a standard diff, because the algorithm does not try to
// reuse unrelated blank lines or closing braces.
// The algorithm also guarantees to run in O(n log n) time
// instead of the standard O(n²) time.
//
// Some systems call this approach a “patience diff,” named for
// the “patience sorting” algorithm, itself named for a solitaire card game.
// We avoid that name for two reasons. First, the name has been used
// for a few different variants of the algorithm, so it is imprecise.
// Second, the name is frequently interpreted as meaning that you have
// to wait longer (to be patient) for the diff, meaning that it is a slower algorithm,
// when in fact the algorithm is faster than the standard one.
func Diff(oldName string, old []byte, newName string, new []byte) []byte {
	if bytes.Equal(old, new) {
		return nil
	}
	x := lines(old)
	y := lines(new)

	// Print diff header.
	var out bytes.Buffer
	fmt.Fprintf(&out, "diff %s %s\n", oldName, newName)
	fmt.Fprintf(&out, "--- %s\n", oldName)
	fmt.Fprintf(&out, "+++ %s\n", newName)

	// Loop over matches to consider,
	// expanding each match to include surrounding lines,
	// and then printing diff chunks.
	// To avoid setup/teardown cases outside the loop,
	// tgs returns a leading {0,0} and trailing {len(x), len(y)} pair
	// in the sequence of matches.
	var (
		done  pair     // printed up to x[:done.x] and y[:done.y]
		chunk pair     // start lines of current chunk
		count pair     // number of lines from each side in current chunk
		ctext []string // lines for current chunk
	)
	for _, m := range tgs(x, y) {
		if m.x < done.x {
			// Already handled scanning forward from earlier match.
			continue
		}

		// Expand matching lines as far as possible,
		// establishing that x[start.x:end.x] == y[start.y:end.y].
		// Note that on the first (or last) iteration we may (or definitely do)
		// have an empty match: start.x==end.x and start.y==end.y.
		start := m
		for start.x > done.x && start.y > done.y && x[start.x-1] == y[start.y-1] {
			start.x--
			start.y--
		}
		end := m
		for end.x < len(x) && end.y < len(y) && x[end.x] == y[end.y] {
			end.x++
			end.y++
		}

		// Emit the mismatched lines before start into this chunk.
		// (No effect on first sentinel iteration, when start = {0,0}.)
		for _, s := range x[done.x:start.x] {
			ctext = append(ctext, "-"+s)
			count.x++
		}
		for _, s := range y[done.y:start.y] {
			ctext = append(ctext, "+"+s)
			count.y++
		}

		// If we're not at EOF and have too few common lines,
		// the chunk includes all the common lines and continues.
		const C = 3 // number of context lines
		if (end.x < len(x) || end.y < len(y)) &&
			(end.x-start.x < C || (len(ctext) > 0 && end.x-start.x < 2*C)) {
			for _, s := range x[start.x:end.x] {
				ctext = append(ctext, " "+s)
				count.x++
				count.y++
			}
			done = end
			continue
		}

		// End chunk with common lines for context.
		if len(ctext) > 0 {
			n := end.x - start.x
			if n > C {
				n = C
			}
			for _, s := range x[start.x : start.x+n] {
				ctext = append(ctext, " "+s)
				count.x++
				count.y++
			}
			done = pair{start.x + n, start.y + n}

			// Format and emit chunk.
			// Convert line numbers to 1-indexed.
			// Special case: empty file shows up as 0,0 not 1,0.
			if count.x > 0 {
				chunk.x++
			}
			if count.y > 0 {
				chunk.y++
			}
			fmt.Fprintf(&out, "@@ -%d,%d +%d,%d @@\n", chunk.x, count.x, chunk.y, count.y)
			for _, s := range ctext {
				out.WriteString(s)
			}
			count.x = 0
			count.y = 0
			ctext = ctext[:0]
		}

		// If we reached EOF, we're done.
		if end.x >= len(x) && end.y >= len(y) {
			break
		}

		// Otherwise start a new chunk.
		chunk = pair{end.x - C, end.y - C}
		for _, s := range x[chunk.x:end.x] {
			ctext = append(ctext, " "+s)
			count.x++
			count.y++
		}
		done = end
	}

	return out.Bytes()
}

// lines returns the lines in the file x, including newlines.
// If the file does not end in a newline, one is supplied
// along with a warning about the missing newline.
func lines(x []byte) []string {
	l := strings.SplitAfter(string(x), "\n")
	if l[len(l)-1] == "" {
		l = l[:len(l)-1]
	} else {
		// Treat last line as having a message about the missing newline attached,
		// using the same text as BSD/GNU diff (including the leading backslash).
		l[len(l)-1] += "\n\\ No newline at end of file\n"
	}
	return l
}

// tgs returns the pairs of indexes of the longest common subsequence
// of unique lines in x and y, where a unique line is one that appears
// once in x and once in y.
//
// The longest common subsequence algorithm is as described in
// Thomas G. Szymanski, “A Special Case of the Maximal Common
// Subsequence Problem,” Princeton TR #170 (January 1975),
// available at https://research.swtch.com/tgs170.pdf.
func tgs(x, y []string) []pair {
	// Count the number of times each string appears in a and b.
	// We only care about 0, 1, many, counted as 0, -1, -2
	// for the x side and 0, -4, -8 for the y side.
	// Using negative numbers now lets us distinguish positive line numbers later.
	m := make(map[string]int)
	for _, s := range x {
		if c := m[s]; c > -2 {
			m[s] = c - 1
		}
	}
	for _, s := range y {
		if c := m[s]; c > -8 {
			m[s] = c - 4
		}
	}

	// Now unique strings can be identified by m[s] = -1+-4.
	//
	// Gather the indexes of those strings in x and y, building:
	//	xi[i] = increasing indexes of unique strings in x.
	//	yi[i] = increasing indexes of unique strings in y.
	//	inv[i] = index j such that x[xi[i]] = y[yi[j]].
	var xi, yi, inv []int
	for i, s := range y {
		if m[s] == -1+-4 {
			m[s] = len(yi)
			yi = append(yi, i)
		}
	}
	for i, s := range x {
		if j, ok := m[s]; ok && j >= 0 {
			xi = append(xi, i)
			inv = append(inv, j)
		}
	}

	// Apply Algorithm A from Szymanski's paper.
	// In those terms, A = J = inv and B = [0, n).
	// We add sentinel pairs {0,0}, and {len(x),len(y)}
	// to the returned sequence, to help the processing loop.
	J := inv
	n := len(xi)
	T := make([]int, n)
	L := make([]int, n)
	for i := range T {
		T[i] = n + 1
	}
	for i := range n {
		k := sort.Search(n, func(k int) bool {
			return T[k] >= J[i]
		})
		T[k] = J[i]
		L[i] = k + 1
	}
	k := 0
	for _, v := range L {
		if k < v {
			k = v
		}
	}
	seq := make([]pair, 2+k)
	seq[1+k] = pair{len(x), len(y)} // sentinel at end
	lastj := n
	for i := n - 1; i >= 0; i-- {
		if L[i] == k && J[i] < lastj {
			seq[k] = pair{xi[i], yi[J[i]]}
			k--
		}
	}
	seq[0] = pair{0, 0} // sentinel at start
	return seq
}
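The deleted `Diff` above is the Go team's anchored-diff implementation; its contract is easiest to see from a small driver. A minimal sketch (the import path mirrors this repo's layout but is an assumption, and as an `internal` package it resolves only from within the module):

```go
package diff_test

import (
	"fmt"

	"github.com/ollama/ollama/grammar/internal/diff" // assumed path; importable only inside the module
)

func Example() {
	oldText := []byte("a\nb\nc\n")
	newText := []byte("a\nB\nc\n")
	// Diff returns nil for identical inputs; otherwise a unified diff
	// with "diff"/"---"/"+++" headers followed by @@ hunks.
	fmt.Printf("%s", diff.Diff("old", oldText, "new", newText))
}
```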
@@ -1,44 +0,0 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package diff

import (
	"bytes"
	"path/filepath"
	"testing"

	"golang.org/x/tools/txtar"
)

func clean(text []byte) []byte {
	text = bytes.ReplaceAll(text, []byte("$\n"), []byte("\n"))
	text = bytes.TrimSuffix(text, []byte("^D\n"))
	return text
}

func Test(t *testing.T) {
	files, _ := filepath.Glob("testdata/*.txt")
	if len(files) == 0 {
		t.Fatalf("no testdata")
	}

	for _, file := range files {
		t.Run(filepath.Base(file), func(t *testing.T) {
			a, err := txtar.ParseFile(file)
			if err != nil {
				t.Fatal(err)
			}
			if len(a.Files) != 3 || a.Files[2].Name != "diff" {
				t.Fatalf("%s: want three files, third named \"diff\"", file)
			}
			diffs := Diff(a.Files[0].Name, clean(a.Files[0].Data), a.Files[1].Name, clean(a.Files[1].Data))
			want := clean(a.Files[2].Data)
			if !bytes.Equal(diffs, want) {
				t.Fatalf("%s: have:\n%s\nwant:\n%s\n%s", file,
					diffs, want, Diff("have", diffs, "want", want))
			}
		})
	}
}
13 grammar/internal/diff/testdata/allnew.txt vendored
@@ -1,13 +0,0 @@
-- old --
-- new --
a
b
c
-- diff --
diff old new
--- old
+++ new
@@ -0,0 +1,3 @@
+a
+b
+c
13 grammar/internal/diff/testdata/allold.txt vendored
@@ -1,13 +0,0 @@
-- old --
a
b
c
-- new --
-- diff --
diff old new
--- old
+++ new
@@ -1,3 +0,0 @@
-a
-b
-c
35 grammar/internal/diff/testdata/basic.txt vendored
@@ -1,35 +0,0 @@
Example from Hunt and McIlroy, “An Algorithm for Differential File Comparison.”
https://www.cs.dartmouth.edu/~doug/diff.pdf

-- old --
a
b
c
d
e
f
g
-- new --
w
a
b
x
y
z
e
-- diff --
diff old new
--- old
+++ new
@@ -1,7 +1,7 @@
+w
 a
 b
-c
-d
+x
+y
+z
 e
-f
-g
40 grammar/internal/diff/testdata/dups.txt vendored
@@ -1,40 +0,0 @@
-- old --
a

b

c

d

e

f
-- new --
a

B

C

d

e

f
-- diff --
diff old new
--- old
+++ new
@@ -1,8 +1,8 @@
 a
 $
-b
-
-c
+B
+
+C
 $
 d
 $
38 grammar/internal/diff/testdata/end.txt vendored
@@ -1,38 +0,0 @@
-- old --
1
2
3
4
5
6
7
eight
nine
ten
eleven
-- new --
1
2
3
4
5
6
7
8
9
10
-- diff --
diff old new
--- old
+++ new
@@ -5,7 +5,6 @@
 5
 6
 7
-eight
-nine
-ten
-eleven
+8
+9
+10
9 grammar/internal/diff/testdata/eof.txt vendored
@@ -1,9 +0,0 @@
-- old --
a
b
c^D
-- new --
a
b
c^D
-- diff --
18 grammar/internal/diff/testdata/eof1.txt vendored
@@ -1,18 +0,0 @@
-- old --
a
b
c
-- new --
a
b
c^D
-- diff --
diff old new
--- old
+++ new
@@ -1,3 +1,3 @@
 a
 b
-c
+c
\ No newline at end of file
18 grammar/internal/diff/testdata/eof2.txt vendored
@@ -1,18 +0,0 @@
-- old --
a
b
c^D
-- new --
a
b
c
-- diff --
diff old new
--- old
+++ new
@@ -1,3 +1,3 @@
 a
 b
-c
\ No newline at end of file
+c
62 grammar/internal/diff/testdata/long.txt vendored
@@ -1,62 +0,0 @@
-- old --
1
2
3
4
5
6
7
8
9
10
11
12
13
14
14½
15
16
17
18
19
20
-- new --
1
2
3
4
5
6
8
9
10
11
12
13
14
17
18
19
20
-- diff --
diff old new
--- old
+++ new
@@ -4,7 +4,6 @@
 4
 5
 6
-7
 8
 9
 10
@@ -12,9 +11,6 @@
 12
 13
 14
-14½
-15
-16
 17
 18
 19
5 grammar/internal/diff/testdata/same.txt vendored
@@ -1,5 +0,0 @@
-- old --
hello world
-- new --
hello world
-- diff --
34 grammar/internal/diff/testdata/start.txt vendored
@@ -1,34 +0,0 @@
-- old --
e
pi
4
5
6
7
8
9
10
-- new --
1
2
3
4
5
6
7
8
9
10
-- diff --
diff old new
--- old
+++ new
@@ -1,5 +1,6 @@
-e
-pi
+1
+2
+3
 4
 5
 6
40 grammar/internal/diff/testdata/triv.txt vendored
@@ -1,40 +0,0 @@
Another example from Hunt and McIlroy,
“An Algorithm for Differential File Comparison.”
https://www.cs.dartmouth.edu/~doug/diff.pdf

Anchored diff gives up on finding anything,
since there are no unique lines.

-- old --
a
b
c
a
b
b
a
-- new --
c
a
b
a
b
c
-- diff --
diff old new
--- old
+++ new
@@ -1,7 +1,6 @@
-a
-b
-c
-a
-b
-b
-a
+c
+a
+b
+a
+b
+c
@@ -1,171 +0,0 @@
package jsonschema

import (
	"bytes"
	"encoding/json"
	"errors"
)

// Schema holds a JSON schema.
type Schema struct {
	// Name is the name of the property. For the parent/root property, this
	// is "root". For child properties, this is the name of the property.
	Name string `json:"-"`

	// Type is the type of the property.
	//
	// TODO: Union types (e.g. make this a []string).
	Type string

	// PrefixItems is a list of schemas for each item in a tuple. By
	// default, the tuple is "closed." unless Items is set to true or a
	// valid Schema.
	PrefixItems []*Schema

	// Items is the schema for each item in a list.
	//
	// If it is missing, or its JSON value is "null" or "false", it is nil.
	// If the JSON value is "true", it is set to the empty Schema. If the
	// JSON value is an object, it will be decoded as a Schema.
	Items *Schema

	// MinItems specifies the minimum number of items allowed in a list.
	MinItems int

	// MaxItems specifies the maximum number of items allowed in a list.
	MaxItems int

	// Properties is the schema for each property of an object.
	Properties []*Schema

	// Format is the format of the property. This is used to validate the
	// property against a specific format.
	//
	// It is the callers responsibility to validate the property against
	// the format.
	Format string

	// Minimum specifies the minimum value for numeric properties.
	Minimum float64

	// Maximum specifies the maximum value for numeric properties.
	Maximum float64

	// Enum is a list of valid values for the property.
	Enum []json.RawMessage
}

func (s *Schema) UnmarshalJSON(data []byte) error {
	type S Schema
	w := struct {
		Properties props
		Items      items
		*S
	}{
		S: (*S)(s),
	}
	if err := json.Unmarshal(data, &w); err != nil {
		return err
	}
	if w.Items.set {
		s.Items = &w.Items.Schema
	}
	s.Properties = w.Properties
	return nil
}

type items struct {
	Schema
	set bool
}

func (s *items) UnmarshalJSON(data []byte) error {
	switch b := data[0]; b {
	case 't':
		*s = items{set: true}
	case '{':
		type I items
		if err := json.Unmarshal(data, (*I)(s)); err != nil {
			return err
		}
		s.set = true
	case 'n', 'f':
	default:
		return errors.New("invalid Items")
	}
	return nil
}

// EffectiveType returns the effective type of the schema. If the Type field is
// not empty, it is returned; otherwise:
//
//   - If the schema has both Properties and Items, it returns an empty string.
//   - If the schema has Properties, it returns "object".
//   - If the schema has Items, it returns "array".
//   - If the schema has neither Properties nor Items, it returns "value".
//
// The returned string is never empty.
func (d *Schema) EffectiveType() string {
	if d.Type == "" {
		if len(d.Properties) > 0 {
			return "object"
		}
		if len(d.PrefixItems) > 0 || d.Items != nil {
			return "array"
		}
		return "value"
	}
	return d.Type
}

// props is an ordered list of properties. The order of the properties
// is the order in which they were defined in the schema.
type props []*Schema

var _ json.Unmarshaler = (*props)(nil)

func (v *props) UnmarshalJSON(data []byte) error {
	if len(data) == 0 {
		return nil
	}
	if data[0] != '{' {
		return errors.New("expected object")
	}

	d := json.NewDecoder(bytes.NewReader(data))

	// TODO(bmizerany): Consider DisallowUnknownFields. Currently, we, like
	// llama.cpp, ignore unknown fields, which could be lead to unexpected
	// behavior for clients of this package, since they may not be aware
	// that "additionalFields", "itemsPrefix", etc, are being ignored.
	//
	// For now, just do what llama.cpp does.

	t, err := d.Token()
	if err != nil {
		return err
	}
	if t != json.Delim('{') {
		return errors.New("expected object")
	}
	for d.More() {
		// Use the first token (map key) as the property name, then
		// decode the rest of the object fields into a Schema and
		// append.
		t, err := d.Token()
		if err != nil {
			return err
		}
		if t == json.Delim('}') {
			return nil
		}
		s := &Schema{
			Name: t.(string),
		}
		if err := d.Decode(s); err != nil {
			return err
		}
		*v = append(*v, s)
	}
	return nil
}
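Worth noting about the deleted decoder: `props` preserves JSON property order, which map-based decoding would lose. A minimal sketch of how the type was exercised (the names match the code above; the `main` scaffolding is illustrative only):

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
)

func main() {
	// Property order in the input survives into s.Properties because
	// props implements json.Unmarshaler and appends keys as decoded.
	data := []byte(`{"properties": {"b": {"type": "string"}, "a": {"type": "number"}}}`)
	var s *Schema
	if err := json.Unmarshal(data, &s); err != nil {
		log.Fatal(err)
	}
	for _, p := range s.Properties {
		fmt.Println(p.Name, p.EffectiveType()) // "b string", then "a number"
	}
}
```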
@@ -1,104 +0,0 @@
package jsonschema

import (
	"encoding/json"
	"reflect"
	"strings"
	"testing"

	"github.com/google/go-cmp/cmp"
)

const testSchemaBasic = `
{
	"properties": {
		"tupleClosedEmpty": { "prefixItems": [] },
		"tupleClosedMissing": { "prefixItems": [{}] },
		"tupleClosedNull": { "prefixItems": [{}], "items": null },
		"tupleClosedFalse": { "prefixItems": [{}], "items": false },

		"tupleOpenTrue": { "prefixItems": [{}], "items": true },
		"tupleOpenEmpty": { "prefixItems": [{}], "items": {} },
		"tupleOpenTyped": { "prefixItems": [{}], "items": {"type": "boolean"} },
		"tupleOpenMax": { "prefixItems": [{}], "items": true, "maxItems": 3},

		"array": { "items": {"type": "number"} },

		"null": { "type": "null" },
		"string": { "type": "string" },
		"boolean": { "type": "boolean" }
	}
}
`

func TestSchemaUnmarshal(t *testing.T) {
	var got *Schema
	if err := json.Unmarshal([]byte(testSchemaBasic), &got); err != nil {
		t.Fatalf("Unmarshal: %v", err)
	}
	want := &Schema{
		Properties: []*Schema{
			{Name: "tupleClosedEmpty", PrefixItems: []*Schema{}, Items: nil},
			{Name: "tupleClosedMissing", PrefixItems: []*Schema{{}}, Items: nil},
			{Name: "tupleClosedNull", PrefixItems: []*Schema{{}}, Items: nil},
			{Name: "tupleClosedFalse", PrefixItems: []*Schema{{}}, Items: nil},

			{Name: "tupleOpenTrue", PrefixItems: []*Schema{{}}, Items: &Schema{}},
			{Name: "tupleOpenEmpty", PrefixItems: []*Schema{{}}, Items: &Schema{}},
			{Name: "tupleOpenTyped", PrefixItems: []*Schema{{}}, Items: &Schema{Type: "boolean"}},
			{Name: "tupleOpenMax", PrefixItems: []*Schema{{}}, Items: &Schema{}, MaxItems: 3},

			{Name: "array", Items: &Schema{Type: "number"}},

			{Name: "null", Type: "null"},
			{Name: "string", Type: "string"},
			{Name: "boolean", Type: "boolean"},
		},
	}

	if diff := cmp.Diff(want, got); diff != "" {
		t.Errorf("(-want, +got)\n%s", diff)
	}
}

func TestEffectiveType(t *testing.T) {
	const schema = `
	{"properties": {
		"o": {"type": "object"},
		"a": {"type": "array"},
		"n": {"type": "number"},
		"s": {"type": "string"},
		"z": {"type": "null"},
		"b": {"type": "boolean"},

		"t0": {"prefixItems": [{}], "items": {"type": "number"}},
		"t1": {"items": {"type": "number"}, "maxItems": 3},

		"v": {"maxItems": 3}
	}}
	`

	var s *Schema
	if err := json.Unmarshal([]byte(schema), &s); err != nil {
		t.Fatalf("json.Unmarshal: %v", err)
	}

	var got []string
	for _, p := range s.Properties {
		got = append(got, p.EffectiveType())
	}

	want := strings.Fields(`
		object
		array
		number
		string
		null
		boolean
		array
		array
		value
	`)
	if !reflect.DeepEqual(want, got) {
		t.Errorf("\ngot:\n\t%v\nwant:\n\t%v", got, want)
	}
}
76 grammar/testdata/schemas.txt vendored
@@ -1,76 +0,0 @@
# This file holds tests for JSON schema to EBNF grammar conversions.
#
# The format is a JSON schema, followed by the expected EBNF grammar. Each test
# MAY be preceded by a comment that describes the test (e.g. the test name), followed by
# the JSON schema and the expected EBNF grammar. If no comment is present, the test
# name the tests number in the file (e.g. "#0", "#1", etc.)
#
# Blank lines signify the end or start of a new test. Comments can be added
# anywhere in the file, but they must be preceded by a '#' character and start at
# the beginning of the line.

# default
{}
root ::= value;

{"properties": {}}
root ::= value;

# array
{"properties": {"a": {"type": "array", "items": {"type": "string"}}}}
root_0_tuple_0 ::= string;
root_0 ::= "[" ( root_0_tuple_0 )* "]";
root ::= "{" "a" ":" root_0 "}";

# array with nested array
{"type": "array", "items": {"type": "array", "items": {"type": "string"}}}
root_tuple_0_tuple_0 ::= string;
root_tuple_0 ::= "[" ( root_tuple_0_tuple_0 )* "]";
root ::= "[" ( root_tuple_0 )* "]";

# object
{"properties": {"e": {}}}
root_0 ::= value;
root ::= "{" "e" ":" root_0 "}";

# object with nested object
{"properties": {"o": {"type": "object", "properties": {"e": {}}}}}
root_0_0 ::= value;
root_0 ::= "{" "e" ":" root_0_0 "}";
root ::= "{" "o" ":" root_0 "}";

# boolean
{"type": "boolean"}
root ::= boolean;

# number
{"properties": {"n": {"type": "number", "minimum": 123, "maximum": 4567}}}
root_0 ::= number;
root ::= "{" "n" ":" root_0 "}";

# string
{"type": "string"}
root ::= string;

# string with enum
{"type": "string", "enum": ["a", "b", "c"]}
root ::= ( "\"a\"" "|" "\"b\"" "|" "\"c\"" );

# spaces in key
{"properties": {"a b": {}}}
root_0 ::= value;
root ::= "{" "a b" ":" root_0 "}";

# issue7978
{ "type": "object", "properties": { "steps": { "type": "array", "items": { "type": "object", "properties": { "explanation": { "type": "string" }, "output": { "type": "string" } }, "required": [ "explanation", "output" ], "additionalProperties": false } }, "final_answer": { "type": "string" } }, "required": [ "steps", "final_answer" ], "additionalProperties": false }
root_0_tuple_0_0 ::= string;
root_0_tuple_0_1 ::= string;
root_0_tuple_0 ::= "{" "explanation" ":" root_0_tuple_0_0 "," "output" ":" root_0_tuple_0_1 "}";
root_0 ::= "[" ( root_0_tuple_0 )* "]";
root_1 ::= string;
root ::= "{" "steps" ":" root_0 "," "final_answer" ":" root_1 "}";

# !! # special characters in key
# !! {"properties": {"a!b": {}}}
# !! !invalid character '!' in key
# !!
126 llama/README.md
@@ -1,157 +1,53 @@
 # `llama`
 
-This package integrates the [llama.cpp](https://github.com/ggerganov/llama.cpp) library as a Go package and makes it easy to build it with tags for different CPU and GPU processors.
-
-Supported:
-
-- [x] CPU
-- [x] avx, avx2
-- [x] macOS Metal
-- [x] Windows CUDA
-- [x] Windows ROCm
-- [x] Linux CUDA
-- [x] Linux ROCm
-- [x] Llava
-
-Extra build steps are required for CUDA and ROCm on Windows since `nvcc` and `hipcc` both require using msvc as the host compiler. For these shared libraries are created:
-
-- `ggml_cuda.dll` on Windows or `ggml_cuda.so` on Linux
-- `ggml_hipblas.dll` on Windows or `ggml_hipblas.so` on Linux
-
-> Note: it's important that memory is allocated and freed by the same compiler (e.g. entirely by code compiled with msvc or mingw). Issues from this should be rare, but there are some places where pointers are returned by the CUDA or HIP runtimes and freed elsewhere, causing a a crash. In a future change the same runtime should be used in both cases to avoid crashes.
-
-## Building
-
-```
-go build .
-```
-
-### AVX
-
-```shell
-go build -tags avx .
-```
-
-### AVX2
-
-```shell
-# go doesn't recognize `-mfma` as a valid compiler flag
-# see https://github.com/golang/go/issues/17895
-go env -w "CGO_CPPFLAGS_ALLOW=-mfma|-mf16c"
-go build -tags=avx,avx2 .
-```
-
-## Linux
-
-### CUDA
-
-Install the [CUDA toolkit v11.3.1](https://developer.nvidia.com/cuda-11-3-1-download-archive):
-
-```shell
-make ggml_cuda.so
-go build -tags avx,cuda .
-```
-
-### ROCm
-
-Install [ROCm](https://rocm.docs.amd.com/en/latest/).
-
-```shell
-make ggml_hipblas.so
-go build -tags avx,rocm .
-```
-
-## Windows
-
-Download [w64devkit](https://github.com/skeeto/w64devkit/releases/latest) for a simple MinGW development environment.
-
-### CUDA
-
-Install the [CUDA toolkit v11.3.1](https://developer.nvidia.com/cuda-11-3-1-download-archive) then build the cuda code:
-
-```shell
-make ggml_cuda.dll
-go build -tags avx,cuda .
-```
-
-### ROCm
-
-Install [ROCm](https://rocm.docs.amd.com/en/latest/).
-
-```shell
-make ggml_hipblas.dll
-go build -tags avx,rocm .
-```
-
-## Building runners
-
-```shell
-# build all runners for this platform
-make -j
-```
+This package provides Go bindings to [llama.cpp](https://github.com/ggerganov/llama.cpp).
 
 ## Vendoring
 
-Ollama currently vendors [llama.cpp](https://github.com/ggerganov/llama.cpp/) and [ggml](https://github.com/ggerganov/ggml) through a vendoring model. While we generally strive to contribute changes back upstream to avoid drift, we cary a small set of patches which are applied to the tracking commit. A set of make targets are available to aid developers in updating to a newer tracking commit, or to work on changes.
+Ollama vendors [llama.cpp](https://github.com/ggerganov/llama.cpp/) and [ggml](https://github.com/ggerganov/llama.cpp/tree/master/ggml/src). While we generally strive to contribute changes back upstream to avoid drift, we carry a small set of patches which are applied to the tracking commit.
 
 If you update the vendoring code, start by running the following command to establish the tracking llama.cpp repo in the `./vendor/` directory.
 
 ```
-make apply-patches
+make -f Makefile.sync apply-patches
 ```
 
 ### Updating Base Commit
 
 **Pin to new base commit**
 
-To update to a newer base commit, select the upstream git tag or commit and update `llama/vendoring`
-
-#### Applying patches
+To change the base commit, update `FETCH_HEAD` in Makefile.sync.
 
 When updating to a newer base commit, the existing patches may not apply cleanly and require manual merge resolution.
 
 Start by applying the patches. If any of the patches have conflicts, the `git am` will stop at the first failure.
 
 ```
-make apply-patches
+make -f Makefile.sync apply-patches
 ```
 
-If you see an error message about a conflict, go into the `./vendor/` directory, and perform merge resolution using your preferred tool to the patch commit which failed. Save the file(s) and continue the patch series with `git am --continue` . If any additional patches fail, follow the same pattern until the full patch series is applied. Once finished, run a final `create-patches` and `sync` target to ensure everything is updated.
+If there are conflicts, you will see an error message. Resolve the conflicts in `./vendor/`, and continue the patch series with `git am --continue` and rerun `make -f Makefile.sync apply-patches`. Repeat until all patches are successfully applied.
+
+Once all patches are applied, commit the changes to the tracking repository.
 
 ```
-make create-patches sync
+make -f Makefile.sync format-patches sync
 ```
 
-Build and test Ollama, and make any necessary changes to the Go code based on the new base commit. Submit your PR to the Ollama repo.
-
 ### Generating Patches
 
 When working on new fixes or features that impact vendored code, use the following model. First get a clean tracking repo with all current patches applied:
 
 ```
-make apply-patches
+make -f Makefile.sync clean apply-patches
 ```
 
-Now edit the upstream native code in the `./vendor/` directory. You do not need to commit every change in order to build, a dirty working tree in the tracking repo is OK while developing. Simply save in your editor, and run the following to refresh the vendored code with your changes, build the backend(s) and build ollama:
-
-```
-make sync
-make -j 8
-go build .
-```
-
-> [!IMPORTANT]
-> Do **NOT** run `apply-patches` while you're iterating as that will reset the tracking repo. It will detect a dirty tree and abort, but if your tree is clean and you accidentally ran this target, use `git reflog` to recover your commit(s).
-
 Iterate until you're ready to submit PRs. Once your code is ready, commit a change in the `./vendor/` directory, then generate the patches for ollama with
 
 ```
-make create-patches
+make -f Makefile.sync format-patches
 ```
 
-> [!IMPORTANT]
-> Once you have completed this step, it is safe to run `apply-patches` since your change is preserved in the patches.
-
 In your `./vendor/` directory, create a branch, and cherry-pick the new commit to that branch, then submit a PR upstream to llama.cpp.
 
 Commit the changes in the ollama repo and submit a PR to Ollama, which will include the vendored code update with your change, along with the patches.
2 llama/build-info.cpp generated vendored
@@ -1,4 +1,4 @@
 int LLAMA_BUILD_NUMBER = 0;
-char const *LLAMA_COMMIT = "ba1cb19cdd0d92e012e0f6e009e0620f854b6afd";
+char const *LLAMA_COMMIT = "46e3556e01b824e52395fb050b29804b6cff2a7c";
 char const *LLAMA_COMPILER = "";
 char const *LLAMA_BUILD_TARGET = "";
4 llama/build-info.cpp.in Normal file
@@ -0,0 +1,4 @@
int LLAMA_BUILD_NUMBER = 0;
char const *LLAMA_COMMIT = "@FETCH_HEAD@";
char const *LLAMA_COMPILER = "";
char const *LLAMA_BUILD_TARGET = "";
36 llama/llama.cpp/examples/llava/clip.cpp vendored
@@ -1235,35 +1235,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         }
     }
 
-#ifdef GGML_USE_CUDA
-    new_clip->backend = ggml_backend_cuda_init(0);
-    LOG_INF("%s: CLIP using CUDA backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_METAL
-    new_clip->backend = ggml_backend_metal_init();
-    LOG_INF("%s: CLIP using Metal backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_CANN
-    new_clip->backend = ggml_backend_cann_init(0);
-    LOG_INF("%s: CLIP using CANN backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_VULKAN
-    new_clip->backend = ggml_backend_vk_init(0);
-    LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_SYCL
-    new_clip->backend = ggml_backend_sycl_init(0);
-    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
-#endif
-
-    if (!new_clip->backend) {
-        new_clip->backend = ggml_backend_cpu_init();
-        LOG_INF("%s: CLIP using CPU backend\n", __func__);
+    ggml_backend_t backend = ggml_backend_init_best();
+    if (backend == nullptr) {
+        LOG_ERR("%s: failed to initialize backend\n", __func__);
+        clip_free(new_clip);
+        gguf_free(ctx);
+        return nullptr;
     }
+    LOG_INF("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+    new_clip->backend = backend;
 
     // model size and capabilities
     {
@@ -3,5 +3,6 @@ package llama
 // #cgo CXXFLAGS: -std=c++17
 // #cgo CPPFLAGS: -I${SRCDIR}/../include
 // #cgo CPPFLAGS: -I${SRCDIR}/../../../ml/backend/ggml/ggml/include
+// #cgo windows CPPFLAGS: -D_WIN32_WINNT=0x0602
 import "C"
 import _ "github.com/ollama/ollama/ml/backend/ggml/ggml/src"
@@ -199,21 +199,25 @@ func (c *Context) KvCacheDefrag() {
 
 // Get the embeddings for a sequence id
 func (c *Context) GetEmbeddingsSeq(seqId int) []float32 {
-	embeddings := unsafe.Pointer(C.llama_get_embeddings_seq(c.c, C.int(seqId)))
-	if embeddings == nil {
+	e := unsafe.Pointer(C.llama_get_embeddings_seq(c.c, C.int(seqId)))
+	if e == nil {
 		return nil
 	}
 
-	return unsafe.Slice((*float32)(embeddings), c.Model().NEmbd())
+	embeddings := make([]float32, c.Model().NEmbd())
+	_ = copy(embeddings, unsafe.Slice((*float32)(e), c.Model().NEmbd()))
+	return embeddings
 }
 
 func (c *Context) GetEmbeddingsIth(i int) []float32 {
-	embeddings := unsafe.Pointer(C.llama_get_embeddings_ith(c.c, C.int32_t(i)))
-	if embeddings == nil {
+	e := unsafe.Pointer(C.llama_get_embeddings_ith(c.c, C.int32_t(i)))
+	if e == nil {
 		return nil
 	}
 
-	return unsafe.Slice((*float32)(embeddings), c.Model().NEmbd())
+	embeddings := make([]float32, c.Model().NEmbd())
+	_ = copy(embeddings, unsafe.Slice((*float32)(e), c.Model().NEmbd()))
+	return embeddings
 }
 
 type ModelParams struct {
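The embeddings change above swaps a returned `unsafe.Slice` view of llama.cpp-owned memory for a copy into a Go-allocated slice; the view could be invalidated once the C side reuses its buffer on a later decode. A minimal sketch of the pattern (the helper name is illustrative, not from the repo):

```go
package llama

import "unsafe"

// copyFloats shows the pattern adopted above: instead of returning
// unsafe.Slice(p, n), which aliases a buffer owned by the C side and may
// be overwritten by a subsequent llama.cpp call, copy the values into a
// slice owned by the Go runtime so callers can retain it safely.
func copyFloats(p unsafe.Pointer, n int) []float32 {
	out := make([]float32, n)
	copy(out, unsafe.Slice((*float32)(p), n))
	return out
}
```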
31 llama/mllama.cpp vendored
@@ -558,30 +558,15 @@ struct mllama_ctx *mllama_model_load(const char *fname, const int verbosity = 1)
 
     mllama_ctx *new_mllama = new mllama_ctx{};
 
-#ifdef GGML_USE_CUDA
-    new_mllama->backend = ggml_backend_cuda_init(0);
-    LOG("vision using CUDA backend");
-#endif
-
-#ifdef GGML_USE_METAL
-    new_mllama->backend = ggml_backend_metal_init();
-    LOG("vision using Metal backend");
-#endif
-
-#ifdef GGML_USE_CANN
-    new_mllama->backend = ggml_backend_cann_init(0);
-    LOG("vision using CANN backend");
-#endif
-
-#ifdef GGML_USE_VULKAN
-    new_mllama->backend = ggml_backend_vk_init(0);
-    LOG("vision using Vulkan backend");
-#endif
-
-    if (!new_mllama->backend) {
-        new_mllama->backend = ggml_backend_cpu_init();
-        LOG("vision using CPU backend");
+    ggml_backend_t backend = ggml_backend_init_best();
+    if (backend == nullptr) {
+        LOG("%s: failed to initialize backend\n", __func__);
+        mllama_free(new_mllama);
+        gguf_free(ctx);
+        return nullptr;
     }
+    LOG("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+    new_mllama->backend = backend;
 
     // load tensors
     {
@@ -1,14 +1,14 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: jmorganca <jmorganca@gmail.com>
 Date: Sat, 4 Jan 2025 22:52:48 -0800
-Subject: [PATCH] re-enable gpu for clip
+Subject: [PATCH] use dynamic backend loading for clip
 
 ---
- examples/llava/clip.cpp | 86 ++++++++++++++++++++---------------------
- 1 file changed, 43 insertions(+), 43 deletions(-)
+ examples/llava/clip.cpp | 74 +++++++++++++++--------------------------
+ 1 file changed, 27 insertions(+), 47 deletions(-)
 
 diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index b3c1829f..718052e1 100644
+index b3c1829f..86b91d5c 100644
 --- a/examples/llava/clip.cpp
 +++ b/examples/llava/clip.cpp
 @@ -8,25 +8,25 @@
@@ -56,7 +56,7 @@ index b3c1829f..718052e1 100644
 
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
-@@ -1235,30 +1235,30 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
+@@ -1235,35 +1235,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
 }
 }
 
@@ -84,30 +84,19 @@ index b3c1829f..718052e1 100644
 -// new_clip->backend = ggml_backend_sycl_init(0);
 -// LOG_INF("%s: CLIP using SYCL backend\n", __func__);
 -//#endif
-+#ifdef GGML_USE_CUDA
-+    new_clip->backend = ggml_backend_cuda_init(0);
-+    LOG_INF("%s: CLIP using CUDA backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_METAL
-+    new_clip->backend = ggml_backend_metal_init();
-+    LOG_INF("%s: CLIP using Metal backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_CANN
-+    new_clip->backend = ggml_backend_cann_init(0);
-+    LOG_INF("%s: CLIP using CANN backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_VULKAN
-+    new_clip->backend = ggml_backend_vk_init(0);
-+    LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_SYCL
-+    new_clip->backend = ggml_backend_sycl_init(0);
-+    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
-+#endif
+-
+-    if (!new_clip->backend) {
+-        new_clip->backend = ggml_backend_cpu_init();
+-        LOG_INF("%s: CLIP using CPU backend\n", __func__);
++    ggml_backend_t backend = ggml_backend_init_best();
++    if (backend == nullptr) {
++        LOG_ERR("%s: failed to initialize backend\n", __func__);
++        clip_free(new_clip);
++        gguf_free(ctx);
++        return nullptr;
+ }
++    LOG_INF("%s: using %s backend\n", __func__, ggml_backend_name(backend));
++    new_clip->backend = backend;
 
-    if (!new_clip->backend) {
-        new_clip->backend = ggml_backend_cpu_init();
+    // model size and capabilities
+    {
@@ -0,0 +1,29 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Tue, 14 Jan 2025 15:59:04 -0800
Subject: [PATCH] add phony target ggml-cpu for all cpu variants

---
 ggml/src/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 84101c32..72b488dd 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -278,6 +278,7 @@ function(ggml_add_cpu_backend_variant tag_name)
   endforeach()

   ggml_add_cpu_backend_variant_impl(${tag_name})
+  add_dependencies(ggml-cpu ggml-cpu-${tag_name})
 endfunction()

 ggml_add_backend(CPU)
@@ -286,6 +287,7 @@ if (GGML_CPU_ALL_VARIANTS)
   if (NOT GGML_BACKEND_DL)
     message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
   endif()
+  add_custom_target(ggml-cpu)
   ggml_add_cpu_backend_variant(sandybridge AVX)
   ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
   ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
@@ -443,7 +443,6 @@ func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch)
 		s.lc.Synchronize()
 	}
 
-	var totalSamplingTime time.Duration
 	for i, seq := range s.seqs {
 		if seq == nil {
 			continue
@@ -478,12 +477,8 @@ func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch)
 		}
 
 		// sample a token
-		samplingStart := time.Now()
 		token := seq.samplingCtx.Sample(s.lc, seq.iBatch)
 		seq.samplingCtx.Accept(token, true)
-		samplingTime := time.Since(samplingStart)
-		totalSamplingTime += samplingTime
-		slog.Info("sampling time", "time", samplingTime)
 		piece := s.model.TokenToPiece(token)
 
 		seq.numPredicted++
@@ -640,7 +635,6 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
 	samplingParams.Seed = uint32(req.Seed)
 	samplingParams.Grammar = req.Grammar
 
-	start := time.Now()
 	seq, err := s.NewSequence(req.Prompt, req.Images, NewSequenceParams{
 		numPredict: req.NumPredict,
 		stop:       req.Stop,
@@ -648,7 +642,6 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
 		samplingParams: &samplingParams,
 		embedding:      false,
 	})
-	slog.Info("new sequence created", "duration", time.Since(start))
 	if err != nil {
 		http.Error(w, fmt.Sprintf("Failed to create new sequence: %v", err), http.StatusInternalServerError)
 		return
@@ -1,4 +1,4 @@
-package ggml
+package llm
 
 import "fmt"
 
@@ -32,9 +32,10 @@ const (
 	fileTypeIQ1_S
 	fileTypeIQ4_NL
 	fileTypeIQ3_S
+	fileTypeIQ3_M
 	fileTypeIQ2_S
-	fileTypeIQ4_XS
 	fileTypeIQ2_M
+	fileTypeIQ4_XS
 	fileTypeIQ1_M
 	fileTypeBF16
 
@@ -93,6 +94,8 @@ func ParseFileType(s string) (fileType, error) {
 		return fileTypeIQ4_NL, nil
 	case "IQ3_S":
 		return fileTypeIQ3_S, nil
+	case "IQ3_M":
+		return fileTypeIQ3_M, nil
 	case "IQ2_S":
 		return fileTypeIQ2_S, nil
 	case "IQ4_XS":
@@ -160,6 +163,8 @@ func (t fileType) String() string {
 		return "IQ4_NL"
 	case fileTypeIQ3_S:
 		return "IQ3_S"
+	case fileTypeIQ3_M:
+		return "IQ3_M"
 	case fileTypeIQ2_S:
 		return "IQ2_S"
 	case fileTypeIQ4_XS:
149
llm/ggla.go
Normal file
149
llm/ggla.go
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
package llm
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"slices"
|
||||||
|
)
|
||||||
|
|
||||||
|
type containerGGLA struct {
|
+	version uint32
+}
+
+func (c *containerGGLA) Name() string {
+	return "ggla"
+}
+
+func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
+	if err := binary.Read(rs, binary.LittleEndian, &c.version); err != nil {
+		return nil, err
+	}
+
+	switch c.version {
+	case 1:
+	default:
+		return nil, errors.New("invalid version")
+	}
+
+	model := newGGLA(c)
+	err := model.decode(rs)
+	return model, err
+}
+
+type ggla struct {
+	*containerGGLA
+
+	kv      KV
+	tensors []*Tensor
+
+	tensorOffset uint64
+}
+
+func newGGLA(container *containerGGLA) *ggla {
+	return &ggla{
+		containerGGLA: container,
+		kv:            make(KV),
+	}
+}
+
+func (llm *ggla) KV() KV {
+	return llm.kv
+}
+
+func (llm *ggla) Tensors() *Tensors {
+	return &Tensors{
+		Items:  llm.tensors,
+		Offset: llm.tensorOffset,
+	}
+}
+
+func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
+	var r uint32
+	if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
+		return err
+	}
+	llm.kv["r"] = r
+
+	var alpha uint32
+	if err := binary.Read(rs, binary.LittleEndian, &alpha); err != nil {
+		return err
+	}
+	llm.kv["alpha"] = alpha
+
+	offset, err := rs.Seek(0, io.SeekCurrent)
+	if err != nil {
+		return err
+	}
+
+	llm.tensorOffset = uint64(offset)
+
+	for {
+		var dims uint32
+		if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
+			if errors.Is(err, io.EOF) {
+				return nil
+			}
+			return err
+		}
+
+		defer func() {
+			if errors.Is(retErr, io.EOF) {
+				retErr = io.ErrUnexpectedEOF
+			}
+		}()
+
+		var namesize uint32
+		if err := binary.Read(rs, binary.LittleEndian, &namesize); err != nil {
+			return err
+		}
+
+		var t Tensor
+		if err := binary.Read(rs, binary.LittleEndian, &t.Kind); err != nil {
+			return err
+		}
+
+		t.Shape = make([]uint64, dims)
+		for i := 0; uint32(i) < dims; i++ {
+			var shape32 uint32
+			if err := binary.Read(rs, binary.LittleEndian, &shape32); err != nil {
+				return err
+			}
+
+			t.Shape[i] = uint64(shape32)
+		}
+
+		// ggla tensor shape is reversed
+		// ref: https://github.com/ggerganov/llama.cpp/blob/29ae62d2ae163e2b68aa0ad3bf2ab4636de0c957/convert-lora-to-ggml.py#L44
+		slices.Reverse(t.Shape)
+
+		name := make([]byte, namesize)
+		if err := binary.Read(rs, binary.LittleEndian, &name); err != nil {
+			return err
+		}
+
+		t.Name = string(name)
+
+		offset, err := rs.Seek(0, io.SeekCurrent)
+		if err != nil {
+			return err
+		}
+
+		if _, err := rs.Seek((offset+31)&-32-offset, io.SeekCurrent); err != nil {
+			return err
+		}
+
+		offset, err = rs.Seek(0, io.SeekCurrent)
+		if err != nil {
+			return err
+		}
+
+		t.Offset = uint64(offset)
+
+		if _, err := rs.Seek(int64(t.Size()), io.SeekCurrent); err != nil {
+			return err
+		}
+
+		llm.tensors = append(llm.tensors, &t)
+	}
+}
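The `(offset+31)&-32-offset` seek above skips padding so each tensor's data starts on a 32-byte boundary. A standalone sketch of that round-up arithmetic, with illustrative names that are not from the codebase:

package main

import "fmt"

// align32 rounds n up to the next multiple of 32, matching the
// (offset+31)&-32 expression used when skipping tensor padding.
// In two's complement, &-32 is the same as clearing the low five
// bits, written here with Go's AND NOT operator.
func align32(n int64) int64 {
	return (n + 31) &^ 31
}

func main() {
	for _, n := range []int64{0, 1, 31, 32, 33, 100} {
		fmt.Println(n, "->", align32(n)) // 0, 32, 32, 32, 64, 128
	}
}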
@@ -1,15 +1,15 @@
-package ggml
+package llm

 import (
 	"encoding/binary"
 	"errors"
 	"fmt"
 	"io"
-	"log/slog"
 	"slices"
 	"strings"
+	"sync"

-	"github.com/ollama/ollama/fs/util/bufioutil"
+	"github.com/ollama/ollama/util/bufioutil"
 )

 type GGML struct {
@@ -19,168 +19,145 @@ type GGML struct {

 type model interface {
 	KV() KV
-	Tensors() Tensors
+	Tensors() *Tensors
 }

 type KV map[string]any

+func (kv KV) u64(key string) uint64 {
+	switch v := kv[key].(type) {
+	case uint64:
+		return v
+	case uint32:
+		return uint64(v)
+	case float64:
+		return uint64(v)
+	default:
+		return 0
+	}
+}
+
 func (kv KV) Architecture() string {
-	return kv.String("general.architecture", "unknown")
+	if s, ok := kv["general.architecture"].(string); ok {
+		return s
+	}
+
+	return "unknown"
 }

 func (kv KV) Kind() string {
-	return kv.String("general.type", "unknown")
+	if s, ok := kv["general.type"].(string); ok {
+		return s
+	}
+
+	return "unknown"
 }

 func (kv KV) ParameterCount() uint64 {
-	return keyValue[uint64](kv, "general.parameter_count")
+	return kv.u64("general.parameter_count")
 }

 func (kv KV) FileType() fileType {
-	if t := kv.Uint("general.file_type"); t > 0 {
-		return fileType(t)
+	if u64 := kv.u64("general.file_type"); u64 > 0 {
+		return fileType(uint32(u64))
 	}

 	return fileTypeUnknown
 }

 func (kv KV) BlockCount() uint64 {
-	return uint64(kv.Uint("block_count"))
-}
-
-func (kv KV) EmbeddingLength() uint64 {
-	return uint64(kv.Uint("embedding_length"))
+	return kv.u64(fmt.Sprintf("%s.block_count", kv.Architecture()))
 }

 func (kv KV) HeadCount() uint64 {
-	return uint64(kv.Uint("attention.head_count"))
+	return kv.u64(fmt.Sprintf("%s.attention.head_count", kv.Architecture()))
 }

 func (kv KV) HeadCountKV() uint64 {
-	return uint64(kv.Uint("attention.head_count_kv", 1))
+	if headCountKV := kv.u64(fmt.Sprintf("%s.attention.head_count_kv", kv.Architecture())); headCountKV > 0 {
+		return headCountKV
+	}
+
+	return 1
 }

 func (kv KV) EmbeddingHeadCount() uint64 {
 	if heads := kv.HeadCount(); heads > 0 {
-		return kv.EmbeddingLength() / heads
+		return kv.EmbeddingLength() / kv.HeadCount()
 	}

 	return 0
 }

 func (kv KV) EmbeddingHeadCountK() uint64 {
-	return uint64(kv.Uint("attention.key_length", uint32(kv.EmbeddingHeadCount())))
+	if k := kv.u64(fmt.Sprintf("%s.attention.key_length", kv.Architecture())); k > 0 {
+		return k
+	}
+
+	return kv.EmbeddingHeadCount()
 }

 func (kv KV) EmbeddingHeadCountV() uint64 {
-	return uint64(kv.Uint("attention.value_length", uint32(kv.EmbeddingHeadCount())))
+	if v := kv.u64(fmt.Sprintf("%s.attention.value_length", kv.Architecture())); v > 0 {
+		return v
+	}
+
+	return kv.EmbeddingHeadCount()
 }

 func (kv KV) GQA() uint64 {
 	return kv.HeadCount() / kv.HeadCountKV()
 }

+func (kv KV) EmbeddingLength() uint64 {
+	return kv.u64(fmt.Sprintf("%s.embedding_length", kv.Architecture()))
+}
+
 func (kv KV) ContextLength() uint64 {
-	return uint64(kv.Uint("context_length"))
+	return kv.u64(fmt.Sprintf("%s.context_length", kv.Architecture()))
 }

 func (kv KV) ChatTemplate() string {
-	return kv.String("tokenizer.chat_template")
-}
-
-func (kv KV) String(key string, defaultValue ...string) string {
-	return keyValue(kv, key, append(defaultValue, "")...)
-}
-
-func (kv KV) Uint(key string, defaultValue ...uint32) uint32 {
-	return keyValue(kv, key, append(defaultValue, 0)...)
-}
-
-func (kv KV) Float(key string, defaultValue ...float32) float32 {
-	return keyValue(kv, key, append(defaultValue, 0)...)
-}
-
-func (kv KV) Strings(key string, defaultValue ...[]string) []string {
-	r := keyValue(kv, key, &array{})
-	s := make([]string, r.size)
-	for i := range r.size {
-		s[i] = r.values[i].(string)
-	}
-
+	s, _ := kv["tokenizer.chat_template"].(string)
 	return s
 }

-func (kv KV) Uints(key string, defaultValue ...[]uint32) []uint32 {
-	r := keyValue(kv, key, &array{})
-	s := make([]uint32, r.size)
-	for i := range r.size {
-		s[i] = uint32(r.values[i].(int32))
-	}
-
-	return s
-}
-
-func keyValue[T string | uint32 | uint64 | float32 | *array](kv KV, key string, defaultValue ...T) T {
-	if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
-		key = kv.Architecture() + "." + key
-	}
-
-	if val, ok := kv[key]; ok {
-		return val.(T)
-	}
-
-	slog.Warn("key not found", "key", key, "default", defaultValue[0])
-	return defaultValue[0]
-}
-
 type Tensors struct {
-	items  []*Tensor
+	Items  []*Tensor
 	Offset uint64
+
+	layers     map[string]Layer
+	layersOnce sync.Once
 }

-func (s Tensors) Items(prefix ...string) []*Tensor {
-	if len(prefix) == 0 {
-		return s.items
-	}
-
-	var items []*Tensor
-	for _, t := range s.items {
-		if strings.HasPrefix(t.Name, prefix[0]) {
-			items = append(items, t)
-		}
-	}
-
-	return items
-}
-
-func (ts Tensors) Layers() map[string]Layer {
-	layers := make(map[string]Layer)
-	for _, t := range ts.items {
-		parts := strings.Split(t.Name, ".")
-		if i := slices.Index(parts, "blk"); i > 0 {
-			parts = append([]string{
-				strings.Join(parts[:i], "."),
-				strings.Join(parts[i:i+2], "."),
-			}, parts[i+2:]...)
-		} else if i == 0 {
-			parts = append([]string{
-				strings.Join(parts[i:i+2], "."),
-			}, parts[i+2:]...)
-		}
-
-		if _, ok := layers[parts[0]]; !ok {
-			layers[parts[0]] = make(Layer)
-		}
-
-		layers[parts[0]][strings.Join(parts[1:], ".")] = t
-	}
-
-	return layers
+func (ts *Tensors) Layers() map[string]Layer {
+	ts.layersOnce.Do(func() {
+		ts.layers = make(map[string]Layer)
+		for _, t := range ts.Items {
+			parts := strings.Split(t.Name, ".")
+			if index := slices.IndexFunc(parts, func(s string) bool { return s == "blk" || s == "mm" }); index != -1 {
+				if len(parts) > index+2 {
+					// blk and mm should have a number after them, join it
+					parts = append(
+						[]string{strings.Join(parts[:index+2], ".")},
+						parts[index+2:]...)
+				}
+			}
+
+			if _, ok := ts.layers[parts[0]]; !ok {
+				ts.layers[parts[0]] = make(Layer)
+			}
+
+			ts.layers[parts[0]][strings.Join(parts[1:], ".")] = t
+		}
+	})
+
+	return ts.layers
 }

 type Layer map[string]*Tensor

-func (l Layer) Size() (size uint64) {
+func (l Layer) size() (size uint64) {
 	for _, t := range l {
 		size += t.Size()
 	}
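The `u64` helper added above exists because decoded GGUF metadata surfaces as different concrete Go types depending on how the value was written, and numbers that round-trip through JSON come back as `float64`. A minimal sketch of the same normalization; the map contents here are illustrative:

package main

import "fmt"

func main() {
	// kv mimics the decoded metadata map; the key is illustrative.
	kv := map[string]any{"llama.block_count": uint32(32)}

	var blocks uint64
	switch v := kv["llama.block_count"].(type) {
	case uint64:
		blocks = v
	case uint32:
		blocks = uint64(v)
	case float64: // e.g. a value that round-tripped through JSON
		blocks = uint64(v)
	}
	fmt.Println(blocks) // 32
}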
@@ -278,6 +255,8 @@ func (t Tensor) typeSize() uint64 {
 		return 8
 	case 29: // IQ1_M
 		return blockSize/8 + blockSize/16 + blockSize/32
+	case 30: // BF16
+		return 2
 	default:
 		return 0
 	}
@@ -316,7 +295,7 @@ const (

 var ErrUnsupportedFormat = errors.New("unsupported model format")

-func DetectContentType(b []byte) string {
+func DetectGGMLType(b []byte) string {
 	switch binary.LittleEndian.Uint32(b[:4]) {
 	case FILE_MAGIC_GGML:
 		return "ggml"
@@ -333,12 +312,12 @@ func DetectContentType(b []byte) string {
 	}
 }

-// Decode decodes a GGML model from the given reader.
+// DecodeGGML decodes a GGML model from the given reader.
 //
 // It collects array values for arrays with a size less than or equal to
 // maxArraySize. If maxArraySize is 0, the default value of 1024 is used. If
 // the maxArraySize is negative, all arrays are collected.
-func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
+func DecodeGGML(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
 	if maxArraySize == 0 {
 		maxArraySize = 1024
 	}
@@ -352,6 +331,10 @@ func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {

 	var c container
 	switch magic {
+	case FILE_MAGIC_GGML, FILE_MAGIC_GGMF, FILE_MAGIC_GGJT:
+		return nil, 0, ErrUnsupportedFormat
+	case FILE_MAGIC_GGLA:
+		c = &containerGGLA{}
 	case FILE_MAGIC_GGUF_LE:
 		c = &containerGGUF{ByteOrder: binary.LittleEndian, maxArraySize: maxArraySize}
 	case FILE_MAGIC_GGUF_BE:
@@ -547,20 +530,21 @@ func (llm GGML) GraphSize(context, batch uint64, kvCacheType string) (kv, partia
 }

 // SupportsKVCacheType checks if the requested cache type is supported
-func (llm GGML) SupportsKVCacheType(cacheType string) bool {
-	return slices.Contains([]string{"f16", "q8_0", "q4_0"}, cacheType)
+func (ggml GGML) SupportsKVCacheType(cacheType string) bool {
+	validKVCacheTypes := []string{"f16", "q8_0", "q4_0"}
+	return slices.Contains(validKVCacheTypes, cacheType)
 }

 // SupportsFlashAttention checks if the model supports flash attention
-func (llm GGML) SupportsFlashAttention() bool {
-	_, isEmbedding := llm.KV()[fmt.Sprintf("%s.pooling_type", llm.KV().Architecture())]
+func (ggml GGML) SupportsFlashAttention() bool {
+	_, isEmbedding := ggml.KV()[fmt.Sprintf("%s.pooling_type", ggml.KV().Architecture())]
 	if isEmbedding {
 		return false
 	}

 	// Check head counts match and are non-zero
-	headCountK := llm.KV().EmbeddingHeadCountK()
-	headCountV := llm.KV().EmbeddingHeadCountV()
+	headCountK := ggml.KV().EmbeddingHeadCountK()
+	headCountV := ggml.KV().EmbeddingHeadCountV()
 	return headCountK != 0 && headCountV != 0 && headCountK == headCountV
 }
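Both `DetectGGMLType` and `DecodeGGML` dispatch on the first four bytes of the file, read as a little-endian `uint32`. A hedged sketch of that check; the file path is illustrative, and the constant is the ASCII bytes "GGUF" interpreted little-endian:

package main

import (
	"encoding/binary"
	"fmt"
	"log"
	"os"
)

func main() {
	f, err := os.Open("model.gguf") // illustrative path
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	var magic uint32
	if err := binary.Read(f, binary.LittleEndian, &magic); err != nil {
		log.Fatal(err)
	}
	if magic == 0x46554747 { // "GGUF" in little-endian byte order
		fmt.Println("gguf")
	}
}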
llm/ggml_test.go (new file, 1 line)
@@ -0,0 +1 @@
+package llm
@@ -1,4 +1,4 @@
-package ggml
+package llm

 import (
 	"bytes"
@@ -8,9 +8,10 @@ import (
 	"fmt"
 	"io"
 	"log/slog"
-	"maps"
 	"slices"
 	"strings"
+
+	"golang.org/x/exp/maps"
 )

 type containerGGUF struct {
@@ -109,9 +110,9 @@ func (llm *gguf) KV() KV {
 	return llm.kv
 }

-func (llm *gguf) Tensors() Tensors {
-	return Tensors{
-		items:  llm.tensors,
+func (llm *gguf) Tensors() *Tensors {
+	return &Tensors{
+		Items:  llm.tensors,
 		Offset: llm.tensorOffset,
 	}
 }
@@ -522,7 +523,7 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
 		return err
 	}

-	keys := slices.Collect(maps.Keys(kv))
+	keys := maps.Keys(kv)
 	slices.Sort(keys)

 	for _, key := range keys {
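The `maps.Keys` change above swaps packages rather than behavior: `golang.org/x/exp/maps.Keys` returns a `[]K` directly, while the standard library's `maps.Keys` (Go 1.23+) returns an iterator that has to be collected first. A minimal sketch of the stdlib form:

package main

import (
	"fmt"
	"maps"
	"slices"
)

func main() {
	kv := map[string]int{"b": 2, "a": 1}

	// maps.Keys yields an iter.Seq[string]; collect it into a slice.
	keys := slices.Collect(maps.Keys(kv))
	slices.Sort(keys) // map iteration order is randomized, so sort
	fmt.Println(keys) // [a b]
}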
@@ -11,19 +11,18 @@ import (
 	"github.com/ollama/ollama/discover"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/fs/ggml"
 )

 // This algorithm looks for a complete fit to determine if we need to unload other models
-func PredictServerFit(allGpus discover.GpuInfoList, f *ggml.GGML, adapters, projectors []string, opts api.Options) (bool, uint64) {
+func PredictServerFit(allGpus discover.GpuInfoList, ggml *GGML, adapters, projectors []string, opts api.Options) (bool, uint64) {
 	// Split up the GPUs by type and try them
 	var estimatedVRAM uint64
 	for _, gpus := range allGpus.ByLibrary() {
 		var layerCount int
-		estimate := EstimateGPULayers(gpus, f, projectors, opts)
+		estimate := EstimateGPULayers(gpus, ggml, projectors, opts)
 		layerCount, estimatedVRAM = estimate.Layers, estimate.VRAMSize
 		if opts.NumGPU < 0 {
-			if layerCount > 0 && layerCount >= int(f.KV().BlockCount()+1) {
+			if layerCount > 0 && layerCount >= int(ggml.KV().BlockCount()+1) {
 				return true, estimatedVRAM
 			}
 		} else {
@@ -71,7 +70,7 @@ type MemoryEstimate struct {

 // Given a model and one or more GPU targets, predict how many layers and bytes we can load, and the total size
 // The GPUs provided must all be the same Library
-func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []string, opts api.Options) MemoryEstimate {
+func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string, opts api.Options) MemoryEstimate {
 	// Graph size for a partial offload, applies to all GPUs
 	var graphPartialOffload uint64

@@ -116,31 +115,33 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
 		opts.NumCtx = max(opts.NumCtx, 2048)
 	}

-	layers := f.Tensors().Layers()
+	layers := ggml.Tensors().Layers()
 	// add one layer worth of memory as a buffer
 	if blk0, ok := layers["blk.0"]; ok {
-		layerSize = blk0.Size()
+		layerSize = blk0.size()
 	} else {
 		slog.Warn("model missing blk.0 layer size")
 	}

-	var kvct string
-	if envconfig.FlashAttention() &&
+	fa := envconfig.FlashAttention() &&
 		discover.GetGPUInfo().FlashAttentionSupported() &&
-		f.SupportsFlashAttention() {
+		ggml.SupportsFlashAttention()
+
+	var kvct string
+	if fa {
 		requested := strings.ToLower(envconfig.KvCacheType())
-		if requested != "" && f.SupportsKVCacheType(requested) {
+		if requested != "" && ggml.SupportsKVCacheType(requested) {
 			kvct = requested
 		}
 	}

-	kv, graphPartialOffload, graphFullOffload := f.GraphSize(uint64(opts.NumCtx), uint64(min(opts.NumCtx, opts.NumBatch)), kvct)
+	kv, graphPartialOffload, graphFullOffload := ggml.GraphSize(uint64(opts.NumCtx), uint64(min(opts.NumCtx, opts.NumBatch)), kvct)

 	// KV is proportional to the number of layers
-	layerSize += kv / f.KV().BlockCount()
+	layerSize += kv / ggml.KV().BlockCount()

 	if graphPartialOffload == 0 {
-		graphPartialOffload = f.KV().GQA() * kv / 6
+		graphPartialOffload = ggml.KV().GQA() * kv / 6
 	}
 	if graphFullOffload == 0 {
 		graphFullOffload = graphPartialOffload
@@ -155,12 +156,12 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
 	}

 	if layer, ok := layers["output_norm"]; ok {
-		memoryLayerOutput += layer.Size()
+		memoryLayerOutput += layer.size()
 	}
 	if layer, ok := layers["output"]; ok {
-		memoryLayerOutput += layer.Size()
+		memoryLayerOutput += layer.size()
 	} else if layer, ok := layers["token_embd"]; ok {
-		memoryLayerOutput += layer.Size()
+		memoryLayerOutput += layer.size()
 	}

 	// Output layer handled at the end if we have space
@@ -210,11 +211,11 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
 	}

 	// For all the layers, find where they can fit on the GPU(s)
-	for i := range int(f.KV().BlockCount()) {
+	for i := range int(ggml.KV().BlockCount()) {
 		// Some models have inconsistent layer sizes
 		if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
-			layerSize = blk.Size()
-			layerSize += kv / f.KV().BlockCount()
+			layerSize = blk.size()
+			layerSize += kv / ggml.KV().BlockCount()
 		}
 		memoryWeights += layerSize

@@ -237,10 +238,10 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
 			}
 		}
 	}
-	if layerCount >= int(f.KV().BlockCount()) {
+	if layerCount >= int(ggml.KV().BlockCount()) {
 		fullyLoaded = true
 	} else {
-		for i := layerCount; i < int(f.KV().BlockCount()); i++ {
+		for i := layerCount; i < int(ggml.KV().BlockCount()); i++ {
 			overflow += layerSize
 		}
 	}
@@ -258,7 +259,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
 		}
 	}

-	if layerCount < int(f.KV().BlockCount())+1 {
+	if layerCount < int(ggml.KV().BlockCount())+1 {
 		fullyLoaded = false
 		overflow += memoryLayerOutput
 	}
@@ -310,7 +311,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin

 		inferenceLibrary:    gpus[0].Library,
 		layersRequested:     opts.NumGPU,
-		layersModel:         int(f.KV().BlockCount()) + 1,
+		layersModel:         int(ggml.KV().BlockCount()) + 1,
 		availableList:       availableList,
 		kv:                  kv,
 		allocationsList:     allocationsList,
@@ -338,9 +339,22 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
 	return estimate
 }

-func (m MemoryEstimate) LogValue() slog.Value {
-	attrs := []slog.Attr{
-		slog.String("library", m.inferenceLibrary),
+func (m MemoryEstimate) log() {
+	overhead := envconfig.GpuOverhead()
+
+	log := slog.With()
+	if m.projectorWeights > 0 {
+		log = log.With(
+			slog.Group(
+				"projector",
+				"weights", format.HumanBytes2(m.projectorWeights),
+				"graph", format.HumanBytes2(m.projectorGraph),
+			),
+		)
+	}
+
+	log.Info(
+		"offload to "+m.inferenceLibrary,
 		slog.Group(
 			"layers",
 			// requested number of layers to offload
@@ -356,7 +370,7 @@ func (m MemoryEstimate) LogValue() slog.Value {
 			"memory",
 			// memory available by GPU for offloading
 			"available", m.availableList,
-			"gpu_overhead", format.HumanBytes2(envconfig.GpuOverhead()),
+			"gpu_overhead", format.HumanBytes2(overhead),
 			slog.Group(
 				"required",
 				// memory required for full offloading
@@ -385,17 +399,7 @@ func (m MemoryEstimate) LogValue() slog.Value {
 				"partial", format.HumanBytes2(m.graphPartialOffload),
 			),
 		),
-	}
-
-	if m.projectorWeights > 0 {
-		attrs = append(attrs, slog.Group(
-			"projector",
-			"weights", format.HumanBytes2(m.projectorWeights),
-			"graph", format.HumanBytes2(m.projectorGraph),
-		))
-	}
-
-	return slog.GroupValue(attrs...)
+	)
 }

 func projectorMemoryRequirements(filename string) (weights, graphSize uint64) {
@@ -405,13 +409,13 @@ func projectorMemoryRequirements(filename string) (weights, graphSize uint64) {
 	}
 	defer file.Close()

-	ggml, _, err := ggml.Decode(file, 0)
+	ggml, _, err := DecodeGGML(file, 0)
 	if err != nil {
 		return 0, 0
 	}

 	for _, layer := range ggml.Tensors().Layers() {
-		weights += layer.Size()
+		weights += layer.size()
 	}

 	switch arch := ggml.KV().Architecture(); arch {
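For orientation, `EstimateGPULayers` walks the model's repeating blocks and assigns layers to GPUs until free VRAM (minus a reserved overhead) runs out. A deliberately simplified sketch of that packing idea, not the actual estimator; all names and the equal-layer-size assumption are illustrative:

package main

import "fmt"

// fitLayers counts how many equally sized layers fit in free VRAM
// after reserving a fixed overhead; +1 accounts for the output layer.
func fitLayers(freeVRAM, overhead, layerSize uint64, blockCount int) int {
	if freeVRAM <= overhead || layerSize == 0 {
		return 0
	}
	n := int((freeVRAM - overhead) / layerSize)
	if n > blockCount+1 {
		n = blockCount + 1
	}
	return n
}

func main() {
	// 8 GiB free, 1 GiB overhead, 512 MiB per layer, 32 blocks.
	fmt.Println(fitLayers(8<<30, 1<<30, 512<<20, 32)) // 14
}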
@@ -11,7 +11,6 @@ import (

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/discover"
-	"github.com/ollama/ollama/fs/ggml"
 )

 func TestEstimateGPULayers(t *testing.T) {
@@ -24,7 +23,7 @@ func TestEstimateGPULayers(t *testing.T) {
 	defer f.Close()
 	inputLayerCount := 5

-	tensors := []ggml.Tensor{
+	tensors := []Tensor{
 		{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
 		{Name: "blk.1.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
 		{Name: "blk.2.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
@@ -33,7 +32,7 @@ func TestEstimateGPULayers(t *testing.T) {
 		{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
 	}
 	assert.Len(t, tensors, inputLayerCount+1)
-	err = ggml.WriteGGUF(f, ggml.KV{
+	err = WriteGGUF(f, KV{
 		"general.architecture":   "llama",
 		"llama.context_length":   uint32(32),
 		"llama.embedding_length": uint32(4096),
llm/server.go (345 changes)
@@ -28,8 +28,6 @@ import (
 	"github.com/ollama/ollama/discover"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/fs/ggml"
-	"github.com/ollama/ollama/grammar"
 	"github.com/ollama/ollama/llama"
 )

@@ -73,7 +71,7 @@ type llmServer struct {
 // It collects array values for arrays with a size less than or equal to
 // maxArraySize. If maxArraySize is 0, the default value of 1024 is used. If
 // the maxArraySize is negative, all arrays are collected.
-func LoadModel(model string, maxArraySize int) (*ggml.GGML, error) {
+func LoadModel(model string, maxArraySize int) (*GGML, error) {
 	if _, err := os.Stat(model); err != nil {
 		return nil, err
 	}
@@ -84,17 +82,21 @@ func LoadModel(model string, maxArraySize int) (*ggml.GGML, error) {
 	}
 	defer f.Close()

-	ggml, _, err := ggml.Decode(f, maxArraySize)
+	ggml, _, err := DecodeGGML(f, maxArraySize)
 	return ggml, err
 }

 // NewLlamaServer will run a server for the given GPUs
 // The gpu list must be a single family.
-func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapters, projectors []string, opts api.Options, numParallel int) (LlamaServer, error) {
+func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapters, projectors []string, opts api.Options, numParallel int) (LlamaServer, error) {
+	var systemTotalMemory uint64
+	var systemFreeMemory uint64
+	var systemSwapFreeMemory uint64
+
 	systemInfo := discover.GetSystemInfo()
-	systemTotalMemory := systemInfo.System.TotalMemory
-	systemFreeMemory := systemInfo.System.FreeMemory
-	systemSwapFreeMemory := systemInfo.System.FreeSwap
+	systemTotalMemory = systemInfo.System.TotalMemory
+	systemFreeMemory = systemInfo.System.FreeMemory
+	systemSwapFreeMemory = systemInfo.System.FreeSwap
 	slog.Info("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "free_swap", format.HumanBytes2(systemSwapFreeMemory))

 	// If the user wants zero GPU layers, reset the gpu list to be CPU/system ram info
@@ -102,12 +104,8 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
 		gpus = discover.GetCPUInfo()
 	}

-	var estimate MemoryEstimate
-	if len(gpus) == 1 && gpus[0].Library == "cpu" {
-		estimate = EstimateGPULayers(gpus, f, projectors, opts)
-	} else {
-		estimate = EstimateGPULayers(gpus, f, projectors, opts)
-
+	estimate := EstimateGPULayers(gpus, ggml, projectors, opts)
+	if len(gpus) > 1 || gpus[0].Library != "cpu" {
 		switch {
 		case gpus[0].Library == "metal" && estimate.VRAMSize > systemTotalMemory:
 			// disable partial offloading when model is greater than total system memory as this
@@ -132,7 +130,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
 		}
 	}

-	slog.Info("offload", "", estimate)
+	estimate.log()

 	params := []string{
 		"--model", model,
@@ -176,7 +174,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
 		fa = false
 	}

-	if fa && !f.SupportsFlashAttention() {
+	if fa && !ggml.SupportsFlashAttention() {
 		slog.Warn("flash attention enabled but not supported by model")
 		fa = false
 	}
@@ -189,7 +187,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt

 	// Flash Attention also supports kv cache quantization
 	// Enable if the requested and kv cache type is supported by the model
-	if kvct != "" && f.SupportsKVCacheType(kvct) {
+	if kvct != "" && ggml.SupportsKVCacheType(kvct) {
 		params = append(params, "--kv-cache-type", kvct)
 	} else {
 		slog.Warn("kv cache type not supported by model", "type", kvct)
@@ -202,7 +200,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
 	for _, g := range gpus {
 		if g.Library == "metal" &&
 			uint64(opts.NumGPU) > 0 &&
-			uint64(opts.NumGPU) < f.KV().BlockCount()+1 {
+			uint64(opts.NumGPU) < ggml.KV().BlockCount()+1 {
 			opts.UseMMap = new(bool)
 			*opts.UseMMap = false
 		}
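The `slog.Info("offload", "", estimate)` call removed above relied on slog's `LogValuer` mechanism, where the value formats itself when logged; the replacement calls a plain `log()` method instead. A minimal sketch of the `LogValuer` side, with an illustrative type:

package main

import "log/slog"

type estimate struct{ layers int }

// LogValue lets a value render itself when passed to slog; slog calls
// it automatically when resolving the attribute's value.
func (e estimate) LogValue() slog.Value {
	return slog.GroupValue(slog.Int("layers", e.layers))
}

func main() {
	slog.Info("offload", "estimate", estimate{layers: 14})
}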
@@ -234,149 +232,205 @@
 		params = append(params, "--multiuser-cache")
 	}

-	exe, err := os.Executable()
-	if err != nil {
-		return nil, err
-	}
-
-	// Find an availableServers port, retry on each iteration in case the failure was a port conflict race
-	port := 0
-	if a, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
-		var l *net.TCPListener
-		if l, err = net.ListenTCP("tcp", a); err == nil {
-			port = l.Addr().(*net.TCPAddr).Port
-			l.Close()
-		}
-	}
-	if port == 0 {
-		slog.Debug("ResolveTCPAddr failed ", "error", err)
-		port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
-	}
-	finalParams := []string{"runner"}
-	finalParams = append(finalParams, params...)
-	finalParams = append(finalParams, "--port", strconv.Itoa(port))
-
-	pathEnv := "LD_LIBRARY_PATH"
-	if runtime.GOOS == "windows" {
-		pathEnv = "PATH"
-	}
-	// Start with the server directory for the LD_LIBRARY_PATH/PATH
-	libraryPaths := []string{filepath.Dir(exe)}
-
-	if libraryPath, ok := os.LookupEnv(pathEnv); ok {
-		// favor our bundled library dependencies over system libraries
-		libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
-	}
-
-	// Note: we always put the dependency path first
-	// since this was the exact version we compiled/linked against
-	if gpus[0].DependencyPath != nil {
-		// assume gpus from the same library have the same dependency path
-		libraryPaths = append(gpus[0].DependencyPath, libraryPaths...)
-	}
-
-	// TODO - once fully switched to the Go runner, load the model here for tokenize/detokenize cgo access
-	s := &llmServer{
-		port:        port,
-		cmd:         exec.Command(exe, finalParams...),
-		status:      NewStatusWriter(os.Stderr),
-		options:     opts,
-		modelPath:   model,
-		estimate:    estimate,
-		numParallel: numParallel,
-		sem:         semaphore.NewWeighted(int64(numParallel)),
-		totalLayers: f.KV().BlockCount() + 1,
-		gpus:        gpus,
-		done:        make(chan error, 1),
-	}
-
-	s.cmd.Env = os.Environ()
-	s.cmd.Stdout = os.Stdout
-	s.cmd.Stderr = s.status
-	s.cmd.SysProcAttr = LlamaServerSysProcAttr
-
-	envWorkarounds := [][2]string{}
-	for _, gpu := range gpus {
-		envWorkarounds = append(envWorkarounds, gpu.EnvWorkarounds...)
-	}
-	visibleDevicesEnv, visibleDevicesEnvVal := gpus.GetVisibleDevicesEnv()
-	pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
-
-	// Update or add the path and visible devices variable with our adjusted version
-	pathNeeded := true
-	devicesNeeded := visibleDevicesEnv != ""
-	for i := range s.cmd.Env {
-		cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
-		if strings.EqualFold(cmp[0], pathEnv) {
-			s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
-			pathNeeded = false
-		} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
-			s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
-			devicesNeeded = false
-		} else if len(envWorkarounds) != 0 {
-			for _, kv := range envWorkarounds {
-				if strings.EqualFold(cmp[0], kv[0]) {
-					s.cmd.Env[i] = kv[0] + "=" + kv[1]
-				}
-			}
-		}
-	}
-	if pathNeeded {
-		s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
-	}
-	if devicesNeeded {
-		s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
-	}
-
-	slog.Info("starting llama server", "cmd", s.cmd.String())
-	if envconfig.Debug() {
-		filteredEnv := []string{}
-		for _, ev := range s.cmd.Env {
-			if strings.HasPrefix(ev, "CUDA_") ||
-				strings.HasPrefix(ev, "ROCR_") ||
-				strings.HasPrefix(ev, "ROCM_") ||
-				strings.HasPrefix(ev, "HIP_") ||
-				strings.HasPrefix(ev, "GPU_") ||
-				strings.HasPrefix(ev, "HSA_") ||
-				strings.HasPrefix(ev, "GGML_") ||
-				strings.HasPrefix(ev, "PATH=") ||
-				strings.HasPrefix(ev, "LD_LIBRARY_PATH=") {
-				filteredEnv = append(filteredEnv, ev)
-			}
-		}
-		// Log at debug as the environment is inherited and might contain sensitive information
-		slog.Debug("subprocess", "environment", filteredEnv)
-	}
-
-	if err = s.cmd.Start(); err != nil {
-		// Detect permission denied and augment the message about noexec
-		if errors.Is(err, os.ErrPermission) {
-			return nil, fmt.Errorf("unable to start server %w. %s may have noexec set. Set OLLAMA_TMPDIR for server to a writable executable directory", err, exe)
-		}
-		msg := ""
-		if s.status != nil && s.status.LastErrMsg != "" {
-			msg = s.status.LastErrMsg
-		}
-		return nil, fmt.Errorf("error starting the external llama server: %v %s", err, msg)
-	}
-
-	// reap subprocess when it exits
-	go func() {
-		err := s.cmd.Wait()
-		// Favor a more detailed message over the process exit status
-		if err != nil && s.status != nil && s.status.LastErrMsg != "" {
-			slog.Debug("llama runner terminated", "error", err)
-			if strings.Contains(s.status.LastErrMsg, "unknown model") {
-				s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
-			}
-			s.done <- errors.New(s.status.LastErrMsg)
-		} else {
-			s.done <- err
-		}
-	}()
-
-	return s, nil
+	libs := make(map[string]string)
+	if entries, err := os.ReadDir(discover.LibOllamaPath); err == nil {
+		for _, entry := range entries {
+			libs[entry.Name()] = filepath.Join(discover.LibOllamaPath, entry.Name())
+		}
+	}
+
+	lib := gpus[0].RunnerName()
+	requested := envconfig.LLMLibrary()
+	if libs[requested] != "" {
+		slog.Info("using requested gpu library", "requested", requested)
+		lib = requested
+	}
+
+	var compatible []string
+	for k := range libs {
+		// exact match first
+		if k == lib {
+			compatible = append([]string{k}, compatible...)
+			continue
+		}
+
+		// then match the family (e.g. 'cuda')
+		if strings.Split(k, "_")[0] == strings.Split(lib, "_")[0] {
+			compatible = append(compatible, k)
+		}
+	}
+	slog.Debug("compatible gpu libraries", "compatible", compatible)
+
+	// iterate through compatible GPU libraries such as 'cuda_v12', 'cuda_v11', 'rocm', etc.
+	// adding each library's respective path to the LD_LIBRARY_PATH, until finally running
+	// without any LD_LIBRARY_PATH flags
+	for {
+		port := 0
+		if a, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
+			var l *net.TCPListener
+			if l, err = net.ListenTCP("tcp", a); err == nil {
+				port = l.Addr().(*net.TCPAddr).Port
+				l.Close()
+			}
+		}
+		if port == 0 {
+			slog.Debug("ResolveTCPAddr failed, using random port")
+			port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
+		}
+		finalParams := []string{"runner"}
+		finalParams = append(finalParams, params...)
+		finalParams = append(finalParams, "--port", strconv.Itoa(port))
+
+		var pathEnv string
+		switch runtime.GOOS {
+		case "windows":
+			pathEnv = "PATH"
+		case "darwin":
+			pathEnv = "DYLD_LIBRARY_PATH"
+		default:
+			pathEnv = "LD_LIBRARY_PATH"
+		}
+
+		var libraryPaths []string
+		if libraryPath, ok := os.LookupEnv(pathEnv); ok {
+			libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
+		}
+
+		if len(compatible) > 0 {
+			c := compatible[0]
+			if libpath, ok := libs[c]; ok {
+				slog.Debug("adding gpu library", "path", libpath)
+				libraryPaths = append(libraryPaths, libpath)
+			}
+		}
+
+		// Note: we always put the dependency path first
+		// since this was the exact version we compiled/linked against
+		if gpus[0].DependencyPath != nil {
+			slog.Debug("adding gpu dependency paths", "paths", gpus[0].DependencyPath)
+			// assume gpus from the same library have the same dependency path
+			libraryPaths = append(gpus[0].DependencyPath, libraryPaths...)
+		}
+
+		// finally, add the root library path
+		libraryPaths = append(libraryPaths, discover.LibOllamaPath)
+
+		exe, err := os.Executable()
+		if err != nil {
+			return nil, fmt.Errorf("unable to lookup executable path: %w", err)
+		}
+
+		exe, err = filepath.EvalSymlinks(exe)
+		if err != nil {
+			return nil, fmt.Errorf("unable to evaluate symlinks for executable path: %w", err)
+		}
+
+		// TODO - once fully switched to the Go runner, load the model here for tokenize/detokenize cgo access
+		s := &llmServer{
+			port:        port,
+			cmd:         exec.Command(exe, finalParams...),
+			status:      NewStatusWriter(os.Stderr),
+			options:     opts,
+			modelPath:   model,
+			estimate:    estimate,
+			numParallel: numParallel,
+			sem:         semaphore.NewWeighted(int64(numParallel)),
+			totalLayers: ggml.KV().BlockCount() + 1,
+			gpus:        gpus,
+			done:        make(chan error, 1),
+		}
+
+		s.cmd.Env = os.Environ()
+		s.cmd.Stdout = os.Stdout
+		s.cmd.Stderr = s.status
+		s.cmd.SysProcAttr = LlamaServerSysProcAttr
+
+		envWorkarounds := [][2]string{}
+		for _, gpu := range gpus {
+			envWorkarounds = append(envWorkarounds, gpu.EnvWorkarounds...)
+		}
+		visibleDevicesEnv, visibleDevicesEnvVal := gpus.GetVisibleDevicesEnv()
+		pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
+
+		// Update or add the path and visible devices variable with our adjusted version
+		pathNeeded := true
+		devicesNeeded := visibleDevicesEnv != ""
+		for i := range s.cmd.Env {
+			cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
+			if strings.EqualFold(cmp[0], pathEnv) {
+				s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
+				pathNeeded = false
+			} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
+				s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
+				devicesNeeded = false
+			} else if len(envWorkarounds) != 0 {
+				for _, kv := range envWorkarounds {
+					if strings.EqualFold(cmp[0], kv[0]) {
+						s.cmd.Env[i] = kv[0] + "=" + kv[1]
+					}
+				}
+			}
+		}
+		if pathNeeded {
+			s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
+		}
+		if devicesNeeded {
+			s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
+		}
+
+		slog.Info("starting llama server", "cmd", s.cmd.String())
+		if envconfig.Debug() {
+			filteredEnv := []string{}
+			for _, ev := range s.cmd.Env {
+				if strings.HasPrefix(ev, "CUDA_") ||
+					strings.HasPrefix(ev, "ROCR_") ||
+					strings.HasPrefix(ev, "ROCM_") ||
+					strings.HasPrefix(ev, "HIP_") ||
+					strings.HasPrefix(ev, "GPU_") ||
+					strings.HasPrefix(ev, "HSA_") ||
+					strings.HasPrefix(ev, "GGML_") ||
+					strings.HasPrefix(ev, "PATH=") ||
+					strings.HasPrefix(ev, "LD_LIBRARY_PATH=") ||
+					strings.HasPrefix(ev, "DYLD_LIBRARY_PATH=") {
+					filteredEnv = append(filteredEnv, ev)
+				}
+			}
+			// Log at debug as the environment is inherited and might contain sensitive information
+			slog.Debug("subprocess", "environment", filteredEnv)
+		}
+
+		if err = s.cmd.Start(); err != nil {
+			var msg string
+			if s.status != nil && s.status.LastErrMsg != "" {
+				msg = s.status.LastErrMsg
+			}
+			err := fmt.Errorf("error starting runner: %v %s", err, msg)
+			if len(compatible) == 0 {
+				return nil, err
+			}
+
+			slog.Warn("unable to start runner with compatible gpu", "error", err, "compatible", compatible)
+			compatible = compatible[1:]
+			continue
+		}
+
+		// reap subprocess when it exits
+		go func() {
+			err := s.cmd.Wait()
+			// Favor a more detailed message over the process exit status
+			if err != nil && s.status != nil && s.status.LastErrMsg != "" {
+				slog.Error("llama runner terminated", "error", err)
+				if strings.Contains(s.status.LastErrMsg, "unknown model") {
+					s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
+				}
+				s.done <- errors.New(s.status.LastErrMsg)
+			} else {
+				s.done <- err
+			}
+		}()
+
+		return s, nil
+	}
 }

 type ServerStatus int
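The rewritten startup path above retries across compatible GPU libraries: when the runner fails to start, it drops the head of `compatible`, loops, and eventually tries with no bundled library at all before giving up. A minimal sketch of that fallback pattern; the function and candidate names are illustrative:

package main

import (
	"errors"
	"fmt"
)

// startWithFallback tries the preferred candidate first, drops it on
// failure, and finally runs with an empty library before giving up.
// start is a stand-in for launching the runner subprocess.
func startWithFallback(candidates []string, start func(string) error) error {
	for {
		var lib string
		if len(candidates) > 0 {
			lib = candidates[0]
		}
		if err := start(lib); err != nil {
			if len(candidates) == 0 {
				return err
			}
			candidates = candidates[1:]
			continue
		}
		return nil
	}
}

func main() {
	err := startWithFallback([]string{"cuda_v12", "cuda_v11"}, func(lib string) error {
		if lib == "cuda_v12" {
			return errors.New("driver too old") // simulated failure
		}
		return nil
	})
	fmt.Println(err) // <nil>: cuda_v11 succeeded
}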
@@ -661,9 +715,9 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 		}

 		// User provided a JSON schema
-		g, err := grammar.FromSchema(nil, req.Format)
-		if err != nil {
-			return fmt.Errorf("invalid JSON schema in format: %w", err)
+		g := llama.SchemaToGrammar(req.Format)
+		if g == nil {
+			return fmt.Errorf("invalid JSON schema in format")
 		}
 		request["grammar"] = string(g)
 	}
@@ -683,6 +737,7 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 	if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
 		req.Options.NumPredict = 10 * s.options.NumCtx
 	}
+
 	// Make sure the server is ready
 	status, err := s.getServerStatusRetry(ctx)
 	if err != nil {
@@ -18,8 +18,8 @@ const config: ForgeConfig = {
     asar: true,
     icon: './assets/icon.icns',
     extraResource: [
-      '../dist/ollama',
-      '../dist/darwin-amd64/lib',
+      path.join(__dirname, '../dist/darwin/ollama'),
+      ...fs.readdirSync(path.join(__dirname, '../dist/darwin-amd64/lib/ollama')).map(f => path.join(__dirname, '../dist/darwin-amd64/lib/ollama', f)),
       path.join(__dirname, './assets/iconTemplate.png'),
       path.join(__dirname, './assets/iconTemplate@2x.png'),
       path.join(__dirname, './assets/iconUpdateTemplate.png'),
@@ -43,7 +43,7 @@ const config: ForgeConfig = {
       }
       : {}),
     osxUniversal: {
-      x64ArchFiles: '**/ollama*',
+      x64ArchFiles: '*',
     },
   },
   rebuildConfig: {},
ml/backend.go (deleted, 191 lines)
@@ -1,191 +0,0 @@
-package ml
-
-import (
-	"bytes"
-	"encoding/binary"
-	"fmt"
-	"os"
-	"strings"
-)
-
-type Config interface {
-	Architecture() string
-	String(string, ...string) string
-	Uint(string, ...uint32) uint32
-	Float(string, ...float32) float32
-
-	Strings(string, ...[]string) []string
-	Uints(string, ...[]uint32) []uint32
-}
-
-type Backend interface {
-	Config() Config
-	Get(name string) Tensor
-	NewContext() Context
-}
-
-var backends = make(map[string]func(*os.File) (Backend, error))
-
-func RegisterBackend(name string, f func(*os.File) (Backend, error)) {
-	if _, ok := backends[name]; ok {
-		panic("backend: backend already registered")
-	}
-
-	backends[name] = f
-}
-
-func NewBackend(f *os.File) (Backend, error) {
-	if backend, ok := backends["ggml"]; ok {
-		return backend(f)
-	}
-
-	return nil, fmt.Errorf("unsupported backend")
-}
-
-type Context interface {
-	Zeros(dtype DType, shape ...int) Tensor
-	FromFloatSlice(s []float32, shape ...int) (Tensor, error)
-	FromIntSlice(s []int32, shape ...int) (Tensor, error)
-
-	Forward(Tensor)
-	Compute(Tensor) Tensor
-	Close() error
-}
-
-type Tensor interface {
-	Dim(n int) int64
-	Stride(n int) int64
-
-	Shape() []int64
-	DType() DType
-
-	Bytes() []byte
-	Floats() []float32
-
-	Add(ctx Context, t2 Tensor) Tensor
-	Mul(ctx Context, t2 Tensor) Tensor
-	Mulmat(ctx Context, t2 Tensor) Tensor
-
-	Softmax(ctx Context) Tensor
-	LayerNorm(ctx Context, weight, bias Tensor, eps float32) Tensor
-	RMSNorm(ctx Context, weight Tensor, eps float32) Tensor
-	Scale(ctx Context, s float64) Tensor
-
-	Conv2D(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
-	RoPE(ctx Context, positionIDs, ropeFactors Tensor, dim uint32, base, scale float32) Tensor
-
-	Tanh(ctx Context) Tensor
-	GELU(ctx Context) Tensor
-	SILU(ctx Context) Tensor
-
-	Reshape(ctx Context, shape ...int64) Tensor
-	View(ctx Context, offset int, shape ...int) Tensor
-	Permute(ctx Context, shape ...int) Tensor
-	Contiguous(ctx Context) Tensor
-
-	Pad(ctx Context, shape ...int64) Tensor
-	Unpad(ctx Context, shape ...int64) Tensor
-
-	Stack(ctx Context, dim int, s ...Tensor) Tensor
-	Concat(ctx Context, t2 Tensor, dim int) Tensor
-	Rows(ctx Context, t2 Tensor) Tensor
-	Copy(ctx Context, t2 Tensor) Tensor
-}
-
-type number interface {
-	~int | ~int8 | ~int16 | ~int32 | ~int64 |
-		~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 |
-		~float32 | ~float64 |
-		~complex64 | ~complex128
-}
-
-func mul[T number](s ...T) T {
-	p := T(1)
-	for _, v := range s {
-		p *= v
-	}
-
-	return p
-}
-
-type DumpOptions struct {
-	// Items is the number of elements to print at the beginning and end of each dimension.
-	Items int64
-
-	// Precision is the number of decimal places to print. Applies to float32 and float64.
-	Precision int
-}
-
-func Dump(t Tensor, opts ...DumpOptions) string {
-	if len(opts) < 1 {
-		opts = append(opts, DumpOptions{
-			Items:     3,
-			Precision: 4,
-		})
-	}
-
-	switch t.DType() {
-	case DTypeF32:
-		return dump[[]float32](t, opts[0])
-	case DTypeI32:
-		return dump[[]int32](t, opts[0])
-	default:
-		return "<unsupported>"
-	}
-}
-
-func dump[S ~[]E, E number](t Tensor, opts DumpOptions) string {
-	bts := t.Bytes()
-	if bts == nil {
-		return "<nil>"
-	}
-
-	s := make(S, mul(t.Shape()...))
-	if err := binary.Read(bytes.NewBuffer(t.Bytes()), binary.LittleEndian, &s); err != nil {
-		panic(err)
-	}
-
-	shape := t.Shape()
-
-	var sb strings.Builder
-	var f func([]int64, int64)
-	f = func(dims []int64, stride int64) {
-		prefix := strings.Repeat(" ", len(shape)-len(dims)+1)
-		fmt.Fprint(&sb, "[")
-		defer func() { fmt.Fprint(&sb, "]") }()
-		for i := int64(0); i < dims[0]; i++ {
-			if i >= opts.Items && i < dims[0]-opts.Items {
-				fmt.Fprint(&sb, "..., ")
-				// skip to next printable element
-				skip := dims[0] - 2*opts.Items
-				if len(dims) > 1 {
-					stride += mul(append(dims[1:], skip)...)
-					fmt.Fprint(&sb, strings.Repeat("\n", len(dims)-1), prefix)
-				}
-				i += skip - 1
-			} else if len(dims) > 1 {
-				f(dims[1:], stride)
-				stride += mul(dims[1:]...)
-				if i < dims[0]-1 {
-					fmt.Fprint(&sb, ",", strings.Repeat("\n", len(dims)-1), prefix)
-				}
-			} else {
-				fmt.Fprint(&sb, s[stride+i])
-				if i < dims[0]-1 {
-					fmt.Fprint(&sb, ", ")
-				}
-			}
-		}
-	}
-	f(shape, 0)
-
-	return sb.String()
-}
-
-type DType int
-
-const (
-	DTypeF32 DType = iota
-	DTypeI32
-	DTypeOther
-)
@@ -1,5 +0,0 @@
-package backend
-
-import (
-	_ "github.com/ollama/ollama/ml/backend/ggml"
-)
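The deleted `ml/backend.go` used a name-keyed registry: implementations call `RegisterBackend` from an `init` function, pulled in through a blank import like the one in the five-line file above. A minimal sketch of the pattern, with illustrative types:

package main

import "fmt"

type Backend interface{ Name() string }

var backends = map[string]func() Backend{}

// Register panics on duplicates, mirroring the deleted RegisterBackend.
func Register(name string, f func() Backend) {
	if _, ok := backends[name]; ok {
		panic("backend already registered: " + name)
	}
	backends[name] = f
}

func New(name string) (Backend, error) {
	f, ok := backends[name]
	if !ok {
		return nil, fmt.Errorf("unsupported backend %q", name)
	}
	return f(), nil
}

type ggmlBackend struct{}

func (ggmlBackend) Name() string { return "ggml" }

// In the real layout this init lives in the implementation package and
// runs via the blank import shown above.
func init() { Register("ggml", func() Backend { return ggmlBackend{} }) }

func main() {
	b, _ := New("ggml")
	fmt.Println(b.Name()) // ggml
}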
@@ -1,580 +0,0 @@
package ggml

// #cgo CPPFLAGS: -I${SRCDIR}/ggml/include
// #include <stdlib.h>
// #include <stdint.h>
// #include "ggml.h"
// #include "ggml-cpu.h"
// #include "ggml-backend.h"
import "C"

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"log/slog"
	"os"
	"sync"
	"unsafe"

	"github.com/ollama/ollama/format"
	fs "github.com/ollama/ollama/fs/ggml"
	"github.com/ollama/ollama/ml"
	"golang.org/x/sync/errgroup"

	"github.com/ollama/ollama/ml/backend/ggml/ggml/src"
)

type device struct {
	d *C.struct_ggml_backend_device
}

func (d device) LogValue() slog.Value {
	var free, total uint64
	C.ggml_backend_dev_memory(d.d, (*C.size_t)(&free), (*C.size_t)(&total))

	kind := "unknown"
	switch C.ggml_backend_dev_type(d.d) {
	case C.GGML_BACKEND_DEVICE_TYPE_CPU:
		kind = "cpu"
	case C.GGML_BACKEND_DEVICE_TYPE_GPU:
		kind = "gpu"
	case C.GGML_BACKEND_DEVICE_TYPE_ACCEL:
		kind = "accel"
	}

	return slog.GroupValue(
		slog.String("name", C.GoString(C.ggml_backend_dev_name(d.d))),
		slog.String("description", C.GoString(C.ggml_backend_dev_description(d.d))),
		slog.String("kind", kind),
		slog.String("free", format.HumanBytes2(free)),
		slog.String("total", format.HumanBytes2(total)),
	)
}

var devices = sync.OnceValue(func() []device {
	ggml.OnceLoad()

	s := make([]device, C.ggml_backend_dev_count())
	for i := range s {
		s[i] = device{C.ggml_backend_dev_get(C.size_t(i))}
	}

	return s
})

type Backend struct {
	meta       *fs.GGML
	cpus, gpus []Context
	tensors    map[string]*Context
}

func New(r *os.File) (ml.Backend, error) {
	meta, n, err := fs.Decode(r, -1)
	if err != nil {
		return nil, err
	}

	slog.Info(
		"",
		"architecture", meta.KV().Architecture(),
		"file_type", meta.KV().FileType(),
		"name", meta.KV().String("general.name"),
		"description", meta.KV().String("general.description"),
		"num_tensors", len(meta.Tensors().Items()),
		"num_key_values", len(meta.KV()),
	)

	var cpus, gpus []Context
	for _, d := range devices() {
		switch C.ggml_backend_dev_type(d.d) {
		case C.GGML_BACKEND_DEVICE_TYPE_CPU,
			C.GGML_BACKEND_DEVICE_TYPE_ACCEL:
			slog.Info("cpu", "device", d)
			cpus = append(cpus, Context{
				ctx: C.ggml_init(C.struct_ggml_init_params{
					mem_size: C.size_t(int(C.ggml_tensor_overhead()) * (len(meta.Tensors().Items()) + 1 + int(meta.KV().BlockCount())*2)),
					no_alloc: true,
				}),
				backend: C.ggml_backend_dev_init(d.d, nil),
			})
		case C.GGML_BACKEND_DEVICE_TYPE_GPU:
			slog.Info("gpu", "device", d)
			gpus = append(gpus, Context{
				ctx: C.ggml_init(C.struct_ggml_init_params{
					mem_size: C.size_t(int(C.ggml_tensor_overhead()) * (len(meta.Tensors().Items()) + 1 + int(meta.KV().BlockCount())*2)),
					no_alloc: true,
				}),
				backend: C.ggml_backend_dev_init(d.d, nil),
			})
		}
	}

	ctxFunc := func(s []Context) (*Context, error) {
		for _, e := range s {
			return &e, nil
		}

		return nil, fmt.Errorf("no devices available")
	}

	tensors := make(map[*fs.Tensor]*Context, len(meta.Tensors().Items()))
	for _, t := range meta.Tensors().Items() {
		c, err := ctxFunc(append(gpus, cpus...))
		if err != nil {
			return nil, err
		}

		func() {
			tt := C.ggml_new_tensor(c.ctx, t.Kind, C.int(len(t.Shape)), (*C.int64_t)(unsafe.Pointer(&t.Shape[0])))

			cname := C.CString(t.Name)
			defer C.free(unsafe.Pointer(cname))
			C.ggml_set_name(tt, cname)

			tensors[t] = c
		}()
	}

	for _, b := range append(gpus, cpus...) {
		C.ggml_backend_alloc_ctx_tensors(b.ctx, b.backend)
	}

	sr := io.NewSectionReader(r, int64(meta.Tensors().Offset), n-int64(meta.Tensors().Offset))

	var g errgroup.Group
	for t, c := range tensors {
		g.Go(func() error {
			bts := make([]byte, t.Size())
			n, err := io.ReadFull(io.NewSectionReader(sr, int64(t.Offset), int64(t.Size())), bts)
			if err != nil {
				return err
			}

			if n != int(t.Size()) {
				return fmt.Errorf("expected %d bytes, got %d", t.Size(), n)
			}

			cname := C.CString(t.Name)
			defer C.free(unsafe.Pointer(cname))

			C.ggml_backend_tensor_set(C.ggml_get_tensor(c.ctx, cname), unsafe.Pointer(&bts[0]), 0, C.size_t(n))
			return nil
		})
	}

	if err := g.Wait(); err != nil {
		return nil, err
	}

	return &Backend{
		meta: meta,
		cpus: cpus,
		gpus: gpus,
	}, nil
}

func init() {
	ml.RegisterBackend("ggml", New)
}

func (b *Backend) Config() ml.Config {
	return b.meta.KV()
}

func (b *Backend) Get(name string) ml.Tensor {
	cname := C.CString(name)
	defer C.free(unsafe.Pointer(cname))

	for _, c := range append(b.gpus, b.cpus...) {
		if t := C.ggml_get_tensor(c.ctx, cname); t != nil {
			return &Tensor{t: t}
		}
	}

	return nil
}

func (b *Backend) NewContext() ml.Context {
	nodes := max(8192, len(b.meta.Tensors().Items())*5)
	bts := make([]byte, C.size_t(nodes)*C.ggml_tensor_overhead()+C.ggml_graph_overhead_custom(C.size_t(nodes), false))
	c := C.ggml_init(C.struct_ggml_init_params{
		mem_buffer: unsafe.Pointer(&bts[0]),
		mem_size:   C.size_t(len(bts)),
		no_alloc:   true,
	})

	backends := make([]*C.struct_ggml_backend, len(b.gpus)+len(b.cpus))
	bufts := make([]*C.struct_ggml_backend_buffer_type, len(b.gpus)+len(b.cpus))
	for i, c := range append(b.gpus, b.cpus...) {
		backends[i] = c.backend
		bufts[i] = C.ggml_backend_get_default_buffer_type(c.backend)
	}

	return &Context{
		ctx:     c,
		backend: backends[0],
		nodes:   nodes,
		sched: C.ggml_backend_sched_new(
			(*C.ggml_backend_t)(unsafe.Pointer(&backends[0])),
			(*C.ggml_backend_buffer_type_t)(unsafe.Pointer(&bufts[0])),
			C.int(len(backends)),
			C.size_t(nodes),
			true,
		),
	}
}

type Context struct {
	ctx     *C.struct_ggml_context
	backend *C.struct_ggml_backend

	sched *C.struct_ggml_backend_sched
	graph *C.struct_ggml_cgraph
	nodes int
}

func (c *Context) Forward(t ml.Tensor) {
	if c.graph == nil {
		c.graph = C.ggml_new_graph_custom(c.ctx, C.size_t(c.nodes), false)
	}

	C.ggml_build_forward_expand(c.graph, t.(*Tensor).t)
}

func (c *Context) Compute(t ml.Tensor) ml.Tensor {
	c.Forward(t)
	C.ggml_backend_sched_graph_compute_async(c.sched, c.graph)

	backend := C.ggml_backend_sched_get_tensor_backend(c.sched, t.(*Tensor).t)

	t.(*Tensor).data = make([]byte, C.ggml_nbytes(t.(*Tensor).t))
	C.ggml_backend_tensor_get_async(backend, t.(*Tensor).t, unsafe.Pointer(&t.(*Tensor).data[0]), 0, C.ggml_nbytes(t.(*Tensor).t))
	return t
}

func (c Context) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
	if len(shape) < 1 || len(shape) > 4 {
		panic("unsupported number of dimensions")
	}

	for _, dim := range shape {
		if dim < 1 {
			panic("invalid shape")
		}
	}

	var t *C.struct_ggml_tensor
	switch dtype {
	case ml.DTypeF32:
		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_F32, C.int(len(shape)), (*C.int64_t)(unsafe.Pointer(&shape[0])))
	case ml.DTypeI32:
		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_I32, C.int(len(shape)), (*C.int64_t)(unsafe.Pointer(&shape[0])))
	default:
		panic("unsupported dtype")
	}

	b := C.ggml_backend_alloc_buffer(c.backend, C.ggml_nbytes(t))
	C.ggml_backend_tensor_alloc(b, t, C.ggml_backend_buffer_get_base(b))
	C.ggml_set_zero(t)
	return &Tensor{t: t}
}

func fromSlice[S ~[]E, E float32 | int32](ctx Context, s S, shape []int, dtype uint32) (ml.Tensor, error) {
	n := len(s)
	for _, v := range shape {
		n /= v
	}

	if n != 1 {
		return nil, fmt.Errorf("invalid shape %v for %d elements", shape, len(s))
	}

	t := C.ggml_new_tensor(ctx.ctx, dtype, C.int(len(shape)), (*C.int64_t)(unsafe.Pointer(&shape[0])))
	b := C.ggml_backend_alloc_buffer(ctx.backend, C.ggml_nbytes(t))
	C.ggml_backend_tensor_alloc(b, t, C.ggml_backend_buffer_get_base(b))
	C.ggml_backend_tensor_set(t, unsafe.Pointer(&s[0]), 0, C.ggml_nbytes(t))
	return &Tensor{t: t}, nil
}

func (c Context) FromFloatSlice(s []float32, shape ...int) (ml.Tensor, error) {
	return fromSlice(c, s, shape, C.GGML_TYPE_F32)
}

func (c Context) FromIntSlice(s []int32, shape ...int) (ml.Tensor, error) {
	return fromSlice(c, s, shape, C.GGML_TYPE_I32)
}

func (c *Context) Close() error {
	C.ggml_backend_sched_free(c.sched)
	C.ggml_free(c.ctx)
	return nil
}

type Tensor struct {
	t    *C.struct_ggml_tensor
	data []byte
}

func (t *Tensor) LogValue() slog.Value {
	return slog.GroupValue(
		slog.String("name", C.GoString(C.ggml_get_name(t.t))),
		slog.String("type", C.GoString(C.ggml_type_name(t.t._type))),
		slog.Any("shape", t.Shape()),
	)
}

func (t *Tensor) Dim(n int) int64 {
	return int64(t.t.ne[n])
}

func (t *Tensor) Stride(n int) int64 {
	return int64(t.t.nb[n])
}

func (t *Tensor) Shape() []int64 {
	shape := make([]int64, C.ggml_n_dims(t.t))
	for i := range shape {
		shape[i] = t.Dim(i)
	}

	return shape
}

func (t *Tensor) Bytes() []byte {
	if bts := C.ggml_get_data(t.t); bts != nil {
		return C.GoBytes(bts, C.int(C.ggml_nbytes(t.t)))
	}

	return nil
}

func (t *Tensor) Floats() (f32s []float32) {
	if t.data != nil {
		f32s = make([]float32, C.ggml_nelements(t.t))
		_ = binary.Read(bytes.NewReader(t.data), binary.LittleEndian, f32s)
	}

	return
}

func (t *Tensor) DType() ml.DType {
	switch t.t._type {
	case C.GGML_TYPE_F32:
		return ml.DTypeF32
	case C.GGML_TYPE_I32:
		return ml.DTypeI32
	default:
		return ml.DTypeOther
	}
}

func (t *Tensor) Add(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
	return &Tensor{
		t: C.ggml_add(ctx.(*Context).ctx, t.t, t2.(*Tensor).t),
	}
}

func (t *Tensor) Stack(ctx ml.Context, dim int, s ...ml.Tensor) ml.Tensor {
	if len(s) > 0 {
		return t.Concat(ctx, s[0].Stack(ctx, dim, s[1:]...), dim)
	}

	return t
}

func (t *Tensor) Concat(ctx ml.Context, t2 ml.Tensor, dim int) ml.Tensor {
	return &Tensor{
		t: C.ggml_concat(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(dim)),
	}
}

func (t *Tensor) Contiguous(ctx ml.Context) ml.Tensor {
	return &Tensor{
		t: C.ggml_cont(ctx.(*Context).ctx, t.t),
	}
}

func (t *Tensor) Mul(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
	return &Tensor{
		t: C.ggml_mul(ctx.(*Context).ctx, t.t, t2.(*Tensor).t),
	}
}

func (t *Tensor) Mulmat(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
	return &Tensor{
		t: C.ggml_mul_mat(ctx.(*Context).ctx, t.t, t2.(*Tensor).t),
	}
}

func (t *Tensor) LayerNorm(ctx ml.Context, w, b ml.Tensor, eps float32) ml.Tensor {
	tt := (&Tensor{t: C.ggml_norm(ctx.(*Context).ctx, t.t, C.float(eps))}).Mul(ctx, w)
	if b != nil {
		tt = tt.Add(ctx, b)
	}

	return tt
}

func (t *Tensor) RMSNorm(ctx ml.Context, w ml.Tensor, eps float32) ml.Tensor {
	return (&Tensor{t: C.ggml_norm(ctx.(*Context).ctx, t.t, C.float(eps))}).Mul(ctx, w)
}

func (t *Tensor) Pad(ctx ml.Context, shape ...int64) ml.Tensor {
	if len(shape) != 4 {
		panic("expected 4 dimensions")
	}

	return &Tensor{
		t: C.ggml_pad(ctx.(*Context).ctx, t.t, C.int(shape[0]), C.int(shape[1]), C.int(shape[2]), C.int(shape[3])),
	}
}

func (t *Tensor) Permute(ctx ml.Context, shape ...int) ml.Tensor {
	if len(shape) != 4 {
		panic("expected 4 dimensions")
	}

	return &Tensor{
		t: C.ggml_permute(ctx.(*Context).ctx, t.t, C.int(shape[0]), C.int(shape[1]), C.int(shape[2]), C.int(shape[3])),
	}
}

func (t *Tensor) Rows(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
	return &Tensor{
		t: C.ggml_get_rows(ctx.(*Context).ctx, t.t, t2.(*Tensor).t),
	}
}

func (t *Tensor) Copy(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
	return &Tensor{
		t: C.ggml_cpy(ctx.(*Context).ctx, t.t, t2.(*Tensor).t),
	}
}

func (t *Tensor) Reshape(ctx ml.Context, shape ...int64) ml.Tensor {
	switch len(shape) {
	case 1:
		return &Tensor{
			t: C.ggml_reshape_1d(ctx.(*Context).ctx, t.t, C.int64_t(shape[0])),
		}
	case 2:
		return &Tensor{
			t: C.ggml_reshape_2d(ctx.(*Context).ctx, t.t, C.int64_t(shape[0]), C.int64_t(shape[1])),
		}
	case 3:
		return &Tensor{
			t: C.ggml_reshape_3d(ctx.(*Context).ctx, t.t, C.int64_t(shape[0]), C.int64_t(shape[1]), C.int64_t(shape[2])),
		}
	case 4:
		return &Tensor{
			t: C.ggml_reshape_4d(ctx.(*Context).ctx, t.t, C.int64_t(shape[0]), C.int64_t(shape[1]), C.int64_t(shape[2]), C.int64_t(shape[3])),
		}
	default:
		panic("unsupported number of dimensions")
	}
}

func (t *Tensor) Scale(ctx ml.Context, s float64) ml.Tensor {
	return &Tensor{
		t: C.ggml_scale(ctx.(*Context).ctx, t.t, (C.float)(s)),
	}
}

func (t *Tensor) Softmax(ctx ml.Context) ml.Tensor {
	return &Tensor{
		t: C.ggml_soft_max(ctx.(*Context).ctx, t.t),
	}
}

func (t *Tensor) Tanh(ctx ml.Context) ml.Tensor {
	return &Tensor{
		t: C.ggml_tanh_inplace(ctx.(*Context).ctx, t.t),
	}
}

func (t *Tensor) Unpad(ctx ml.Context, shape ...int64) ml.Tensor {
	if len(shape) != 4 {
		panic("expected 4 dimensions")
	}

	return &Tensor{
		t: C.ggml_unpad(ctx.(*Context).ctx, t.t, C.int(shape[0]), C.int(shape[1]), C.int(shape[2]), C.int(shape[3])),
	}
}

func (t *Tensor) View(ctx ml.Context, offset int, shape ...int) ml.Tensor {
	switch len(shape) {
	case 1:
		return &Tensor{
			t: C.ggml_view_1d(ctx.(*Context).ctx, t.t, C.int64_t(shape[0]), C.size_t(offset)),
		}
	case 3:
		return &Tensor{
			t: C.ggml_view_2d(ctx.(*Context).ctx, t.t,
				C.int64_t(shape[0]), C.int64_t(shape[2]),
				C.size_t(shape[1]),
				C.size_t(offset)),
		}
	case 5:
		return &Tensor{
			t: C.ggml_view_3d(ctx.(*Context).ctx, t.t,
				C.int64_t(shape[0]), C.int64_t(shape[2]), C.int64_t(shape[4]),
				C.size_t(shape[1]), C.size_t(shape[3]),
				C.size_t(offset)),
		}
	case 7:
		return &Tensor{
			t: C.ggml_view_4d(ctx.(*Context).ctx, t.t,
				C.int64_t(shape[0]), C.int64_t(shape[2]), C.int64_t(shape[4]), C.int64_t(shape[6]),
				C.size_t(shape[1]), C.size_t(shape[3]), C.size_t(shape[5]),
				C.size_t(offset)),
		}
	default:
		panic("unsupported number of dimensions")
	}
}

const (
	ropeTypeNorm C.int = iota
)

func (t *Tensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, ropeDim uint32, ropeBase, ropeScale float32) ml.Tensor {
	if ropeFactors == nil {
		ropeFactors = &Tensor{}
	}

	return &Tensor{
		t: C.ggml_rope_ext(
			ctx.(*Context).ctx, t.t, positionIDs.(*Tensor).t, ropeFactors.(*Tensor).t,
			C.int(ropeDim),
			131072,       // YaRN n_ctx_train
			ropeTypeNorm, // ROPE_TYPE_NORM
			C.float(ropeBase),
			C.float(ropeScale),
			0.,  // YaRN ext_factor
			1.,  // YaRN attn_factor
			32., // YaRN beta_fast
			1.,  // YaRN beta_slow
		),
	}
}

func (t *Tensor) GELU(ctx ml.Context) ml.Tensor {
	return &Tensor{
		t: C.ggml_gelu_inplace(ctx.(*Context).ctx, t.t),
	}
}

func (t *Tensor) SILU(ctx ml.Context) ml.Tensor {
	return &Tensor{
		t: C.ggml_silu_inplace(ctx.(*Context).ctx, t.t),
	}
}

func (t *Tensor) Conv2D(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int) ml.Tensor {
	return &Tensor{
		t: C.ggml_conv_2d(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1)),
	}
}
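One detail worth calling out from the deleted file: fromSlice validates a shape by repeatedly dividing the element count rather than multiplying the dimensions. A standalone sketch (not part of the diff; the function name and sample values are illustrative) of that check — note that integer division makes it slightly loose, e.g. 6 elements against shape [2 2] divides down to 1 and passes even though 2*2 != 6.

package main

import "fmt"

// shapeMatches reproduces the check in fromSlice: divide the element count
// by each dimension and require exactly 1 to remain.
func shapeMatches(n int, shape []int) bool {
	for _, v := range shape {
		if v == 0 {
			return false // guard; fromSlice itself would panic on division by zero
		}
		n /= v
	}
	return n == 1
}

func main() {
	fmt.Println(shapeMatches(6, []int{2, 3})) // true
	fmt.Println(shapeMatches(6, []int{3, 3})) // false
	fmt.Println(shapeMatches(6, []int{2, 2})) // true, despite 2*2 != 6
}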
@@ -1,7 +1,9 @@
-protect **/*.go
-protect **/*-embed.*
+protect *.go
+protect *-embed.*
 include include/
 include src/
+include src/CMakeLists.txt
+include src/**/CMakeLists.txt
 include src/ggml-blas/
 include src/ggml-cpu/
 include src/ggml-cpu/amx/
@@ -10,12 +12,11 @@ include src/ggml-cuda/
 include src/ggml-cuda/template-instances/
 include src/ggml-hip/
 include src/ggml-metal/
-include **/CMakeLists.txt
-include **/*.c
-include **/*.h
-include **/*.cpp
-include **/*.cu
-include **/*.cuh
-include **/*.m
-include **/*.metal
+include *.c
+include *.h
+include *.cpp
+include *.cu
+include *.cuh
+include *.m
+include *.metal
 exclude *
262 ml/backend/ggml/ggml/CMakeLists.txt vendored
@@ -1,262 +0,0 @@
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
project("ggml" C CXX)
include(CheckIncludeFileCXX)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    set(GGML_STANDALONE ON)

    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

    # configure project version
    # TODO
else()
    set(GGML_STANDALONE OFF)
endif()

if (EMSCRIPTEN)
    set(BUILD_SHARED_LIBS_DEFAULT OFF)

    option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON)
else()
    if (MINGW)
        set(BUILD_SHARED_LIBS_DEFAULT OFF)
    else()
        set(BUILD_SHARED_LIBS_DEFAULT ON)
    endif()
endif()

# remove the lib prefix on win32 mingw
if (WIN32)
    set(CMAKE_STATIC_LIBRARY_PREFIX "")
    set(CMAKE_SHARED_LIBRARY_PREFIX "")
    set(CMAKE_SHARED_MODULE_PREFIX "")
endif()

option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
option(GGML_BACKEND_DL "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF)

#
# option list
#

# TODO: mark all options as advanced when not GGML_STANDALONE

if (APPLE)
    set(GGML_METAL_DEFAULT ON)
    set(GGML_BLAS_DEFAULT ON)
    set(GGML_BLAS_VENDOR_DEFAULT "Apple")
else()
    set(GGML_METAL_DEFAULT OFF)
    set(GGML_BLAS_DEFAULT OFF)
    set(GGML_BLAS_VENDOR_DEFAULT "Generic")
endif()

if (CMAKE_CROSSCOMPILING)
    set(GGML_NATIVE_DEFAULT OFF)
else()
    set(GGML_NATIVE_DEFAULT ON)
endif()

# defaults
if (NOT GGML_LLAMAFILE_DEFAULT)
    set(GGML_LLAMAFILE_DEFAULT OFF)
endif()

if (NOT GGML_CUDA_GRAPHS_DEFAULT)
    set(GGML_CUDA_GRAPHS_DEFAULT OFF)
endif()

# general
option(GGML_STATIC "ggml: static link libraries" OFF)
option(GGML_NATIVE "ggml: optimize the build for the current system" ${GGML_NATIVE_DEFAULT})
option(GGML_LTO    "ggml: enable link time optimization" OFF)
option(GGML_CCACHE "ggml: use ccache if available" ON)

# debug
option(GGML_ALL_WARNINGS           "ggml: enable all compiler warnings" ON)
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)
option(GGML_GPROF                  "ggml: enable gprof" OFF)

# build
option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF)

# sanitizers
option(GGML_SANITIZE_THREAD    "ggml: enable thread sanitizer" OFF)
option(GGML_SANITIZE_ADDRESS   "ggml: enable address sanitizer" OFF)
option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)

# instruction set specific
if (GGML_NATIVE OR NOT GGML_NATIVE_DEFAULT)
    set(INS_ENB OFF)
else()
    set(INS_ENB ON)
endif()

option(GGML_CPU_HBM     "ggml: use memkind for CPU HBM" OFF)
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
option(GGML_AVX         "ggml: enable AVX" ${INS_ENB})
option(GGML_AVX_VNNI    "ggml: enable AVX-VNNI" OFF)
option(GGML_AVX2        "ggml: enable AVX2" ${INS_ENB})
option(GGML_AVX512      "ggml: enable AVX512F" OFF)
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
if (NOT MSVC)
    # in MSVC F16C and FMA is implied with AVX2/AVX512
    option(GGML_FMA  "ggml: enable FMA" ${INS_ENB})
    option(GGML_F16C "ggml: enable F16C" ${INS_ENB})
    # MSVC does not seem to support AMX
    option(GGML_AMX_TILE "ggml: enable AMX-TILE" OFF)
    option(GGML_AMX_INT8 "ggml: enable AMX-INT8" OFF)
    option(GGML_AMX_BF16 "ggml: enable AMX-BF16" OFF)
endif()
option(GGML_LASX "ggml: enable lasx" ON)
option(GGML_LSX  "ggml: enable lsx" ON)
option(GGML_RVV  "ggml: enable rvv" ON)

option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")


if (WIN32)
    set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows version")
endif()

# ggml core
set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")
option(GGML_CPU "ggml: enable CPU backend" ON)

# 3rd party libs / backends
option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON)
option(GGML_BLAS       "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
    "ggml: BLAS library vendor")
option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT})

option(GGML_CUDA "ggml: use CUDA" OFF)
option(GGML_MUSA "ggml: use MUSA" OFF)
option(GGML_CUDA_FORCE_MMQ    "ggml: use mmq kernels instead of cuBLAS" OFF)
option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF)
option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF)
set(GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
    "ggml: max. batch size for using peer access")
option(GGML_CUDA_NO_PEER_COPY  "ggml: do not use peer to peer copies" OFF)
option(GGML_CUDA_NO_VMM        "ggml: do not try to use CUDA VMM" OFF)
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
option(GGML_CUDA_GRAPHS        "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})

option(GGML_HIP     "ggml: use HIP" OFF)
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
option(GGML_VULKAN  "ggml: use Vulkan" OFF)
option(GGML_VULKAN_CHECK_RESULTS     "ggml: run Vulkan op checks" OFF)
option(GGML_VULKAN_DEBUG             "ggml: enable Vulkan debug output" OFF)
option(GGML_VULKAN_MEMORY_DEBUG      "ggml: enable Vulkan memory debug output" OFF)
option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
option(GGML_VULKAN_PERF              "ggml: enable Vulkan perf output" OFF)
option(GGML_VULKAN_VALIDATE          "ggml: enable Vulkan validation" OFF)
option(GGML_VULKAN_RUN_TESTS         "ggml: run Vulkan tests" OFF)
option(GGML_KOMPUTE                  "ggml: use Kompute" OFF)
option(GGML_METAL                    "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_USE_BF16           "ggml: use bfloat if available" OFF)
option(GGML_METAL_NDEBUG             "ggml: disable Metal debugging" OFF)
option(GGML_METAL_SHADER_DEBUG       "ggml: compile Metal with -fno-fast-math" OFF)
option(GGML_METAL_EMBED_LIBRARY      "ggml: embed Metal library" ${GGML_METAL})
set(GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING
    "ggml: metal minimum macOS version")
set(GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)")
option(GGML_OPENMP "ggml: use OpenMP" ON)
option(GGML_RPC    "ggml: use RPC" OFF)
option(GGML_SYCL   "ggml: use SYCL" OFF)
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
set(GGML_SYCL_TARGET "INTEL" CACHE STRING
    "ggml: sycl target device")
set(GGML_SYCL_DEVICE_ARCH "" CACHE STRING
    "ggml: sycl device architecture")

option(GGML_OPENCL                    "ggml: use OpenCL" OFF)
option(GGML_OPENCL_PROFILING          "ggml: use OpenCL profiling (increases overhead)" OFF)
option(GGML_OPENCL_EMBED_KERNELS      "ggml: embed kernels" ON)
option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)

# extra artifacts
option(GGML_BUILD_TESTS    "ggml: build tests" ${GGML_STANDALONE})
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})

#
# dependencies
#

set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED true)

set(THREADS_PREFER_PTHREAD_FLAG ON)

find_package(Threads REQUIRED)

#
# build the library
#

add_subdirectory(src)

#
# tests and examples
#

if (GGML_BUILD_TESTS)
    enable_testing()
    add_subdirectory(tests)
endif ()

if (GGML_BUILD_EXAMPLES)
    add_subdirectory(examples)
endif ()

#
# install
#

include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

# all public headers
set(GGML_PUBLIC_HEADERS
    include/ggml.h
    include/ggml-cpu.h
    include/ggml-alloc.h
    include/ggml-backend.h
    include/ggml-blas.h
    include/ggml-cann.h
    include/ggml-cuda.h
    include/ggml-kompute.h
    include/ggml-opt.h
    include/ggml-metal.h
    include/ggml-rpc.h
    include/ggml-sycl.h
    include/ggml-vulkan.h)

set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
#if (GGML_METAL)
#    set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
#endif()
install(TARGETS ggml LIBRARY PUBLIC_HEADER)
install(TARGETS ggml-base LIBRARY)

if (GGML_STANDALONE)
    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in
        ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
        @ONLY)

    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
        DESTINATION share/pkgconfig)
endif()
2 ml/backend/ggml/ggml/src/CMakeLists.txt vendored
@@ -278,6 +278,7 @@ function(ggml_add_cpu_backend_variant tag_name)
     endforeach()

     ggml_add_cpu_backend_variant_impl(${tag_name})
+    add_dependencies(ggml-cpu ggml-cpu-${tag_name})
 endfunction()

 ggml_add_backend(CPU)
@@ -286,6 +287,7 @@ if (GGML_CPU_ALL_VARIANTS)
     if (NOT GGML_BACKEND_DL)
         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
     endif()
+    add_custom_target(ggml-cpu)
     ggml_add_cpu_backend_variant(sandybridge AVX)
     ggml_add_cpu_backend_variant(haswell     AVX F16C AVX2 FMA)
     ggml_add_cpu_backend_variant(skylakex    AVX F16C AVX2 FMA AVX512)
@@ -1,5 +1,6 @@
 package cpu

+// #cgo CFLAGS: -O3 -Wno-implicit-function-declaration
 // #cgo CXXFLAGS: -std=c++17
 // #cgo CPPFLAGS: -I${SRCDIR}/amx -I${SRCDIR}/llamafile -I${SRCDIR}/.. -I${SRCDIR}/../../include
 // #cgo CPPFLAGS: -DGGML_USE_LLAMAFILE
@@ -1,77 +0,0 @@
#!/usr/bin/env python3

from glob import glob
import os

TYPES_KV = ["GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0", "GGML_TYPE_F16"]

SOURCE_FATTN_VEC = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../fattn-vec-f{vkq_size}.cuh"

DECL_FATTN_VEC_F{vkq_size}_CASE({head_size}, {type_k}, {type_v});
"""

SOURCE_FATTN_WMMA_START = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../fattn-wmma-f16.cuh"

"""

SOURCE_FATTN_WMMA_CASE = "DECL_FATTN_WMMA_F16_CASE({head_size}, {cols_per_block}, {kq_acc_t});\n"

TYPES_MMQ = [
    "GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0",
    "GGML_TYPE_Q2_K", "GGML_TYPE_Q3_K", "GGML_TYPE_Q4_K", "GGML_TYPE_Q5_K", "GGML_TYPE_Q6_K",
    "GGML_TYPE_IQ2_XXS", "GGML_TYPE_IQ2_XS", "GGML_TYPE_IQ2_S", "GGML_TYPE_IQ3_XXS", "GGML_TYPE_IQ3_S",
    "GGML_TYPE_IQ1_S", "GGML_TYPE_IQ4_NL", "GGML_TYPE_IQ4_XS"
]

SOURCE_MMQ = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../mmq.cuh"

DECL_MMQ_CASE({type});
"""


def get_short_name(long_quant_name):
    return long_quant_name.replace("GGML_TYPE_", "").lower()


def get_head_sizes(type_k, type_v):
    if type_k == "GGML_TYPE_F16" and type_v == "GGML_TYPE_F16":
        return [64, 128, 256]
    if type_k == "GGML_TYPE_F16":
        return [64, 128]
    return [128]


for filename in glob("*.cu"):
    os.remove(filename)

for vkq_size in [16, 32]:
    for type_k in TYPES_KV:
        for type_v in TYPES_KV:
            for head_size in get_head_sizes(type_k, type_v):
                with open(f"fattn-vec-f{vkq_size}-instance-hs{head_size}-{get_short_name(type_k)}-{get_short_name(type_v)}.cu", "w") as f:
                    f.write(SOURCE_FATTN_VEC.format(vkq_size=vkq_size, head_size=head_size, type_k=type_k, type_v=type_v))

for kq_acc_t in ["half", "float"]:
    for cols_per_block in [8, 16, 32]:
        if kq_acc_t == "float" and cols_per_block == 8:
            continue

        with open(f"fattn-wmma-f16-instance-kq{kq_acc_t}-cpb{cols_per_block}.cu", "w") as f:
            f.write(SOURCE_FATTN_WMMA_START)

            for head_size in [64, 80, 96, 112, 128, 256]:
                if cols_per_block == 8 and head_size % 32 != 0:  # wmma fragment is 8x32
                    continue
                if kq_acc_t == "float" and cols_per_block == 32 and head_size == 256:  # register spilling, bad performance
                    continue
                f.write(SOURCE_FATTN_WMMA_CASE.format(kq_acc_t=kq_acc_t, cols_per_block=cols_per_block, head_size=head_size))

for type in TYPES_MMQ:
    with open(f"mmq-instance-{get_short_name(type)}.cu", "w") as f:
        f.write(SOURCE_MMQ.format(type=type))
@@ -3,6 +3,7 @@ package ggml
 // #cgo CXXFLAGS: -std=c++17
 // #cgo CPPFLAGS: -DNDEBUG -DGGML_USE_CPU
 // #cgo CPPFLAGS: -I${SRCDIR}/../include -I${SRCDIR}/ggml-cpu
+// #cgo windows LDFLAGS: -lmsvcrt -static -static-libgcc -static-libstdc++
 // #include <stdlib.h>
 // #include "ggml-backend.h"
 // extern void sink(int level, char *text, void *user_data);
@@ -40,28 +41,53 @@ func sink(level C.int, text *C.char, _ unsafe.Pointer) {
 }

 var OnceLoad = sync.OnceFunc(func() {
-	var lib struct{ name, defaultValue string }
+	exe, err := os.Executable()
+	if err != nil {
+		slog.Warn("failed to get executable path", "error", err)
+		exe = "."
+	}
+
+	// PATH, LD_LIBRARY_PATH, and DYLD_LIBRARY_PATH are often
+	// set by the parent process, however, use a default value
+	// if the environment variable is not set.
+	var name, value string
 	switch runtime.GOOS {
-	case "darwin", "linux":
-		lib.name = "LD_LIBRARY_PATH"
-		lib.defaultValue = "/usr/local/lib:/usr/lib"
+	case "darwin":
+		// On macOS, DYLD_LIBRARY_PATH is often not set, so
+		// we use the directory of the executable as the default.
+		name = "DYLD_LIBRARY_PATH"
+		value = filepath.Dir(exe)
 	case "windows":
-		lib.name = "PATH"
-		lib.defaultValue = "."
+		name = "PATH"
+		value = filepath.Join(filepath.Dir(exe), "lib", "ollama")
 	default:
-		return
+		name = "LD_LIBRARY_PATH"
+		value = filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
 	}

-	paths, ok := os.LookupEnv(lib.name)
+	paths, ok := os.LookupEnv(name)
 	if !ok {
-		paths = lib.defaultValue
+		paths = value
 	}

-	for _, path := range filepath.SplitList(paths) {
-		func() {
-			cpath := C.CString(path)
-			defer C.free(unsafe.Pointer(cpath))
-			C.ggml_backend_load_all_from_path(cpath)
-		}()
+	split := filepath.SplitList(paths)
+	visited := make(map[string]struct{}, len(split))
+	for _, path := range split {
+		abspath, err := filepath.Abs(path)
+		if err != nil {
+			slog.Error("failed to get absolute path", "error", err)
+			continue
+		}
+
+		if _, ok := visited[abspath]; !ok {
+			func() {
+				slog.Debug("ggml backend load all from path", "path", abspath)
+				cpath := C.CString(abspath)
+				defer C.free(unsafe.Pointer(cpath))
+				C.ggml_backend_load_all_from_path(cpath)
+			}()

+			visited[abspath] = struct{}{}
+		}
 	}
 })
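The rewritten loop above deduplicates search-path entries by absolute path before loading backends. A standalone sketch (not part of the diff; the sample paths are illustrative) of just that bookkeeping:

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Two spellings of the same directory, as might appear in PATH or
	// LD_LIBRARY_PATH; both resolve to one absolute path.
	paths := "lib/ollama" + string(filepath.ListSeparator) + "./lib/ollama"

	visited := make(map[string]struct{})
	for _, path := range filepath.SplitList(paths) {
		abspath, err := filepath.Abs(path)
		if err != nil {
			continue
		}
		if _, ok := visited[abspath]; ok {
			continue // second spelling of the same directory is skipped
		}
		visited[abspath] = struct{}{}
		fmt.Println("load backends from", abspath)
	}
}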
@@ -1,11 +0,0 @@
package nn

import "github.com/ollama/ollama/ml"

type Conv2D struct {
	Weight ml.Tensor `gguf:"weight"`
}

func (m *Conv2D) Forward(ctx ml.Context, t ml.Tensor, s0, s1, p0, p1, d0, d1 int) ml.Tensor {
	return m.Weight.Conv2D(ctx, t, s0, s1, p0, p1, d0, d1)
}
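For reference, the s0/s1, p0/p1, d0/d1 arguments here are the usual per-axis stride, padding, and dilation pairs, and the output extent per axis follows the standard convolution arithmetic. A small sketch (not from the diff) of that formula:

package main

import "fmt"

// convOutDim computes the output size of one convolution axis from input
// size in, kernel size k, stride s, padding p, and dilation d, using the
// standard formula (in + 2p - d*(k-1) - 1)/s + 1.
func convOutDim(in, k, s, p, d int) int {
	return (in+2*p-d*(k-1)-1)/s + 1
}

func main() {
	// e.g. a 224-wide input, 3-wide kernel, stride 2, padding 1, dilation 1
	fmt.Println(convOutDim(224, 3, 2, 1, 1)) // 112
}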
@@ -1,11 +0,0 @@
package nn

import "github.com/ollama/ollama/ml"

type Embedding struct {
	Weight ml.Tensor `gguf:"weight"`
}

func (m *Embedding) Forward(ctx ml.Context, hiddenState ml.Tensor) ml.Tensor {
	return m.Weight.Rows(ctx, hiddenState)
}
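Embedding lookup here is just a row gather: Forward indexes rows of the weight matrix by token ID. A standalone sketch (not part of the diff) of the same operation on plain slices:

package main

import "fmt"

// rows gathers weight rows by index, which is what Weight.Rows does for an
// embedding table: one row of hidden size per input token ID.
func rows(weight [][]float32, ids []int32) [][]float32 {
	out := make([][]float32, len(ids))
	for i, id := range ids {
		out[i] = weight[id]
	}
	return out
}

func main() {
	weight := [][]float32{{0, 0}, {1, 1}, {2, 2}} // 3 tokens, hidden size 2
	fmt.Println(rows(weight, []int32{2, 0}))      // [[2 2] [0 0]]
}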
@@ -1,17 +0,0 @@
package nn

import "github.com/ollama/ollama/ml"

type Linear struct {
	Weight ml.Tensor `gguf:"weight"`
	Bias   ml.Tensor `gguf:"bias"`
}

func (m *Linear) Forward(ctx ml.Context, t ml.Tensor) ml.Tensor {
	t = m.Weight.Mulmat(ctx, t)
	if m.Bias != nil {
		t = t.Add(ctx, m.Bias)
	}

	return t
}
@@ -1,22 +0,0 @@
package nn

import (
	"github.com/ollama/ollama/ml"
)

type LayerNorm struct {
	Weight ml.Tensor `gguf:"weight"`
	Bias   ml.Tensor `gguf:"bias"`
}

func (m *LayerNorm) Forward(ctx ml.Context, t ml.Tensor, eps float32) ml.Tensor {
	return t.LayerNorm(ctx, m.Weight, m.Bias, eps)
}

type RMSNorm struct {
	Weight ml.Tensor `gguf:"weight"`
}

func (m *RMSNorm) Forward(ctx ml.Context, t ml.Tensor, eps float32) ml.Tensor {
	return t.RMSNorm(ctx, m.Weight, eps)
}
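The two removed modules differ mainly in whether the mean is removed before scaling. A standalone sketch (not part of the diff; eps and the sample values are illustrative) of what they compute over a single vector, assuming the usual definitions:

package main

import (
	"fmt"
	"math"
)

// layerNorm subtracts the mean, divides by the standard deviation, then
// applies the learned scale w and bias b.
func layerNorm(x, w, b []float32, eps float32) []float32 {
	var mean, variance float32
	for _, v := range x {
		mean += v
	}
	mean /= float32(len(x))
	for _, v := range x {
		variance += (v - mean) * (v - mean)
	}
	variance /= float32(len(x))
	inv := 1 / float32(math.Sqrt(float64(variance+eps)))
	out := make([]float32, len(x))
	for i, v := range x {
		out[i] = (v-mean)*inv*w[i] + b[i]
	}
	return out
}

// rmsNorm skips the mean and only divides by the root mean square before
// applying the learned scale w.
func rmsNorm(x, w []float32, eps float32) []float32 {
	var ms float32
	for _, v := range x {
		ms += v * v
	}
	ms /= float32(len(x))
	inv := 1 / float32(math.Sqrt(float64(ms+eps)))
	out := make([]float32, len(x))
	for i, v := range x {
		out[i] = v * inv * w[i]
	}
	return out
}

func main() {
	x := []float32{1, 2, 3, 4}
	w := []float32{1, 1, 1, 1}
	b := []float32{0, 0, 0, 0}
	fmt.Println(layerNorm(x, w, b, 1e-5))
	fmt.Println(rmsNorm(x, w, 1e-5))
}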
@@ -1,212 +0,0 @@
package main

import (
	"errors"
	"flag"
	"fmt"
	"image"
	"io"
	"log/slog"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/ollama/ollama/cache"
	"github.com/ollama/ollama/ml"
	"github.com/ollama/ollama/model"
	_ "github.com/ollama/ollama/model/llama"
	_ "github.com/ollama/ollama/model/mllama"
	"github.com/ollama/ollama/sample"
)

var args struct {
	n     int
	debug bool
	image string
	cache bool
}

func temp() error {
	// start := time.Now()
	flag.IntVar(&args.n, "n", 10, "number of samples")
	flag.BoolVar(&args.debug, "debug", false, "enable debug logging")
	flag.StringVar(&args.image, "image", "", "path to image file")
	flag.BoolVar(&args.cache, "cache", false, "enable KV cache")

	flag.Parse()

	var prompt string
	if n := len(flag.Args()); n == 1 {
		bts, err := io.ReadAll(os.Stdin)
		if err != nil {
			return err
		}

		prompt = string(bts)
	} else if n > 1 {
		prompt = strings.Join(flag.Args()[1:], " ")
	} else {
		return fmt.Errorf("usage: %s path/to/file <prompt\n", filepath.Base(os.Args[0]))
	}

	level := slog.LevelInfo
	if args.debug {
		level = slog.LevelDebug
	}

	slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
		Level:     level,
		AddSource: true,
		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
			if attr.Key == slog.SourceKey {
				source := attr.Value.Any().(*slog.Source)
				source.File = filepath.Base(source.File)
			}

			return attr
		},
	})))

	m, err := model.New(flag.Arg(0))
	if err != nil {
		return err
	}

	inputIDs, err := m.(model.TextProcessor).Encode(prompt)
	if err != nil {
		return err
	}

	var opts []model.OptionsFunc
	if args.cache {
		opts = append(opts, model.WithCache(&cache.Simple{
			Capacity: 2048,
			DType:    ml.DTypeF32,
		}))
	}

	if args.image != "" {
		if err := func() error {
			f, err := os.Open(args.image)
			if err != nil {
				return err
			}
			defer f.Close()

			img, _, err := image.Decode(f)
			if err != nil {
				return err
			}

			opts = append(opts, model.WithImage(img))
			return nil
		}(); err != nil {
			return err
		}
	}

	// Schema for a list of friends with their info
	// Maps to JSON like:
	// {
	//   "name": "string",
	//   "age": integer,
	//   "is_available": boolean
	// }
	schema := &sample.Schema{
		Name: "root",
		Type: "object",
		Properties: []*sample.Schema{
			{Name: "name", Type: "string"},
			{Name: "age", Type: "integer"},
			{Name: "is_available", Type: "boolean"},
		},
	}

	// fmt.Println("schema", schema)
	// schema = nil
	jsonTransform, err := sample.NewJSONSampler(m.(model.TextProcessor), schema)
	if err != nil {
		return err
	}

	transforms := []sample.Transform{
		jsonTransform,
	}

	var offset int
	var stringBuffer string
	// var ttft time.Duration
	var totalSamplingTime time.Duration
	count := 0
	for range args.n {
		logits, err := model.Forward(m, append(opts, model.WithInputIDs(inputIDs), model.WithOffset(offset))...)
		if err != nil {
			return err
		}

		samplingStart := time.Now()
		sampler := sample.Greedy()
		sampledIdx, err := sampler.Sample(logits.Floats(), transforms...)
		if err != nil {
			return err
		}

		samplingTime := time.Since(samplingStart)
		totalSamplingTime += samplingTime

		// fmt.Println("sampling time", samplingTime)
		// fmt.Printf("Sample time: %vms\n", finishTime.Sub(sampleTime).Milliseconds())

		var outputIDs []int32

		if !m.(model.TextProcessor).Is(uint32(sampledIdx), model.SpecialEOS) {
			outputIDs = append(outputIDs, int32(sampledIdx))
		}

		if len(outputIDs) == 0 {
			break
		}

		s, err := m.(model.TextProcessor).Decode(outputIDs)
		if errors.Is(err, io.EOF) {
			break
		} else if err != nil {
			return err
		}

		// if ttft == 0 {
		// 	ttft = time.Since(start)
		// 	fmt.Printf("Time to first token: %vms\n", ttft.Milliseconds())
		// }

		// fmt.Printf("--- token: %q\n", s)
		// fmt.Printf("--- outputIDs: %v\n", outputIDs)
		stringBuffer += s
		count++
		fmt.Println("--- stringBuffer", stringBuffer)

		outputIDs, err = jsonTransform.UpdateState(outputIDs)
		if err != nil {
			return err
		}

		// can do fun shifting stuff here if needed
		inputIDs = append(inputIDs, outputIDs...)
		if args.cache {
			offset = len(inputIDs) - 1
		}
	}
	fmt.Println("\n------ Output: ------")
	fmt.Println(stringBuffer)
	fmt.Println("--------------------")
	fmt.Println("sample average time", totalSamplingTime/time.Duration(count))
	return nil
}

func main() {
	if err := temp(); err != nil {
		fmt.Println("err", err)
		os.Exit(1)
	}
}
Some files were not shown because too many files have changed in this diff.