Compare commits

..

4 Commits

Author SHA1 Message Date
jmorganca
201a987ff9 some more menu options... 2024-04-28 12:40:52 -04:00
jmorganca
2d8125042a Touch ID for cli install; server restarts 2024-04-27 22:42:38 -04:00
jmorganca
776e7bb5e4 app: fix status item icons 2024-04-27 15:57:57 -04:00
jmorganca
b8d7ca1a7b Native implementation of macOS app 2024-04-27 14:20:10 -04:00
402 changed files with 7954 additions and 41110 deletions

2
.gitattributes vendored
View File

@@ -1,3 +1 @@
llm/ext_server/* linguist-vendored llm/ext_server/* linguist-vendored
* text=auto
*.go text eol=lf

View File

@@ -28,7 +28,6 @@ jobs:
security unlock-keychain -p password build.keychain security unlock-keychain -p password build.keychain
security import certificate.p12 -k build.keychain -P $MACOS_SIGNING_KEY_PASSWORD -T /usr/bin/codesign security import certificate.p12 -k build.keychain -P $MACOS_SIGNING_KEY_PASSWORD -T /usr/bin/codesign
security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k password build.keychain security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k password build.keychain
security set-keychain-settings -lut 3600 build.keychain
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version-file: go.mod go-version-file: go.mod
@@ -147,7 +146,7 @@ jobs:
run: | run: |
$ErrorActionPreference = "Stop" $ErrorActionPreference = "Stop"
write-host "downloading AMD HIP Installer" write-host "downloading AMD HIP Installer"
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
write-host "Installing AMD HIP" write-host "Installing AMD HIP"
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
write-host "Completed AMD HIP" write-host "Completed AMD HIP"
@@ -187,13 +186,6 @@ jobs:
generate-windows-cuda: generate-windows-cuda:
environment: release environment: release
runs-on: windows runs-on: windows
strategy:
matrix:
cuda:
- version: "11"
url: 'https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
- version: "12"
url: 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
env: env:
KEY_CONTAINER: ${{ vars.KEY_CONTAINER }} KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
steps: steps:
@@ -227,11 +219,11 @@ jobs:
with: with:
go-version-file: go.mod go-version-file: go.mod
cache: true cache: true
- name: 'Install CUDA ${{ matrix.cuda.version }}' - name: 'Install CUDA'
run: | run: |
$ErrorActionPreference = "Stop" $ErrorActionPreference = "Stop"
write-host "downloading CUDA Installer" write-host "downloading CUDA Installer"
Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe" Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
write-host "Installing CUDA" write-host "Installing CUDA"
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
write-host "Completed CUDA" write-host "Completed CUDA"
@@ -263,16 +255,15 @@ jobs:
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\" cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: generate-windows-cuda-${{ matrix.cuda.version }} name: generate-windows-cuda
path: | path: |
llm/build/**/bin/* llm/build/**/bin/*
dist/windows-amd64/** dist/windows-amd64/**
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: windows-cuda-deps-${{ matrix.cuda.version }} name: windows-cuda-deps
path: dist/deps/* path: dist/deps/*
# Import the prior generation steps and build the final windows assets # Import the prior generation steps and build the final windows assets
build-windows: build-windows:
environment: release environment: release
@@ -322,16 +313,10 @@ jobs:
name: generate-windows-cpu name: generate-windows-cpu
- uses: actions/download-artifact@v4 - uses: actions/download-artifact@v4
with: with:
name: generate-windows-cuda-11 name: generate-windows-cuda
- uses: actions/download-artifact@v4 - uses: actions/download-artifact@v4
with: with:
name: generate-windows-cuda-12 name: windows-cuda-deps
- uses: actions/download-artifact@v4
with:
name: windows-cuda-deps-11
- uses: actions/download-artifact@v4
with:
name: windows-cuda-deps-12
- uses: actions/download-artifact@v4 - uses: actions/download-artifact@v4
with: with:
name: windows-rocm-deps name: windows-rocm-deps
@@ -377,6 +362,7 @@ jobs:
- run: | - run: |
./scripts/build_linux.sh ./scripts/build_linux.sh
./scripts/build_docker.sh ./scripts/build_docker.sh
mv dist/deps/* dist/
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: dist-linux-amd64 name: dist-linux-amd64
@@ -450,7 +436,6 @@ jobs:
env: env:
OLLAMA_SKIP_IMAGE_BUILD: '1' OLLAMA_SKIP_IMAGE_BUILD: '1'
PUSH: '1' PUSH: '1'
GH_TOKEN: ${{ github.token }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set Version - name: Set Version
@@ -472,25 +457,17 @@ jobs:
merge-multiple: true merge-multiple: true
- run: | - run: |
ls -lh dist/ ls -lh dist/
(cd dist; find . -type f | xargs sha256sum > ../sha256sum.txt) (cd dist; sha256sum * > sha256sum.txt)
mv sha256sum.txt dist/
mv dist/linux-???64 .
mv dist/linux-amd64-rocm .
cat dist/sha256sum.txt cat dist/sha256sum.txt
- name: Create or update Release - uses: ncipollo/release-action@v1
run: | with:
echo "Looking for existing release for ${{ env.RELEASE_VERSION }}" name: ${{ env.RELEASE_VERSION }}
OLD_TAG=$(gh release ls --json name,tagName | jq -r ".[] | select(.name == \"${{ env.RELEASE_VERSION }}\") | .tagName") allowUpdates: true
if [ -n "$OLD_TAG" ]; then artifacts: 'dist/*'
echo "Updating release ${{ env.RELEASE_VERSION }} to point to new tag ${GITHUB_REF_NAME}" draft: true
gh release edit ${OLD_TAG} --tag ${GITHUB_REF_NAME} prerelease: true
else omitBodyDuringUpdate: true
echo "Creating new release ${{ env.RELEASE_VERSION }} pointing to tag ${GITHUB_REF_NAME}" generateReleaseNotes: true
gh release create ${GITHUB_REF_NAME} \ omitDraftDuringUpdate: true
--title ${{ env.RELEASE_VERSION }} \ omitPrereleaseDuringUpdate: true
--draft \ replacesArtifacts: true
--generate-notes \
--prerelease
fi
echo "Uploading artifacts for tag ${GITHUB_REF_NAME}"
gh release upload ${GITHUB_REF_NAME} dist/* --clobber

View File

@@ -34,13 +34,13 @@ jobs:
git diff-tree -r --no-commit-id --name-only \ git diff-tree -r --no-commit-id --name-only \
$(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \ $(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
${{ github.event.pull_request.head.sha }} \ ${{ github.event.pull_request.head.sha }} \
| xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))" | xargs python3 -c "import sys; print(any([x.startswith('$1') for x in sys.argv[1:]]))"
} }
{ {
echo GENERATE=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**') echo GENERATE=$(changed llm/)
echo GENERATE_CUDA=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**') echo GENERATE_CUDA=$(changed llm/)
echo GENERATE_ROCM=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**') echo GENERATE_ROCM=$(changed llm/)
} >>$GITHUB_OUTPUT } >>$GITHUB_OUTPUT
generate: generate:
@@ -58,7 +58,6 @@ jobs:
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
env: env:
GOARCH: ${{ matrix.arch }} GOARCH: ${{ matrix.arch }}
CGO_ENABLED: '1'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
@@ -80,7 +79,6 @@ jobs:
- run: go generate -x ./... - run: go generate -x ./...
if: ${{ ! startsWith(matrix.os, 'windows-') }} if: ${{ ! startsWith(matrix.os, 'windows-') }}
name: 'Unix Go Generate' name: 'Unix Go Generate'
- run: go build .
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: ${{ matrix.os }}-${{ matrix.arch }}-libraries name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
@@ -126,7 +124,7 @@ jobs:
strategy: strategy:
matrix: matrix:
rocm-version: rocm-version:
- '6.1.2' - '6.0.2'
runs-on: linux runs-on: linux
container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }} container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
steps: steps:
@@ -169,7 +167,7 @@ jobs:
run: | run: |
$ErrorActionPreference = "Stop" $ErrorActionPreference = "Stop"
write-host "downloading AMD HIP Installer" write-host "downloading AMD HIP Installer"
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
write-host "Installing AMD HIP" write-host "Installing AMD HIP"
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
write-host "Completed AMD HIP" write-host "Completed AMD HIP"
@@ -271,7 +269,7 @@ jobs:
mkdir -p llm/build/darwin/$ARCH/stub/bin mkdir -p llm/build/darwin/$ARCH/stub/bin
touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
if: ${{ startsWith(matrix.os, 'macos-') }} if: ${{ startsWith(matrix.os, 'macos-') }}
- uses: golangci/golangci-lint-action@v6 - uses: golangci/golangci-lint-action@v4
with: with:
args: --timeout 8m0s -v args: --timeout 8m0s -v
test: test:
@@ -289,8 +287,6 @@ jobs:
GOARCH: ${{ matrix.arch }} GOARCH: ${{ matrix.arch }}
CGO_ENABLED: '1' CGO_ENABLED: '1'
OLLAMA_CPU_TARGET: 'static' OLLAMA_CPU_TARGET: 'static'
OLLAMA_SKIP_CPU_GENERATE: '1'
OLLAMA_SKIP_METAL_GENERATE: '1'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:

1
.gitignore vendored
View File

@@ -12,4 +12,3 @@ ggml-metal.metal
test_data test_data
*.crt *.crt
llm/build llm/build
__debug_bin*

View File

@@ -7,37 +7,11 @@ linters:
- bodyclose - bodyclose
- containedctx - containedctx
- contextcheck - contextcheck
- errcheck
- exportloopref - exportloopref
- gci
- gocheckcompilerdirectives - gocheckcompilerdirectives
- gofmt # FIXME: for some reason this errors on windows
- gofumpt # - gofmt
- gosimple # - goimports
- govet
- ineffassign
- intrange
- makezero
- misspell - misspell
- nilerr - nilerr
- nolintlint
- nosprintfhostport
- staticcheck
- tenv
- unconvert
- unused - unused
- usestdlibvars
- wastedassign
- whitespace
linters-settings:
gci:
sections: [standard, default, localmodule]
severity:
default-severity: error
rules:
- linters:
- gofmt
- goimports
- intrange
- usestdlibvars
severity: info

View File

@@ -1,37 +0,0 @@
# Contributing to Ollama
Thank you for your interest in contributing to Ollama! Here are a few guidelines to help get you started.
## Set up
See the [development documentation](./docs/development.md) for instructions on how to build and run Ollama locally.
## Pull requests
### Ideal issues
* [Bugs](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Abug): issues where Ollama stops working or where it results in an unexpected error.
* [Performance](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Aperformance): issues to make Ollama faster at model inference, downloading or uploading.
* [Security](https://github.com/ollama/ollama/blob/main/SECURITY.md): issues that could lead to a security vulnerability. As mentioned in [SECURITY.md](https://github.com/ollama/ollama/blob/main/SECURITY.md), please do not disclose security vulnerabilities publicly.
### Issues that are harder to review
* New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run as they cannot be removed without potentially breaking users in the future.
* Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
* Documentation: small updates to fill in or dorrect missing documentation is helpful, however large documentation additions can be hard to maintain over time.
### Issues that may not be accepted
* Changes that break backwards compatibility in Ollama's API (including the OpenAI-compatible API)
* Changes that add significant friction to the user experience
* Changes that create a large future maintenance burden for maintainers and contributors
### Best practices
* Commit messages: please leave both a title and a description in your commit messages. The title should be a short summary of the changes, with a leading word that explains the section of the code being changed (e.g. `api: fix parsing of prompt field`) . In the description, leave a short 2-3 sentences that explain more about the change and its impact.
* Tests: please add test coverage to changes where possible.
* Minimize dependencies: avoid adding new dependencies unless absolutely necessary.
## Need help?
If you need help with anything, feel free to reach out to us on our [Discord server](https://discord.gg/ollama).

View File

@@ -1,10 +1,8 @@
ARG GOLANG_VERSION=1.22.5 ARG GOLANG_VERSION=1.22.1
ARG CMAKE_VERSION=3.22.1 ARG CMAKE_VERSION=3.22.1
ARG CUDA_VERSION_11=11.3.1 # this CUDA_VERSION corresponds with the one specified in docs/gpu.md
ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86" ARG CUDA_VERSION=11.3.1
ARG CUDA_VERSION_12=12.4.0 ARG ROCM_VERSION=6.0.2
ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
ARG ROCM_VERSION=6.1.2
# Copy the minimal context we need to run the generate scripts # Copy the minimal context we need to run the generate scripts
FROM scratch AS llm-code FROM scratch AS llm-code
@@ -12,7 +10,7 @@ COPY .git .git
COPY .gitmodules .gitmodules COPY .gitmodules .gitmodules
COPY llm llm COPY llm llm
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_11-devel-centos7 AS cuda-11-build-amd64 FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@@ -20,34 +18,9 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS ARG CGO_CFLAGS
ARG CUDA_V11_ARCHITECTURES RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
CUDA_VARIANT="_v11" \
bash gen_linux.sh
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64 FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V12_ARCHITECTURES
ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
CUDA_VARIANT="_v12" \
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
bash gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
ARG CMAKE_VERSION ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@@ -55,32 +28,7 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS ARG CGO_CFLAGS
ARG CUDA_V11_ARCHITECTURES RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
ENV GOARCH arm64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
CUDA_VARIANT="_v11" \
bash gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V12_ARCHITECTURES
ENV GOARCH arm64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
CUDA_VARIANT="_v12" \
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
bash gen_linux.sh
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64 FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
@@ -92,11 +40,15 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS ARG CGO_CFLAGS
ARG AMDGPU_TARGETS ARG AMDGPU_TARGETS
ENV GOARCH amd64 RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
RUN --mount=type=cache,target=/root/.ccache \ RUN mkdir /tmp/scratch && \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh for dep in $(zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm*/bin/deps.txt.gz) ; do \
RUN mkdir -p ../../dist/linux-amd64-rocm/lib/ollama && \ cp ${dep} /tmp/scratch/ || exit 1 ; \
(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64-rocm/lib/ollama && tar xf - ) done && \
(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \
(cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . )
FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64 FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
@@ -107,40 +59,32 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
ARG OLLAMA_CUSTOM_CPU_DEFS ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS ARG CGO_CFLAGS
ENV GOARCH amd64
WORKDIR /go/src/github.com/ollama/ollama/llm/generate WORKDIR /go/src/github.com/ollama/ollama/llm/generate
FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64 FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
RUN --mount=type=cache,target=/root/.ccache \ RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
OLLAMA_CPU_TARGET="static" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
RUN --mount=type=cache,target=/root/.ccache \ RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
RUN --mount=type=cache,target=/root/.ccache \ RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
RUN --mount=type=cache,target=/root/.ccache \ RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" bash gen_linux.sh
FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64 FROM --platform=linux/arm64 centos:7 AS cpu-builder-arm64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG GOLANG_VERSION ARG GOLANG_VERSION
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
ARG OLLAMA_CUSTOM_CPU_DEFS ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS ARG CGO_CFLAGS
ENV GOARCH arm64
WORKDIR /go/src/github.com/ollama/ollama/llm/generate WORKDIR /go/src/github.com/ollama/ollama/llm/generate
FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64 FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
RUN --mount=type=cache,target=/root/.ccache \ RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
OLLAMA_CPU_TARGET="static" bash gen_linux.sh
FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64 FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
RUN --mount=type=cache,target=/root/.ccache \ RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
# Intermediate stage used for ./scripts/build_linux.sh # Intermediate stage used for ./scripts/build_linux.sh
@@ -151,16 +95,12 @@ COPY . .
COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
ARG GOFLAGS ARG GOFLAGS
ARG CGO_CFLAGS ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \ RUN go build -trimpath .
go build -trimpath -o dist/linux-amd64/bin/ollama .
# Intermediate stage used for ./scripts/build_linux.sh # Intermediate stage used for ./scripts/build_linux.sh
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64 FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
@@ -169,36 +109,23 @@ ARG GOLANG_VERSION
WORKDIR /go/src/github.com/ollama/ollama WORKDIR /go/src/github.com/ollama/ollama
COPY . . COPY . .
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
ARG GOFLAGS ARG GOFLAGS
ARG CGO_CFLAGS ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \ RUN go build -trimpath .
go build -trimpath -o dist/linux-arm64/bin/ollama .
# Strip out ROCm dependencies to keep the primary image lean
FROM --platform=linux/amd64 ubuntu:22.04 as amd64-libs-without-rocm
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/
RUN cd /scratch/ollama/ && rm -rf rocblas libamd* libdrm* libroc* libhip* libhsa*
# Runtime stages # Runtime stages
FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64 FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
COPY --from=amd64-libs-without-rocm /scratch/ /lib/
RUN apt-get update && apt-get install -y ca-certificates RUN apt-get update && apt-get install -y ca-certificates
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64 FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
RUN apt-get update && apt-get install -y ca-certificates RUN apt-get update && apt-get install -y ca-certificates
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/ COPY --from=build-arm64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image # Radeon images are much larger so we keep it distinct from the CPU/CUDA image
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
RUN update-pciids RUN update-pciids
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
RUN ln -s /opt/rocm/lib /lib/ollama
EXPOSE 11434 EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0 ENV OLLAMA_HOST 0.0.0.0

126
README.md
View File

@@ -1,12 +1,12 @@
<div align="center"> <div align="center">
 <img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7"> <img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
</div> </div>
# Ollama # Ollama
[![Discord](https://dcbadge.vercel.app/api/server/ollama?style=flat&compact=true)](https://discord.gg/ollama) [![Discord](https://dcbadge.vercel.app/api/server/ollama?style=flat&compact=true)](https://discord.gg/ollama)
Get up and running with large language models. Get up and running with large language models locally.
### macOS ### macOS
@@ -35,10 +35,10 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla
## Quickstart ## Quickstart
To run and chat with [Llama 3.1](https://ollama.com/library/llama3.1): To run and chat with [Llama 3](https://ollama.com/library/llama3):
``` ```
ollama run llama3.1 ollama run llama3
``` ```
## Model library ## Model library
@@ -49,25 +49,20 @@ Here are some example models that can be downloaded:
| Model | Parameters | Size | Download | | Model | Parameters | Size | Download |
| ------------------ | ---------- | ----- | ------------------------------ | | ------------------ | ---------- | ----- | ------------------------------ |
| Llama 3.1 | 8B | 4.7GB | `ollama run llama3.1` | | Llama 3 | 8B | 4.7GB | `ollama run llama3` |
| Llama 3.1 | 70B | 40GB | `ollama run llama3.1:70b` | | Llama 3 | 70B | 40GB | `ollama run llama3:70b` |
| Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` | | Phi-3 | 3,8B | 2.3GB | `ollama run phi3` |
| Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` |
| Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` |
| Gemma 2 | 2B | 1.6GB | `ollama run gemma2:2b` |
| Gemma 2 | 9B | 5.5GB | `ollama run gemma2` |
| Gemma 2 | 27B | 16GB | `ollama run gemma2:27b` |
| Mistral | 7B | 4.1GB | `ollama run mistral` | | Mistral | 7B | 4.1GB | `ollama run mistral` |
| Moondream 2 | 1.4B | 829MB | `ollama run moondream` |
| Neural Chat | 7B | 4.1GB | `ollama run neural-chat` | | Neural Chat | 7B | 4.1GB | `ollama run neural-chat` |
| Starling | 7B | 4.1GB | `ollama run starling-lm` | | Starling | 7B | 4.1GB | `ollama run starling-lm` |
| Code Llama | 7B | 3.8GB | `ollama run codellama` | | Code Llama | 7B | 3.8GB | `ollama run codellama` |
| Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` | | Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` |
| LLaVA | 7B | 4.5GB | `ollama run llava` | | LLaVA | 7B | 4.5GB | `ollama run llava` |
| Gemma | 2B | 1.4GB | `ollama run gemma:2b` |
| Gemma | 7B | 4.8GB | `ollama run gemma:7b` |
| Solar | 10.7B | 6.1GB | `ollama run solar` | | Solar | 10.7B | 6.1GB | `ollama run solar` |
> [!NOTE] > Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
## Customize a model ## Customize a model
@@ -99,16 +94,16 @@ See the [guide](docs/import.md) on importing models for more information.
### Customize a prompt ### Customize a prompt
Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.1` model: Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3` model:
``` ```
ollama pull llama3.1 ollama pull llama3
``` ```
Create a `Modelfile`: Create a `Modelfile`:
``` ```
FROM llama3.1 FROM llama3
# set the temperature to 1 [higher is more creative, lower is more coherent] # set the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1 PARAMETER temperature 1
@@ -143,7 +138,7 @@ ollama create mymodel -f ./Modelfile
### Pull a model ### Pull a model
``` ```
ollama pull llama3.1 ollama pull llama3
``` ```
> This command can also be used to update a local model. Only the diff will be pulled. > This command can also be used to update a local model. Only the diff will be pulled.
@@ -151,13 +146,13 @@ ollama pull llama3.1
### Remove a model ### Remove a model
``` ```
ollama rm llama3.1 ollama rm llama3
``` ```
### Copy a model ### Copy a model
``` ```
ollama cp llama3.1 my-model ollama cp llama3 my-model
``` ```
### Multiline input ### Multiline input
@@ -174,23 +169,17 @@ I'm a basic program that prints the famous "Hello, world!" message to the consol
### Multimodal models ### Multimodal models
``` ```
ollama run llava "What's in this image? /Users/jmorgan/Desktop/smile.png" >>> What's in this image? /Users/jmorgan/Desktop/smile.png
The image features a yellow smiley face, which is likely the central focus of the picture. The image features a yellow smiley face, which is likely the central focus of the picture.
``` ```
### Pass the prompt as an argument ### Pass in prompt as arguments
``` ```
$ ollama run llama3.1 "Summarize this file: $(cat README.md)" $ ollama run llama3 "Summarize this file: $(cat README.md)"
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications. Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
``` ```
### Show model information
```
ollama show llama3.1
```
### List models on your computer ### List models on your computer
``` ```
@@ -203,7 +192,25 @@ ollama list
## Building ## Building
See the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md) Install `cmake` and `go`:
```
brew install cmake go
```
Then generate dependencies:
```
go generate ./...
```
Then build the binary:
```
go build .
```
More detailed instructions can be found in the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
### Running local builds ### Running local builds
@@ -216,7 +223,7 @@ Next, start the server:
Finally, in a separate shell, run a model: Finally, in a separate shell, run a model:
``` ```
./ollama run llama3.1 ./ollama run llama3
``` ```
## REST API ## REST API
@@ -227,7 +234,7 @@ Ollama has a REST API for running and managing models.
``` ```
curl http://localhost:11434/api/generate -d '{ curl http://localhost:11434/api/generate -d '{
"model": "llama3.1", "model": "llama3",
"prompt":"Why is the sky blue?" "prompt":"Why is the sky blue?"
}' }'
``` ```
@@ -236,7 +243,7 @@ curl http://localhost:11434/api/generate -d '{
``` ```
curl http://localhost:11434/api/chat -d '{ curl http://localhost:11434/api/chat -d '{
"model": "llama3.1", "model": "llama3",
"messages": [ "messages": [
{ "role": "user", "content": "why is the sky blue?" } { "role": "user", "content": "why is the sky blue?" }
] ]
@@ -251,7 +258,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Open WebUI](https://github.com/open-webui/open-webui) - [Open WebUI](https://github.com/open-webui/open-webui)
- [Enchanted (macOS native)](https://github.com/AugustDev/enchanted) - [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
- [Hollama](https://github.com/fmaclen/hollama)
- [Lollms-Webui](https://github.com/ParisNeo/lollms-webui) - [Lollms-Webui](https://github.com/ParisNeo/lollms-webui)
- [LibreChat](https://github.com/danny-avila/LibreChat) - [LibreChat](https://github.com/danny-avila/LibreChat)
- [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt) - [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt)
@@ -278,30 +284,17 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [OllamaGUI](https://github.com/enoch1118/ollamaGUI) - [OllamaGUI](https://github.com/enoch1118/ollamaGUI)
- [OpenAOE](https://github.com/InternLM/OpenAOE) - [OpenAOE](https://github.com/InternLM/OpenAOE)
- [Odin Runes](https://github.com/leonid20000/OdinRunes) - [Odin Runes](https://github.com/leonid20000/OdinRunes)
- [LLM-X](https://github.com/mrdjohnson/llm-x) (Progressive Web App) - [LLM-X: Progressive Web App](https://github.com/mrdjohnson/llm-x)
- [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm) - [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
- [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat) - [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
- [Ollama-chats RPG](https://github.com/drazdra/ollama-chats) - [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
- [QA-Pilot](https://github.com/reid41/QA-Pilot) (Chat with Code Repository) - [QA-Pilot: Chat with Code Repository](https://github.com/reid41/QA-Pilot)
- [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases) - [ChatOllama: Open Source Chatbot based on Ollama with Knowledge Bases](https://github.com/sugarforever/chat-ollama)
- [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG) - [CRAG Ollama Chat: Simple Web Search with Corrective RAG](https://github.com/Nagi-ovo/CRAG-Ollama-Chat)
- [RAGFlow](https://github.com/infiniflow/ragflow) (Open-source Retrieval-Augmented Generation engine based on deep document understanding) - [RAGFlow: Open-source Retrieval-Augmented Generation engine based on deep document understanding](https://github.com/infiniflow/ragflow)
- [StreamDeploy](https://github.com/StreamDeploy-DevRel/streamdeploy-llm-app-scaffold) (LLM Application Scaffold) - [chat: chat web app for teams](https://github.com/swuecho/chat)
- [chat](https://github.com/swuecho/chat) (chat web app for teams)
- [Lobe Chat](https://github.com/lobehub/lobe-chat) with [Integrating Doc](https://lobehub.com/docs/self-hosting/examples/ollama) - [Lobe Chat](https://github.com/lobehub/lobe-chat) with [Integrating Doc](https://lobehub.com/docs/self-hosting/examples/ollama)
- [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) (Local Chat with multiple PDFs using Ollama and RAG) - [Ollama RAG Chatbot: Local Chat with multiples PDFs using Ollama and RAG.](https://github.com/datvodinh/rag-chatbot.git)
- [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation)
- [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends)
- [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
- [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
- [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
- [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
- [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
- [AI Studio](https://github.com/MindWorkAI/AI-Studio)
- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
- [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
- [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
### Terminal ### Terminal
@@ -324,8 +317,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [ShellOracle](https://github.com/djcopley/ShellOracle) - [ShellOracle](https://github.com/djcopley/ShellOracle)
- [tlm](https://github.com/yusufcanb/tlm) - [tlm](https://github.com/yusufcanb/tlm)
- [podman-ollama](https://github.com/ericcurtin/podman-ollama) - [podman-ollama](https://github.com/ericcurtin/podman-ollama)
- [gollama](https://github.com/sammcj/gollama)
- [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/)
### Database ### Database
@@ -336,21 +327,17 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/) - [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
- [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama) - [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
- [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
### Libraries ### Libraries
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa) - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
- [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama)
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example) - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
- [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java) - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html) - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
- [LiteLLM](https://github.com/BerriAI/litellm) - [LiteLLM](https://github.com/BerriAI/litellm)
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp) - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
- [Ollama for Ruby](https://github.com/gbaptista/ollama-ai) - [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
- [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs) - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
- [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
- [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j) - [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama) - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
- [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit) - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
@@ -361,13 +348,9 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Haystack](https://github.com/deepset-ai/haystack-integrations/blob/main/integrations/ollama.md) - [Haystack](https://github.com/deepset-ai/haystack-integrations/blob/main/integrations/ollama.md)
- [Elixir LangChain](https://github.com/brainlid/langchain) - [Elixir LangChain](https://github.com/brainlid/langchain)
- [Ollama for R - rollama](https://github.com/JBGruber/rollama) - [Ollama for R - rollama](https://github.com/JBGruber/rollama)
- [Ollama for R - ollama-r](https://github.com/hauselin/ollama-r)
- [Ollama-ex for Elixir](https://github.com/lebrunel/ollama-ex) - [Ollama-ex for Elixir](https://github.com/lebrunel/ollama-ex)
- [Ollama Connector for SAP ABAP](https://github.com/b-tocs/abap_btocs_ollama) - [Ollama Connector for SAP ABAP](https://github.com/b-tocs/abap_btocs_ollama)
- [Testcontainers](https://testcontainers.com/modules/ollama/) - [Testcontainers](https://testcontainers.com/modules/ollama/)
- [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama)
- [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama)
- [LlamaScript](https://github.com/Project-Llama/llamascript)
### Mobile ### Mobile
@@ -387,23 +370,18 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram) - [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram)
- [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation) - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama) - [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
- [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot) - [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)
- [Cliobot](https://github.com/herval/cliobot) (Telegram bot with Ollama support) - [Cliobot](https://github.com/herval/cliobot) (Telegram bot with Ollama support)
- [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot) - [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot)
- [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt) - [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
- [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama) - [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
- [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot)
- [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama) - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face) - [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
- [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension) - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
- [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend) - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
- [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support) - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
- [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation) - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
- [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)
### Supported backends
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
### Supported backends
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.

View File

@@ -1,25 +0,0 @@
# Security
The Ollama maintainer team takes security seriously and will actively work to resolve security issues.
## Reporting a vulnerability
If you discover a security vulnerability, please do not open a public issue. Instead, please report it by emailing hello@ollama.com. We ask that you give us sufficient time to investigate and address the vulnerability before disclosing it publicly.
Please include the following details in your report:
- A description of the vulnerability
- Steps to reproduce the issue
- Your assessment of the potential impact
- Any possible mitigations
## Security best practices
While the maintainer team does their best to secure Ollama, users are encouraged to implement their own security best practices, such as:
- Regularly updating to the latest version of Ollama
- Securing access to hosted instances of Ollama
- Monitoring systems for unusual activity
## Contact
For any other questions or concerns related to security, please contact us at hello@ollama.com

View File

@@ -1,16 +1,9 @@
// Package api implements the client-side API for code wishing to interact // Package api implements the client-side API for code wishing to interact
// with the ollama service. The methods of the [Client] type correspond to // with the ollama service. The methods of the [Client] type correspond to
// the ollama REST API as described in [the API documentation]. // the ollama REST API as described in https://github.com/ollama/ollama/blob/main/docs/api.md
//
// The ollama command-line client itself uses this package to interact with // The ollama command-line client itself uses this package to interact with
// the backend service. // the backend service.
//
// # Examples
//
// Several examples of using this package are available [in the GitHub
// repository].
//
// [the API documentation]: https://github.com/ollama/ollama/blob/main/docs/api.md
// [in the GitHub repository]: https://github.com/ollama/ollama/tree/main/examples
package api package api
import ( import (
@@ -18,14 +11,15 @@ import (
"bytes" "bytes"
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"net"
"net/http" "net/http"
"net/url" "net/url"
"os"
"runtime" "runtime"
"strings"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
"github.com/ollama/ollama/version" "github.com/ollama/ollama/version"
) )
@@ -63,8 +57,36 @@ func checkError(resp *http.Response, body []byte) error {
// If the variable is not specified, a default ollama host and port will be // If the variable is not specified, a default ollama host and port will be
// used. // used.
func ClientFromEnvironment() (*Client, error) { func ClientFromEnvironment() (*Client, error) {
defaultPort := "11434"
scheme, hostport, ok := strings.Cut(os.Getenv("OLLAMA_HOST"), "://")
switch {
case !ok:
scheme, hostport = "http", os.Getenv("OLLAMA_HOST")
case scheme == "http":
defaultPort = "80"
case scheme == "https":
defaultPort = "443"
}
// trim trailing slashes
hostport = strings.TrimRight(hostport, "/")
host, port, err := net.SplitHostPort(hostport)
if err != nil {
host, port = "127.0.0.1", defaultPort
if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
host = ip.String()
} else if hostport != "" {
host = hostport
}
}
return &Client{ return &Client{
base: envconfig.Host(), base: &url.URL{
Scheme: scheme,
Host: net.JoinHostPort(host, port),
},
http: http.DefaultClient, http: http.DefaultClient,
}, nil }, nil
} }
@@ -173,7 +195,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
} }
if errorResponse.Error != "" { if errorResponse.Error != "" {
return errors.New(errorResponse.Error) return fmt.Errorf(errorResponse.Error)
} }
if response.StatusCode >= http.StatusBadRequest { if response.StatusCode >= http.StatusBadRequest {
@@ -250,14 +272,8 @@ func (c *Client) Pull(ctx context.Context, req *PullRequest, fn PullProgressFunc
}) })
} }
// PushProgressFunc is a function that [Client.Push] invokes when progress is
// made.
// It's similar to other progress function types like [PullProgressFunc].
type PushProgressFunc func(ProgressResponse) error type PushProgressFunc func(ProgressResponse) error
// Push uploads a model to the model library; requires registering for ollama.ai
// and adding a public key first. fn is called each time progress is made on
// the request and can be used to display a progress bar, etc.
func (c *Client) Push(ctx context.Context, req *PushRequest, fn PushProgressFunc) error { func (c *Client) Push(ctx context.Context, req *PushRequest, fn PushProgressFunc) error {
return c.stream(ctx, http.MethodPost, "/api/push", req, func(bts []byte) error { return c.stream(ctx, http.MethodPost, "/api/push", req, func(bts []byte) error {
var resp ProgressResponse var resp ProgressResponse
@@ -269,15 +285,8 @@ func (c *Client) Push(ctx context.Context, req *PushRequest, fn PushProgressFunc
}) })
} }
// CreateProgressFunc is a function that [Client.Create] invokes when progress
// is made.
// It's similar to other progress function types like [PullProgressFunc].
type CreateProgressFunc func(ProgressResponse) error type CreateProgressFunc func(ProgressResponse) error
// Create creates a model from a [Modelfile]. fn is a progress function that
// behaves similarly to other methods (see [Client.Pull]).
//
// [Modelfile]: https://github.com/ollama/ollama/blob/main/docs/modelfile.md
func (c *Client) Create(ctx context.Context, req *CreateRequest, fn CreateProgressFunc) error { func (c *Client) Create(ctx context.Context, req *CreateRequest, fn CreateProgressFunc) error {
return c.stream(ctx, http.MethodPost, "/api/create", req, func(bts []byte) error { return c.stream(ctx, http.MethodPost, "/api/create", req, func(bts []byte) error {
var resp ProgressResponse var resp ProgressResponse
@@ -289,7 +298,6 @@ func (c *Client) Create(ctx context.Context, req *CreateRequest, fn CreateProgre
}) })
} }
// List lists models that are available locally.
func (c *Client) List(ctx context.Context) (*ListResponse, error) { func (c *Client) List(ctx context.Context) (*ListResponse, error) {
var lr ListResponse var lr ListResponse
if err := c.do(ctx, http.MethodGet, "/api/tags", nil, &lr); err != nil { if err := c.do(ctx, http.MethodGet, "/api/tags", nil, &lr); err != nil {
@@ -298,17 +306,6 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
return &lr, nil return &lr, nil
} }
// ListRunning lists running models.
func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
var lr ProcessResponse
if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
return nil, err
}
return &lr, nil
}
// Copy copies a model - creating a model with another name from an existing
// model.
func (c *Client) Copy(ctx context.Context, req *CopyRequest) error { func (c *Client) Copy(ctx context.Context, req *CopyRequest) error {
if err := c.do(ctx, http.MethodPost, "/api/copy", req, nil); err != nil { if err := c.do(ctx, http.MethodPost, "/api/copy", req, nil); err != nil {
return err return err
@@ -316,7 +313,6 @@ func (c *Client) Copy(ctx context.Context, req *CopyRequest) error {
return nil return nil
} }
// Delete deletes a model and its data.
func (c *Client) Delete(ctx context.Context, req *DeleteRequest) error { func (c *Client) Delete(ctx context.Context, req *DeleteRequest) error {
if err := c.do(ctx, http.MethodDelete, "/api/delete", req, nil); err != nil { if err := c.do(ctx, http.MethodDelete, "/api/delete", req, nil); err != nil {
return err return err
@@ -324,7 +320,6 @@ func (c *Client) Delete(ctx context.Context, req *DeleteRequest) error {
return nil return nil
} }
// Show obtains model information, including details, modelfile, license etc.
func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, error) { func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, error) {
var resp ShowResponse var resp ShowResponse
if err := c.do(ctx, http.MethodPost, "/api/show", req, &resp); err != nil { if err := c.do(ctx, http.MethodPost, "/api/show", req, &resp); err != nil {
@@ -333,25 +328,12 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, err
return &resp, nil return &resp, nil
} }
// Heartbeat checks if the server has started and is responsive; if yes, it
// returns nil, otherwise an error.
func (c *Client) Heartbeat(ctx context.Context) error { func (c *Client) Heartbeat(ctx context.Context) error {
if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil { if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
return err return err
} }
return nil return nil
} }
// Embed generates embeddings from a model.
func (c *Client) Embed(ctx context.Context, req *EmbedRequest) (*EmbedResponse, error) {
var resp EmbedResponse
if err := c.do(ctx, http.MethodPost, "/api/embed", req, &resp); err != nil {
return nil, err
}
return &resp, nil
}
// Embeddings generates an embedding from a model.
func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error) { func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error) {
var resp EmbeddingResponse var resp EmbeddingResponse
if err := c.do(ctx, http.MethodPost, "/api/embeddings", req, &resp); err != nil { if err := c.do(ctx, http.MethodPost, "/api/embeddings", req, &resp); err != nil {
@@ -360,13 +342,10 @@ func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*Embedd
return &resp, nil return &resp, nil
} }
// CreateBlob creates a blob from a file on the server. digest is the
// expected SHA256 digest of the file, and r represents the file.
func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error { func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
return c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil) return c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil)
} }
// Version returns the Ollama server version as a string.
func (c *Client) Version(ctx context.Context) (string, error) { func (c *Client) Version(ctx context.Context) (string, error) {
var version struct { var version struct {
Version string `json:"version"` Version string `json:"version"`

View File

@@ -1,8 +1,6 @@
package api package api
import ( import "testing"
"testing"
)
func TestClientFromEnvironment(t *testing.T) { func TestClientFromEnvironment(t *testing.T) {
type testCase struct { type testCase struct {

View File

@@ -2,8 +2,8 @@ package api
import ( import (
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"log/slog"
"math" "math"
"os" "os"
"reflect" "reflect"
@@ -12,7 +12,6 @@ import (
"time" "time"
) )
// StatusError is an error with and HTTP status code.
type StatusError struct { type StatusError struct {
StatusCode int StatusCode int
Status string Status string
@@ -33,7 +32,6 @@ func (e StatusError) Error() string {
} }
} }
// ImageData represents the raw binary data of an image file.
type ImageData []byte type ImageData []byte
// GenerateRequest describes a request sent by [Client.Generate]. While you // GenerateRequest describes a request sent by [Client.Generate]. While you
@@ -47,9 +45,6 @@ type GenerateRequest struct {
// Prompt is the textual prompt to send to the model. // Prompt is the textual prompt to send to the model.
Prompt string `json:"prompt"` Prompt string `json:"prompt"`
// Suffix is the text that comes after the inserted text.
Suffix string `json:"suffix"`
// System overrides the model's default system message/prompt. // System overrides the model's default system message/prompt.
System string `json:"system"` System string `json:"system"`
@@ -82,112 +77,26 @@ type GenerateRequest struct {
Options map[string]interface{} `json:"options"` Options map[string]interface{} `json:"options"`
} }
// ChatRequest describes a request sent by [Client.Chat].
type ChatRequest struct { type ChatRequest struct {
// Model is the model name, as in [GenerateRequest]. Model string `json:"model"`
Model string `json:"model"` Messages []Message `json:"messages"`
Stream *bool `json:"stream,omitempty"`
// Messages is the messages of the chat - can be used to keep a chat memory. Format string `json:"format"`
Messages []Message `json:"messages"`
// Stream enable streaming of returned response; true by default.
Stream *bool `json:"stream,omitempty"`
// Format is the format to return the response in (e.g. "json").
Format string `json:"format"`
// KeepAlive controls how long the model will stay loaded into memory
// followin the request.
KeepAlive *Duration `json:"keep_alive,omitempty"` KeepAlive *Duration `json:"keep_alive,omitempty"`
// Tools is an optional list of tools the model has access to.
Tools `json:"tools,omitempty"`
// Options lists model-specific options.
Options map[string]interface{} `json:"options"` Options map[string]interface{} `json:"options"`
} }
type Tools []Tool
func (t Tools) String() string {
bts, _ := json.Marshal(t)
return string(bts)
}
func (t Tool) String() string {
bts, _ := json.Marshal(t)
return string(bts)
}
// Message is a single message in a chat sequence. The message contains the
// role ("system", "user", or "assistant"), the content and an optional list
// of images.
type Message struct { type Message struct {
Role string `json:"role"` Role string `json:"role"` // one of ["system", "user", "assistant"]
Content string `json:"content"` Content string `json:"content"`
Images []ImageData `json:"images,omitempty"` Images []ImageData `json:"images,omitempty"`
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
} }
func (m *Message) UnmarshalJSON(b []byte) error {
type Alias Message
var a Alias
if err := json.Unmarshal(b, &a); err != nil {
return err
}
*m = Message(a)
m.Role = strings.ToLower(m.Role)
return nil
}
type ToolCall struct {
Function ToolCallFunction `json:"function"`
}
type ToolCallFunction struct {
Name string `json:"name"`
Arguments ToolCallFunctionArguments `json:"arguments"`
}
type ToolCallFunctionArguments map[string]any
func (t *ToolCallFunctionArguments) String() string {
bts, _ := json.Marshal(t)
return string(bts)
}
type Tool struct {
Type string `json:"type"`
Function ToolFunction `json:"function"`
}
type ToolFunction struct {
Name string `json:"name"`
Description string `json:"description"`
Parameters struct {
Type string `json:"type"`
Required []string `json:"required"`
Properties map[string]struct {
Type string `json:"type"`
Description string `json:"description"`
Enum []string `json:"enum,omitempty"`
} `json:"properties"`
} `json:"parameters"`
}
func (t *ToolFunction) String() string {
bts, _ := json.Marshal(t)
return string(bts)
}
// ChatResponse is the response returned by [Client.Chat]. Its fields are
// similar to [GenerateResponse].
type ChatResponse struct { type ChatResponse struct {
Model string `json:"model"` Model string `json:"model"`
CreatedAt time.Time `json:"created_at"` CreatedAt time.Time `json:"created_at"`
Message Message `json:"message"` Message Message `json:"message"`
DoneReason string `json:"done_reason,omitempty"`
Done bool `json:"done"` Done bool `json:"done"`
@@ -203,8 +112,7 @@ type Metrics struct {
EvalDuration time.Duration `json:"eval_duration,omitempty"` EvalDuration time.Duration `json:"eval_duration,omitempty"`
} }
// Options specified in [GenerateRequest], if you add a new option here add it // Options specified in GenerateRequest, if you add a new option here add it to the API docs also
// to the API docs also.
type Options struct { type Options struct {
Runner Runner
@@ -214,7 +122,6 @@ type Options struct {
NumPredict int `json:"num_predict,omitempty"` NumPredict int `json:"num_predict,omitempty"`
TopK int `json:"top_k,omitempty"` TopK int `json:"top_k,omitempty"`
TopP float32 `json:"top_p,omitempty"` TopP float32 `json:"top_p,omitempty"`
MinP float32 `json:"min_p,omitempty"`
TFSZ float32 `json:"tfs_z,omitempty"` TFSZ float32 `json:"tfs_z,omitempty"`
TypicalP float32 `json:"typical_p,omitempty"` TypicalP float32 `json:"typical_p,omitempty"`
RepeatLastN int `json:"repeat_last_n,omitempty"` RepeatLastN int `json:"repeat_last_n,omitempty"`
@@ -231,84 +138,49 @@ type Options struct {
// Runner options which must be set when the model is loaded into memory // Runner options which must be set when the model is loaded into memory
type Runner struct { type Runner struct {
NumCtx int `json:"num_ctx,omitempty"` UseNUMA bool `json:"numa,omitempty"`
NumBatch int `json:"num_batch,omitempty"` NumCtx int `json:"num_ctx,omitempty"`
NumGPU int `json:"num_gpu,omitempty"` NumBatch int `json:"num_batch,omitempty"`
MainGPU int `json:"main_gpu,omitempty"` NumGQA int `json:"num_gqa,omitempty"`
LowVRAM bool `json:"low_vram,omitempty"` NumGPU int `json:"num_gpu,omitempty"`
F16KV bool `json:"f16_kv,omitempty"` MainGPU int `json:"main_gpu,omitempty"`
LogitsAll bool `json:"logits_all,omitempty"` LowVRAM bool `json:"low_vram,omitempty"`
VocabOnly bool `json:"vocab_only,omitempty"` F16KV bool `json:"f16_kv,omitempty"`
UseMMap *bool `json:"use_mmap,omitempty"` LogitsAll bool `json:"logits_all,omitempty"`
UseMLock bool `json:"use_mlock,omitempty"` VocabOnly bool `json:"vocab_only,omitempty"`
NumThread int `json:"num_thread,omitempty"` UseMMap bool `json:"use_mmap,omitempty"`
UseMLock bool `json:"use_mlock,omitempty"`
NumThread int `json:"num_thread,omitempty"`
// Unused: RopeFrequencyBase is ignored. Instead the value in the model will be used
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
// Unused: RopeFrequencyScale is ignored. Instead the value in the model will be used
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
} }
// EmbedRequest is the request passed to [Client.Embed].
type EmbedRequest struct {
// Model is the model name.
Model string `json:"model"`
// Input is the input to embed.
Input any `json:"input"`
// KeepAlive controls how long the model will stay loaded in memory following
// this request.
KeepAlive *Duration `json:"keep_alive,omitempty"`
Truncate *bool `json:"truncate,omitempty"`
// Options lists model-specific options.
Options map[string]interface{} `json:"options"`
}
// EmbedResponse is the response from [Client.Embed].
type EmbedResponse struct {
Model string `json:"model"`
Embeddings [][]float32 `json:"embeddings"`
TotalDuration time.Duration `json:"total_duration,omitempty"`
LoadDuration time.Duration `json:"load_duration,omitempty"`
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
}
// EmbeddingRequest is the request passed to [Client.Embeddings].
type EmbeddingRequest struct { type EmbeddingRequest struct {
// Model is the model name. Model string `json:"model"`
Model string `json:"model"` Prompt string `json:"prompt"`
// Prompt is the textual prompt to embed.
Prompt string `json:"prompt"`
// KeepAlive controls how long the model will stay loaded in memory following
// this request.
KeepAlive *Duration `json:"keep_alive,omitempty"` KeepAlive *Duration `json:"keep_alive,omitempty"`
// Options lists model-specific options.
Options map[string]interface{} `json:"options"` Options map[string]interface{} `json:"options"`
} }
// EmbeddingResponse is the response from [Client.Embeddings].
type EmbeddingResponse struct { type EmbeddingResponse struct {
Embedding []float64 `json:"embedding"` Embedding []float64 `json:"embedding"`
} }
// CreateRequest is the request passed to [Client.Create].
type CreateRequest struct { type CreateRequest struct {
Model string `json:"model"` Model string `json:"model"`
Path string `json:"path"` Path string `json:"path"`
Modelfile string `json:"modelfile"` Modelfile string `json:"modelfile"`
Stream *bool `json:"stream,omitempty"` Stream *bool `json:"stream,omitempty"`
Quantize string `json:"quantize,omitempty"` Quantization string `json:"quantization,omitempty"`
// Name is deprecated, see Model // Name is deprecated, see Model
Name string `json:"name"` Name string `json:"name"`
// Quantization is deprecated, see Quantize
Quantization string `json:"quantization,omitempty"`
} }
// DeleteRequest is the request passed to [Client.Delete].
type DeleteRequest struct { type DeleteRequest struct {
Model string `json:"model"` Model string `json:"model"`
@@ -316,14 +188,10 @@ type DeleteRequest struct {
Name string `json:"name"` Name string `json:"name"`
} }
// ShowRequest is the request passed to [Client.Show].
type ShowRequest struct { type ShowRequest struct {
Model string `json:"model"` Model string `json:"model"`
System string `json:"system"` System string `json:"system"`
// Template is deprecated
Template string `json:"template"` Template string `json:"template"`
Verbose bool `json:"verbose"`
Options map[string]interface{} `json:"options"` Options map[string]interface{} `json:"options"`
@@ -331,27 +199,21 @@ type ShowRequest struct {
Name string `json:"name"` Name string `json:"name"`
} }
// ShowResponse is the response returned from [Client.Show].
type ShowResponse struct { type ShowResponse struct {
License string `json:"license,omitempty"` License string `json:"license,omitempty"`
Modelfile string `json:"modelfile,omitempty"` Modelfile string `json:"modelfile,omitempty"`
Parameters string `json:"parameters,omitempty"` Parameters string `json:"parameters,omitempty"`
Template string `json:"template,omitempty"` Template string `json:"template,omitempty"`
System string `json:"system,omitempty"` System string `json:"system,omitempty"`
Details ModelDetails `json:"details,omitempty"` Details ModelDetails `json:"details,omitempty"`
Messages []Message `json:"messages,omitempty"` Messages []Message `json:"messages,omitempty"`
ModelInfo map[string]any `json:"model_info,omitempty"`
ProjectorInfo map[string]any `json:"projector_info,omitempty"`
ModifiedAt time.Time `json:"modified_at,omitempty"`
} }
// CopyRequest is the request passed to [Client.Copy].
type CopyRequest struct { type CopyRequest struct {
Source string `json:"source"` Source string `json:"source"`
Destination string `json:"destination"` Destination string `json:"destination"`
} }
// PullRequest is the request passed to [Client.Pull].
type PullRequest struct { type PullRequest struct {
Model string `json:"model"` Model string `json:"model"`
Insecure bool `json:"insecure,omitempty"` Insecure bool `json:"insecure,omitempty"`
@@ -363,8 +225,6 @@ type PullRequest struct {
Name string `json:"name"` Name string `json:"name"`
} }
// ProgressResponse is the response passed to progress functions like
// [PullProgressFunc] and [PushProgressFunc].
type ProgressResponse struct { type ProgressResponse struct {
Status string `json:"status"` Status string `json:"status"`
Digest string `json:"digest,omitempty"` Digest string `json:"digest,omitempty"`
@@ -372,7 +232,6 @@ type ProgressResponse struct {
Completed int64 `json:"completed,omitempty"` Completed int64 `json:"completed,omitempty"`
} }
// PushRequest is the request passed to [Client.Push].
type PushRequest struct { type PushRequest struct {
Model string `json:"model"` Model string `json:"model"`
Insecure bool `json:"insecure,omitempty"` Insecure bool `json:"insecure,omitempty"`
@@ -384,18 +243,11 @@ type PushRequest struct {
Name string `json:"name"` Name string `json:"name"`
} }
// ListResponse is the response from [Client.List].
type ListResponse struct { type ListResponse struct {
Models []ListModelResponse `json:"models"` Models []ModelResponse `json:"models"`
} }
// ProcessResponse is the response from [Client.Process]. type ModelResponse struct {
type ProcessResponse struct {
Models []ProcessModelResponse `json:"models"`
}
// ListModelResponse is a single model description in [ListResponse].
type ListModelResponse struct {
Name string `json:"name"` Name string `json:"name"`
Model string `json:"model"` Model string `json:"model"`
ModifiedAt time.Time `json:"modified_at"` ModifiedAt time.Time `json:"modified_at"`
@@ -404,53 +256,21 @@ type ListModelResponse struct {
Details ModelDetails `json:"details,omitempty"` Details ModelDetails `json:"details,omitempty"`
} }
// ProcessModelResponse is a single model description in [ProcessResponse].
type ProcessModelResponse struct {
Name string `json:"name"`
Model string `json:"model"`
Size int64 `json:"size"`
Digest string `json:"digest"`
Details ModelDetails `json:"details,omitempty"`
ExpiresAt time.Time `json:"expires_at"`
SizeVRAM int64 `json:"size_vram"`
}
type RetrieveModelResponse struct {
Id string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
OwnedBy string `json:"owned_by"`
}
type TokenResponse struct { type TokenResponse struct {
Token string `json:"token"` Token string `json:"token"`
} }
// GenerateResponse is the response passed into [GenerateResponseFunc].
type GenerateResponse struct { type GenerateResponse struct {
// Model is the model name that generated the response. Model string `json:"model"`
Model string `json:"model"`
// CreatedAt is the timestamp of the response.
CreatedAt time.Time `json:"created_at"` CreatedAt time.Time `json:"created_at"`
Response string `json:"response"`
// Response is the textual response itself. Done bool `json:"done"`
Response string `json:"response"`
// Done specifies if the response is complete.
Done bool `json:"done"`
// DoneReason is the reason the model stopped generating text.
DoneReason string `json:"done_reason,omitempty"`
// Context is an encoding of the conversation used in this response; this
// can be sent in the next request to keep a conversational memory.
Context []int `json:"context,omitempty"` Context []int `json:"context,omitempty"`
Metrics Metrics
} }
// ModelDetails provides details about a model.
type ModelDetails struct { type ModelDetails struct {
ParentModel string `json:"parent_model"` ParentModel string `json:"parent_model"`
Format string `json:"format"` Format string `json:"format"`
@@ -488,6 +308,8 @@ func (m *Metrics) Summary() {
} }
} }
var ErrInvalidOpts = errors.New("invalid options")
func (opts *Options) FromMap(m map[string]interface{}) error { func (opts *Options) FromMap(m map[string]interface{}) error {
valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
typeOpts := reflect.TypeOf(opts).Elem() // types of the fields in the options struct typeOpts := reflect.TypeOf(opts).Elem() // types of the fields in the options struct
@@ -501,87 +323,76 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
} }
} }
invalidOpts := []string{}
for key, val := range m { for key, val := range m {
opt, ok := jsonOpts[key] if opt, ok := jsonOpts[key]; ok {
if !ok { field := valueOpts.FieldByName(opt.Name)
slog.Warn("invalid option provided", "option", key) if field.IsValid() && field.CanSet() {
continue if val == nil {
} continue
}
field := valueOpts.FieldByName(opt.Name) switch field.Kind() {
if field.IsValid() && field.CanSet() { case reflect.Int:
if val == nil { switch t := val.(type) {
continue case int64:
} field.SetInt(t)
case float64:
switch field.Kind() { // when JSON unmarshals numbers, it uses float64, not int
case reflect.Int: field.SetInt(int64(t))
switch t := val.(type) { default:
case int64: return fmt.Errorf("option %q must be of type integer", key)
field.SetInt(t)
case float64:
// when JSON unmarshals numbers, it uses float64, not int
field.SetInt(int64(t))
default:
return fmt.Errorf("option %q must be of type integer", key)
}
case reflect.Bool:
val, ok := val.(bool)
if !ok {
return fmt.Errorf("option %q must be of type boolean", key)
}
field.SetBool(val)
case reflect.Float32:
// JSON unmarshals to float64
val, ok := val.(float64)
if !ok {
return fmt.Errorf("option %q must be of type float32", key)
}
field.SetFloat(val)
case reflect.String:
val, ok := val.(string)
if !ok {
return fmt.Errorf("option %q must be of type string", key)
}
field.SetString(val)
case reflect.Slice:
// JSON unmarshals to []interface{}, not []string
val, ok := val.([]interface{})
if !ok {
return fmt.Errorf("option %q must be of type array", key)
}
// convert []interface{} to []string
slice := make([]string, len(val))
for i, item := range val {
str, ok := item.(string)
if !ok {
return fmt.Errorf("option %q must be of an array of strings", key)
} }
slice[i] = str case reflect.Bool:
}
field.Set(reflect.ValueOf(slice))
case reflect.Pointer:
var b bool
if field.Type() == reflect.TypeOf(&b) {
val, ok := val.(bool) val, ok := val.(bool)
if !ok { if !ok {
return fmt.Errorf("option %q must be of type boolean", key) return fmt.Errorf("option %q must be of type boolean", key)
} }
field.Set(reflect.ValueOf(&val)) field.SetBool(val)
} else { case reflect.Float32:
return fmt.Errorf("unknown type loading config params: %v %v", field.Kind(), field.Type()) // JSON unmarshals to float64
val, ok := val.(float64)
if !ok {
return fmt.Errorf("option %q must be of type float32", key)
}
field.SetFloat(val)
case reflect.String:
val, ok := val.(string)
if !ok {
return fmt.Errorf("option %q must be of type string", key)
}
field.SetString(val)
case reflect.Slice:
// JSON unmarshals to []interface{}, not []string
val, ok := val.([]interface{})
if !ok {
return fmt.Errorf("option %q must be of type array", key)
}
// convert []interface{} to []string
slice := make([]string, len(val))
for i, item := range val {
str, ok := item.(string)
if !ok {
return fmt.Errorf("option %q must be of an array of strings", key)
}
slice[i] = str
}
field.Set(reflect.ValueOf(slice))
default:
return fmt.Errorf("unknown type loading config params: %v", field.Kind())
} }
default:
return fmt.Errorf("unknown type loading config params: %v", field.Kind())
} }
} else {
invalidOpts = append(invalidOpts, key)
} }
} }
if len(invalidOpts) > 0 {
return fmt.Errorf("%w: %v", ErrInvalidOpts, strings.Join(invalidOpts, ", "))
}
return nil return nil
} }
// DefaultOptions is the default set of options for [GenerateRequest]; these
// values are used unless the user specifies other values explicitly.
func DefaultOptions() Options { func DefaultOptions() Options {
return Options{ return Options{
// options set on request to runner // options set on request to runner
@@ -609,11 +420,13 @@ func DefaultOptions() Options {
NumCtx: 2048, NumCtx: 2048,
NumBatch: 512, NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumThread: 0, // let the runtime decide NumGQA: 1,
NumThread: 0, // let the runtime decide
LowVRAM: false, LowVRAM: false,
F16KV: true, F16KV: true,
UseMLock: false, UseMLock: false,
UseMMap: nil, UseMMap: true,
UseNUMA: false,
}, },
} }
} }
@@ -622,13 +435,6 @@ type Duration struct {
time.Duration time.Duration
} }
func (d Duration) MarshalJSON() ([]byte, error) {
if d.Duration < 0 {
return []byte("-1"), nil
}
return []byte("\"" + d.Duration.String() + "\""), nil
}
func (d *Duration) UnmarshalJSON(b []byte) (err error) { func (d *Duration) UnmarshalJSON(b []byte) (err error) {
var v any var v any
if err := json.Unmarshal(b, &v); err != nil { if err := json.Unmarshal(b, &v); err != nil {
@@ -642,7 +448,7 @@ func (d *Duration) UnmarshalJSON(b []byte) (err error) {
if t < 0 { if t < 0 {
d.Duration = time.Duration(math.MaxInt64) d.Duration = time.Duration(math.MaxInt64)
} else { } else {
d.Duration = time.Duration(int(t) * int(time.Second)) d.Duration = time.Duration(t * float64(time.Second))
} }
case string: case string:
d.Duration, err = time.ParseDuration(t) d.Duration, err = time.ParseDuration(t)
@@ -652,8 +458,6 @@ func (d *Duration) UnmarshalJSON(b []byte) (err error) {
if d.Duration < 0 { if d.Duration < 0 {
d.Duration = time.Duration(math.MaxInt64) d.Duration = time.Duration(math.MaxInt64)
} }
default:
return fmt.Errorf("Unsupported type: '%s'", reflect.TypeOf(v))
} }
return nil return nil
@@ -709,17 +513,6 @@ func FormatParams(params map[string][]string) (map[string]interface{}, error) {
case reflect.Slice: case reflect.Slice:
// TODO: only string slices are supported right now // TODO: only string slices are supported right now
out[key] = vals out[key] = vals
case reflect.Pointer:
var b bool
if field.Type() == reflect.TypeOf(&b) {
boolVal, err := strconv.ParseBool(vals[0])
if err != nil {
return nil, fmt.Errorf("invalid bool value %s", vals)
}
out[key] = &boolVal
} else {
return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
}
default: default:
return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key) return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
} }

View File

@@ -2,7 +2,6 @@ package api
import ( import (
"encoding/json" "encoding/json"
"errors"
"math" "math"
"testing" "testing"
"time" "time"
@@ -22,11 +21,6 @@ func TestKeepAliveParsingFromJSON(t *testing.T) {
req: `{ "keep_alive": 42 }`, req: `{ "keep_alive": 42 }`,
exp: &Duration{42 * time.Second}, exp: &Duration{42 * time.Second},
}, },
{
name: "Positive Float",
req: `{ "keep_alive": 42.5 }`,
exp: &Duration{42 * time.Second},
},
{ {
name: "Positive Integer String", name: "Positive Integer String",
req: `{ "keep_alive": "42m" }`, req: `{ "keep_alive": "42m" }`,
@@ -37,11 +31,6 @@ func TestKeepAliveParsingFromJSON(t *testing.T) {
req: `{ "keep_alive": -1 }`, req: `{ "keep_alive": -1 }`,
exp: &Duration{math.MaxInt64}, exp: &Duration{math.MaxInt64},
}, },
{
name: "Negative Float",
req: `{ "keep_alive": -3.14 }`,
exp: &Duration{math.MaxInt64},
},
{ {
name: "Negative Integer String", name: "Negative Integer String",
req: `{ "keep_alive": "-1m" }`, req: `{ "keep_alive": "-1m" }`,
@@ -59,175 +48,3 @@ func TestKeepAliveParsingFromJSON(t *testing.T) {
}) })
} }
} }
func TestDurationMarshalUnmarshal(t *testing.T) {
tests := []struct {
name string
input time.Duration
expected time.Duration
}{
{
"negative duration",
time.Duration(-1),
time.Duration(math.MaxInt64),
},
{
"positive duration",
42 * time.Second,
42 * time.Second,
},
{
"another positive duration",
42 * time.Minute,
42 * time.Minute,
},
{
"zero duration",
time.Duration(0),
time.Duration(0),
},
{
"max duration",
time.Duration(math.MaxInt64),
time.Duration(math.MaxInt64),
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
b, err := json.Marshal(Duration{test.input})
require.NoError(t, err)
var d Duration
err = json.Unmarshal(b, &d)
require.NoError(t, err)
assert.Equal(t, test.expected, d.Duration, "input %v, marshalled %v, got %v", test.input, string(b), d.Duration)
})
}
}
func TestUseMmapParsingFromJSON(t *testing.T) {
tr := true
fa := false
tests := []struct {
name string
req string
exp *bool
}{
{
name: "Undefined",
req: `{ }`,
exp: nil,
},
{
name: "True",
req: `{ "use_mmap": true }`,
exp: &tr,
},
{
name: "False",
req: `{ "use_mmap": false }`,
exp: &fa,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
var oMap map[string]interface{}
err := json.Unmarshal([]byte(test.req), &oMap)
require.NoError(t, err)
opts := DefaultOptions()
err = opts.FromMap(oMap)
require.NoError(t, err)
assert.Equal(t, test.exp, opts.UseMMap)
})
}
}
func TestUseMmapFormatParams(t *testing.T) {
tr := true
fa := false
tests := []struct {
name string
req map[string][]string
exp *bool
err error
}{
{
name: "True",
req: map[string][]string{
"use_mmap": {"true"},
},
exp: &tr,
err: nil,
},
{
name: "False",
req: map[string][]string{
"use_mmap": {"false"},
},
exp: &fa,
err: nil,
},
{
name: "Numeric True",
req: map[string][]string{
"use_mmap": {"1"},
},
exp: &tr,
err: nil,
},
{
name: "Numeric False",
req: map[string][]string{
"use_mmap": {"0"},
},
exp: &fa,
err: nil,
},
{
name: "invalid string",
req: map[string][]string{
"use_mmap": {"foo"},
},
exp: nil,
err: errors.New("invalid bool value [foo]"),
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
resp, err := FormatParams(test.req)
require.Equal(t, test.err, err)
respVal, ok := resp["use_mmap"]
if test.exp != nil {
assert.True(t, ok, "resp: %v", resp)
assert.Equal(t, *test.exp, *respVal.(*bool))
}
})
}
}
func TestMessage_UnmarshalJSON(t *testing.T) {
tests := []struct {
input string
expected string
}{
{`{"role": "USER", "content": "Hello!"}`, "user"},
{`{"role": "System", "content": "Initialization complete."}`, "system"},
{`{"role": "assistant", "content": "How can I help you?"}`, "assistant"},
{`{"role": "TOOl", "content": "Access granted."}`, "tool"},
}
for _, test := range tests {
var msg Message
if err := json.Unmarshal([]byte(test.input), &msg); err != nil {
t.Errorf("Unexpected error: %v", err)
}
if msg.Role != test.expected {
t.Errorf("role not lowercased: got %v, expected %v", msg.Role, test.expected)
}
}
}

1
app/.gitignore vendored
View File

@@ -1 +1,2 @@
ollama.syso ollama.syso
app

7
app/AppDelegate.h Normal file
View File

@@ -0,0 +1,7 @@
#import <Cocoa/Cocoa.h>
@interface AppDelegate : NSObject <NSApplicationDelegate>
- (void)applicationDidFinishLaunching:(NSNotification *)aNotification;
@end

View File

@@ -1,10 +1,6 @@
# Ollama App # Ollama App
## Linux ## macOS
TODO
## MacOS
TODO TODO

76
app/app_darwin.go Normal file
View File

@@ -0,0 +1,76 @@
package main
// #cgo CFLAGS: -x objective-c
// #cgo LDFLAGS: -framework Cocoa -framework LocalAuthentication -framework ServiceManagement
// #include "app_darwin.h"
import "C"
import (
"context"
"fmt"
"log/slog"
"os"
"path/filepath"
"syscall"
)
func init() {
home, err := os.UserHomeDir()
if err != nil {
panic(err)
}
ServerLogFile = filepath.Join(home, ".ollama", "logs", "server.log")
}
func run() {
initLogging()
slog.Info("ollama macOS app started")
// Ask to move to applications directory
moving := C.askToMoveToApplications()
if moving {
return
}
C.killOtherInstances()
code := C.installSymlink()
if code != 0 {
slog.Error("Failed to install symlink")
}
exe, err := os.Executable()
if err != nil {
panic(err)
}
var options ServerOptions
ctx, cancel := context.WithCancel(context.Background())
var done chan int
done, err = SpawnServer(ctx, filepath.Join(filepath.Dir(exe), "..", "Resources", "ollama"), options)
if err != nil {
slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err))
done = make(chan int, 1)
done <- 1
}
// Run the native macOS app
// Note: this will block until the app is closed
C.run()
slog.Info("ollama macOS app closed")
cancel()
slog.Info("Waiting for ollama server to shutdown...")
if done != nil {
<-done
}
slog.Info("Ollama app exiting")
}
//export Quit
func Quit() {
syscall.Kill(os.Getpid(), syscall.SIGTERM)
}

13
app/app_darwin.h Normal file
View File

@@ -0,0 +1,13 @@
#import <Cocoa/Cocoa.h>
@interface AppDelegate : NSObject <NSApplicationDelegate>
- (void)applicationDidFinishLaunching:(NSNotification *)aNotification;
@end
void run();
void killOtherInstances();
bool askToMoveToApplications();
int createSymlinkWithAuthorization();
int installSymlink();
extern void Restart();
extern void Quit();

282
app/app_darwin.m Normal file
View File

@@ -0,0 +1,282 @@
#import <AppKit/AppKit.h>
#import <Cocoa/Cocoa.h>
#import <CoreServices/CoreServices.h>
#import <Security/Security.h>
#import <ServiceManagement/ServiceManagement.h>
#import "app_darwin.h"
@interface AppDelegate ()
@property (strong, nonatomic) NSStatusItem *statusItem;
@end
@implementation AppDelegate
- (void)applicationDidFinishLaunching:(NSNotification *)aNotification {
// show status menu
NSMenu *menu = [[NSMenu alloc] init];
NSMenuItem *aboutMenuItem = [[NSMenuItem alloc] initWithTitle:@"About Ollama" action:@selector(aboutOllama) keyEquivalent:@""];
[aboutMenuItem setTarget:self];
[menu addItem:aboutMenuItem];
// Settings submenu
NSMenu *settingsMenu = [[NSMenu alloc] initWithTitle:@"Settings"];
// Submenu items
NSMenuItem *chooseModelDirectoryItem = [[NSMenuItem alloc] initWithTitle:@"Choose model directory..." action:@selector(chooseModelDirectory) keyEquivalent:@""];
[chooseModelDirectoryItem setTarget:self];
[chooseModelDirectoryItem setEnabled:YES];
[settingsMenu addItem:chooseModelDirectoryItem];
NSMenuItem *exposeExternallyItem = [[NSMenuItem alloc] initWithTitle:@"Allow external connections" action:@selector(toggleExposeExternally:) keyEquivalent:@""];
[exposeExternallyItem setTarget:self];
[exposeExternallyItem setState:NSOffState]; // Set initial state to off
[exposeExternallyItem setEnabled:YES];
[settingsMenu addItem:exposeExternallyItem];
NSMenuItem *allowCrossOriginItem = [[NSMenuItem alloc] initWithTitle:@"Allow browser requests" action:@selector(toggleCrossOrigin:) keyEquivalent:@""];
[allowCrossOriginItem setTarget:self];
[allowCrossOriginItem setState:NSOffState]; // Set initial state to off
[allowCrossOriginItem setEnabled:YES];
[settingsMenu addItem:allowCrossOriginItem];
NSMenuItem *settingsMenuItem = [[NSMenuItem alloc] initWithTitle:@"Settings" action:nil keyEquivalent:@""];
[settingsMenuItem setSubmenu:settingsMenu];
[menu addItem:settingsMenuItem];
[menu addItemWithTitle:@"Quit Ollama" action:@selector(quit) keyEquivalent:@"q"];
self.statusItem = [[NSStatusBar systemStatusBar] statusItemWithLength:NSVariableStatusItemLength];
[self.statusItem addObserver:self forKeyPath:@"button.effectiveAppearance" options:NSKeyValueObservingOptionNew|NSKeyValueObservingOptionInitial context:nil];
self.statusItem.menu = menu;
[self showIcon];
}
- (void)aboutOllama {
[[NSApplication sharedApplication] orderFrontStandardAboutPanel:nil];
}
- (void)toggleCrossOrigin:(id)sender {
NSMenuItem *item = (NSMenuItem *)sender;
if ([item state] == NSOffState) {
// Do something when cross-origin requests are allowed
[item setState:NSOnState];
} else {
// Do something when cross-origin requests are disallowed
[item setState:NSOffState];
}
}
- (void)toggleExposeExternally:(id)sender {
NSMenuItem *item = (NSMenuItem *)sender;
if ([item state] == NSOffState) {
// Do something when Ollama is exposed externally
[item setState:NSOnState];
} else {
// Do something when Ollama is not exposed externally
[item setState:NSOffState];
}
}
- (void)chooseModelDirectory {
NSOpenPanel *openPanel = [NSOpenPanel openPanel];
[openPanel setCanChooseFiles:NO];
[openPanel setCanChooseDirectories:YES];
[openPanel setAllowsMultipleSelection:NO];
NSInteger result = [openPanel runModal];
if (result == NSModalResponseOK) {
NSURL *selectedDirectoryURL = [openPanel URLs].firstObject;
// Do something with the selected directory URL
}
}
-(void) showIcon {
NSAppearance* appearance = self.statusItem.button.effectiveAppearance;
NSString* appearanceName = (NSString*)(appearance.name);
NSString* iconName = [[appearanceName lowercaseString] containsString:@"dark"] ? @"iconDark" : @"icon";
NSImage* statusImage = [NSImage imageNamed:iconName];
[statusImage setTemplate:YES];
self.statusItem.button.image = statusImage;
}
-(void)observeValueForKeyPath:(NSString *)keyPath ofObject:(id)object change:(NSDictionary<NSKeyValueChangeKey,id> *)change context:(void *)context {
[self showIcon];
}
- (void)quit {
[NSApp stop:nil];
}
@end
void run() {
@autoreleasepool {
[NSApplication sharedApplication];
AppDelegate *appDelegate = [[AppDelegate alloc] init];
[NSApp setDelegate:appDelegate];
[NSApp run];
}
}
// killOtherInstances kills all other instances of the app currently
// running. This way we can ensure that only the most recently started
// instance of Ollama is running
void killOtherInstances() {
pid_t pid = getpid();
NSArray *all = [[NSWorkspace sharedWorkspace] runningApplications];
NSMutableArray *apps = [NSMutableArray array];
for (NSRunningApplication *app in all) {
if ([app.bundleIdentifier isEqualToString:[[NSBundle mainBundle] bundleIdentifier]] ||
[app.bundleIdentifier isEqualToString:@"ai.ollama.ollama"] ||
[app.bundleIdentifier isEqualToString:@"com.electron.ollama"]) {
if (app.processIdentifier != pid) {
[apps addObject:app];
}
}
}
for (NSRunningApplication *app in apps) {
kill(app.processIdentifier, SIGTERM);
}
NSDate *startTime = [NSDate date];
for (NSRunningApplication *app in apps) {
while (!app.terminated) {
if (-[startTime timeIntervalSinceNow] >= 5) {
kill(app.processIdentifier, SIGKILL);
break;
}
[[NSRunLoop currentRunLoop] runUntilDate:[NSDate dateWithTimeIntervalSinceNow:0.1]];
}
}
}
bool askToMoveToApplications() {
NSString *bundlePath = [[NSBundle mainBundle] bundlePath];
if ([bundlePath hasPrefix:@"/Applications"]) {
return false;
}
NSAlert *alert = [[NSAlert alloc] init];
[alert setMessageText:@"Move to Applications?"];
[alert setInformativeText:@"Ollama works best when run from the Applications directory."];
[alert addButtonWithTitle:@"Move to Applications"];
[alert addButtonWithTitle:@"Don't move"];
[NSApp activateIgnoringOtherApps:YES];
if ([alert runModal] != NSAlertFirstButtonReturn) {
return false;
}
// move to applications
NSString *applicationsPath = @"/Applications";
NSString *newPath = [applicationsPath stringByAppendingPathComponent:@"Ollama.app"];
NSFileManager *fileManager = [NSFileManager defaultManager];
// Check if the newPath already exists
if ([fileManager fileExistsAtPath:newPath]) {
NSError *removeError = nil;
[fileManager removeItemAtPath:newPath error:&removeError];
if (removeError) {
NSLog(@"Error removing file at %@: %@", newPath, removeError);
return false; // or handle the error
}
}
NSError *moveError = nil;
[fileManager moveItemAtPath:bundlePath toPath:newPath error:&moveError];
if (moveError) {
NSLog(@"Error moving file from %@ to %@: %@", bundlePath, newPath, moveError);
return false;
}
NSLog(@"Opening %@", newPath);
NSError *error = nil;
NSWorkspace *workspace = [NSWorkspace sharedWorkspace];
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
[workspace launchApplicationAtURL:[NSURL fileURLWithPath:newPath]
options:NSWorkspaceLaunchNewInstance | NSWorkspaceLaunchDefault
configuration:@{}
error:&error];
return true;
}
int installSymlink() {
NSString *linkPath = @"/usr/local/bin/ollama";
NSError *error = nil;
NSFileManager *fileManager = [NSFileManager defaultManager];
NSString *symlinkPath = [fileManager destinationOfSymbolicLinkAtPath:linkPath error:&error];
NSString *bundlePath = [[NSBundle mainBundle] bundlePath];
NSString *execPath = [[NSBundle mainBundle] executablePath];
NSString *resPath = [[NSBundle mainBundle] pathForResource:@"ollama" ofType:nil];
// if the symlink already exists and points to the right place, don't prompt
if ([symlinkPath isEqualToString:resPath]) {
NSLog(@"symbolic link already exists and points to the right place");
return 0;
}
NSString *authorizationPrompt = @"Ollama is trying to install its command line interface (CLI) tool.";
AuthorizationRef auth = NULL;
OSStatus createStatus = AuthorizationCreate(NULL, kAuthorizationEmptyEnvironment, kAuthorizationFlagDefaults, &auth);
if (createStatus != errAuthorizationSuccess) {
NSLog(@"Error creating authorization");
return -1;
}
NSString * bundleIdentifier = [[NSBundle mainBundle] bundleIdentifier];
NSString *rightNameString = [NSString stringWithFormat:@"%@.%@", bundleIdentifier, @"auth3"];
const char *rightName = rightNameString.UTF8String;
OSStatus getRightResult = AuthorizationRightGet(rightName, NULL);
if (getRightResult == errAuthorizationDenied) {
if (AuthorizationRightSet(auth, rightName, (__bridge CFTypeRef _Nonnull)(@(kAuthorizationRuleAuthenticateAsAdmin)), (__bridge CFStringRef _Nullable)(authorizationPrompt), NULL, NULL) != errAuthorizationSuccess) {
NSLog(@"Failed to set right");
return -1;
}
}
AuthorizationItem right = { .name = rightName, .valueLength = 0, .value = NULL, .flags = 0 };
AuthorizationRights rights = { .count = 1, .items = &right };
AuthorizationFlags flags = (AuthorizationFlags)(kAuthorizationFlagExtendRights | kAuthorizationFlagInteractionAllowed);
AuthorizationItem iconAuthorizationItem = {.name = kAuthorizationEnvironmentIcon, .valueLength = 0, .value = NULL, .flags = 0};
AuthorizationEnvironment authorizationEnvironment = {.count = 0, .items = NULL};
BOOL failedToUseSystemDomain = NO;
OSStatus copyStatus = AuthorizationCopyRights(auth, &rights, &authorizationEnvironment, flags, NULL);
if (copyStatus != errAuthorizationSuccess) {
failedToUseSystemDomain = YES;
if (copyStatus == errAuthorizationCanceled) {
NSLog(@"User cancelled authorization");
return -1;
} else {
NSLog(@"Failed copying system domain rights: %d", copyStatus);
return -1;
}
}
const char *toolPath = "/bin/ln";
const char *args[] = {"-s", "-F", [resPath UTF8String], "/usr/local/bin/ollama", NULL};
FILE *pipe = NULL;
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
OSStatus status = AuthorizationExecuteWithPrivileges(auth, toolPath, kAuthorizationFlagDefaults, (char *const *)args, &pipe);
if (status != errAuthorizationSuccess) {
NSLog(@"Failed to create symlink");
return -1;
}
AuthorizationFree(auth, kAuthorizationFlagDestroyRights);
return 0;
}

166
app/app_windows.go Normal file
View File

@@ -0,0 +1,166 @@
package main
import (
"context"
"errors"
"fmt"
"log"
"log/slog"
"os"
"os/exec"
"os/signal"
"path/filepath"
"strings"
"syscall"
"github.com/ollama/ollama/app/lifecycle"
"github.com/ollama/ollama/app/store"
"github.com/ollama/ollama/app/tray"
"github.com/ollama/ollama/app/updater"
)
func init() {
AppName += ".exe"
CLIName += ".exe"
// Logs, configs, downloads go to LOCALAPPDATA
localAppData := os.Getenv("LOCALAPPDATA")
AppDataDir = filepath.Join(localAppData, "Ollama")
AppLogFile = filepath.Join(AppDataDir, "app.log")
ServerLogFile = filepath.Join(AppDataDir, "server.log")
// Executables are stored in APPDATA
AppDir = filepath.Join(localAppData, "Programs", "Ollama")
// Make sure we have PATH set correctly for any spawned children
paths := strings.Split(os.Getenv("PATH"), ";")
// Start with whatever we find in the PATH/LD_LIBRARY_PATH
found := false
for _, path := range paths {
d, err := filepath.Abs(path)
if err != nil {
continue
}
if strings.EqualFold(AppDir, d) {
found = true
}
}
if !found {
paths = append(paths, AppDir)
pathVal := strings.Join(paths, ";")
slog.Debug("setting PATH=" + pathVal)
err := os.Setenv("PATH", pathVal)
if err != nil {
slog.Error(fmt.Sprintf("failed to update PATH: %s", err))
}
}
// Make sure our logging dir exists
_, err := os.Stat(AppDataDir)
if errors.Is(err, os.ErrNotExist) {
if err := os.MkdirAll(AppDataDir, 0o755); err != nil {
slog.Error(fmt.Sprintf("create ollama dir %s: %v", AppDataDir, err))
}
}
}
func ShowLogs() {
cmd_path := "c:\\Windows\\system32\\cmd.exe"
slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir))
cmd := exec.Command(cmd_path, "/c", "start", AppDataDir)
cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: 0x08000000}
err := cmd.Start()
if err != nil {
slog.Error(fmt.Sprintf("Failed to open log dir: %s", err))
}
}
func Start() {
cmd_path := "c:\\Windows\\system32\\cmd.exe"
slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir))
cmd := exec.Command(cmd_path, "/c", "start", AppDataDir)
cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: 0x08000000}
err := cmd.Start()
if err != nil {
slog.Error(fmt.Sprintf("Failed to open log dir: %s", err))
}
}
func run() {
initLogging()
slog.Info("ollama windows app started")
ctx, cancel := context.WithCancel(context.Background())
var done chan int
t, err := tray.NewTray()
if err != nil {
log.Fatalf("Failed to start: %s", err)
}
callbacks := t.GetCallbacks()
signals := make(chan os.Signal, 1)
signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
go func() {
slog.Debug("starting callback loop")
for {
select {
case <-callbacks.Quit:
slog.Debug("quit called")
t.Quit()
case <-signals:
slog.Debug("shutting down due to signal")
t.Quit()
case <-callbacks.Update:
err := updater.DoUpgrade(cancel, done)
if err != nil {
slog.Warn(fmt.Sprintf("upgrade attempt failed: %s", err))
}
case <-callbacks.ShowLogs:
ShowLogs()
case <-callbacks.DoFirstUse:
err := lifecycle.GetStarted()
if err != nil {
slog.Warn(fmt.Sprintf("Failed to launch getting started shell: %s", err))
}
}
}
}()
if !store.GetFirstTimeRun() {
slog.Debug("First time run")
err = t.DisplayFirstUseNotification()
if err != nil {
slog.Debug(fmt.Sprintf("XXX failed to display first use notification %v", err))
}
store.SetFirstTimeRun(true)
} else {
slog.Debug("Not first time, skipping first run notification")
}
if isServerRunning(ctx) {
slog.Info("Detected another instance of ollama running, exiting")
os.Exit(1)
}
done, err = SpawnServer(ctx, CLIName)
if err != nil {
// TODO - should we retry in a backoff loop?
// TODO - should we pop up a warning and maybe add a menu item to view application logs?
slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err))
done = make(chan int, 1)
done <- 1
}
updater.StartBackgroundUpdaterChecker(ctx, t.UpdateAvailable)
t.Run()
cancel()
slog.Info("Waiting for ollama server to shutdown...")
if done != nil {
<-done
}
slog.Info("Ollama app exiting")
}

View File

@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDisplayName</key>
<string>Ollama</string>
<key>CFBundleExecutable</key>
<string>Ollama</string>
<key>CFBundleIconFile</key>
<string>icon.icns</string>
<key>CFBundleIdentifier</key>
<string>com.ollama.ollama</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>Ollama</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>0.0.0</string>
<key>CFBundleVersion</key>
<string>0.0.0</string>
<key>DTCompiler</key>
<string>com.apple.compilers.llvm.clang.1_0</string>
<key>DTSDKBuild</key>
<string>22E245</string>
<key>DTSDKName</key>
<string>macosx13.3</string>
<key>DTXcode</key>
<string>1431</string>
<key>DTXcodeBuild</key>
<string>14E300c</string>
<key>LSApplicationCategoryType</key>
<string>public.app-category.developer-tools</string>
<key>LSMinimumSystemVersion</key>
<string>11.0</string>
<key>LSUIElement</key>
<true/>
</dict>
</plist>

Binary file not shown.

After

Width:  |  Height:  |  Size: 382 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 691 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 382 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 721 B

View File

@@ -0,0 +1,7 @@
package lifecycle
import "fmt"
func GetStarted() error {
return fmt.Errorf("GetStarted not implemented")
}

View File

@@ -1,9 +0,0 @@
//go:build !windows
package lifecycle
import "errors"
func GetStarted() error {
return errors.New("not implemented")
}

View File

@@ -34,6 +34,7 @@ func GetStarted() error {
Sys: &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false}, Sys: &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
} }
proc, err := os.StartProcess(args[0], args, attrs) proc, err := os.StartProcess(args[0], args, attrs)
if err != nil { if err != nil {
return fmt.Errorf("unable to start getting started shell %w", err) return fmt.Errorf("unable to start getting started shell %w", err)
} }

View File

@@ -1,92 +0,0 @@
package lifecycle
import (
"context"
"fmt"
"log"
"log/slog"
"os"
"os/signal"
"syscall"
"github.com/ollama/ollama/app/store"
"github.com/ollama/ollama/app/tray"
)
func Run() {
InitLogging()
ctx, cancel := context.WithCancel(context.Background())
var done chan int
t, err := tray.NewTray()
if err != nil {
log.Fatalf("Failed to start: %s", err)
}
callbacks := t.GetCallbacks()
signals := make(chan os.Signal, 1)
signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
go func() {
slog.Debug("starting callback loop")
for {
select {
case <-callbacks.Quit:
slog.Debug("quit called")
t.Quit()
case <-signals:
slog.Debug("shutting down due to signal")
t.Quit()
case <-callbacks.Update:
err := DoUpgrade(cancel, done)
if err != nil {
slog.Warn(fmt.Sprintf("upgrade attempt failed: %s", err))
}
case <-callbacks.ShowLogs:
ShowLogs()
case <-callbacks.DoFirstUse:
err := GetStarted()
if err != nil {
slog.Warn(fmt.Sprintf("Failed to launch getting started shell: %s", err))
}
}
}
}()
// Are we first use?
if !store.GetFirstTimeRun() {
slog.Debug("First time run")
err = t.DisplayFirstUseNotification()
if err != nil {
slog.Debug(fmt.Sprintf("XXX failed to display first use notification %v", err))
}
store.SetFirstTimeRun(true)
} else {
slog.Debug("Not first time, skipping first run notification")
}
if IsServerRunning(ctx) {
slog.Info("Detected another instance of ollama running, exiting")
os.Exit(1)
} else {
done, err = SpawnServer(ctx, CLIName)
if err != nil {
// TODO - should we retry in a backoff loop?
// TODO - should we pop up a warning and maybe add a menu item to view application logs?
slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err))
done = make(chan int, 1)
done <- 1
}
}
StartBackgroundUpdaterChecker(ctx, t.UpdateAvailable)
t.Run()
cancel()
slog.Info("Waiting for ollama server to shutdown...")
if done != nil {
<-done
}
slog.Info("Ollama app exiting")
}

View File

@@ -1,80 +0,0 @@
package lifecycle
import (
"fmt"
"log/slog"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/ollama/ollama/envconfig"
)
func InitLogging() {
level := slog.LevelInfo
if envconfig.Debug() {
level = slog.LevelDebug
}
var logFile *os.File
var err error
// Detect if we're a GUI app on windows, and if not, send logs to console
if os.Stderr.Fd() != 0 {
// Console app detected
logFile = os.Stderr
// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
} else {
rotateLogs(AppLogFile)
logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
if err != nil {
slog.Error(fmt.Sprintf("failed to create server log %v", err))
return
}
}
handler := slog.NewTextHandler(logFile, &slog.HandlerOptions{
Level: level,
AddSource: true,
ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
if attr.Key == slog.SourceKey {
source := attr.Value.Any().(*slog.Source)
source.File = filepath.Base(source.File)
}
return attr
},
})
slog.SetDefault(slog.New(handler))
slog.Info("ollama app started")
}
func rotateLogs(logFile string) {
if _, err := os.Stat(logFile); os.IsNotExist(err) {
return
}
index := strings.LastIndex(logFile, ".")
pre := logFile[:index]
post := "." + logFile[index+1:]
for i := LogRotationCount; i > 0; i-- {
older := pre + "-" + strconv.Itoa(i) + post
newer := pre + "-" + strconv.Itoa(i-1) + post
if i == 1 {
newer = pre + post
}
if _, err := os.Stat(newer); err == nil {
if _, err := os.Stat(older); err == nil {
err := os.Remove(older)
if err != nil {
slog.Warn("Failed to remove older log", "older", older, "error", err)
continue
}
}
err := os.Rename(newer, older)
if err != nil {
slog.Warn("Failed to rotate log", "older", older, "newer", newer, "error", err)
}
}
}
}

View File

@@ -1,9 +0,0 @@
//go:build !windows
package lifecycle
import "log/slog"
func ShowLogs() {
slog.Warn("not implemented")
}

View File

@@ -1,44 +0,0 @@
package lifecycle
import (
"os"
"path/filepath"
"strconv"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestRotateLogs(t *testing.T) {
logDir := t.TempDir()
logFile := filepath.Join(logDir, "testlog.log")
// No log exists
rotateLogs(logFile)
require.NoError(t, os.WriteFile(logFile, []byte("1"), 0o644))
assert.FileExists(t, logFile)
// First rotation
rotateLogs(logFile)
assert.FileExists(t, filepath.Join(logDir, "testlog-1.log"))
assert.NoFileExists(t, filepath.Join(logDir, "testlog-2.log"))
assert.NoFileExists(t, logFile)
// Should be a no-op without a new log
rotateLogs(logFile)
assert.FileExists(t, filepath.Join(logDir, "testlog-1.log"))
assert.NoFileExists(t, filepath.Join(logDir, "testlog-2.log"))
assert.NoFileExists(t, logFile)
for i := 2; i <= LogRotationCount+1; i++ {
require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0o644))
assert.FileExists(t, logFile)
rotateLogs(logFile)
assert.NoFileExists(t, logFile)
for j := 1; j < i; j++ {
assert.FileExists(t, filepath.Join(logDir, "testlog-"+strconv.Itoa(j)+".log"))
}
assert.NoFileExists(t, filepath.Join(logDir, "testlog-"+strconv.Itoa(i+1)+".log"))
}
}

View File

@@ -1,19 +0,0 @@
package lifecycle
import (
"fmt"
"log/slog"
"os/exec"
"syscall"
)
func ShowLogs() {
cmd_path := "c:\\Windows\\system32\\cmd.exe"
slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir))
cmd := exec.Command(cmd_path, "/c", "start", AppDataDir)
cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: 0x08000000}
err := cmd.Start()
if err != nil {
slog.Error(fmt.Sprintf("Failed to open log dir: %s", err))
}
}

View File

@@ -16,12 +16,11 @@ var (
AppDir = "/opt/Ollama" AppDir = "/opt/Ollama"
AppDataDir = "/opt/Ollama" AppDataDir = "/opt/Ollama"
// TODO - should there be a distinct log dir? // TODO - should there be a distinct log dir?
UpdateStageDir = "/tmp" UpdateStageDir = "/tmp"
AppLogFile = "/tmp/ollama_app.log" AppLogFile = "/tmp/ollama_app.log"
ServerLogFile = "/tmp/ollama.log" ServerLogFile = "/tmp/ollama.log"
UpgradeLogFile = "/tmp/ollama_update.log" UpgradeLogFile = "/tmp/ollama_update.log"
Installer = "OllamaSetup.exe" Installer = "OllamaSetup.exe"
LogRotationCount = 5
) )
func init() { func init() {
@@ -70,10 +69,6 @@ func init() {
slog.Error(fmt.Sprintf("create ollama dir %s: %v", AppDataDir, err)) slog.Error(fmt.Sprintf("create ollama dir %s: %v", AppDataDir, err))
} }
} }
} else if runtime.GOOS == "darwin" {
// TODO
AppName += ".app"
// } else if runtime.GOOS == "linux" {
// TODO
} }
} }

44
app/log.go Normal file
View File

@@ -0,0 +1,44 @@
package main
import (
"fmt"
"log/slog"
"os"
"path/filepath"
)
func initLogging() {
level := slog.LevelInfo
if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
level = slog.LevelDebug
}
var logFile *os.File
var err error
// Detect if we're a GUI app on windows, and if not, send logs to console
if os.Stderr.Fd() != 0 {
// Console app detected
logFile = os.Stderr
// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
} else {
logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
if err != nil {
slog.Error(fmt.Sprintf("failed to create server log %v", err))
return
}
}
handler := slog.NewTextHandler(logFile, &slog.HandlerOptions{
Level: level,
AddSource: true,
ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
if attr.Key == slog.SourceKey {
source := attr.Value.Any().(*slog.Source)
source.File = filepath.Base(source.File)
}
return attr
},
})
slog.SetDefault(slog.New(handler))
}

View File

@@ -2,11 +2,15 @@ package main
// Compile with the following to get rid of the cmd pop up on windows // Compile with the following to get rid of the cmd pop up on windows
// go build -ldflags="-H windowsgui" . // go build -ldflags="-H windowsgui" .
var (
import ( AppName string
"github.com/ollama/ollama/app/lifecycle" CLIName string
AppDir string
AppDataDir string
AppLogFile string
ServerLogFile string
) )
func main() { func main() {
lifecycle.Run() run()
} }

View File

@@ -1,4 +1,4 @@
package lifecycle package main
import ( import (
"context" "context"
@@ -14,37 +14,28 @@ import (
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
) )
func getCLIFullPath(command string) string { type ServerOptions struct {
var cmdPath string Cors bool
appExe, err := os.Executable() Expose bool
if err == nil { ModelsPath string
cmdPath = filepath.Join(filepath.Dir(appExe), command)
_, err := os.Stat(cmdPath)
if err == nil {
return cmdPath
}
}
cmdPath, err = exec.LookPath(command)
if err == nil {
_, err := os.Stat(cmdPath)
if err == nil {
return cmdPath
}
}
pwd, err := os.Getwd()
if err == nil {
cmdPath = filepath.Join(pwd, command)
_, err = os.Stat(cmdPath)
if err == nil {
return cmdPath
}
}
return command
} }
func start(ctx context.Context, command string) (*exec.Cmd, error) { func start(ctx context.Context, command string, options ServerOptions) (*exec.Cmd, error) {
cmd := getCmd(ctx, getCLIFullPath(command)) cmd := getCmd(ctx, command)
// set environment variables
if options.ModelsPath != "" {
cmd.Env = append(cmd.Env, fmt.Sprintf("OLLAMA_MODELS=%s", options.ModelsPath))
}
if options.Cors {
cmd.Env = append(cmd.Env, "OLLAMA_ORIGINS=*")
}
if options.Expose {
cmd.Env = append(cmd.Env, "OLLAMA_HOST=0.0.0.0")
}
stdout, err := cmd.StdoutPipe() stdout, err := cmd.StdoutPipe()
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to spawn server stdout pipe: %w", err) return nil, fmt.Errorf("failed to spawn server stdout pipe: %w", err)
@@ -54,24 +45,11 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
return nil, fmt.Errorf("failed to spawn server stderr pipe: %w", err) return nil, fmt.Errorf("failed to spawn server stderr pipe: %w", err)
} }
rotateLogs(ServerLogFile) // TODO - rotation
logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755) logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create server log: %w", err) return nil, fmt.Errorf("failed to create server log: %w", err)
} }
logDir := filepath.Dir(ServerLogFile)
_, err = os.Stat(logDir)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("stat ollama server log dir %s: %v", logDir, err)
}
if err := os.MkdirAll(logDir, 0o755); err != nil {
return nil, fmt.Errorf("create ollama server log dir %s: %v", logDir, err)
}
}
go func() { go func() {
defer logFile.Close() defer logFile.Close()
io.Copy(logFile, stdout) //nolint:errcheck io.Copy(logFile, stdout) //nolint:errcheck
@@ -125,20 +103,25 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
return cmd, nil return cmd, nil
} }
func SpawnServer(ctx context.Context, command string) (chan int, error) { func SpawnServer(ctx context.Context, command string, options ServerOptions) (chan int, error) {
logDir := filepath.Dir(ServerLogFile)
_, err := os.Stat(logDir)
if errors.Is(err, os.ErrNotExist) {
if err := os.MkdirAll(logDir, 0o755); err != nil {
return nil, fmt.Errorf("create ollama server log dir %s: %v", logDir, err)
}
}
done := make(chan int) done := make(chan int)
go func() { go func() {
// Keep the server running unless we're shuttind down the app // Keep the server running unless we're shuttind down the app
crashCount := 0 crashCount := 0
for { for {
slog.Info("starting server...") slog.Info(fmt.Sprintf("starting server..."))
cmd, err := start(ctx, command) cmd, err := start(ctx, command, options)
if err != nil { if err != nil {
crashCount++
slog.Error(fmt.Sprintf("failed to start server %s", err)) slog.Error(fmt.Sprintf("failed to start server %s", err))
time.Sleep(500 * time.Millisecond * time.Duration(crashCount))
continue
} }
cmd.Wait() //nolint:errcheck cmd.Wait() //nolint:errcheck
@@ -164,7 +147,7 @@ func SpawnServer(ctx context.Context, command string) (chan int, error) {
return done, nil return done, nil
} }
func IsServerRunning(ctx context.Context) bool { func isServerRunning(ctx context.Context) bool {
client, err := api.ClientFromEnvironment() client, err := api.ClientFromEnvironment()
if err != nil { if err != nil {
slog.Info("unable to connect to server") slog.Info("unable to connect to server")

View File

@@ -1,6 +1,4 @@
//go:build !windows package main
package lifecycle
import ( import (
"context" "context"

View File

@@ -1,4 +1,4 @@
package lifecycle package main
import ( import (
"context" "context"
@@ -24,8 +24,7 @@ func terminate(cmd *exec.Cmd) error {
if err != nil { if err != nil {
return err return err
} }
//nolint:errcheck defer dll.Release() // nolint: errcheck
defer dll.Release()
pid := cmd.Process.Pid pid := cmd.Process.Pid
@@ -74,8 +73,7 @@ func isProcessExited(pid int) (bool, error) {
if err != nil { if err != nil {
return false, fmt.Errorf("failed to open process: %v", err) return false, fmt.Errorf("failed to open process: %v", err)
} }
//nolint:errcheck defer windows.CloseHandle(hProcess) // nolint: errcheck
defer windows.CloseHandle(hProcess)
var exitCode uint32 var exitCode uint32
err = windows.GetExitCodeProcess(hProcess, &exitCode) err = windows.GetExitCodeProcess(hProcess, &exitCode)

View File

@@ -29,6 +29,7 @@ func GetID() string {
initStore() initStore()
} }
return store.ID return store.ID
} }
func GetFirstTimeRun() bool { func GetFirstTimeRun() bool {

View File

@@ -1,13 +1,11 @@
//go:build !windows
package tray package tray
import ( import (
"errors" "fmt"
"github.com/ollama/ollama/app/tray/commontray" "github.com/ollama/ollama/app/tray/commontray"
) )
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) { func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
return nil, errors.New("not implemented") return nil, fmt.Errorf("NOT IMPLEMENTED YET")
} }

View File

@@ -11,7 +11,9 @@ import (
"golang.org/x/sys/windows" "golang.org/x/sys/windows"
) )
var quitOnce sync.Once var (
quitOnce sync.Once
)
func (t *winTray) Run() { func (t *winTray) Run() {
nativeLoop() nativeLoop()
@@ -45,6 +47,7 @@ func nativeLoop() {
default: default:
pTranslateMessage.Call(uintptr(unsafe.Pointer(m))) //nolint:errcheck pTranslateMessage.Call(uintptr(unsafe.Pointer(m))) //nolint:errcheck
pDispatchMessage.Call(uintptr(unsafe.Pointer(m))) //nolint:errcheck pDispatchMessage.Call(uintptr(unsafe.Pointer(m))) //nolint:errcheck
} }
} }
} }
@@ -157,8 +160,8 @@ func (t *winTray) wndProc(hWnd windows.Handle, message uint32, wParam, lParam ui
lResult, _, _ = pDefWindowProc.Call( lResult, _, _ = pDefWindowProc.Call(
uintptr(hWnd), uintptr(hWnd),
uintptr(message), uintptr(message),
wParam, uintptr(wParam),
lParam, uintptr(lParam),
) )
} }
return return

View File

@@ -1,71 +1,71 @@
//go:build windows //go:build windows
package wintray package wintray
import ( import (
"fmt" "fmt"
"log/slog" "log/slog"
"unsafe" "unsafe"
"golang.org/x/sys/windows" "golang.org/x/sys/windows"
) )
const ( const (
updateAvailableMenuID = 1 updatAvailableMenuID = 1
updateMenuID = updateAvailableMenuID + 1 updateMenuID = updatAvailableMenuID + 1
separatorMenuID = updateMenuID + 1 separatorMenuID = updateMenuID + 1
diagLogsMenuID = separatorMenuID + 1 diagLogsMenuID = separatorMenuID + 1
diagSeparatorMenuID = diagLogsMenuID + 1 diagSeparatorMenuID = diagLogsMenuID + 1
quitMenuID = diagSeparatorMenuID + 1 quitMenuID = diagSeparatorMenuID + 1
) )
func (t *winTray) initMenus() error { func (t *winTray) initMenus() error {
if err := t.addOrUpdateMenuItem(diagLogsMenuID, 0, diagLogsMenuTitle, false); err != nil { if err := t.addOrUpdateMenuItem(diagLogsMenuID, 0, diagLogsMenuTitle, false); err != nil {
return fmt.Errorf("unable to create menu entries %w\n", err) return fmt.Errorf("unable to create menu entries %w\n", err)
} }
if err := t.addSeparatorMenuItem(diagSeparatorMenuID, 0); err != nil { if err := t.addSeparatorMenuItem(diagSeparatorMenuID, 0); err != nil {
return fmt.Errorf("unable to create menu entries %w", err) return fmt.Errorf("unable to create menu entries %w", err)
} }
if err := t.addOrUpdateMenuItem(quitMenuID, 0, quitMenuTitle, false); err != nil { if err := t.addOrUpdateMenuItem(quitMenuID, 0, quitMenuTitle, false); err != nil {
return fmt.Errorf("unable to create menu entries %w\n", err) return fmt.Errorf("unable to create menu entries %w\n", err)
} }
return nil return nil
} }
func (t *winTray) UpdateAvailable(ver string) error { func (t *winTray) UpdateAvailable(ver string) error {
if !t.updateNotified { if !t.updateNotified {
slog.Debug("updating menu and sending notification for new update") slog.Debug("updating menu and sending notification for new update")
if err := t.addOrUpdateMenuItem(updateAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil { if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
return fmt.Errorf("unable to create menu entries %w", err) return fmt.Errorf("unable to create menu entries %w", err)
} }
if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil { if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {
return fmt.Errorf("unable to create menu entries %w", err) return fmt.Errorf("unable to create menu entries %w", err)
} }
if err := t.addSeparatorMenuItem(separatorMenuID, 0); err != nil { if err := t.addSeparatorMenuItem(separatorMenuID, 0); err != nil {
return fmt.Errorf("unable to create menu entries %w", err) return fmt.Errorf("unable to create menu entries %w", err)
} }
iconFilePath, err := iconBytesToFilePath(wt.updateIcon) iconFilePath, err := iconBytesToFilePath(wt.updateIcon)
if err != nil { if err != nil {
return fmt.Errorf("unable to write icon data to temp file: %w", err) return fmt.Errorf("unable to write icon data to temp file: %w", err)
} }
if err := wt.setIcon(iconFilePath); err != nil { if err := wt.setIcon(iconFilePath); err != nil {
return fmt.Errorf("unable to set icon: %w", err) return fmt.Errorf("unable to set icon: %w", err)
} }
t.updateNotified = true t.updateNotified = true
t.pendingUpdate = true t.pendingUpdate = true
// Now pop up the notification // Now pop up the notification
t.muNID.Lock() t.muNID.Lock()
defer t.muNID.Unlock() defer t.muNID.Unlock()
copy(t.nid.InfoTitle[:], windows.StringToUTF16(updateTitle)) copy(t.nid.InfoTitle[:], windows.StringToUTF16(updateTitle))
copy(t.nid.Info[:], windows.StringToUTF16(fmt.Sprintf(updateMessage, ver))) copy(t.nid.Info[:], windows.StringToUTF16(fmt.Sprintf(updateMessage, ver)))
t.nid.Flags |= NIF_INFO t.nid.Flags |= NIF_INFO
t.nid.Timeout = 10 t.nid.Timeout = 10
t.nid.Size = uint32(unsafe.Sizeof(*wt.nid)) t.nid.Size = uint32(unsafe.Sizeof(*wt.nid))
err = t.nid.modify() err = t.nid.modify()
if err != nil { if err != nil {
return err return err
} }
} }
return nil return nil
} }

View File

@@ -11,12 +11,10 @@ import (
"path/filepath" "path/filepath"
"sort" "sort"
"sync" "sync"
"syscall"
"unsafe" "unsafe"
"golang.org/x/sys/windows"
"github.com/ollama/ollama/app/tray/commontray" "github.com/ollama/ollama/app/tray/commontray"
"golang.org/x/sys/windows"
) )
// Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32 // Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
@@ -188,7 +186,7 @@ func (t *winTray) initInstance() error {
t.muNID.Lock() t.muNID.Lock()
defer t.muNID.Unlock() defer t.muNID.Unlock()
t.nid = &notifyIconData{ t.nid = &notifyIconData{
Wnd: t.window, Wnd: windows.Handle(t.window),
ID: 100, ID: 100,
Flags: NIF_MESSAGE, Flags: NIF_MESSAGE,
CallbackMessage: t.wmSystrayMessage, CallbackMessage: t.wmSystrayMessage,
@@ -199,6 +197,7 @@ func (t *winTray) initInstance() error {
} }
func (t *winTray) createMenu() error { func (t *winTray) createMenu() error {
menuHandle, _, err := pCreatePopupMenu.Call() menuHandle, _, err := pCreatePopupMenu.Call()
if menuHandle == 0 { if menuHandle == 0 {
return err return err
@@ -247,7 +246,7 @@ func (t *winTray) addOrUpdateMenuItem(menuItemId uint32, parentId uint32, title
mi := menuItemInfo{ mi := menuItemInfo{
Mask: MIIM_FTYPE | MIIM_STRING | MIIM_ID | MIIM_STATE, Mask: MIIM_FTYPE | MIIM_STRING | MIIM_ID | MIIM_STATE,
Type: MFT_STRING, Type: MFT_STRING,
ID: menuItemId, ID: uint32(menuItemId),
TypeData: titlePtr, TypeData: titlePtr,
Cch: uint32(len(title)), Cch: uint32(len(title)),
} }
@@ -303,10 +302,11 @@ func (t *winTray) addOrUpdateMenuItem(menuItemId uint32, parentId uint32, title
} }
func (t *winTray) addSeparatorMenuItem(menuItemId, parentId uint32) error { func (t *winTray) addSeparatorMenuItem(menuItemId, parentId uint32) error {
mi := menuItemInfo{ mi := menuItemInfo{
Mask: MIIM_FTYPE | MIIM_ID | MIIM_STATE, Mask: MIIM_FTYPE | MIIM_ID | MIIM_STATE,
Type: MFT_SEPARATOR, Type: MFT_SEPARATOR,
ID: menuItemId, ID: uint32(menuItemId),
} }
mi.Size = uint32(unsafe.Sizeof(mi)) mi.Size = uint32(unsafe.Sizeof(mi))
@@ -416,7 +416,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash) iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)
if _, err := os.Stat(iconFilePath); os.IsNotExist(err) { if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
if err := os.WriteFile(iconFilePath, iconBytes, 0o644); err != nil { if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil {
return "", err return "", err
} }
} }
@@ -426,6 +426,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
// Loads an image from file and shows it in tray. // Loads an image from file and shows it in tray.
// Shell_NotifyIcon: https://msdn.microsoft.com/en-us/library/windows/desktop/bb762159(v=vs.85).aspx // Shell_NotifyIcon: https://msdn.microsoft.com/en-us/library/windows/desktop/bb762159(v=vs.85).aspx
func (t *winTray) setIcon(src string) error { func (t *winTray) setIcon(src string) error {
h, err := t.loadIconFrom(src) h, err := t.loadIconFrom(src)
if err != nil { if err != nil {
return err return err
@@ -434,12 +435,7 @@ func (t *winTray) setIcon(src string) error {
t.muNID.Lock() t.muNID.Lock()
defer t.muNID.Unlock() defer t.muNID.Unlock()
t.nid.Icon = h t.nid.Icon = h
t.nid.Flags |= NIF_ICON | NIF_TIP t.nid.Flags |= NIF_ICON
if toolTipUTF16, err := syscall.UTF16FromString(commontray.ToolTip); err == nil {
copy(t.nid.Tip[:], toolTipUTF16)
} else {
return err
}
t.nid.Size = uint32(unsafe.Sizeof(*t.nid)) t.nid.Size = uint32(unsafe.Sizeof(*t.nid))
return t.nid.modify() return t.nid.modify()
@@ -448,6 +444,7 @@ func (t *winTray) setIcon(src string) error {
// Loads an image from file to be shown in tray or menu item. // Loads an image from file to be shown in tray or menu item.
// LoadImage: https://msdn.microsoft.com/en-us/library/windows/desktop/ms648045(v=vs.85).aspx // LoadImage: https://msdn.microsoft.com/en-us/library/windows/desktop/ms648045(v=vs.85).aspx
func (t *winTray) loadIconFrom(src string) (windows.Handle, error) { func (t *winTray) loadIconFrom(src string) (windows.Handle, error) {
// Save and reuse handles of loaded images // Save and reuse handles of loaded images
t.muLoadedImages.RLock() t.muLoadedImages.RLock()
h, ok := t.loadedImages[src] h, ok := t.loadedImages[src]

View File

@@ -61,7 +61,6 @@ const (
MIIM_SUBMENU = 0x00000004 MIIM_SUBMENU = 0x00000004
MIM_APPLYTOSUBMENUS = 0x80000000 MIM_APPLYTOSUBMENUS = 0x80000000
NIF_ICON = 0x00000002 NIF_ICON = 0x00000002
NIF_TIP = 0x00000004
NIF_INFO = 0x00000010 NIF_INFO = 0x00000010
NIF_MESSAGE = 0x00000001 NIF_MESSAGE = 0x00000001
SW_HIDE = 0 SW_HIDE = 0

View File

@@ -1,4 +1,4 @@
package lifecycle package updater
import ( import (
"context" "context"
@@ -15,7 +15,6 @@ import (
"path" "path"
"path/filepath" "path/filepath"
"runtime" "runtime"
"strconv"
"strings" "strings"
"time" "time"
@@ -23,6 +22,10 @@ import (
"github.com/ollama/ollama/version" "github.com/ollama/ollama/version"
) )
var (
UpdateStageDir string
)
var ( var (
UpdateCheckURLBase = "https://ollama.com/api/update" UpdateCheckURLBase = "https://ollama.com/api/update"
UpdateDownloaded = false UpdateDownloaded = false
@@ -47,7 +50,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
query.Add("os", runtime.GOOS) query.Add("os", runtime.GOOS)
query.Add("arch", runtime.GOARCH) query.Add("arch", runtime.GOARCH)
query.Add("version", version.Version) query.Add("version", version.Version)
query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10)) query.Add("ts", fmt.Sprintf("%d", time.Now().Unix()))
nonce, err := auth.NewNonce(rand.Reader, 16) nonce, err := auth.NewNonce(rand.Reader, 16)
if err != nil { if err != nil {
@@ -79,7 +82,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode == http.StatusNoContent { if resp.StatusCode == 204 {
slog.Debug("check update response 204 (current version is up to date)") slog.Debug("check update response 204 (current version is up to date)")
return false, updateResp return false, updateResp
} }
@@ -88,7 +91,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
slog.Warn(fmt.Sprintf("failed to read body response: %s", err)) slog.Warn(fmt.Sprintf("failed to read body response: %s", err))
} }
if resp.StatusCode != http.StatusOK { if resp.StatusCode != 200 {
slog.Info(fmt.Sprintf("check update error %d - %.96s", resp.StatusCode, string(body))) slog.Info(fmt.Sprintf("check update error %d - %.96s", resp.StatusCode, string(body)))
return false, updateResp return false, updateResp
} }
@@ -115,7 +118,7 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
if err != nil { if err != nil {
return fmt.Errorf("error checking update: %w", err) return fmt.Errorf("error checking update: %w", err)
} }
if resp.StatusCode != http.StatusOK { if resp.StatusCode != 200 {
return fmt.Errorf("unexpected status attempting to download update %d", resp.StatusCode) return fmt.Errorf("unexpected status attempting to download update %d", resp.StatusCode)
} }
resp.Body.Close() resp.Body.Close()
@@ -124,7 +127,7 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
slog.Debug("no etag detected, falling back to filename based dedup") slog.Debug("no etag detected, falling back to filename based dedup")
etag = "_" etag = "_"
} }
filename := Installer filename := "OllamaSetup.exe"
_, params, err := mime.ParseMediaType(resp.Header.Get("content-disposition")) _, params, err := mime.ParseMediaType(resp.Header.Get("content-disposition"))
if err == nil { if err == nil {
filename = params["filename"] filename = params["filename"]

View File

@@ -1,12 +1,10 @@
//go:build !windows package updater
package lifecycle
import ( import (
"context" "context"
"errors" "fmt"
) )
func DoUpgrade(cancel context.CancelFunc, done chan int) error { func DoUpgrade(cancel context.CancelFunc, done chan int) error {
return errors.New("not implemented") return fmt.Errorf("DoUpgrade not yet implemented")
} }

View File

@@ -1,8 +1,7 @@
package lifecycle package updater
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"log/slog" "log/slog"
"os" "os"
@@ -10,13 +9,19 @@ import (
"path/filepath" "path/filepath"
) )
func init() {
UpdateStageDir = filepath.Join(os.Getenv("LOCALAPPDATA"), "Ollama", "updates")
}
func DoUpgrade(cancel context.CancelFunc, done chan int) error { func DoUpgrade(cancel context.CancelFunc, done chan int) error {
logFile := filepath.Join(os.Getenv("LOCALAPPDATA"), "Ollama", "upgrade.log")
files, err := filepath.Glob(filepath.Join(UpdateStageDir, "*", "*.exe")) // TODO generalize for multiplatform files, err := filepath.Glob(filepath.Join(UpdateStageDir, "*", "*.exe")) // TODO generalize for multiplatform
if err != nil { if err != nil {
return fmt.Errorf("failed to lookup downloads: %s", err) return fmt.Errorf("failed to lookup downloads: %s", err)
} }
if len(files) == 0 { if len(files) == 0 {
return errors.New("no update downloads found") return fmt.Errorf("no update downloads found")
} else if len(files) > 1 { } else if len(files) > 1 {
// Shouldn't happen // Shouldn't happen
slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files)) slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
@@ -24,21 +29,24 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
installerExe := files[0] installerExe := files[0]
slog.Info("starting upgrade with " + installerExe) slog.Info("starting upgrade with " + installerExe)
slog.Info("upgrade log file " + UpgradeLogFile) slog.Info("upgrade log file " + logFile)
// When running in debug mode, we'll be "verbose" and let the installer pop up and prompt // When running in debug mode, we'll be "verbose" and let the installer pop up and prompt
installArgs := []string{ installArgs := []string{
"/CLOSEAPPLICATIONS", // Quit the tray app if it's still running "/CLOSEAPPLICATIONS", // Quit the tray app if it's still running
"/LOG=" + filepath.Base(UpgradeLogFile), // Only relative seems reliable, so set pwd "/LOG=" + filepath.Base(logFile), // Only relative seems reliable, so set pwd
"/FORCECLOSEAPPLICATIONS", // Force close the tray app - might be needed "/FORCECLOSEAPPLICATIONS", // Force close the tray app - might be needed
} }
// make the upgrade as quiet as possible (no GUI, no prompts) // When we're not in debug mode, make the upgrade as quiet as possible (no GUI, no prompts)
// TODO - temporarily disable since we're pinning in debug mode for the preview
// if debug := os.Getenv("OLLAMA_DEBUG"); debug == "" {
installArgs = append(installArgs, installArgs = append(installArgs,
"/SP", // Skip the "This will install... Do you wish to continue" prompt "/SP", // Skip the "This will install... Do you wish to continue" prompt
"/SUPPRESSMSGBOXES", "/SUPPRESSMSGBOXES",
"/SILENT", "/SILENT",
"/VERYSILENT", "/VERYSILENT",
) )
// }
// Safeguard in case we have requests in flight that need to drain... // Safeguard in case we have requests in flight that need to drain...
slog.Info("Waiting for server to shutdown") slog.Info("Waiting for server to shutdown")
@@ -51,7 +59,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
} }
slog.Debug(fmt.Sprintf("starting installer: %s %v", installerExe, installArgs)) slog.Debug(fmt.Sprintf("starting installer: %s %v", installerExe, installArgs))
os.Chdir(filepath.Dir(UpgradeLogFile)) //nolint:errcheck os.Chdir(filepath.Dir(logFile)) //nolint:errcheck
cmd := exec.Command(installerExe, installArgs...) cmd := exec.Command(installerExe, installArgs...)
if err := cmd.Start(); err != nil { if err := cmd.Start(); err != nil {
@@ -65,7 +73,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
} }
} else { } else {
// TODO - some details about why it didn't start, or is this a pedantic error case? // TODO - some details about why it didn't start, or is this a pedantic error case?
return errors.New("installer process did not start") return fmt.Errorf("installer process did not start")
} }
// TODO should we linger for a moment and check to make sure it's actually running by checking the pid? // TODO should we linger for a moment and check to make sure it's actually running by checking the pid?

View File

@@ -87,11 +87,15 @@ DialogFontSize=12
[Files] [Files]
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
Source: "..\ollama.exe"; DestDir: "{app}\bin"; Flags: ignoreversion 64bit Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
Source: "..\dist\windows-{#ARCH}\lib\ollama\runners\*"; DestDir: "{app}\lib\ollama\runners"; Flags: ignoreversion 64bit recursesubdirs Source: "..\dist\windows-amd64\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
Source: "..\dist\windows-amd64\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Flags: ignoreversion recursesubdirs #if DirExists("..\dist\windows-amd64\rocm")
Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
#endif
[Icons] [Icons]
Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico" Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
@@ -99,7 +103,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico" Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
[Run] [Run]
Filename: "{cmd}"; Parameters: "/C set PATH={app}\bin;%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
[UninstallRun] [UninstallRun]
; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden ; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
@@ -118,10 +122,6 @@ Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\models"
Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\history" Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\history"
; NOTE: if the user has a custom OLLAMA_MODELS it will be preserved ; NOTE: if the user has a custom OLLAMA_MODELS it will be preserved
[InstallDelete]
Type: filesandordirs; Name: "{%TEMP}\ollama*"
Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
[Messages] [Messages]
WizardReady=Ollama Windows Preview WizardReady=Ollama Windows Preview
ReadyLabel1=%nLet's get you up and running with your own large language models. ReadyLabel1=%nLet's get you up and running with your own large language models.
@@ -129,13 +129,13 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
;FinishedHeadingLabel=Run your first model ;FinishedHeadingLabel=Run your first model
;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3.1 ;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3
;ClickFinish=%n ;ClickFinish=%n
[Registry] [Registry]
Root: HKCU; Subkey: "Environment"; \ Root: HKCU; Subkey: "Environment"; \
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}\bin"; \ ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \
Check: NeedsAddPath('{app}\bin') Check: NeedsAddPath('{app}')
[Code] [Code]

View File

@@ -4,5 +4,5 @@ write-host "Welcome to Ollama!"
write-host "" write-host ""
write-host "Run your first model:" write-host "Run your first model:"
write-host "" write-host ""
write-host "`tollama run llama3.1" write-host "`tollama run llama2"
write-host "" write-host ""

View File

@@ -5,50 +5,17 @@ import (
"context" "context"
"crypto/rand" "crypto/rand"
"encoding/base64" "encoding/base64"
"errors"
"fmt" "fmt"
"io" "io"
"log/slog" "log/slog"
"os" "os"
"path/filepath" "path/filepath"
"strings"
"golang.org/x/crypto/ssh" "golang.org/x/crypto/ssh"
) )
const defaultPrivateKey = "id_ed25519" const defaultPrivateKey = "id_ed25519"
func keyPath() (string, error) {
home, err := os.UserHomeDir()
if err != nil {
return "", err
}
return filepath.Join(home, ".ollama", defaultPrivateKey), nil
}
func GetPublicKey() (string, error) {
keyPath, err := keyPath()
if err != nil {
return "", err
}
privateKeyFile, err := os.ReadFile(keyPath)
if err != nil {
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
return "", err
}
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
if err != nil {
return "", err
}
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
return strings.TrimSpace(string(publicKey)), nil
}
func NewNonce(r io.Reader, length int) (string, error) { func NewNonce(r io.Reader, length int) (string, error) {
nonce := make([]byte, length) nonce := make([]byte, length)
if _, err := io.ReadFull(r, nonce); err != nil { if _, err := io.ReadFull(r, nonce); err != nil {
@@ -59,11 +26,13 @@ func NewNonce(r io.Reader, length int) (string, error) {
} }
func Sign(ctx context.Context, bts []byte) (string, error) { func Sign(ctx context.Context, bts []byte) (string, error) {
keyPath, err := keyPath() home, err := os.UserHomeDir()
if err != nil { if err != nil {
return "", err return "", err
} }
keyPath := filepath.Join(home, ".ollama", defaultPrivateKey)
privateKeyFile, err := os.ReadFile(keyPath) privateKeyFile, err := os.ReadFile(keyPath)
if err != nil { if err != nil {
slog.Info(fmt.Sprintf("Failed to load private key: %v", err)) slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
@@ -79,7 +48,7 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey()) publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
parts := bytes.Split(publicKey, []byte(" ")) parts := bytes.Split(publicKey, []byte(" "))
if len(parts) < 2 { if len(parts) < 2 {
return "", errors.New("malformed public key") return "", fmt.Errorf("malformed public key")
} }
signedData, err := privateKey.Sign(rand.Reader, bts) signedData, err := privateKey.Sign(rand.Reader, bts)

View File

@@ -12,7 +12,6 @@ import (
"fmt" "fmt"
"io" "io"
"log" "log"
"math"
"net" "net"
"net/http" "net/http"
"os" "os"
@@ -20,28 +19,23 @@ import (
"path/filepath" "path/filepath"
"regexp" "regexp"
"runtime" "runtime"
"slices"
"strings" "strings"
"sync/atomic"
"syscall" "syscall"
"time" "time"
"github.com/containerd/console" "github.com/containerd/console"
"github.com/mattn/go-runewidth"
"github.com/olekukonko/tablewriter" "github.com/olekukonko/tablewriter"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"golang.org/x/crypto/ssh" "golang.org/x/crypto/ssh"
"golang.org/x/exp/slices"
"golang.org/x/term" "golang.org/x/term"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/auth"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
"github.com/ollama/ollama/parser" "github.com/ollama/ollama/parser"
"github.com/ollama/ollama/progress" "github.com/ollama/ollama/progress"
"github.com/ollama/ollama/server" "github.com/ollama/ollama/server"
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version" "github.com/ollama/ollama/version"
) )
@@ -60,13 +54,12 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
p := progress.NewProgress(os.Stderr) p := progress.NewProgress(os.Stderr)
defer p.Stop() defer p.Stop()
f, err := os.Open(filename) modelfile, err := os.ReadFile(filename)
if err != nil { if err != nil {
return err return err
} }
defer f.Close()
modelfile, err := parser.ParseFile(f) commands, err := parser.Parse(bytes.NewReader(modelfile))
if err != nil { if err != nil {
return err return err
} }
@@ -79,12 +72,11 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
status := "transferring model data" status := "transferring model data"
spinner := progress.NewSpinner(status) spinner := progress.NewSpinner(status)
p.Add(status, spinner) p.Add(status, spinner)
defer p.Stop()
for i := range modelfile.Commands { for _, c := range commands {
switch modelfile.Commands[i].Name { switch c.Name {
case "model", "adapter": case "model", "adapter":
path := modelfile.Commands[i].Args path := c.Args
if path == "~" { if path == "~" {
path = home path = home
} else if strings.HasPrefix(path, "~/") { } else if strings.HasPrefix(path, "~/") {
@@ -96,7 +88,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
} }
fi, err := os.Stat(path) fi, err := os.Stat(path)
if errors.Is(err, os.ErrNotExist) && modelfile.Commands[i].Name == "model" { if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
continue continue
} else if err != nil { } else if err != nil {
return err return err
@@ -114,12 +106,18 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
path = tempfile path = tempfile
} }
digest, err := createBlob(cmd, client, path, spinner) digest, err := createBlob(cmd, client, path)
if err != nil { if err != nil {
return err return err
} }
modelfile.Commands[i].Args = "@" + digest name := c.Name
if c.Name == "model" {
name = "from"
}
re := regexp.MustCompile(fmt.Sprintf(`(?im)^(%s)\s+%s\s*$`, name, c.Args))
modelfile = re.ReplaceAll(modelfile, []byte("$1 @"+digest))
} }
} }
@@ -147,9 +145,9 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return nil return nil
} }
quantize, _ := cmd.Flags().GetString("quantize") quantization, _ := cmd.Flags().GetString("quantization")
request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantize: quantize} request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile), Quantization: quantization}
if err := client.Create(cmd.Context(), &request, fn); err != nil { if err := client.Create(cmd.Context(), &request, fn); err != nil {
return err return err
} }
@@ -164,6 +162,9 @@ func tempZipFiles(path string) (string, error) {
} }
defer tempfile.Close() defer tempfile.Close()
zipfile := zip.NewWriter(tempfile)
defer zipfile.Close()
detectContentType := func(path string) (string, error) { detectContentType := func(path string) (string, error) {
f, err := os.Open(path) f, err := os.Open(path)
if err != nil { if err != nil {
@@ -204,17 +205,11 @@ func tempZipFiles(path string) (string, error) {
// safetensors files might be unresolved git lfs references; skip if they are // safetensors files might be unresolved git lfs references; skip if they are
// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors // covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
files = append(files, st...) files = append(files, st...)
} else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
// covers adapters.safetensors
files = append(files, st...)
} else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
// covers adapter_model.safetensors
files = append(files, st...)
} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 { } else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
// pytorch files might also be unresolved git lfs references; skip if they are // pytorch files might also be unresolved git lfs references; skip if they are
// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin // covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
files = append(files, pt...) files = append(files, pt...)
} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/zip"); len(pt) > 0 { } else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/octet-stream"); len(pt) > 0 {
// pytorch files might also be unresolved git lfs references; skip if they are // pytorch files might also be unresolved git lfs references; skip if they are
// covers consolidated.x.pth, consolidated.pth // covers consolidated.x.pth, consolidated.pth
files = append(files, pt...) files = append(files, pt...)
@@ -229,14 +224,6 @@ func tempZipFiles(path string) (string, error) {
} }
files = append(files, js...) files = append(files, js...)
// bert models require a nested config.json
// TODO(mxyng): merge this with the glob above
js, err = glob(filepath.Join(path, "**/*.json"), "text/plain")
if err != nil {
return "", err
}
files = append(files, js...)
if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 { if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob // add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
// tokenizer.model might be a unresolved git lfs reference; error if it is // tokenizer.model might be a unresolved git lfs reference; error if it is
@@ -246,9 +233,6 @@ func tempZipFiles(path string) (string, error) {
files = append(files, tks...) files = append(files, tks...)
} }
zipfile := zip.NewWriter(tempfile)
defer zipfile.Close()
for _, file := range files { for _, file := range files {
f, err := os.Open(file) f, err := os.Open(file)
if err != nil { if err != nil {
@@ -266,11 +250,6 @@ func tempZipFiles(path string) (string, error) {
return "", err return "", err
} }
zfi.Name, err = filepath.Rel(path, file)
if err != nil {
return "", err
}
zf, err := zipfile.CreateHeader(zfi) zf, err := zipfile.CreateHeader(zfi)
if err != nil { if err != nil {
return "", err return "", err
@@ -284,20 +263,13 @@ func tempZipFiles(path string) (string, error) {
return tempfile.Name(), nil return tempfile.Name(), nil
} }
func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *progress.Spinner) (string, error) { func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
bin, err := os.Open(path) bin, err := os.Open(path)
if err != nil { if err != nil {
return "", err return "", err
} }
defer bin.Close() defer bin.Close()
// Get file info to retrieve the size
fileInfo, err := bin.Stat()
if err != nil {
return "", err
}
fileSize := fileInfo.Size()
hash := sha256.New() hash := sha256.New()
if _, err := io.Copy(hash, bin); err != nil { if _, err := io.Copy(hash, bin); err != nil {
return "", err return "", err
@@ -307,50 +279,46 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *pr
return "", err return "", err
} }
var pw progressWriter
status := "transferring model data 0%"
spinner.SetMessage(status)
done := make(chan struct{})
defer close(done)
go func() {
ticker := time.NewTicker(60 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-ticker.C:
spinner.SetMessage(fmt.Sprintf("transferring model data %d%%", int(100*pw.n.Load()/fileSize)))
case <-done:
spinner.SetMessage("transferring model data 100%")
return
}
}
}()
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil)) digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil { if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
return "", err return "", err
} }
return digest, nil return digest, nil
} }
type progressWriter struct {
n atomic.Int64
}
func (w *progressWriter) Write(p []byte) (n int, err error) {
w.n.Add(int64(len(p)))
return len(p), nil
}
func RunHandler(cmd *cobra.Command, args []string) error { func RunHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
name := args[0]
// check if the model exists on the server
show, err := client.Show(cmd.Context(), &api.ShowRequest{Name: name})
var statusError api.StatusError
switch {
case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
if err := PullHandler(cmd, []string{name}); err != nil {
return err
}
show, err = client.Show(cmd.Context(), &api.ShowRequest{Name: name})
if err != nil {
return err
}
case err != nil:
return err
}
interactive := true interactive := true
opts := runOptions{ opts := runOptions{
Model: args[0], Model: args[0],
WordWrap: os.Getenv("TERM") == "xterm-256color", WordWrap: os.Getenv("TERM") == "xterm-256color",
Options: map[string]interface{}{}, Options: map[string]interface{}{},
MultiModal: slices.Contains(show.Details.Families, "clip"),
ParentModel: show.Details.ParentModel,
} }
format, err := cmd.Flags().GetString("format") format, err := cmd.Flags().GetString("format")
@@ -359,18 +327,6 @@ func RunHandler(cmd *cobra.Command, args []string) error {
} }
opts.Format = format opts.Format = format
keepAlive, err := cmd.Flags().GetString("keepalive")
if err != nil {
return err
}
if keepAlive != "" {
d, err := time.ParseDuration(keepAlive)
if err != nil {
return err
}
opts.KeepAlive = &api.Duration{Duration: d}
}
prompts := args[1:] prompts := args[1:]
// prepend stdin to the prompt if provided // prepend stdin to the prompt if provided
if !term.IsTerminal(int(os.Stdin.Fd())) { if !term.IsTerminal(int(os.Stdin.Fd())) {
@@ -394,94 +350,11 @@ func RunHandler(cmd *cobra.Command, args []string) error {
} }
opts.WordWrap = !nowrap opts.WordWrap = !nowrap
// Fill out the rest of the options based on information about the if !interactive {
// model. return generate(cmd, opts)
client, err := api.ClientFromEnvironment()
if err != nil {
return err
} }
name := args[0] return generateInteractive(cmd, opts)
info, err := func() (*api.ShowResponse, error) {
showReq := &api.ShowRequest{Name: name}
info, err := client.Show(cmd.Context(), showReq)
var se api.StatusError
if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
if err := PullHandler(cmd, []string{name}); err != nil {
return nil, err
}
return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
}
return info, err
}()
if err != nil {
return err
}
opts.MultiModal = slices.Contains(info.Details.Families, "clip")
opts.ParentModel = info.Details.ParentModel
if interactive {
if err := loadModel(cmd, &opts); err != nil {
return err
}
for _, msg := range info.Messages {
switch msg.Role {
case "user":
fmt.Printf(">>> %s\n", msg.Content)
case "assistant":
state := &displayResponseState{}
displayResponse(msg.Content, opts.WordWrap, state)
fmt.Println()
fmt.Println()
}
}
return generateInteractive(cmd, opts)
}
return generate(cmd, opts)
}
func errFromUnknownKey(unknownKeyErr error) error {
// find SSH public key in the error message
sshKeyPattern := `ssh-\w+ [^\s"]+`
re := regexp.MustCompile(sshKeyPattern)
matches := re.FindStringSubmatch(unknownKeyErr.Error())
if len(matches) > 0 {
serverPubKey := matches[0]
localPubKey, err := auth.GetPublicKey()
if err != nil {
return unknownKeyErr
}
if runtime.GOOS == "linux" && serverPubKey != localPubKey {
// try the ollama service public key
svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
if err != nil {
return unknownKeyErr
}
localPubKey = strings.TrimSpace(string(svcPubKey))
}
// check if the returned public key matches the local public key, this prevents adding a remote key to the user's account
if serverPubKey != localPubKey {
return unknownKeyErr
}
var msg strings.Builder
msg.WriteString(unknownKeyErr.Error())
msg.WriteString("\n\nYour ollama key is:\n")
msg.WriteString(localPubKey)
msg.WriteString("\nAdd your key at:\n")
msg.WriteString("https://ollama.com/settings/keys")
return errors.New(msg.String())
}
return unknownKeyErr
} }
func PushHandler(cmd *cobra.Command, args []string) error { func PushHandler(cmd *cobra.Command, args []string) error {
@@ -531,20 +404,6 @@ func PushHandler(cmd *cobra.Command, args []string) error {
request := api.PushRequest{Name: args[0], Insecure: insecure} request := api.PushRequest{Name: args[0], Insecure: insecure}
if err := client.Push(cmd.Context(), &request, fn); err != nil { if err := client.Push(cmd.Context(), &request, fn); err != nil {
if spinner != nil {
spinner.Stop()
}
if strings.Contains(err.Error(), "access denied") {
return errors.New("you are not authorized to push to this namespace, create the model under a namespace you own")
}
host := model.ParseName(args[0]).Host
isOllamaHost := strings.HasSuffix(host, ".ollama.ai") || strings.HasSuffix(host, ".ollama.com")
if strings.Contains(err.Error(), errtypes.UnknownOllamaKeyErrMsg) && isOllamaHost {
// the user has not added their ollama key to ollama.com
// re-throw an error with a more user-friendly message
return errFromUnknownKey(err)
}
return err return err
} }
@@ -585,52 +444,6 @@ func ListHandler(cmd *cobra.Command, args []string) error {
return nil return nil
} }
func ListRunningHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
models, err := client.ListRunning(cmd.Context())
if err != nil {
return err
}
var data [][]string
for _, m := range models.Models {
if len(args) == 0 || strings.HasPrefix(m.Name, args[0]) {
var procStr string
switch {
case m.SizeVRAM == 0:
procStr = "100% CPU"
case m.SizeVRAM == m.Size:
procStr = "100% GPU"
case m.SizeVRAM > m.Size || m.Size == 0:
procStr = "Unknown"
default:
sizeCPU := m.Size - m.SizeVRAM
cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
procStr = fmt.Sprintf("%d%%/%d%% CPU/GPU", int(cpuPercent), int(100-cpuPercent))
}
data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, format.HumanTime(m.ExpiresAt, "Never")})
}
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "UNTIL"})
table.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
table.SetAlignment(tablewriter.ALIGN_LEFT)
table.SetHeaderLine(false)
table.SetBorder(false)
table.SetNoWhiteSpace(true)
table.SetTablePadding("\t")
table.AppendBulk(data)
table.Render()
return nil
}
func DeleteHandler(cmd *cobra.Command, args []string) error { func DeleteHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment() client, err := api.ClientFromEnvironment()
if err != nil { if err != nil {
@@ -653,6 +466,10 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
return err return err
} }
if len(args) != 1 {
return errors.New("missing model name")
}
license, errLicense := cmd.Flags().GetBool("license") license, errLicense := cmd.Flags().GetBool("license")
modelfile, errModelfile := cmd.Flags().GetBool("modelfile") modelfile, errModelfile := cmd.Flags().GetBool("modelfile")
parameters, errParams := cmd.Flags().GetBool("parameters") parameters, errParams := cmd.Flags().GetBool("parameters")
@@ -695,6 +512,8 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
if flagsSet > 1 { if flagsSet > 1 {
return errors.New("only one of '--license', '--modelfile', '--parameters', '--system', or '--template' can be specified") return errors.New("only one of '--license', '--modelfile', '--parameters', '--system', or '--template' can be specified")
} else if flagsSet == 0 {
return errors.New("one of '--license', '--modelfile', '--parameters', '--system', or '--template' must be specified")
} }
req := api.ShowRequest{Name: args[0]} req := api.ShowRequest{Name: args[0]}
@@ -703,141 +522,22 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
return err return err
} }
if flagsSet == 1 { switch showType {
switch showType { case "license":
case "license": fmt.Println(resp.License)
fmt.Println(resp.License) case "modelfile":
case "modelfile": fmt.Println(resp.Modelfile)
fmt.Println(resp.Modelfile) case "parameters":
case "parameters": fmt.Println(resp.Parameters)
fmt.Println(resp.Parameters) case "system":
case "system": fmt.Println(resp.System)
fmt.Println(resp.System) case "template":
case "template": fmt.Println(resp.Template)
fmt.Println(resp.Template)
}
return nil
} }
showInfo(resp)
return nil return nil
} }
func showInfo(resp *api.ShowResponse) {
arch := resp.ModelInfo["general.architecture"].(string)
modelData := [][]string{
{"arch", arch},
{"parameters", resp.Details.ParameterSize},
{"quantization", resp.Details.QuantizationLevel},
{"context length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)].(float64))},
{"embedding length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.embedding_length", arch)].(float64))},
}
mainTableData := [][]string{
{"Model"},
{renderSubTable(modelData, false)},
}
if resp.ProjectorInfo != nil {
projectorData := [][]string{
{"arch", "clip"},
{"parameters", format.HumanNumber(uint64(resp.ProjectorInfo["general.parameter_count"].(float64)))},
}
if projectorType, ok := resp.ProjectorInfo["clip.projector_type"]; ok {
projectorData = append(projectorData, []string{"projector type", projectorType.(string)})
}
projectorData = append(projectorData,
[]string{"embedding length", fmt.Sprintf("%v", resp.ProjectorInfo["clip.vision.embedding_length"].(float64))},
[]string{"projection dimensionality", fmt.Sprintf("%v", resp.ProjectorInfo["clip.vision.projection_dim"].(float64))},
)
mainTableData = append(mainTableData,
[]string{"Projector"},
[]string{renderSubTable(projectorData, false)},
)
}
if resp.Parameters != "" {
mainTableData = append(mainTableData, []string{"Parameters"}, []string{formatParams(resp.Parameters)})
}
if resp.System != "" {
mainTableData = append(mainTableData, []string{"System"}, []string{renderSubTable(twoLines(resp.System), true)})
}
if resp.License != "" {
mainTableData = append(mainTableData, []string{"License"}, []string{renderSubTable(twoLines(resp.License), true)})
}
table := tablewriter.NewWriter(os.Stdout)
table.SetAutoWrapText(false)
table.SetBorder(false)
table.SetAlignment(tablewriter.ALIGN_LEFT)
for _, v := range mainTableData {
table.Append(v)
}
table.Render()
}
func renderSubTable(data [][]string, file bool) string {
var buf bytes.Buffer
table := tablewriter.NewWriter(&buf)
table.SetAutoWrapText(!file)
table.SetBorder(false)
table.SetNoWhiteSpace(true)
table.SetTablePadding("\t")
table.SetAlignment(tablewriter.ALIGN_LEFT)
for _, v := range data {
table.Append(v)
}
table.Render()
renderedTable := buf.String()
lines := strings.Split(renderedTable, "\n")
for i, line := range lines {
lines[i] = "\t" + line
}
return strings.Join(lines, "\n")
}
func twoLines(s string) [][]string {
lines := strings.Split(s, "\n")
res := [][]string{}
count := 0
for _, line := range lines {
line = strings.TrimSpace(line)
if line != "" {
count++
res = append(res, []string{line})
if count == 2 {
return res
}
}
}
return res
}
func formatParams(s string) string {
lines := strings.Split(s, "\n")
table := [][]string{}
for _, line := range lines {
table = append(table, strings.Fields(line))
}
return renderSubTable(table, false)
}
func CopyHandler(cmd *cobra.Command, args []string) error { func CopyHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment() client, err := api.ClientFromEnvironment()
if err != nil { if err != nil {
@@ -916,10 +616,10 @@ type runOptions struct {
WordWrap bool WordWrap bool
Format string Format string
System string System string
Template string
Images []api.ImageData Images []api.ImageData
Options map[string]interface{} Options map[string]interface{}
MultiModal bool MultiModal bool
KeepAlive *api.Duration
} }
type displayResponseState struct { type displayResponseState struct {
@@ -932,7 +632,7 @@ func displayResponse(content string, wordWrap bool, state *displayResponseState)
if wordWrap && termWidth >= 10 { if wordWrap && termWidth >= 10 {
for _, ch := range content { for _, ch := range content {
if state.lineLength+1 > termWidth-5 { if state.lineLength+1 > termWidth-5 {
if runewidth.StringWidth(state.wordBuffer) > termWidth-10 { if len(state.wordBuffer) > termWidth-10 {
fmt.Printf("%s%c", state.wordBuffer, ch) fmt.Printf("%s%c", state.wordBuffer, ch)
state.wordBuffer = "" state.wordBuffer = ""
state.lineLength = 0 state.lineLength = 0
@@ -940,22 +640,12 @@ func displayResponse(content string, wordWrap bool, state *displayResponseState)
} }
// backtrack the length of the last word and clear to the end of the line // backtrack the length of the last word and clear to the end of the line
a := runewidth.StringWidth(state.wordBuffer) fmt.Printf("\x1b[%dD\x1b[K\n", len(state.wordBuffer))
if a > 0 {
fmt.Printf("\x1b[%dD", a)
}
fmt.Printf("\x1b[K\n")
fmt.Printf("%s%c", state.wordBuffer, ch) fmt.Printf("%s%c", state.wordBuffer, ch)
chWidth := runewidth.RuneWidth(ch) state.lineLength = len(state.wordBuffer) + 1
state.lineLength = runewidth.StringWidth(state.wordBuffer) + chWidth
} else { } else {
fmt.Print(string(ch)) fmt.Print(string(ch))
state.lineLength += runewidth.RuneWidth(ch) state.lineLength += 1
if runewidth.RuneWidth(ch) >= 2 {
state.wordBuffer = ""
continue
}
switch ch { switch ch {
case ' ': case ' ':
@@ -1024,10 +714,6 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
Options: opts.Options, Options: opts.Options,
} }
if opts.KeepAlive != nil {
req.KeepAlive = opts.KeepAlive
}
if err := client.Chat(cancelCtx, req, fn); err != nil { if err := client.Chat(cancelCtx, req, fn); err != nil {
if errors.Is(err, context.Canceled) { if errors.Is(err, context.Canceled) {
return nil, nil return nil, nil
@@ -1103,14 +789,14 @@ func generate(cmd *cobra.Command, opts runOptions) error {
} }
request := api.GenerateRequest{ request := api.GenerateRequest{
Model: opts.Model, Model: opts.Model,
Prompt: opts.Prompt, Prompt: opts.Prompt,
Context: generateContext, Context: generateContext,
Images: opts.Images, Images: opts.Images,
Format: opts.Format, Format: opts.Format,
System: opts.System, System: opts.System,
Options: opts.Options, Template: opts.Template,
KeepAlive: opts.KeepAlive, Options: opts.Options,
} }
if err := client.Generate(ctx, &request, fn); err != nil { if err := client.Generate(ctx, &request, fn); err != nil {
@@ -1144,22 +830,25 @@ func generate(cmd *cobra.Command, opts runOptions) error {
return nil return nil
} }
func RunServer(_ *cobra.Command, _ []string) error { func RunServer(cmd *cobra.Command, _ []string) error {
host, port, err := net.SplitHostPort(strings.Trim(os.Getenv("OLLAMA_HOST"), "\"'"))
if err != nil {
host, port = "127.0.0.1", "11434"
if ip := net.ParseIP(strings.Trim(os.Getenv("OLLAMA_HOST"), "[]")); ip != nil {
host = ip.String()
}
}
if err := initializeKeypair(); err != nil { if err := initializeKeypair(); err != nil {
return err return err
} }
ln, err := net.Listen("tcp", envconfig.Host().Host) ln, err := net.Listen("tcp", net.JoinHostPort(host, port))
if err != nil { if err != nil {
return err return err
} }
err = server.Serve(ln) return server.Serve(ln)
if errors.Is(err, http.ErrServerClosed) {
return nil
}
return err
} }
func initializeKeypair() error { func initializeKeypair() error {
@@ -1208,6 +897,24 @@ func initializeKeypair() error {
return nil return nil
} }
//nolint:unused
func waitForServer(ctx context.Context, client *api.Client) error {
// wait for the server to start
timeout := time.After(5 * time.Second)
tick := time.Tick(500 * time.Millisecond)
for {
select {
case <-timeout:
return errors.New("timed out waiting for server to start")
case <-tick:
if err := client.Heartbeat(ctx); err == nil {
return nil // server has started
}
}
}
}
func checkServerHeartbeat(cmd *cobra.Command, _ []string) error { func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
client, err := api.ClientFromEnvironment() client, err := api.ClientFromEnvironment()
if err != nil { if err != nil {
@@ -1218,7 +925,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
return err return err
} }
if err := startApp(cmd.Context(), client); err != nil { if err := startApp(cmd.Context(), client); err != nil {
return errors.New("could not connect to ollama app, is it running?") return fmt.Errorf("could not connect to ollama app, is it running?")
} }
} }
return nil return nil
@@ -1244,19 +951,12 @@ func versionHandler(cmd *cobra.Command, _ []string) {
} }
} }
func appendEnvDocs(cmd *cobra.Command, envs []envconfig.EnvVar) { func appendHostEnvDocs(cmd *cobra.Command) {
if len(envs) == 0 { const hostEnvDocs = `
return
}
envUsage := `
Environment Variables: Environment Variables:
OLLAMA_HOST The host:port or base URL of the Ollama server (e.g. http://localhost:11434)
` `
for _, e := range envs { cmd.SetUsageTemplate(cmd.UsageTemplate() + hostEnvDocs)
envUsage += fmt.Sprintf(" %-24s %s\n", e.Name, e.Description)
}
cmd.SetUsageTemplate(cmd.UsageTemplate() + envUsage)
} }
func NewCLI() *cobra.Command { func NewCLI() *cobra.Command {
@@ -1295,8 +995,8 @@ func NewCLI() *cobra.Command {
RunE: CreateHandler, RunE: CreateHandler,
} }
createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile") createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")")
createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_0)") createCmd.Flags().StringP("quantization", "q", "", "Quantization level.")
showCmd := &cobra.Command{ showCmd := &cobra.Command{
Use: "show MODEL", Use: "show MODEL",
@@ -1320,7 +1020,6 @@ func NewCLI() *cobra.Command {
RunE: RunHandler, RunE: RunHandler,
} }
runCmd.Flags().String("keepalive", "", "Duration to keep a model loaded (e.g. 5m)")
runCmd.Flags().Bool("verbose", false, "Show timings for response") runCmd.Flags().Bool("verbose", false, "Show timings for response")
runCmd.Flags().Bool("insecure", false, "Use an insecure registry") runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically") runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
@@ -1332,6 +1031,15 @@ func NewCLI() *cobra.Command {
Args: cobra.ExactArgs(0), Args: cobra.ExactArgs(0),
RunE: RunServer, RunE: RunServer,
} }
serveCmd.SetUsageTemplate(serveCmd.UsageTemplate() + `
Environment Variables:
OLLAMA_HOST The host:port to bind to (default "127.0.0.1:11434")
OLLAMA_ORIGINS A comma separated list of allowed origins.
OLLAMA_MODELS The path to the models directory (default is "~/.ollama/models")
OLLAMA_KEEP_ALIVE The duration that models stay loaded in memory (default is "5m")
OLLAMA_DEBUG Set to 1 to enable additional debug logging
`)
pullCmd := &cobra.Command{ pullCmd := &cobra.Command{
Use: "pull MODEL", Use: "pull MODEL",
@@ -1360,16 +1068,8 @@ func NewCLI() *cobra.Command {
PreRunE: checkServerHeartbeat, PreRunE: checkServerHeartbeat,
RunE: ListHandler, RunE: ListHandler,
} }
psCmd := &cobra.Command{
Use: "ps",
Short: "List running models",
PreRunE: checkServerHeartbeat,
RunE: ListRunningHandler,
}
copyCmd := &cobra.Command{ copyCmd := &cobra.Command{
Use: "cp SOURCE DESTINATION", Use: "cp SOURCE TARGET",
Short: "Copy a model", Short: "Copy a model",
Args: cobra.ExactArgs(2), Args: cobra.ExactArgs(2),
PreRunE: checkServerHeartbeat, PreRunE: checkServerHeartbeat,
@@ -1384,10 +1084,6 @@ func NewCLI() *cobra.Command {
RunE: DeleteHandler, RunE: DeleteHandler,
} }
envVars := envconfig.AsMap()
envs := []envconfig.EnvVar{envVars["OLLAMA_HOST"]}
for _, cmd := range []*cobra.Command{ for _, cmd := range []*cobra.Command{
createCmd, createCmd,
showCmd, showCmd,
@@ -1395,33 +1091,10 @@ func NewCLI() *cobra.Command {
pullCmd, pullCmd,
pushCmd, pushCmd,
listCmd, listCmd,
psCmd,
copyCmd, copyCmd,
deleteCmd, deleteCmd,
serveCmd,
} { } {
switch cmd { appendHostEnvDocs(cmd)
case runCmd:
appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
case serveCmd:
appendEnvDocs(cmd, []envconfig.EnvVar{
envVars["OLLAMA_DEBUG"],
envVars["OLLAMA_HOST"],
envVars["OLLAMA_KEEP_ALIVE"],
envVars["OLLAMA_MAX_LOADED_MODELS"],
envVars["OLLAMA_MAX_QUEUE"],
envVars["OLLAMA_MODELS"],
envVars["OLLAMA_NUM_PARALLEL"],
envVars["OLLAMA_NOPRUNE"],
envVars["OLLAMA_ORIGINS"],
envVars["OLLAMA_SCHED_SPREAD"],
envVars["OLLAMA_TMPDIR"],
envVars["OLLAMA_FLASH_ATTENTION"],
envVars["OLLAMA_LLM_LIBRARY"],
})
default:
appendEnvDocs(cmd, envs)
}
} }
rootCmd.AddCommand( rootCmd.AddCommand(
@@ -1432,7 +1105,6 @@ func NewCLI() *cobra.Command {
pullCmd, pullCmd,
pushCmd, pushCmd,
listCmd, listCmd,
psCmd,
copyCmd, copyCmd,
deleteCmd, deleteCmd,
) )

View File

@@ -1,7 +1,6 @@
package cmd package cmd
import ( import (
"cmp"
"errors" "errors"
"fmt" "fmt"
"io" "io"
@@ -9,18 +8,15 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"regexp" "regexp"
"slices" "sort"
"strings" "strings"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"golang.org/x/exp/maps" "golang.org/x/exp/slices"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/progress" "github.com/ollama/ollama/progress"
"github.com/ollama/ollama/readline" "github.com/ollama/ollama/readline"
"github.com/ollama/ollama/types/errtypes"
) )
type MultilineState int type MultilineState int
@@ -29,36 +25,75 @@ const (
MultilineNone MultilineState = iota MultilineNone MultilineState = iota
MultilinePrompt MultilinePrompt
MultilineSystem MultilineSystem
MultilineTemplate
) )
func loadModel(cmd *cobra.Command, opts *runOptions) error { func loadModel(cmd *cobra.Command, opts *runOptions) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
p := progress.NewProgress(os.Stderr) p := progress.NewProgress(os.Stderr)
defer p.StopAndClear() defer p.StopAndClear()
spinner := progress.NewSpinner("") spinner := progress.NewSpinner("")
p.Add("", spinner) p.Add("", spinner)
client, err := api.ClientFromEnvironment() showReq := api.ShowRequest{Name: opts.Model}
showResp, err := client.Show(cmd.Context(), &showReq)
if err != nil {
return err
}
opts.MultiModal = slices.Contains(showResp.Details.Families, "clip")
opts.ParentModel = showResp.Details.ParentModel
if len(showResp.Messages) > 0 {
opts.Messages = append(opts.Messages, showResp.Messages...)
}
chatReq := &api.ChatRequest{
Model: opts.Model,
Messages: []api.Message{},
}
err = client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
p.StopAndClear()
if len(opts.Messages) > 0 {
for _, msg := range opts.Messages {
switch msg.Role {
case "user":
fmt.Printf(">>> %s\n", msg.Content)
case "assistant":
state := &displayResponseState{}
displayResponse(msg.Content, opts.WordWrap, state)
fmt.Println()
fmt.Println()
}
}
}
return nil
})
if err != nil { if err != nil {
return err return err
} }
chatReq := &api.ChatRequest{ return nil
Model: opts.Model,
KeepAlive: opts.KeepAlive,
}
return client.Chat(cmd.Context(), chatReq, func(api.ChatResponse) error { return nil })
} }
func generateInteractive(cmd *cobra.Command, opts runOptions) error { func generateInteractive(cmd *cobra.Command, opts runOptions) error {
opts.Messages = make([]api.Message, 0)
err := loadModel(cmd, &opts)
if err != nil {
return err
}
usage := func() { usage := func() {
fmt.Fprintln(os.Stderr, "Available Commands:") fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /set Set session variables") fmt.Fprintln(os.Stderr, " /set Set session variables")
fmt.Fprintln(os.Stderr, " /show Show model information") fmt.Fprintln(os.Stderr, " /show Show model information")
fmt.Fprintln(os.Stderr, " /load <model> Load a session or model") fmt.Fprintln(os.Stderr, " /load <model> Load a session or model")
fmt.Fprintln(os.Stderr, " /save <model> Save your current session") fmt.Fprintln(os.Stderr, " /save <model> Save your current session")
fmt.Fprintln(os.Stderr, " /clear Clear session context")
fmt.Fprintln(os.Stderr, " /bye Exit") fmt.Fprintln(os.Stderr, " /bye Exit")
fmt.Fprintln(os.Stderr, " /?, /help Help for a command") fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
fmt.Fprintln(os.Stderr, " /? shortcuts Help for keyboard shortcuts") fmt.Fprintln(os.Stderr, " /? shortcuts Help for keyboard shortcuts")
@@ -76,6 +111,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
fmt.Fprintln(os.Stderr, "Available Commands:") fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /set parameter ... Set a parameter") fmt.Fprintln(os.Stderr, " /set parameter ... Set a parameter")
fmt.Fprintln(os.Stderr, " /set system <string> Set system message") fmt.Fprintln(os.Stderr, " /set system <string> Set system message")
fmt.Fprintln(os.Stderr, " /set template <string> Set prompt template")
fmt.Fprintln(os.Stderr, " /set history Enable history") fmt.Fprintln(os.Stderr, " /set history Enable history")
fmt.Fprintln(os.Stderr, " /set nohistory Disable history") fmt.Fprintln(os.Stderr, " /set nohistory Disable history")
fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap") fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap")
@@ -95,7 +131,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
fmt.Fprintln(os.Stderr, " Alt + f Move forward (right) one word") fmt.Fprintln(os.Stderr, " Alt + f Move forward (right) one word")
fmt.Fprintln(os.Stderr, " Ctrl + k Delete the sentence after the cursor") fmt.Fprintln(os.Stderr, " Ctrl + k Delete the sentence after the cursor")
fmt.Fprintln(os.Stderr, " Ctrl + u Delete the sentence before the cursor") fmt.Fprintln(os.Stderr, " Ctrl + u Delete the sentence before the cursor")
fmt.Fprintln(os.Stderr, " Ctrl + w Delete the word before the cursor")
fmt.Fprintln(os.Stderr, "") fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, " Ctrl + l Clear the screen") fmt.Fprintln(os.Stderr, " Ctrl + l Clear the screen")
fmt.Fprintln(os.Stderr, " Ctrl + c Stop the model from responding") fmt.Fprintln(os.Stderr, " Ctrl + c Stop the model from responding")
@@ -121,13 +156,12 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
fmt.Fprintln(os.Stderr, " /set parameter num_predict <int> Max number of tokens to predict") fmt.Fprintln(os.Stderr, " /set parameter num_predict <int> Max number of tokens to predict")
fmt.Fprintln(os.Stderr, " /set parameter top_k <int> Pick from top k num of tokens") fmt.Fprintln(os.Stderr, " /set parameter top_k <int> Pick from top k num of tokens")
fmt.Fprintln(os.Stderr, " /set parameter top_p <float> Pick token based on sum of probabilities") fmt.Fprintln(os.Stderr, " /set parameter top_p <float> Pick token based on sum of probabilities")
fmt.Fprintln(os.Stderr, " /set parameter min_p <float> Pick token based on top token probability * min_p")
fmt.Fprintln(os.Stderr, " /set parameter num_ctx <int> Set the context size") fmt.Fprintln(os.Stderr, " /set parameter num_ctx <int> Set the context size")
fmt.Fprintln(os.Stderr, " /set parameter temperature <float> Set creativity level") fmt.Fprintln(os.Stderr, " /set parameter temperature <float> Set creativity level")
fmt.Fprintln(os.Stderr, " /set parameter repeat_penalty <float> How strongly to penalize repetitions") fmt.Fprintln(os.Stderr, " /set parameter repeat_penalty <float> How strongly to penalize repetitions")
fmt.Fprintln(os.Stderr, " /set parameter repeat_last_n <int> Set how far back to look for repetitions") fmt.Fprintln(os.Stderr, " /set parameter repeat_last_n <int> Set how far back to look for repetitions")
fmt.Fprintln(os.Stderr, " /set parameter num_gpu <int> The number of layers to send to the GPU") fmt.Fprintln(os.Stderr, " /set parameter num_gpu <int> The number of layers to send to the GPU")
fmt.Fprintln(os.Stderr, " /set parameter stop <string> <string> ... Set the stop parameters") fmt.Fprintln(os.Stderr, " /set parameter stop \"<string>\", ... Set the stop parameters")
fmt.Fprintln(os.Stderr, "") fmt.Fprintln(os.Stderr, "")
} }
@@ -141,10 +175,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
return err return err
} }
if envconfig.NoHistory() {
scanner.HistoryDisable()
}
fmt.Print(readline.StartBracketedPaste) fmt.Print(readline.StartBracketedPaste)
defer fmt.Printf(readline.EndBracketedPaste) defer fmt.Printf(readline.EndBracketedPaste)
@@ -186,6 +216,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
opts.Messages = append(opts.Messages, api.Message{Role: "system", Content: opts.System}) opts.Messages = append(opts.Messages, api.Message{Role: "system", Content: opts.System})
fmt.Println("Set system message.") fmt.Println("Set system message.")
sb.Reset() sb.Reset()
case MultilineTemplate:
opts.Template = sb.String()
fmt.Println("Set prompt template.")
sb.Reset()
} }
multiline = MultilineNone multiline = MultilineNone
@@ -241,22 +275,11 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
fn := func(resp api.ProgressResponse) error { return nil } fn := func(resp api.ProgressResponse) error { return nil }
err = client.Create(cmd.Context(), req, fn) err = client.Create(cmd.Context(), req, fn)
if err != nil { if err != nil {
if strings.Contains(err.Error(), errtypes.InvalidModelNameErrMsg) { fmt.Println("error: couldn't save model")
fmt.Printf("error: The model name '%s' is invalid\n", args[1])
continue
}
return err return err
} }
fmt.Printf("Created new model '%s'\n", args[1]) fmt.Printf("Created new model '%s'\n", args[1])
continue continue
case strings.HasPrefix(line, "/clear"):
opts.Messages = []api.Message{}
if opts.System != "" {
newMessage := api.Message{Role: "system", Content: opts.System}
opts.Messages = append(opts.Messages, newMessage)
}
fmt.Println("Cleared session context")
continue
case strings.HasPrefix(line, "/set"): case strings.HasPrefix(line, "/set"):
args := strings.Fields(line) args := strings.Fields(line)
if len(args) > 1 { if len(args) > 1 {
@@ -304,13 +327,17 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
} }
fmt.Printf("Set parameter '%s' to '%s'\n", args[2], strings.Join(params, ", ")) fmt.Printf("Set parameter '%s' to '%s'\n", args[2], strings.Join(params, ", "))
opts.Options[args[2]] = fp[args[2]] opts.Options[args[2]] = fp[args[2]]
case "system": case "system", "template":
if len(args) < 3 { if len(args) < 3 {
usageSet() usageSet()
continue continue
} }
multiline = MultilineSystem if args[1] == "system" {
multiline = MultilineSystem
} else if args[1] == "template" {
multiline = MultilineTemplate
}
line := strings.Join(args[2:], " ") line := strings.Join(args[2:], " ")
line, ok := strings.CutPrefix(line, `"""`) line, ok := strings.CutPrefix(line, `"""`)
@@ -330,17 +357,23 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
continue continue
} }
opts.System = sb.String() // for display in modelfile if args[1] == "system" {
newMessage := api.Message{Role: "system", Content: sb.String()} opts.System = sb.String() // for display in modelfile
// Check if the slice is not empty and the last message is from 'system' newMessage := api.Message{Role: "system", Content: sb.String()}
if len(opts.Messages) > 0 && opts.Messages[len(opts.Messages)-1].Role == "system" { // Check if the slice is not empty and the last message is from 'system'
// Replace the last message if len(opts.Messages) > 0 && opts.Messages[len(opts.Messages)-1].Role == "system" {
opts.Messages[len(opts.Messages)-1] = newMessage // Replace the last message
} else { opts.Messages[len(opts.Messages)-1] = newMessage
opts.Messages = append(opts.Messages, newMessage) } else {
opts.Messages = append(opts.Messages, newMessage)
}
fmt.Println("Set system message.")
sb.Reset()
} else if args[1] == "template" {
opts.Template = sb.String()
fmt.Println("Set prompt template.")
sb.Reset()
} }
fmt.Println("Set system message.")
sb.Reset()
sb.Reset() sb.Reset()
continue continue
@@ -359,9 +392,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
return err return err
} }
req := &api.ShowRequest{ req := &api.ShowRequest{
Name: opts.Model, Name: opts.Model,
System: opts.System, System: opts.System,
Options: opts.Options, Template: opts.Template,
Options: opts.Options,
} }
resp, err := client.Show(cmd.Context(), req) resp, err := client.Show(cmd.Context(), req)
if err != nil { if err != nil {
@@ -371,7 +405,15 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
switch args[1] { switch args[1] {
case "info": case "info":
showInfo(resp) fmt.Println("Model details:")
if len(resp.Details.Families) > 0 {
fmt.Printf("Family %s\n", strings.Join(resp.Details.Families, ", "))
} else if resp.Details.Family != "" {
fmt.Printf("Family %s\n", resp.Details.Family)
}
fmt.Printf("Parameter Size %s\n", resp.Details.ParameterSize)
fmt.Printf("Quantization Level %s\n", resp.Details.QuantizationLevel)
fmt.Println("")
case "license": case "license":
if resp.License == "" { if resp.License == "" {
fmt.Println("No license was specified for this model.") fmt.Println("No license was specified for this model.")
@@ -404,9 +446,12 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
fmt.Println("No system message was specified for this model.") fmt.Println("No system message was specified for this model.")
} }
case "template": case "template":
if resp.Template != "" { switch {
case opts.Template != "":
fmt.Println(opts.Template + "\n")
case resp.Template != "":
fmt.Println(resp.Template) fmt.Println(resp.Template)
} else { default:
fmt.Println("No prompt template was specified for this model.") fmt.Println("No prompt template was specified for this model.")
} }
default: default:
@@ -490,35 +535,35 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
} }
func buildModelfile(opts runOptions) string { func buildModelfile(opts runOptions) string {
var f parser.File var mf strings.Builder
f.Commands = append(f.Commands, parser.Command{Name: "model", Args: cmp.Or(opts.ParentModel, opts.Model)}) model := opts.ParentModel
if model == "" {
model = opts.Model
}
fmt.Fprintf(&mf, "FROM %s\n", model)
if opts.System != "" { if opts.System != "" {
f.Commands = append(f.Commands, parser.Command{Name: "system", Args: opts.System}) fmt.Fprintf(&mf, "SYSTEM \"\"\"%s\"\"\"\n", opts.System)
} }
keys := maps.Keys(opts.Options) if opts.Template != "" {
slices.Sort(keys) fmt.Fprintf(&mf, "TEMPLATE \"\"\"%s\"\"\"\n", opts.Template)
}
keys := make([]string, 0)
for k := range opts.Options {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys { for _, k := range keys {
v := opts.Options[k] fmt.Fprintf(&mf, "PARAMETER %s %v\n", k, opts.Options[k])
var cmds []parser.Command
switch t := v.(type) {
case []string:
for _, s := range t {
cmds = append(cmds, parser.Command{Name: k, Args: s})
}
default:
cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", t)})
}
f.Commands = append(f.Commands, cmds...)
} }
fmt.Fprintln(&mf)
for _, msg := range opts.Messages { for _, msg := range opts.Messages {
f.Commands = append(f.Commands, parser.Command{Name: "message", Args: fmt.Sprintf("%s: %s", msg.Role, msg.Content)}) fmt.Fprintf(&mf, "MESSAGE %s \"\"\"%s\"\"\"\n", msg.Role, msg.Content)
} }
return f.String() return mf.String()
} }
func normalizeFilePath(fp string) string { func normalizeFilePath(fp string) string {
@@ -604,7 +649,7 @@ func getImageData(filePath string) ([]byte, error) {
// Check if the file size exceeds 100MB // Check if the file size exceeds 100MB
var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
if info.Size() > maxSize { if info.Size() > maxSize {
return nil, errors.New("file size exceeds maximum limit (100MB)") return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
} }
buf = make([]byte, info.Size()) buf = make([]byte, info.Size())

View File

@@ -1,9 +1,10 @@
package cmd package cmd
import ( import (
"bytes"
"testing" "testing"
"text/template"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
@@ -55,53 +56,61 @@ d:\path with\spaces\seven.svg inbetween7 c:\users\jdoe\eight.png inbetween8
func TestModelfileBuilder(t *testing.T) { func TestModelfileBuilder(t *testing.T) {
opts := runOptions{ opts := runOptions{
Model: "hork", Model: "hork",
System: "You are part horse and part shark, but all hork. Do horklike things", System: "You are part horse and part shark, but all hork. Do horklike things",
Template: "This is a template.",
Messages: []api.Message{ Messages: []api.Message{
{Role: "user", Content: "Hey there hork!"}, {Role: "user", Content: "Hey there hork!"},
{Role: "assistant", Content: "Yes it is true, I am half horse, half shark."}, {Role: "assistant", Content: "Yes it is true, I am half horse, half shark."},
}, },
Options: map[string]any{ Options: map[string]interface{}{},
"temperature": 0.9,
"seed": 42,
"penalize_newline": false,
"stop": []string{"hi", "there"},
},
} }
t.Run("model", func(t *testing.T) { opts.Options["temperature"] = 0.9
expect := `FROM hork opts.Options["seed"] = 42
SYSTEM You are part horse and part shark, but all hork. Do horklike things opts.Options["penalize_newline"] = false
opts.Options["stop"] = []string{"hi", "there"}
mf := buildModelfile(opts)
expectedModelfile := `FROM {{.Model}}
SYSTEM """{{.System}}"""
TEMPLATE """{{.Template}}"""
PARAMETER penalize_newline false PARAMETER penalize_newline false
PARAMETER seed 42 PARAMETER seed 42
PARAMETER stop hi PARAMETER stop [hi there]
PARAMETER stop there
PARAMETER temperature 0.9 PARAMETER temperature 0.9
MESSAGE user Hey there hork!
MESSAGE assistant Yes it is true, I am half horse, half shark. MESSAGE user """Hey there hork!"""
MESSAGE assistant """Yes it is true, I am half horse, half shark."""
` `
actual := buildModelfile(opts) tmpl, err := template.New("").Parse(expectedModelfile)
if diff := cmp.Diff(expect, actual); diff != "" { assert.Nil(t, err)
t.Errorf("mismatch (-want +got):\n%s", diff)
}
})
t.Run("parent model", func(t *testing.T) { var buf bytes.Buffer
opts.ParentModel = "horseshark" err = tmpl.Execute(&buf, opts)
expect := `FROM horseshark assert.Nil(t, err)
SYSTEM You are part horse and part shark, but all hork. Do horklike things assert.Equal(t, buf.String(), mf)
opts.ParentModel = "horseshark"
mf = buildModelfile(opts)
expectedModelfile = `FROM {{.ParentModel}}
SYSTEM """{{.System}}"""
TEMPLATE """{{.Template}}"""
PARAMETER penalize_newline false PARAMETER penalize_newline false
PARAMETER seed 42 PARAMETER seed 42
PARAMETER stop hi PARAMETER stop [hi there]
PARAMETER stop there
PARAMETER temperature 0.9 PARAMETER temperature 0.9
MESSAGE user Hey there hork!
MESSAGE assistant Yes it is true, I am half horse, half shark. MESSAGE user """Hey there hork!"""
MESSAGE assistant """Yes it is true, I am half horse, half shark."""
` `
actual := buildModelfile(opts)
if diff := cmp.Diff(expect, actual); diff != "" { tmpl, err = template.New("").Parse(expectedModelfile)
t.Errorf("mismatch (-want +got):\n%s", diff) assert.Nil(t, err)
}
}) var parentBuf bytes.Buffer
err = tmpl.Execute(&parentBuf, opts)
assert.Nil(t, err)
assert.Equal(t, parentBuf.String(), mf)
} }

View File

@@ -1,27 +0,0 @@
//go:build darwin || windows
package cmd
import (
"context"
"errors"
"time"
"github.com/ollama/ollama/api"
)
func waitForServer(ctx context.Context, client *api.Client) error {
// wait for the server to start
timeout := time.After(5 * time.Second)
tick := time.Tick(500 * time.Millisecond)
for {
select {
case <-timeout:
return errors.New("timed out waiting for server to start")
case <-tick:
if err := client.Heartbeat(ctx); err == nil {
return nil // server has started
}
}
}
}

View File

@@ -2,7 +2,7 @@ package cmd
import ( import (
"context" "context"
"errors" "fmt"
"os" "os"
"os/exec" "os/exec"
"strings" "strings"
@@ -20,7 +20,7 @@ func startApp(ctx context.Context, client *api.Client) error {
return err return err
} }
if !strings.Contains(link, "Ollama.app") { if !strings.Contains(link, "Ollama.app") {
return errors.New("could not find ollama app") return fmt.Errorf("could not find ollama app")
} }
path := strings.Split(link, "Ollama.app") path := strings.Split(link, "Ollama.app")
if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil { if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {

View File

@@ -4,11 +4,11 @@ package cmd
import ( import (
"context" "context"
"errors" "fmt"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
) )
func startApp(ctx context.Context, client *api.Client) error { func startApp(ctx context.Context, client *api.Client) error {
return errors.New("could not connect to ollama server, run 'ollama serve' to start it") return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it")
} }

View File

@@ -31,7 +31,7 @@ func startApp(ctx context.Context, client *api.Client) error {
// Finally look in the path // Finally look in the path
appExe, err = exec.LookPath(AppName) appExe, err = exec.LookPath(AppName)
if err != nil { if err != nil {
return errors.New("could not locate ollama app") return fmt.Errorf("could not locate ollama app")
} }
} }
} }

View File

@@ -1,228 +1,187 @@
package convert package convert
import ( import (
"cmp"
"encoding/binary"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io"
"io/fs"
"log/slog" "log/slog"
"os"
"path/filepath"
"slices"
"strings" "strings"
"google.golang.org/protobuf/proto"
"github.com/ollama/ollama/convert/sentencepiece"
"github.com/ollama/ollama/llm" "github.com/ollama/ollama/llm"
) )
type ModelParameters struct { type Params struct {
Architectures []string `json:"architectures"` Architectures []string `json:"architectures"`
VocabSize uint32 `json:"vocab_size"` VocabSize int `json:"vocab_size"`
HiddenSize int `json:"hidden_size"` // n_embd
HiddenLayers int `json:"num_hidden_layers"` // n_layer
ContextSize int `json:"max_position_embeddings"`
IntermediateSize int `json:"intermediate_size"`
AttentionHeads int `json:"num_attention_heads"` // n_head
KeyValHeads int `json:"num_key_value_heads"`
NormEPS float64 `json:"rms_norm_eps"`
BoSTokenID int `json:"bos_token_id"`
EoSTokenID int `json:"eos_token_id"`
HeadDimension int `json:"head_dim"`
PaddingTokenID int `json:"pad_token_id"`
RopeFrequencyBase float64 `json:"rope_theta"`
Experts int `json:"num_local_experts"`
ExpertsUsed int `json:"num_experts_per_tok"`
ByteOrder
} }
type AdapterParameters struct { type ByteOrder interface {
Alpha uint32 `json:"lora_alpha"` binary.ByteOrder
LoraLayers uint32 `json:"lora_layers"` binary.AppendByteOrder
LoraParameters struct {
Rank uint32 `json:"rank"`
Alpha float32 `json:"alpha"`
Scale float32 `json:"scale"`
} `json:"lora_parameters"`
} }
func (ModelParameters) KV(t *Tokenizer) llm.KV { type ModelArch interface {
kv := llm.KV{ GetTensors() error
"general.file_type": uint32(1), LoadVocab() error
"general.quantization_version": uint32(2), WriteGGUF() (string, error)
"tokenizer.ggml.pre": t.Pre,
"tokenizer.ggml.model": t.Vocabulary.Model,
"tokenizer.ggml.tokens": t.Vocabulary.Tokens,
"tokenizer.ggml.scores": t.Vocabulary.Scores,
"tokenizer.ggml.token_type": t.Vocabulary.Types,
}
if len(t.Merges) > 0 {
kv["tokenizer.ggml.merges"] = t.Merges
}
if t.Template != "" {
kv["tokenizer.chat_template"] = t.Template
}
for _, sv := range t.SpecialVocabulary {
kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
}
return kv
} }
func (p AdapterParameters) KV() llm.KV { type ModelFormat interface {
var alpha float32 GetLayerName(string) (string, error)
if p.LoraParameters.Alpha == 0 { GetTensors(string, *Params) ([]llm.Tensor, error)
alpha = float32(p.Alpha) GetParams(string) (*Params, error)
} else { GetModelArch(string, string, *Params) (ModelArch, error)
alpha = p.LoraParameters.Alpha
}
kv := llm.KV{
"adapter.lora.alpha": alpha,
"adapter.type": "lora",
"general.file_type": uint32(1),
"general.type": "adapter",
"general.version": "v0.2",
}
return kv
} }
func (ModelParameters) specialTokenTypes() []string { type ModelData struct {
return []string{ Path string
"bos", "eos", "unk", "sep", "pad", "cls", "mask", Name string
} Params *Params
Vocab *Vocab
Tensors []llm.Tensor
Format ModelFormat
} }
func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error { func GetModelFormat(dirname string) (ModelFormat, error) {
return llm.WriteGGUF(ws, kv, ts) files, err := filepath.Glob(filepath.Join(dirname, "*"))
}
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
return llm.WriteGGUF(ws, kv, ts)
}
type ModelConverter interface {
// KV maps parameters to LLM key-values
KV(*Tokenizer) llm.KV
// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
Tensors([]Tensor) []llm.Tensor
// Replacements returns a list of string pairs to replace in tensor names.
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
Replacements() []string
// specialTokenTypes returns any special token types the model uses
specialTokenTypes() []string
// writeFile writes the model to the provided io.WriteSeeker
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
}
type moreParser interface {
parseMore(fs.FS) error
}
type AdapterConverter interface {
// KV maps parameters to LLM key-values
KV(llm.KV) llm.KV
// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
Tensors([]Tensor) []llm.Tensor
// Replacements returns a list of string pairs to replace in tensor names.
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
Replacements() []string
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
}
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
bts, err := fs.ReadFile(fsys, "adapter_config.json")
if err != nil { if err != nil {
return err return nil, err
} }
var p AdapterParameters for _, fn := range files {
if err := json.Unmarshal(bts, &p); err != nil { slog.Debug(fmt.Sprintf("file = %s", fn))
return err if strings.HasSuffix(fn, ".safetensors") {
} return &SafetensorFormat{}, nil
} else if strings.HasSuffix(fn, ".bin") {
arch, ok := baseKV["general.architecture"] slog.Debug("model is torch")
if !ok { return &TorchFormat{}, nil
return errors.New("architecture not set for the base model")
}
var conv AdapterConverter
switch arch {
case "llama":
conv = &llamaAdapter{}
case "gemma2":
conv = &gemma2Adapter{}
default:
return errors.New("unsupported architecture")
}
ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
if err != nil {
return err
}
if err := json.Unmarshal(bts, conv); err != nil {
return err
}
return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
}
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
bts, err := fs.ReadFile(fsys, "config.json")
if err != nil {
return err
}
var p ModelParameters
if err := json.Unmarshal(bts, &p); err != nil {
return err
}
if len(p.Architectures) < 1 {
return errors.New("unknown architecture")
}
var conv ModelConverter
switch p.Architectures[0] {
case "LlamaForCausalLM", "MistralForCausalLM":
conv = &llamaModel{}
case "MixtralForCausalLM":
conv = &mixtralModel{}
case "GemmaForCausalLM":
conv = &gemmaModel{}
case "Gemma2ForCausalLM":
conv = &gemma2Model{}
case "Phi3ForCausalLM":
conv = &phi3Model{}
case "BertModel":
conv = &bertModel{}
default:
return errors.New("unsupported architecture")
}
if err := json.Unmarshal(bts, conv); err != nil {
return err
}
if t, ok := conv.(moreParser); ok {
if err := t.parseMore(fsys); err != nil {
return err
} }
} }
t, err := parseTokenizer(fsys, conv.specialTokenTypes()) return nil, fmt.Errorf("couldn't determine model format")
if err != nil { }
return err
} // Details on gguf's tokenizer can be found at:
// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer
if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) { type Vocab struct {
slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens)) Tokens []string
for i := range vocabSize - len(t.Vocabulary.Tokens) { Scores []float32
t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i)) Types []int32
t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1) }
t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
} func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
} else { slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model")))
slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens)) in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model"))
} if err != nil {
return nil, err
ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...)) }
if err != nil {
return err // To regenerate sentencepiece from the protobufs use:
} // protoc -I=./ --go_out=./ sentencepiece_model.proto
modelProto := &sentencepiece.ModelProto{}
return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts)) if err := proto.Unmarshal(in, modelProto); err != nil {
return nil, err
}
v := &Vocab{
Tokens: make([]string, 0),
Scores: make([]float32, 0),
Types: make([]int32, 0),
}
pieces := modelProto.GetPieces()
for _, p := range pieces {
v.Tokens = append(v.Tokens, p.GetPiece())
v.Scores = append(v.Scores, p.GetScore())
t := p.GetType()
switch t {
case sentencepiece.ModelProto_SentencePiece_UNKNOWN:
case sentencepiece.ModelProto_SentencePiece_CONTROL:
case sentencepiece.ModelProto_SentencePiece_UNUSED:
case sentencepiece.ModelProto_SentencePiece_BYTE:
default:
t = sentencepiece.ModelProto_SentencePiece_NORMAL
}
v.Types = append(v.Types, int32(t))
}
slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens)))
// add any additional tokens
addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json"))
if os.IsNotExist(err) {
return v, nil
} else if err != nil {
return nil, err
}
slog.Info("reading user defined tokens")
var extraTokenData map[string]int
if err := json.Unmarshal(addIn, &extraTokenData); err != nil {
return nil, err
}
type token struct {
key string
pos int
}
extraTokens := make([]token, 0)
for k, id := range extraTokenData {
extraTokens = append(extraTokens, token{k, id})
}
slices.SortFunc(extraTokens, func(a, b token) int {
return cmp.Compare(a.pos, b.pos)
})
numToks := len(v.Tokens)
for cnt, t := range extraTokens {
// the token id should match the specific index for the total number of tokens
if t.pos != cnt+numToks {
return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key)
}
v.Tokens = append(v.Tokens, t.key)
v.Scores = append(v.Scores, -1000.0)
v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
}
slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
if params.VocabSize > len(v.Tokens) {
missingTokens := params.VocabSize - len(v.Tokens)
slog.Warn(fmt.Sprintf("vocab is missing %d tokens", missingTokens))
for cnt := 0; cnt < missingTokens; cnt++ {
v.Tokens = append(v.Tokens, fmt.Sprintf("<dummy%05d>", cnt+1))
v.Scores = append(v.Scores, -1)
v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
}
}
return v, nil
} }

View File

@@ -1,174 +0,0 @@
package convert
import (
"cmp"
"encoding/json"
"io/fs"
"path/filepath"
"slices"
"strings"
"github.com/ollama/ollama/llm"
)
type bertModel struct {
ModelParameters
NLayers uint32 `json:"n_layers"`
NumHiddenLayers uint32 `json:"num_hidden_layers"`
NLayer uint32 `json:"n_layer"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
NCtx uint32 `json:"n_ctx"`
HiddenSize uint32 `json:"hidden_size"`
NEmbd uint32 `json:"n_embd"`
IntermediateSize uint32 `json:"intermediate_size"`
NInner uint32 `json:"n_inner"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NHead uint32 `json:"n_head"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
LayerNormEPS float32 `json:"layer_norm_eps"`
LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
NormEpsilon float32 `json:"norm_epsilon"`
PoolingType uint32
}
var (
_ ModelConverter = (*bertModel)(nil)
_ moreParser = (*bertModel)(nil)
)
func (p *bertModel) parseMore(fsys fs.FS) error {
bts, err := fs.ReadFile(fsys, "modules.json")
if err != nil {
return err
}
var modules []struct {
Type string `json:"type"`
Path string `json:"path"`
}
if err := json.Unmarshal(bts, &modules); err != nil {
return err
}
var pooling string
for _, m := range modules {
if m.Type == "sentence_transformers.models.Pooling" {
pooling = m.Path
break
}
}
if pooling != "" {
bts, err := fs.ReadFile(fsys, filepath.Join(pooling, "config.json"))
if err != nil {
return err
}
var pc struct {
PoolingModeCLSToken bool `json:"pooling_mode_cls_token"`
PoolingModeMeanTokens bool `json:"pooling_mode_mean_tokens"`
}
if err := json.Unmarshal(bts, &pc); err != nil {
return err
}
if pc.PoolingModeMeanTokens {
p.PoolingType = 1
} else if pc.PoolingModeCLSToken {
p.PoolingType = 2
}
}
return nil
}
func (p *bertModel) KV(t *Tokenizer) llm.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "bert"
kv["bert.attention.causal"] = false
kv["bert.pooling_type"] = p.PoolingType
kv["bert.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
kv["bert.context_length"] = contextLength
}
if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 {
kv["bert.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
}
if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
kv["bert.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
}
if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
kv["bert.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
}
if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
kv["bert.attention.layer_norm_epsilon"] = layerNormEpsilon
}
kv["tokenizer.ggml.model"] = "bert"
kv["tokenizer.ggml.token_type_count"] = uint32(2)
// convert to phantom space tokens
for i, e := range t.Tokens {
if strings.HasPrefix(e, "[") && strings.HasSuffix(e, "]") {
// noop
} else if strings.HasPrefix(e, "##") {
t.Tokens[i] = e[2:]
} else {
t.Tokens[i] = "\u2581" + e
}
}
kv["tokenizer.ggml.tokens"] = t.Tokens
return kv
}
func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
for _, t := range ts {
if slices.Contains([]string{
"embeddings.position_ids",
"pooler.dense.weight",
"pooler.dense.bias",
}, t.Name()) {
continue
}
out = append(out, llm.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (bertModel) Replacements() []string {
return []string{
"encoder.layer", "blk",
"encoder.layers", "blk",
"embeddings.word_embeddings", "token_embd",
"embeddings.token_type_embeddings", "token_types",
"embeddings.LayerNorm", "token_embd_norm",
"embeddings.position_embeddings", "position_embd",
"attention.self.query", "attn_q",
"attention.self.key", "attn_k",
"attention.self.value", "attn_v",
"attention.output.dense", "attn_output",
"attention.output.LayerNorm", "attn_output_norm",
"intermediate.dense", "ffn_up",
"output.dense", "ffn_down",
"output.LayerNorm", "layer_output_norm",
}
}

View File

@@ -1,100 +0,0 @@
package convert
import (
"strings"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/ollama/ollama/llm"
)
type gemmaModel struct {
ModelParameters
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
HiddenSize uint32 `json:"hidden_size"`
HiddenLayers uint32 `json:"num_hidden_layers"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
RMSNormEPS float32 `json:"rms_norm_eps"`
HeadDim uint32 `json:"head_dim"`
}
var _ ModelConverter = (*gemmaModel)(nil)
func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "gemma"
kv["gemma.context_length"] = p.MaxPositionEmbeddings
kv["gemma.embedding_length"] = p.HiddenSize
kv["gemma.block_count"] = p.HiddenLayers
kv["gemma.feed_forward_length"] = p.IntermediateSize
kv["gemma.attention.head_count"] = p.NumAttentionHeads
kv["gemma.attention.head_count_kv"] = p.NumKeyValueHeads
kv["gemma.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
kv["gemma.attention.key_length"] = p.HeadDim
kv["gemma.attention.value_length"] = p.HeadDim
kv["tokenizer.ggml.eot_token_id"] = uint32(107)
kv["tokenizer.ggml.middle_token_id"] = uint32(68)
kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
return kv
}
func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
for _, t := range ts {
if strings.HasSuffix(t.Name(), "_norm.weight") {
t.SetRepacker(p.addOne)
}
out = append(out, llm.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *gemmaModel) Replacements() []string {
return []string{
"model.embed_tokens", "token_embd",
"model.norm", "output_norm",
"model.layers", "blk",
"input_layernorm", "attn_norm",
"self_attn.q_proj", "attn_q",
"self_attn.k_proj", "attn_k",
"self_attn.v_proj", "attn_v",
"self_attn.o_proj", "attn_output",
"mlp.gate_proj", "ffn_gate",
"mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up",
"post_attention_layernorm", "ffn_norm",
}
}
func (*gemmaModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
ones := tensor.Ones(tensor.Float32, int(shape[0]))
n, err := n.Add(ones)
if err != nil {
return nil, err
}
ts, err := native.SelectF32(n, 0)
if err != nil {
return nil, err
}
var f32s []float32
for _, t := range ts {
f32s = append(f32s, t...)
}
return f32s, nil
}

View File

@@ -1,43 +0,0 @@
package convert
import (
"github.com/ollama/ollama/llm"
)
type gemma2Model struct {
gemmaModel
SlidingWindow uint32 `json:"sliding_window"`
AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
}
func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "gemma2"
kv["gemma2.context_length"] = p.MaxPositionEmbeddings
kv["gemma2.embedding_length"] = p.HiddenSize
kv["gemma2.block_count"] = p.HiddenLayers
kv["gemma2.feed_forward_length"] = p.IntermediateSize
kv["gemma2.attention.head_count"] = p.NumAttentionHeads
kv["gemma2.attention.head_count_kv"] = p.NumKeyValueHeads
kv["gemma2.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
kv["gemma2.attention.key_length"] = p.HeadDim
kv["gemma2.attention.value_length"] = p.HeadDim
kv["gemma2.attention.sliding_window"] = p.SlidingWindow
kv["gemma2.attn_logit_softcapping"] = p.AttentionLogitSoftcap
kv["gemma2.final_logit_softcapping"] = p.FinalLogitSoftcap
kv["tokenizer.ggml.eot_token_id"] = uint32(107)
kv["tokenizer.ggml.middle_token_id"] = uint32(68)
kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
return kv
}
func (p *gemma2Model) Replacements() []string {
return append(
p.gemmaModel.Replacements(),
"post_attention_layernorm", "post_attention_norm",
"pre_feedforward_layernorm", "ffn_norm",
"post_feedforward_layernorm", "post_ffw_norm",
)
}

View File

@@ -1,91 +0,0 @@
package convert
import (
"strings"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/ollama/ollama/llm"
)
type gemma2Adapter struct {
AdapterParameters
}
var _ AdapterConverter = (*gemma2Adapter)(nil)
func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
kv := p.AdapterParameters.KV()
kv["general.architecture"] = "gemma2"
return kv
}
func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
for _, t := range ts {
shape := t.Shape()
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
shape[0], shape[1] = shape[1], shape[0]
t.SetRepacker(p.repack)
}
out = append(out, llm.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *gemma2Adapter) Replacements() []string {
return []string{
"base_model.model.", "",
"model.layers", "blk",
"self_attn.q_proj", "attn_q",
"self_attn.k_proj", "attn_k",
"self_attn.v_proj", "attn_v",
"self_attn.o_proj", "attn_output",
"mlp.gate_proj", "ffn_gate",
"mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up",
"lora_A.weight", "weight.lora_a",
"lora_B.weight", "weight.lora_b",
"lora_a", "weight.lora_a",
"lora_b", "weight.lora_b",
}
}
func (p *gemma2Adapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
dims := []int{int(shape[1]), int(shape[0])}
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
if err := n.T(1, 0); err != nil {
return nil, err
}
if err := n.Reshape(dims...); err != nil {
return nil, err
}
if err := n.Transpose(); err != nil {
return nil, err
}
ts, err := native.SelectF32(n, 1)
if err != nil {
return nil, err
}
var f32s []float32
for _, t := range ts {
f32s = append(f32s, t...)
}
return f32s, nil
}

View File

@@ -1,213 +0,0 @@
package convert
import (
"cmp"
"fmt"
"math"
"strings"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/ollama/ollama/llm"
)
type llamaModel struct {
ModelParameters
NLayers uint32 `json:"n_layers"`
NumHiddenLayers uint32 `json:"num_hidden_layers"`
NLayer uint32 `json:"n_layer"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
NCtx uint32 `json:"n_ctx"`
HiddenSize uint32 `json:"hidden_size"`
NEmbd uint32 `json:"n_embd"`
IntermediateSize uint32 `json:"intermediate_size"`
NInner uint32 `json:"n_inner"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NHead uint32 `json:"n_head"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
RopeTheta float32 `json:"rope_theta"`
RopeScaling struct {
Type string `json:"type"`
RopeType string `json:"rope_type"`
Factor float32 `json:"factor"`
LowFrequencyFactor float32 `json:"low_freq_factor"`
HighFrequencyFactor float32 `json:"high_freq_factor"`
OriginalMaxPositionalEmbeddings uint32 `json:"original_max_positional_embeddings"`
factors ropeFactor
} `json:"rope_scaling"`
RMSNormEPS float32 `json:"rms_norm_eps"`
LayerNormEPS float32 `json:"layer_norm_eps"`
LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
NormEpsilon float32 `json:"norm_epsilon"`
HeadDim uint32 `json:"head_dim"`
}
var _ ModelConverter = (*llamaModel)(nil)
func (p *llamaModel) KV(t *Tokenizer) llm.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "llama"
kv["llama.vocab_size"] = p.VocabSize
kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
kv["llama.context_length"] = contextLength
}
if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 {
kv["llama.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
}
if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
kv["llama.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
}
if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
kv["llama.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
kv["llama.rope.dimension_count"] = p.HiddenSize / headCount
}
if p.RopeTheta > 0 {
kv["llama.rope.freq_base"] = p.RopeTheta
}
if p.RopeScaling.Type == "linear" {
kv["llama.rope.scaling.type"] = p.RopeScaling.Type
kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
} else if p.RopeScaling.RopeType == "llama3" {
dim := p.HiddenSize / p.NumAttentionHeads
for i := uint32(0); i < dim; i += 2 {
factor := cmp.Or(p.RopeScaling.Factor, 8.0)
factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192)
lambdaLow := float32(original) / factorLow
lambdaHigh := float32(original) / factorHigh
lambda := 2 * math.Pi * math.Pow(float64(p.RopeTheta), float64(i)/float64(dim))
if lambda < float64(lambdaHigh) {
p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0)
} else if lambda > float64(lambdaLow) {
p.RopeScaling.factors = append(p.RopeScaling.factors, factor)
} else {
smooth := (float32(original)/float32(lambda) - factorLow) / (factorHigh - factorLow)
p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0/((1-smooth)/factor+smooth))
}
}
}
if p.NumKeyValueHeads > 0 {
kv["llama.attention.head_count_kv"] = p.NumKeyValueHeads
}
if p.RMSNormEPS > 0 {
kv["llama.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
}
if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
kv["llama.attention.layer_norm_epsilon"] = layerNormEpsilon
}
if p.HeadDim > 0 {
kv["llama.attention.key_length"] = p.HeadDim
kv["llama.attention.value_length"] = p.HeadDim
}
return kv
}
func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
if p.RopeScaling.factors != nil {
out = append(out, llm.Tensor{
Name: "rope_freqs.weight",
Kind: 0,
Shape: []uint64{uint64(len(p.RopeScaling.factors))},
WriterTo: p.RopeScaling.factors,
})
}
for _, t := range ts {
if strings.HasSuffix(t.Name(), "attn_q.weight") ||
strings.HasSuffix(t.Name(), "attn_k.weight") {
t.SetRepacker(p.repack)
}
out = append(out, llm.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *llamaModel) Replacements() []string {
return []string{
"lm_head", "output",
"model.embed_tokens", "token_embd",
"model.norm", "output_norm",
"model.layers", "blk",
"input_layernorm", "attn_norm",
"self_attn.q_proj", "attn_q",
"self_attn.k_proj", "attn_k",
"self_attn.v_proj", "attn_v",
"self_attn.o_proj", "attn_output",
"mlp.gate_proj", "ffn_gate",
"mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up",
"post_attention_layernorm", "ffn_norm",
}
}
func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
var dims []int
for _, dim := range shape {
dims = append(dims, int(dim))
}
var heads uint32
if strings.HasSuffix(name, "attn_q.weight") {
heads = p.NumAttentionHeads
} else if strings.HasSuffix(name, "attn_k.weight") {
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
} else {
return nil, fmt.Errorf("unknown tensor for repack: %s", name)
}
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
return nil, err
}
if err := n.T(0, 2, 1, 3); err != nil {
return nil, err
}
if err := n.Reshape(dims...); err != nil {
return nil, err
}
if err := n.Transpose(); err != nil {
return nil, err
}
ts, err := native.SelectF32(n, 1)
if err != nil {
return nil, err
}
var f32s []float32
for _, t := range ts {
f32s = append(f32s, t...)
}
return f32s, nil
}

View File

@@ -1,169 +0,0 @@
package convert
import (
"cmp"
"strings"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/ollama/ollama/llm"
)
type llamaAdapter struct {
AdapterParameters
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
}
var _ AdapterConverter = (*llamaAdapter)(nil)
func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
kv := p.AdapterParameters.KV()
kv["general.architecture"] = "llama"
kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
kv["llama.attention.head_count_kv"] = baseKV["llama.attention.head_count_kv"]
p.NumAttentionHeads = baseKV["llama.attention.head_count"].(uint32)
return kv
}
func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
for _, t := range ts {
shape := t.Shape()
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
shape[0], shape[1] = shape[1], shape[0]
t.SetRepacker(p.repackAndTranspose)
} else {
t.SetRepacker(p.repack)
}
out = append(out, llm.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: shape,
WriterTo: t,
})
}
return out
}
func (p *llamaAdapter) Replacements() []string {
return []string{
"base_model.model.", "",
"model.layers", "blk",
"self_attn.q_proj", "attn_q",
"self_attn.k_proj", "attn_k",
"self_attn.v_proj", "attn_v",
"self_attn.o_proj", "attn_output",
"mlp.gate_proj", "ffn_gate",
"mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up",
"lora_A.weight", "weight.lora_a",
"lora_B.weight", "weight.lora_b",
"lora_a", "weight.lora_a",
"lora_b", "weight.lora_b",
}
}
func (p *llamaAdapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
dims := []int{int(shape[1]), int(shape[0])}
var heads uint32
if strings.HasSuffix(name, "attn_q.weight.lora_a") {
heads = p.NumAttentionHeads
} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
} else {
return data, nil
}
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
return nil, err
}
if err := n.T(0, 2, 1, 3); err != nil {
return nil, err
}
if err := n.Reshape(dims...); err != nil {
return nil, err
}
if err := n.Transpose(); err != nil {
return nil, err
}
ts, err := native.SelectF32(n, 1)
if err != nil {
return nil, err
}
var f32s []float32
for _, t := range ts {
f32s = append(f32s, t...)
}
return f32s, nil
}
func (p *llamaAdapter) repackAndTranspose(name string, data []float32, shape []uint64) ([]float32, error) {
dims := []int{int(shape[1]), int(shape[0])}
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
var heads uint32
if strings.HasSuffix(name, "attn_q.weight.lora_a") {
heads = p.NumAttentionHeads
} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
}
if heads > 0 {
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
return nil, err
}
if err := n.T(0, 2, 1, 3); err != nil {
return nil, err
}
if err := n.Reshape(dims...); err != nil {
return nil, err
}
if err := n.Transpose(); err != nil {
return nil, err
}
}
if err := n.T(1, 0); err != nil {
return nil, err
}
if err := n.Reshape(dims...); err != nil {
return nil, err
}
if err := n.Transpose(); err != nil {
return nil, err
}
ts, err := native.SelectF32(n, 1)
if err != nil {
return nil, err
}
var f32s []float32
for _, t := range ts {
f32s = append(f32s, t...)
}
return f32s, nil
}

View File

@@ -1,94 +0,0 @@
package convert
import (
"fmt"
"io"
"slices"
"strings"
"github.com/ollama/ollama/llm"
)
type mixtralModel struct {
llamaModel
NumLocalExperts uint32 `json:"num_local_experts"`
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
}
func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
kv := p.llamaModel.KV(t)
if p.NumLocalExperts > 0 {
kv["llama.expert_count"] = p.NumLocalExperts
}
if p.NumExpertsPerToken > 0 {
kv["llama.expert_used_count"] = p.NumExpertsPerToken
}
return kv
}
func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
oldnew := []string{
"model.layers", "blk",
"w1", "ffn_gate_exps",
"w2", "ffn_down_exps",
"w3", "ffn_up_exps",
}
for i := range p.NumLocalExperts {
oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".")
}
// group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor
namer := strings.NewReplacer(oldnew...)
experts := make(map[string]experts)
// merge experts into a single tensor while removing them from ts
ts = slices.DeleteFunc(ts, func(t Tensor) bool {
if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") {
return false
}
name := namer.Replace(t.Name())
experts[name] = append(experts[name], t)
return true
})
var out []llm.Tensor
for n, e := range experts {
// TODO(mxyng): sanity check experts
out = append(out, llm.Tensor{
Name: n,
Kind: e[0].Kind(),
Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...),
WriterTo: e,
})
}
return append(out, p.llamaModel.Tensors(ts)...)
}
func (p *mixtralModel) Replacements() []string {
return append(
p.llamaModel.Replacements(),
"block_sparse_moe.gate", "ffn_gate_inp",
)
}
type experts []Tensor
func (e experts) WriteTo(w io.Writer) (int64, error) {
// TODO(mxyng): experts _should_ be numerically sorted by expert but this should check
for _, t := range e {
// the canonical merged experts tensor stacks all experts along a new, 0 axis,
// e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers
// this accomplishes the same thing by writing each expert tensor in sequence
if _, err := t.WriteTo(w); err != nil {
return 0, err
}
}
return 0, nil
}

View File

@@ -1,123 +0,0 @@
package convert
import (
"cmp"
"encoding/binary"
"io"
"math"
"strings"
"sync"
"github.com/ollama/ollama/llm"
)
type phi3Model struct {
ModelParameters
NumHiddenLayers uint32 `json:"num_hidden_layers"`
NLayers uint32 `json:"n_layers"`
HiddenSize uint32 `json:"hidden_size"`
NEmbd uint32 `json:"n_embd"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NHead uint32 `json:"n_head"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
NHeadKV uint32 `json:"n_head_kv"`
RopeTheta float32 `json:"rope_theta"`
RopeScaling struct {
Type string `json:"type"`
LongFactor ropeFactor `json:"long_factor"`
ShortFactor ropeFactor `json:"short_factor"`
} `json:"rope_scaling"`
RMSNormEPS float32 `json:"rms_norm_eps"`
NPositions uint32 `json:"n_positions"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
SlidingWindow uint32 `json:"sliding_window"`
}
var _ ModelConverter = (*phi3Model)(nil)
func (p *phi3Model) KV(t *Tokenizer) llm.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "phi3"
kv["phi3.context_length"] = p.MaxPositionEmbeddings
kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
kv["phi3.feed_forward_length"] = p.IntermediateSize
kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
kv["phi3.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
kv["phi3.rope.dimension_count"] = p.HiddenSize / cmp.Or(p.NumAttentionHeads, p.NHead)
kv["phi3.rope.freq_base"] = p.RopeTheta
kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
kv["phi3.attention.sliding_window"] = p.SlidingWindow
scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)
switch p.RopeScaling.Type {
case "":
// no scaling
case "su", "longrope":
kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
case "yarn":
kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
default:
panic("unknown rope scaling type")
}
return kv
}
func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
var addRopeFactors sync.Once
out := make([]llm.Tensor, 0, len(ts)+2)
for _, t := range ts {
if strings.HasPrefix(t.Name(), "blk.0.") {
addRopeFactors.Do(func() {
out = append(out, llm.Tensor{
Name: "rope_factors_long.weight",
Kind: 0,
Shape: []uint64{uint64(len(p.RopeScaling.LongFactor))},
WriterTo: p.RopeScaling.LongFactor,
}, llm.Tensor{
Name: "rope_factors_short.weight",
Kind: 0,
Shape: []uint64{uint64(len(p.RopeScaling.ShortFactor))},
WriterTo: p.RopeScaling.ShortFactor,
})
})
}
out = append(out, llm.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *phi3Model) Replacements() []string {
return []string{
"lm_head", "output",
"model.embed_tokens", "token_embd",
"model.norm", "output_norm",
"model.layers", "blk",
"input_layernorm", "attn_norm",
"self_attn.qkv_proj", "attn_qkv",
"self_attn.o_proj", "attn_output",
"mlp.down_proj", "ffn_down",
"mlp.gate_up_proj", "ffn_up",
"post_attention_layernorm", "ffn_norm",
}
}
type ropeFactor []float32
func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
err := binary.Write(w, binary.LittleEndian, r)
return 0, err
}

View File

@@ -1,347 +0,0 @@
package convert
import (
"bytes"
"crypto/sha256"
"encoding/binary"
"encoding/hex"
"encoding/json"
"flag"
"fmt"
"io"
"io/fs"
"log/slog"
"math"
"os"
"path/filepath"
"slices"
"testing"
"golang.org/x/exp/maps"
"github.com/ollama/ollama/llm"
)
func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
t.Helper()
f, err := os.CreateTemp(t.TempDir(), "f16")
if err != nil {
t.Fatal(err)
}
defer f.Close()
if err := ConvertModel(fsys, f); err != nil {
t.Fatal(err)
}
r, err := os.Open(f.Name())
if err != nil {
t.Fatal(err)
}
t.Cleanup(func() { r.Close() })
m, _, err := llm.DecodeGGML(r, math.MaxInt)
if err != nil {
t.Fatal(err)
}
if _, err := r.Seek(0, io.SeekStart); err != nil {
t.Fatal(err)
}
return r, m.KV(), m.Tensors()
}
func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors llm.Tensors) map[string]string {
actual := make(map[string]string)
for k, v := range kv {
if s, ok := v.(json.Marshaler); !ok {
actual[k] = fmt.Sprintf("%v", v)
} else {
bts, err := json.Marshal(s)
if err != nil {
t.Fatal(err)
}
actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
}
}
for _, tensor := range tensors.Items {
sha256sum := sha256.New()
sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
if _, err := io.Copy(sha256sum, sr); err != nil {
t.Fatal(err)
}
actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
}
return actual
}
func TestMain(m *testing.M) {
var level slog.Level
flag.TextVar(&level, "level", slog.LevelInfo, "log level")
flag.Parse()
slog.SetLogLoggerLevel(level)
os.Exit(m.Run())
}
func TestConvertFull(t *testing.T) {
cases := []string{
"Meta-Llama-3-8B-Instruct",
"Meta-Llama-3.1-8B-Instruct",
"Mistral-7B-Instruct-v0.2",
"Mixtral-8x7B-Instruct-v0.1",
"gemma-2b-it",
// microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8
"Phi-3-mini-128k-instruct",
"all-MiniLM-L6-v2",
"gemma-2-9b-it",
}
for i := range cases {
tt := cases[i]
t.Run(tt, func(t *testing.T) {
t.Parallel()
p := filepath.Join("testdata", tt)
if testing.Short() {
t.Skip("skipping in short mode")
} else if _, err := os.Stat(p); err != nil {
t.Skipf("%s not found", p)
}
f, kv, tensors := convertFull(t, os.DirFS(p))
actual := generateResultsJSON(t, f, kv, tensors)
expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
if err != nil {
t.Fatal(err)
}
var expect map[string]string
if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
t.Fatal(err)
}
keys := maps.Keys(expect)
slices.Sort(keys)
for _, k := range keys {
if v, ok := actual[k]; !ok {
t.Errorf("missing %s", k)
} else if v != expect[k] {
t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v)
}
}
})
}
}
func TestConvertAdapter(t *testing.T) {
type AdapterCase struct {
Name string
BaseKV map[string]any
Expected map[string]string
}
cases := []AdapterCase{
{
Name: "discollama",
BaseKV: map[string]any{
"general.architecture": "llama",
"llama.attention.head_count": uint32(32),
"llama.attention.head_count_kv": uint32(8),
},
Expected: map[string]string{
"general.architecture": "llama",
"general.file_type": "1",
"general.parameter_count": "106496",
"general.type": "adapter",
"general.version": "v0.2",
"adapter.lora.alpha": "16",
"adapter.type": "lora",
"llama.attention.head_count": "32",
"llama.attention.head_count_kv": "8",
"blk.31.attn_q.weight.lora_a": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
"blk.31.attn_q.weight.lora_b": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
"blk.31.attn_v.weight.lora_a": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
"blk.31.attn_v.weight.lora_b": "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857",
},
},
}
for _, c := range cases {
t.Run(c.Name, func(t *testing.T) {
t.Parallel()
f, err := os.CreateTemp(t.TempDir(), "f16")
if err != nil {
t.Fatal(err)
}
defer f.Close()
tempDir := t.TempDir()
generateLoraTestData(t, tempDir)
if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
t.Fatal(err)
}
r, err := os.Open(f.Name())
if err != nil {
t.Fatal(err)
}
defer r.Close()
m, _, err := llm.DecodeGGML(r, math.MaxInt)
if err != nil {
t.Fatal(err)
}
if _, err := r.Seek(0, io.SeekStart); err != nil {
t.Fatal(err)
}
actual := generateResultsJSON(t, r, m.KV(), m.Tensors())
keys := maps.Keys(c.Expected)
slices.Sort(keys)
for _, k := range keys {
if v, ok := actual[k]; !ok {
t.Errorf("missing %s", k)
} else if v != c.Expected[k] {
t.Errorf("unexpected %s: want %s, got %s", k, c.Expected[k], v)
}
}
})
}
}
func generateLoraTestData(t *testing.T, tempDir string) {
type tensorData struct {
Offsets []int `json:"data_offsets"`
Type string `json:"dtype"`
Shape []int `json:"shape"`
}
offset := 4096 * 8 * 4
td := map[string]*tensorData{"__metadata__": nil}
td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{
Offsets: []int{0, offset},
Type: "F32",
Shape: []int{4096, 8},
}
td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{
Offsets: []int{offset, offset * 2},
Type: "F32",
Shape: []int{8, 4096},
}
td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{
Offsets: []int{offset * 2, offset * 3},
Type: "F32",
Shape: []int{4096, 8},
}
td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{
Offsets: []int{offset * 3, offset*3 + 8*1024*4},
Type: "F32",
Shape: []int{8, 1024},
}
data, err := json.Marshal(td)
if err != nil {
t.Fatal(err)
}
var buf bytes.Buffer
l := int64(len(data))
err = binary.Write(&buf, binary.LittleEndian, l)
if err != nil {
t.Fatal(err)
}
_, err = buf.Write(data)
if err != nil {
t.Fatal(err)
}
// write some data for the tensors
ones := make([]float32, 4096*8)
for i := range ones {
ones[i] = float32(1)
}
for range 3 {
err = binary.Write(&buf, binary.LittleEndian, ones)
if err != nil {
t.Fatal(err)
}
}
ones = make([]float32, 1024*8)
for i := range ones {
ones[i] = float32(1)
}
err = binary.Write(&buf, binary.LittleEndian, ones)
if err != nil {
t.Fatal(err)
}
fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors"))
if err != nil {
t.Fatal(err)
}
defer fdata.Close()
_, err = fdata.Write(buf.Bytes())
if err != nil {
t.Fatal(err)
}
configData := `
{
"adapter_path": "adapters-test",
"batch_size": 8,
"config": "config-tiny.json",
"data": "../discollama-completion",
"grad_checkpoint": null,
"iters": 1000,
"learning_rate": 1e-05,
"lora_layers": 1,
"lora_parameters": {
"rank": 8,
"alpha": 16,
"dropout": 0.0,
"scale": 2.0
},
"lr_schedule": null,
"max_seq_length": 2048,
"model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct",
"resume_adapter_file": null,
"save_every": 100,
"seed": 0,
"steps_per_eval": 200,
"steps_per_report": 10,
"test": false,
"test_batches": 500,
"train": true,
"use_dora": false,
"val_batches": 25
}
`
f, err := os.Create(filepath.Join(tempDir, "adapter_config.json"))
if err != nil {
t.Fatal(err)
}
defer f.Close()
_, err = f.WriteString(configData)
if err != nil {
t.Fatal(err)
}
}

View File

@@ -1,58 +0,0 @@
package convert
import (
"archive/zip"
"errors"
"io"
"io/fs"
"os"
"path/filepath"
)
type ZipReader struct {
r *zip.Reader
p string
// limit is the maximum size of a file that can be read directly
// from the zip archive. Files larger than this size will be extracted
limit int64
}
func NewZipReader(r *zip.Reader, p string, limit int64) fs.FS {
return &ZipReader{r, p, limit}
}
func (z *ZipReader) Open(name string) (fs.File, error) {
r, err := z.r.Open(name)
if err != nil {
return nil, err
}
defer r.Close()
if fi, err := r.Stat(); err != nil {
return nil, err
} else if fi.Size() < z.limit {
return r, nil
}
if !filepath.IsLocal(name) {
return nil, zip.ErrInsecurePath
}
n := filepath.Join(z.p, name)
if _, err := os.Stat(n); errors.Is(err, os.ErrNotExist) {
w, err := os.Create(n)
if err != nil {
return nil, err
}
defer w.Close()
if _, err := io.Copy(w, r); err != nil {
return nil, err
}
} else if err != nil {
return nil, err
}
return os.Open(n)
}

137
convert/gemma.go Normal file
View File

@@ -0,0 +1,137 @@
package convert
import (
"encoding/binary"
"fmt"
"io"
"log/slog"
"os"
"strings"
"github.com/d4l3k/go-bfloat16"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/ollama/ollama/llm"
)
type GemmaModel struct {
ModelData
}
func gemmaLayerHandler(w io.Writer, r safetensorWriterTo, f *os.File) error {
slog.Debug(fmt.Sprintf("converting '%s'", r.t.Name))
data := make([]byte, r.end-r.start)
if err := binary.Read(f, r.bo, data); err != nil {
return err
}
tDataF32 := bfloat16.DecodeFloat32(data)
var err error
tDataF32, err = addOnes(tDataF32, int(r.t.Shape[0]))
if err != nil {
return err
}
if err := binary.Write(w, r.bo, tDataF32); err != nil {
return err
}
return nil
}
func addOnes(data []float32, vectorSize int) ([]float32, error) {
n := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data))
ones := tensor.Ones(tensor.Float32, vectorSize)
var err error
n, err = n.Add(ones)
if err != nil {
return []float32{}, err
}
newN, err := native.SelectF32(n, 0)
if err != nil {
return []float32{}, err
}
var fullTensor []float32
for _, v := range newN {
fullTensor = append(fullTensor, v...)
}
return fullTensor, nil
}
func (m *GemmaModel) GetTensors() error {
t, err := m.Format.GetTensors(m.Path, m.Params)
if err != nil {
return err
}
slog.Debug(fmt.Sprintf("Total tensors: %d", len(t)))
m.Tensors = []llm.Tensor{}
for _, l := range t {
if strings.HasSuffix(l.Name, "norm.weight") {
wt := l.WriterTo.(safetensorWriterTo)
wt.handler = gemmaLayerHandler
l.WriterTo = wt
}
m.Tensors = append(m.Tensors, l)
}
return nil
}
func (m *GemmaModel) LoadVocab() error {
v, err := LoadSentencePieceTokens(m.Path, m.Params)
if err != nil {
return err
}
m.Vocab = v
return nil
}
func (m *GemmaModel) WriteGGUF() (string, error) {
kv := llm.KV{
"general.architecture": "gemma",
"general.name": m.Name,
"gemma.context_length": uint32(m.Params.ContextSize),
"gemma.embedding_length": uint32(m.Params.HiddenSize),
"gemma.block_count": uint32(m.Params.HiddenLayers),
"gemma.feed_forward_length": uint32(m.Params.IntermediateSize),
"gemma.attention.head_count": uint32(m.Params.AttentionHeads),
"gemma.attention.head_count_kv": uint32(m.Params.KeyValHeads),
"gemma.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
"gemma.attention.key_length": uint32(m.Params.HeadDimension),
"gemma.attention.value_length": uint32(m.Params.HeadDimension),
"general.file_type": uint32(1),
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.tokens": m.Vocab.Tokens,
"tokenizer.ggml.scores": m.Vocab.Scores,
"tokenizer.ggml.token_type": m.Vocab.Types,
"tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID),
"tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID),
"tokenizer.ggml.padding_token_id": uint32(m.Params.PaddingTokenID),
"tokenizer.ggml.unknown_token_id": uint32(3),
"tokenizer.ggml.add_bos_token": true,
"tokenizer.ggml.add_eos_token": false,
}
f, err := os.CreateTemp("", "ollama-gguf")
if err != nil {
return "", err
}
defer f.Close()
mod := llm.NewGGUFV3(m.Params.ByteOrder)
if err := mod.Encode(f, kv, m.Tensors); err != nil {
return "", err
}
return f.Name(), nil
}

176
convert/llama.go Normal file
View File

@@ -0,0 +1,176 @@
package convert
import (
"encoding/binary"
"fmt"
"io"
"log/slog"
"os"
"regexp"
"strings"
"github.com/nlpodyssey/gopickle/pytorch"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/x448/float16"
"github.com/ollama/ollama/llm"
)
type LlamaModel struct {
ModelData
}
func llamaLayerHandler(w io.Writer, r torchWriterTo) error {
slog.Debug(fmt.Sprintf("repacking layer '%s'", r.t.Name))
data := r.storage.(*pytorch.HalfStorage).Data
tData := make([]uint16, len(data))
for cnt, v := range data {
tData[cnt] = uint16(float16.Fromfloat32(v))
}
var err error
var heads uint32
if strings.Contains(r.t.Name, "attn_q") {
heads = uint32(r.params.AttentionHeads)
} else if strings.Contains(r.t.Name, "attn_k") {
heads = uint32(r.params.KeyValHeads)
if heads == 0 {
heads = uint32(r.params.AttentionHeads)
}
} else {
return fmt.Errorf("unknown layer type")
}
slog.Debug(fmt.Sprintf("heads = %d", heads))
tData, err = llamaRepack(tData, int(heads), r.t.Shape)
if err != nil {
return err
}
if err = binary.Write(w, r.bo, tData); err != nil {
return err
}
return nil
}
func llamaRepack(data []uint16, heads int, shape []uint64) ([]uint16, error) {
n := tensor.New(tensor.WithShape(int(shape[0]), int(shape[1])), tensor.WithBacking(data))
origShape := n.Shape().Clone()
// reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
return nil, err
}
if err := n.T(0, 2, 1, 3); err != nil {
return nil, err
}
if err := n.Reshape(origShape...); err != nil {
return nil, err
}
if err := n.Transpose(); err != nil {
return nil, err
}
newN, err := native.SelectU16(n, 1)
if err != nil {
return nil, err
}
var fullTensor []uint16
for _, v := range newN {
fullTensor = append(fullTensor, v...)
}
return fullTensor, nil
}
func (m *LlamaModel) GetTensors() error {
t, err := m.Format.GetTensors(m.Path, m.Params)
if err != nil {
return err
}
m.Tensors = []llm.Tensor{}
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
re, err := regexp.Compile(pattern)
if err != nil {
return err
}
for _, l := range t {
matches := re.FindAllStringSubmatch(l.Name, -1)
if len(matches) > 0 {
slog.Debug(fmt.Sprintf("setting handler for: %s", l.Name))
wt := l.WriterTo.(torchWriterTo)
wt.handler = llamaLayerHandler
l.WriterTo = wt
}
m.Tensors = append(m.Tensors, l)
}
return nil
}
func (m *LlamaModel) LoadVocab() error {
var v *Vocab
var err error
slog.Debug("loading vocab")
v, err = LoadSentencePieceTokens(m.Path, m.Params)
if err != nil {
return err
}
slog.Debug("vocab loaded")
m.Vocab = v
return nil
}
func (m *LlamaModel) WriteGGUF() (string, error) {
kv := llm.KV{
"general.architecture": "llama",
"general.name": m.Name,
"llama.vocab_size": uint32(len(m.Vocab.Tokens)),
"llama.context_length": uint32(m.Params.ContextSize),
"llama.embedding_length": uint32(m.Params.HiddenSize),
"llama.block_count": uint32(m.Params.HiddenLayers),
"llama.feed_forward_length": uint32(m.Params.IntermediateSize),
"llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
"llama.attention.head_count": uint32(m.Params.AttentionHeads),
"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),
"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
"general.file_type": uint32(1),
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.tokens": m.Vocab.Tokens,
"tokenizer.ggml.scores": m.Vocab.Scores,
"tokenizer.ggml.token_type": m.Vocab.Types,
"tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID),
"tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID),
"tokenizer.ggml.unknown_token_id": uint32(0),
"tokenizer.ggml.add_bos_token": true,
"tokenizer.ggml.add_eos_token": false,
}
f, err := os.CreateTemp("", "ollama-gguf")
if err != nil {
return "", err
}
defer f.Close()
mod := llm.NewGGUFV3(m.Params.ByteOrder)
if err := mod.Encode(f, kv, m.Tensors); err != nil {
return "", err
}
slog.Debug(fmt.Sprintf("gguf file = %s", f.Name()))
return f.Name(), nil
}

173
convert/mistral.go Normal file
View File

@@ -0,0 +1,173 @@
package convert
import (
"encoding/binary"
"fmt"
"io"
"os"
"regexp"
"strings"
"github.com/d4l3k/go-bfloat16"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/x448/float16"
"github.com/ollama/ollama/llm"
)
type MistralModel struct {
ModelData
}
func mistralLayerHandler(w io.Writer, r safetensorWriterTo, f *os.File) error {
layerSize := r.end - r.start
var err error
tData := make([]uint16, layerSize/2)
if err = binary.Read(f, r.bo, tData); err != nil {
return err
}
var heads uint32
if strings.Contains(r.t.Name, "attn_q") {
heads = uint32(r.params.AttentionHeads)
} else if strings.Contains(r.t.Name, "attn_k") {
heads = uint32(r.params.KeyValHeads)
if heads == 0 {
heads = uint32(r.params.AttentionHeads)
}
} else {
return fmt.Errorf("unknown layer type")
}
tData, err = repack(tData, int(heads), r.t.Shape)
if err != nil {
return err
}
var buf []byte
for _, n := range tData {
buf = r.bo.AppendUint16(buf, n)
}
tempBuf := make([]uint16, len(tData))
tDataF32 := bfloat16.DecodeFloat32(buf)
for cnt, v := range tDataF32 {
tDataF16 := float16.Fromfloat32(v)
tempBuf[cnt] = uint16(tDataF16)
}
if err = binary.Write(w, r.bo, tempBuf); err != nil {
return err
}
return nil
}
func repack(data []uint16, heads int, shape []uint64) ([]uint16, error) {
n := tensor.New(tensor.WithShape(int(shape[0]), int(shape[1])), tensor.WithBacking(data))
origShape := n.Shape().Clone()
// reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
return nil, err
}
if err := n.T(0, 2, 1, 3); err != nil {
return nil, err
}
if err := n.Reshape(origShape...); err != nil {
return nil, err
}
if err := n.Transpose(); err != nil {
return nil, err
}
newN, err := native.SelectU16(n, 1)
if err != nil {
return nil, err
}
var fullTensor []uint16
for _, v := range newN {
fullTensor = append(fullTensor, v...)
}
return fullTensor, nil
}
func (m *MistralModel) GetTensors() error {
t, err := m.Format.GetTensors(m.Path, m.Params)
if err != nil {
return err
}
m.Tensors = []llm.Tensor{}
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
re, err := regexp.Compile(pattern)
if err != nil {
return err
}
for _, l := range t {
matches := re.FindAllStringSubmatch(l.Name, -1)
if len(matches) > 0 {
wt := l.WriterTo.(safetensorWriterTo)
wt.handler = mistralLayerHandler
l.WriterTo = wt
}
m.Tensors = append(m.Tensors, l)
}
return nil
}
func (m *MistralModel) LoadVocab() error {
v, err := LoadSentencePieceTokens(m.Path, m.Params)
if err != nil {
return err
}
m.Vocab = v
return nil
}
func (m *MistralModel) WriteGGUF() (string, error) {
kv := llm.KV{
"general.architecture": "llama",
"general.name": m.Name,
"llama.context_length": uint32(m.Params.ContextSize),
"llama.embedding_length": uint32(m.Params.HiddenSize),
"llama.block_count": uint32(m.Params.HiddenLayers),
"llama.feed_forward_length": uint32(m.Params.IntermediateSize),
"llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
"llama.attention.head_count": uint32(m.Params.AttentionHeads),
"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),
"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
"general.file_type": uint32(1),
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.tokens": m.Vocab.Tokens,
"tokenizer.ggml.scores": m.Vocab.Scores,
"tokenizer.ggml.token_type": m.Vocab.Types,
"tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID),
"tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID),
"tokenizer.ggml.add_bos_token": true,
"tokenizer.ggml.add_eos_token": false,
"tokenizer.ggml.unknown_token_id": uint32(0),
}
f, err := os.CreateTemp("", "ollama-gguf")
if err != nil {
return "", err
}
defer f.Close()
mod := llm.NewGGUFV3(m.Params.ByteOrder)
if err := mod.Encode(f, kv, m.Tensors); err != nil {
return "", err
}
return f.Name(), nil
}

96
convert/mixtral.go Normal file
View File

@@ -0,0 +1,96 @@
package convert
import (
"os"
"regexp"
"github.com/ollama/ollama/llm"
)
type MixtralModel struct {
ModelData
}
func (m *MixtralModel) GetTensors() error {
t, err := m.Format.GetTensors(m.Path, m.Params)
if err != nil {
return err
}
m.Tensors = []llm.Tensor{}
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
re, err := regexp.Compile(pattern)
if err != nil {
return err
}
for _, l := range t {
matches := re.FindAllStringSubmatch(l.Name, -1)
if len(matches) > 0 {
wt := l.WriterTo.(safetensorWriterTo)
wt.handler = mistralLayerHandler
l.WriterTo = wt
}
m.Tensors = append(m.Tensors, l)
}
return nil
}
func (m *MixtralModel) LoadVocab() error {
v, err := LoadSentencePieceTokens(m.Path, m.Params)
if err != nil {
return err
}
m.Vocab = v
return nil
}
func (m *MixtralModel) WriteGGUF() (string, error) {
kv := llm.KV{
"general.architecture": "llama",
"general.name": m.Name,
"llama.block_count": uint32(m.Params.HiddenLayers),
"llama.context_length": uint32(m.Params.ContextSize),
"llama.embedding_length": uint32(m.Params.HiddenSize),
"llama.feed_forward_length": uint32(m.Params.IntermediateSize),
"llama.attention.head_count": uint32(m.Params.AttentionHeads),
"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),
"llama.rope.freq_base": float32(m.Params.RopeFrequencyBase),
"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
"llama.expert_count": uint32(m.Params.Experts),
"llama.expert_used_count": uint32(m.Params.ExpertsUsed),
"llama.vocab_size": uint32(len(m.Vocab.Tokens)),
"llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
"general.file_type": uint32(1),
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.tokens": m.Vocab.Tokens,
"tokenizer.ggml.scores": m.Vocab.Scores,
"tokenizer.ggml.token_type": m.Vocab.Types,
"tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID),
"tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID),
"tokenizer.ggml.unknown_token_id": uint32(0),
"tokenizer.ggml.add_bos_token": true,
"tokenizer.ggml.add_eos_token": false,
}
f, err := os.CreateTemp("", "ollama-gguf")
if err != nil {
return "", err
}
defer f.Close()
mod := llm.NewGGUFV3(m.Params.ByteOrder)
if err := mod.Encode(f, kv, m.Tensors); err != nil {
return "", err
}
return f.Name(), nil
}

View File

@@ -1,86 +0,0 @@
package convert
import (
"errors"
"io"
"io/fs"
"strings"
)
type Tensor interface {
Name() string
Shape() []uint64
Kind() uint32
SetRepacker(repacker)
WriteTo(io.Writer) (int64, error)
}
type tensorBase struct {
name string
shape []uint64
repacker
}
func (t tensorBase) Name() string {
return t.name
}
func (t tensorBase) Shape() []uint64 {
return t.shape
}
const (
tensorKindF32 uint32 = iota
tensorKindF16
)
func (t tensorBase) Kind() uint32 {
if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
t.name == "token_types.weight" {
// these tensors are always F32
return 0
}
switch len(t.shape) {
case 0:
panic("invalid tensor shape")
case 1:
return tensorKindF32
default:
return tensorKindF16
}
}
func (t *tensorBase) SetRepacker(fn repacker) {
t.repacker = fn
}
type repacker func(string, []float32, []uint64) ([]float32, error)
func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
patterns := []struct {
Pattern string
Func func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error)
}{
{"model-*-of-*.safetensors", parseSafetensors},
{"model.safetensors", parseSafetensors},
{"adapters.safetensors", parseSafetensors},
{"adapter_model.safetensors", parseSafetensors},
{"pytorch_model-*-of-*.bin", parseTorch},
{"pytorch_model.bin", parseTorch},
{"consolidated.*.pth", parseTorch},
}
for _, pattern := range patterns {
matches, err := fs.Glob(fsys, pattern.Pattern)
if err != nil {
return nil, err
}
if len(matches) > 0 {
return pattern.Func(fsys, replacer, matches...)
}
}
return nil, errors.New("unknown tensor format")
}

View File

@@ -1,151 +0,0 @@
package convert
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"io/fs"
"slices"
"strings"
"github.com/d4l3k/go-bfloat16"
"github.com/x448/float16"
"golang.org/x/exp/maps"
)
type safetensorMetadata struct {
Type string `json:"dtype"`
Shape []uint64 `json:"shape"`
Offsets []int64 `json:"data_offsets"`
}
func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
var ts []Tensor
for _, p := range ps {
f, err := fsys.Open(p)
if err != nil {
return nil, err
}
defer f.Close()
var n int64
if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
return nil, err
}
b := bytes.NewBuffer(make([]byte, 0, n))
if _, err = io.CopyN(b, f, n); err != nil {
return nil, err
}
var headers map[string]safetensorMetadata
if err := json.NewDecoder(b).Decode(&headers); err != nil {
return nil, err
}
keys := maps.Keys(headers)
slices.Sort(keys)
for _, key := range keys {
if value := headers[key]; value.Type != "" {
ts = append(ts, safetensor{
fs: fsys,
path: p,
dtype: value.Type,
offset: safetensorsPad(n, value.Offsets[0]),
size: safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]),
tensorBase: &tensorBase{
name: replacer.Replace(key),
shape: value.Shape,
},
})
}
}
}
return ts, nil
}
// safetensorsPad returns the padded size of the safetensors file given a length n and offset s
func safetensorsPad(n, offset int64) int64 {
return 8 + n + offset
}
type safetensor struct {
fs fs.FS
path string
dtype string
offset int64
size int64
*tensorBase
}
func (st safetensor) WriteTo(w io.Writer) (int64, error) {
f, err := st.fs.Open(st.path)
if err != nil {
return 0, err
}
defer f.Close()
if seeker, ok := f.(io.Seeker); ok {
if _, err := seeker.Seek(st.offset, io.SeekStart); err != nil {
return 0, err
}
} else {
if _, err := io.CopyN(io.Discard, f, st.offset); err != nil {
return 0, err
}
}
var f32s []float32
switch st.dtype {
case "F32":
f32s = make([]float32, st.size/4)
if err = binary.Read(f, binary.LittleEndian, f32s); err != nil {
return 0, err
}
case "F16":
u16s := make([]uint16, st.size/2)
if err = binary.Read(f, binary.LittleEndian, u16s); err != nil {
return 0, err
}
f32s = make([]float32, len(u16s))
for i := range u16s {
f32s[i] = float16.Frombits(u16s[i]).Float32()
}
case "BF16":
u8s := make([]uint8, st.size)
if err = binary.Read(f, binary.LittleEndian, u8s); err != nil {
return 0, err
}
f32s = bfloat16.DecodeFloat32(u8s)
default:
return 0, fmt.Errorf("unknown data type: %s", st.dtype)
}
if st.repacker != nil {
f32s, err = st.repacker(st.Name(), f32s, st.Shape())
if err != nil {
return 0, err
}
}
switch st.Kind() {
case tensorKindF32:
return 0, binary.Write(w, binary.LittleEndian, f32s)
case tensorKindF16:
f16s := make([]uint16, len(f32s))
for i := range f32s {
f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
}
return 0, binary.Write(w, binary.LittleEndian, f16s)
default:
return 0, fmt.Errorf("unknown storage type: %d", st.Kind())
}
}

View File

@@ -1,48 +0,0 @@
package convert
import (
"io"
"io/fs"
"strings"
"github.com/nlpodyssey/gopickle/pytorch"
"github.com/nlpodyssey/gopickle/types"
)
func parseTorch(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
var ts []Tensor
for _, p := range ps {
pt, err := pytorch.Load(p)
if err != nil {
return nil, err
}
for _, k := range pt.(*types.Dict).Keys() {
t := pt.(*types.Dict).MustGet(k)
var shape []uint64
for dim := range t.(*pytorch.Tensor).Size {
shape = append(shape, uint64(dim))
}
ts = append(ts, torch{
storage: t.(*pytorch.Tensor).Source,
tensorBase: &tensorBase{
name: replacer.Replace(k.(string)),
shape: shape,
},
})
}
}
return ts, nil
}
type torch struct {
storage pytorch.StorageInterface
*tensorBase
}
func (pt torch) WriteTo(w io.Writer) (int64, error) {
return 0, nil
}

317
convert/safetensors.go Normal file
View File

@@ -0,0 +1,317 @@
package convert
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"regexp"
"slices"
"github.com/d4l3k/go-bfloat16"
"github.com/mitchellh/mapstructure"
"github.com/x448/float16"
"github.com/ollama/ollama/llm"
)
type safetensorWriterTo struct {
t *llm.Tensor
params *Params
bo ByteOrder
filename string
start, end, padding uint64
handler func(w io.Writer, r safetensorWriterTo, f *os.File) error
}
type tensorMetaData struct {
Type string `mapstructure:"dtype"`
Shape []int `mapstructure:"shape"`
Offsets []int `mapstructure:"data_offsets"`
}
type SafetensorFormat struct{}
func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
slog.Debug("getting tensor data")
var tensors []llm.Tensor
files, err := filepath.Glob(filepath.Join(dirpath, "/model-*.safetensors"))
if err != nil {
return nil, err
}
var offset uint64
for _, f := range files {
var t []llm.Tensor
var err error
t, offset, err = m.readTensors(f, offset, params)
if err != nil {
slog.Error("%v", err)
return nil, err
}
tensors = append(tensors, t...)
}
slog.Debug(fmt.Sprintf("all tensors = %d", len(tensors)))
return tensors, nil
}
func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) {
f, err := os.Open(fn)
if err != nil {
return nil, 0, err
}
defer f.Close()
var jsonSize uint64
if err := binary.Read(f, binary.LittleEndian, &jsonSize); err != nil {
return nil, 0, err
}
buf := make([]byte, jsonSize)
_, err = io.ReadFull(f, buf)
if err != nil {
return nil, 0, err
}
d := json.NewDecoder(bytes.NewBuffer(buf))
d.UseNumber()
var parsed map[string]interface{}
if err = d.Decode(&parsed); err != nil {
return nil, 0, err
}
var keys []string
for k := range parsed {
keys = append(keys, k)
}
slices.Sort(keys)
slog.Info("converting layers")
var tensors []llm.Tensor
for _, k := range keys {
vals := parsed[k].(map[string]interface{})
var data tensorMetaData
if err = mapstructure.Decode(vals, &data); err != nil {
slog.Error("couldn't decode properly")
return nil, 0, err
}
var size uint64
var kind uint32
switch len(data.Shape) {
case 0:
// metadata
continue
case 1:
// convert to float32
kind = 0
size = uint64(data.Shape[0] * 4)
case 2:
// convert to float16
kind = 1
size = uint64(data.Shape[0] * data.Shape[1] * 2)
}
ggufName, err := m.GetLayerName(k)
if err != nil {
slog.Error("%v", err)
return nil, 0, err
}
shape := []uint64{0, 0, 0, 0}
for i := range data.Shape {
shape[i] = uint64(data.Shape[i])
}
t := llm.Tensor{
Name: ggufName,
Kind: kind,
Offset: offset,
Shape: shape[:],
}
t.WriterTo = safetensorWriterTo{
t: &t,
params: params,
bo: params.ByteOrder,
filename: fn,
start: uint64(data.Offsets[0]),
end: uint64(data.Offsets[1]),
padding: 8 + jsonSize,
}
offset += size
tensors = append(tensors, t)
}
slog.Debug(fmt.Sprintf("total tensors for file = %d", len(tensors)))
slog.Debug(fmt.Sprintf("offset = %d", offset))
return tensors, offset, nil
}
func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) {
f, err := os.Open(filepath.Join(dirpath, "config.json"))
if err != nil {
return nil, err
}
defer f.Close()
var params Params
d := json.NewDecoder(f)
err = d.Decode(&params)
if err != nil {
return nil, err
}
params.ByteOrder = binary.LittleEndian
return &params, nil
}
func (m *SafetensorFormat) GetLayerName(n string) (string, error) {
directMap := map[string]string{
"model.embed_tokens.weight": "token_embd.weight",
"lm_head.weight": "output.weight",
"model.norm.weight": "output_norm.weight",
}
tMap := map[string]string{
"model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight",
"model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight",
"model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight",
"model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight",
"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
"model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight",
"model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight",
"model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight",
"model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight",
"model.layers.(\\d+).block_sparse_moe.gate.weight": "blk.$1.ffn_gate_inp.weight",
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight": "blk.$1.ffn_gate.$2.weight",
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight": "blk.$1.ffn_down.$2.weight",
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight": "blk.$1.ffn_up.$2.weight",
}
v, ok := directMap[n]
if ok {
return v, nil
}
// quick hack to rename the layers to gguf format
for k, v := range tMap {
re := regexp.MustCompile(k)
newName := re.ReplaceAllString(n, v)
if newName != n {
return newName, nil
}
}
return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
}
func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
f, err := os.Open(r.filename)
if err != nil {
return 0, err
}
defer f.Close()
if _, err = f.Seek(int64(r.padding+r.start), 0); err != nil {
return 0, err
}
// use the handler if one is present
if r.handler != nil {
return 0, r.handler(w, r, f)
}
remaining := r.end - r.start
bufSize := uint64(10240)
var finished bool
for {
data := make([]byte, min(bufSize, remaining))
b, err := io.ReadFull(f, data)
remaining -= uint64(b)
if err == io.EOF || remaining <= 0 {
finished = true
} else if err != nil {
return 0, err
}
// convert bfloat16 -> ieee float32
tDataF32 := bfloat16.DecodeFloat32(data)
switch r.t.Kind {
case 0:
if err := binary.Write(w, r.bo, tDataF32); err != nil {
return 0, err
}
case 1:
// convert float32 -> float16
tempBuf := make([]uint16, len(data)/2)
for cnt, v := range tDataF32 {
tDataF16 := float16.Fromfloat32(v)
tempBuf[cnt] = uint16(tDataF16)
}
if err := binary.Write(w, r.bo, tempBuf); err != nil {
return 0, err
}
}
if finished {
break
}
}
return 0, nil
}
func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
switch len(params.Architectures) {
case 0:
return nil, fmt.Errorf("No architecture specified to convert")
case 1:
switch params.Architectures[0] {
case "MistralForCausalLM":
return &MistralModel{
ModelData{
Name: name,
Path: dirPath,
Params: params,
Format: m,
},
}, nil
case "MixtralForCausalLM":
return &MixtralModel{
ModelData{
Name: name,
Path: dirPath,
Params: params,
Format: m,
},
}, nil
case "GemmaForCausalLM":
return &GemmaModel{
ModelData{
Name: name,
Path: dirPath,
Params: params,
Format: m,
},
}, nil
default:
return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0])
}
}
return nil, fmt.Errorf("Unknown error")
}

View File

@@ -1,313 +0,0 @@
{
"general.architecture": "llama",
"general.file_type": "1",
"general.quantization_version": "2",
"llama.block_count": "32",
"llama.context_length": "8192",
"llama.embedding_length": "4096",
"llama.feed_forward_length": "14336",
"llama.rope.dimension_count": "128",
"llama.rope.freq_base": "500000",
"llama.vocab_size": "128256",
"llama.attention.head_count": "32",
"llama.attention.head_count_kv": "8",
"llama.attention.layer_norm_rms_epsilon": "1e-05",
"tokenizer.ggml.model": "gpt2",
"tokenizer.ggml.pre": "llama-bpe",
"tokenizer.ggml.bos_token_id": "128000",
"tokenizer.ggml.eos_token_id": "128009",
"tokenizer.ggml.merges": "d0cbac1fcc9dcf03724b8db5c9bfb593ae1cf68fb9bc72eb1d15274dcbbf618b",
"tokenizer.ggml.token_type": "d70a88809fd7da6f1f028622685cd64268a7a922c5d343c96f25b66327358978",
"tokenizer.ggml.tokens": "765b529dbcbc42dd202ce657341c63807b51f3b07e09898f6aa6196326865d5a",
"token_embd.weight": "b53102a11d9064bbd404833e3464b1b13e08ce73300b442312cccde2f19b2698",
"blk.0.attn_norm.weight": "7318df3cca9e8d153ff0a503026a1265e63d20b2a8c1dd7a2769585082b5d1ee",
"blk.0.ffn_down.weight": "b950806a1fc722c9fad7fd0b20c3c0a7fb50f14395e1e7663a590bfd62e20900",
"blk.0.ffn_gate.weight": "e73e580af6d4f08e060a74a3c25efdf5d3bed99e183d95a5a85ae859014839fd",
"blk.0.ffn_up.weight": "c8158af679ef99746da1befb67eebb19489e0bbe6ce7d97e13e348508244e516",
"blk.0.ffn_norm.weight": "7ec69c3c31e95e49a3359003b0033f6b9e85561a3e3fd83e7476661ecdd756bb",
"blk.0.attn_k.weight": "2732303257bac969b4964e0e32ec08b5a7f5c031bb02bf6ac4467b3ea0ebcf1e",
"blk.0.attn_output.weight": "ecda1d43b4ccc91cd5b366d7e7a275353990ac78561a07c83d9c77031aba12dc",
"blk.0.attn_q.weight": "569b1f5faf92b6f00910cf7effb2d5862f91038ce5c3b0019fc10e5d79fbd5e1",
"blk.0.attn_v.weight": "aa8416c5ef7e32fb54a1f20d6ac651656845d4af240564b397c39bd83e06e3b8",
"blk.1.attn_norm.weight": "03327e02862908c2a44b2f52decdb924bf4201f400b46f8037a9cb2e1d7a61ff",
"blk.1.ffn_down.weight": "5a83a87603f38c99f8e1e370a2d5f967bb45ac51d881a609304a7811027321e0",
"blk.1.ffn_gate.weight": "31da0572c79e655186c721c231376f85e56cdcc6257c28d08c8c5b40d5c22b40",
"blk.1.ffn_up.weight": "e0c811d64ca155c8de10a868e72015d43888834804614ee1aa2953129ffbc90f",
"blk.1.ffn_norm.weight": "5861f313d6137d6f0f904d423df47fffc6069e224ff746e1b637ac9c7f0af862",
"blk.1.attn_k.weight": "5fbbec0acca6457b9416ebdcd90e526885d0224537b7628f6be376a7f275313d",
"blk.1.attn_output.weight": "b237c9763fa3f75166a6f70b70f1566e77d0d89dfa164ed1b3137393e90575c3",
"blk.1.attn_q.weight": "c0a9cf4a98b4882b16f3eb2b49d933793dcc5357abb246fd3fe3134ed2b12e1c",
"blk.1.attn_v.weight": "96867111727200cac1af7865189dd41fd62b47584e5e5f33a91f1d34509cbd40",
"blk.2.attn_norm.weight": "f392f8a88ee3a95b1cc19c40dd4ef66317037b0faaa1800f610779e129ee0539",
"blk.2.ffn_down.weight": "73823eef46632aedcc8c1cb08a736b6aa97ca97842cd1fdfc5567d8dec459662",
"blk.2.ffn_gate.weight": "f4909ae19fc3848b00bb8b9050122e74f8e903b89e22937036f4cc9fea20a718",
"blk.2.ffn_up.weight": "16f4904a3d814ea68f00519724fc4943e48444a84c786bda39aa5efc298a7d84",
"blk.2.ffn_norm.weight": "e3ccdf56e75cb969f6f69c39caf6daf7c4e70e89e25df0f4d2e4bc60e159aafe",
"blk.2.attn_k.weight": "c3beb1e0a11bcf007ef0f0d8f6bdd3082d8b29090cd29597846b5d51e308a8e5",
"blk.2.attn_output.weight": "bb9f66c32cff51154fea92933c2cd62549236f8cb1a767f9ef28d3f99809b343",
"blk.2.attn_q.weight": "8eba394132eef2a05c5a92d62d2376000f7948448d7a2dc74e6b608203add20d",
"blk.2.attn_v.weight": "88f61f77c53567c617db3eef8f30621109a750e679f6784f7911739bd42c2f02",
"blk.3.attn_norm.weight": "7b996675b7ca75fa24107b3ebe0788653ede0f49ac83b8659d71ff54d591f81a",
"blk.3.ffn_down.weight": "2cb332bc05e4821962fdc9dcbcc7cc12630f32117711b687d18fb53c0bc4fbf4",
"blk.3.ffn_gate.weight": "340b387c7f208c8f0a6db904ef8d87c1e84b7d6ad57177abd32d86c8d18b760f",
"blk.3.ffn_up.weight": "07484433f8a7ee061c55aa0de2ecc009f769b0617c9c0ec096e9bb2946df9f0e",
"blk.3.ffn_norm.weight": "4f1a4ade36b393af341240bc894a2aab09cff7e4d56dc4658445deb107f9371b",
"blk.3.attn_k.weight": "483dcd96acb4528df84b9842970994630dbd82b8715ace394aa8b39fcf8d6291",
"blk.3.attn_output.weight": "beaff0810687923585642ee11d929cbf3b43dc6f87f30ddb552c222ab57bdbb3",
"blk.3.attn_q.weight": "0739355002f6fce520863add697e0ff25fc88215322dc3f993be7bb68dcce7e8",
"blk.3.attn_v.weight": "c216d17b6d90ee3e07f82598b8161fae34de2f392dbb0f745b682b578c324767",
"blk.4.attn_norm.weight": "91ab405bc4ba15bf63af233f266aa43aaab43789a9e6596e14a357c2ac7df217",
"blk.4.ffn_down.weight": "620f34ee75cdc73aecb8949af5fbb0d2437fd81422b6d8eb7acfc52addb9fc68",
"blk.4.ffn_gate.weight": "f6feec7bc9acadf35ec22532f8998d8e50f31afedabb19263590dcf8b9a92eee",
"blk.4.ffn_up.weight": "4a72af7cd28fd07b038f6cc4406678d120517280236ea85d9e76eff40ab2cc22",
"blk.4.ffn_norm.weight": "1805b37b44d5d682bdbd2fadeafb763ee001617d7870848cc487079ee34b21f9",
"blk.4.attn_k.weight": "a1e4f9d97cdf4c1b0d177cf00c4e32d1be30c1984a239b3c9bd73f8848888853",
"blk.4.attn_output.weight": "a1547e2497c423b0aff0eee71d9300d6fdf4e4986679418b6e637b69a9a6720b",
"blk.4.attn_q.weight": "0677483a9264ea6803d03d304d87a54632242cb516e8b76b6e3e8284c2f4de04",
"blk.4.attn_v.weight": "02691ba3af344fcc1969428ab0df811ac94aaa2fd91b0dc4ec1ac0a58806980d",
"blk.5.attn_norm.weight": "ba9c028335e5c895b87a5bd1448ca429248f9746ed97bdcb8679923206117156",
"blk.5.ffn_down.weight": "ccfdc9006acad1940a6bc05042a3947f1066acd671e0bb53b7684e9eea9ef5c9",
"blk.5.ffn_gate.weight": "623157679f1e742ccc3807c0b0153ddc8450104de75ec62f1370ec3807c09cf4",
"blk.5.ffn_up.weight": "05748804c65091f963729b58b085f58351891cac8a2861f5eae26b06aa60b2a0",
"blk.5.ffn_norm.weight": "84bae55af2efc8b8429f09056c8c04990c466dae31cb3f9356038b8957f1b406",
"blk.5.attn_k.weight": "8c766180c726b037d587fc52371de6e3307140c52409011609d1225624b6a3eb",
"blk.5.attn_output.weight": "490b582b3b1dc151ae55aee8b6743dad6c01fb49e43afefb6e68394b74be3d73",
"blk.5.attn_q.weight": "6f7b8ca4d9025ec836a44bbcca46be30c66b471a9fb62943ddff8288b3731409",
"blk.5.attn_v.weight": "9f70df3ba00c9e723214b3da83ff435a2163fff5915f75515c9664c05c866c27",
"blk.6.attn_norm.weight": "1a4a66613a682df6f061fc7c4d986f9f7e9175b62f0c42fc1ef31db536bd5942",
"blk.6.ffn_down.weight": "c56f25e4e49b443dbc82d88311ee63bc1f5002cc67e52f4787fd5f003aedeac1",
"blk.6.ffn_gate.weight": "31a5cf1aa9b831a81588d508550f51fc425f9517c43254d4ef7096d38029cf04",
"blk.6.ffn_up.weight": "ce135f3a1163e0c9297a615bdbe68a67ead21edce8debbfa9f6e15e6af8d4c94",
"blk.6.ffn_norm.weight": "4e328ce0648c94e732bc40501858ef6262ad1161e2e407b0cdcf4813fa9d45d8",
"blk.6.attn_k.weight": "1eb1c4c9f9c4c7ff7f5429075e0dc6a7782bed55109fa88df209a817dd8ef960",
"blk.6.attn_output.weight": "3d32986b56873b88655ee1edabdd413fdd9ab18b82108c9ce90bdbc2d3a6f3a3",
"blk.6.attn_q.weight": "8432f583b3a2809c99c393f9beb077cb0534dd5d247c17108f2986cadc6651f6",
"blk.6.attn_v.weight": "5045381513815bb91839dbac8335ffe49bbc7b0008369de7ea97eb676c5e2b36",
"blk.7.attn_norm.weight": "3dabd003638ec2499bfc8a48c49eef34276caab4fe76894eb963207848c2fdaf",
"blk.7.ffn_down.weight": "194fae858608bdcffd235be59ab119d0b91c8549f864ea06dae69249e099935f",
"blk.7.ffn_gate.weight": "00b24c29c30246892bce0791be804a89701d4c1332777e0bcdad5d9d5666604f",
"blk.7.ffn_up.weight": "44d7082a5280080c90cef9e19d410391de34f212ca0736377769b8ddd0c82d5e",
"blk.7.ffn_norm.weight": "21fe8a7fd6911c64e0d15a788b3b4cb6d71dd6ec51de65f760ee89afbb6ae53e",
"blk.7.attn_k.weight": "57a149eec5f6744a9526cd3925ac073f9d12db0fbcb5afe042ef4dc846458c44",
"blk.7.attn_output.weight": "0e9c28a3e81a2880251ce5eed77bcb8be8aaa1a51c9cb6de820b47ed83849fc2",
"blk.7.attn_q.weight": "15ee75263ee4e2a43eb322bc159ae004bb7d77e3a7e63ee4ddab700430693fff",
"blk.7.attn_v.weight": "440aa970bba4bff429fd7b7b1de21f2ad14fb2952b776cfa4acee68d7c6e9b8f",
"blk.8.attn_norm.weight": "af5b44825633c42c1ae964c82bb2be6a242d3a751f0a91f1bae4f593e8f5b6ec",
"blk.8.ffn_down.weight": "b11c14c76adca94fa200496dd2c10743becb23aab6642443ef1ae6d8710edbc1",
"blk.8.ffn_gate.weight": "7bb03d3325bf8637ae2fa1296b0651356515578d46a7c5ca65c7a923d7de27bc",
"blk.8.ffn_up.weight": "b956ef0a0669b5a9c9bf3a8da2d1c24f52d331cfb7354f6d7c51bd65be355e30",
"blk.8.ffn_norm.weight": "c78c3d748302edfef76f71ea5cb2055c94352122eee8b9b1173779a1814d224e",
"blk.8.attn_k.weight": "c0fba6a596ed9c1c32a7055c31a935a8b31e42b77282ee47c1f03ee3bde736b5",
"blk.8.attn_output.weight": "83cf9947080c5d8d571f04a842bc3dcfe7bbb0195fb25b346e22635e8649f2d4",
"blk.8.attn_q.weight": "47409350a576b333d97b7c877d69f47f46df504f3765102dfc0be9e521c7ecd6",
"blk.8.attn_v.weight": "1999dff91404fdcf1ecb34d9eaaaa9244ec7658a74dec8feb7cfd1fddba0347e",
"blk.9.attn_norm.weight": "1e6e29d5c3889ab4e1b0a5b9998cba60179b0f1fca133515df49cbc19d092593",
"blk.9.ffn_down.weight": "acb898a6490adff592e10b4c62d70edc5941661ee6da44658500e9205357c8e9",
"blk.9.ffn_gate.weight": "4cff63013593aadc3ffbaaa6ed70ffdba1224cd43c3644bf6f4162b5ac1ab542",
"blk.9.ffn_up.weight": "f985b5a2d6cf4fe32c7256301c3c89b8ad22b59e516342c52da42d8110766a4e",
"blk.9.ffn_norm.weight": "0d659c538bc6b21ed0018f107ab674a7424a00a42946c80e07208b479b21918f",
"blk.9.attn_k.weight": "f67611d888780d1b38c1c146b361c65310c8183bdf64fd73e2259985c6e8517f",
"blk.9.attn_output.weight": "f12ca1fa62a02ddc3f77f798bfb5707e0c50bf18ee0eaa67025521a98355f26b",
"blk.9.attn_q.weight": "3865185f4361a645b086ad47b72904c095313fb1c624e511647bf1a7dfc1c476",
"blk.9.attn_v.weight": "92125bbfed63544ab56052bd1e4aa453bbf34c795249ee54cde54907c8c6d1d3",
"blk.10.attn_norm.weight": "5d6bfbe545bcc2fcb2fc75c68f64b1f4c918badaf53e0156fe2d88aa977b2f94",
"blk.10.ffn_down.weight": "1dd9da8b0d2696ab5531fbca8a29c7d67567620a9d3e5fc2a19ec5d7e4c6cc8a",
"blk.10.ffn_gate.weight": "6e55e7f014edaebda0ac6819a426221d3b025c27312a2e18cc5806f31e3db226",
"blk.10.ffn_up.weight": "d80dde54af5db51241345ee8d64c1972608644f4deeac1e8195dc423bf27474a",
"blk.10.ffn_norm.weight": "f6ca65951d58ae3379eee8247bec34ebd0db05674cc9295593573841b8a55df3",
"blk.10.attn_k.weight": "b58e350bd6b49aba0fba4e4dd6865de3a2a0651ab865dbf2419b627b53ffc187",
"blk.10.attn_output.weight": "6b26a986e12fe66ec286a21d7d5af5eaa1bfe6f2bf502165d270e4497235a54a",
"blk.10.attn_q.weight": "3440e0e5b7e0d1e426424ae5a33f4e057be623249e9035ea12e57dbe5d3893c4",
"blk.10.attn_v.weight": "ebfadcfe14bcd6dee933053df0a67e12e7a196d5cc45728c1ffb2a2daedd5ca2",
"blk.11.attn_norm.weight": "3ed057b9576cd2de84507ef64c7646dc478c651efca4c2024cbe91a4f3fbf0bc",
"blk.11.ffn_down.weight": "8ff1c2487d22f5c499761e4eb721418f141f960160d0bab779595a34e4d68898",
"blk.11.ffn_gate.weight": "9c74e4507c7e45bf39b7cc7402198cd1dd77e3fff8c625b0413acaeb16efeb9f",
"blk.11.ffn_up.weight": "4367158007161d29939e00a322bb6776016e43f648a94f9b08a96a477aae75be",
"blk.11.ffn_norm.weight": "1cc0288c1491072121f4c9a0af20be0e13af49895696a3320e4fcac608768de3",
"blk.11.attn_k.weight": "066f5b3c144fce1366835e1ebf376f768b333b8ae29f5b478c42d1d0c809c855",
"blk.11.attn_output.weight": "e0d9f3d3f2c54aed59c02713ea4fb562799ddbacbe67ca3998dfc887bc44e47b",
"blk.11.attn_q.weight": "28d3ecc8a88cb3815e89a7f7a7d043da7a71f702b337a126e4d3a2ac1cd6370f",
"blk.11.attn_v.weight": "7c5cdef10ee73bca0a3b9f6ece5f0a0155664e0ce3d8de90ccdccfab5545e5e7",
"blk.12.attn_norm.weight": "973b133301a1af760cd7b3a7955371ea0a750808b442deb6adaf7b98482bd0c6",
"blk.12.ffn_down.weight": "d6c87b4b4ca03f75546ddd6a9e7fca720585a309188723c1ace8122438d4b200",
"blk.12.ffn_gate.weight": "2189a6e0cab1540bd05d6089b922aa8fd694be51255654933c165f302a0c955f",
"blk.12.ffn_up.weight": "5affbec19b58d092b9305721e3552481fe2eff51269ea3ed91cda3b9ef84d4df",
"blk.12.ffn_norm.weight": "f650fd42a34e950f758b4a130e7b8b1a712b1dcbede0291bb8edde47aaed0ef6",
"blk.12.attn_k.weight": "59b1e86f10450a7cc188beefc0856d2dcf44e8d7fdd9cd8859c30ec1ebaf24b6",
"blk.12.attn_output.weight": "446b0d36b2f66bd72a2323f4f4e9d85a0f621e9a58872e89a27248d6b1123238",
"blk.12.attn_q.weight": "3ed6bfd39f040301ed99fad882d3e569769d594259f9948445bef0e44ec881fb",
"blk.12.attn_v.weight": "e73652cd5d0029b1931be3ba9d82508f6696dce5a29d085476a54fb7a2ddbabc",
"blk.13.attn_norm.weight": "491b85278c0bd67bd31b9b8a9720902c244bd067e53a4a03641b7c0994782e82",
"blk.13.ffn_down.weight": "ad71cc248a85e9ced49307a24a9bfae01d387e979a7689c82ff59998e09741f3",
"blk.13.ffn_gate.weight": "0a55984d53971fab97575ee0ef5882013be7fdecfa76e3fbebb5dc85a07a14d4",
"blk.13.ffn_up.weight": "378b697b35e2e53c0de98e8e29b73d42ae3ec112ec16129aa5997a9e2f3b5943",
"blk.13.ffn_norm.weight": "f8aff2f69ab286210fad45a62b03f8d10b38f96a420d7baadf6b95d7b0b0bcd2",
"blk.13.attn_k.weight": "25ceb841afb1034831bea7f4d6a6c578def2ce4d4c412c780ef147dc9a598360",
"blk.13.attn_output.weight": "a242b322889c6bdaa14b67a7bab593db39df8eea3721638ef639abbb74d482e3",
"blk.13.attn_q.weight": "d80be9945a369439e835c55cfb0e97828b8a66bb7ced534d9059c92487bf20a9",
"blk.13.attn_v.weight": "ac33274cf9b67979d9ecdc967a55175afe0c9c4aeeff6391433cd9840c818706",
"blk.14.attn_norm.weight": "12a1e1091de5b2da12c9e7c0b1c8e6f09ce2a749733cf7d5240445b8e21cd093",
"blk.14.ffn_down.weight": "cfd41965c88266e32bc2dcdadda512499c35519e8686fefb9a7f249ab2291eb5",
"blk.14.ffn_gate.weight": "8dcfe774f07a095c7c6cf0a901c9df70d938bad7b5ba347fbc8f694e7603c0d1",
"blk.14.ffn_up.weight": "c7995577fe4a72ea0fb17c4a7b6b87b959072bbfdd5edacc6c367d43465809ae",
"blk.14.ffn_norm.weight": "81c41ebde41739e7016ffec31d2256217b825dc3cae049a935f5f61a60d22003",
"blk.14.attn_k.weight": "fb708bdebe4384f5c4b479c110028554f4d122f166b8091eda7d8d65e6780eb8",
"blk.14.attn_output.weight": "f5295caf2dfdc60553dcabe17537a80577e8b153c902247daac058df23542514",
"blk.14.attn_q.weight": "c12b7a3601c68c63ab5dc9d2599ebf3f3a10abc2c59d3a2126fffd5818f2763b",
"blk.14.attn_v.weight": "1ce968d9149bf0d5e237d52cc6d6433565b4bbf03252a736262bb00a2b34a687",
"blk.15.attn_norm.weight": "266fd2c36d7dcefc6b6bb7f1c9374c41f2bab5d6c84a063b6f91c4f682dad3c4",
"blk.15.ffn_down.weight": "6154886e9ef0a6cc08ab0d264a35f497e6f0987efdac992ed04e87088bea7801",
"blk.15.ffn_gate.weight": "183d9fd3c1b5657840099053d2fd3f72ad953b1de523296159b7761f20491a76",
"blk.15.ffn_up.weight": "51546d4498842ae2340ee226a0888d5f61e7d2ca4d052dfa06a77b0451242d3d",
"blk.15.ffn_norm.weight": "ef7378091a41a25a5f58bf1bf9d3bc64ea562e7f421e1c232b1f177c30fd3500",
"blk.15.attn_k.weight": "8d556ab8d9639324141774999b6eed0e91d7ee645bf3e7a3dcd200b2e7a00751",
"blk.15.attn_output.weight": "54aa6ba87def7cbe18b0c6ab3aff5c351cb3b6ca4a0d7b2cd5f75a1312991429",
"blk.15.attn_q.weight": "10731b0dc031ea8e0ef37bd7f010e0a78518a10a6df05a8bae48e3148b73ef3e",
"blk.15.attn_v.weight": "cbbe50c2ed7224866d3cf9b489c599f3ec41a4ea1aa3181e9f4e87e1fa0cefec",
"blk.16.attn_norm.weight": "387058eb39d4b28c04cf1368247417f1faeae8ae79d894c9f293457e0eaa00b0",
"blk.16.ffn_down.weight": "2cb26ccee585e933401ad5c82ed36ddacb3289efa0b28f8cf91b020ffbd9c333",
"blk.16.ffn_gate.weight": "d745985efb5bab42304e5d509024631efe35f92f2b2ec4931ead6db97ca9727e",
"blk.16.ffn_up.weight": "7a67bd195e0642828ca36eb7818149bb70c2c25f82de07e2b5807c520daf540e",
"blk.16.ffn_norm.weight": "7cefd061c8182482a89272f8a4e88a954b12609a62716923ca1cb3593b1c1651",
"blk.16.attn_k.weight": "d7968a2de67e755b4533e061aaad1cb62f8882af92dcad67f99d6d5112513439",
"blk.16.attn_output.weight": "9e9ab5788272ca3394ea89eadbce8c86ecc3fd75b7899184d6191c134ad9aae0",
"blk.16.attn_q.weight": "ef81c261b536c1a3a093b33f44cf2d42b86e5aa2d821674f07a0c80e992ed925",
"blk.16.attn_v.weight": "aef38e7958301b4a437cbdd2fbae6197f677b09269ec1eaf63188cd5da428d25",
"blk.17.attn_norm.weight": "28f6b289f1bc3131041e9f791b7a2a3a48baee0dfea27bf7051ebbb7ed364d80",
"blk.17.ffn_down.weight": "1a502829aafc6a9bd6bc81f12573bf8632d5c8c659f0dfb13c8b2411f3b1ec05",
"blk.17.ffn_gate.weight": "ddfd8aa0eb98846ebc9afe31366249159f46ae9815199dd70161527ed241ac4d",
"blk.17.ffn_up.weight": "4211a3cc247071bd361b30de2131d02382f552855062bf3b3e004c17992e5d09",
"blk.17.ffn_norm.weight": "647e5fa99a5b0d232af36d15816539f4d27e60a50a341b00aa88bb6e4474f8b9",
"blk.17.attn_k.weight": "d9125ff33a19c502c0f8846433ffc24395048582fc2f463d34a0301a82156f02",
"blk.17.attn_output.weight": "3d64fbb1cfef04444827f37c35fd9ad3413eb2165094d339ef89f00503f09de4",
"blk.17.attn_q.weight": "e5b29424028f578beca385fd82e29f37adedf3037cd51e5889d5a1ffb0428ca7",
"blk.17.attn_v.weight": "1809c5aaf2ac04c5d65539097564ad62796e87d24bb8b9ce5b095561a61d908a",
"blk.18.attn_norm.weight": "99daca58d001c627523d3adfbca1d95f04e590382a326866544d57989d5f4835",
"blk.18.ffn_down.weight": "84f30231ce6ca0f10227541dfc602d6418c1a210386b0c4926ef1656e7d4635c",
"blk.18.ffn_gate.weight": "ca5bbe4468b541740e54f69b9e08fcc8e478c344b70551dab21b1206acfbaadb",
"blk.18.ffn_up.weight": "0b3067b9dded31686dcfdc1e247eae3974a28a61ac59e9862758dbfaad64e8f7",
"blk.18.ffn_norm.weight": "8154a102232dbc0f90ce77ae5c1ff8f26f8b6e4dcf326e9ec1645749669e7960",
"blk.18.attn_k.weight": "25abb26021ccc481471a30e0d4cbeb7e1db29828417ec5136edeb93fecf09ac4",
"blk.18.attn_output.weight": "d87d481d9b046b68efa06ccdd4ed8cbf61e692d61114b75b7fad5ed75f5d87b2",
"blk.18.attn_q.weight": "cc6400379e15766992ff1293be79dc67682c28e9e15155a78109f4b64653b164",
"blk.18.attn_v.weight": "45c75cb1dd496aea3173aafe2575b841dd1d02cbe010b3198099731eb98f531c",
"blk.19.attn_norm.weight": "65389efc75297684773284ef8e5f8789a4504b636c9f33b8a32e0ee42499fa72",
"blk.19.ffn_down.weight": "4eefab7e939f64a17e4a214ca3c77a6fa110d94f677e2d6401086f70fc538b04",
"blk.19.ffn_gate.weight": "f1c0a59cafda66f466ab585b0b8b4861b58abe87a67cea1f6a488492242edfdf",
"blk.19.ffn_up.weight": "c42d045eef588db4a0e56960a57e110e1ff92eb8041107d19899165fd3b90f17",
"blk.19.ffn_norm.weight": "a8f33eda6d5d62ff5f333ad9771783caff556641f4e7df713451385676f441fa",
"blk.19.attn_k.weight": "0bab5d9e9083492bfb05a5a3bb23b79c0e7b99ef6a6644817b4d57d5c453b8a5",
"blk.19.attn_output.weight": "c99c551d70eafad0f7aea98fb6f9251635897168eb3895f76abf0d4ea3b3aa6f",
"blk.19.attn_q.weight": "c98bde95627c3b54c9443813ca50b4e14f518319681db6bbf7b2332ba26e9a60",
"blk.19.attn_v.weight": "ff3a490518cf64904db89ce0dc7d6eb89e870f1440e41883c6b55a221f82de84",
"blk.20.ffn_gate.weight": "761f0e317229cafe9d3754048ab038a0a84e9a287b196ab65f633139f2d29aba",
"blk.20.attn_k.weight": "45d13439b41066d282e8490a726785abf513605f46c79bd0c840f6419d27e790",
"blk.20.attn_output.weight": "a3b958d84b4a097844179b7d55c18fd0e4f319cb15e918c6fde33b68de1bcac6",
"blk.20.attn_q.weight": "127ab8e7d8c3f882874904196a02712bab42e6744fde45871b67350609d19f5e",
"blk.20.attn_v.weight": "5f0ad2d14a8ae42dd3bbeccfb33295687a14055fa92c54bc946249373c1c9f17",
"blk.20.attn_norm.weight": "77300b1755edc8c70089e0f45efa646056b9add7d8568b2324d2f3e62b64971a",
"blk.20.ffn_down.weight": "ab93d0e075b42e9017b701a070d561e698050d90aac4b4b9919256fbe50c3204",
"blk.20.ffn_up.weight": "4fd6628a07acc57a48d1ef83f81b7d7aa0bce569c1160a99d307284f8821322c",
"blk.20.ffn_norm.weight": "2a9e46b9e48e8e55215de56592e1f189530037c1c94a1428e3d6f106c7f26fb2",
"blk.21.attn_norm.weight": "4b3b5912c7bc61eb9da8e47d4651f896e85d9e59c4ecaa65df7acf3c21737298",
"blk.21.ffn_down.weight": "7146f931663d93b8771cd84405cd4802ea6560d0729b0d6d44588203c095bc53",
"blk.21.ffn_gate.weight": "b44ec5d64388fa40b90b3e9976d97a8b6800fa3b97584f32e64b03daffb8601f",
"blk.21.ffn_up.weight": "0cf3643fd23c685e17062cd11e116e17ce57a405e5e78953bab94cd62fe48789",
"blk.21.ffn_norm.weight": "4ef2cdb53da166df70b39f3e6b17af51848cfa5ea3c27ad6a1ae2a1bb1da1ce9",
"blk.21.attn_k.weight": "5d40f32a706f670c19972b14176bf660d5b045e3637b110dbf8d7de4ff32101a",
"blk.21.attn_output.weight": "18afaa916752ce16c9653ec0ec7e2fe60be55faa2aa5025d147be184adb75cac",
"blk.21.attn_q.weight": "2621daa5f858931514a4b2f0fe8d81cf9b96f541e6af99bfa7539e9bde8e34ee",
"blk.21.attn_v.weight": "63226dafc54c899bbce4aa49efceeedd8908e94faa613450fdda91f332b62864",
"blk.22.attn_norm.weight": "cf3058daab4d2c04387e7d169d1553bb8e7358eea66285ec067703f6ce62043a",
"blk.22.ffn_down.weight": "6a58d5fd220abdbac6cee7ba048abab794731af318f04982c2506df59413d0b3",
"blk.22.ffn_gate.weight": "d5614535324b03c7b91727a903b2a72f8d07ad17f7aa8b61ea173cf9b895069e",
"blk.22.ffn_up.weight": "ec20da3949566e93f66cabb67f8cd7eab399047ec6ebf5d43edfaf3669b82296",
"blk.22.ffn_norm.weight": "84c82f38f53a649972a44466fc476bf764e064ce18de870291edc302f3700e28",
"blk.22.attn_k.weight": "a3d2ecc37fde7c201176bb8abadf27f0d8ede9679a6034913e03d9db924fda12",
"blk.22.attn_output.weight": "5a3b8bb433f43a387df43dd371bdf80ddfac986dfeaf38e9bac1d7a0ec6628de",
"blk.22.attn_q.weight": "3a875cec661b4859f30a8fd2c866811184b25b68c9e36fe2663d299caf8b59c6",
"blk.22.attn_v.weight": "8717a83b79035058dcfd3ef6f8e5b36e71d77379e5a239e1899eef8766fb7703",
"blk.23.attn_norm.weight": "2b4a68a0a2f023dd646e4755c9bef17c2f631901154afd839edac7ac006ec99c",
"blk.23.ffn_down.weight": "29499b1586c6fc4883c9b7a9c8cf388035146b5aecf90c5c4c8c8e082c71e7d7",
"blk.23.ffn_gate.weight": "7d6554036d21c587b9b556428054f9c15cbef96d24b257f906fcef4ae38bd9c8",
"blk.23.ffn_up.weight": "19761ecb288d6ebd44b681c4535661583b1e19dc29e96d0c007333cd8f00aacf",
"blk.23.ffn_norm.weight": "37dc35500790a4ca33807b39cf7af65065e535dc25b9e94f3ed2759f61887ac9",
"blk.23.attn_k.weight": "717547d00323817b0cb40a72ec5f8cf42ecd1f9e3e42715c2cc5e38f07fffffe",
"blk.23.attn_output.weight": "a24786feb6a905fdf166d7500133757cbe494779d4ebcba9eb03046b319557df",
"blk.23.attn_q.weight": "6a2c4a98f138b928d22136efa163562691d3b4ed526d52d46a2fa2694a8f3965",
"blk.23.attn_v.weight": "c6e6081eb9c38a7fda023085957b460e9ea321e1fff408b38c2b58595c39979c",
"blk.24.attn_norm.weight": "5e6283f891e538670425f3e244b08dc6f96f33dfa4aefa913f8eb17212421850",
"blk.24.ffn_down.weight": "e09eb170f389deea0a4a1cbfdb52c12490768a2c60491b7bef8a4c445e2a08f5",
"blk.24.ffn_gate.weight": "af29d815cf49a38fc2ebd0bf9b2dd9933d023a29f2d766981acb9a1b53f09117",
"blk.24.ffn_up.weight": "36ccd9333426666de9d3088bd4dcdf5b624b09dca9e3a83a22fc0383f2d950fa",
"blk.24.ffn_norm.weight": "a88e1692318826db6ac42582d182e51a3c698c655d0e21e04fa086318832d07b",
"blk.24.attn_k.weight": "f7d61d6d1225289bcc502e3bbb0168b4584add0253218c1b77ac92ccef9a1c2e",
"blk.24.attn_output.weight": "85a1363b3ccc87312094c2195022687c16b0dad7fafb9e80bb4ec474d53c29ac",
"blk.24.attn_q.weight": "53482a2c008f42f4fad779ca323addc3712040149dfc12f782417756388a72bb",
"blk.24.attn_v.weight": "67498272369af7dd10097c73b07f731b565cfc9a559e711cc0d526389e7b44e2",
"blk.25.attn_norm.weight": "98dd617def5cb7825ee4833132ca2da2121245921585e1d9e36b93344adc321b",
"blk.25.ffn_down.weight": "7fd477d6c50aed5f424a878dd284343379cffbee8a34c0b6e55100c8305fa13f",
"blk.25.ffn_gate.weight": "f892c9806c8ec22e8aa746734ac9213428c534921cf161239e1d249fdb5d1ec0",
"blk.25.ffn_up.weight": "528bed14c9bf9762f790525ee40412545221f4321d2a2323fa8e73c58b7643c5",
"blk.25.ffn_norm.weight": "ca5831966672e7be6a578feeb631ec3570d3b5afe12860819ccb96e896ffc346",
"blk.25.attn_k.weight": "610d3068cc9b20401f0c3a0efea39a279dd9f564fde19baf3403b2ec2319e4c4",
"blk.25.attn_output.weight": "798aaf702e53b657265ac3b5e6caf3a0ab515bdadfeb1a3a156b4f3bfba76666",
"blk.25.attn_q.weight": "8a7fa25248de83029fb97b51d036a01baebe31fcb4be121ab00dd8b7de209b10",
"blk.25.attn_v.weight": "2a53d5e9f8a1218c66958c6388d3b37400a9af7956c785024ca44bfbc3c7d371",
"blk.26.attn_norm.weight": "5f44fc043481eb0771f3e6d2420bcbcf73140afb9a9feb8eddb6575452acebee",
"blk.26.ffn_down.weight": "944a60a409d0d5b6a851e33c69aca152454b691711a8b96f5bcc488772ab2833",
"blk.26.ffn_gate.weight": "2a0ca4abb3de5593e6693d8be69b63d6d1a639855ac8332a75f520353f030c62",
"blk.26.ffn_up.weight": "0b1df496163f9ac07bf89375d3eb441b51a81d41b47d769a04a61efc18dbe35b",
"blk.26.ffn_norm.weight": "56b8dd046e9be6ea71f7efd80dbd14e7fb1aa020d3cd38e063275f3873fd12f8",
"blk.26.attn_k.weight": "b1dabfabb970e6971c7ea6e53c63cf7ef56341e6a2edd9cf177785cad9af2f9a",
"blk.26.attn_output.weight": "39532c7e836baad164a655fb97ec5114ea4da37ffba9fdea2684f6e4450e6f84",
"blk.26.attn_q.weight": "8f48bf6aaa1252bc149e98af2be1777a5c0d2c3274c6d314171ea9344a41b604",
"blk.26.attn_v.weight": "02fb145f7fd905133750e90571effacadddfd3f4966552dc59982ac3900ab8c4",
"blk.27.attn_norm.weight": "654d168fc3cab716d91261f5719f180b7d697218401633b4878a759f1b5283f2",
"blk.27.ffn_down.weight": "2823272bec3a1c12f02cc4cb24aa4031abd7e9dbe0b02676e2305b21671818f0",
"blk.27.ffn_gate.weight": "b1a1d40cd02f97182cac17a79971d1934ee0daf3aa0bf11303568c636e208a64",
"blk.27.ffn_up.weight": "ed62ec72a020d070e64eb7b50237b32213944727b5b2427f45d989f50df5fb2a",
"blk.27.ffn_norm.weight": "c69649ac65d694b306a905dee8b03b89eec1ed188b1eaaf38f8e29d4b12e38a0",
"blk.27.attn_k.weight": "cc57bbf413f1fd227128dc66efc8590c73634cbd6f96d01ec4878b5e7ca6a925",
"blk.27.attn_output.weight": "cac407ad02361d53207b3c7e25ceab84dcb4347b8087055162e2efe14d11d84a",
"blk.27.attn_q.weight": "0af18e07cee12015761c07c94407024f4f4d77d97bdb24163db0e16669e2cef3",
"blk.27.attn_v.weight": "a1d08fbdfa40af773c5adcf93bd68b78a44ed144e3fc6bbeb8af02e937527eb6",
"blk.28.attn_norm.weight": "f39a51f814512b040a1082143150e4a49ff730f85cef49d7f77fc79d83e91f40",
"blk.28.ffn_down.weight": "74f29ed51055d1c1adb8f0660bbe538a27e016c65650f2d67efc6f1c84fa1b45",
"blk.28.ffn_gate.weight": "ae48bb16487ded6781c60aafc0bf738fb4ae15729952906f247d216592ce249a",
"blk.28.ffn_up.weight": "543009727718ac22f11ee4b17815f68ea6f15ba1f3e7ed5ecdb755cf6417565b",
"blk.28.ffn_norm.weight": "b8f9e54c322079ff20a82b88948cdc2916c22c7db40b9a9ed6d3cbe89efb727e",
"blk.28.attn_k.weight": "55d055ba653b728d6e784f9e013786fed07115c9fdf23367e3941386d5e77db8",
"blk.28.attn_output.weight": "155101c03ddbf18f4fd0694bfc982f33c7bae25c9b087d6f5273c2bfbffcf2c9",
"blk.28.attn_q.weight": "1ed19bfdd22e9c14eca014739982492e9516d411515a8585f65cf754d849e53f",
"blk.28.attn_v.weight": "11ba854dd575c025d37256eee9041f6d1bd2b549a083d6409a09bfc1542913f3",
"blk.29.attn_norm.weight": "02b0bf5e2fcefd11a153cc988c81ba672682e4844fcf6442423e21a0e10d566d",
"blk.29.ffn_down.weight": "594bb692ec2779938721ff4748666ca8370e0e4fe85229503f616438b8884f5f",
"blk.29.ffn_gate.weight": "8bedcf47e91dcb2cf4093de56b048ee411faab6ff472f89ab2c9c113a08e6967",
"blk.29.ffn_up.weight": "e241a547b5fd6dfca8200b8141e21c1c487a96cbc4e5855f181a7ed1be91b642",
"blk.29.ffn_norm.weight": "e63eba5e4c6b288bfd9f15e46e236086456c8b7f1f9c732c0b5de84962a2e7cc",
"blk.29.attn_k.weight": "afe5979d5bcf211aebb526620f5974bcb0a2c39c8be71e815575c55d6385e3aa",
"blk.29.attn_output.weight": "9c944ed44b124b014906fc240afd3b90aed56bbd9567f2eddfd5b7a685b3cb48",
"blk.29.attn_q.weight": "e234e08e5c1bd9245a2edc8d63e9933b6b879f97c01392209cad4f55f05f3ada",
"blk.29.attn_v.weight": "5cb8e3e5f954e775c5a5e4de7a9a62b17e9c6931bb0ff0e2f82c4126fd3e1a1c",
"blk.30.attn_norm.weight": "a65483ee51a0b214144ec8a14f28ea5437586e9e12ebe342a57d1f8627ee12af",
"blk.30.ffn_down.weight": "417959da77ceb33ead4271cbb9428b195196173a893c44e52880a7ec61b4856b",
"blk.30.ffn_gate.weight": "a0d503ffcbe45dc927600bb98c9f6082487e65cb577ab545add400d666a87638",
"blk.30.ffn_up.weight": "f8ab957b82ffcd10b21303cb5e866209b6fe95f827b1b94e9a949207952d12c0",
"blk.30.ffn_norm.weight": "210c7ceb0514a9ef27b5d4d1b3aff6dde43f1af0345a050d71097940e0e73e03",
"blk.30.attn_k.weight": "16861b9abcf5a3fe73c93d977ca45a1e6daa65be0fd85c2cff53486ce2033afa",
"blk.30.attn_output.weight": "ca541fb2e57e2257118c35784845b0c731278af8db3036ac53d71aa1681fdbdc",
"blk.30.attn_q.weight": "f7834917748e26bb456b945e230bc926c228e93696bc01fbc2b134bdeeac71a1",
"blk.30.attn_v.weight": "9292783171dbe5eb689d17c9bda11e537f0e9b328fced6986c938d61ed590e81",
"blk.31.ffn_gate.weight": "e4766a04bcd8f937ba883c6a144101e546747804ca66c35c97281d6ccb47b566",
"blk.31.ffn_up.weight": "cc1e666116f7e6b06736db4aa4b81003c583f54f4d9200bfa48842249940e16a",
"blk.31.attn_k.weight": "fc80b57557687504efae7d24265cb7dc39b8f826bb3d897a11783012dbedc44f",
"blk.31.attn_output.weight": "215617f50a1f5d9b2250b82f3652b35a9e9aa0ad9ef2b485d73965a14b2b872a",
"blk.31.attn_q.weight": "274b4f1dfb0bdec28632705677049fb3e327ce6d9e1f3baaad1560439039982f",
"blk.31.attn_v.weight": "e641b8b926f9dfcbbf6b6da1c02555525ac4b1c306d96f20cfbba7d6662c4e56",
"blk.31.attn_norm.weight": "b3243c361d4041ddb892ce6862dd5091f57d87357e3c67e177451b85d8baf34d",
"blk.31.ffn_down.weight": "0a00cd3ecd5e91624a27f9e239b1de425d5ba3cfff82c256a11a4ad434abf3c2",
"blk.31.ffn_norm.weight": "2a0d67ea2bb1303975712243f07273c92fce83baa11b1cd6d8e42e74ea3c810b",
"output.weight": "768615f077fb797967844571c58b94d7c399d884d115be3ab4b0154504cae892",
"output_norm.weight": "7cc5b7ce10e5082000fa00bfa68af8c7c5da218e59e2c41cf2f1499d40ca229e"
}

View File

@@ -1,3 +0,0 @@
{
"rope_freqs.weight": "80fd5efb2f729381785b293a091a268cfeceb0079167f6ece9b07070e662b222"
}

View File

@@ -1,313 +0,0 @@
{
"general.architecture": "llama",
"general.file_type": "1",
"general.quantization_version": "2",
"llama.block_count": "32",
"llama.context_length": "32768",
"llama.embedding_length": "4096",
"llama.feed_forward_length": "14336",
"llama.attention.head_count": "32",
"llama.attention.head_count_kv": "8",
"llama.attention.layer_norm_rms_epsilon": "1e-05",
"llama.rope.dimension_count": "128",
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.add_bos_token": "true",
"tokenizer.ggml.add_eos_token": "false",
"tokenizer.ggml.bos_token_id": "1",
"tokenizer.ggml.eos_token_id": "2",
"tokenizer.ggml.unknown_token_id": "0",
"tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
"tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
"tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
"token_embd.weight": "cde834ccac5e94324b25cb81b02d27312cac0c551b55a7e1d555d90bf6cb6e81",
"blk.0.attn_k.weight": "458bfdd9715c66e017c2447b1ed3c582963a3111479314e664faad8c914f42be",
"blk.0.attn_norm.weight": "e1fd60b95f713bae7b7e3ca933c64ae6c9cd1e8d808000204bbfdc19f0ba635b",
"blk.0.attn_output.weight": "df13b6a157d9d4f96c53b012b3b9bcd207d0c94144cbd22ae3ec13bb07d6c373",
"blk.0.attn_q.weight": "13b4126b4245bf06c915a93317c42b8174e05053535ec99dc576541e4cec7c25",
"blk.0.attn_v.weight": "5b1781d3a341214511b27eb4e268674ea3ea829dbdf8ae5a6bb89b3c0b33fafd",
"blk.0.ffn_down.weight": "49186f5d8148d316b07458841d13a2e66587f4af69b776188a809591ed9c070d",
"blk.0.ffn_gate.weight": "4397e30ece09136f00f4ff84ff49e5241b765a374deb8c5a12e897e2bf73473e",
"blk.0.ffn_norm.weight": "43260589aac3850a779bca3f9649f793bbfbe5db538361cb743b3830217f8287",
"blk.0.ffn_up.weight": "fd7ac918240a07566f6967527ffca58fcf433a30b78fdd6d84b2136d4ebd9987",
"blk.1.attn_k.weight": "209839566c7d235bdc20565a4766378b6ee8553133a5a3315abe8a85baa80712",
"blk.1.attn_norm.weight": "58c52986f7c69784ba327cb7f350923420782bee17fa39b1fbd13839d4005357",
"blk.1.attn_output.weight": "5067cc628449682665dfcf59b16e58fe2a9d2a81cb099f0fcd42f4f8670c6740",
"blk.1.attn_q.weight": "f410f9f0dd5edc09401af597d02e2a4c727f1502ec3ec3898321617b36c6df6b",
"blk.1.attn_v.weight": "d40fa49e07c102c0644e130e7909eaa93ed0d54e2edddc0759e721d58a4e4f5e",
"blk.1.ffn_down.weight": "594b1eff6ed4defbdd819fabbe2d48764984f08878a860bdb808511d5a25b8db",
"blk.1.ffn_gate.weight": "4cda97541e388a5bb607ce4cc8b3db1da7045830a630e7ba4d17807befcff346",
"blk.1.ffn_norm.weight": "66c13d7481be65b97aa474735ddc9674f33d512ddda76fa6fb45c7464b09f1ed",
"blk.1.ffn_up.weight": "1adc6de288ba4cc1237833ca8b4eb81107149842e38bc452e18e5cfe284338a2",
"blk.2.attn_k.weight": "5420423559f236ab22d85a00849f31e0cc6e9c7dd879de724393d8cd2b379153",
"blk.2.attn_norm.weight": "495fe1ab40cc52aa054ddd4f0c2d2790f4326c8d103296b1b38f3b1060db2a24",
"blk.2.attn_output.weight": "ccb83e7085381f558bfd65588c525ad2671feddcbc3887afb4038ad9c7aac348",
"blk.2.attn_q.weight": "2e8f77478392bc93c2a391f2e0f4a173a952bbab88a7aca099c6ee909726409a",
"blk.2.attn_v.weight": "d64512590f3b7ebbb9e77c2eb97fbda90b00d45c944f2b174f03a2cb11007567",
"blk.2.ffn_down.weight": "1de5084a05dcaa6b1bd926e83517dbe9ebe7fde79235fe56018b3028b1aa6397",
"blk.2.ffn_gate.weight": "cbea526b557f49aad8c976973cf367fcd12175b900f551984f498b9e07e4b7fd",
"blk.2.ffn_norm.weight": "530aa49b10c7eae08899d143409240deb95dae4e1d5bf78cea3b26393cff3ba1",
"blk.2.ffn_up.weight": "13a5fc19b96b4dcc1e9bd01998c8272ebe52034c1933ed123a506b711fae9a5c",
"blk.3.attn_k.weight": "1913b63a73305941d8cdc472e7f101c633d3357a78602eac0a4b49a744261075",
"blk.3.attn_norm.weight": "9c11bed5ab41f4adbfdae4ead65b525c8f19443e656a8c61ba412a4e1ad1193b",
"blk.3.attn_output.weight": "bb0b42c1d34779c5943272ed71f1dbb31ad8edd75f8bcd5c868f88505ac3a610",
"blk.3.attn_q.weight": "3461a1fe4e49f5319ea047cae98ccdb46528a3ec23831183fe87610b48c94948",
"blk.3.attn_v.weight": "82aa30be6a61526a41fb79bb28a2617416f5909f0477aa9e95e16be9370fcb38",
"blk.3.ffn_down.weight": "68521011ae03f5e3b0966127111afa8ee9f2eaeeef8d3a0b86b633e0332e9fbf",
"blk.3.ffn_gate.weight": "1e89e26338fd364bb679695968c65106382f15ad55c95cbb5ec9bdfeb766f432",
"blk.3.ffn_norm.weight": "c81932529a5a8c417c27b888dbe95fff8b447c2ea5f6f560444ec5d50b93832c",
"blk.3.ffn_up.weight": "305021735afd8669afefd713f56137248d5e817e60471a112ad06b7fa07ffe88",
"blk.4.attn_k.weight": "cc26ba5c5c28082a79e6abfe61186029e80b145252ca6a7924c437f0bcf2d51b",
"blk.4.attn_norm.weight": "302d251fdcc91f7468cf33f80b49484251d8917d7018ad264ab3a85c8ecf9ddd",
"blk.4.attn_output.weight": "a012f5bee3520cd4ce51f0076c132ebc3653309f304032ad051aa308f55f36de",
"blk.4.attn_q.weight": "3c8d607e447f5ef21e73af71e3c0d32fae16f91f31faae34ff06912cf9cb68fa",
"blk.4.attn_v.weight": "49f6c81a634ce46d71c2350206ecbd231b1732af96e4e4e67693c41a07e007d8",
"blk.4.ffn_down.weight": "e89504f311a4a34dc819a67b761022f14d71c43df3ead4f892c87aaa8e9f0adf",
"blk.4.ffn_gate.weight": "18b22f079a2fbaefe3572eec61fdcd996fd747724e2f0ff4f08cfcb43eb7bfb6",
"blk.4.ffn_norm.weight": "22415a492c168a0878912b05c854a631228b01c3ea8842e1d75989ec46c18a65",
"blk.4.ffn_up.weight": "f57379eae2874d8853f14ddf0f0fcc4ff1338574d5ed5d7e88331d5fb84f5642",
"blk.5.attn_k.weight": "d627af853c40bddf9762ce3988008c1ff17f2686fa8f73a0b5da38010147c316",
"blk.5.attn_norm.weight": "9ce01092c7f7f1c3ef72d6b794da12d77aa1f6a24fb96ba1b9bd5a0bcc3e2443",
"blk.5.attn_output.weight": "0388da8064c4b6b795ce2d8079e8a36535e82b2c9cf794e38ce8ae460aae726d",
"blk.5.attn_q.weight": "039b7ce1c909761fdf475c06cf14cabe5a90199282c89e4dcf460e95a4b6275d",
"blk.5.attn_v.weight": "c47bfd8d2496bdb6e00e03b903e15fd0ee806a515094ec257e43cc433147ab7e",
"blk.5.ffn_down.weight": "1d62e6708974bae318cbf00a8bf621d9ba0537e549ce4710a536520a8d14168e",
"blk.5.ffn_gate.weight": "8b42b1b11c92db19985094cbb50434e3a7c9cfea71ee6f21ea79eae7c49284a5",
"blk.5.ffn_norm.weight": "e0bc520f1505e687ec391d632a381d38d8ebcdec19f614a11a2000ab573e8b7b",
"blk.5.ffn_up.weight": "8cdcd17d2ea89bb9ab902dbc6bf3f827fa4ee029c6bf19eecbdefd146d8b6f2f",
"blk.6.attn_k.weight": "5dc6bcff89794d1756bf57ec665b58622d9352130d31082a6c66e1a079f99932",
"blk.6.attn_norm.weight": "13b26008abe0f119b5104b9d78ebd5e797d3cdd68122b93d73a3b4831a54d085",
"blk.6.attn_output.weight": "f5a49917ea70c3fb311ccfffbfafa63ab18416a5d55e5429b70ce8bfba57c075",
"blk.6.attn_q.weight": "d9c2f652c87dbd09ec3822e12876648fa32e86553ac25afab723b1cd9f8cef90",
"blk.6.attn_v.weight": "5ecc5fe67609a35151011cb526f45c56fc0a999079ae0ff37c755ca03c68c555",
"blk.6.ffn_down.weight": "0ec125ae0ecb2d9277fdb1b04f17efee94e37d0ae37311057c212ca2db3fe6d1",
"blk.6.ffn_gate.weight": "fa4d6d38355ee8aa3b80b476d65ae7e343c9b7770d7b097fc848ee8a6e091d1f",
"blk.6.ffn_norm.weight": "30e8f7defc627532e1739dc76d31223d45767391a431f925b63dabe334b0f392",
"blk.6.ffn_up.weight": "6b97cc32b290fa9087806b5d65aa6dc1760737730c8c71394cc4f30c2157f9ab",
"blk.7.attn_k.weight": "0231cb127cb7c3714cd72b8f39343891d7715a9bab2237ade9e7bc5f4ed2e68a",
"blk.7.attn_norm.weight": "7c3187f07eead7d219d98ab2daf87905e88d5f1ace109b6f5fa55dce3914981f",
"blk.7.attn_output.weight": "2f30ad972c284ae7c8eb0482053433495ebe8fe9c5ee2c28b4bc4ed1f33050fe",
"blk.7.attn_q.weight": "3a2b4b8d61cc9956d304fa9f82a9e65b4bb9fda2196670b16df7e0d8c43eff2c",
"blk.7.attn_v.weight": "d2aab97d0dcf0f61dd2f32848f7a8a99c423a4948a660a660a03a546972b8db8",
"blk.7.ffn_down.weight": "2270d520468c5549cd30023ff9c452a277058310104c4239a616373fc5a94387",
"blk.7.ffn_gate.weight": "4134a3ef71b3eac8f76b6f1a2e58625b3bae48081f175994bc3ed7d8b0d4f2d0",
"blk.7.ffn_norm.weight": "42df4abd4b8769b16f3930068f96960af1b061f1aeb7505384f272233b2badff",
"blk.7.ffn_up.weight": "c920549054ec16ff8c73a72f5d837cf4e11885e44db57c1c1c584c18fbd7a9a5",
"blk.8.attn_k.weight": "01c609bd3bf31ce65688f1f640ee413740e821330134d4ed1877a3065d1527d5",
"blk.8.attn_norm.weight": "48857411f769b00290f4e4f2e593e092781fdc2503f80c1e3eeda1b85a20f74d",
"blk.8.attn_output.weight": "90fb273f8df83744554bd59236515c16c5a5a698ca3fbedc17cc89ddcee354ff",
"blk.8.attn_q.weight": "ade617ac4653c7f00593dbb51837a468afef20a14eaab3780fb96ac3d6714369",
"blk.8.attn_v.weight": "c2c37496494864fee5c527d1fe1f88529d31c73f9cbd02ef9b2e9b23611ea50f",
"blk.8.ffn_down.weight": "2da58572e9ad79087c03cbb0c23c9ef69f93ec221fd5fe4ed92fb93871d23ffa",
"blk.8.ffn_gate.weight": "4483294e628edaa4901708e73e92c917bdd93b780fa01aa74aed57166f2bbf0a",
"blk.8.ffn_norm.weight": "c0cbb7a4f8123b62f0c4652a687f3b394802bc32870dc446eefb709e42043a7f",
"blk.8.ffn_up.weight": "9eaf8a2060cb9224cd585997cd671866c4051ad885c2c6d9fdc7056c2a5c0d89",
"blk.9.attn_k.weight": "5dd36c45fbc9c50fd35c36cd75576288506971eac5c5311d4f5c16ef60099645",
"blk.9.attn_norm.weight": "3c8ca64f2f75ed7c8fc1da010c23be787648139a96ca0ef3ad10be7b14942b8d",
"blk.9.attn_output.weight": "6277e1f833024f53c409be919ec76d34464a78b278c8f9dbf79e777746e3b995",
"blk.9.attn_q.weight": "87352b70d9e328c2d51d59090cf5ea5a046529864a890d0bc8986447a0a5c006",
"blk.9.attn_v.weight": "2efdf01161d7a82a9117cc2d87d37dba5ffefcf730781cb94fcc95130e48ff9e",
"blk.9.ffn_down.weight": "e7658a2ca984961c7ace16acb679387bedb1fef656b5330bbbf588db19673a75",
"blk.9.ffn_gate.weight": "773cd330d4ff5d64be8af00adf2e2722fae4e33fc26bb9d03549f6f4b3b0fe57",
"blk.9.ffn_norm.weight": "c8b86cd5c43b332f72060b807091c33a258e5dac01358ff4733b916cd34c9c97",
"blk.9.ffn_up.weight": "d8cc3bcff18bd46124ba2aa7caacc71220b44eeef6fccb993b4c6cb53e8f2c3a",
"blk.10.attn_k.weight": "964bdf3b4e77b915a216f750ff7b0f2eb1dd6bfa071358aef21010b90111044d",
"blk.10.attn_norm.weight": "59ed411d91d14775764eb514acb0895a75a10cbbfbc1c15d453bc50f8046cb7f",
"blk.10.attn_output.weight": "4d35a2a44cfe4ac0a83fd3ab0dcf1f5a0bf54cdb3b7be9fc353ed32c8a3eb81c",
"blk.10.attn_q.weight": "defff5339450dd881ac352f5c459293f39e07b9619ebd10ed632d79a3f310278",
"blk.10.attn_v.weight": "b9803e8d6a54acea58f662d4c0a5c8ebdf986676de7dfe12d4b288937881ce93",
"blk.10.ffn_down.weight": "eba856be64e4be20b92fb4639a783454dd92427250759df92a337e39f1971c08",
"blk.10.ffn_gate.weight": "2d5c509b066584db4de3632b01234e86edcde35409c5ebce18957dc80fe465e3",
"blk.10.ffn_norm.weight": "ecb9a8679945ff0273856624ce435dd250ffe5a440ea0861a5c84f0e4c44d2c6",
"blk.10.ffn_up.weight": "e76ec7e993f399af02958778c643aa78368e3067846714165eb5aba9d5f547f5",
"blk.11.attn_k.weight": "29c6d1f34bd3ba2f0904e57b32a5bf8dcb2834d439159a33edf234ce0b775677",
"blk.11.attn_norm.weight": "b5817b275149cd2abe18a6a10e19854605fc58fd364666744362ceee8cfe49f4",
"blk.11.attn_output.weight": "1e05653220e237cbe0cc770033e183c9a0eed5680510997409b16186c6691950",
"blk.11.attn_q.weight": "03db725ae669151e4d536e50285b3b047ad097f52475df208ed3e790e31a44be",
"blk.11.attn_v.weight": "27cdf1d4e971326c451a4615a0b79a8c7fe9508f9b76c0d52fa01971fc7eb403",
"blk.11.ffn_down.weight": "176938cd7c2966094f614cace8ba568b10532e45a0d438f80eccd19b6c2a7f87",
"blk.11.ffn_gate.weight": "9782339915dd6fa70013628a01524ee1d01ad8beab04068da7ac6a5ee7603a60",
"blk.11.ffn_norm.weight": "8245f6391e3be97811c0ff27f0d8f484ecc82a468a837c893f059745bfcd95eb",
"blk.11.ffn_up.weight": "15616ddde096d0d25e906375c548b6de4bd5576d1f6b68eefdc29f14e183af42",
"blk.12.attn_k.weight": "66dd21604993edd1b1fe547bcaa06f5bb7e31c9204902d147a227e4badf7feec",
"blk.12.attn_norm.weight": "23a69f85dd8a0904b9839cc5d0afcda299b74e82ae2642106224a1c820f2b761",
"blk.12.attn_output.weight": "4a98d132e376beb274a39d4ea9b6a1b870ad5c66625439d7ff6f45c229c3ca04",
"blk.12.attn_q.weight": "1c6c309d63afcfde32fe37257e300a78e25d01117e33490801107c0e75d1ea66",
"blk.12.attn_v.weight": "723d9e4ebe4e2b1974afa01d8f512b52933698fa36717dd47b37b07760c50a10",
"blk.12.ffn_down.weight": "00e0fb09e1f1fbbf3803f1dee373eaae7a93756b6e13063ab77f9927bc6f996a",
"blk.12.ffn_gate.weight": "89159f7f97aefb1e100107e3ac2d694e1008ad873f79bb953d60c2c1bb22724d",
"blk.12.ffn_norm.weight": "5f70aebd0e43a39d6373d8658cc670c13aadd7818831d3d84f761d5f688442f0",
"blk.12.ffn_up.weight": "faec21b446f061eb4dca561a3180712724347b77a71eb312e7afe9be9e89fa04",
"blk.13.attn_k.weight": "3d440825d19eac3b1753b34d94fee2b3a3cb6636c10b2703ffcf688d3c1eded3",
"blk.13.attn_norm.weight": "47b575e57e410738ad13fd3c74bb49c06b3d31030910834ece509cd1a5c6d9be",
"blk.13.attn_output.weight": "05436d8e613f4475741c1798a7c371b53d61b229507fa04fe23c504ba1f0e12a",
"blk.13.attn_q.weight": "002b5024ce520da41256e3ded5cdc60e5ae07ad9b202cb19d76ab511efd02b1b",
"blk.13.attn_v.weight": "c1f2d6763587c50312cee0d7140fa2c7ee326f5b172bc99b2d8946e08329cabd",
"blk.13.ffn_down.weight": "b5c4e0d8a3ff96cd76a135e415b89f02d28c28f7f3c16a36af31ef0ab8773da5",
"blk.13.ffn_gate.weight": "ae06e9e3d2e1f64c7ad23a4009dc904c2eccd7241f9f91c4974ab2504f116be0",
"blk.13.ffn_norm.weight": "e44a22321bcbcb4a3c345b504e939e8071370f54a8cd702fabdb40b97e0d7683",
"blk.13.ffn_up.weight": "7e6f366d538e21ad431264b12c011892d0be9dfe4c4da9f730af677f920641ba",
"blk.14.attn_k.weight": "95492d6417952ec24b2cab87bceb750fc7e95ac6b1944fc328a3852d980164be",
"blk.14.attn_norm.weight": "6b7b09e1c51addcdbb160ea59edf032531421c520ec5645fe1ff9ca4180cef54",
"blk.14.attn_output.weight": "75887474e4d72c218e6ab0f69f1bf3ec3dc414d51b36fc59df00cdb23421bb6a",
"blk.14.attn_q.weight": "940e33f76e48c21215d19e8a21234c8246d4d084381a7d9806aecb24b071d5bd",
"blk.14.attn_v.weight": "c58601cf5a9833f80f7f9a5b2656e8eab5eb133211446ebd48f8be15fed4ebb9",
"blk.14.ffn_down.weight": "f9f886e7f9b2a54d717b08947a25a0a93e8c2a5b8bcd5a907c06817c8ee3ac11",
"blk.14.ffn_gate.weight": "727ed0ee68594a3f59d704ed3240b6929f083b9c36650fb848d182315737245c",
"blk.14.ffn_norm.weight": "bd2471008ff1b2bae9aa26bea019393fb2bbc5b9493b8cec3ebd2c280fca24ca",
"blk.14.ffn_up.weight": "b006446769f51e4f93b503c4727deae897bc1fc7f4fad49f85024b63c4548d38",
"blk.15.attn_k.weight": "23bb70f9035356624039547a603e46be7d1e4403616eafc2451cc09c5373d522",
"blk.15.attn_norm.weight": "718cb371ca052eeb3bfac6ac506abb887df125271821fd171797a7f2d8dd6313",
"blk.15.attn_output.weight": "c76a2695a204b43a8e5acfa5720590b5d449a9ad9e082cbe3e80fab5903ea16a",
"blk.15.attn_q.weight": "2b3e4037b9e91bdd26d6e8d904cf39f948192dcf09bb6445cb55ca058d4f4626",
"blk.15.attn_v.weight": "7c15e89b6acafc8619e86aa9d412f5893ab17843ff2cfaf40eea9637b24910c6",
"blk.15.ffn_down.weight": "e16fd4bdc6d1c1209c6b633454df4992870c8cefb2cb0e8c92a7e489e9fb5d19",
"blk.15.ffn_gate.weight": "95a46bea366c260337c537fde06b4cbeaeec52484a69c3390bb1d178eb0525c9",
"blk.15.ffn_norm.weight": "37730293f704da265dc6d1896b3be00c39c0a41dab07f573af39dc30a481d623",
"blk.15.ffn_up.weight": "ba74a199da2d0875d7410824238c4ffafbda3993568812284a72b8800df91f15",
"blk.16.attn_k.weight": "f58f79a2a91c9a763adefce0c53a71eb5ce6bd8442f4af554b04b58083bff27e",
"blk.16.attn_norm.weight": "0c16e41b95e81978e0e0e3b338e2afe2d297426578cacee94de15df74e94eaad",
"blk.16.attn_output.weight": "ead22fc337514e4add49aee19720008558e52090466866e849671953a1fccba4",
"blk.16.attn_q.weight": "ef59c4e8fe8918c1add43d7e9c6fb3ef799dd3e1bdd731ec7b6a4a6f97c86048",
"blk.16.attn_v.weight": "902e6b84c2b64241470b13e6f412f859f66b4b223bcfb9c15d5cb1106b07ef3b",
"blk.16.ffn_down.weight": "2ad6e9eb4d8372c32a554395d460d17cfb02d6dbcb757cc962b6bfa36db4f5ee",
"blk.16.ffn_gate.weight": "825b2d50fcce3dbe6a5d8d8a50a95466f83ca4a10343efe67894c20b4628fb15",
"blk.16.ffn_norm.weight": "3bf6ac90befb0e17e077c8ea9454a8485a30f89f2d761ec7751b60c90aed1af9",
"blk.16.ffn_up.weight": "9fbdd08739b32411f5ab0252174d386bab19eb0b17884862f760429b7d41d78c",
"blk.17.attn_k.weight": "4033398718bf3674830ed1b73071ed8482b6dd4ef27f31a6c5fbb998321b6c07",
"blk.17.attn_norm.weight": "714f2e8ac9592966a0f1c02ee979eee8f84586405b992e8ee9543e840199ffa1",
"blk.17.attn_output.weight": "b6bbb618597d767b8f535117be68f92911e4a71d4eb4d8b5d943444151445ece",
"blk.17.attn_q.weight": "b84a0dc00ceb515faa2628125dcec502eed923077b21cfe900a4ff16c2e5f9ed",
"blk.17.attn_v.weight": "4387c7d6a17da9cc7a6bca8f4a75618b20407d570792056283a8e93b6ec65f18",
"blk.17.ffn_down.weight": "47db95c6f1e12b399c3eaf9ddba261782dd71173dd163b52af96541cf87b5196",
"blk.17.ffn_gate.weight": "59abaded0aedfd12f01df81f7a811e84db6a227f51b60abe9a247ca726e87392",
"blk.17.ffn_norm.weight": "b7e86445be5c7b722e01ddb98d5c7527ca86cb827ce0354f2c269e0f2558751e",
"blk.17.ffn_up.weight": "8e31c293bac649d2f60da4b3fc4a3acdce1111ec6058d8805eeeb242443011de",
"blk.18.attn_k.weight": "5ce762ab7b032511c131df81093b587871718c7097f79d8e07d707571f18a47b",
"blk.18.attn_norm.weight": "1f52cdc7af1f4dc1f0ef6ad1ad02e18cda32133654e57cfa9c72ada9c0b1d995",
"blk.18.attn_output.weight": "6486957f30bf8a88516e25772c6650f98b13923f490a2865a8752e36439d1cfa",
"blk.18.attn_q.weight": "93621c8abf69d2ca29c5207180eb628fb2b544d89de6c4a7fb0699be95534899",
"blk.18.attn_v.weight": "11604083b5a74828ac1d226af015ad5dc0215a1fdca44fa7131c2163c02d8156",
"blk.18.ffn_down.weight": "8f9997feb94385f106915df810239c9753b31efda2bf14bdf18a9fbbeec8233d",
"blk.18.ffn_gate.weight": "427c213b3a4e94af703429daf2f65766f70424d8230c123e7e712a18bceb5ecb",
"blk.18.ffn_norm.weight": "c45d305c4ea6a54013ba112f12dafaade064a32cf01317373464a3618d8ba44a",
"blk.18.ffn_up.weight": "a2811f2e73ac9eb9cce91a21a454e84e230a155244e2cd73f2c12aad3c9b8cfd",
"blk.19.attn_k.weight": "b2daed159925eac58c291e2f1e2000beed21002b03c9e1bc7e7a52e22240666c",
"blk.19.attn_norm.weight": "6307306ede2ab5bffa1bcac3f8b139354678c0376b1d9f5530c1fcb4268cfeb4",
"blk.19.attn_output.weight": "ebb98218b2a9c84d3fb6baeb02c5df264b7ab80d994d1098ba1cd47aa398effe",
"blk.19.attn_q.weight": "4f10df2ad09177e7528e9456039b670d07db22940a49417101b725d239c16724",
"blk.19.attn_v.weight": "30f1efc5114badaeaafa91fa466dc7fa14b1616db433c6f563ab851f7333a5dd",
"blk.19.ffn_down.weight": "be5ec7fe6b48855cd0015b0e430d1b70c620de87a7ff188c7c1afef546d7b6bd",
"blk.19.ffn_gate.weight": "10dffea4213881f8a9b583ee0fd370e033756d32255ed15053f794375b9400e9",
"blk.19.ffn_norm.weight": "e75cd24ade45dca78fdb0cbcaaa2d4a17d83a5a73dcc94ce0ec2d68fbdb2a881",
"blk.19.ffn_up.weight": "63e81bdb951410ffa81bcfba1b94a679ec9ebae59cd1623ce2651ed5d4c78bfd",
"blk.20.attn_k.weight": "c2fc5ad39e9bdd45e73c6e54aecc474388d944c4be1ee1921b7fcd035bad02e0",
"blk.20.attn_norm.weight": "aaa9169171937bdce20c1f057e94e9252f221cabacf1ced12e11b9586f23d308",
"blk.20.attn_output.weight": "a9f4fb496e4bc053e3f6cf2e72e22d4cd2b545ef6c32f7e782c2ef6ebcc21d4b",
"blk.20.attn_q.weight": "5a07ac619ed251494170b213921ef3fcc4c2712839da262516d9d5b8ea1ff185",
"blk.20.attn_v.weight": "d6689473105d241eacb17f09f06000ee237336916cf5ec4f48271c5b41bcb8e7",
"blk.20.ffn_down.weight": "74be38db51df736f26ede7c6b52ea787e385f181cb66231e2cced4556a25c9b8",
"blk.20.ffn_gate.weight": "ea91e06dc3d051c0ba0243b5a8bb40edbf254eadfb54fda7247e05cfdd88cbe2",
"blk.20.ffn_norm.weight": "5fbd357b3d6f44a7a91e8a4fc246b24303891b7957e0f3c32818ae5dc16ddd8d",
"blk.20.ffn_up.weight": "fe3290333e056af4ed12942ac72aeba97a6b562e2db05e79cd35dd07eab5b101",
"blk.21.attn_k.weight": "201ec6ee95f06ea5eb80fe86fd07bd016d3ae9ab6abd25d631834414e14a010e",
"blk.21.attn_norm.weight": "ea8154f93e06485828475a00b98cc397ac84768dd70e06ecc0c075b5712d7276",
"blk.21.attn_output.weight": "9f8af74d531478fd304723fd8e4e01578db598441b80dc7c960cb801dbbc501e",
"blk.21.attn_q.weight": "277de9953a8d3cff894ffd06c15ad0ee1407e319df0c1a693d4f45fa9c74ac7f",
"blk.21.attn_v.weight": "6bfdc16cfb898909b7788ddd39dd04b928f31d6732772195d53c558004638dca",
"blk.21.ffn_down.weight": "173877146cb94801157796ee9e5eecf3f46acb3b5e797f90b83a3fc22395eb30",
"blk.21.ffn_gate.weight": "53146713e2ca1be80496024077a028f6b6d749b02e71003c349e113b436f48f4",
"blk.21.ffn_norm.weight": "b28b97e18ab20a5c553ba422f7d7f6014f5902f1d62a69abd20d9fe19a5f9462",
"blk.21.ffn_up.weight": "5c39d0ac4d602b8ec8909dade93b2efcd6b6d9d84a19b252d76bb66dcfaab87c",
"blk.22.attn_k.weight": "01f26272c82917a87a3ccf922fa1d521a952b05de878241b7efe3525b617ac87",
"blk.22.attn_norm.weight": "5ffc96249d8873b506e9eb7158bdfd07fa1429e53c1951430ca7505d25f11c76",
"blk.22.attn_output.weight": "9c2201569358f720244b9c9497e4da02585a167b1414c8a506b85ad75ba990d0",
"blk.22.attn_q.weight": "906036eb4ddf027f6d920f9356a6a2a5e529b96f4e1231a0496d46b4434a5842",
"blk.22.attn_v.weight": "30ede8b0d166003a4b8a81fc99437f557719fc36e5c4dd510c9f161f36a47e73",
"blk.22.ffn_down.weight": "d04c164beabab30e1837b843e18852260efccfbb9d96a34ddd816e6fb3ba23c5",
"blk.22.ffn_gate.weight": "19c889db6b19179f0a62d5981a1506592c65de83760d67afbe00d202202750a8",
"blk.22.ffn_norm.weight": "4885eff2d851b32dbd306bd632c725857e6d164f0fa8b3d5857e572e6ef98ee9",
"blk.22.ffn_up.weight": "365594d8db8e95cf87cc33ac23947942dc326110175cc8ec5a07b5c7059089a7",
"blk.23.attn_k.weight": "badfea1569da0fc6ab817c5727ca3a69b07d9cfd622fb8be5e66678d5b3f7ae2",
"blk.23.attn_norm.weight": "8968f78a379ac3ca5458b4ed4251e8d9112aca6d6dd1ef6440b4bb0b380375a4",
"blk.23.attn_output.weight": "93e43393c03956287b1fe31e9735ff1cfe84f4ae56b83dbaebe96275e4e11831",
"blk.23.attn_q.weight": "aaff73c725a8700ae66bf26ac8869dfe96738eff23a8ff340de2ab53400a5795",
"blk.23.attn_v.weight": "3a86a8dcf14a746ed1411f5a7e634064bc4dfd6511c24cfeccfb2c9ebb6b4101",
"blk.23.ffn_down.weight": "d4da6f37bd7ef69bb203f7b0dd59f50bce37432c70627e6cf274ab81548af5cf",
"blk.23.ffn_gate.weight": "5b6072936c4a693923bb4e3d1473fd45545cb02fc07799aca458ef0449a04061",
"blk.23.ffn_norm.weight": "cd76e37025f84773180298ddb15e0d4ba9cfc7d832e19c791049daa47c6d9c10",
"blk.23.ffn_up.weight": "cde43b99b83124a13b2e4753d12674b3a61dfb34c04703007ced3e8e2aee1801",
"blk.24.attn_k.weight": "457379edc4cce4cbbe107385079019bc922264fdfc7bd1d1ae84343a81460c66",
"blk.24.attn_norm.weight": "0ce0dfab2edeede5da419fa7833db78e36222cf25c358d08f3ec664310f031fb",
"blk.24.attn_output.weight": "0cf91c2fd40c204d2fd4b9c85b69281e5ad4ea8442972fcd44b5fc8e835ffdf8",
"blk.24.attn_q.weight": "87ede30c09eafec6a4e6285674c1bc4637140b168b2da4ed34f36fdb6e176cc9",
"blk.24.attn_v.weight": "4c0b078b2798ca35d6d2c2258fe499820d2bc88700654ba4016e4b028f563590",
"blk.24.ffn_down.weight": "cdb8540c32b1ab988f984484928d39f6841f2131c1cebe90ad9456737fccbcaf",
"blk.24.ffn_gate.weight": "da2e0e913648b5526bd2bbb344038dd067639343aed3b413662b064b0db7556e",
"blk.24.ffn_norm.weight": "8940bd781c610d75eb2be63cfc8d869a3af05e53c963dc7fd4c6f653df5a80ab",
"blk.24.ffn_up.weight": "90cbac2a58801abe11ed6c24560aa4acb949f79429f2aa8ff129ac05868bb87d",
"blk.25.attn_k.weight": "90607131e36998e990ce718ad05cbecd1bcaed010931401ce6baa3b0d93ebce6",
"blk.25.attn_norm.weight": "fbf679c85656c04a6cf8fedd5412c1ace22960e6c2d47f2d43997827811fbb97",
"blk.25.attn_output.weight": "08412724ee7a2086514406e6f68fb9f622e10bac25b0c373b294709f4b09bd2b",
"blk.25.attn_q.weight": "9c1238e98a2747654a0d4371d3e7ea8b979867f609dc42482544f25591e85c7f",
"blk.25.attn_v.weight": "a57796a535c6cb09581cbafd6a91dc14adc8cca2a2465a7ffd0aec546cd84074",
"blk.25.ffn_down.weight": "f7e34e8a6391b480da08b52640613ccadce268373934b409759743a1735b74d6",
"blk.25.ffn_gate.weight": "b8d0b2f4612678b5ce42bd4a683f8024514b75fb5ebf6b22c600811e95582ee4",
"blk.25.ffn_norm.weight": "cde1fdba2369d315f3c6940a997c471ec891924e642505db580d732763bd7b75",
"blk.25.ffn_up.weight": "72e700c32ac8b9c47559c2222e45888a480b527ea512075423c5dc01678e2bb3",
"blk.26.attn_k.weight": "6ac83b3414ae75bf3a9055c32e49d2c40fe611ab21f8444f03d2f465d18122c9",
"blk.26.attn_norm.weight": "55f9d6dc9d75973dc75136ecb9d991b4398097ac133070873fb96ec76a6f60bc",
"blk.26.attn_output.weight": "ebc4fcbd15b33263e50ed2ad45740867cce15bc90e1216623babcb1820734509",
"blk.26.attn_q.weight": "080f057521073e412936fe3fee64fd574c8128fa4a148b879d3e598fe4954581",
"blk.26.attn_v.weight": "0fa2830d6746487ac91b243716e4302361f891e4e008eddd14abec47c7809d5e",
"blk.26.ffn_down.weight": "cb2ab8af1653adc57111ada49d2825c6995e338c8208455b92de10e580f60f31",
"blk.26.ffn_gate.weight": "231ce30966086bce2dc0e0afd34a22a1958cfda7a57c41b3b8e9444c5dfde8a6",
"blk.26.ffn_norm.weight": "35d959d25d17b00617590f5d5831bf705c385c51e46297a14375a700effca6af",
"blk.26.ffn_up.weight": "367680c8d332538b467d1ef87cfeb36cc5c6af564c5023c5fb50e728e3438287",
"blk.27.attn_k.weight": "0bfcb351c6d17aeac5b55a915074fbdf00f11c4bda98babb196ac8804805746b",
"blk.27.attn_norm.weight": "5d598a88c2e75ba59dd7ba4fee940bdec92d72038f1286536d2dfb71d008a09c",
"blk.27.attn_output.weight": "23a9da7347336479f6a10ded14cb3f46e06b5bd56dc4b0fbc526c688552ec840",
"blk.27.attn_q.weight": "b83319dba9055f069208e9c9d66da08bc6874f23e575288fcd81697d1777aa54",
"blk.27.attn_v.weight": "36ed34ccb2f36fdf16b2c2dd225a98ea6b7b0e376e7791191136ccd7bd7a4add",
"blk.27.ffn_down.weight": "5488e1d3a58c71b5e9ddda430540b4776b268cfe1457cbc1c2622dedd9e4526e",
"blk.27.ffn_gate.weight": "4ff48011ee0bac39af704849d9132a2410392c87a509c684f2062f6b76b498fb",
"blk.27.ffn_norm.weight": "32afe99675983da3de2961d1b5ca41c98970a356823597fe29e91f6e86abf0e8",
"blk.27.ffn_up.weight": "1eae3088a75629571fdbf6a20f141bc2bb2ed3f5ba2b9fd1d949f80695e442a1",
"blk.28.attn_k.weight": "c4e80af714962d6f9040d2c09f316f4a1cbc3a2e994e19902d7c653cf3c73dba",
"blk.28.attn_norm.weight": "c1ecf85dedc1c83d5d402bb7c94fb8b9c11f1a3e5f64e7680f80912d4a560794",
"blk.28.attn_output.weight": "72ba47c061b21f5ebc5213a455eaf6fc49c8f8e04ff9ce37e6ed4921b629161d",
"blk.28.attn_q.weight": "c4abc47234307f44b8ca789aa6668e298158fa4b459b2c1e84bd581806591cc1",
"blk.28.attn_v.weight": "aeba950799d4950e491ad0fcbe30334e39b8975177990a2cb339031c45ac153c",
"blk.28.ffn_down.weight": "4e84ce382a37b994fb8608df451a60040559e3f4f3241c3b3cb8989a3ed50d83",
"blk.28.ffn_gate.weight": "04df157acdc8e8534ad60acc2d2a4dd3a7a6610f6382535ec728994fa6f83f83",
"blk.28.ffn_norm.weight": "4d0386dae2bd1c1a9d0f9730718333e3a486c3bc6a5c5d482193c75d39832c80",
"blk.28.ffn_up.weight": "fec60bb0a3daf182a14bd8311fe6dd1e3fd020c5fc273e2549cdb1a2d6b79b05",
"blk.29.attn_k.weight": "b0532a263aa5a4e2a7a80adc83fc5dec974493bd18da7f953e7ebfc3f3a19aae",
"blk.29.attn_norm.weight": "593fc3b4000c35b7a59dace09ca1756c08be0105b2edd354a0e1c16c82898859",
"blk.29.attn_output.weight": "315b896f9f0cbacd0ca8937384c3a3a227efa908cb8c3a9125ec00c480e32b9b",
"blk.29.attn_q.weight": "d482d45386d4ad3394f08e9dff233ee3a70d0427d65c0b8fa05905da7e25ca53",
"blk.29.attn_v.weight": "cd3b5a6e2852da796902930a6a84bc87fc6a7c7bf51f8fc23758d12a39013b36",
"blk.29.ffn_down.weight": "5b3dba6f9753bd1b1ebcba65ef5373dd62c38e755c44b7231b95d93d45761f89",
"blk.29.ffn_gate.weight": "8610d9d2db15c256243ffcca3ffd31786d0ada0af0e7c7aa3fd20524370ab036",
"blk.29.ffn_norm.weight": "1a2ef2d38b7ac3e51190b9ccb8b6552ba83ab290e523356a7f851ddb35dedca2",
"blk.29.ffn_up.weight": "a5fdd15811bde16dc27677cf1a4c97daab4c28cb12a9530f1a0e573134fdb69c",
"blk.30.attn_k.weight": "1efeb0b5f4b45a85cdf47300f892ac77ac1f38000ec3653565d1303d1fb8c743",
"blk.30.attn_norm.weight": "c73934c182c7fe80838ec1d0b92f50a583f75f7a3d78d822f009b58ad2c80e65",
"blk.30.attn_output.weight": "3a0fd89de2d274614750345d827a9c886a4f97b343a13cdf680390505df596a3",
"blk.30.attn_q.weight": "711e113362bdb067db843c66236704eb1cd3fc5f40e3767143e96d510686ef4e",
"blk.30.attn_v.weight": "82b12a9a74fd3d91b73cc2e841e2b3f0a5197ccd2998afa17020995f880d2267",
"blk.30.ffn_down.weight": "af9f4b1287c0d824ae22d6e335d19e04a70135b835be7caa2435f1d85e931993",
"blk.30.ffn_gate.weight": "e2ab3e6f15f5c50fca66c084cb6a57a2b6b82406d65150e82ea0437b93dd9a46",
"blk.30.ffn_norm.weight": "c1b9c325c83f00e177386a4d7e769945f2995e60950c4a576c0a2c4ab9703d04",
"blk.30.ffn_up.weight": "9b94a21efd419715d82071b490d3b635cf1e8da080620dcc39e5bde976d7e9a6",
"blk.31.attn_k.weight": "0db0d82e3ddcc2c06209f5f013e1d72a84a996c40bf00186be485b909cc268e8",
"blk.31.attn_norm.weight": "2b8b7239471f57140c5cdfe06bd224a4f6326282f99736e44fba4c7b120ac101",
"blk.31.attn_output.weight": "a310b048840cc3ff2be4b84796340e8e2cdf05ec89d14bd3655c109b2bfa9fcd",
"blk.31.attn_q.weight": "f45e0cd95645175ea82813455356d171838539bc3f7676d877c698f2af0a0eda",
"blk.31.attn_v.weight": "8bde008e809112aa7e7c23e9c3099087bcc557313b01306c87efa0a4a30805ba",
"blk.31.ffn_down.weight": "8266fec7e203fbfad7033120861e44984581ff8b6851d01dfb7b81c5d8fa90ec",
"blk.31.ffn_gate.weight": "b73bc0aa5baf006d9ef6403104891b8133671b0992398fe038380b67e0d7e2cf",
"blk.31.ffn_norm.weight": "9c62cc27a7b6017c1df8ad49bff249a8245e8895c6754f402cd44623fda83268",
"blk.31.ffn_up.weight": "5b970a4694ea3171a0167f6e1636d9f00268bc1c9640430ffc35218494884adb",
"output.weight": "74fa0ef08c57a30e633e7117b1e9c805f833e2e5e21434bc79ddf9c92c6d7330",
"output_norm.weight": "59b8a59fd3fbf39353506116e43e5e76edd0cbf2a2873d869da4cf27a04997c3"
}

View File

@@ -1,348 +0,0 @@
{
"general.architecture": "llama",
"general.file_type": "1",
"general.quantization_version": "2",
"llama.block_count": "32",
"llama.context_length": "32768",
"llama.embedding_length": "4096",
"llama.feed_forward_length": "14336",
"llama.rope.dimension_count": "128",
"llama.rope.freq_base": "1e+06",
"llama.attention.head_count": "32",
"llama.attention.head_count_kv": "8",
"llama.attention.layer_norm_rms_epsilon": "1e-05",
"llama.expert_count": "8",
"llama.expert_used_count": "2",
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.add_bos_token": "true",
"tokenizer.ggml.add_eos_token": "false",
"tokenizer.ggml.bos_token_id": "1",
"tokenizer.ggml.eos_token_id": "2",
"tokenizer.ggml.unknown_token_id": "0",
"tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
"tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
"tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
"token_embd.weight": "1d1d1d39a867d5a4bfb32792a47247d2638c10c95a6259391d02843583505cc4",
"blk.0.ffn_gate_exps.weight": "2e5cd43ac3f26c44f071926ff6c3f239ecc52a34bc9a5b5906d3d4c1bf2fbbfa",
"blk.0.ffn_down_exps.weight": "a4dfc7e7c96e7402eb70279601675b956bb7331da8101e63fe5c0a611b6972e5",
"blk.0.ffn_up_exps.weight": "2d5d87b378b2319c344ed2c642598b6f7cb6beeb582a8ea51abc9ae690d473c3",
"blk.0.ffn_gate_inp.weight": "a46aaf5aba7401ce6e41f158242b4879d34901661f3ede85496cbd0ce79d6314",
"blk.0.attn_norm.weight": "3fe37d913bdd2b65076bcdd6efe64a37b0b03cacbb1b80b9f7089068aa35f38c",
"blk.0.ffn_norm.weight": "5e14308a3c894734eb204c8f558bdc817e94bbd5b4e9cb4094e91ba388c8f7f2",
"blk.0.attn_k.weight": "73d943dcac0911e87bd771f4aa1c901e1bfe1aed293af06e1a67812159859f67",
"blk.0.attn_output.weight": "4c5f754c855e262e8d4c94c6fbbb57af06399dc0e170d7d99a1a17fc9aab9227",
"blk.0.attn_q.weight": "d6fd7403c873d49c05f6f03208f30d99ad34cb3b71c9990c47334d502a8e4c7b",
"blk.0.attn_v.weight": "cf17cf64b2d683bd9de6cebaf60e5c264df6fdc38fe719dde9d54c80334f6366",
"blk.1.ffn_gate_inp.weight": "0d524de81cd915816b4e714bf595ad6946a9130b3de731cd89428b2781230809",
"blk.1.attn_k.weight": "2ea47f412992b374c70674730fe84700e0c8cce177086ce9b6635e42408964bd",
"blk.1.attn_output.weight": "b4b2520794d54113e86c8ff678eacfc62e35be4395a594a6c8c22b4383ebcc0c",
"blk.1.attn_q.weight": "5db930c98c4f91f6eab57eb974c72210b158e366d23d6d2890b2759c053bee33",
"blk.1.attn_v.weight": "079bdde09668394bf7af9f8bc175017b4f48f0ab64e6dd855a4d7561d1693c0f",
"blk.1.ffn_gate_exps.weight": "146a62de19f9ab093deb101f9640534ffc3dc40d69f508be12fc0475d01b0c7a",
"blk.1.ffn_down_exps.weight": "949da94a3c0f375160672a979e85f7def284264b10d48d038238aad5f5ece793",
"blk.1.ffn_up_exps.weight": "7016a3f467d9e3f2f4b4019579ed86b757469cd367f2b225483305376b4bb3c1",
"blk.1.attn_norm.weight": "1614d1e6ed537737275eb888666c7bac533f4eefbe73dec92b591045ca9e1afd",
"blk.1.ffn_norm.weight": "405a455fa7d1ec36894652ceb554bbcb09a07fd6405f42741e66dc4a4665c19c",
"blk.2.ffn_gate_exps.weight": "90d5003fc7421f44220c0842d43128955e91488f6f785fe570b62d81b719e964",
"blk.2.ffn_down_exps.weight": "ecdc2b5a8b504ef0a7833acff47d69b0c1fa9c22126de1bb120ff5e48c3d6e2c",
"blk.2.ffn_up_exps.weight": "2cbd9485a32460d315eb50a2f3b00863fd77245bfe885b7565efac1cdb1f191e",
"blk.2.ffn_gate_inp.weight": "0d0a17a1a2c7a61f2cca49ecbb479154dc93a870873257bc4f225e7607f2e2c2",
"blk.2.attn_norm.weight": "b2e4c5a977f87a6f880896bd73596234c9b83622fa0d7add5892501e3155913c",
"blk.2.ffn_norm.weight": "0ab875b4280afa922376cfc7b9aa3f7071c9432ea1254091ce7de3749df0e8e6",
"blk.2.attn_k.weight": "bb884af51fb51550acfef54ccf1b58ce8284e587806e6a2f88c8265e1ad05a5e",
"blk.2.attn_output.weight": "0f03099ba1ef342ea61af9cd71d028123bbd8b1dd7d7fd9b509aef77815427d9",
"blk.2.attn_q.weight": "8fad0d29eb4c9d24e564774ee3316b9eb7a4c4985e4567111d2c836c830f6cf3",
"blk.2.attn_v.weight": "fe04c847ff677632401a94e7b6b6fdca60391ab21cb23bd791533115de6303a1",
"blk.3.ffn_gate_inp.weight": "29e3aaa724590c070e614af8288939603d2641b0ef11e8c0f476bebb2776673c",
"blk.3.attn_k.weight": "231cc5631def10f7f292d8862d6125ff555164cd70480ac76362149fad204497",
"blk.3.attn_output.weight": "86467a605c62852e05fda1a7ef43150df2cf715fe59785dbcba09f1c27cfa086",
"blk.3.attn_q.weight": "901822402453922225c2d6ac79616691d48217635d5ff7338daa971d5ddee210",
"blk.3.attn_v.weight": "27030784f44375720df2f090933645a31a022d3fb3b14573e5ca0b78f44070c1",
"blk.3.ffn_gate_exps.weight": "231ba59cc0b988d125d77bf627aa3f04636684870af88f081f3944b48a160d86",
"blk.3.ffn_down_exps.weight": "530c3ab44ae4d66e8afa4d10c153ba5dfcdfb7321989a988e62e9d12e7234625",
"blk.3.ffn_up_exps.weight": "b85c2d4d9d11332e702b3c0a6610d4f525f9a93e5d12f5c7c55c592c40755e75",
"blk.3.attn_norm.weight": "05dbb6d88cfa6b199f9d705ccbda97c0ef13f9ec875c595398a1a42d009a4555",
"blk.3.ffn_norm.weight": "6880b1c27d46969ce36fac049c05dc8b89e4bb47dc89df357e32df7e18fc512e",
"blk.4.ffn_gate_exps.weight": "a883b4f225b760c5a2f6605dc5e2167ab85bb398c70bf64ceb539fcbd6128dcd",
"blk.4.ffn_down_exps.weight": "d291bb656aae77947d4b525e2819bf4112afece53ff31de9dab999af1f65f9c4",
"blk.4.ffn_up_exps.weight": "38592afb8ba3dcfb26970f906174f7d3fa62da44fa4be4fc6912a19030ea9164",
"blk.4.ffn_gate_inp.weight": "1596cb74e8fd6c3080b937b06468bb397b0dbb661e6d180a6bcbdc43e8bfd0c6",
"blk.4.attn_norm.weight": "f90c83c5ff4366281d283384efc941620542b9cfdea160d678dc54a75e33f758",
"blk.4.ffn_norm.weight": "d28d8c49d1746b7cc085562d1074905fd14023844de823dc4fb22202bb280790",
"blk.4.attn_k.weight": "792bbf412cc357140fdaba543e547a9b2f7582919e307bbd9a80c7d6d8f5f1f9",
"blk.4.attn_output.weight": "d98e4a062d2631d9c315f1990d5f6ca9a88e7e0e46387f611ccb0353f876aa12",
"blk.4.attn_q.weight": "1a11a55a91d9f748a72176ff6b1c174844df406e00d1b66b9aa64dc6ee4bcd1d",
"blk.4.attn_v.weight": "04cb3c02b12a6313c7ac7044513441083d534fb4c5a3f63bbaa58f7edbd2fadb",
"blk.5.ffn_gate_inp.weight": "cbd5cdf015d33a2da6703eb74c22fcb97581fb9175435173b6dc4f9e8364320d",
"blk.5.attn_k.weight": "4fdf3405e4d657403f5647b51233521310ee984b4b81bbcd901cb3e6ab76b7ff",
"blk.5.attn_output.weight": "4a25662c46979a29600ed77e1907cf81fb16ef30e724c155444e54ccb76af481",
"blk.5.attn_q.weight": "e2acb30e30b97300039bb20ad0878f05159d5657fa811748a51d5b6fb35d631e",
"blk.5.attn_v.weight": "306504b6a26aa123c63dbbed3f4ced0ed2ee8fb6a30bf0093539b817539f5ece",
"blk.5.ffn_gate_exps.weight": "7e34df9b9944dbeea5e8565786d3aa6937314a4b87acd4d0874687877c5a39fd",
"blk.5.ffn_down_exps.weight": "c4b7a57a42b5ac0a8ae27dcd5cb2646d7a7cc7123126d44a56ab128e85f60b13",
"blk.5.ffn_up_exps.weight": "09d47593b6dd6c664a9155bff02fc2eb7ac4a70219a88162d05c802a01d3c6ba",
"blk.5.attn_norm.weight": "58804a036d6ac4c1fe357b8b6a97a5c37cae1c2f06ee0086c041d449c1c6ef6a",
"blk.5.ffn_norm.weight": "d872dee6789f0826211aa46ca9d0869e3e96bcace9e77d6559a7b6f3e524f3ca",
"blk.6.ffn_gate_inp.weight": "fb1eae732e974d6c1d020a5b4ef98c5f33016f984701bcea656f999a99daad66",
"blk.6.attn_k.weight": "55e9c59c5051ab5519b3a7962e1b5fa96a3c0251cb6200dc2f177885ad2de470",
"blk.6.attn_output.weight": "f3c834a8d0027370350e2b6294d95434d31432e57be6313b013c15a56303d61c",
"blk.6.attn_q.weight": "efaefe5f11c2140dc7cb532b0832c2a0b363a165cbda21f00fadae77efca377b",
"blk.6.attn_v.weight": "900bd734d75616d846a90a121c97e081c956a3d1ab012f66dd0bc62c43e1ec3c",
"blk.6.ffn_gate_exps.weight": "312a99661b1468fcaed2474621116f1681432755e973f3ee79d01912974fd424",
"blk.6.ffn_down_exps.weight": "ac9cd7db67a2ef0d2b5def86873673d05e48d49d147dd944469dbb8e2d4c46f6",
"blk.6.ffn_up_exps.weight": "57613e7e09579400a1a09fee4445acfbfe83f2f327fdf317877787d96ada6b84",
"blk.6.attn_norm.weight": "0e8801e09885c633bc01a9a5b85d4e878d30158a4eb41a937dc5b760ebd044cb",
"blk.6.ffn_norm.weight": "b8c58062ac93072f878446b0e7f958c737aa47fb769fc3a8f593133d12db2dd1",
"blk.7.ffn_gate_exps.weight": "1ef611732ff13edfa8d30981ed9dac00c15ceba9fc012ed0b199e9280a849948",
"blk.7.ffn_down_exps.weight": "856c6811945c7b0fa461ca17811cfa43436b4cdf5326bad23cbc30883486d7cc",
"blk.7.ffn_up_exps.weight": "6725e3e33994302ee13fa5ec163631ce2dcaa08aadde8fc166c2265d4561c5c5",
"blk.7.ffn_gate_inp.weight": "36b49d7f80c1003dc392b2c1b9960cd49889dd69e77b26b9e4b13d01f3d0a32a",
"blk.7.attn_norm.weight": "7a0ec49acc5e20ee71c6f80ca02f4f1e564c485e0ae0621309e7c2eb0c616cf0",
"blk.7.ffn_norm.weight": "eeae035c39ab6e64bc06a4baa1bf6e50d4c8b8797cb0ad8abd48be86974802c0",
"blk.7.attn_k.weight": "e8f78c1def01a7a38d2d9bf7becb17755e28fefe4927856f7890fbee52840187",
"blk.7.attn_output.weight": "5367f05ac3bb49ef8745ba5902e1bdd4442415a3ebff2c7e1a3918d7be6fe948",
"blk.7.attn_q.weight": "37c95fc5acc55a4f6e5f02cab9be60e4fe54c08b65f98f4455741b4aa542ff4e",
"blk.7.attn_v.weight": "c89f1343486ba55814233511e94090f7365662a8a4214aa4c278cdadc79196c2",
"blk.8.ffn_gate_inp.weight": "4e239afe8c7afb8de3a005757c887cf14b1622ca2d224227591cb0e5301f4c17",
"blk.8.attn_k.weight": "2ad0229f30fdcc1e85ce64e00d8f75902238294844a81d5af43e14ba75c02983",
"blk.8.attn_output.weight": "2e44a4722acb3b521b81d0b910f8ca2f6c286d874a92ddd02150566454061699",
"blk.8.attn_q.weight": "1cd2b09cb2f43e08de776b5f7eac197a5a6d4ffdfd52b21baa36319450147bd0",
"blk.8.attn_v.weight": "5a22c57ebfd33ac500cbcfd321d5b5b1783f8728801db6f3f8bed51c7183e4db",
"blk.8.ffn_gate_exps.weight": "91063fe56cb4f3ff3b41052bb5046fcf8ef61516a603ee90aab893a9d68c15a7",
"blk.8.ffn_down_exps.weight": "d4c3abc8f1d1b462f67f70bd8f404b3fcf45dceeaa8527fa120527254c383c90",
"blk.8.ffn_up_exps.weight": "76a1a1f08ec577716a2e7027b45293e9205751126424f1bebe1de89c78f087d5",
"blk.8.attn_norm.weight": "f980d774da39eb76c52358afac3e38cb4c81cb323deaabbe5c41822e3f17a98e",
"blk.8.ffn_norm.weight": "1c937658cf90f1a85db9a5f26e077730fdd4b694607dbeeb825c5fb2bc407e0b",
"blk.9.ffn_gate_exps.weight": "a2532471ecb7896d5c78e5a34e10cfaf4125265e1595166c8d0d0dfbe2a3187f",
"blk.9.ffn_down_exps.weight": "b47921a28412d48fee450b8b9d97cee42344a2e69f06d407fd9523d7adf13333",
"blk.9.ffn_up_exps.weight": "7c461bd1b2a73b439cff6a10d94afa01e8b06f7e6f09d9a6f28e3876aef48bce",
"blk.9.ffn_gate_inp.weight": "1648dfb08b5c06d7953a5a97ecb764995fae9487fb729a1c867023b2538149d0",
"blk.9.attn_norm.weight": "8635db0f299882a63b7cfcd1d4259c9e53fab22c31d3d054de36b1001380b31b",
"blk.9.ffn_norm.weight": "f9309aa323062d174c463613afef9b0a33501b510bfaa58a8e0e866d12ffef3c",
"blk.9.attn_k.weight": "dfe62030441e947a588512d18d9c6e4ed72c2f71c227d622c095e4263b23dadf",
"blk.9.attn_output.weight": "1977beb75c6349c50ba7dd3865d7c0a9c5c5ddc854413147b0eec98ac4fda351",
"blk.9.attn_q.weight": "eb132596719605cd6bd1782487f121994629e115190edd69240b12af66e734f5",
"blk.9.attn_v.weight": "9e708f15d332d7c5187b0693b1a977eb30a2fa10bf7df48ed9d7537c0aa6ed99",
"blk.10.ffn_gate_inp.weight": "97503a5d166c1925f9b65c0eed980753d411714d66896f3d0fad5286c7aba702",
"blk.10.attn_k.weight": "1ebdd222336bd25b48df1b138cdbe09021c4a5562ea7cb78cadd1255d2be3a39",
"blk.10.attn_output.weight": "5e98faa38e9d514b9057e1c8342c509cbe1083defd518e506f6bad89117d1f5a",
"blk.10.attn_q.weight": "3323a26c87d936d1dd87c577d0b763459fced726679612c874b3de5fc6d969c5",
"blk.10.attn_v.weight": "d5fa73cb56aca388e205f44455e4b4f676fdc12ed7fac4542fbb3b41ecea59ad",
"blk.10.ffn_gate_exps.weight": "225021b53782800906cd13b70be3a4161e8b300b97f984a959ccad6a6e8adcbd",
"blk.10.ffn_down_exps.weight": "f08eb91526bd22f5fd0402fe925d6141cdbb308a1ced0330858d0c85c71f5ef3",
"blk.10.ffn_up_exps.weight": "a9f688350c3b53eaada5103b5848bd9a3d7d6b327a70fa16c24bf28ece933eac",
"blk.10.attn_norm.weight": "5ba426c9dfc79805015ccd76cd1068b0ad3bb7a8453e14bb1d35486f122d8f95",
"blk.10.ffn_norm.weight": "98891d6acbc3986b2581b7a3af9f5946a392d9188972c6a8b15d4e745a4f2482",
"blk.11.ffn_gate_inp.weight": "b2365a60566e7dace892e1cb0e62eb73ce387352601723e847052b34874feaa6",
"blk.11.attn_k.weight": "0efbc1d1430505543ff71532a4fcda821aeac616ef6c1dca40e00d4f2ff70bea",
"blk.11.attn_output.weight": "3d5bd4d9a41236f30d4293edb9ae27beaa113ffb31b4fbfadff3a4c370dfd3e6",
"blk.11.attn_q.weight": "aa11e9db14dd9c77951511443077c2a1a78070753d7bd3d9811038473f69e325",
"blk.11.attn_v.weight": "5adc567f377aa11d1763d35f50e53fb2896a8b03b623ac36acc45efa2486d512",
"blk.11.ffn_gate_exps.weight": "71d07d982aabfab9eed3c733d49c20f023bf475368fc71db5084d91beadc4b47",
"blk.11.ffn_down_exps.weight": "9a06e61461e48b3925a9f7d9cca634d048c8b62163d7bc5c43e35899f959319e",
"blk.11.ffn_up_exps.weight": "bc05494d0dcec61021b3ac0c5bc1bf502736cadf48224e213bc139d562699a89",
"blk.11.attn_norm.weight": "a5758a10bdd0404ae1470e8e9db903985d4d07f60553c5001a5e7b660d4f7ada",
"blk.11.ffn_norm.weight": "814ae037563aad3771787316bec4806c95bf6f5991dd6474b4b1e5cc13dc18ee",
"blk.12.ffn_gate_exps.weight": "3a68b831ba1606fb9ef6dffed4732032447ecef23ea563ff4e79317586c7eb49",
"blk.12.ffn_down_exps.weight": "268b25e13f4b7beab08686e83705a41b21d15251809ee4784526f78a580da829",
"blk.12.ffn_up_exps.weight": "9105751a5b5b42ca2614d0456f24f779d2e2ac8cdff0f96842aa7ae2b70f341e",
"blk.12.ffn_gate_inp.weight": "d0de1558cc1d458c5c504f63ddc59785c323df7330474bb0644c346104b40a3a",
"blk.12.attn_norm.weight": "859a4c8113678e2e202d10299850e0cfb52eb11ea50bcbf4fe3ff39bdd394154",
"blk.12.ffn_norm.weight": "7fbf4c459c1760218877e9ee3f5ad49e960956a4369bcfe96c143f04ff9ddf97",
"blk.12.attn_k.weight": "0a7e254fdf3730a57372b6ff421a613eabaea68cdefd64800857941411318374",
"blk.12.attn_output.weight": "ceb763fc15d88af149d8fb78e82db2b7dab3aeae584af8cf7611a12356a397e5",
"blk.12.attn_q.weight": "a43402d23c46cb2d3cb3c2a98c81b19d10026b7e6742370fed6b2880b6e049b5",
"blk.12.attn_v.weight": "3bc24f2c0480ce91ef72993ee8f1cf962f7359e12183424583ffa1246bf3db52",
"blk.13.ffn_gate_inp.weight": "a6d68c82bfe66d8bab68f980f5f18268a9e2c0cd6b8832ed39010e0de198ae05",
"blk.13.attn_k.weight": "0166c39546b37dc2e01b2b396ba43e183f797dd04eaa51a6d103d8b58ee4bace",
"blk.13.attn_output.weight": "2ce5eb198deab9557475a58b69b11e9874b547e05c23f223c6e42fa35ddca069",
"blk.13.attn_q.weight": "745c1bbdf434284a7fae98f45e821c076dd9c2a2467dba6a9d8cf0041e419dbc",
"blk.13.attn_v.weight": "9ece68d5ac64d1421ea7aa32e1cff9cc1fecf5175f4c4da858dd31d8633e3337",
"blk.13.ffn_gate_exps.weight": "ccfdcb4670b131689de12d396a010b5ea737795cf5c15a14a304d720b3c7c899",
"blk.13.ffn_down_exps.weight": "8b8fb328664764f1aaa5cbdec336d5654e981e965a02ef622bde5f07ea1c164d",
"blk.13.ffn_up_exps.weight": "d2ace0236c2fb3365fdc85499d676a7f65813c48e5085348b1df1799922766ec",
"blk.13.attn_norm.weight": "1ed29d7d89ce52d7cb4d57e895ff7115430466e917136c049c385c030ed44e9c",
"blk.13.ffn_norm.weight": "a194fc542597a4dcfdfaec5e3cba2a2b2b21b21edfc87c39c0d7f7651355bc4d",
"blk.14.ffn_gate_exps.weight": "a625e3574e5e740e7f8e2f9c40390f2f382c720aab5b10534e298002dd8d1fb9",
"blk.14.ffn_down_exps.weight": "bc366f015b83c865946afd74c8a884943e0ea2c671314a0b7bb72f21a44d2f78",
"blk.14.ffn_up_exps.weight": "ee3199bf2086de77b49f57f487676be8ee70e102a2fb5a5ef8ddbbc28a9eff41",
"blk.14.ffn_gate_inp.weight": "2b437870c850fa2e2044d032bb02908af634356e37466fdae260b933e48ee8b4",
"blk.14.attn_norm.weight": "cd8344d193a1cbd42bd898e17f4bcb1ca0b2918420fbdafa9249a6f2b7f4ae06",
"blk.14.ffn_norm.weight": "70eec40374e558fed5b07257283cf36342b6b0129285a00007deb59c32c9f7c8",
"blk.14.attn_k.weight": "4053bdb507e0543d724b632570bac86b31707696d90a0db44c49b2a082e0d599",
"blk.14.attn_output.weight": "0182632cb0e06a07241b8293d25d109fbc1862e1e337d435f908e8681e2eb1ab",
"blk.14.attn_q.weight": "ffc7794a4c1b6f793c842dba969435330a7a80b9212e457b4b2ac33e68b41241",
"blk.14.attn_v.weight": "6411805292d528e61bbaad8f9aab9dd073529a17946c057fb06864fad9cf3211",
"blk.15.ffn_gate_inp.weight": "77d0744567c76e6abb67f81ba9c715b2b544841186d5b948309571eff213bafb",
"blk.15.attn_k.weight": "1f7957954ea4c6521c257b35a360e868ffa02bdb3de91f146d5e06bb4a545c98",
"blk.15.attn_output.weight": "d7809d36bd8d3342240c46fd87bcc7f9821a222f48d9a95e45ae50460265d3cf",
"blk.15.attn_q.weight": "25f509313ae4d8401b871904059f472a26f5714e7c791c725de77a1a522c976e",
"blk.15.attn_v.weight": "96fedf5a591fc0f020e6de10fd72ff12b3ef9cf70cd21dabaa0d3e7b06f54e73",
"blk.15.ffn_gate_exps.weight": "8f950d976b2fd9a3d213b84123cf114c1377efde9352767fb2ddee89e177c8ef",
"blk.15.ffn_down_exps.weight": "6fd09d1557bb94b06efbd4f6a1ca4be532a202ba290e9315bc8da3d12a5c4c4a",
"blk.15.ffn_up_exps.weight": "cbeb59ae7b0266a928dc7e3a6e70a9330b92f9ee1b17ee1ed91022108204a33c",
"blk.15.attn_norm.weight": "2005330911ac2edc7b6d27aca021c67d30d16eb632e49b1a13f30fdb2717aed0",
"blk.15.ffn_norm.weight": "0e9198f3b548eb78acc8961f2b3350d238d26cec110933ba753a8cf0035c501c",
"blk.16.ffn_gate_inp.weight": "a41d1f99d739c8b150c3945b6949763988d0c6a4c5a2b5855592ca1a48ed23d5",
"blk.16.attn_k.weight": "b624e2ec88c2d3047f60530fb87e72cb4a5e655a9663f6f3e9b09e5ad32cddaa",
"blk.16.attn_output.weight": "687759ea75e45108526ffc1573d6fdf084728079bfc2dc89b9979e76280f43c4",
"blk.16.attn_q.weight": "beff3a45c7e9ec82ffc6d3c701126be28654d10aabd747d03441210491fd31b6",
"blk.16.attn_v.weight": "43a349b13f0b9d040cacecd942bcb168c030fef8c75c987d59a4fce6c14e855b",
"blk.16.ffn_gate_exps.weight": "793406d6c13d727c82bb7b692ca98d65ca975baee69fc57be5378d77c5a19b62",
"blk.16.ffn_down_exps.weight": "9bad3dd150d0230404b7f886ac7ff8803225757e813f195cdb26bad245243b4d",
"blk.16.ffn_up_exps.weight": "7449d663023fea3496475bf0a9c1de7272ad0ce9adcb3265e8e424badaa674dc",
"blk.16.attn_norm.weight": "a424ce34c195a401df1ce37ac4f2794e8a6720b1ee8acb21428e2b68c65e0125",
"blk.16.ffn_norm.weight": "405a68bb8e16e1064df2de55ca3cd9ceddda1d9fc0af007a9bd7cad4b2676248",
"blk.17.ffn_gate_exps.weight": "97c6e5321491ca5dc039ee88da0eb0e78f347372785411809af84b3298cb19dd",
"blk.17.ffn_down_exps.weight": "1617ac19788a1be19bac69277408761e6bdf5719d63a8c7fea14d41cc27641b5",
"blk.17.ffn_up_exps.weight": "4ead1c365f112581c10610ea3f63d2a1474311d2503d2060fed4b458ef337f5d",
"blk.17.ffn_gate_inp.weight": "ed4b3393f2523f2b5e0fc7680a1caa2842e605728a529b5af68a7fa8d7abf940",
"blk.17.attn_norm.weight": "beac17ef86a7fb2b5840cc72f7a95a5e3d6bd24e7fa698e0b0ebb9bdac45c561",
"blk.17.ffn_norm.weight": "81cb58ec6d6dc02a0b4ede10adc336dc865fa76f982d4eab0e4a37b40f5b0fac",
"blk.17.attn_k.weight": "eab569e5ea8c8b05e5a6a209fba031129453c2e28181eee3e736b3b04b36bbec",
"blk.17.attn_output.weight": "f85b70f01438ce8fe5d10599b113f30bf18dee2bbae0657d3eba295870001db3",
"blk.17.attn_q.weight": "887ceebfbf6a2b94b43d2df4439ac3a5bbc29311d4b28addc04d525546032047",
"blk.17.attn_v.weight": "2df9414d65014c06a93da22ba3a668be7b83e2e8008e98d7771f7dfebed98298",
"blk.18.ffn_gate_inp.weight": "9b07741a0950fc667e5fd25937e33bc22e1f764f80eb4ff3119f005327ae0f6e",
"blk.18.attn_k.weight": "8649598dbb63938744c39bcda5ce8c31773e29c573be8d4d2c114f5030f8d3e8",
"blk.18.attn_output.weight": "f8e391adb92622298ca834d5d1eda48b69c3b1c51c5a584ef6c54a725c298d75",
"blk.18.attn_q.weight": "84bf8708a2eed618f48f69c178ed7dd11fa4c468102376e72e910ebd037d131f",
"blk.18.attn_v.weight": "31db3cd773f09548c2c1b1eac2718e46364a7810970fe9c433fad9d8de5397eb",
"blk.18.ffn_gate_exps.weight": "be2a2ba378002f1b61f86c273a69eede9b93786d5ce96b4fee1861f730dca4c4",
"blk.18.ffn_down_exps.weight": "d35196159e37705db50a5343e3989f7335477f1a4add67ef42ad64a638cd07ae",
"blk.18.ffn_up_exps.weight": "c6ceedd86e97913a6dcadc838e7abb762d629fb8dd55f15cf02fd9bd66d2ba78",
"blk.18.attn_norm.weight": "41f0b1ad83d6e3cb9fbe0d27878c2e7ad4a351b9f554a6bc9117c01745cdf6e5",
"blk.18.ffn_norm.weight": "96646204bd0d82f25dc77faba4dbd86b1332e449313e6684e00122da8be99057",
"blk.19.ffn_gate_exps.weight": "c6eb7f61e7938bda0492dbc05e51e8f631c99224fe18e99861fc4fc53ba9e9ff",
"blk.19.ffn_down_exps.weight": "4384803da3a3a3d44120d7dd192fe2c9bbd9a1a0cb492dbec1fdd7565230f1e8",
"blk.19.ffn_up_exps.weight": "22d73de2fbb8bb0f1bd2caf17fad8a355c47d914143f7f6e6d0128f66f074a60",
"blk.19.ffn_gate_inp.weight": "9a0cc4a2301a5634022fbce41189021bf0d1a961792d2d9330fd35556d18e5bd",
"blk.19.attn_norm.weight": "c5cc56ec5df9a1f7d5ad71fbda49f1433132e58895d45cb44c73420bd61ebd6b",
"blk.19.ffn_norm.weight": "77e17de741742ef2482fc7872fd423c8e3c1454dc4d2be89ee939084b6d78bc0",
"blk.19.attn_k.weight": "a92ea36ce2e3569656306aeefb835ccd5d1b03b33a86e0d3d030644cc923b813",
"blk.19.attn_output.weight": "5e2a912b37855f84ea964907a1a86d609cbdd79efa0c93c3e8e2fc07caf7c226",
"blk.19.attn_q.weight": "4ef3a5913292ac3c1a6fd3e9e53d011021f2b41d0276cf849706d1ca925cf7a7",
"blk.19.attn_v.weight": "42981b75b68ae852cee638b5433605c147da4392aaa6d7a06e756115b0171f39",
"blk.20.ffn_gate_inp.weight": "71381b9879a7c80b9f7b475abc0aa31b8cd71ccc00856ebe89764a2acb9df2dc",
"blk.20.attn_k.weight": "1928b7ebc054eb3967929ed6fb446314d5352f4aaf8b475ce55c6345019f2ea4",
"blk.20.attn_output.weight": "6071ecd9ca91af0d2ba93fef4a1a56f3b243dd70f862a21a2d164d56f386043b",
"blk.20.attn_q.weight": "002e95042a40f36ceed5829e3d0c8072e5f5e4ee86a089e2902b2348fed24dd5",
"blk.20.attn_v.weight": "42f509cdb1c0e298f89f896e349be86952c5168e49b3f83bb17badbcb7596d57",
"blk.20.ffn_gate_exps.weight": "a684a3ffe4b0a57c819a5fa9cb3521de223f392732927271e97ce925b6e33765",
"blk.20.ffn_down_exps.weight": "e3081a7bc7ba750d8a4886bc8ca4f231b55db4ca082b54b4106c7531964725cb",
"blk.20.ffn_up_exps.weight": "fad0fd5eca36ab154788da28be8ec25bb5d6db06c9d133db89e96df358a2f6a2",
"blk.20.attn_norm.weight": "c3e3f2429715ae95e884ef1246b0b461b23c5cc0ed08beecf70a14cddd184820",
"blk.20.ffn_norm.weight": "ff31f609dda65ca496b0584fabea6550e42edd05ebf229812aa6b7bb5ede15e6",
"blk.21.ffn_gate_exps.weight": "366f09ef0ecfb86808eb3296cc9abdb957951d27f6533c03f1422b54061da660",
"blk.21.ffn_down_exps.weight": "3fc495947d27fcca7fc0893c8a96e5d48ba27b2c8c58f8fcfb8dcfcd5539741c",
"blk.21.ffn_up_exps.weight": "6713ed51410bcc8283cbb001c4ad784098f25701e8021f4fa4f411e186859c4a",
"blk.21.ffn_gate_inp.weight": "6d4c92c01ec801647134d907bf1108878156df266a6107abc10526332b328b93",
"blk.21.attn_norm.weight": "27605719ae2df24f4f2e85a730927cab20367631612cb501631f6bbf38eb1209",
"blk.21.ffn_norm.weight": "ca80ee8177db185b15a4a378c1cb6f7143c76546a7f1726bda23f329323d4ffa",
"blk.21.attn_k.weight": "9e49f743d4a5bda9b4bd9c40c2ca37cdae5aec7e54cb193897ac8b4945ada14d",
"blk.21.attn_output.weight": "ab923540879753feaed152f5950f69cdd83d8f2413ca873f5f038b63ab0aea12",
"blk.21.attn_q.weight": "62617fc3f1c9d2aa672a4d91a121c7a91b92d145b65e75f0b06b4bb7c825dc36",
"blk.21.attn_v.weight": "15f8b2e72f8e8e992f2f6b3e93238a9d7be7bd6136f91c9d04b4b4cd0cd60369",
"blk.22.ffn_gate_inp.weight": "3ddb1773d9257b68add7a2a4e94dad25ed926803e02707863dd742ab9b2dc179",
"blk.22.attn_k.weight": "680e45a9e8d5feddee5266e119dc053bf80718fa9af1cf6803e6f493b265f1eb",
"blk.22.attn_output.weight": "0d5fae3402fb2c5aa3a860010e3973fc8e3168d1015f7a76b7b2964681693206",
"blk.22.attn_q.weight": "eee7e3d426ab533bd18d62c9aa142eedbde394bed07db58313e0fccc82a23237",
"blk.22.attn_v.weight": "26b5be1fe3c2b6824c5a648a3e4bdf17691904526fca158fbc3ebb627b67e2f4",
"blk.22.ffn_gate_exps.weight": "32ab7a7735313d60f6a75229b1aeee940b6aee176c9648536bf5921b0dc2929a",
"blk.22.ffn_down_exps.weight": "67590808f6a67777d3eb7976c31fe616d388b98fecbb12253b72d1241d70753f",
"blk.22.ffn_up_exps.weight": "fc245c0183e6d90829ff5e71a4ec93e4860b3d4c1a17b9dda2fb64f5f5c9ed32",
"blk.22.attn_norm.weight": "128e99d206d4d6724758ec97468af767fa0aea592149c324b731659c1e74a1a8",
"blk.22.ffn_norm.weight": "e45f498033f0cffa15da0eff2c47b4472e43fcf8921729fc4eeb2e3a6b3c78e2",
"blk.23.ffn_gate_inp.weight": "d63e686f5325fbc89fa242c2c52a3b8ff54f867dca914c9ae6eea13e9d6f46e5",
"blk.23.attn_k.weight": "f71f5a577f46ea12b1818f3a5ff4b85ddc45f9a2afb0fa2e041d71a3e31c6779",
"blk.23.attn_output.weight": "92b13563c1e0eac0d748fb67b235dfd7a64c8f16e2dafb316885744582e23b4b",
"blk.23.attn_q.weight": "2f9b9c35dc4f912f3f51c06e2d68f417b51a0de0a84aac530a64f9d3d7b0a2dd",
"blk.23.attn_v.weight": "268e40813806e74a5c364b19556d087bf8374e76e7b6fcf55c381eb7da13ccd1",
"blk.23.ffn_gate_exps.weight": "12f857e7a7ce228afac34d99b602c8d6fe96984f2a21118f459a58cb767ee65e",
"blk.23.ffn_down_exps.weight": "cdb082c16599c3bb36a28066dcc122d9529b54fa91b6cf0153437ec960a5e16d",
"blk.23.ffn_up_exps.weight": "f4b99f6f44d7b8b5a305894e88633bf5938fc1f6303a2b2092399da9c8b64d7c",
"blk.23.attn_norm.weight": "a691392210383915916b4d3886d5e4d56e7855e27e37e414fbd73bf66b3712e6",
"blk.23.ffn_norm.weight": "0c3dc72f667e5ae19b69bfa9f2bd2a01a57681f89ef9527bad4eb0d8c7b70da8",
"blk.24.ffn_gate_exps.weight": "86baca2a3157994df7fd8ced5e08436d5c1810dc29c0715637c36de723e0e7d1",
"blk.24.ffn_down_exps.weight": "ac5d559562b35c34993e34b071f66d15c65be5907797078c2d2a49aba54e3192",
"blk.24.ffn_up_exps.weight": "fce0a099cf09777f44fbab3606ceb75f7fae6f0b80725f9e871654b8cdf9262a",
"blk.24.ffn_gate_inp.weight": "e7c6800c0cfc56b565b2d35ad6f1dbfdb70dd0b05b338bc8da2286ffc3678d79",
"blk.24.attn_norm.weight": "dc6cc18ec52d102d015153c4a1132f9d7a504e29cbdec81c5edbf3b9e65815e1",
"blk.24.ffn_norm.weight": "480d5a1397af5e0e657f1e67d20ec0cdef5724e71246a326843321b87ffabd33",
"blk.24.attn_k.weight": "338c0597954a9b95a782545b2fe36469553e73f86ae2d2b5697767b28e1c7daa",
"blk.24.attn_output.weight": "a77d23b79933c67e52f1eef7f83a3dff4f767ce0bbcc39572f8cec4acd457643",
"blk.24.attn_q.weight": "45c9478593002be1998e96e70668aafa2dd3972380fbc1df12fb05c24ba959e0",
"blk.24.attn_v.weight": "515729420885408a6a9614bc27cda393ed907521318d14d21335d39a3eff0b61",
"blk.25.ffn_gate_inp.weight": "aae4ac40e9ab3925241f9d784b54b38851d9bc999a6c3bc03fc3f17c9b28a67c",
"blk.25.attn_k.weight": "4ab4808d02396c35b00b426f536015673b71c17ae6cd55bbc2e6bfe7a4c59d0c",
"blk.25.attn_output.weight": "1990bb982b77e0c947cd1a8ef0b36227ee1259e6dbbc2829e5c136edf88675eb",
"blk.25.attn_q.weight": "a1490f3048e8c0ec8784f8550c43adf5cc8d0f2f90131c934713fe4b1b015bd7",
"blk.25.attn_v.weight": "f15e53c6d45b3b6f58808fa968425d65e0b26b7f9b268127a77abb1227c67431",
"blk.25.ffn_gate_exps.weight": "656662447ff54f56ee80f78a1b9483f7efdc40f7375d0cd8a9c72ccf21f77e7b",
"blk.25.ffn_down_exps.weight": "db06f101bccbaef19cced0f6c185166e18202465f4a42cddfd535fbe5cbabb4a",
"blk.25.ffn_up_exps.weight": "584a7b02456f27fe1d8d3c7ccd21d426b6ea887795a3ed77f704596a1e3841d7",
"blk.25.attn_norm.weight": "8f0f3597982930fd237e9d609776c64f2b909a455b21678f83a7ebd4bbb83e64",
"blk.25.ffn_norm.weight": "3e7079c32582afba0c55e032f254adc18d2997705eec860185e9a6dd3d82f07e",
"blk.26.ffn_gate_exps.weight": "e70341691b583b86489812b29b77aa41eb658b1865733d6118da54c66e3bfcc6",
"blk.26.ffn_down_exps.weight": "5c1b812d11dfb064af816ced5ab6463bf9722eefdfc341b8a93705d5038fd781",
"blk.26.ffn_up_exps.weight": "e18118362ae54ef7432781c83884f9fb230a9d934e342aabeda8822ea5f71fb6",
"blk.26.ffn_gate_inp.weight": "cd1c5f6710166b9567c6b74c97b2348b191c60aa860958c6bc264ab095261dff",
"blk.26.attn_norm.weight": "71d087531af2520bda2e676c489e8529cef5db8aeea1eec0a937a8b4f2fa2e54",
"blk.26.ffn_norm.weight": "7f704e936fda28eb5c2cc339f0f6a5f78170b5aa43c01265b21668870d819c82",
"blk.26.attn_k.weight": "1cc62a0ce0ae251275d898c52c4a9fba5995fca10955d2011d10dd1a59e1afb8",
"blk.26.attn_output.weight": "636e881b1505f9cef656a4be98bec6a4765321d51f9bf1dac8933397cf44b765",
"blk.26.attn_q.weight": "89a3c4d202d7d6adebb9e0c1bcfd8b775f6456386f1be25e86e43acc949c1e16",
"blk.26.attn_v.weight": "ff2cc963b597cdf1a21703f3e7022af3bb4c65a34a19e19d9309a7c5e198b5bd",
"blk.27.ffn_gate_inp.weight": "6150139498fefe380bb99d11e72028da47a15ecb73dfc5b2774f726f4bed8f9e",
"blk.27.attn_k.weight": "f286eb9e5c56c7b801a497aedc40158c2a27877d7f9fb59b3fc67834798902d2",
"blk.27.attn_output.weight": "5dc3d3a05f9f7729509147fd09c16fb53f85f520cdab5cb69abf4bae3fd460c7",
"blk.27.attn_q.weight": "8462e40f86b24251960d6f35a9ea99b8793a01937faf1aec2859f2e5395dbb61",
"blk.27.attn_v.weight": "bac1a99e38e25953f8315f7212eb9777dc216cadb09b959977885ae62724ceca",
"blk.27.ffn_gate_exps.weight": "6a15eca7f0f6ecfd93db2e55c63875348ec4a78c4ff643ec46df9e958c0101e4",
"blk.27.ffn_down_exps.weight": "2e1c91247c4359e2073a8e5f26fd7f6426da7be3ed5bc65dcfff701f0a5022b2",
"blk.27.ffn_up_exps.weight": "65d6f5c553c9332085eae4aeadf25090b5d7768212ea7b08ed698102c21b29a1",
"blk.27.attn_norm.weight": "7fab8ae63ec8e91ce625cd130ab96d8427dad3a7413bb21b25ec5f408c5b9f5a",
"blk.27.ffn_norm.weight": "532720546b0fdcd423a02ca6e3e9d8aacb84b1b3e8269968f88a47fe2a69bab4",
"blk.28.ffn_gate_inp.weight": "a305ea58d98962d9dcf0c53ad2389b7acc8936fb35a0e3fc9410e7767cd49dea",
"blk.28.attn_k.weight": "8315e8a2e4f78dfdf36d4fc18fffc74bc95fe42c3ae4f9af2b6c874612c0f71b",
"blk.28.attn_output.weight": "9b5fdedd32d39ef46a22cca7cd5355d7b93bd07ea305f466a8aad6ca5a4f3778",
"blk.28.attn_q.weight": "4e8fb96997c30e231c437130f410d7c91d541a816f6c568b5f3bfdb4b8dece74",
"blk.28.attn_v.weight": "1fec739cf3bd7b4913f72ca358d4cf31391c304de44ac0ae31ecb825beaa7cfd",
"blk.28.ffn_gate_exps.weight": "9f259789d535e09268266b9a8020f32d6a6779966c909d91d3a10574f06238a2",
"blk.28.ffn_down_exps.weight": "516d3f8abaedb01b9916a4b67d4672159769138ef2850158bc1b32c41e31f0e8",
"blk.28.ffn_up_exps.weight": "f2f1d88d2c31ed588806fb5ad981d68f5134d7284c4fc022fd018de2eef437fc",
"blk.28.attn_norm.weight": "960fd005598deadaebd969996f4367a9dbfad90539a863674fe95730935acc64",
"blk.28.ffn_norm.weight": "e1993b37ced93d4049e9af2c47b0d9207d8f7e6f2cc3a52f57bef30bc806d805",
"blk.29.ffn_gate_exps.weight": "58927146338f443513337476b3cd30e6341742f096c2beb5890d400f10121298",
"blk.29.ffn_down_exps.weight": "03a3386e4f0b75a28c5608e23b2de8f0de25f21954e4aa7fc343431bde9db07e",
"blk.29.ffn_up_exps.weight": "6916b7490a7ae7b04a5d81cc1e7ac9b20c483434f3b186b12d87fe176bf1567b",
"blk.29.ffn_gate_inp.weight": "98e710e467a3d567abe4ce29d78b8e8dc033148762290c0c5e1ae4d78efd8c78",
"blk.29.attn_norm.weight": "4e64cb307d37be20d55f38c94faf7e451d11df5e60df347906cbaf9c5441be71",
"blk.29.ffn_norm.weight": "696c23a52f742679bd44440d687a4c44b4302d57f1e9dc5610d23374336187e7",
"blk.29.attn_k.weight": "e85253652fd6120c623634ba66b725bf7cd491318b54ccdad2c7df8851d64c0a",
"blk.29.attn_output.weight": "4f650a71efb150d1f24cd4d114d4187bf570ac424da3b92ea6455abdf1aea705",
"blk.29.attn_q.weight": "69fa7da901026ebcbbbc848455b425458b7e3295007d7fc093acf4b38e2166ea",
"blk.29.attn_v.weight": "17e2e7590b317b21f106de546aafd955579703d1e95d6aea044ee72ec3a514c9",
"blk.30.ffn_gate_inp.weight": "3a03284b4aa60d59d4a2ec86253469b61fc656372afca427cb77a5332fbcc62c",
"blk.30.attn_k.weight": "d518cfd0db9708e769eb1399e87ee49357dc54d5afdbac3d4c0ca46c64e789eb",
"blk.30.attn_output.weight": "9b44378714d784c5ef9ab604359091baca4e0ec222afa139b7f840eaefb371fd",
"blk.30.attn_q.weight": "cbb95365bbfbcad0c9cd99b4eebb5a5d32de68ce08e4063b5ec3e792b7548044",
"blk.30.attn_v.weight": "e7985c04fe1740e35a9598f43b67b0922b4fc2d00b68a92a9f917b82c3248de1",
"blk.30.ffn_gate_exps.weight": "8ac4bbd07935d98f895ba94dc174e5ad5046c3c222b53729d60f987c05e7eb70",
"blk.30.ffn_down_exps.weight": "dd672cc71e82abf05064a18121b8e55fe1a4f19bc1d7cb9a142f4add54bc336e",
"blk.30.ffn_up_exps.weight": "12282f664a2a12aa25e2deac58946108715ebb978bafed5274cef24569107646",
"blk.30.attn_norm.weight": "1a33458fee054c6c9c896a4bb0a4e1fbfa0293b2408c7dd2b81d692e966e7273",
"blk.30.ffn_norm.weight": "311e33b68051f507f1478ed8f2693fddb846170ddb7285a91be43f795c2ce31e",
"blk.31.ffn_gate_exps.weight": "8af43d9867a51cd8392fb48b981b0ceee0ae979c491c07d711b3b56b5162c786",
"blk.31.ffn_down_exps.weight": "5579cb7758c1600b19d1f540deffe081b575962e37437b3b2efb2fb0a2924e40",
"blk.31.ffn_up_exps.weight": "f2e7c005276b3a001fb40753f027fa10b4d5a346f43cf4b4bbdeec6e74e1cf6a",
"blk.31.ffn_gate_inp.weight": "89885dc0e30b6b16a90c0331d7fa3174671e941364e8102d934f02132237e61b",
"blk.31.attn_norm.weight": "99e4e9bf86a9edf8c404153a7e8a82324ba79da462622196e2faba161bd95172",
"blk.31.ffn_norm.weight": "55335997cf6de781bf332b943de96ff4646966b05d9fee86b76ea897e27b6ca7",
"blk.31.attn_k.weight": "cee570762b78da6316b637892cc4b080e40f57af5551ffb1866b9a8e80e96628",
"blk.31.attn_output.weight": "fa321ff55ec7819ead7b819fd45215262f39744569765ba2113c989c03588802",
"blk.31.attn_q.weight": "9e2c409b878f8a2a1436874abf428fceb1c534b21f9ad4dd6f532b8a469007f0",
"blk.31.attn_v.weight": "a845d0be68ba537b4a775bfba4d897faf7c82a811a2612b0b7420cc4f3574cb8",
"output.weight": "16101cbb74b54cda9ebc07ca3c762e3263a56efb3cc011156184b95807d7cf13",
"output_norm.weight": "d7aa61585baedd60157aafe157930785742c55989c288573566a971b02423564"
}

View File

@@ -1,225 +0,0 @@
{
"general.architecture": "phi3",
"general.file_type": "1",
"general.quantization_version": "2",
"phi3.block_count": "32",
"phi3.context_length": "131072",
"phi3.embedding_length": "3072",
"phi3.feed_forward_length": "8192",
"phi3.rope.scaling.original_context_length": "4096",
"phi3.rope.dimension_count": "96",
"phi3.rope.freq_base": "10000",
"phi3.rope.scaling.attn_factor": "1.1902381",
"phi3.attention.head_count": "32",
"phi3.attention.head_count_kv": "32",
"phi3.attention.layer_norm_rms_epsilon": "1e-05",
"phi3.attention.sliding_window": "262144",
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.pre": "default",
"tokenizer.ggml.add_bos_token": "false",
"tokenizer.ggml.add_eos_token": "false",
"tokenizer.ggml.bos_token_id": "1",
"tokenizer.ggml.eos_token_id": "32000",
"tokenizer.ggml.unknown_token_id": "0",
"tokenizer.ggml.padding_token_id": "32000",
"tokenizer.ggml.scores": "6e37bcde2adc7e350e87c496eddd7a2124329c1dc66c5bf3ad3997253e4f7a62",
"tokenizer.ggml.token_type": "b6ecf55ec64ee67d87750bdb8d757a2c58bf78377e9f4219f5689a6c4dea57ce",
"tokenizer.ggml.tokens": "d168da3ddd3eee820916945fcb9baf24dd3cde42f606cffa2d19e7c8a8743918",
"blk.0.attn_norm.weight": "216aeb2c9e0c271f899e1ef2a63cceeb8f41e97642e84fada54b1d3c1c11cf25",
"blk.0.attn_output.weight": "b597d56f7188ffc1fafc273fadc59d41738cffd677ae98c61a62c3285b3a3099",
"blk.0.attn_qkv.weight": "d28a6b44e13f59be5483e4be2bedb544e346168d720aca27f47d1a5a722be91e",
"blk.0.ffn_down.weight": "4a691370e5a61fcbbf540fbcbf4c0f1d15dec0364528c0e916d0744f6262b63b",
"blk.0.ffn_norm.weight": "0c00af2b4a3128bec64a0cbb1084b042fdbe13d9ad0d03bd577f9449dfead338",
"blk.0.ffn_up.weight": "b32b52f790c1c083bfb8a3126dc1111cfeeb28dc8c584a930a1e5334cb176bf4",
"blk.1.attn_norm.weight": "68748011503c6c029e8e69a84a8e5a89338f378769627b6dbf7f93d715c292e1",
"blk.1.attn_output.weight": "2267344add13b048ca59e4377c86dc512be8046a57156901fa32a20fa74e4ee0",
"blk.1.attn_qkv.weight": "9109d2e3d7a2eacfda5226587b8be124a3bf44b972da7ebb17aa15795897eacc",
"blk.1.ffn_down.weight": "d675df4df4dd039c0c339ad6445d39eddd2004db6bf35bed6314c7497245a633",
"blk.1.ffn_norm.weight": "3b5767ae977bc8baaa06b06efdbea193b6b3ba605ce76d77a76ce317e935500c",
"blk.1.ffn_up.weight": "80dfd6d9d234b00334c89b8e0a02f81899c2efd377321c34ba5ba51a5f61b5ff",
"blk.2.attn_norm.weight": "6a6743b057e5088f145bc179e92c9bfb41163e7295d7b81c62e23dd89d2b59c4",
"blk.2.attn_output.weight": "bc5491ea54e0db81462d7d9b7d25cbdda380c2db8de041bd1c4ab7b76a1d19c3",
"blk.2.attn_qkv.weight": "a61287a9852e2f5aca9c100b471d98398b2913a3497c743de3c70ec9ddd7087f",
"blk.2.ffn_down.weight": "4fddcc382c8dceeab027fe43d8d44e67edb5e8ce4b9a1b7f773c87770380ade1",
"blk.2.ffn_norm.weight": "07e05f82b3f63f711db3b684ca79aed25c0657917e66f88af47348a82065c227",
"blk.2.ffn_up.weight": "4835a682ef1826c12df01ae7663fc45f9c82bc8e64b665f13fb7da8e201ec0fb",
"blk.3.attn_norm.weight": "f22aba7c03999ba7136f39cda747a39715e498699dc1716cd97fc5dfc58d1b1c",
"blk.3.attn_output.weight": "53b579855366fd786c5126b2b30aac4d583ca7bda56833c4865f5cadb5c18c6d",
"blk.3.attn_qkv.weight": "bb56aba78158123140fcea59c69ac562ca208f6d3086819417cdad8c50f333ad",
"blk.3.ffn_down.weight": "97280897a7cd86db2830c004bccc5bc094f50e293baded0189159a2019145a6e",
"blk.3.ffn_norm.weight": "10a8c99f8b57a960e8e0a1133c4a26f9148403d1b9bff2eff114917de996f3b5",
"blk.3.ffn_up.weight": "7324046c915e75d621b2043597a245a428d8eea31869135e6257a861491d8dcc",
"blk.4.attn_norm.weight": "507d8e164de94646edbfe33def8e8fbf7c9a6ee3fbaedb5000f72d9f51ec5e36",
"blk.4.attn_output.weight": "bbb3429e6efa98c150e0fdbf48c16180cbf0d0cbc1b3c253c6c319d78f4593a2",
"blk.4.attn_qkv.weight": "b95ee5be0786d3901273d806c339fe6c20e6bfffd2a20672a9f56af80921e8ab",
"blk.4.ffn_down.weight": "806bbf91df92a5a22bd5aa1ffb7fc2869f7293ffc7704771c290ecc583b27975",
"blk.4.ffn_norm.weight": "cfc2930a81df7aee3a5e7f726a15c1182233e868bf0d9d37f6b6ae6d8c15c234",
"blk.4.ffn_up.weight": "c3390c69533de2c8424e8069323ccc5d0c4543111535da04cf2c7d26745576aa",
"blk.5.attn_norm.weight": "0d71c4fbcefabbd021569442853d2fe90668b19409ae2805a718a829ca60beab",
"blk.5.attn_output.weight": "10ebd93629112bf2df5c30dd0953a4a5e9020306768283181ed426934d47e14f",
"blk.5.attn_qkv.weight": "5cb05633369f12d4b00e0ff787736bd846856682115720ebc6cce05270c334f6",
"blk.5.ffn_down.weight": "e28bcc5094212eafc7476dbc5b7a520d25b79578cbf4229d698e2655956a80ad",
"blk.5.ffn_norm.weight": "b6f2c4cf9f34bb4d59989f96165c14a67dc1e266ad0a6d0fcc49f1add929e6ff",
"blk.5.ffn_up.weight": "0f9ef99423cc07ebedc0e9cfa95809f2d7108d910bb4ef97ebc0b0309c440750",
"blk.6.attn_norm.weight": "b3edcc47a42218234f7564d7470611b49401a41ae8cd42123f86557c69f5d7f2",
"blk.6.attn_output.weight": "eb9b7d257b388bb5b8fe0515e5c6873317239cb94cda236e4b6ada2a6c57c65c",
"blk.6.attn_qkv.weight": "eb968081f478c52f07bd9c2761741e982dba33cc4eeadeea3557d391b9ac2106",
"blk.6.ffn_down.weight": "1b8588bb7463206290322695577dcfced300895d6e6f4b26966c53a9ae2f0f84",
"blk.6.ffn_norm.weight": "1219c04b7770983c77814200eefe743f46d15328ea2b12711e44f8103eab08d3",
"blk.6.ffn_up.weight": "197ef287239fec47c55677f0fbb66eaf0644f775bc382de843971730721394f6",
"blk.7.attn_norm.weight": "b630ad08c80d564ed1c024384818e9fd3f22a36cd7a14aa96e7e2759a8285099",
"blk.7.attn_output.weight": "970255aa750828a47d6b9d399f9612b5bf25aefe7dadbcba41fc416d0d4067c1",
"blk.7.attn_qkv.weight": "ebb157c880293e6de8d629f263ba8853ed1dbdc02c311d43432bb8cfbb310739",
"blk.7.ffn_down.weight": "24bcd4db4cba844c89f878b81843c373dbbc0675e889d32c5b12e63384a7b670",
"blk.7.ffn_norm.weight": "b9c6f71001808ee873ce7db8056e4b53fb4cccec8b7f0f312899b575fae39d39",
"blk.7.ffn_up.weight": "979f1828d227455c26015a2a11afe9dd05f2bb97a8ba6b38c8dab3f50e627401",
"blk.8.attn_norm.weight": "4e8e347e3775010b7112ee630f2f4f2383be7ff64e6ca6154b9b22566552eaa6",
"blk.8.attn_output.weight": "65a44babf44a435a1829945211b3168f9ec78ac3cb7a049a733e93d11f0d6659",
"blk.8.attn_qkv.weight": "343ed07671da400b040812a4058482fa38284b5d9af9becfed07417fe26ce747",
"blk.8.ffn_down.weight": "7fb7e073e3c2c503c4e9d60efa0988fed7398d900cc003695fe3fffd3e188b82",
"blk.8.ffn_norm.weight": "b07c1f655d8593e3892a2cf73f8a0c19ce8e5cb613fafbe7cbd430da8ce4c57d",
"blk.8.ffn_up.weight": "8b26e14de54b3fdc2e2d3ea41720f9d9c236a93688c3b7fd7bf43f5fbb327c9b",
"blk.9.attn_norm.weight": "46394d408a8e316916177e6aa261de32e137a82d729c0b1800b072f0c38c39b6",
"blk.9.attn_output.weight": "d57f3d46107947a7073373a0b35d6ecf7759b5df15406f4a3590a60666af6b16",
"blk.9.attn_qkv.weight": "14bb8ace8c5453148f4b536e9f4279c813f31136716947256f5cca333448639c",
"blk.9.ffn_down.weight": "2b8d98e2b5ed68338f6e4de43bf7de0c4858cc69103cd5177725f7444eec7694",
"blk.9.ffn_norm.weight": "41a499dfd418cc4c6b8c12313f673f7e2cd4a3f9c4065eb6c4feb5eed02fb542",
"blk.9.ffn_up.weight": "143aab7533a64b17fbe201490a6f674bc7f0bd370c094500b2e100419073d1c2",
"blk.10.attn_norm.weight": "ebb670aafd36816a794347287269d8f1a5b19c1e3c0a1e38023bc19fdba9b073",
"blk.10.attn_output.weight": "b5d65bbc0ed5e49fdd9d754bc18163cd042a285024d0cf6f954c503bc8c877cb",
"blk.10.attn_qkv.weight": "f06b15bac88da798fa34a62b03eaac0dbe8b846020516603c387541f2d8dd672",
"blk.10.ffn_down.weight": "fb091fcd1b4de25d1bea94d1755e255cb02914a030d23e3a234e57b8d46bde6e",
"blk.10.ffn_norm.weight": "eb347bdf9c40414af87e13a8e72e40b31f004b50f7cb366f1a219ced60a61355",
"blk.10.ffn_up.weight": "ed2d52fc881a173f404fe8a1067862c9856d6c3e0d2e90a330a7aa394e3f84d1",
"blk.11.attn_norm.weight": "64e252603cf010a0e502ca39fdf8d0a196a79aec67c0d2bb9213fc0cb80c47d4",
"blk.11.attn_output.weight": "228e33e21c69f52efc74fdfc831bc9af271e44b2a29a3dced1d64e667ce36eb5",
"blk.11.attn_qkv.weight": "ab9ce6d4ef9e42ee0da3f20a7708a3bbc5e79e967b05fa86ba946a05e2eb63eb",
"blk.11.ffn_down.weight": "0ca133b7835c98dc77c25d64e4eb7873778bdb5e4d22d8b80f920f46865b43bd",
"blk.11.ffn_norm.weight": "02455741a0dfd161c79aa1ecc381901721f229fdcda5615622a629631fb61cfd",
"blk.11.ffn_up.weight": "9fecdcc099fbb8e23c6b1ea9294702a027f4a58d265543ec5e7be79b8f63b354",
"blk.12.attn_norm.weight": "783bb459911b1b3609a9b2bdfe272f1670add73b5471da738e07ac47e2e07dfd",
"blk.12.attn_output.weight": "1e1a914c9e48b857206ac5a1f7cead994bc1ea91d5d4fff8c834d73f2e38ef5d",
"blk.12.attn_qkv.weight": "5953e7185ccb87fb4dae8f9426ec86315d4c7794326e8ab59b3a95d4af2189f0",
"blk.12.ffn_down.weight": "a3eecf0f394f86e2cfb48a5940a5c50ca86d71883b2f79fcc642a935fabce0d4",
"blk.12.ffn_norm.weight": "0a4272e41373c23bd72f10d2d82930aa3a1480aac75832bfbf01cebf0b86b6a4",
"blk.12.ffn_up.weight": "06f42776de3a7ceac3025f26a7a8bd20e062233cce2bdaa2183470dc4b30b87d",
"blk.13.attn_norm.weight": "5915da60fb03e201fa649faba780e5fdf1c761c262b206e5415cf83181f65780",
"blk.13.attn_output.weight": "4dbf6eab074fa3835fd32bd631a8208e511037d5056d2fd3015735cca7674ef7",
"blk.13.attn_qkv.weight": "d3d8339a1c4782d9e73d77fdebe154d3c5b83ac40c9175b3e91a4977d08f876b",
"blk.13.ffn_down.weight": "de6772b46a55e1fd42b007637dfbf68b6598e5d5b61622da0935002e1e192d3a",
"blk.13.ffn_norm.weight": "5a640ea3b8c7be49c95a58a2327e10d8e8d9d142504bde5c8091613e5b961d7a",
"blk.13.ffn_up.weight": "f35e3545e4bd3531b2e843b5efd31dee0c13c807ee6386e65473ba67bbec30d0",
"blk.14.attn_norm.weight": "9b34986450b7c98b4927e81e61a816f9e84b1addc7c14926402100037aad6678",
"blk.14.attn_output.weight": "155d52efb23d366016d861a251d4d1f4a0c13699188c50d50dba016a0d8bfcd9",
"blk.14.attn_qkv.weight": "8e1415084e1f33c73a777f19e752489f4dd312cca047733e5ea643cd4a955e04",
"blk.14.ffn_down.weight": "a2a142226b94baa01ccb65bdea2b7418e49085c1d9c3c63e544e3112c58a25da",
"blk.14.ffn_norm.weight": "8aecfd9b0ae6affaea31a80c5c9a4a14b31deaa0db7bd8f6da2a64d23447921c",
"blk.14.ffn_up.weight": "0c1407237b8c1bd02f193346b5681926fe698a5055eac6a7450451b0f991707c",
"blk.15.attn_norm.weight": "e037bd19880bfa83d983200fb0c7866f8ad16c3ff5cc4b4f3a37ca7373870ff6",
"blk.15.attn_output.weight": "045fe4fc95cc129a1b92771b179c11b12845c4c088786c607f17bd98857e68e1",
"blk.15.attn_qkv.weight": "7621b7559705cab1d4dea1c69f76dbf9dc1c8837a203b656f484703b9c1b70ce",
"blk.15.ffn_down.weight": "7e5ac20e290bc60761e1cd972354fde225b7fa861048d44d9a0dd9b046d55f58",
"blk.15.ffn_norm.weight": "b6d830d88f1db1825687973c8c2b1a24c6fa84f07af8d0e3ef9c86009baca0b2",
"blk.15.ffn_up.weight": "dcda0957cd04fc45476774dba2bbf9aa89d6b05d5ca7b10ae6f73ad2c49b1cd3",
"blk.16.attn_norm.weight": "4ee9b70ba15cb2a08240f93990e90f5068c48fceb481f8e2186bec8b7214eb3f",
"blk.16.attn_output.weight": "315cfe5536658d2498192b2980eade15b2c9a4ff220e4011911457b1727fa103",
"blk.16.attn_qkv.weight": "3c8122e3ad637583b9dcde8ff3a323267d3014bb1f0f9771e5322260ca9ecc8d",
"blk.16.ffn_down.weight": "3b5fbebd5ee2b86cad96fb8a9b45a8770d08f82c1c8b74d7061e866f7020a18d",
"blk.16.ffn_norm.weight": "ffab69f20bda372de6e5878f0539163e2fc6ba113621ded95705fc3b1465c9f0",
"blk.16.ffn_up.weight": "0935ea3d258da42d6258406365f39f58ddaabfe97ea5977580db3635188f24a1",
"blk.17.attn_norm.weight": "f030441733f3d147b4a06a1eb4aeb8465c7c24d9c53bf4c48fe7e134d3629803",
"blk.17.attn_output.weight": "07a955ef09e8dc766ac0df647d0b2c69f23c4c69a7137654b4aad80303ed0eda",
"blk.17.attn_qkv.weight": "1c10688061e21e2fe12ad0cb54bf03895c1f83c3b0df743a42f548b52cbca1b2",
"blk.17.ffn_down.weight": "ebb9cc9836f41d88fdae2aa9a4355514e4edaec8d1577ffeb947a35204e77f52",
"blk.17.ffn_norm.weight": "50aff44f6528b13db5389f2ddcdb7676244947610bd7ffbff3f881c968c2a0d4",
"blk.17.ffn_up.weight": "d716537949582be33bde6b02e38f5a70081c9642a9fb05a61312126718b8d148",
"blk.18.attn_norm.weight": "0ea695c4e53d637902f46663a6ee42adc493c36794476acc7dbddaa05b13840d",
"blk.18.attn_output.weight": "5fd35b500221a612eb4f4bddf0e9b6b7db4d7733032a75f8802fb2d884647c2e",
"blk.18.attn_qkv.weight": "b0da37fd030fe69581f990bf23bfd35467a1bbe558af6de7c0924f6b72e92317",
"blk.18.ffn_down.weight": "b355c33f44b328f4bb977567de8f7544db4b005d7a8fbded658518ecf3c5a153",
"blk.18.ffn_norm.weight": "58b3fe9094079989a86e0387143259e1cc35952d24dc3df290c4ba6df44f5c51",
"blk.18.ffn_up.weight": "2ce530954c342c30ed2ead5353f931960bfae1d278868504c0efb973560fabbe",
"blk.19.attn_norm.weight": "533e9aed66feea8f0392aa81f9e293240e1f009a5334253915fb60c2749b615d",
"blk.19.attn_output.weight": "84f2d00f98a4113a779d3b5d1c3e7c914eb47784d3ab13b290367c124c2994aa",
"blk.19.attn_qkv.weight": "fbe6b9f53b07fa7537d3b3d452d20a9bc666f9fd41ec2091dd28bc2f70fc668f",
"blk.19.ffn_down.weight": "b30199e098c8bb3f890183d8b18471e80b62b604729b277ad62488dd71e1206b",
"blk.19.ffn_norm.weight": "c81373e41cd340b7badb19f9517c77c4250b4eb9a02dc758b8b49b652487d7ff",
"blk.19.ffn_up.weight": "5a5cb083ca7725720e3a890f7fa46354760e8007a8188849a092e305694a75e3",
"blk.20.attn_norm.weight": "4953091b4477e354357a8e743ba0a1900633e52f1599ee082a0c9b0b2b5cd978",
"blk.20.attn_output.weight": "62d54f7749cd6856097b2632066a322b0296df915fe66f382c5b5981be0d4f23",
"blk.20.attn_qkv.weight": "406de9e35b0729ebe902d7a47905cc7fb29a921431ed35dbef0c03e5690a1329",
"blk.20.ffn_down.weight": "62fb678b0d1261e19a4903a2b347d67afcc8acff01feb33a687a35a2d1e6f9a5",
"blk.20.ffn_norm.weight": "cd9d36b7e71e55c8925b97bb09c28219f182626bcff094878ae39c3db887a14b",
"blk.20.ffn_up.weight": "b9276771d79d3e932e73ccc520c3f8476342b9ef312ed2ee1e0da822e6e3ad18",
"blk.21.attn_norm.weight": "66d8c8a35e13ce9c2a0e75b670150e2c31484a55c2316df46075312196178ed3",
"blk.21.attn_output.weight": "12ab46c9382648f9b3350fdd92a6be6352743d62d6b520d7e2024e0c838588f5",
"blk.21.attn_qkv.weight": "a7909676ee1675ca23cd29a5fdd226df8dd9d68f94c6c9bbb51dd9fd38504008",
"blk.21.ffn_down.weight": "6fb317279c6542e82f97d5a12a60fac1bd0fa0405154f9fbe265e2fe39bd49cc",
"blk.21.ffn_norm.weight": "c0f703eb3ff161b5ba4490d87d8684b8a6c47a8f433e12f418333b9db439010a",
"blk.21.ffn_up.weight": "6dbdb80ef0c35e364bbce12d40d5e74c7963c7b55d58d9579567a07ffce7b863",
"blk.22.attn_norm.weight": "f94237433bf03d675cb2f655b81ca91a1ce2447bc6b00b13d6b0ccfe2d411eff",
"blk.22.attn_output.weight": "e821f95995ce497c01e63ca64f737713b1b65f11df1903e51d444aa516f33f71",
"blk.22.attn_qkv.weight": "1b0f717c73afb5eb4c82a1708c4e85c969e8a2a8770d9ddb78b1870a2d8a781e",
"blk.22.ffn_down.weight": "0f33f7a3cdc685484be99aa0c03642b0b20850a27d1fddbe054b13a9382f3ccb",
"blk.22.ffn_norm.weight": "9df285cf211ddd7df2b36a50489af574755c7d4d98b29a05cd04566ae613c8dc",
"blk.22.ffn_up.weight": "63ac300e1efb34041dd0136cf43ea622fac6f0caccce1cd9262f5e08d2cf179c",
"blk.23.attn_norm.weight": "5f72d9e88689b4027b28f5f8f26cd3abb03635ceea7ec98a4c91a9fc691f6707",
"blk.23.attn_output.weight": "6ecf04ff61125c5fc768f8656497152149373daf321ee9c957e8f7245a1184d1",
"blk.23.attn_qkv.weight": "a9d9978806724c2959f2cf386c233831f08e1e933dbf2b32665e788d9d512ea4",
"blk.23.ffn_down.weight": "72c7d17886a3da17fa0daa456aa5e877b2ef5b8b403182b870d9ca5ca9c70347",
"blk.23.ffn_norm.weight": "971e4b712e3025a13419b5b57d674b5e4ab7f18f74b57b9afc4671623da90c4b",
"blk.23.ffn_up.weight": "df2b5c7dbd5834545b815073af0c7355b065124e6d6f0fee78d8fa5b2076dc3e",
"blk.24.attn_norm.weight": "c41957c4a79ad3b16f6e11daec1c7f530b9f3f4b618e1e4367c3b67787ac4ab6",
"blk.24.attn_output.weight": "ef7d61f5fc88ac6f31bf60cb5f4d2d6b8df42d38825807112361a7224b0dee3b",
"blk.24.attn_qkv.weight": "3e6a58fe7d49c90bb6971efbad3371c32256881173ea5aee4b0c296cb206490f",
"blk.24.ffn_down.weight": "f43619144047de42fed81dfa495f1815d3cb771330e574043e2b67620819292c",
"blk.24.ffn_norm.weight": "5501d4a2a98c8ca6b42e77b53b221dbc08f530f6a067256d787534ec6fe028bd",
"blk.24.ffn_up.weight": "d64c8b0e509e2b1118f6000176f8956cacecdbb200c7e95ed93fb78b6e26c84a",
"blk.25.attn_norm.weight": "502fa3c302d371f61c5791f4615b73018ffb1daa09b6499b227116581244c5d4",
"blk.25.attn_output.weight": "ad8391d4e9c980856f2547aa945b2b6a407a6382158dc1ddd4f08d94ecc24be6",
"blk.25.attn_qkv.weight": "42e8983780d4a01a02c54ad23d4df21eea437f119a10af5a9c12a76a42d308c1",
"blk.25.ffn_down.weight": "302dd010d4e0ab4eeaee89090409ea0dddeeeed3236415eb8f97c942497eea91",
"blk.25.ffn_norm.weight": "fb34c1ee5bca96986c08834df0a0c047ba041c1123ac1f563e9d64312bf82d6a",
"blk.25.ffn_up.weight": "10739a8de156816d93c92b935386540bfa976bdbef204f0312960f6fc657582f",
"blk.26.attn_norm.weight": "7036c711609128c4e55968ff3681d3043338879a5737efd6c2ac9e1a2a61f1a0",
"blk.26.attn_output.weight": "db5db45dead5cb911fa01da59832f121b7c18b2d167bf53741c40819f24d346c",
"blk.26.attn_qkv.weight": "cae34c6b7f82ed14348d5ed30a79919c383737c1694a9cb9c0de609d3b0c1d0a",
"blk.26.ffn_down.weight": "491ec3a4da9b4f49f8ebc6be658ce397a9b801ae9fb35e82177e47808c65e5d0",
"blk.26.ffn_norm.weight": "fd7059d75d7f0e5288511ddeeb0f772eb3cae3ccfe4226b877015834edc3c386",
"blk.26.ffn_up.weight": "ea1ee1274c56458ce056d2205e5bb6e5422ce4cb0ad58006b8141749b97a0c39",
"blk.27.attn_norm.weight": "cc362c9a937609265052cd38544af17a1a7448cea086d4c801139e1fc865832d",
"blk.27.attn_output.weight": "ba757a81dabde9cb1b069d1bb616fe79649a1724f756567ec61caed1304fe6cf",
"blk.27.attn_qkv.weight": "1ab8d7d02d87756c12c2275636823aa5ede3d683178225c4cac4bd892c319bd4",
"blk.27.ffn_down.weight": "deb1c711c8a66acf4dcd2d088e1548f8e08f296f755e4067d6557fa55afde88c",
"blk.27.ffn_norm.weight": "fc6242d8cb8a4a37a8ddb7e41e7e60a63d4a89edf36acb35df052f10b9c91ece",
"blk.27.ffn_up.weight": "8df39b09c4801f343aca78f2918a1f6db78c8c55e591eda4c69eadb74c26e180",
"blk.28.attn_norm.weight": "75b539308f77e3cefdc6d98484d8b5cbf0538f0c2869a77b7373a145a18bc850",
"blk.28.attn_output.weight": "ae128940eb60a6d2e121762ef4b3e9dcf9eb3e105b249507fa7f12de0e19822c",
"blk.28.attn_qkv.weight": "bdda781c288e9326c240e33905f8e621b6a2ad902e620739d34f93fcd6f933de",
"blk.28.ffn_down.weight": "f1d6e6d1c286b1138bfd7e53fe477f399ae93bc2c04e35416f84218ed7247965",
"blk.28.ffn_norm.weight": "3f837ce82c8b9bde0d61d08b6f5fe5574886ea5328dbdc53f2929f18da8b4087",
"blk.28.ffn_up.weight": "2af027002e31d1b6cfedbdb30a2b9d7213f3aa691167c353913adfd48fda31e4",
"blk.29.attn_norm.weight": "61e8003b5329462ffe0fe172f2b160260de006aed858332d49d75504b6b6aa7a",
"blk.29.attn_output.weight": "ca44542a72a37476dc73dbdcc01f5b7497cb3ebc4ea230a55c9634ccd8e56ad4",
"blk.29.attn_qkv.weight": "abb3d9d6abe57872ae3daa51935d43264093ded5ce63b49d1e280ee5758be0e4",
"blk.29.ffn_down.weight": "6764b895fce881df097489c263446f0106de36217997660c15984b3ee22a5a06",
"blk.29.ffn_norm.weight": "89e03e9a33fc0e6e31ba9f0c2bd7c5734a118c5602bb90148793e08a80e8d0ae",
"blk.29.ffn_up.weight": "fa7ad57a84954f4121653152efed1a871d8adb20a1ea9086e3e849ce359d7d2e",
"blk.30.attn_norm.weight": "91a697aca1e42af54f806a20211031c3369e8d0bd58df1b0147fe24954e1f5a4",
"blk.30.attn_output.weight": "36063fcf766c89ac75be56f688cc63cefe5f2c733fbf4378ea9956ad386fa148",
"blk.30.attn_qkv.weight": "2cacd1161f1121a2c0b979930134f4666f73fb8d7237b3b0659ae091b15955a6",
"blk.30.ffn_down.weight": "9f3fcb6217100595850c05dc98f9ab2a263afdb6ab28df2fcb08aeff512057d7",
"blk.30.ffn_norm.weight": "6c600bc1fc7de39d4f8917b81fc7d1d5ed2a9b56492234c13a4bd6028c30d880",
"blk.30.ffn_up.weight": "73cabd1bb011956b2689ea3338bb76642ef3a57c197377d666d2ab5f56317668",
"blk.31.attn_norm.weight": "72d3e1cc771380645fa75a899858c95f39857a4f3f1ed60fe1578df383b8bc53",
"blk.31.attn_output.weight": "40089cdd29994dc19a1d89fa15902a89cfeca3540f12dc9bf4d00ef82506e456",
"blk.31.attn_qkv.weight": "1d0bb40e9258071ae14290a53c619a8e331dda07354d2a02ef45766c029ae5e4",
"blk.31.ffn_down.weight": "8defa0e06335b793fa8be03883f0a322d6c5b33f52c69c943c35c60d16e42c0a",
"blk.31.ffn_norm.weight": "33c55d9d0c496ccfb130361fe131649346e098abaaac39c0519507e5d846721d",
"blk.31.ffn_up.weight": "599f6503f61c692c1f82001973d35119f9688db5e6be9d9c298411491c93f09b",
"output.weight": "14b8dc662bfa3308ebb2e102c562d8e52c15670e538f20f3216a9c310ca9dd41",
"output_norm.weight": "7f2294ba94ce65681df6c7ddd8698799199b9d77dc83c10bdad5c3999f0fdb82",
"rope_factors_long.weight": "e34d378664e354652c38f47d10dafb0498ccc2fb042d39ff7fef768146fff22b",
"rope_factors_short.weight": "9379146a4988f373d362fe47b06c75e7fe7c54aa4dc9558758df79b7a87471fd",
"token_embd.weight": "19a03c1fb5ac0baee93b0a7d8b0f26e9a9b011e229b694afc50ebfc13d84f8bf"
}

View File

@@ -1,124 +0,0 @@
{
"general.architecture": "bert",
"general.file_type": "1",
"general.quantization_version": "2",
"bert.attention.causal": "false",
"bert.attention.head_count": "12",
"bert.attention.layer_norm_epsilon": "1e-12",
"bert.block_count": "6",
"bert.context_length": "512",
"bert.embedding_length": "384",
"bert.feed_forward_length": "1536",
"bert.pooling_type": "1",
"tokenizer.ggml.model": "bert",
"tokenizer.ggml.padding_token_id": "0",
"tokenizer.ggml.unknown_token_id": "100",
"tokenizer.ggml.cls_token_id": "101",
"tokenizer.ggml.seperator_token_id": "102",
"tokenizer.ggml.mask_token_id": "103",
"tokenizer.ggml.token_type_count": "2",
"tokenizer.ggml.scores": "6db964fe67338aca57790481a390121ff3dd643eebe49f7dd308029ad99abb6f",
"tokenizer.ggml.token_type": "98d247c5404b6b18f05f133b92dd56edf6efefefac326794b00d7b351f6c5aa1",
"tokenizer.ggml.tokens": "9efe405e229a45ff9916f54c475d151d2200cd2ab0006f347abfb069cf096c86",
"token_embd.weight": "8c1ee80a9ea4f65aa385ba30112010068af3d209bebc6e149d3d4589c2cd0a5a",
"position_embd.weight": "6c516f0b1c4e2388ab90394dd80ad69e4e4509b890982fc3408108ae66210eb6",
"token_types.weight": "f879f8e422ed211948f28b560d3c5e17aae7993f063b51196a28cf5c0fb3da21",
"token_embd_norm.weight": "75076e095d717aab96f8b6beeee503c27940d9a76f2b891a0e3de72f8a6043e4",
"token_embd_norm.bias": "298735285ffe944e1bf03e5d35c7280326b85cf121bde9874f1af5dc51ab939d",
"blk.0.attn_q.weight": "ab0923ce4c1549175112dcdfcc860fe30137f991e03ea6857fb5993670adaf6c",
"blk.0.attn_q.bias": "a3ec29551dabf976e1d34256b8ab5ab7b758f3ed9742c3cafdbd984d5441df62",
"blk.0.attn_k.weight": "4c1038a6d035c3e9ffed7fa672b614627814752503755fbad0cfb76a41ad71ba",
"blk.0.attn_k.bias": "e0363930eb588d91816aa3d230bb03b6e2551c165117b80b8d60397413819ef9",
"blk.0.attn_v.weight": "425e2e53e3f00ce98d29c3e6a161eb55d3e6ae0d96fdb9f6242d1c4fd6eef4b3",
"blk.0.attn_v.bias": "6579173a1e65ee124fbd0bd53cbdca4225515b4f2c5f18fb1bfd000f5978f9bb",
"blk.0.attn_output.weight": "a6d70a08cd7164de5d12af65d86d657c3db35aaecde778b2b3fda9193c4c9802",
"blk.0.attn_output.bias": "2b8d12c4f9a9c5bfaa29c597839568f6e0525cb41eeaf64ddeb6bd84dfeb9701",
"blk.0.attn_output_norm.weight": "bbe6e502a473228b525aeed26cc31b7db123ad63bdc5a6eebac6ea70b8b51d62",
"blk.0.attn_output_norm.bias": "36eaacaf0007c5c62daea97aab0115390c0682914f78482e37eb76885f4b7a50",
"blk.0.ffn_up.weight": "24654561c76ce387d125759ba843f06b904ef721fcceaeff6ccc62180a48e874",
"blk.0.ffn_up.bias": "fd3f0126aa1d95768fa60eb6f4ab8a2763cfcb7e5405f35b92353031d86f4d34",
"blk.0.ffn_down.weight": "97a829763a6a5bf3329ceb4d39c424ba4787d61653a5b0bbd1f84782e4d4e0ca",
"blk.0.ffn_down.bias": "7aa980c30ae8b4ee7f69df28808dbf5c431f56ccc4a80340f644a0419f16c054",
"blk.0.layer_output_norm.weight": "ef30dad4c2a083ae1ff5039a2a6cda60ecc89bf1e486a6f8c0d15f50589603f8",
"blk.0.layer_output_norm.bias": "8b1b77e67568b1bce43fc476de1b177c53ff688d66beb66995e8eb3dc290da8a",
"blk.1.attn_q.weight": "284331622a1f6f9b87ccee4f652bd66a394ca493c4d93be4d1844e4f6159ad10",
"blk.1.attn_q.bias": "e24ebd4860330e08f6bfdd077a82db0bee33f4c8846cf1db26327a34754c7069",
"blk.1.attn_k.weight": "729dd0d555544b5bd0f7580b3c8b384256b974605f0e7487b95f295aa032997d",
"blk.1.attn_k.bias": "2aa51a828a858f35473f54477583fea54ce2ccc34ea60fbd1d228fbe9bca827f",
"blk.1.attn_v.weight": "6be304671cc311d5ca5c103f2b51467ee800c589bc5b8101e09ff5aed1f68c21",
"blk.1.attn_v.bias": "43bcbab78a8819e07f723bc9e5b737b71e87a7594f15234e882b63e327a64199",
"blk.1.attn_output.weight": "15ec8a1a12b26c9976445308a09f748ab0e4bef0f583d13ab08c3129f8738d73",
"blk.1.attn_output.bias": "dac2146f4baa6ed16f6c0dc7443831fb7ec79bedcceafd80d1a4b628a1bb072d",
"blk.1.attn_output_norm.weight": "d2151eb33bffac536787a4c9a5d2b31c7a80b17c4611877842a3cce2cd6e98d8",
"blk.1.attn_output_norm.bias": "31e1b779716dafb855d2cf5631ee168a0ccf372eb9c6ea6091f66fa97a9b9d2d",
"blk.1.ffn_up.weight": "a57547fc3fc3b77406f5cdcb0c87af9bc184701f175c39c1f35297826fce3cc7",
"blk.1.ffn_up.bias": "123be6d541d086202913c75d878c54d59a749f3af7b58f7ef9eb9e7c62a24c9a",
"blk.1.ffn_down.weight": "cfdb79788377e5cbded8790cd41b9e66c397ecab75474071fcd7cf32d30f9613",
"blk.1.ffn_down.bias": "bcb58315519a573097960891c9ae41cf4c685ab78c3e0e77471471758a7eae88",
"blk.1.layer_output_norm.weight": "819b554271452bfb1d84c2603b90377b2e41a0ac1e3aa8b417ccf9dce63375bd",
"blk.1.layer_output_norm.bias": "47a3433ac27f5ce8947fb38dd491f3706df4ef6adb0ddf74612bf0f54b19e164",
"blk.2.attn_q.weight": "1557a9ea852b1880551f7290e00aded4f35e6c4180fdcbed1b0039bf805f639e",
"blk.2.attn_q.bias": "c3bfe5f3066f655fd36b055530997b59ff33ef013563aaeb3cb8ff07dabd59a9",
"blk.2.attn_k.weight": "cfd08eb69c61ae2f9f14f9b7ff5c5394ca264b1a9f3d48156677f90dd1766289",
"blk.2.attn_k.bias": "9b839bc0e79974a0b3f5d1895972bc6f5c9a1bc16052e1af786e6a530758152d",
"blk.2.attn_v.weight": "02b26b1208480eaeeb00e7b4cf8b690006ca14759357fc44ed4a2a8924ead993",
"blk.2.attn_v.bias": "e7e6f0089fded1659a867ab736c220d9653ea7da6b1b94baf5c8d30a748b63ab",
"blk.2.attn_output.weight": "a1db121c7d33806b349cadd050300a57db49fdc91224fd07c9ac43bf4299dc79",
"blk.2.attn_output.bias": "7675128b6a92555cd955c820311e91e9417d31f48848f45d047b4100c62148b3",
"blk.2.attn_output_norm.weight": "5b4595e0fbcba67a700c4331adf746d2fba3546364a4db5607ae241947bb1a21",
"blk.2.attn_output_norm.bias": "7b8e16826ea30e5a2ba0b02e0095a901775981a296e98819625320e983060d08",
"blk.2.ffn_up.weight": "a0d815d946ac07a65095c4ae4df77b818845e6d97795c7d82f55e689d944db59",
"blk.2.ffn_up.bias": "ce37c0a4174d6bf773ded7bd016ede627ad3bdb8bc99b9992a18dc8e8898f252",
"blk.2.ffn_down.weight": "f6231d2a25426fbd45b9f1160aa484220eb227ceef0348c4a6a6de890606e5ef",
"blk.2.ffn_down.bias": "429e00556e8dc63a785238b309b9d83738500c1ef6d736fe6526ad88ea496d27",
"blk.2.layer_output_norm.weight": "651457a573adf3f7dd9ee5dfe1c8e89389e94443993aab77ec6a0b05aa621e35",
"blk.2.layer_output_norm.bias": "41fbbeda7fd89b0cef5f945ae44011c316982390401d6f75ba8c6d365e185247",
"blk.3.attn_q.weight": "95a43f32949d2cb8d22815bb27a44abfc6665ba96221af817dfe058cb6ca72c6",
"blk.3.attn_q.bias": "f4e34385e75d8108b6b3bd336106e2133a8c9be0cc343dfe5dc48c32a823c7cb",
"blk.3.attn_k.weight": "6b892da6a17d4d3265265a15f695864a31813ee8c8e710ae9bc9e1adbc6c9a18",
"blk.3.attn_k.bias": "40b8067b641a56014cee42548240aa8930820958b1933004892b5f04fbaef39e",
"blk.3.attn_v.weight": "9fcd5922319dd2a461082a5ce040c1dfe65d87d70ca6547dd0b46eeecc3eeb2b",
"blk.3.attn_v.bias": "b528c56212e66931fdbe267ac327a9c2f87cd03baff3ea719e30afe681da15f1",
"blk.3.attn_output.weight": "e3b178c1b03981e75510e0d277af23ea59cc404b5394e61bd32291825719b502",
"blk.3.attn_output.bias": "712c84d39a6a5a9c06a09da8fd9939ba0d5525524a4bba61ea4de09b48f45cae",
"blk.3.attn_output_norm.weight": "d1ffac88e675592ff72f8a617be32b4a381d443b2f8f2645dbe44a1e5745aac0",
"blk.3.attn_output_norm.bias": "ea31a1c73146234c50e0e43f485c458413714867b8e2703af66482f7db2d6c40",
"blk.3.ffn_up.weight": "4ef4f3b9a1ea6ab2ef2eb6e8b008e06a44790d099d97482a05a51e39a29afac0",
"blk.3.ffn_up.bias": "06a4296dda16f452675c51f108079fe7722552d6521c737d97734943818b9a2b",
"blk.3.ffn_down.weight": "f114b2bebe392c7d80433bb880c6730293aa4561b0b0370dcdaf7472daebd847",
"blk.3.ffn_down.bias": "2c8e67831d28a3bf613fc7912ae3259b63d72abcaf4d30efd8800758400158de",
"blk.3.layer_output_norm.weight": "a1dfeb7b5a51dd56447312ca41e2ad2f361a3ea12ddc355127f5f4219fb0a482",
"blk.3.layer_output_norm.bias": "1ed630021b25c6c6fc93fd32988b9907df966d4982a93081f639aac3044618ab",
"blk.4.attn_q.weight": "b5fae4c1f9a5f33a2a2e816ac0c01c25f422e4efdd59ef1ed93da2610e5370fc",
"blk.4.attn_q.bias": "c2e376524ea98ac3b10d9eee19ecb1b1e261fa5149efe0232844c923dfb428fb",
"blk.4.attn_k.weight": "a4632f5ebf9321d9d08f9112a4e5dda2efe5671df4a4e67fee24845f5b14af16",
"blk.4.attn_k.bias": "a9a02ffb8b8b4f6dfe487a7e0341f1d5318c9d2b793a688f34cb1b22fc66ef60",
"blk.4.attn_v.weight": "10ad8deb81d9fa093b1e5c0f24ea82aa7df43e6aca49e260fcbea56eab8cc86a",
"blk.4.attn_v.bias": "7326813e181e021130bd33ac136293fcffccce2d1d8cb59041e5b13a8cceacf6",
"blk.4.attn_output.weight": "c92573088c7437c2b3cda51490e152c27fb19e5468df591eabba5a49d5398d44",
"blk.4.attn_output.bias": "14e10b419e5859af1eb685af5c330aee67048cd704dcead9217840c6f5393222",
"blk.4.attn_output_norm.weight": "02b6831c0e0fb0edbc579a92812a1dd972cb15d14fcd382d4427c5a7b300ac44",
"blk.4.attn_output_norm.bias": "7eed5cd503bb6bb6ceb1bc8b07cc077903a4f14fb8b9d6cdf39644815ecf1374",
"blk.4.ffn_up.weight": "8d0c91d62e74d6431321116a37cf3339e630bd50ba164d3304fc4fe8dd831223",
"blk.4.ffn_up.bias": "d325f07f73c005a273c484c7be8e7abb4d6e8a5c4fd093f5869133b97629d017",
"blk.4.ffn_down.weight": "7ba7bd81143f40537b84f938e403e19f30e4928625eb371de052b9025beb4d21",
"blk.4.ffn_down.bias": "2853d9c2a75288214a4bf4907dc19d04d01926f4913d302b1aa7bdbfcce0f7a1",
"blk.4.layer_output_norm.weight": "a4ed1885fa77b90fed5300c355ef0aa0c876a8c747151d9d790939d464d57d4f",
"blk.4.layer_output_norm.bias": "62142a81e813a9e636333b2b805d6bc3b17c5e7cd4b15adce1ada6bc9a32563c",
"blk.5.attn_q.weight": "afc1dff080a72c3daad01384b1448d476aaf789871017c8ff8e144788887995d",
"blk.5.attn_q.bias": "748a820371c1d4f872c84545b36358d239c35bf6c99e2812c237d88c3292763b",
"blk.5.attn_k.weight": "59e30c1ed8acd2cbb01de5f62e7804015b9ecf98ba157d98cab016344639eda5",
"blk.5.attn_k.bias": "f839520078f9e589496e982e86d0126c7aa14196047339abffcf49a696229f77",
"blk.5.attn_v.weight": "3e21fb874e21b90308e1f46af034a3c32d3eba1628d62ae5f2246d6af5818923",
"blk.5.attn_v.bias": "5cd4852bf95c1444d10d756750f6bf49f842c0b39e9953c7f408bb67c325ac8c",
"blk.5.attn_output.weight": "636ce6a7752895f204b9d01ba0aedd9a294f908b42f372c22a16d9dd590d7471",
"blk.5.attn_output.bias": "82d924d4b0d2b94f2bbff91619216d6967a3541ce9b1531a6a60457a67b5d219",
"blk.5.attn_output_norm.weight": "5e7bd0a8d3396080f3360d7c4700bf094a06216431bd014c4479eef72ecf4271",
"blk.5.attn_output_norm.bias": "66c6de5edda5466d029c6753780be81ccd4218bf8bc00680000e0f06856ab712",
"blk.5.ffn_up.weight": "5bbf6e7ea380e216e33f8bee06d25f2265359d3876a300e92bc6e41d48e33430",
"blk.5.ffn_up.bias": "9d795388bb36fb33ad3a37fea3ccb4937838e02800a608fb47d363cd06b47370",
"blk.5.ffn_down.weight": "2fd628974e7f075479dd227b46fbd48ae8d3ca34d735b36f391ac06410730368",
"blk.5.ffn_down.bias": "cd213ba9eaa75fa541648097fbe9c96e58077e6c3ad6ad2fb1f21f8350f44291",
"blk.5.layer_output_norm.weight": "159a9df41d15b7022d136f86a2a2631c4635f9816e957472217077b522bcf52a",
"blk.5.layer_output_norm.bias": "24c1f27ffd1eb4e5be7e3a2909943e6f0980635d761fa1efdd0c19645da23766"
}

View File

@@ -1,6 +0,0 @@
{
"general.architecture": "gemma2",
"gemma2.attention.sliding_window": "4096",
"gemma2.attn_logit_softcapping": "50",
"gemma2.final_logit_softcapping": "30"
}

View File

@@ -1,188 +0,0 @@
{
"general.architecture": "gemma",
"general.file_type": "1",
"general.quantization_version": "2",
"gemma.block_count": "18",
"gemma.context_length": "8192",
"gemma.embedding_length": "2048",
"gemma.feed_forward_length": "16384",
"gemma.attention.head_count": "8",
"gemma.attention.head_count_kv": "1",
"gemma.attention.key_length": "256",
"gemma.attention.value_length": "256",
"gemma.attention.layer_norm_rms_epsilon": "1e-06",
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.add_bos_token": "true",
"tokenizer.ggml.add_eos_token": "false",
"tokenizer.ggml.bos_token_id": "2",
"tokenizer.ggml.eos_token_id": "1",
"tokenizer.ggml.padding_token_id": "0",
"tokenizer.ggml.unknown_token_id": "3",
"tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8",
"tokenizer.ggml.token_type": "485e40bf3d715a4764818fc097d6a2a41db872d82ee714bc500872a3437ff48d",
"tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda",
"token_embd.weight": "17b87ab2c01c80657855a5413d0457b4a041afaeda0cc785080e44e2f04acf07",
"blk.0.attn_k.weight": "28ac0da05754ad2714ae95da28a5ad191192140b30b8fd22d108d4700c9d989f",
"blk.0.attn_norm.weight": "3f9d5675d1ab0eb8a816719dac9fab81f2e95c52be02c34263339acbc087febb",
"blk.0.attn_output.weight": "703295c2c63990ff896778685c678f145298886f680f3ed5dc2a7ad54c293265",
"blk.0.attn_q.weight": "69c2d0e4870e9d722a190d356203c9605575a16863466c3d1747966ef1cf5791",
"blk.0.attn_v.weight": "95219c9c07b5ffe9a9a01e456d845eef2b11f4fc12c93dbbba479db395444c13",
"blk.0.ffn_down.weight": "a2feb5eb3d572c57c5bafbf0ab506862df1160fe40965dcfe4b9fd855c08bed7",
"blk.0.ffn_gate.weight": "fcca072c445c31f4dc4d5dfaa785b1bdf7271342442099b74fd17268b5829fbf",
"blk.0.ffn_norm.weight": "7621f95dbd245cade6fffd6b08797d69d8e3954e960f0b5551b90d967ab95448",
"blk.0.ffn_up.weight": "14a9bcdd451403c67136391e1b6e53b3b1830f00199bd911dbcc56d8749c14f4",
"blk.1.attn_k.weight": "c70f73c5df20579cb44d971164b48b5f0d8d5abdb38b381e7a8b880ba12aa406",
"blk.1.attn_norm.weight": "88b6b91f93a1ef83425a7c7dc2a2fbd3b22704a04c64a80061df376ac8c33626",
"blk.1.attn_output.weight": "f031a537490c452be3b3bb51e6b7949a636405756e160976a1c070a792ea00ee",
"blk.1.attn_q.weight": "bdb23214b1cf9cfd30f863a0a5868e52c6809d93b7e8f44df096a94204d9896a",
"blk.1.attn_v.weight": "e9bbc0b05f2c872fb1403f8f938cd1612b502229ee401f12593b1164c61acc00",
"blk.1.ffn_down.weight": "5ff53811038b661a7b8f2bfdf213bebfb185ec1a6060b662f063714f33584d79",
"blk.1.ffn_gate.weight": "205085c8c951a5c7543b1495183cd96028fb49f67464b3e9862a2693a6077a33",
"blk.1.ffn_norm.weight": "798f354fc85afce9625f5d10093a585a966831698a0560e6c9b97ce659eb4b22",
"blk.1.ffn_up.weight": "db92dc5684cb6e90940e13f4d1da555ed20ba4f8cab1e990ddfd7553e2e91315",
"blk.2.attn_k.weight": "ef5ce360c4eed6d00d03ca4761e0f8e4b0af4509978468314be14f3d46621044",
"blk.2.attn_norm.weight": "6dadbc05dbd0d3fabb4216affa60a3de1378a82d2859dc90b338cbe70f50d455",
"blk.2.attn_output.weight": "6bbf87a966f691bbfd7c8d25629aa4e6710107bd431a667434861febb391edc5",
"blk.2.attn_q.weight": "4e575c09ae2de417ce9057ce8b073680e860a24aae13a472b68f101b760752e5",
"blk.2.attn_v.weight": "cd33f7f01141e9439afdaf2ea1aaced9feaa335e32a58daa136ebd555d4d96f4",
"blk.2.ffn_down.weight": "b970ff1b0b6494165defe2fbfa1d31425766ed71e64de9ec4e66ac3955c8bc5f",
"blk.2.ffn_gate.weight": "dbb3e1360402e0e369b101995bb686b73f95d4a7673f061be85d64d15dfb0061",
"blk.2.ffn_norm.weight": "bfb7980105d8ac9647710454f57a5cdac50598a0f6f4884e16f1d94b00844687",
"blk.2.ffn_up.weight": "50ef89339b275a438b664686f6227dd9b6e43853ed6856ec9e33ef4bbd90bda1",
"blk.3.attn_k.weight": "be942ea98151434eebcd2c1da4b00e0146152fe524a530689b1fd491cb833d21",
"blk.3.attn_norm.weight": "0df2f218daf609c289fb7c60c5f375fa99c0d4e04381ad5a494a19144edd8e20",
"blk.3.attn_output.weight": "c2184aaf86aa2cb8f47be49f60b165834e97205f39c6ee1dfd19fd4411a156ce",
"blk.3.attn_q.weight": "4f86e2a0a4221c1c84ff9c409ac89893cb95d7208cf65bf1e98e24e01125f991",
"blk.3.attn_v.weight": "abfdb8a60c349dadde641d1afc9542025e24fbf41a3238bfa9675e0b1f1e4b68",
"blk.3.ffn_down.weight": "58821a8d87008d47d122427911c6fad5272aca70c448bbae223256a74bacd07e",
"blk.3.ffn_gate.weight": "776e051f1a0ddd5c4934e69186683a75ca9a3c8c0f61911bba321fed1dd287d2",
"blk.3.ffn_norm.weight": "7f380f29335e28be90bfcfae6f6d69fdf5751211b36d2dd62aa5541ed113e4f2",
"blk.3.ffn_up.weight": "fc5ae8d488894cbd4951059675468d227da27871d26e925c9941863841c097ee",
"blk.4.attn_k.weight": "14833b078cc4c5137bdd5fdc0538047974ca147a99b0282e1b144440c78bc1db",
"blk.4.attn_norm.weight": "0a69957d4a15599fb80ad4753558020804925221457d9a5052926754d3768065",
"blk.4.attn_output.weight": "887a49b6130fb6297cf10767207c3dd97191b2cf63723449af9c27bca8dbeda0",
"blk.4.attn_q.weight": "51fd577b76764824dd6f0d4891c137ebe4736f591b5ca2793c5fff2be49abbde",
"blk.4.attn_v.weight": "1a623c43cf9c509d1b7ea0d1a5c04d0af4809665f9f9e93b7d6dba8c5df178fa",
"blk.4.ffn_down.weight": "5d61e8856d8941d2b1fd138116d015f63840d0fa1e31e20e20a5ceca1536ceec",
"blk.4.ffn_gate.weight": "06640f7273764f8ca5df7e386547417916b6cd7d565a8343153113239a94b0a1",
"blk.4.ffn_norm.weight": "91a6c6c41b894228e361435ecbc5058dca34d4911a23da5b56de219299c964d3",
"blk.4.ffn_up.weight": "d016dac1055e36d6a10b6317e57f98a904709ea892ef3194342f4d2f6326561e",
"blk.5.attn_k.weight": "987146afe124131500808cc0da33c06d207433656d41df6e6d8c99118a83bac5",
"blk.5.attn_norm.weight": "6b354938966f2608a2fb8d0f5b363ed0d8b0967c2ec8d0abd5c625b413042ded",
"blk.5.attn_output.weight": "cdcbfe02c6ff79d5326882b017a02099f5af71beedf6b1b3eb4de01e3a844536",
"blk.5.attn_q.weight": "b910d0cff781d3efb42eab0a302f46f286b2de717079175680d5b42bf8c309c8",
"blk.5.attn_v.weight": "66d3a279f747412f9f4b0e8abad44540c122ab2e811a7ee74c1f33bc36caade9",
"blk.5.ffn_down.weight": "c9b0efd2212981f16d956d8571f054b68780ad01f4917033647e359b557a4653",
"blk.5.ffn_gate.weight": "fe96b94109ca141c01f6a04788e20783019ca6ec334aa1f3134810bdb499e557",
"blk.5.ffn_norm.weight": "aa7b016e832e7055a36c6e20de58ea1936f995f390401fff1c5fc65906064e49",
"blk.5.ffn_up.weight": "555ce27c4873d3375394f38ad3b45e3d8848f9d5642dc1602383d0f0a33c2a14",
"blk.6.attn_k.weight": "88280d461db324c4f36475ce396793063e61a27283ec64511b0480890fb5b3b4",
"blk.6.attn_norm.weight": "af8f460c411f660d33196286d208f1845fd5a2b45f7b56549a4df31e7515447a",
"blk.6.attn_output.weight": "dd9996fb0a256e8375ad3917705258a33fce006bcea0f536caae420a77974d8b",
"blk.6.attn_q.weight": "7a4841541191e037cfb9b07930c4d8cab451809658b182f0ada6ccde9615c003",
"blk.6.attn_v.weight": "ae81e6a592b64d701a9d40233e986039a56cba8d8d24f61aea93c6393cf3078a",
"blk.6.ffn_down.weight": "622dd1ce1706355cbc659a8ab2c4509678ffe0f3ad34258e5e25ed2a5d951bcd",
"blk.6.ffn_gate.weight": "8389a735c0bd5591010f8ced9805a2a12c749f6df0d3c18ad4d05c2a302e7168",
"blk.6.ffn_norm.weight": "621f5346400382474d61358397bd58fb1459b07c53e376e4bca15e08b3f9b3fb",
"blk.6.ffn_up.weight": "8d834e4c42f13c251dfee36cf89e12f1bd400680d00d5c2e6cac0459e9ce2f7f",
"blk.7.attn_k.weight": "8bd0412de65a3e64901ef8fe6a28c95e116bf39dc9aa22f0126b9d36688e5ea7",
"blk.7.attn_norm.weight": "056d8e56be4e87d6dc6f900762f0dc6fde07bfdc50dd85bfc510415e2bba3f3d",
"blk.7.attn_output.weight": "27972eda51da53d416ff95aed78149a2c5a287b47d2cd46f2f544ca692ecb3bb",
"blk.7.attn_q.weight": "41eca977b9371f7932800c11a9c45b931310196919e2a0651b847703b180fc7f",
"blk.7.attn_v.weight": "13c74fd7e07f08883a09fb070a1fe5bbdd2341b4cb8d1cac07c4b637049b5774",
"blk.7.ffn_down.weight": "9e75db42468800849a9a7da603d0072c5e86c8ed2b4d8b20a312a51fb86a7a10",
"blk.7.ffn_gate.weight": "db6bdc3117f910088aaf7db51f2da63ea5bd933de36af5599c215bfb26f7db2b",
"blk.7.ffn_norm.weight": "48bb82b49bfc8679a1e77f282ee182d952db7a3c11be7ef9a102ee2ddd8011e2",
"blk.7.ffn_up.weight": "feebea87175817a0f3585ec0af09dc873d94c203581ae97a712eb356d3b49efe",
"blk.8.attn_k.weight": "d5640ad71b6af68d88e17bf8e7fc26c907d2262605457a84247dd9afc2884d69",
"blk.8.attn_norm.weight": "75b850c481a69083ae09d0207ba7317b37c735a39fcf5fef5400e6c84fb1257f",
"blk.8.attn_output.weight": "cbd669dbdea2bdd90f9f0cc97566b3dffff3c56cecb4f47290ceef30da83b2d6",
"blk.8.attn_q.weight": "9edcb63087a431bac361822497e6ecdaa06d9ea4a1a754e36da7ba9f8db81c7c",
"blk.8.attn_v.weight": "3fb72c2c4f95a83626aa3e30062f9450b09ab37c7871e229f18bbc5cf744633c",
"blk.8.ffn_down.weight": "bd69d2c9172974fff154441b237b4787fb53b2d185325442d5048130ef5bc4ef",
"blk.8.ffn_gate.weight": "d04689c80553edd011d1cbaa5d570fffa7fa91e88b66cf1352d89ab60b72f908",
"blk.8.ffn_norm.weight": "e49984183b735b7f2c4e4730c289eed9394056d2e283a00fd83ea0915df31a73",
"blk.8.ffn_up.weight": "8fe62a1ce8e847e567add6c6f6bf2922bc467495b5eb4c116b3cb85b85b3b211",
"blk.9.attn_k.weight": "d90904959e5004cf0d6e729c6bff18cc33c094798b802473c1ec55ab8d276183",
"blk.9.attn_norm.weight": "79277f290cc07411115d8fa138045edf4a17b3416ab2145409cbe8ab829fd4ee",
"blk.9.attn_output.weight": "5a21bf2e1f09a81405025f96d4153ffb630158e17269cff8ffff935c38ceb1a7",
"blk.9.attn_q.weight": "51b1d0febc3b350945be4504f55afa4347517bde0f710e1a4b88e6b17e71e7c7",
"blk.9.attn_v.weight": "aab7e1db0a8b50a03036356791ffce736ab010d15674c96eaef8049d80076054",
"blk.9.ffn_down.weight": "cbf43ec84becb40c9359a181ab0e641fd7faae7d34b549501f7cfb7afdc3d764",
"blk.9.ffn_gate.weight": "dce0e8661c778327bed7f03b6790d26710764188aed9dc746e6e05863891fa57",
"blk.9.ffn_norm.weight": "6d41642104f995c77bf31122b13237caebda3e7fcccb1367ce91db36b015e923",
"blk.9.ffn_up.weight": "82fe4c67bf24e7b2d6f6e05f7b1234c2bf90c3932951091a9066211b8e15ecbb",
"blk.10.attn_k.weight": "f6a9ed8fd8d3229b5d03175c413ffc56a07f2ce7236271986361dd3d8993f9aa",
"blk.10.attn_norm.weight": "cebbef89f0326ca8e02df3867a571e4d61c20c2a12f295f98ae590d62bc86010",
"blk.10.attn_output.weight": "34f5efb86accb4f06347d83a32558ea8eab3039d128969161a741ebacbb656ff",
"blk.10.attn_q.weight": "1e0efe27df2d5d50f7157253ba2cfd436d6781c3dc78ca176d0c16a210b5b763",
"blk.10.attn_v.weight": "8f085bf50a2b0f83cd6cdda3c8ef5a9e204a36348ed95871aac725d1f68640cf",
"blk.10.ffn_down.weight": "bf3b3cb4cace435809ac7b4cc933f20853af12f1f272d3dcefe7f19c0f203b8b",
"blk.10.ffn_gate.weight": "d3df7a1413b1c5adf1a1dcda9e5225a15c89874bae53bb6137ad1ea42fca2d34",
"blk.10.ffn_norm.weight": "a1da603b0480471b5ed8e862148cecd5fed918f8304d6933ab0bdb25b8d2fb8f",
"blk.10.ffn_up.weight": "bffbba605922e972dc47dda88a0b4659aa52236c76e5fe861a949e6d9a367492",
"blk.11.attn_k.weight": "9f31c63d66cd32c29b1eb8bb829d0c8525ce2ae936e0eefdaab6335a2d12a3df",
"blk.11.attn_norm.weight": "0bde1a266d8b2e8f202bb7e2e88b19147ca83021901f6d3cae77a4df5548c754",
"blk.11.attn_output.weight": "e10725c7cf746ed4a7e472cf7aea6cb564e5db6a1d5197adc980d650a387ccea",
"blk.11.attn_q.weight": "05ee758a7d065802630f8c65dca424364c1c8825e389aa33f9405c45e8a50cce",
"blk.11.attn_v.weight": "0c3ae7090f11775d24c51120db6e305db6aff706493e7ee123dcab74485ba789",
"blk.11.ffn_down.weight": "7ba40b8e12c09c5fb2006b77a771cb01ce894e88a3b3e1877f927a5b89c91709",
"blk.11.ffn_gate.weight": "db76388a023b98097972d354ba1c6a5e26efdeb1c596b9c28bf2cd8f6596975e",
"blk.11.ffn_norm.weight": "a38c3ae1b89a68ddc7b72c99c5b28be7fe3787c4fad9904d0c43d64eaf00c474",
"blk.11.ffn_up.weight": "13c8142f9cf1eddc658babf978daf3515c4ccc45f849f3e7e3930aa18a8480a0",
"blk.12.attn_k.weight": "f03241c36ac87cb57429a2ef22186b8d7d0b590a8b173beb01fa13d93772f3b1",
"blk.12.attn_norm.weight": "4568f654e6d65104d586e7c16ba960c83428698ce103022b7e0be15e2884e13b",
"blk.12.attn_output.weight": "04867603f82f91e41306e09b33ecda0104b3ee4834061f2c0bbdc8da33c72509",
"blk.12.attn_q.weight": "70fe04b9a8e08b6100cc8d6b58bf4cbbad15ca1de82d63baca5d352ba6c4cbae",
"blk.12.attn_v.weight": "15cb28db61a86c98687991d7e611bc92a1fcc6007f3432149cfb5fe518a4f65e",
"blk.12.ffn_down.weight": "6d10c790a4e3dc44c2dc36d96251ae97cdf30a4fa04d4c43e31bfbd038e6a7b7",
"blk.12.ffn_gate.weight": "3462a2d8f6b4743b25e24da51b90018ac2858d05ac7e582bcb69063cfdac1104",
"blk.12.ffn_norm.weight": "1f96392c1faa34e34ae5dea55a6a86c5aa4c79758952075d53d28de89dd88456",
"blk.12.ffn_up.weight": "d22eacc612a7411953d948483c5fb201e11722955ee0754da866e7bec578ac6d",
"blk.13.attn_k.weight": "5864977e6b733ea942647d6feed5c76156c48c200649c22e4e11b9e5860e57f3",
"blk.13.attn_norm.weight": "87e053535144723db4145aa5402acc54331b7696752d852bb9fc542ff33f0fb5",
"blk.13.attn_output.weight": "078145f5ad83f8b14f97a869346f7fd1583b24d1e3edadaa95d3da4242973f8f",
"blk.13.attn_q.weight": "3b8caf35504cbc4d1a7dd6e011a95760703b7f71e2218b030b1254f811362dd7",
"blk.13.attn_v.weight": "4fdf8365a603e043e5b40c4a21c84ac167f9be62794178f9d8a608dfe5653bf9",
"blk.13.ffn_down.weight": "a07d3abbfcacf48ba028df2cab895be32cc15022d23389a745286e79c1b1d1fd",
"blk.13.ffn_gate.weight": "1d2ab39666aa2909acc96787432a3ed13b19d25170f74665fadff9b17bbaffb1",
"blk.13.ffn_norm.weight": "4f2e809fda5f3eadf52578ee50e0ba36e53be91e55dce418c12dfe595f5f18e7",
"blk.13.ffn_up.weight": "8783d2720c2c37ca176a5801e0b3ef1f9cc9cf3ef1cd37af423aaf6b2a27e2bd",
"blk.14.attn_k.weight": "ce9428e2b55d43ae0c6690dbd56182f99adc427694ba8236b405cc8ea5035e86",
"blk.14.attn_norm.weight": "6abb35f9db8251d6ae954bda147c6ada2371b0574d11702e828f3c6ac99b7cc0",
"blk.14.attn_output.weight": "fe3880916d0ceb5bff672c88bbefb7060a545be609bf049beb2024b38221836d",
"blk.14.attn_q.weight": "7c8ad81be6f4a350931fd108b5f7c9e366e8c26ef62d1d85ffef5dca8fd893f8",
"blk.14.attn_v.weight": "e4bdedffacbebe38567a0734dfd67db90e911d9a9669fcde9a7c4ad8a0066c52",
"blk.14.ffn_down.weight": "ef6694dff1e05820aac0cd2b22f39ac7788b4967afc9250775575554c66aab2c",
"blk.14.ffn_gate.weight": "db63c4179e2db704bc505e2b4696e055b593e295a1b7c4c586fc793bdd5aab19",
"blk.14.ffn_norm.weight": "2796a62d832a9710148f95d533320492a33e712b2e5218659c548705bd11684d",
"blk.14.ffn_up.weight": "3f78c78d8c2d54df45f799d4ff902316628af296834afe4ceed63d4a324ff03e",
"blk.15.attn_k.weight": "6e810ee3859e07695645ee0c9a5efc7962668984a5f0a9325f47e462743b447c",
"blk.15.attn_norm.weight": "0956b576ae96db0b28cb09f761f801cfd9281432284664f0fe181c8d9c55d1ec",
"blk.15.attn_output.weight": "03a17f7e94208177aace5cc41b7f54670ba57873b7274ff6e23caf58cce110ca",
"blk.15.attn_q.weight": "b8edafe7d2216a6f8b4ae4905a906475490e6ea418f6e1d3cec563dbdc6fab91",
"blk.15.attn_v.weight": "f8ae8cae0f4cfa34a459824eba57350c3c248104ba5607e7d9dc7d7c39aaf4a6",
"blk.15.ffn_down.weight": "8d02eb439da852246d2ca67e9b7b6de0b090b80744355e64728a23e41926505b",
"blk.15.ffn_gate.weight": "ed5bf361c67db8731f186b775826f21c33bdb521111fd2d922539719a770239f",
"blk.15.ffn_norm.weight": "5942ca3c73209ac9a0c8bfd9b4aab7f7be7aee9aa12d9c35833493b44af76767",
"blk.15.ffn_up.weight": "f4bebf4ad99ec5f911327dec347be6c595814885309c7bc5647ce28c7f4d1cf5",
"blk.16.attn_k.weight": "756a534c19364448e0958b8948fe33891c6ccda0fbb4dfa2024e1f532a87804b",
"blk.16.attn_norm.weight": "386b7b9e4e6509f6af9c022d942b6c6c6cc136aeed8751ecb037c74d7c4bfb93",
"blk.16.attn_output.weight": "3ba1a766a25830b84d7c22178203635f9c5624caad290bc5e5d73da5d5e7a2ec",
"blk.16.attn_q.weight": "d39b0c91e1fda7685d50a0f7cc8d18c44b5bdc90a142c7fda0bc329cca1afa74",
"blk.16.attn_v.weight": "98b33fcb0ee3483cff1b06ecb44d7b7ffb4d34c268248e4d73dfdf82b2065b2f",
"blk.16.ffn_down.weight": "14006f5e4acb2f9416271ae562e299359cd2585739c7fc77ccbca54495563948",
"blk.16.ffn_gate.weight": "12f8abae2d301d8f88bedb6af98b1daecc7b0b8d05148594f931f30958d77aca",
"blk.16.ffn_norm.weight": "129a15a046ee96d06de288bd43c80f77a6b0fb3a159c7367154c6e4aaf362672",
"blk.16.ffn_up.weight": "b4a5911a45f3871ef1d4efb7dc7108645a564b70f818eccf45beebef2e844ee9",
"blk.17.attn_k.weight": "5e1bfcff0146ebdde3817b656952892eb671e14e75afc92fa53f84f8eecbec4c",
"blk.17.attn_norm.weight": "60bc988fab7c4b29ee9de599df41a8de00caa94fcd74677da011fac82f60f465",
"blk.17.attn_output.weight": "ba49b40d6a0b5685f749c24b0edbed3adc44dbe13b5d5e5fa1e56169fc746555",
"blk.17.attn_q.weight": "82bb415d24efcd14d03ace03f907bb70db6a204c76a0bdd1892e0fba165db87d",
"blk.17.attn_v.weight": "73dbe54beb91a899884e275ea81ffc5187a20cb7d5b68d5c299b783096999d94",
"blk.17.ffn_down.weight": "7c086166241e0664f8963fd1ca4ed74c737abfb2525ec20f8435821ff50158f3",
"blk.17.ffn_gate.weight": "51a32f78244d42a539f619c5ce661db9e6cf41636280a826d439b5444edcd28c",
"blk.17.ffn_norm.weight": "c4bb247fccd1ecc84875028af63dd20aaf5cbd17eb94a9bc36679c09285dccab",
"blk.17.ffn_up.weight": "b5886182790bc6fbadd63de9bc4ffee416f3b69a66280d197ab8c18edf769abf",
"output_norm.weight": "481f3097d0a20412e35b3a739b1b958487bcd41ff67744baa3c9acbddd2ee4d4"
}

View File

@@ -1,266 +0,0 @@
package convert
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io/fs"
"log/slog"
"os"
"slices"
"golang.org/x/exp/maps"
)
const (
_ int32 = iota
tokenTypeNormal
tokenTypeUnknown
tokenTypeControl
tokenTypeUserDefined
tokenTypeUnused
tokenTypeByte
)
type Tokenizer struct {
*Vocabulary
SpecialVocabulary []*SpecialVocabulary
Merges []string
Pre string
Template string
}
func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error) {
v, err := parseVocabulary(fsys)
if err != nil {
return nil, err
}
t := &Tokenizer{
Vocabulary: v,
Pre: "default",
}
addedTokens := make(map[string]token)
if f, err := fsys.Open("tokenizer.json"); errors.Is(err, os.ErrNotExist) {
} else if err != nil {
return nil, err
} else {
defer f.Close()
var tt tokenizer
if err := json.NewDecoder(f).Decode(&tt); err != nil {
return nil, err
}
for _, t := range tt.AddedTokens {
addedTokens[t.Content] = t
}
t.Merges = tt.Model.Merges
sha256sum := sha256.New()
for _, pt := range tt.PreTokenizer.PreTokenizers {
switch pt.Type {
case "Split":
if pt.Pattern.Regex != "" {
// create a checksum of all Split pretokenizers which should be sufficient
// to identify the pretokenizer
sha256sum.Write([]byte(pt.Pattern.Regex))
}
}
}
switch digest := hex.EncodeToString(sha256sum.Sum(nil)); digest {
case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
t.Pre = "llama-bpe"
case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
t.Pre = "deepseek-llm"
case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
t.Pre = "deepseek-coder"
case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855":
// noop, empty pretokenizer
default:
slog.Warn("unknown pretokenizer, using default", "digest", digest)
}
}
if f, err := fsys.Open("tokenizer_config.json"); errors.Is(err, os.ErrNotExist) {
} else if err != nil {
return nil, err
} else {
defer f.Close()
var p map[string]json.RawMessage
if err := json.NewDecoder(f).Decode(&p); err != nil {
return nil, err
}
if template, ok := p["chat_template"]; ok {
if err := json.Unmarshal(template, &t.Template); err != nil {
return nil, err
}
}
for _, st := range specialTokenTypes {
sv := SpecialVocabulary{Type: st}
if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok {
if err := json.Unmarshal(bts, &sv.AddToken); err != nil {
return nil, err
}
}
if bts, ok := p[fmt.Sprintf("%s_token", st)]; ok {
var content string
if err := json.Unmarshal(bts, &content); err != nil {
var mm map[string]any
if err := json.Unmarshal(bts, &mm); err != nil {
continue
}
content, ok = mm["content"].(string)
if !ok {
continue
}
}
sv.Content = content
}
if id, ok := addedTokens[sv.Content]; ok {
sv.ID = id.ID
t.SpecialVocabulary = append(t.SpecialVocabulary, &sv)
}
}
}
return t, nil
}
type tokenizer struct {
Version string `json:"version"`
AddedTokens []token `json:"added_tokens"`
Model struct {
Type string `json:"type"`
Vocab map[string]int `json:"vocab"`
Merges []string `json:"merges"`
} `json:"model"`
PreTokenizer struct {
PreTokenizers []struct {
Type string `json:"type"`
Pattern struct {
Regex string `json:"Regex"`
} `json:"pattern"`
} `json:"pretokenizers"`
} `json:"pre_tokenizer"`
}
type token struct {
ID int `json:"id"`
Content string `json:"content"`
Special bool `json:"special"`
UserDefined bool
}
type Vocabulary struct {
Model string
Tokens []string
Scores []float32
Types []int32
}
func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) {
f, err := fsys.Open("tokenizer.json")
if err != nil {
return nil, err
}
defer f.Close()
var t tokenizer
if err := json.NewDecoder(f).Decode(&t); err != nil {
return nil, err
}
tokens := make(map[int]token, len(t.Model.Vocab))
for k, v := range t.Model.Vocab {
tokens[v] = token{
ID: v,
Content: k,
}
}
for _, token := range t.AddedTokens {
token.UserDefined = true
tokens[token.ID] = token
}
keys := maps.Keys(tokens)
slices.Sort(keys)
v := Vocabulary{Model: "gpt2"}
for _, k := range keys {
token := tokens[k]
v.Tokens = append(v.Tokens, token.Content)
v.Scores = append(v.Scores, float32(token.ID))
switch {
case token.Special:
v.Types = append(v.Types, tokenTypeControl)
case token.UserDefined:
v.Types = append(v.Types, tokenTypeUserDefined)
default:
v.Types = append(v.Types, tokenTypeNormal)
}
}
return &v, nil
}
func parseVocabulary(fsys fs.FS) (*Vocabulary, error) {
patterns := []struct {
Pattern string
Func func(fs.FS) (*Vocabulary, error)
}{
{"tokenizer.model", parseSentencePiece},
{"tokenizer.json", parseVocabularyFromTokenizer},
}
for _, pattern := range patterns {
if _, err := fs.Stat(fsys, pattern.Pattern); errors.Is(err, os.ErrNotExist) {
continue
} else if err != nil {
return nil, err
}
return pattern.Func(fsys)
}
return nil, errors.New("unknown tensor format")
}
type SpecialVocabulary struct {
Type string
ID int
Content string
AddToken bool
}
func (sv SpecialVocabulary) Key() string {
switch t := sv.Type; t {
case "bos", "eos", "cls", "mask":
return t
case "unk":
return "unknown"
case "sep":
//nolint:misspell // this is an upstream typo
return "seperator"
case "pad":
return "padding"
}
panic("unknown special vocabulary type")
}

View File

@@ -1,113 +0,0 @@
package convert
import (
"cmp"
"encoding/json"
"errors"
"fmt"
"io/fs"
"os"
"slices"
"google.golang.org/protobuf/proto"
"github.com/ollama/ollama/convert/sentencepiece"
)
func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
ast, err := parseAdditionalSpecialTokens(fsys)
if err != nil {
return nil, err
}
bts, err := fs.ReadFile(fsys, "tokenizer.model")
if err != nil {
return nil, err
}
var spm sentencepiece.ModelProto
if err := proto.Unmarshal(bts, &spm); err != nil {
return nil, err
}
v := Vocabulary{Model: "llama"}
for _, piece := range spm.GetPieces() {
v.Tokens = append(v.Tokens, piece.GetPiece())
v.Scores = append(v.Scores, piece.GetScore())
switch t := piece.GetType(); t {
case sentencepiece.ModelProto_SentencePiece_UNKNOWN,
sentencepiece.ModelProto_SentencePiece_CONTROL,
sentencepiece.ModelProto_SentencePiece_UNUSED,
sentencepiece.ModelProto_SentencePiece_BYTE:
v.Types = append(v.Types, int32(t))
default:
tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL)
if slices.Contains(ast, piece.GetPiece()) {
tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)
}
v.Types = append(v.Types, tt)
}
}
f, err := fsys.Open("added_tokens.json")
if errors.Is(err, os.ErrNotExist) {
return &v, nil
} else if err != nil {
return nil, err
}
defer f.Close()
var atm map[string]int
if err := json.NewDecoder(f).Decode(&atm); err != nil {
return nil, err
}
type t struct {
id int
content string
}
var ts []t
for content, id := range atm {
ts = append(ts, t{id, content})
}
slices.SortFunc(ts, func(i, j t) int {
return cmp.Compare(i.id, j.id)
})
n := len(v.Tokens)
for i, t := range ts {
if t.id != i+n {
return nil, fmt.Errorf("invalid token id: %d", t.id)
}
v.Tokens = append(v.Tokens, t.content)
v.Scores = append(v.Scores, -1000.0)
v.Types = append(v.Types, tokenTypeUserDefined)
}
return &v, nil
}
func parseAdditionalSpecialTokens(fsys fs.FS) ([]string, error) {
f, err := fsys.Open("special_tokens_map.json")
if errors.Is(err, os.ErrNotExist) {
return nil, nil
} else if err != nil {
return nil, err
}
defer f.Close()
var m struct {
AdditionalSpecialTokens []string `json:"additional_special_tokens"`
}
if err := json.NewDecoder(f).Decode(&m); err != nil {
return nil, err
}
return m.AdditionalSpecialTokens, nil
}

286
convert/torch.go Normal file
View File

@@ -0,0 +1,286 @@
package convert
import (
"encoding/binary"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/nlpodyssey/gopickle/pytorch"
"github.com/nlpodyssey/gopickle/types"
"github.com/x448/float16"
"github.com/ollama/ollama/llm"
)
type torchWriterTo struct {
t *llm.Tensor
params *Params
bo ByteOrder
storage pytorch.StorageInterface
handler func(w io.Writer, r torchWriterTo) error
}
type TorchFormat struct{}
func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
slog.Debug("getting torch tensors")
files, err := filepath.Glob(filepath.Join(dirpath, "pytorch_model-*.bin"))
if err != nil {
slog.Error("didn't find any torch files")
return nil, err
}
var offset uint64
var tensors []llm.Tensor
for _, fn := range files {
m, err := pytorch.Load(fn)
if err != nil {
slog.Error(fmt.Sprintf("error unpickling: %q", err))
return []llm.Tensor{}, err
}
for _, k := range m.(*types.Dict).Keys() {
if strings.HasSuffix(k.(string), "self_attn.rotary_emb.inv_freq") {
continue
}
t, _ := m.(*types.Dict).Get(k)
tshape := t.(*pytorch.Tensor).Size
var size uint64
var kind uint32
switch len(tshape) {
case 0:
continue
case 1:
// convert to float32
kind = 0
size = uint64(tshape[0] * 4)
case 2:
// convert to float16
kind = 1
size = uint64(tshape[0] * tshape[1] * 2)
}
ggufName, err := tf.GetLayerName(k.(string))
if err != nil {
slog.Error("%v", err)
return nil, err
}
slog.Debug(fmt.Sprintf("finding name for '%s' -> '%s'", k.(string), ggufName))
shape := []uint64{0, 0, 0, 0}
for i := range tshape {
shape[i] = uint64(tshape[i])
}
tensor := llm.Tensor{
Name: ggufName,
Kind: kind,
Offset: offset, // calculate the offset
Shape: shape[:],
}
tensor.WriterTo = torchWriterTo{
t: &tensor,
params: params,
bo: params.ByteOrder,
storage: t.(*pytorch.Tensor).Source,
}
tensors = append(tensors, tensor)
offset += size
}
}
return tensors, nil
}
func getAltParams(dirpath string) (*Params, error) {
f, err := os.Open(filepath.Join(dirpath, "params.json"))
if err != nil {
slog.Error("no params.json")
return nil, err
}
defer f.Close()
type TorchParams struct {
HiddenSize int `json:"dim"`
AttentionHeads int `json:"n_heads"`
KeyValHeads int `json:"n_kv_heads"`
HiddenLayers int `json:"n_layers"`
RopeTheta int `json:"rope_theta"`
NormEPS float64 `json:"norm_eps"`
}
var tparams TorchParams
d := json.NewDecoder(f)
err = d.Decode(&tparams)
if err != nil {
return nil, err
}
params := &Params{
HiddenSize: tparams.HiddenSize,
AttentionHeads: tparams.AttentionHeads,
KeyValHeads: tparams.KeyValHeads,
HiddenLayers: tparams.HiddenLayers,
NormEPS: tparams.NormEPS,
}
switch {
case tparams.RopeTheta == 1000000:
// Codellama
params.ContextSize = 16384
case tparams.NormEPS == 1e-06:
// llama2
slog.Debug("Found llama2 - setting context size to 4096")
params.ContextSize = 4096
default:
params.ContextSize = 2048
}
params.ByteOrder = binary.LittleEndian
return params, nil
}
func (m *TorchFormat) GetParams(dirpath string) (*Params, error) {
f, err := os.Open(filepath.Join(dirpath, "config.json"))
if err != nil {
if os.IsNotExist(err) {
// try params.json instead
return getAltParams(dirpath)
} else {
return nil, err
}
}
var params Params
d := json.NewDecoder(f)
err = d.Decode(&params)
if err != nil {
return nil, err
}
params.ByteOrder = binary.LittleEndian
return &params, nil
}
func (m *TorchFormat) GetLayerName(n string) (string, error) {
directMap := map[string]string{
"tok_embeddings.weight": "token_embd.weight",
"output.weight": "output.weight",
"norm.weight": "output_norm.weight",
"rope.freqs": "rope_freqs.weight",
"model.embed_tokens.weight": "token_embd.weight",
"lm_head.weight": "output.weight",
"model.norm.weight": "output_norm.weight",
}
lMap := map[string]string{
"layers.(\\d+).attention_norm.weight": "blk.$1.attn_norm.weight",
"layers.(\\d+).attention_output_norm.weight": "blk.$1.attn_norm.weight",
"layers.(\\d+).feed_forward.w2.weight": "blk.$1.ffn_down.weight",
"layers.(\\d+).feed_forward.w1.weight": "blk.$1.ffn_gate.weight",
"layers.(\\d+).feed_forward.w3.weight": "blk.$1.ffn_up.weight",
"layers.(\\d+).ffn_norm.weight": "blk.$1.ffn_norm.weight",
"layers.(\\d+).attention.wk.weight": "blk.$1.attn_k.weight",
"layers.(\\d+).attention.wo.weight": "blk.$1.attn_output.weight",
"layers.(\\d+).attention.wq.weight": "blk.$1.attn_q.weight",
"layers.(\\d+).attention.wv.weight": "blk.$1.attn_v.weight",
"model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight",
"model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight",
"model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight",
"model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight",
"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
"model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight",
"model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight",
"model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight",
"model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight",
}
v, ok := directMap[n]
if ok {
return v, nil
}
// quick hack to rename the layers to gguf format
for k, v := range lMap {
re := regexp.MustCompile(k)
newName := re.ReplaceAllString(n, v)
if newName != n {
return newName, nil
}
}
return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
}
func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
// use the handler if one is present
if r.handler != nil {
return 0, r.handler(w, r)
}
switch r.storage.(type) {
case *pytorch.FloatStorage:
slog.Warn(fmt.Sprintf("unexpected storage found for layer '%s'; skipping", r.t.Name))
return 0, nil
case *pytorch.HalfStorage:
switch r.t.Kind {
case 0:
data := r.storage.(*pytorch.HalfStorage).Data
slog.Debug(fmt.Sprintf("%35s F32 (%d)", r.t.Name, len(data)))
if err := binary.Write(w, r.bo, data); err != nil {
return 0, err
}
case 1:
data := r.storage.(*pytorch.HalfStorage).Data
tData := make([]uint16, len(data))
for cnt, v := range data {
tData[cnt] = uint16(float16.Fromfloat32(v))
}
slog.Debug(fmt.Sprintf("%35s F16 (%d)", r.t.Name, len(tData)))
if err := binary.Write(w, r.bo, tData); err != nil {
return 0, err
}
}
}
return 0, nil
}
func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
switch len(params.Architectures) {
case 0:
return nil, fmt.Errorf("No architecture specified to convert")
case 1:
switch params.Architectures[0] {
case "LlamaForCausalLM":
return &LlamaModel{
ModelData{
Name: name,
Path: dirPath,
Params: params,
Format: m,
},
}, nil
default:
return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0])
}
}
return nil, fmt.Errorf("Unknown error")
}

View File

@@ -6,7 +6,7 @@
* [Importing models](./import.md) * [Importing models](./import.md)
* [Linux Documentation](./linux.md) * [Linux Documentation](./linux.md)
* [Windows Documentation](./windows.md) * [Windows Documentation](./windows.md)
* [Docker Documentation](./docker.md) * [Docker Documentation](https://hub.docker.com/r/ollama/ollama)
### Reference ### Reference

View File

@@ -12,13 +12,12 @@
- [Pull a Model](#pull-a-model) - [Pull a Model](#pull-a-model)
- [Push a Model](#push-a-model) - [Push a Model](#push-a-model)
- [Generate Embeddings](#generate-embeddings) - [Generate Embeddings](#generate-embeddings)
- [List Running Models](#list-running-models)
## Conventions ## Conventions
### Model names ### Model names
Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version. Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama2:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
### Durations ### Durations
@@ -26,7 +25,7 @@ All durations are returned in nanoseconds.
### Streaming responses ### Streaming responses
Certain endpoints stream responses as JSON objects. Streaming can be disabled by providing `{"stream": false}` for these endpoints. Certain endpoints stream responses as JSON objects and can optional return non-streamed responses.
## Generate a completion ## Generate a completion
@@ -40,7 +39,6 @@ Generate a response for a given prompt with a provided model. This is a streamin
- `model`: (required) the [model name](#model-names) - `model`: (required) the [model name](#model-names)
- `prompt`: the prompt to generate a response for - `prompt`: the prompt to generate a response for
- `suffix`: the text after the model response
- `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`) - `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)
Advanced parameters (optional): Advanced parameters (optional):
@@ -58,8 +56,7 @@ Advanced parameters (optional):
Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below. Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
> [!IMPORTANT] > Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
> It's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
### Examples ### Examples
@@ -69,7 +66,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur
```shell ```shell
curl http://localhost:11434/api/generate -d '{ curl http://localhost:11434/api/generate -d '{
"model": "llama3", "model": "llama2",
"prompt": "Why is the sky blue?" "prompt": "Why is the sky blue?"
}' }'
``` ```
@@ -80,7 +77,7 @@ A stream of JSON objects is returned:
```json ```json
{ {
"model": "llama3", "model": "llama2",
"created_at": "2023-08-04T08:52:19.385406455-07:00", "created_at": "2023-08-04T08:52:19.385406455-07:00",
"response": "The", "response": "The",
"done": false "done": false
@@ -98,11 +95,11 @@ The final response in the stream also includes additional data about the generat
- `context`: an encoding of the conversation used in this response, this can be sent in the next request to keep a conversational memory - `context`: an encoding of the conversation used in this response, this can be sent in the next request to keep a conversational memory
- `response`: empty if the response was streamed, if not streamed, this will contain the full response - `response`: empty if the response was streamed, if not streamed, this will contain the full response
To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` / `eval_duration` * `10^9`. To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` / `eval_duration`.
```json ```json
{ {
"model": "llama3", "model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z", "created_at": "2023-08-04T19:22:45.499127Z",
"response": "", "response": "",
"done": true, "done": true,
@@ -124,7 +121,7 @@ A response can be received in one reply when streaming is off.
```shell ```shell
curl http://localhost:11434/api/generate -d '{ curl http://localhost:11434/api/generate -d '{
"model": "llama3", "model": "llama2",
"prompt": "Why is the sky blue?", "prompt": "Why is the sky blue?",
"stream": false "stream": false
}' }'
@@ -136,7 +133,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
```json ```json
{ {
"model": "llama3", "model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z", "created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.", "response": "The sky is blue because it is the color of the sky.",
"done": true, "done": true,
@@ -150,51 +147,15 @@ If `stream` is set to `false`, the response will be a single JSON object:
} }
``` ```
#### Request (with suffix)
##### Request
```shell
curl http://localhost:11434/api/generate -d '{
"model": "codellama:code",
"prompt": "def compute_gcd(a, b):",
"suffix": " return result",
"options": {
"temperature": 0
},
"stream": false
}'
```
##### Response
```json
{
"model": "codellama:code",
"created_at": "2024-07-22T20:47:51.147561Z",
"response": "\n if a == 0:\n return b\n else:\n return compute_gcd(b % a, a)\n\ndef compute_lcm(a, b):\n result = (a * b) / compute_gcd(a, b)\n",
"done": true,
"done_reason": "stop",
"context": [...],
"total_duration": 1162761250,
"load_duration": 6683708,
"prompt_eval_count": 17,
"prompt_eval_duration": 201222000,
"eval_count": 63,
"eval_duration": 953997000
}
```
#### Request (JSON mode) #### Request (JSON mode)
> [!IMPORTANT]
> When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON. > When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.
##### Request ##### Request
```shell ```shell
curl http://localhost:11434/api/generate -d '{ curl http://localhost:11434/api/generate -d '{
"model": "llama3", "model": "llama2",
"prompt": "What color is the sky at different times of the day? Respond using JSON", "prompt": "What color is the sky at different times of the day? Respond using JSON",
"format": "json", "format": "json",
"stream": false "stream": false
@@ -205,7 +166,7 @@ curl http://localhost:11434/api/generate -d '{
```json ```json
{ {
"model": "llama3", "model": "llama2",
"created_at": "2023-11-09T21:07:55.186497Z", "created_at": "2023-11-09T21:07:55.186497Z",
"response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n", "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
"done": true, "done": true,
@@ -288,7 +249,7 @@ curl http://localhost:11434/api/generate -d '{
#### Request (Reproducible outputs) #### Request (Reproducible outputs)
For reproducible outputs, set `seed` to a number: For reproducible outputs, set `temperature` to 0 and `seed` to a number:
##### Request ##### Request
@@ -297,7 +258,8 @@ curl http://localhost:11434/api/generate -d '{
"model": "mistral", "model": "mistral",
"prompt": "Why is the sky blue?", "prompt": "Why is the sky blue?",
"options": { "options": {
"seed": 123 "seed": 123,
"temperature": 0
} }
}' }'
``` ```
@@ -327,7 +289,7 @@ If you want to set custom options for the model at runtime rather than in the Mo
```shell ```shell
curl http://localhost:11434/api/generate -d '{ curl http://localhost:11434/api/generate -d '{
"model": "llama3", "model": "llama2",
"prompt": "Why is the sky blue?", "prompt": "Why is the sky blue?",
"stream": false, "stream": false,
"options": { "options": {
@@ -336,7 +298,6 @@ curl http://localhost:11434/api/generate -d '{
"num_predict": 100, "num_predict": 100,
"top_k": 20, "top_k": 20,
"top_p": 0.9, "top_p": 0.9,
"min_p": 0.0,
"tfs_z": 0.5, "tfs_z": 0.5,
"typical_p": 0.7, "typical_p": 0.7,
"repeat_last_n": 33, "repeat_last_n": 33,
@@ -352,6 +313,7 @@ curl http://localhost:11434/api/generate -d '{
"numa": false, "numa": false,
"num_ctx": 1024, "num_ctx": 1024,
"num_batch": 2, "num_batch": 2,
"num_gqa": 1,
"num_gpu": 1, "num_gpu": 1,
"main_gpu": 0, "main_gpu": 0,
"low_vram": false, "low_vram": false,
@@ -359,6 +321,8 @@ curl http://localhost:11434/api/generate -d '{
"vocab_only": false, "vocab_only": false,
"use_mmap": true, "use_mmap": true,
"use_mlock": false, "use_mlock": false,
"rope_frequency_base": 1.1,
"rope_frequency_scale": 0.8,
"num_thread": 8 "num_thread": 8
} }
}' }'
@@ -368,7 +332,7 @@ curl http://localhost:11434/api/generate -d '{
```json ```json
{ {
"model": "llama3", "model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z", "created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.", "response": "The sky is blue because it is the color of the sky.",
"done": true, "done": true,
@@ -390,7 +354,7 @@ If an empty prompt is provided, the model will be loaded into memory.
```shell ```shell
curl http://localhost:11434/api/generate -d '{ curl http://localhost:11434/api/generate -d '{
"model": "llama3" "model": "llama2"
}' }'
``` ```
@@ -400,7 +364,7 @@ A single JSON object is returned:
```json ```json
{ {
"model": "llama3", "model": "llama2",
"created_at": "2023-12-18T19:52:07.071755Z", "created_at": "2023-12-18T19:52:07.071755Z",
"response": "", "response": "",
"done": true "done": true
@@ -419,14 +383,12 @@ Generate the next message in a chat with a provided model. This is a streaming e
- `model`: (required) the [model name](#model-names) - `model`: (required) the [model name](#model-names)
- `messages`: the messages of the chat, this can be used to keep a chat memory - `messages`: the messages of the chat, this can be used to keep a chat memory
- `tools`: tools for the model to use if supported. Requires `stream` to be set to `false`
The `message` object has the following fields: The `message` object has the following fields:
- `role`: the role of the message, either `system`, `user`, `assistant`, or `tool` - `role`: the role of the message, either `system`, `user` or `assistant`
- `content`: the content of the message - `content`: the content of the message
- `images` (optional): a list of images to include in the message (for multimodal models such as `llava`) - `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
- `tool_calls` (optional): a list of tools the model wants to use
Advanced parameters (optional): Advanced parameters (optional):
@@ -445,7 +407,7 @@ Send a chat message with a streaming response.
```shell ```shell
curl http://localhost:11434/api/chat -d '{ curl http://localhost:11434/api/chat -d '{
"model": "llama3", "model": "llama2",
"messages": [ "messages": [
{ {
"role": "user", "role": "user",
@@ -461,7 +423,7 @@ A stream of JSON objects is returned:
```json ```json
{ {
"model": "llama3", "model": "llama2",
"created_at": "2023-08-04T08:52:19.385406455-07:00", "created_at": "2023-08-04T08:52:19.385406455-07:00",
"message": { "message": {
"role": "assistant", "role": "assistant",
@@ -476,7 +438,7 @@ Final response:
```json ```json
{ {
"model": "llama3", "model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z", "created_at": "2023-08-04T19:22:45.499127Z",
"done": true, "done": true,
"total_duration": 4883583458, "total_duration": 4883583458,
@@ -494,7 +456,7 @@ Final response:
```shell ```shell
curl http://localhost:11434/api/chat -d '{ curl http://localhost:11434/api/chat -d '{
"model": "llama3", "model": "llama2",
"messages": [ "messages": [
{ {
"role": "user", "role": "user",
@@ -509,7 +471,7 @@ curl http://localhost:11434/api/chat -d '{
```json ```json
{ {
"model": "registry.ollama.ai/library/llama3:latest", "model": "registry.ollama.ai/library/llama2:latest",
"created_at": "2023-12-12T14:13:43.416799Z", "created_at": "2023-12-12T14:13:43.416799Z",
"message": { "message": {
"role": "assistant", "role": "assistant",
@@ -533,7 +495,7 @@ Send a chat message with a conversation history. You can use this same approach
```shell ```shell
curl http://localhost:11434/api/chat -d '{ curl http://localhost:11434/api/chat -d '{
"model": "llama3", "model": "llama2",
"messages": [ "messages": [
{ {
"role": "user", "role": "user",
@@ -557,7 +519,7 @@ A stream of JSON objects is returned:
```json ```json
{ {
"model": "llama3", "model": "llama2",
"created_at": "2023-08-04T08:52:19.385406455-07:00", "created_at": "2023-08-04T08:52:19.385406455-07:00",
"message": { "message": {
"role": "assistant", "role": "assistant",
@@ -571,7 +533,7 @@ Final response:
```json ```json
{ {
"model": "llama3", "model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z", "created_at": "2023-08-04T19:22:45.499127Z",
"done": true, "done": true,
"total_duration": 8113331500, "total_duration": 8113331500,
@@ -587,7 +549,7 @@ Final response:
##### Request ##### Request
Send a chat message with images. The images should be provided as an array, with the individual images encoded in Base64. Send a chat message with a conversation history.
```shell ```shell
curl http://localhost:11434/api/chat -d '{ curl http://localhost:11434/api/chat -d '{
@@ -629,7 +591,7 @@ curl http://localhost:11434/api/chat -d '{
```shell ```shell
curl http://localhost:11434/api/chat -d '{ curl http://localhost:11434/api/chat -d '{
"model": "llama3", "model": "llama2",
"messages": [ "messages": [
{ {
"role": "user", "role": "user",
@@ -647,7 +609,7 @@ curl http://localhost:11434/api/chat -d '{
```json ```json
{ {
"model": "registry.ollama.ai/library/llama3:latest", "model": "registry.ollama.ai/library/llama2:latest",
"created_at": "2023-12-12T14:13:43.416799Z", "created_at": "2023-12-12T14:13:43.416799Z",
"message": { "message": {
"role": "assistant", "role": "assistant",
@@ -663,79 +625,6 @@ curl http://localhost:11434/api/chat -d '{
} }
``` ```
#### Chat request (with tools)
##### Request
```
curl http://localhost:11434/api/chat -d '{
"model": "llama3.1",
"messages": [
{
"role": "user",
"content": "What is the weather today in Paris?"
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The location to get the weather for, e.g. San Francisco, CA"
},
"format": {
"type": "string",
"description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
"enum": ["celsius", "fahrenheit"]
}
},
"required": ["location", "format"]
}
}
}
]
}'
```
##### Response
```json
{
"model": "llama3.1",
"created_at": "2024-07-22T20:33:28.123648Z",
"message": {
"role": "assistant",
"content": "",
"tool_calls": [
{
"function": {
"name": "get_current_weather",
"arguments": {
"format": "celsius",
"location": "Paris, FR"
}
}
}
]
},
"done_reason": "stop",
"done": true,
"total_duration": 885095291,
"load_duration": 3753500,
"prompt_eval_count": 122,
"prompt_eval_duration": 328493000,
"eval_count": 33,
"eval_duration": 552222000
}
```
## Create a Model ## Create a Model
```shell ```shell
@@ -762,7 +651,7 @@ Create a new model from a `Modelfile`.
```shell ```shell
curl http://localhost:11434/api/create -d '{ curl http://localhost:11434/api/create -d '{
"name": "mario", "name": "mario",
"modelfile": "FROM llama3\nSYSTEM You are mario from Super Mario Bros." "modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros."
}' }'
``` ```
@@ -869,7 +758,7 @@ A single JSON object will be returned.
} }
}, },
{ {
"name": "llama3:latest", "name": "llama2:latest",
"modified_at": "2023-12-07T09:32:18.757212583-08:00", "modified_at": "2023-12-07T09:32:18.757212583-08:00",
"size": 3825819519, "size": 3825819519,
"digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e", "digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e",
@@ -891,12 +780,11 @@ A single JSON object will be returned.
POST /api/show POST /api/show
``` ```
Show information about a model including details, modelfile, template, parameters, license, system prompt. Show information about a model including details, modelfile, template, parameters, license, and system prompt.
### Parameters ### Parameters
- `name`: name of the model to show - `name`: name of the model to show
- `verbose`: (optional) if set to `true`, returns full data for verbose response fields
### Examples ### Examples
@@ -904,7 +792,7 @@ Show information about a model including details, modelfile, template, parameter
```shell ```shell
curl http://localhost:11434/api/show -d '{ curl http://localhost:11434/api/show -d '{
"name": "llama3" "name": "llama2"
}' }'
``` ```
@@ -912,41 +800,15 @@ curl http://localhost:11434/api/show -d '{
```json ```json
{ {
"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /Users/matt/.ollama/models/blobs/sha256:200765e1283640ffbd013184bf496e261032fa75b99498a9613be4e94d63ad52\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: \"\"\"\nPARAMETER num_ctx 4096\nPARAMETER stop \"\u003c/s\u003e\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSISTANT:\"", "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /Users/matt/.ollama/models/blobs/sha256:200765e1283640ffbd013184bf496e261032fa75b99498a9613be4e94d63ad52\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSSISTANT: \"\"\"\nPARAMETER num_ctx 4096\nPARAMETER stop \"\u003c/s\u003e\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSSISTANT:\"",
"parameters": "num_keep 24\nstop \"<|start_header_id|>\"\nstop \"<|end_header_id|>\"\nstop \"<|eot_id|>\"", "parameters": "num_ctx 4096\nstop \u003c/s\u003e\nstop USER:\nstop ASSSISTANT:",
"template": "{{ if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>\n\n{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ .Response }}<|eot_id|>", "template": "{{ .System }}\nUSER: {{ .Prompt }}\nASSSISTANT: ",
"details": { "details": {
"parent_model": "",
"format": "gguf", "format": "gguf",
"family": "llama", "family": "llama",
"families": [ "families": ["llama", "clip"],
"llama" "parameter_size": "7B",
],
"parameter_size": "8.0B",
"quantization_level": "Q4_0" "quantization_level": "Q4_0"
},
"model_info": {
"general.architecture": "llama",
"general.file_type": 2,
"general.parameter_count": 8030261248,
"general.quantization_version": 2,
"llama.attention.head_count": 32,
"llama.attention.head_count_kv": 8,
"llama.attention.layer_norm_rms_epsilon": 0.00001,
"llama.block_count": 32,
"llama.context_length": 8192,
"llama.embedding_length": 4096,
"llama.feed_forward_length": 14336,
"llama.rope.dimension_count": 128,
"llama.rope.freq_base": 500000,
"llama.vocab_size": 128256,
"tokenizer.ggml.bos_token_id": 128000,
"tokenizer.ggml.eos_token_id": 128009,
"tokenizer.ggml.merges": [], // populates if `verbose=true`
"tokenizer.ggml.model": "gpt2",
"tokenizer.ggml.pre": "llama-bpe",
"tokenizer.ggml.token_type": [], // populates if `verbose=true`
"tokenizer.ggml.tokens": [] // populates if `verbose=true`
} }
} }
``` ```
@@ -965,8 +827,8 @@ Copy a model. Creates a model with another name from an existing model.
```shell ```shell
curl http://localhost:11434/api/copy -d '{ curl http://localhost:11434/api/copy -d '{
"source": "llama3", "source": "llama2",
"destination": "llama3-backup" "destination": "llama2-backup"
}' }'
``` ```
@@ -992,7 +854,7 @@ Delete a model and its data.
```shell ```shell
curl -X DELETE http://localhost:11434/api/delete -d '{ curl -X DELETE http://localhost:11434/api/delete -d '{
"name": "llama3:13b" "name": "llama2:13b"
}' }'
``` ```
@@ -1020,7 +882,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
```shell ```shell
curl http://localhost:11434/api/pull -d '{ curl http://localhost:11434/api/pull -d '{
"name": "llama3" "name": "llama2"
}' }'
``` ```
@@ -1139,121 +1001,6 @@ If `stream` is set to `false`, then the response is a single JSON object:
## Generate Embeddings ## Generate Embeddings
```shell
POST /api/embed
```
Generate embeddings from a model
### Parameters
- `model`: name of model to generate embeddings from
- `input`: text or list of text to generate embeddings for
Advanced parameters:
- `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true`
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
### Examples
#### Request
```shell
curl http://localhost:11434/api/embed -d '{
"model": "all-minilm",
"input": "Why is the sky blue?"
}'
```
#### Response
```json
{
"model": "all-minilm",
"embeddings": [[
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
]],
"total_duration": 14143917,
"load_duration": 1019500,
"prompt_eval_count": 8
}
```
#### Request (Multiple input)
```shell
curl http://localhost:11434/api/embed -d '{
"model": "all-minilm",
"input": ["Why is the sky blue?", "Why is the grass green?"]
}'
```
#### Response
```json
{
"model": "all-minilm",
"embeddings": [[
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
],[
-0.0098027075, 0.06042469, 0.025257962, -0.006364387, 0.07272725,
0.017194884, 0.09032035, -0.051705178, 0.09951512, 0.09072481
]]
}
```
## List Running Models
```shell
GET /api/ps
```
List models that are currently loaded into memory.
#### Examples
### Request
```shell
curl http://localhost:11434/api/ps
```
#### Response
A single JSON object will be returned.
```json
{
"models": [
{
"name": "mistral:latest",
"model": "mistral:latest",
"size": 5137025024,
"digest": "2ae6f6dd7a3dd734790bbbf58b8909a606e0e7e97e94b7604e0aa7ae4490e6d8",
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "7.2B",
"quantization_level": "Q4_0"
},
"expires_at": "2024-06-04T14:38:31.83753-07:00",
"size_vram": 5137025024
}
]
}
```
## Generate Embedding
> Note: this endpoint has been superseded by `/api/embed`
```shell ```shell
POST /api/embeddings POST /api/embeddings
``` ```

View File

@@ -6,8 +6,6 @@ Install required tools:
- go version 1.22 or higher - go version 1.22 or higher
- gcc version 11.4.0 or higher - gcc version 11.4.0 or higher
### MacOS
```bash ```bash
brew install go cmake gcc brew install go cmake gcc
``` ```
@@ -53,7 +51,7 @@ Typically the build scripts will auto-detect CUDA, however, if your Linux distro
or installation approach uses unusual paths, you can specify the location by or installation approach uses unusual paths, you can specify the location by
specifying an environment variable `CUDA_LIB_DIR` to the location of the shared specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
a set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70") set set of target CUDA architectues by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
Then generate dependencies: Then generate dependencies:
@@ -104,7 +102,7 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H,
you might use: you might use:
``` ```
OLLAMA_CUSTOM_CPU_DEFS="-DGGML_AVX=on -DGGML_AVX2=on -DGGML_F16C=on -DGGML_FMA=on" go generate ./... OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./...
go build . go build .
``` ```
@@ -114,18 +112,15 @@ If you have Docker available, you can build linux binaries with `./scripts/build
### Windows ### Windows
Note: The Windows build for Ollama is still under development. Note: The windows build for Ollama is still under development.
First, install required tools: Install required tools:
- MSVC toolchain - C/C++ and cmake as minimal requirements - MSVC toolchain - C/C++ and cmake as minimal requirements
- Go version 1.22 or higher - Go version 1.22 or higher
- MinGW (pick one variant) with GCC. - MinGW (pick one variant) with GCC.
- [MinGW-w64](https://www.mingw-w64.org/) - [MinGW-w64](https://www.mingw-w64.org/)
- [MSYS2](https://www.msys2.org/) - [MSYS2](https://www.msys2.org/)
- The `ThreadJob` Powershell module: `Install-Module -Name ThreadJob -Scope CurrentUser`
Then, build the `ollama` binary:
```powershell ```powershell
$env:CGO_ENABLED="1" $env:CGO_ENABLED="1"
@@ -147,4 +142,4 @@ In addition to the common Windows development tools described above, install AMD
- [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html) - [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html)
- [Strawberry Perl](https://strawberryperl.com/) - [Strawberry Perl](https://strawberryperl.com/)
Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`). Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).

View File

@@ -1,71 +0,0 @@
# Ollama Docker image
### CPU only
```bash
docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
```
### Nvidia GPU
Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
#### Install with Apt
1. Configure the repository
```bash
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
| sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
sudo apt-get update
```
2. Install the NVIDIA Container Toolkit packages
```bash
sudo apt-get install -y nvidia-container-toolkit
```
#### Install with Yum or Dnf
1. Configure the repository
```bash
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
```
2. Install the NVIDIA Container Toolkit packages
```bash
sudo yum install -y nvidia-container-toolkit
```
#### Configure Docker to use Nvidia driver
```
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
```
#### Start the container
```bash
docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
```
### AMD GPU
To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
```
docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
```
### Run model locally
Now you can run a model:
```
docker exec -it ollama ollama run llama3.1
```
### Try different models
More models can be found on the [Ollama library](https://ollama.com/library).

View File

@@ -6,7 +6,7 @@ Ollama on macOS and Windows will automatically download updates. Click on the ta
On Linux, re-run the install script: On Linux, re-run the install script:
```shell ```
curl -fsSL https://ollama.com/install.sh | sh curl -fsSL https://ollama.com/install.sh | sh
``` ```
@@ -30,9 +30,9 @@ To change this when using `ollama run`, use `/set parameter`:
When using the API, specify the `num_ctx` parameter: When using the API, specify the `num_ctx` parameter:
```shell ```
curl http://localhost:11434/api/generate -d '{ curl http://localhost:11434/api/generate -d '{
"model": "llama3", "model": "llama2",
"prompt": "Why is the sky blue?", "prompt": "Why is the sky blue?",
"options": { "options": {
"num_ctx": 4096 "num_ctx": 4096
@@ -40,21 +40,6 @@ curl http://localhost:11434/api/generate -d '{
}' }'
``` ```
## How can I tell if my model was loaded onto the GPU?
Use the `ollama ps` command to see what models are currently loaded into memory.
```shell
ollama ps
NAME ID SIZE PROCESSOR UNTIL
llama3:70b bcfb190ca3a7 42 GB 100% GPU 4 minutes from now
```
The `Processor` column will show which memory the model was loaded in to:
* `100% GPU` means the model was loaded entirely into the GPU
* `100% CPU` means the model was loaded entirely in system memory
* `48%/52% CPU/GPU` means the model was loaded partially onto both the GPU and into system memory
## How do I configure Ollama server? ## How do I configure Ollama server?
Ollama server can be configured with environment variables. Ollama server can be configured with environment variables.
@@ -95,48 +80,18 @@ If Ollama is run as a systemd service, environment variables should be set using
### Setting environment variables on Windows ### Setting environment variables on Windows
On Windows, Ollama inherits your user and system environment variables. On windows, Ollama inherits your user and system environment variables.
1. First Quit Ollama by clicking on it in the task bar. 1. First Quit Ollama by clicking on it in the task bar
2. Start the Settings (Windows 11) or Control Panel (Windows 10) application and search for _environment variables_. 2. Edit system environment variables from the control panel
3. Click on _Edit environment variables for your account_. 3. Edit or create New variable(s) for your user account for `OLLAMA_HOST`, `OLLAMA_MODELS`, etc.
4. Edit or create a new variable for your user account for `OLLAMA_HOST`, `OLLAMA_MODELS`, etc. 4. Click OK/Apply to save
5. Click OK/Apply to save. 5. Run `ollama` from a new terminal window
6. Start the Ollama application from the Windows Start menu.
## How do I use Ollama behind a proxy?
Ollama is compatible with proxy servers if `HTTP_PROXY` or `HTTPS_PROXY` are configured. When using either variables, ensure it is set where `ollama serve` can access the values. When using `HTTPS_PROXY`, ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform.
### How do I use Ollama behind a proxy in Docker?
The Ollama Docker container image can be configured to use a proxy by passing `-e HTTPS_PROXY=https://proxy.example.com` when starting the container.
Alternatively, the Docker daemon can be configured to use a proxy. Instructions are available for Docker Desktop on [macOS](https://docs.docker.com/desktop/settings/mac/#proxies), [Windows](https://docs.docker.com/desktop/settings/windows/#proxies), and [Linux](https://docs.docker.com/desktop/settings/linux/#proxies), and Docker [daemon with systemd](https://docs.docker.com/config/daemon/systemd/#httphttps-proxy).
Ensure the certificate is installed as a system certificate when using HTTPS. This may require a new Docker image when using a self-signed certificate.
```dockerfile
FROM ollama/ollama
COPY my-ca.pem /usr/local/share/ca-certificates/my-ca.crt
RUN update-ca-certificates
```
Build and run this image:
```shell
docker build -t ollama-with-ca .
docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-with-ca
```
## Does Ollama send my prompts and answers back to ollama.com?
No. Ollama runs locally, and conversation data does not leave your machine.
## How can I expose Ollama on my network? ## How can I expose Ollama on my network?
@@ -163,7 +118,7 @@ server {
Ollama can be accessed using a range of tools for tunneling tools. For example with Ngrok: Ollama can be accessed using a range of tools for tunneling tools. For example with Ngrok:
```shell ```
ngrok http 11434 --host-header="localhost:11434" ngrok http 11434 --host-header="localhost:11434"
``` ```
@@ -171,7 +126,7 @@ ngrok http 11434 --host-header="localhost:11434"
To use Ollama with Cloudflare Tunnel, use the `--url` and `--http-host-header` flags: To use Ollama with Cloudflare Tunnel, use the `--url` and `--http-host-header` flags:
```shell ```
cloudflared tunnel --url http://localhost:11434 --http-host-header="localhost:11434" cloudflared tunnel --url http://localhost:11434 --http-host-header="localhost:11434"
``` ```
@@ -185,7 +140,7 @@ Refer to the section [above](#how-do-i-configure-ollama-server) for how to set e
- macOS: `~/.ollama/models` - macOS: `~/.ollama/models`
- Linux: `/usr/share/ollama/.ollama/models` - Linux: `/usr/share/ollama/.ollama/models`
- Windows: `C:\Users\%username%\.ollama\models` - Windows: `C:\Users\<username>\.ollama\models`
### How do I set them to a different location? ### How do I set them to a different location?
@@ -193,10 +148,39 @@ If a different directory needs to be used, set the environment variable `OLLAMA_
Refer to the section [above](#how-do-i-configure-ollama-server) for how to set environment variables on your platform. Refer to the section [above](#how-do-i-configure-ollama-server) for how to set environment variables on your platform.
## Does Ollama send my prompts and answers back to ollama.com?
No. Ollama runs locally, and conversation data does not leave your machine.
## How can I use Ollama in Visual Studio Code? ## How can I use Ollama in Visual Studio Code?
There is already a large collection of plugins available for VSCode as well as other editors that leverage Ollama. See the list of [extensions & plugins](https://github.com/ollama/ollama#extensions--plugins) at the bottom of the main repository readme. There is already a large collection of plugins available for VSCode as well as other editors that leverage Ollama. See the list of [extensions & plugins](https://github.com/ollama/ollama#extensions--plugins) at the bottom of the main repository readme.
## How do I use Ollama behind a proxy?
Ollama is compatible with proxy servers if `HTTP_PROXY` or `HTTPS_PROXY` are configured. When using either variables, ensure it is set where `ollama serve` can access the values. When using `HTTPS_PROXY`, ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform.
### How do I use Ollama behind a proxy in Docker?
The Ollama Docker container image can be configured to use a proxy by passing `-e HTTPS_PROXY=https://proxy.example.com` when starting the container.
Alternatively, the Docker daemon can be configured to use a proxy. Instructions are available for Docker Desktop on [macOS](https://docs.docker.com/desktop/settings/mac/#proxies), [Windows](https://docs.docker.com/desktop/settings/windows/#proxies), and [Linux](https://docs.docker.com/desktop/settings/linux/#proxies), and Docker [daemon with systemd](https://docs.docker.com/config/daemon/systemd/#httphttps-proxy).
Ensure the certificate is installed as a system certificate when using HTTPS. This may require a new Docker image when using a self-signed certificate.
```dockerfile
FROM ollama/ollama
COPY my-ca.pem /usr/local/share/ca-certificates/my-ca.crt
RUN update-ca-certificates
```
Build and run this image:
```shell
docker build -t ollama-with-ca .
docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-with-ca
```
## How do I use Ollama with GPU acceleration in Docker? ## How do I use Ollama with GPU acceleration in Docker?
The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). See [ollama/ollama](https://hub.docker.com/r/ollama/ollama) for more details. The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). See [ollama/ollama](https://hub.docker.com/r/ollama/ollama) for more details.
@@ -211,7 +195,7 @@ Open `Control Panel > Networking and Internet > View network status and tasks` a
Click on `Configure` and open the `Advanced` tab. Search through each of the properties until you find `Large Send Offload Version 2 (IPv4)` and `Large Send Offload Version 2 (IPv6)`. *Disable* both of these Click on `Configure` and open the `Advanced` tab. Search through each of the properties until you find `Large Send Offload Version 2 (IPv4)` and `Large Send Offload Version 2 (IPv6)`. *Disable* both of these
properties. properties.
## How can I preload a model into Ollama to get faster response times? ## How can I pre-load a model to get faster response times?
If you are using the API you can preload a model by sending the Ollama server an empty request. This works with both the `/api/generate` and `/api/chat` API endpoints. If you are using the API you can preload a model by sending the Ollama server an empty request. This works with both the `/api/generate` and `/api/chat` API endpoints.
@@ -225,11 +209,6 @@ To use the chat completions endpoint, use:
curl http://localhost:11434/api/chat -d '{"model": "mistral"}' curl http://localhost:11434/api/chat -d '{"model": "mistral"}'
``` ```
To preload a model using the CLI, use the command:
```shell
ollama run llama3.1 ""
```
## How do I keep a model loaded in memory or make it unload immediately? ## How do I keep a model loaded in memory or make it unload immediately?
By default models are kept in memory for 5 minutes before being unloaded. This allows for quicker response times if you are making numerous requests to the LLM. You may, however, want to free up the memory before the 5 minutes have elapsed or keep the model loaded indefinitely. Use the `keep_alive` parameter with either the `/api/generate` and `/api/chat` API endpoints to control how long the model is left in memory. By default models are kept in memory for 5 minutes before being unloaded. This allows for quicker response times if you are making numerous requests to the LLM. You may, however, want to free up the memory before the 5 minutes have elapsed or keep the model loaded indefinitely. Use the `keep_alive` parameter with either the `/api/generate` and `/api/chat` API endpoints to control how long the model is left in memory.
@@ -242,38 +221,14 @@ The `keep_alive` parameter can be set to:
For example, to preload a model and leave it in memory use: For example, to preload a model and leave it in memory use:
```shell ```shell
curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": -1}' curl http://localhost:11434/api/generate -d '{"model": "llama2", "keep_alive": -1}'
``` ```
To unload the model and free up memory use: To unload the model and free up memory use:
```shell ```shell
curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": 0}' curl http://localhost:11434/api/generate -d '{"model": "llama2", "keep_alive": 0}'
``` ```
Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable. Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable.
If you wish to override the `OLLAMA_KEEP_ALIVE` setting, use the `keep_alive` API parameter with the `/api/generate` or `/api/chat` API endpoints. If you wish to override the `OLLAMA_KEEP_ALIVE` setting, use the `keep_alive` API parameter with the `/api/generate` or `/api/chat` API endpoints.
## How do I manage the maximum number of requests the Ollama server can queue?
If too many requests are sent to the server, it will respond with a 503 error indicating the server is overloaded. You can adjust how many requests may be queue by setting `OLLAMA_MAX_QUEUE`.
## How does Ollama handle concurrent requests?
Ollama supports two levels of concurrent processing. If your system has sufficient available memory (system memory when using CPU inference, or VRAM for GPU inference) then multiple models can be loaded at the same time. For a given model, if there is sufficient available memory when the model is loaded, it is configured to allow parallel request processing.
If there is insufficient available memory to load a new model request while one or more models are already loaded, all new requests will be queued until the new model can be loaded. As prior models become idle, one or more will be unloaded to make room for the new model. Queued requests will be processed in order. When using GPU inference new models must be able to completely fit in VRAM to allow concurrent model loads.
Parallel request processing for a given model results in increasing the context size by the number of parallel requests. For example, a 2K context with 4 parallel requests will result in an 8K context and additional memory allocation.
The following server settings may be used to adjust how Ollama handles concurrent requests on most platforms:
- `OLLAMA_MAX_LOADED_MODELS` - The maximum number of models that can be loaded concurrently provided they fit in available memory. The default is 3 * the number of GPUs or 3 for CPU inference.
- `OLLAMA_NUM_PARALLEL` - The maximum number of parallel requests each model will process at the same time. The default will auto-select either 4 or 1 based on available memory.
- `OLLAMA_MAX_QUEUE` - The maximum number of requests Ollama will queue when busy before rejecting additional requests. The default is 512
Note: Windows with Radeon GPUs currently default to 1 model maximum due to limitations in ROCm v5.7 for available VRAM reporting. Once ROCm v6.2 is available, Windows Radeon will follow the defaults above. You may enable concurrent model loads on Radeon on Windows, but ensure you don't load more models than will fit into your GPUs VRAM.
## How does Ollama load models on multiple GPUs?
Installing multiple GPUs of the same brand can be a great way to increase your available VRAM to load larger models. When you load a new model, Ollama evaluates the required VRAM for the model against what is currently available. If the model will entirely fit on any single GPU, Ollama will load the model on that GPU. This typically provides the best performance as it reduces the amount of data transfering across the PCI bus during inference. If the model does not fit entirely on one GPU, then it will be spread across all the available GPUs.

View File

@@ -8,7 +8,7 @@ Check your compute compatibility to see if your card is supported:
| Compute Capability | Family | Cards | | Compute Capability | Family | Cards |
| ------------------ | ------------------- | ----------------------------------------------------------------------------------------------------------- | | ------------------ | ------------------- | ----------------------------------------------------------------------------------------------------------- |
| 9.0 | NVIDIA | `H100` | | 9.0 | NVIDIA | `H100` |
| 8.9 | GeForce RTX 40xx | `RTX 4090` `RTX 4080 SUPER` `RTX 4080` `RTX 4070 Ti SUPER` `RTX 4070 Ti` `RTX 4070 SUPER` `RTX 4070` `RTX 4060 Ti` `RTX 4060` | | 8.9 | GeForce RTX 40xx | `RTX 4090` `RTX 4080` `RTX 4070 Ti` `RTX 4060 Ti` |
| | NVIDIA Professional | `L4` `L40` `RTX 6000` | | | NVIDIA Professional | `L4` `L40` `RTX 6000` |
| 8.6 | GeForce RTX 30xx | `RTX 3090 Ti` `RTX 3090` `RTX 3080 Ti` `RTX 3080` `RTX 3070 Ti` `RTX 3070` `RTX 3060 Ti` `RTX 3060` | | 8.6 | GeForce RTX 30xx | `RTX 3090 Ti` `RTX 3090` `RTX 3080 Ti` `RTX 3080` `RTX 3070 Ti` `RTX 3070` `RTX 3060 Ti` `RTX 3060` |
| | NVIDIA Professional | `A40` `RTX A6000` `RTX A5000` `RTX A4000` `RTX A3000` `RTX A2000` `A10` `A16` `A2` | | | NVIDIA Professional | `A40` `RTX A6000` `RTX A5000` `RTX A4000` `RTX A3000` `RTX A2000` `A10` `A16` `A2` |
@@ -18,7 +18,7 @@ Check your compute compatibility to see if your card is supported:
| | Quadro | `RTX 8000` `RTX 6000` `RTX 5000` `RTX 4000` | | | Quadro | `RTX 8000` `RTX 6000` `RTX 5000` `RTX 4000` |
| 7.0 | NVIDIA | `TITAN V` `V100` `Quadro GV100` | | 7.0 | NVIDIA | `TITAN V` `V100` `Quadro GV100` |
| 6.1 | NVIDIA TITAN | `TITAN Xp` `TITAN X` | | 6.1 | NVIDIA TITAN | `TITAN Xp` `TITAN X` |
| | GeForce GTX | `GTX 1080 Ti` `GTX 1080` `GTX 1070 Ti` `GTX 1070` `GTX 1060` `GTX 1050 Ti` `GTX 1050` | | | GeForce GTX | `GTX 1080 Ti` `GTX 1080` `GTX 1070 Ti` `GTX 1070` `GTX 1060` `GTX 1050` |
| | Quadro | `P6000` `P5200` `P4200` `P3200` `P5000` `P4000` `P3000` `P2200` `P2000` `P1000` `P620` `P600` `P500` `P520` | | | Quadro | `P6000` `P5200` `P4200` `P3200` `P5000` `P4000` `P3000` `P2200` `P2000` `P1000` `P620` `P600` `P500` `P520` |
| | Tesla | `P40` `P4` | | | Tesla | `P40` `P4` |
| 6.0 | NVIDIA | `Tesla P100` `Quadro GP100` | | 6.0 | NVIDIA | `Tesla P100` `Quadro GP100` |
@@ -46,24 +46,13 @@ sudo modprobe nvidia_uvm`
## AMD Radeon ## AMD Radeon
Ollama supports the following AMD GPUs: Ollama supports the following AMD GPUs:
### Linux Support
| Family | Cards and accelerators | | Family | Cards and accelerators |
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
| AMD Radeon RX | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900XT` `6800 XT` `6800` `Vega 64` `Vega 56` | | AMD Radeon RX | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900XT` `6800 XT` `6800` `Vega 64` `Vega 56` |
| AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` `V420` `V340` `V320` `Vega II Duo` `Vega II` `VII` `SSG` | | AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` `V420` `V340` `V320` `Vega II Duo` `Vega II` `VII` `SSG` |
| AMD Instinct | `MI300X` `MI300A` `MI300` `MI250X` `MI250` `MI210` `MI200` `MI100` `MI60` `MI50` | | AMD Instinct | `MI300X` `MI300A` `MI300` `MI250X` `MI250` `MI210` `MI200` `MI100` `MI60` `MI50` |
### Windows Support ### Overrides
With ROCm v6.1, the following GPUs are supported on Windows.
| Family | Cards and accelerators |
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
| AMD Radeon RX | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900XT` `6800 XT` `6800` |
| AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` |
### Overrides on Linux
Ollama leverages the AMD ROCm library, which does not support all AMD GPUs. In Ollama leverages the AMD ROCm library, which does not support all AMD GPUs. In
some cases you can force the system to try to use a similar LLVM target that is some cases you can force the system to try to use a similar LLVM target that is
close. For example The Radeon RX 5400 is `gfx1034` (also known as 10.3.4) close. For example The Radeon RX 5400 is `gfx1034` (also known as 10.3.4)
@@ -74,7 +63,7 @@ would set `HSA_OVERRIDE_GFX_VERSION="10.3.0"` as an environment variable for the
server. If you have an unsupported AMD GPU you can experiment using the list of server. If you have an unsupported AMD GPU you can experiment using the list of
supported types below. supported types below.
At this time, the known supported GPU types on linux are the following LLVM Targets. At this time, the known supported GPU types are the following LLVM Targets.
This table shows some example GPUs that map to these LLVM targets: This table shows some example GPUs that map to these LLVM targets:
| **LLVM Target** | **An Example GPU** | | **LLVM Target** | **An Example GPU** |
|-----------------|---------------------| |-----------------|---------------------|

View File

@@ -1,90 +1,168 @@
# Import # Import a model
GGUF models and select Safetensors models can be imported directly into Ollama. This guide walks through importing a GGUF, PyTorch or Safetensors model.
## Import GGUF ## Importing (GGUF)
A binary GGUF file can be imported directly into Ollama through a Modelfile. ### Step 1: Write a `Modelfile`
```dockerfile Start by creating a `Modelfile`. This file is the blueprint for your model, specifying weights, parameters, prompt templates and more.
FROM /path/to/file.gguf
```
FROM ./mistral-7b-v0.1.Q4_0.gguf
``` ```
## Import Safetensors (Optional) many chat models require a prompt template in order to answer correctly. A default prompt template can be specified with the `TEMPLATE` instruction in the `Modelfile`:
If the model being imported is one of these architectures, it can be imported directly into Ollama through a Modelfile: ```
FROM ./mistral-7b-v0.1.Q4_0.gguf
- LlamaForCausalLM TEMPLATE "[INST] {{ .Prompt }} [/INST]"
- MistralForCausalLM
- MixtralForCausalLM
- GemmaForCausalLM
- Phi3ForCausalLM
```dockerfile
FROM /path/to/safetensors/directory
``` ```
For architectures not directly convertable by Ollama, see llama.cpp's [guide](https://github.com/ggerganov/llama.cpp/blob/master/README.md#prepare-and-quantize) on conversion. After conversion, see [Import GGUF](#import-gguf). ### Step 2: Create the Ollama model
## Automatic Quantization Finally, create a model from your `Modelfile`:
> [!NOTE]
> Automatic quantization requires v0.1.35 or higher.
Ollama is capable of quantizing FP16 or FP32 models to any of the supported quantizations with the `-q/--quantize` flag in `ollama create`.
```dockerfile
FROM /path/to/my/gemma/f16/model
``` ```
ollama create example -f Modelfile
```
### Step 3: Run your model
Next, test the model with `ollama run`:
```
ollama run example "What is your favourite condiment?"
```
## Importing (PyTorch & Safetensors)
> Importing from PyTorch and Safetensors is a longer process than importing from GGUF. Improvements that make it easier are a work in progress.
### Setup
First, clone the `ollama/ollama` repo:
```
git clone git@github.com:ollama/ollama.git ollama
cd ollama
```
and then fetch its `llama.cpp` submodule:
```shell ```shell
$ ollama create -q Q4_K_M mymodel git submodule init
transferring model data git submodule update llm/llama.cpp
quantizing F16 model to Q4_K_M
creating new layer sha256:735e246cc1abfd06e9cdcf95504d6789a6cd1ad7577108a70d9902fef503c1bd
creating new layer sha256:0853f0ad24e5865173bbf9ffcc7b0f5d56b66fd690ab1009867e45e7d2c4db0f
writing manifest
success
``` ```
### Supported Quantizations Next, install the Python dependencies:
- `Q4_0` ```
- `Q4_1` python3 -m venv llm/llama.cpp/.venv
- `Q5_0` source llm/llama.cpp/.venv/bin/activate
- `Q5_1` pip install -r llm/llama.cpp/requirements.txt
- `Q8_0`
#### K-means Quantizations
- `Q3_K_S`
- `Q3_K_M`
- `Q3_K_L`
- `Q4_K_S`
- `Q4_K_M`
- `Q5_K_S`
- `Q5_K_M`
- `Q6_K`
## Template Detection
> [!NOTE]
> Template detection requires v0.1.42 or higher.
Ollama uses model metadata, specifically `tokenizer.chat_template`, to automatically create a template appropriate for the model you're importing.
```dockerfile
FROM /path/to/my/gemma/model
``` ```
```shell Then build the `quantize` tool:
$ ollama create mymodel
transferring model data ```
using autodetected template gemma-instruct make -C llm/llama.cpp quantize
creating new layer sha256:baa2a0edc27d19cc6b7537578a9a7ba1a4e3214dc185ed5ae43692b319af7b84
creating new layer sha256:ba66c3309914dbef07e5149a648fd1877f030d337a4f240d444ea335008943cb
writing manifest
success
``` ```
Defining a template in the Modelfile will disable this feature which may be useful if you want to use a different template than the autodetected one. ### Clone the HuggingFace repository (optional)
If the model is currently hosted in a HuggingFace repository, first clone that repository to download the raw model.
Install [Git LFS](https://docs.github.com/en/repositories/working-with-files/managing-large-files/installing-git-large-file-storage), verify it's installed, and then clone the model's repository:
```
git lfs install
git clone https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 model
```
### Convert the model
> Note: some model architectures require using specific convert scripts. For example, Qwen models require running `convert-hf-to-gguf.py` instead of `convert.py`
```
python llm/llama.cpp/convert.py ./model --outtype f16 --outfile converted.bin
```
### Quantize the model
```
llm/llama.cpp/quantize converted.bin quantized.bin q4_0
```
### Step 3: Write a `Modelfile`
Next, create a `Modelfile` for your model:
```
FROM quantized.bin
TEMPLATE "[INST] {{ .Prompt }} [/INST]"
```
### Step 4: Create the Ollama model
Finally, create a model from your `Modelfile`:
```
ollama create example -f Modelfile
```
### Step 5: Run your model
Next, test the model with `ollama run`:
```
ollama run example "What is your favourite condiment?"
```
## Publishing your model (optional early alpha)
Publishing models is in early alpha. If you'd like to publish your model to share with others, follow these steps:
1. Create [an account](https://ollama.com/signup)
2. Copy your Ollama public key:
- macOS: `cat ~/.ollama/id_ed25519.pub`
- Windows: `type %USERPROFILE%\.ollama\id_ed25519.pub`
- Linux: `cat /usr/share/ollama/.ollama/id_ed25519.pub`
3. Add your public key to your [Ollama account](https://ollama.com/settings/keys)
Next, copy your model to your username's namespace:
```
ollama cp example <your username>/example
```
Then push the model:
```
ollama push <your username>/example
```
After publishing, your model will be available at `https://ollama.com/<your username>/example`.
## Quantization reference
The quantization options are as follow (from highest highest to lowest levels of quantization). Note: some architectures such as Falcon do not support K quants.
- `q2_K`
- `q3_K`
- `q3_K_S`
- `q3_K_M`
- `q3_K_L`
- `q4_0` (recommended)
- `q4_1`
- `q4_K`
- `q4_K_S`
- `q4_K_M`
- `q5_0`
- `q5_1`
- `q5_K`
- `q5_K_S`
- `q5_K_M`
- `q6_K`
- `q8_0`
- `f16`

View File

@@ -20,12 +20,13 @@ GPU.
## Manual install ## Manual install
### Download `ollama` ### Download the `ollama` binary
Download and extract the Linux package: Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
```bash ```bash
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
sudo chmod +x /usr/bin/ollama
``` ```
### Adding Ollama as a startup service (recommended) ### Adding Ollama as a startup service (recommended)
@@ -95,17 +96,8 @@ curl -fsSL https://ollama.com/install.sh | sh
Or by downloading the ollama binary: Or by downloading the ollama binary:
```bash ```bash
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
``` sudo chmod +x /usr/bin/ollama
## Installing specific versions
Use `OLLAMA_VERSION` environment variable with the install script to install a specific version of Ollama, including pre-releases. You can find the version numbers in the [releases page](https://github.com/ollama/ollama/releases).
For example:
```
curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.1.32 sh
``` ```
## Viewing logs ## Viewing logs
@@ -113,7 +105,7 @@ curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.1.32 sh
To view logs of Ollama running as a startup service, run: To view logs of Ollama running as a startup service, run:
```bash ```bash
journalctl -e -u ollama journalctl -u ollama
``` ```
## Uninstall ## Uninstall

View File

@@ -1,7 +1,6 @@
# Ollama Model File # Ollama Model File
> [!NOTE] > Note: `Modelfile` syntax is in development
> `Modelfile` syntax is in development
A model file is the blueprint to create and share models with Ollama. A model file is the blueprint to create and share models with Ollama.
@@ -11,7 +10,7 @@ A model file is the blueprint to create and share models with Ollama.
- [Examples](#examples) - [Examples](#examples)
- [Instructions](#instructions) - [Instructions](#instructions)
- [FROM (Required)](#from-required) - [FROM (Required)](#from-required)
- [Build from llama3](#build-from-llama3) - [Build from llama2](#build-from-llama2)
- [Build from a bin file](#build-from-a-bin-file) - [Build from a bin file](#build-from-a-bin-file)
- [PARAMETER](#parameter) - [PARAMETER](#parameter)
- [Valid Parameters and Values](#valid-parameters-and-values) - [Valid Parameters and Values](#valid-parameters-and-values)
@@ -49,7 +48,7 @@ INSTRUCTION arguments
An example of a `Modelfile` creating a mario blueprint: An example of a `Modelfile` creating a mario blueprint:
```modelfile ```modelfile
FROM llama3 FROM llama2
# sets the temperature to 1 [higher is more creative, lower is more coherent] # sets the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1 PARAMETER temperature 1
# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token # sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
@@ -68,25 +67,33 @@ To use this:
More examples are available in the [examples directory](../examples). More examples are available in the [examples directory](../examples).
To view the Modelfile of a given model, use the `ollama show --modelfile` command. ### `Modelfile`s in [ollama.com/library][1]
There are two ways to view `Modelfile`s underlying the models in [ollama.com/library][1]:
- Option 1: view a details page from a model's tags page:
1. Go to a particular model's tags (e.g. https://ollama.com/library/llama2/tags)
2. Click on a tag (e.g. https://ollama.com/library/llama2:13b)
3. Scroll down to "Layers"
- Note: if the [`FROM` instruction](#from-required) is not present,
it means the model was created from a local file
- Option 2: use `ollama show` to print the `Modelfile` for any local models like so:
```bash ```bash
> ollama show --modelfile llama3 > ollama show --modelfile llama2:13b
# Modelfile generated by "ollama show" # Modelfile generated by "ollama show"
# To build a new Modelfile based on this one, replace the FROM line with: # To build a new Modelfile based on this one, replace the FROM line with:
# FROM llama3:latest # FROM llama2:13b
FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29
TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|> FROM /root/.ollama/models/blobs/sha256:123abc
TEMPLATE """[INST] {{ if .System }}<<SYS>>{{ .System }}<</SYS>>
{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|> {{ end }}{{ .Prompt }} [/INST] """
SYSTEM """"""
{{ .Response }}<|eot_id|>""" PARAMETER stop [INST]
PARAMETER stop "<|start_header_id|>" PARAMETER stop [/INST]
PARAMETER stop "<|end_header_id|>" PARAMETER stop <<SYS>>
PARAMETER stop "<|eot_id|>" PARAMETER stop <</SYS>>
PARAMETER stop "<|reserved_special_token"
``` ```
## Instructions ## Instructions
@@ -99,10 +106,10 @@ The `FROM` instruction defines the base model to use when creating a model.
FROM <model name>:<tag> FROM <model name>:<tag>
``` ```
#### Build from llama3 #### Build from llama2
```modelfile ```modelfile
FROM llama3 FROM llama2
``` ```
A list of available base models: A list of available base models:
@@ -141,7 +148,6 @@ PARAMETER <parameter> <parametervalue>
| num_predict | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) | int | num_predict 42 | | num_predict | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) | int | num_predict 42 |
| top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 | | top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 |
| top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 | | top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 |
| min_p | Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0) | float | min_p 0.05 |
### TEMPLATE ### TEMPLATE

View File

@@ -25,38 +25,7 @@ chat_completion = client.chat.completions.create(
'content': 'Say this is a test', 'content': 'Say this is a test',
} }
], ],
model='llama3', model='llama2',
)
response = client.chat.completions.create(
model="llava",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "What's in this image?"},
{
"type": "image_url",
"image_url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
},
],
}
],
max_tokens=300,
)
completion = client.completions.create(
model="llama3",
prompt="Say this is a test",
)
list_completion = client.models.list()
model = client.models.retrieve("llama3")
embeddings = client.embeddings.create(
model="all-minilm",
input=["why is the sky blue?", "why is the grass green?"],
) )
``` ```
@@ -73,48 +42,18 @@ const openai = new OpenAI({
}) })
const chatCompletion = await openai.chat.completions.create({ const chatCompletion = await openai.chat.completions.create({
messages: [{ role: 'user', content: 'Say this is a test' }], messages: [{ role: 'user', content: 'Say this is a test' }],
model: 'llama3', model: 'llama2',
})
const response = await openai.chat.completions.create({
model: "llava",
messages: [
{
role: "user",
content: [
{ type: "text", text: "What's in this image?" },
{
type: "image_url",
image_url: "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
},
],
},
],
})
const completion = await openai.completions.create({
model: "llama3",
prompt: "Say this is a test.",
})
const listCompletion = await openai.models.list()
const model = await openai.models.retrieve("llama3")
const embedding = await openai.embeddings.create({
model: "all-minilm",
input: ["why is the sky blue?", "why is the grass green?"],
}) })
``` ```
### `curl` ### `curl`
``` shell ```
curl http://localhost:11434/v1/chat/completions \ curl http://localhost:11434/v1/chat/completions \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{ -d '{
"model": "llama3", "model": "llama2",
"messages": [ "messages": [
{ {
"role": "system", "role": "system",
@@ -126,48 +65,6 @@ curl http://localhost:11434/v1/chat/completions \
} }
] ]
}' }'
curl http://localhost:11434/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "llava",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What'\''s in this image?"
},
{
"type": "image_url",
"image_url": {
"url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"
}
}
]
}
],
"max_tokens": 300
}'
curl http://localhost:11434/v1/completions \
-H "Content-Type: application/json" \
-d '{
"model": "llama3",
"prompt": "Say this is a test"
}'
curl http://localhost:11434/v1/models
curl http://localhost:11434/v1/models/llama3
curl http://localhost:11434/v1/embeddings \
-H "Content-Type: application/json" \
-d '{
"model": "all-minilm",
"input": ["why is the sky blue?", "why is the grass green?"]
}'
``` ```
## Endpoints ## Endpoints
@@ -180,8 +77,8 @@ curl http://localhost:11434/v1/embeddings \
- [x] Streaming - [x] Streaming
- [x] JSON mode - [x] JSON mode
- [x] Reproducible outputs - [x] Reproducible outputs
- [x] Vision - [ ] Vision
- [x] Tools (streaming support coming soon) - [ ] Function calling
- [ ] Logprobs - [ ] Logprobs
#### Supported request fields #### Supported request fields
@@ -189,10 +86,7 @@ curl http://localhost:11434/v1/embeddings \
- [x] `model` - [x] `model`
- [x] `messages` - [x] `messages`
- [x] Text `content` - [x] Text `content`
- [x] Image `content` - [ ] Array of `content` parts
- [x] Base64 encoded image
- [ ] Image URL
- [x] Array of `content` parts
- [x] `frequency_penalty` - [x] `frequency_penalty`
- [x] `presence_penalty` - [x] `presence_penalty`
- [x] `response_format` - [x] `response_format`
@@ -202,79 +96,24 @@ curl http://localhost:11434/v1/embeddings \
- [x] `temperature` - [x] `temperature`
- [x] `top_p` - [x] `top_p`
- [x] `max_tokens` - [x] `max_tokens`
- [x] `tools` - [ ] `logit_bias`
- [ ] `tools`
- [ ] `tool_choice` - [ ] `tool_choice`
- [ ] `logit_bias`
- [ ] `user`
- [ ] `n`
### `/v1/completions`
#### Supported features
- [x] Completions
- [x] Streaming
- [x] JSON mode
- [x] Reproducible outputs
- [ ] Logprobs
#### Supported request fields
- [x] `model`
- [x] `prompt`
- [x] `frequency_penalty`
- [x] `presence_penalty`
- [x] `seed`
- [x] `stop`
- [x] `stream`
- [x] `temperature`
- [x] `top_p`
- [x] `max_tokens`
- [x] `suffix`
- [ ] `best_of`
- [ ] `echo`
- [ ] `logit_bias`
- [ ] `user` - [ ] `user`
- [ ] `n` - [ ] `n`
#### Notes #### Notes
- `prompt` currently only accepts a string - Setting `seed` will always set `temperature` to `0`
- `finish_reason` will always be `stop`
### `/v1/models` - `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached
#### Notes
- `created` corresponds to when the model was last modified
- `owned_by` corresponds to the ollama username, defaulting to `"library"`
### `/v1/models/{model}`
#### Notes
- `created` corresponds to when the model was last modified
- `owned_by` corresponds to the ollama username, defaulting to `"library"`
### `/v1/embeddings`
#### Supported request fields
- [x] `model`
- [x] `input`
- [x] string
- [x] array of strings
- [ ] array of tokens
- [ ] array of token arrays
- [ ] `encoding format`
- [ ] `dimensions`
- [ ] `user`
## Models ## Models
Before using a model, pull it locally `ollama pull`: Before using a model, pull it locally `ollama pull`:
```shell ```shell
ollama pull llama3 ollama pull llama2
``` ```
### Default model names ### Default model names
@@ -282,7 +121,7 @@ ollama pull llama3
For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name: For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name:
``` ```
ollama cp llama3 gpt-3.5-turbo ollama cp llama2 gpt-3.5-turbo
``` ```
Afterwards, this new model name can be specified the `model` field: Afterwards, this new model name can be specified the `model` field:

Some files were not shown because too many files have changed in this diff Show More