diff --git a/.dockerignore b/.dockerignore
index 43f2e07dd..fada7a9b4 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -7,3 +7,5 @@ llm/llama.cpp
 .env
 .cache
 test_data
+llm/build
+llama/build
diff --git a/.gitattributes b/.gitattributes
index a8436e9cf..f1c8bcb4d 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,3 @@
 llm/ext_server/* linguist-vendored
+* text=auto
+*.go text eol=lf
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 61ca3c433..4090f2066 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -102,7 +102,8 @@ jobs:
         with:
           name: generate-windows-cpu
           path: |
-            llm/build/**/bin/*
+            build/**/*
+            build/**/*.a
             llm/build/**/*.a
             dist/windows-amd64/**
@@ -147,7 +148,7 @@ jobs:
         run: |
           $ErrorActionPreference = "Stop"
           write-host "downloading AMD HIP Installer"
-          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
           write-host "Installing AMD HIP"
           Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
          write-host "Completed AMD HIP"
@@ -176,7 +177,7 @@ jobs:
         with:
           name: generate-windows-rocm
           path: |
-            llm/build/**/bin/*
+            build/**/*
             dist/windows-amd64/**
       - uses: actions/upload-artifact@v4
         with:
@@ -187,6 +188,13 @@ jobs:
   generate-windows-cuda:
     environment: release
     runs-on: windows
+    strategy:
+      matrix:
+        cuda:
+          - version: "11"
+            url: 'https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
+          - version: "12"
+            url: 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
     env:
       KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
     steps:
@@ -220,11 +228,11 @@ jobs:
         with:
           go-version-file: go.mod
           cache: true
-      - name: 'Install CUDA'
+      - name: 'Install CUDA ${{ matrix.cuda.version }}'
         run: |
           $ErrorActionPreference = "Stop"
           write-host "downloading CUDA Installer"
-          Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
+          Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
           write-host "Installing CUDA"
           Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
           write-host "Completed CUDA"
@@ -256,16 +264,144 @@ jobs:
           cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
       - uses: actions/upload-artifact@v4
         with:
-          name: generate-windows-cuda
+          name: generate-windows-cuda-${{ matrix.cuda.version }}
           path: |
-            llm/build/**/bin/*
+            build/**/*
             dist/windows-amd64/**
       - uses: actions/upload-artifact@v4
         with:
-          name: windows-cuda-deps
+          name: windows-cuda-deps-${{ matrix.cuda.version }}
           path: dist/deps/*
 
-  # Import the prior generation steps and build the final windows assets
+
+  # windows arm64 generate, go build, and zip file (no installer)
+  # Output of this build is aggregated into the final x86 build
+  # for a unified windows installer
+  windows-arm64:
+    runs-on: windows-arm64
+    environment: release
+    env:
+      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
+    steps:
+      # The current Windows arm64 beta image has effectively zero dev tools installed...
+      - name: Install git and gzip
+        run: |
+          Set-ExecutionPolicy Bypass -Scope Process -Force
+          [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072
+          iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+          choco install -y --no-progress git gzip
+          echo "C:\Program Files\Git\cmd" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "C:\ProgramData\chocolatey\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+      - name: Install Visual Studio 2022
+        run: |
+          $components = @(
+            "Microsoft.VisualStudio.Component.CoreEditor",
+            "Microsoft.VisualStudio.Workload.CoreEditor",
+            "Microsoft.VisualStudio.Component.Roslyn.Compiler",
+            "Microsoft.Component.MSBuild",
+            "Microsoft.VisualStudio.Component.TextTemplating",
+            "Microsoft.VisualStudio.Component.Debugger.JustInTime",
+            "Microsoft.VisualStudio.Component.VC.CoreIde",
+            "Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
+            "Microsoft.VisualStudio.Component.Windows11SDK.22621",
+            "Microsoft.VisualStudio.Component.VC.Tools.ARM64EC",
+            "Microsoft.VisualStudio.Component.VC.Tools.ARM64",
+            "Microsoft.VisualStudio.Component.VC.ATL",
+            "Microsoft.VisualStudio.Component.VC.ATL.ARM64",
+            "Microsoft.VisualStudio.Component.Graphics",
+            "Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
+            "Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core",
+            "Microsoft.VisualStudio.Component.Windows11Sdk.WindowsPerformanceToolkit",
+            "Microsoft.VisualStudio.Component.CppBuildInsights",
+            "Microsoft.VisualStudio.Component.VC.DiagnosticTools",
+            "Microsoft.VisualStudio.ComponentGroup.WebToolsExtensions.CMake",
+            "Microsoft.VisualStudio.Component.VC.CMake.Project",
+            "Microsoft.VisualStudio.Component.VC.ASAN",
+            "Microsoft.VisualStudio.Component.Vcpkg",
+            "Microsoft.VisualStudio.Workload.NativeDesktop"
+          )
+          $config = @{
+            "version" = "1.0"
+            "components" = $components
+            "extensions" = @()
+          }
+          $configPath = "${env:RUNNER_TEMP}\vsconfig"
+          $config | ConvertTo-Json | Out-File -FilePath $configPath
+          $bootstrapperFilePath = "${env:RUNNER_TEMP}\vs_community.exe"
+          write-host "Downloading Visual Studio 2022"
+          Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_community.exe" -outfile $bootstrapperFilePath
+          $bootstrapperArgumentList = ('/c', $bootstrapperFilePath, '--config', $configPath, '--quiet', '--wait' )
+          write-host "Installing Visual Studio 2022"
+          $process = Start-Process -FilePath cmd.exe -ArgumentList $bootstrapperArgumentList -Wait -PassThru
+          $exitCode = $process.ExitCode
+          write-host $exitCode
+      # pacman in mingw/msys2 is ~broken on windows arm right now - hangs consistently during attempts to install
+      # so we'll use this alternative GCC binary
+      - name: Install llvm-mingw GCC
+        run: |
+          $gcc_url="https://github.com/mstorsjo/llvm-mingw/releases/download/20240619/llvm-mingw-20240619-ucrt-aarch64.zip"
+          write-host "Downloading llvm-mingw"
+          Invoke-WebRequest -Uri "${gcc_url}" -OutFile "${env:RUNNER_TEMP}\gcc.zip"
+          write-host "Unpacking llvm-mingw"
+          expand-archive -path "${env:RUNNER_TEMP}\gcc.zip" -destinationpath "c:\"
+          mv c:\llvm-mingw-* c:\llvm-mingw
+          echo "c:\llvm-mingw\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+      - name: Verify GCC
+        run: |
+          echo $env:PATH
+          gcc --version
+      - uses: actions/checkout@v4
+      - name: Set Version
+        run: |
+          $ver=${env:GITHUB_REF_NAME}.trim("v")
+          write-host VERSION=$ver | Out-File -FilePath ${env:GITHUB_ENV} -Encoding utf8 -Append
+      - uses: 'google-github-actions/auth@v2'
+        with:
+          project_id: 'ollama'
+          credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
+      - run: echo "${{ vars.OLLAMA_CERT }}" | Out-File -FilePath ollama_inc.crt -Encoding utf8
+      - name: install Windows SDK 8.1 to get signtool
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "downloading SDK"
+          Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe"
+          Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
+          write-host "Win SDK 8.1 installed"
+          gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe'
+      - name: install signing plugin
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "downloading plugin"
+          Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip"
+          Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\
+          write-host "Installing plugin"
+          & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
+          write-host "plugin installed"
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+          cache: true
+      - run: go get ./...
+      - run: |
+          $gopath=(get-command go).source | split-path -parent
+          $gccpath=(get-command gcc).source | split-path -parent
+          & "C:\Program Files\Microsoft Visual Studio\2022\Community\Common7\Tools\Launch-VsDevShell.ps1"
+          cd $env:GITHUB_WORKSPACE
+          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
+          $env:PATH="$gopath;$gccpath;$env:PATH;C:\Program Files\Microsoft Visual Studio\2022\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin"
+          echo $env:PATH
+          $env:ARCH="arm64"
+          .\scripts\build_windows.ps1 buildOllama buildApp gatherDependencies distZip
+        name: 'Windows Build'
+      - uses: actions/upload-artifact@v4
+        with:
+          name: windows-arm64
+          path: |
+            dist/windows-arm64/**
+            dist/windows-arm64-app.exe
+            dist/ollama-windows-arm64.zip
+
+  # Import the prior generation steps plus the full arm64 build, and build the final windows assets
   build-windows:
     environment: release
     runs-on: windows
@@ -273,6 +409,7 @@ jobs:
       - generate-windows-cuda
       - generate-windows-rocm
       - generate-windows-cpu
+      - windows-arm64
     env:
       KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
     steps:
@@ -314,17 +451,27 @@ jobs:
           name: generate-windows-cpu
       - uses: actions/download-artifact@v4
         with:
-          name: generate-windows-cuda
+          name: generate-windows-cuda-11
       - uses: actions/download-artifact@v4
         with:
-          name: windows-cuda-deps
+          name: generate-windows-cuda-12
+      - uses: actions/download-artifact@v4
+        with:
+          name: windows-cuda-deps-11
+      - uses: actions/download-artifact@v4
+        with:
+          name: windows-cuda-deps-12
       - uses: actions/download-artifact@v4
         with:
           name: windows-rocm-deps
       - uses: actions/download-artifact@v4
         with:
           name: generate-windows-rocm
-      - run: dir llm/build
+      - uses: actions/download-artifact@v4
+        with:
+          name: windows-arm64
+          path: dist
+      - run: dir build
       - run: |
           $gopath=(get-command go).source | split-path -parent
           & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
@@ -345,9 +492,7 @@ jobs:
     environment: release
     runs-on: linux
     env:
-      OLLAMA_SKIP_MANIFEST_CREATE: '1'
-      BUILD_ARCH: amd64
-      PUSH: '1'
+      PLATFORM: linux/amd64
     steps:
       - uses: actions/checkout@v4
         with:
@@ -355,15 +500,8 @@ jobs:
      - name: Set Version
        shell: bash
        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
       - run: |
           ./scripts/build_linux.sh
-          ./scripts/build_docker.sh
-          mv dist/deps/* dist/
       - uses: actions/upload-artifact@v4
         with:
           name: dist-linux-amd64
@@ -377,9 +515,7 @@ jobs:
     environment: release
     runs-on: linux-arm64
     env:
-      OLLAMA_SKIP_MANIFEST_CREATE: '1'
-      BUILD_ARCH: arm64
-      PUSH: '1'
+      PLATFORM: linux/arm64
     steps:
       - uses: actions/checkout@v4
         with:
           submodules: recursive
@@ -408,14 +544,8 @@ jobs:
           sudo usermod -aG docker $USER
           sudo apt-get install acl
           sudo setfacl --modify user:$USER:rw /var/run/docker.sock
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
       - run: |
           ./scripts/build_linux.sh
-          ./scripts/build_docker.sh
       - uses: actions/upload-artifact@v4
         with:
           name: dist-linux-arm64
           path: |
             dist/*linux*
             !dist/*-cov
 
+  # Container image build
+  build-container-image:
+    environment: release
+    strategy:
+      matrix:
+        runner:
+          - linux
+          - linux-arm64
+    runs-on: ${{ matrix.runner }}
+    env:
+      FINAL_IMAGE_REPO: ollama/ollama
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: 'Install Docker'
+        if: ${{ startsWith(matrix.runner, 'linux-arm64') }}
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y ca-certificates curl
+          sudo install -m 0755 -d /etc/apt/keyrings
+          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
+          sudo chmod a+r /etc/apt/keyrings/docker.asc
+          echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
+            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
+            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
+          sudo apt-get update
+          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
+          sudo usermod -aG docker $USER
+          sudo apt-get install acl
+          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FINAL_IMAGE_REPO }}
+          flavor: |
+            latest=false
+          tags: |
+            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
+            type=semver,pattern={{version}}
+      - name: Set Version
+        shell: bash
+        run: |
+          machine=$(uname -m)
+          case ${machine} in
+            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
+            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
+          esac >>$GITHUB_ENV
+          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ vars.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
+      - name: Build and push by digest
+        id: build
+        uses: docker/build-push-action@v6
+        with:
+          context: "."
+          platforms: linux/${{ env.ARCH }}
+          build-args: |
+            GOFLAGS
+          outputs: type=image,name=${{ env.FINAL_IMAGE_REPO }},push-by-digest=true,name-canonical=true,push=true
+      - name: Export digest
+        run: |
+          mkdir -p /tmp/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "/tmp/digests/${digest#sha256:}"
+      - name: Upload digest
+        uses: actions/upload-artifact@v4
+        with:
+          name: digests-${{ env.PLATFORM_PAIR }}
+          path: /tmp/digests/*
+          if-no-files-found: error
+          retention-days: 1
+  merge:
+    environment: release
+    runs-on: linux
+    needs:
+      - build-container-image
+    env:
+      FINAL_IMAGE_REPO: ollama/ollama
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          path: /tmp/digests
+          pattern: digests-*
+          merge-multiple: true
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FINAL_IMAGE_REPO }}
+          flavor: |
+            latest=false
+          tags: |
+            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
+            type=semver,pattern={{version}}
+      - name: Set Version
+        shell: bash
+        run: |
+          machine=$(uname -m)
+          case ${machine} in
+            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
+            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
+          esac >>$GITHUB_ENV
+          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ vars.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
+      - name: Create manifest list and push
+        working-directory: /tmp/digests
+        run: |
+          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+            $(printf '${{ env.FINAL_IMAGE_REPO }}@sha256:%s ' *)
+      - name: Inspect image
+        run: |
+          docker buildx imagetools inspect ${{ env.FINAL_IMAGE_REPO }}:${{ steps.meta.outputs.version }}
+  build-container-image-rocm:
+    environment: release
+    runs-on: linux
+    env:
+      FINAL_IMAGE_REPO: ollama/ollama
+      ARCH: amd64
+      PLATFORM_PAIR: linux-amd64
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FINAL_IMAGE_REPO }}
+          flavor: |
+            latest=false
+          tags: |
+            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
+            type=semver,pattern={{version}}
+      - name: Set Version
+        shell: bash
+        run: |
+          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ vars.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
+      - name: Build and push by digest
+        id: build
+        uses: docker/build-push-action@v6
+        with:
+          context: "."
+          target: runtime-rocm
+          build-args: |
+            GOFLAGS
+          tags: ${{ env.FINAL_IMAGE_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION}}-rocm
+          push: true
 
   # Aggregate all the assets and ship a release
   release:
     needs:
@@ -435,8 +737,6 @@ jobs:
     permissions:
       contents: write
     env:
-      OLLAMA_SKIP_IMAGE_BUILD: '1'
-      PUSH: '1'
       GH_TOKEN: ${{ github.token }}
     steps:
       - uses: actions/checkout@v4
@@ -445,12 +745,6 @@ jobs:
         run: |
           echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
           echo "RELEASE_VERSION=$(echo ${GITHUB_REF_NAME} | cut -f1 -d-)" >> $GITHUB_ENV
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - run: ./scripts/build_docker.sh
       - name: Retrieve built artifact
         uses: actions/download-artifact@v4
         with:
@@ -459,7 +753,8 @@ jobs:
           merge-multiple: true
       - run: |
           ls -lh dist/
-          (cd dist; sha256sum * > sha256sum.txt)
+          (cd dist; find . -type f | xargs sha256sum > ../sha256sum.txt)
+          mv sha256sum.txt dist/
           cat dist/sha256sum.txt
       - name: Create or update Release
         run: |
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 29adf56f3..26dc732af 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -58,6 +58,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     env:
       GOARCH: ${{ matrix.arch }}
+      CGO_ENABLED: '1'
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-go@v5
@@ -79,12 +80,7 @@ jobs:
       - run: go generate -x ./...
         if: ${{ ! startsWith(matrix.os, 'windows-') }}
         name: 'Unix Go Generate'
-      - uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
-          path: |
-            llm/build/**/bin/*
-            llm/build/**/*.a
+      - run: go build .
   generate-cuda:
     needs: [changes]
     if: ${{ needs.changes.outputs.GENERATE_CUDA == 'True' }}
@@ -112,19 +108,13 @@ jobs:
           go generate -x ./...
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
-      - uses: actions/upload-artifact@v4
-        with:
-          name: cuda-${{ matrix.cuda-version }}-libraries
-          path: |
-            llm/build/**/bin/*
-            dist/windows-amd64/**
   generate-rocm:
     needs: [changes]
     if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }}
     strategy:
       matrix:
         rocm-version:
-          - '6.1.1'
+          - '6.1.2'
     runs-on: linux
     container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
     steps:
@@ -145,12 +135,6 @@ jobs:
           go generate -x ./...
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
-      - uses: actions/upload-artifact@v4
-        with:
-          name: rocm-${{ matrix.rocm-version }}-libraries
-          path: |
-            llm/build/**/bin/*
-            dist/windows-amd64/**
 
   # ROCm generation step
   generate-windows-rocm:
@@ -167,7 +151,7 @@ jobs:
         run: |
           $ErrorActionPreference = "Stop"
           write-host "downloading AMD HIP Installer"
-          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
           write-host "Installing AMD HIP"
           Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
           write-host "Completed AMD HIP"
@@ -187,7 +171,6 @@ jobs:
         name: go generate
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
-    # TODO - do we need any artifacts?
 
   # CUDA generation step
   generate-windows-cuda:
@@ -229,7 +212,6 @@ jobs:
           go generate -x ./...
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
-    # TODO - do we need any artifacts?
 
   lint:
     strategy:
@@ -261,17 +243,9 @@ jobs:
             arm64) echo ARCH=arm64 ;;
           esac >>$GITHUB_ENV
         shell: bash
-      - run: |
-          mkdir -p llm/build/linux/$ARCH/stub/bin
-          touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server
-        if: ${{ startsWith(matrix.os, 'ubuntu-') }}
-      - run: |
-          mkdir -p llm/build/darwin/$ARCH/stub/bin
-          touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
-        if: ${{ startsWith(matrix.os, 'macos-') }}
       - uses: golangci/golangci-lint-action@v6
         with:
-          args: --timeout 8m0s -v ${{ startsWith(matrix.os, 'windows-') && '' || '--disable gofmt --disable goimports' }}
+          args: --timeout 8m0s -v
   test:
     strategy:
       matrix:
@@ -299,23 +273,10 @@ jobs:
           cache: true
       - run: |
           case ${{ matrix.arch }} in
-            amd64) echo ARCH=x86_64 ;;
+            amd64) echo ARCH=amd64 ;;
             arm64) echo ARCH=arm64 ;;
           esac >>$GITHUB_ENV
         shell: bash
-      - run: |
-          mkdir -p llm/build/linux/$ARCH/stub/bin
-          touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server
-        if: ${{ startsWith(matrix.os, 'ubuntu-') }}
-      - run: |
-          mkdir -p llm/build/darwin/$ARCH/stub/bin
-          touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
-        if: ${{ startsWith(matrix.os, 'macos-') }}
-        shell: bash
       - run: go generate ./...
       - run: go build
       - run: go test -v ./...
-      - uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.os }}-binaries
-          path: ollama
diff --git a/.gitignore b/.gitignore
index 0d826ab61..87f8b0072 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,7 @@ ggml-metal.metal
 test_data
 *.crt
 llm/build
+build/*/*/*
+!build/**/placeholder
+llama/build
 __debug_bin*
\ No newline at end of file
diff --git a/.golangci.yaml b/.golangci.yaml
index cfe06e07a..2e0ed3c7b 100644
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -7,22 +7,35 @@ linters:
     - bodyclose
     - containedctx
    - contextcheck
+    - errcheck
     - exportloopref
+    - gci
     - gocheckcompilerdirectives
-    # conditionally enable this on linux/macos
-    # - gofmt
-    # - goimports
+    - gofmt
+    - gofumpt
+    - gosimple
+    - govet
+    - ineffassign
     - intrange
+    - makezero
     - misspell
     - nilerr
     - nolintlint
     - nosprintfhostport
-    - testifylint
+    - staticcheck
+    - tenv
     - unconvert
     - unused
+    - usestdlibvars
     - wastedassign
     - whitespace
-    - usestdlibvars
+linters-settings:
+  gci:
+    sections: [standard, default, localmodule]
+  staticcheck:
+    checks:
+      - all
+      - -SA1019 # omit Deprecated check
 severity:
   default-severity: error
   rules:
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 000000000..f003a69da
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,37 @@
+# Contributing to Ollama
+
+Thank you for your interest in contributing to Ollama! Here are a few guidelines to help get you started.
+
+## Set up
+
+See the [development documentation](./docs/development.md) for instructions on how to build and run Ollama locally.
+
+## Pull requests
+
+### Ideal issues
+
+* [Bugs](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Abug): issues where Ollama stops working or where it results in an unexpected error.
+* [Performance](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Aperformance): issues to make Ollama faster at model inference, downloading or uploading.
+* [Security](https://github.com/ollama/ollama/blob/main/SECURITY.md): issues that could lead to a security vulnerability. As mentioned in [SECURITY.md](https://github.com/ollama/ollama/blob/main/SECURITY.md), please do not disclose security vulnerabilities publicly.
+
+### Issues that are harder to review
+
+* New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run, as they cannot be removed without potentially breaking users in the future.
+* Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
+* Documentation: small updates to fill in or correct missing documentation are helpful; however, large documentation additions can be hard to maintain over time.
+
+### Issues that may not be accepted
+
+* Changes that break backwards compatibility in Ollama's API (including the OpenAI-compatible API)
+* Changes that add significant friction to the user experience
+* Changes that create a large future maintenance burden for maintainers and contributors
+
+### Best practices
+
+* Commit messages: please leave both a title and a description in your commit messages. The title should be a short summary of the changes, with a leading word that explains the section of the code being changed (e.g. `api: fix parsing of prompt field`). In the description, leave 2-3 short sentences that explain more about the change and its impact.
+* Tests: please add test coverage to changes where possible.
+* Minimize dependencies: avoid adding new dependencies unless absolutely necessary.
+
+## Need help?
+
+If you need help with anything, feel free to reach out to us on our [Discord server](https://discord.gg/ollama).
diff --git a/Dockerfile b/Dockerfile
index 98a3ddfd2..0f43e618d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,10 @@
-ARG GOLANG_VERSION=1.22.1
+ARG GOLANG_VERSION=1.22.5
 ARG CMAKE_VERSION=3.22.1
-# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
-ARG CUDA_VERSION=11.3.1
-ARG ROCM_VERSION=6.1.1
+ARG CUDA_VERSION_11=11.3.1
+ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
+ARG CUDA_VERSION_12=12.4.0
+ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
+ARG ROCM_VERSION=6.1.2
 
 # Copy the minimal context we need to run the generate scripts
 FROM scratch AS llm-code
@@ -10,131 +12,243 @@ COPY .git .git
 COPY .gitmodules .gitmodules
 COPY llm llm
 
-FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
+FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_11-devel-centos7 AS cuda-11-build-amd64
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
+ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
+ARG CUDA_V11_ARCHITECTURES
+ENV GOARCH=amd64
+RUN --mount=type=cache,target=/root/.ccache \
+    OLLAMA_SKIP_STATIC_GENERATE=1 \
+    OLLAMA_SKIP_CPU_GENERATE=1 \
+    CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
+    CUDA_VARIANT="_v11" \
+    bash gen_linux.sh
 
-FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
+FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
+ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
+ARG CUDA_V12_ARCHITECTURES
+ENV GOARCH=amd64
+RUN --mount=type=cache,target=/root/.ccache \
+    OLLAMA_SKIP_STATIC_GENERATE=1 \
+    OLLAMA_SKIP_CPU_GENERATE=1 \
+    CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
+    CUDA_VARIANT="_v12" \
+    OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
+    bash gen_linux.sh
+
+FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-runner-arm64
+ARG CMAKE_VERSION
+COPY ./scripts/rh_linux_deps.sh /
+RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
+ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
+COPY --from=llm-code / /go/src/github.com/ollama/ollama/
+WORKDIR /go/src/github.com/ollama/ollama/llm/generate
+ARG CGO_CFLAGS
+ARG CUDA_V11_ARCHITECTURES
+ENV GOARCH=arm64
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 \
+    OLLAMA_SKIP_CPU_GENERATE=1 \
+    CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
+    CUDA_VARIANT="_v11" \
+    bash gen_linux.sh
+
+FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-runner-arm64
+ARG CMAKE_VERSION
+COPY ./scripts/rh_linux_deps.sh /
+RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
+ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
+COPY --from=llm-code / /go/src/github.com/ollama/ollama/
+WORKDIR /go/src/github.com/ollama/ollama/llm/generate
+ARG CGO_CFLAGS
+ARG CUDA_V12_ARCHITECTURES
+ENV GOARCH=arm64
+RUN --mount=type=cache,target=/root/.ccache \
+    OLLAMA_SKIP_STATIC_GENERATE=1 \
+    OLLAMA_SKIP_CPU_GENERATE=1 \
+    CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
+    CUDA_VARIANT="_v12" \
+    OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
+    bash gen_linux.sh
+
 FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
-ENV LIBRARY_PATH /opt/amdgpu/lib64
+ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:$PATH
+ENV LIBRARY_PATH=/opt/amdgpu/lib64
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
 ARG AMDGPU_TARGETS
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
-RUN mkdir /tmp/scratch && \
-    for dep in $(zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm*/bin/deps.txt.gz) ; do \
-    cp ${dep} /tmp/scratch/ || exit 1 ; \
-    done && \
-    (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
-    mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \
-    (cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . )
-
+ENV GOARCH=amd64
+RUN --mount=type=cache,target=/root/.ccache \
+    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
+RUN mkdir -p ../../dist/linux-amd64-rocm/lib/ollama && \
+    (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64-rocm/lib/ollama && tar xf - )
 
 FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
+ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 ARG OLLAMA_CUSTOM_CPU_DEFS
 ARG CGO_CFLAGS
+ENV GOARCH=amd64
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 
 FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
-RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
+RUN --mount=type=cache,target=/root/.ccache \
+    OLLAMA_CPU_TARGET="static" bash gen_linux.sh
 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
+RUN --mount=type=cache,target=/root/.ccache \
+    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
+RUN --mount=type=cache,target=/root/.ccache \
+    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" bash gen_linux.sh
 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
+RUN --mount=type=cache,target=/root/.ccache \
+    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" bash gen_linux.sh
 
-FROM --platform=linux/arm64 centos:7 AS cpu-builder-arm64
+FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
+ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 ARG OLLAMA_CUSTOM_CPU_DEFS
 ARG CGO_CFLAGS
+ENV GOARCH=arm64
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 
 FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
-RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
+RUN --mount=type=cache,target=/root/.ccache \
+    OLLAMA_CPU_TARGET="static" bash gen_linux.sh
 FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
+RUN --mount=type=cache,target=/root/.ccache \
+    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
 
-# Intermediate stage used for ./scripts/build_linux.sh
+# Intermediate stages used for ./scripts/build_linux.sh
 FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
-ENV CGO_ENABLED 1
+ENV CGO_ENABLED=1
 WORKDIR /go/src/github.com/ollama/ollama
 COPY . .
-COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
+COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/ llm/build/
+COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
+COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
+COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
+COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
+COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
 ARG GOFLAGS
 ARG CGO_CFLAGS
-RUN go build -trimpath .
+RUN --mount=type=cache,target=/root/.ccache \
+    go build -trimpath -o dist/linux-amd64/bin/ollama .
+RUN cd dist/linux-$GOARCH && \
+    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
+RUN cd dist/linux-$GOARCH-rocm && \
+    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz
 
-# Intermediate stage used for ./scripts/build_linux.sh
 FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
-ENV CGO_ENABLED 1
+ENV CGO_ENABLED=1
 ARG GOLANG_VERSION
 WORKDIR /go/src/github.com/ollama/ollama
 COPY . .
-COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
+COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/ llm/build/
+COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/build/ build/
+COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/build/ build/
 ARG GOFLAGS
 ARG CGO_CFLAGS
-RUN go build -trimpath .
+RUN --mount=type=cache,target=/root/.ccache \
+    go build -trimpath -o dist/linux-arm64/bin/ollama .
+RUN cd dist/linux-$GOARCH && \
+    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
 
-# Runtime stages
-FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
-RUN apt-get update && apt-get install -y ca-certificates
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
-FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
-RUN apt-get update && apt-get install -y ca-certificates
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
+FROM --platform=linux/amd64 scratch AS dist-amd64
+COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
+FROM --platform=linux/arm64 scratch AS dist-arm64
+COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
+FROM dist-$TARGETARCH as dist
 
-# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
-FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
-RUN update-pciids
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
+
+# Optimized container images do not carry nested payloads
+FROM --platform=linux/amd64 static-build-amd64 AS container-build-amd64
+WORKDIR /go/src/github.com/ollama/ollama
+COPY . .
+ARG GOFLAGS
+ARG CGO_CFLAGS
+RUN --mount=type=cache,target=/root/.ccache \
+    go build -trimpath -o dist/linux-amd64/bin/ollama .
+
+FROM --platform=linux/arm64 static-build-arm64 AS container-build-arm64
+WORKDIR /go/src/github.com/ollama/ollama
+COPY . .
+ARG GOFLAGS
+ARG CGO_CFLAGS
+RUN --mount=type=cache,target=/root/.ccache \
+    go build -trimpath -o dist/linux-arm64/bin/ollama .
+
+FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
+RUN apt-get update && \
+    apt-get install -y ca-certificates && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
+COPY --from=cpu-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+
+FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
+RUN apt-get update && \
+    apt-get install -y ca-certificates && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
+COPY --from=cpu-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+
+# ROCm libraries are larger, so we keep them distinct from the CPU/CUDA image
+FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
+# Front-load the ROCm libraries, which are large and rarely change, to increase the chance of a common layer
+# across releases
+COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
+RUN apt-get update && \
+    apt-get install -y ca-certificates && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
+COPY --from=cpu-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
 EXPOSE 11434
-ENV OLLAMA_HOST 0.0.0.0
+ENV OLLAMA_HOST=0.0.0.0
 
 ENTRYPOINT ["/bin/ollama"]
 CMD ["serve"]
 
 FROM runtime-$TARGETARCH
 EXPOSE 11434
-ENV OLLAMA_HOST 0.0.0.0
+ENV OLLAMA_HOST=0.0.0.0
 ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
 ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
diff --git a/README.md b/README.md
index 62f5cd65c..466f315ad 100644
--- a/README.md
+++ b/README.md
@@ -35,10 +35,10 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `ollama/ollama` is available on Docker Hub.
 
 ## Quickstart
 
-To run and chat with [Llama 3](https://ollama.com/library/llama3):
+To run and chat with [Llama 3.1](https://ollama.com/library/llama3.1):
 
 ```
-ollama run llama3
+ollama run llama3.1
 ```
 
 ## Model library
@@ -49,10 +49,12 @@ Here are some example models that can be downloaded:
 
 | Model              | Parameters | Size  | Download                       |
 | ------------------ | ---------- | ----- | ------------------------------ |
-| Llama 3            | 8B         | 4.7GB | `ollama run llama3`            |
-| Llama 3            | 70B        | 40GB  | `ollama run llama3:70b`        |
+| Llama 3.1          | 8B         | 4.7GB | `ollama run llama3.1`          |
+| Llama 3.1          | 70B        | 40GB  | `ollama run llama3.1:70b`      |
+| Llama 3.1          | 405B       | 231GB | `ollama run llama3.1:405b`     |
 | Phi 3 Mini         | 3.8B       | 2.3GB | `ollama run phi3`              |
 | Phi 3 Medium       | 14B        | 7.9GB | `ollama run phi3:medium`       |
+| Gemma 2            | 2B         | 1.6GB | `ollama run gemma2:2b`         |
 | Gemma 2            | 9B         | 5.5GB | `ollama run gemma2`            |
 | Gemma 2            | 27B        | 16GB  | `ollama run gemma2:27b`        |
 | Mistral            | 7B         | 4.1GB | `ollama run mistral`           |
@@ -64,7 +66,8 @@ Here are some example models that can be downloaded:
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
 | Solar              | 10.7B      | 6.1GB | `ollama run solar`             |
 
-> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
+> [!NOTE]
+> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
 
 ## Customize a model
 
@@ -96,16 +99,16 @@ See the [guide](docs/import.md) on importing models for more information.
 
 ### Customize a prompt
 
-Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3` model:
+Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.1` model:
 
 ```
-ollama pull llama3
+ollama pull llama3.1
 ```
 
 Create a `Modelfile`:
 
 ```
-FROM llama3
+FROM llama3.1
 
 # set the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1
@@ -140,7 +143,7 @@ ollama create mymodel -f ./Modelfile
 ### Pull a model
 
 ```
-ollama pull llama3
+ollama pull llama3.1
 ```
 
 > This command can also be used to update a local model. Only the diff will be pulled.
@@ -148,13 +151,13 @@ ollama pull llama3
 ### Remove a model
 
 ```
-ollama rm llama3
+ollama rm llama3.1
 ```
 
 ### Copy a model
 
 ```
-ollama cp llama3 my-model
+ollama cp llama3.1 my-model
 ```
 
 ### Multiline input
@@ -171,21 +174,21 @@ I'm a basic program that prints the famous "Hello, world!" message to the console.
 ### Multimodal models
 
 ```
->>> What's in this image? /Users/jmorgan/Desktop/smile.png
+ollama run llava "What's in this image? /Users/jmorgan/Desktop/smile.png"
 The image features a yellow smiley face, which is likely the central focus of the picture.
 ```
 
 ### Pass the prompt as an argument
 
 ```
-$ ollama run llama3 "Summarize this file: $(cat README.md)"
+$ ollama run llama3.1 "Summarize this file: $(cat README.md)"
 Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
 ```
 
 ### Show model information
 
 ```
-ollama show llama3
+ollama show llama3.1
 ```
 
 ### List models on your computer
 
@@ -194,6 +197,18 @@ ollama show llama3
 ollama list
 ```
 
+### List which models are currently loaded
+
+```
+ollama ps
+```
+
+### Stop a model which is currently running
+
+```
+ollama stop llama3.1
+```
+
 ### Start Ollama
 
 `ollama serve` is used when you want to start ollama without running the desktop application.
@@ -213,7 +228,7 @@ Next, start the server:
 
 Finally, in a separate shell, run a model:
 
 ```
-./ollama run llama3
+./ollama run llama3.1
 ```
 
 ## REST API
@@ -224,7 +239,7 @@ Ollama has a REST API for running and managing models.
 
 ```
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3",
+  "model": "llama3.1",
   "prompt":"Why is the sky blue?"
 }'
 ```
@@ -233,7 +248,7 @@ curl http://localhost:11434/api/generate -d '{
 
 ```
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3",
+  "model": "llama3.1",
   "messages": [
     { "role": "user", "content": "why is the sky blue?" }
   ]
@@ -292,7 +307,24 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
 - [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
 - [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
+- [AiLama](https://github.com/zeyoyt/ailama) (A Discord User App that allows you to interact with Ollama anywhere in Discord)
 - [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
+- [Painting Droid](https://github.com/mateuszmigas/painting-droid) (Painting app with AI integrations)
+- [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
+- [AI Studio](https://github.com/MindWorkAI/AI-Studio)
+- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
+- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
+- [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
+- [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
+- [Go-CREW](https://www.jonathanhecl.com/go-crew/) (Powerful Offline RAG in Golang)
+- [PartCAD](https://github.com/openvmp/partcad/) (CAD model generation with OpenSCAD and CadQuery)
+- [Ollama4j Web UI](https://github.com/ollama4j/ollama4j-web-ui) - Java-based Web UI for Ollama built with Vaadin, Spring Boot and Ollama4j
+- [PyOllaMx](https://github.com/kspviswa/pyOllaMx) - macOS application capable of chatting with both Ollama and Apple MLX models.
+- [Claude Dev](https://github.com/saoudrizwan/claude-dev) - VSCode extension for multi-file/whole-repo coding
+- [Cherry Studio](https://github.com/kangfenmao/cherry-studio) (Desktop client with Ollama support)
+- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)
+- [Archyve](https://github.com/nickthecook/archyve) (RAG-enabling document library)
+- [crewAI with Mesop](https://github.com/rapidarchitect/ollama-crew-mesop) (Mesop Web Interface to run crewAI with Ollama)
 
 ### Terminal
 
@@ -316,6 +348,12 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [tlm](https://github.com/yusufcanb/tlm)
 - [podman-ollama](https://github.com/ericcurtin/podman-ollama)
 - [gollama](https://github.com/sammcj/gollama)
+- [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/)
+- [Ollama Mixture of Experts (MOE) in 50 lines of code](https://github.com/rapidarchitect/ollama_moe)
+- [vim-intelligence-bridge](https://github.com/pepo-ec/vim-intelligence-bridge) Simple interaction of "Ollama" with the Vim editor
+
+### Apple Vision Pro
+- [Enchanted](https://github.com/AugustDev/enchanted)
 
 ### Database
 
@@ -325,22 +363,28 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Package managers
 
 - [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
+- [Gentoo](https://github.com/gentoo/guru/tree/master/app-misc/ollama)
 - [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
 - [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
+- [Nix package](https://search.nixos.org/packages?channel=24.05&show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
+- [Flox](https://flox.dev/blog/ollama-part-one)
 
 ### Libraries
 
 - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
+- [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama)
+- [crewAI](https://github.com/crewAIInc/crewAI)
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
 - [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
 - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
 - [LiteLLM](https://github.com/BerriAI/litellm)
+- [OllamaFarm for Go](https://github.com/presbrey/ollamafarm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
 - [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
 - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
 - [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
-- [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
+- [Ollama4j for Java](https://github.com/ollama4j/ollama4j)
 - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
 - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
 - [Ollama for Dart](https://github.com/breitburg/dart-ollama)
@@ -357,11 +401,17 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama)
 - [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama)
 - [LlamaScript](https://github.com/Project-Llama/llamascript)
+- [Gollm](https://docs.gollm.co/examples/ollama-example)
+- [Ollamaclient for Golang](https://github.com/xyproto/ollamaclient)
+- [High-level function abstraction in Go](https://gitlab.com/tozd/go/fun)
+- [Ollama PHP](https://github.com/ArdaGnsrn/ollama-php)
+- [Agents-Flex for Java](https://github.com/agents-flex/agents-flex) with [example](https://github.com/agents-flex/agents-flex/tree/main/agents-flex-llm/agents-flex-llm-ollama/src/test/java/com/agentsflex/llm/ollama)
 
 ### Mobile
 
 - [Enchanted](https://github.com/AugustDev/enchanted)
 - [Maid](https://github.com/Mobile-Artificial-Intelligence/maid)
+- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)
 
 ### Extensions & Plugins
 
@@ -384,13 +434,18 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
 - [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot)
 - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
-- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
+- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face)
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
+- [Plasmoid Ollama Control](https://github.com/imoize/plasmoid-ollamacontrol) (KDE Plasma extension that allows you to quickly manage/control Ollama model)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
 - [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
 - [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)
+- [vnc-lm](https://github.com/jk011ru/vnc-lm) (A containerized Discord bot with support for attachments and web links)
+- [LSP-AI](https://github.com/SilasMarvin/lsp-ai) (Open-source language server for AI-powered functionality)
+- [QodeAssist](https://github.com/Palm1r/QodeAssist) (AI-powered coding assistant plugin for Qt Creator)
+- [Obsidian Quiz Generator plugin](https://github.com/ECuiDev/obsidian-quiz-generator)
 
 ### Supported backends
 
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 000000000..d38bb7c4e
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,25 @@
+# Security
+
+The Ollama maintainer team takes security seriously and will actively work to resolve security issues.
+
+## Reporting a vulnerability
+
+If you discover a security vulnerability, please do not open a public issue. Instead, please report it by emailing hello@ollama.com. We ask that you give us sufficient time to investigate and address the vulnerability before disclosing it publicly.
+
+Please include the following details in your report:
+- A description of the vulnerability
+- Steps to reproduce the issue
+- Your assessment of the potential impact
+- Any possible mitigations
+
+## Security best practices
+
+While the maintainer team does its best to secure Ollama, users are encouraged to implement their own security best practices, such as:
+
+- Regularly updating to the latest version of Ollama
+- Securing access to hosted instances of Ollama
+- Monitoring systems for unusual activity
+
+## Contact
+
+For any other questions or concerns related to security, please contact us at hello@ollama.com.
diff --git a/api/client.go b/api/client.go
index fccbc9ad7..2528fb21f 100644
--- a/api/client.go
+++ b/api/client.go
@@ -18,9 +18,9 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
-	"net"
 	"net/http"
 	"net/url"
 	"runtime"
@@ -63,13 +63,8 @@ func checkError(resp *http.Response, body []byte) error {
 // If the variable is not specified, a default ollama host and port will be
 // used.
 func ClientFromEnvironment() (*Client, error) {
-	ollamaHost := envconfig.Host
-
 	return &Client{
-		base: &url.URL{
-			Scheme: ollamaHost.Scheme,
-			Host:   net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
-		},
+		base: envconfig.Host(),
 		http: http.DefaultClient,
 	}, nil
 }
@@ -178,7 +173,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn func([]byte) error) error {
 	}
 
 	if errorResponse.Error != "" {
-		return fmt.Errorf(errorResponse.Error)
+		return errors.New(errorResponse.Error)
 	}
 
 	if response.StatusCode >= http.StatusBadRequest {
@@ -303,7 +298,7 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
 	return &lr, nil
 }
 
-// List running models.
+// ListRunning lists running models.
 func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
 	var lr ProcessResponse
 	if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
@@ -338,7 +333,7 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, error) {
 	return &resp, nil
 }
 
-// Hearbeat checks if the server has started and is responsive; if yes, it
+// Heartbeat checks if the server has started and is responsive; if yes, it
 // returns nil, otherwise an error.
 func (c *Client) Heartbeat(ctx context.Context) error {
 	if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
@@ -347,7 +342,16 @@ func (c *Client) Heartbeat(ctx context.Context) error {
 	return nil
 }
 
-// Embeddings generates embeddings from a model.
+// Embed generates embeddings from a model.
+func (c *Client) Embed(ctx context.Context, req *EmbedRequest) (*EmbedResponse, error) {
+	var resp EmbedResponse
+	if err := c.do(ctx, http.MethodPost, "/api/embed", req, &resp); err != nil {
+		return nil, err
+	}
+	return &resp, nil
+}
+
+// Embeddings generates an embedding from a model.
 func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error) {
 	var resp EmbeddingResponse
 	if err := c.do(ctx, http.MethodPost, "/api/embeddings", req, &resp); err != nil {
diff --git a/api/client_test.go b/api/client_test.go
index fe9fd74f7..23fe9334b 100644
--- a/api/client_test.go
+++ b/api/client_test.go
@@ -2,8 +2,6 @@ package api
 
 import (
 	"testing"
-
-	"github.com/ollama/ollama/envconfig"
 )
 
 func TestClientFromEnvironment(t *testing.T) {
@@ -33,7 +31,6 @@ func TestClientFromEnvironment(t *testing.T) {
 	for k, v := range testCases {
 		t.Run(k, func(t *testing.T) {
 			t.Setenv("OLLAMA_HOST", v.value)
-			envconfig.LoadConfig()
 
 			client, err := ClientFromEnvironment()
 			if err != v.err {
diff --git a/api/types.go b/api/types.go
index 95ed5d37e..df7bab210 100644
--- a/api/types.go
+++ b/api/types.go
@@ -47,6 +47,9 @@ type GenerateRequest struct {
 	// Prompt is the textual prompt to send to the model.
 	Prompt string `json:"prompt"`
 
+	// Suffix is the text that comes after the inserted text.
+	Suffix string `json:"suffix"`
+
 	// System overrides the model's default system message/prompt.
 	System string `json:"system"`
 
@@ -97,17 +100,85 @@ type ChatRequest struct {
 	// following the request.
 	KeepAlive *Duration `json:"keep_alive,omitempty"`
 
+	// Tools is an optional list of tools the model has access to.
+	Tools `json:"tools,omitempty"`
+
 	// Options lists model-specific options.
 	Options map[string]interface{} `json:"options"`
 }
 
+type Tools []Tool
+
+func (t Tools) String() string {
+	bts, _ := json.Marshal(t)
+	return string(bts)
+}
+
+func (t Tool) String() string {
+	bts, _ := json.Marshal(t)
+	return string(bts)
+}
+
 // Message is a single message in a chat sequence. The message contains the
 // role ("system", "user", or "assistant"), the content and an optional list
 // of images.
 type Message struct {
-	Role    string      `json:"role"`
-	Content string      `json:"content"`
-	Images  []ImageData `json:"images,omitempty"`
+	Role      string      `json:"role"`
+	Content   string      `json:"content"`
+	Images    []ImageData `json:"images,omitempty"`
+	ToolCalls []ToolCall  `json:"tool_calls,omitempty"`
+}
+
+func (m *Message) UnmarshalJSON(b []byte) error {
+	type Alias Message
+	var a Alias
+	if err := json.Unmarshal(b, &a); err != nil {
+		return err
+	}
+
+	*m = Message(a)
+	m.Role = strings.ToLower(m.Role)
+	return nil
+}
+
+type ToolCall struct {
+	Function ToolCallFunction `json:"function"`
+}
+
+type ToolCallFunction struct {
+	Name      string                    `json:"name"`
+	Arguments ToolCallFunctionArguments `json:"arguments"`
+}
+
+type ToolCallFunctionArguments map[string]any
+
+func (t *ToolCallFunctionArguments) String() string {
+	bts, _ := json.Marshal(t)
+	return string(bts)
+}
+
+type Tool struct {
+	Type     string       `json:"type"`
+	Function ToolFunction `json:"function"`
+}
+
+type ToolFunction struct {
+	Name        string `json:"name"`
+	Description string `json:"description"`
+	Parameters  struct {
+		Type       string   `json:"type"`
+		Required   []string `json:"required"`
+		Properties map[string]struct {
+			Type        string   `json:"type"`
+			Description string   `json:"description"`
+			Enum        []string `json:"enum,omitempty"`
+		} `json:"properties"`
+	} `json:"parameters"`
+}
+
+func (t *ToolFunction) String() string {
+	bts, _ := json.Marshal(t)
+	return string(bts)
 }
 
Its fields are @@ -143,6 +214,7 @@ type Options struct { NumPredict int `json:"num_predict,omitempty"` TopK int `json:"top_k,omitempty"` TopP float32 `json:"top_p,omitempty"` + MinP float32 `json:"min_p,omitempty"` TFSZ float32 `json:"tfs_z,omitempty"` TypicalP float32 `json:"typical_p,omitempty"` RepeatLastN int `json:"repeat_last_n,omitempty"` @@ -159,49 +231,45 @@ type Options struct { // Runner options which must be set when the model is loaded into memory type Runner struct { - UseNUMA bool `json:"numa,omitempty"` - NumCtx int `json:"num_ctx,omitempty"` - NumBatch int `json:"num_batch,omitempty"` - NumGPU int `json:"num_gpu,omitempty"` - MainGPU int `json:"main_gpu,omitempty"` - LowVRAM bool `json:"low_vram,omitempty"` - F16KV bool `json:"f16_kv,omitempty"` - LogitsAll bool `json:"logits_all,omitempty"` - VocabOnly bool `json:"vocab_only,omitempty"` - UseMMap TriState `json:"use_mmap,omitempty"` - UseMLock bool `json:"use_mlock,omitempty"` - NumThread int `json:"num_thread,omitempty"` + NumCtx int `json:"num_ctx,omitempty"` + NumBatch int `json:"num_batch,omitempty"` + NumGPU int `json:"num_gpu,omitempty"` + MainGPU int `json:"main_gpu,omitempty"` + LowVRAM bool `json:"low_vram,omitempty"` + F16KV bool `json:"f16_kv,omitempty"` + LogitsAll bool `json:"logits_all,omitempty"` + VocabOnly bool `json:"vocab_only,omitempty"` + UseMMap *bool `json:"use_mmap,omitempty"` + UseMLock bool `json:"use_mlock,omitempty"` + NumThread int `json:"num_thread,omitempty"` } -type TriState int +// EmbedRequest is the request passed to [Client.Embed]. +type EmbedRequest struct { + // Model is the model name. + Model string `json:"model"` -const ( - TriStateUndefined TriState = -1 - TriStateFalse TriState = 0 - TriStateTrue TriState = 1 -) + // Input is the input to embed. + Input any `json:"input"` -func (b *TriState) UnmarshalJSON(data []byte) error { - var v bool - if err := json.Unmarshal(data, &v); err != nil { - return err - } - if v { - *b = TriStateTrue - } - *b = TriStateFalse - return nil + // KeepAlive controls how long the model will stay loaded in memory following + // this request. + KeepAlive *Duration `json:"keep_alive,omitempty"` + + Truncate *bool `json:"truncate,omitempty"` + + // Options lists model-specific options. + Options map[string]interface{} `json:"options"` } -func (b *TriState) MarshalJSON() ([]byte, error) { - if *b == TriStateUndefined { - return nil, nil - } - var v bool - if *b == TriStateTrue { - v = true - } - return json.Marshal(v) +// EmbedResponse is the response from [Client.Embed]. +type EmbedResponse struct { + Model string `json:"model"` + Embeddings [][]float32 `json:"embeddings"` + + TotalDuration time.Duration `json:"total_duration,omitempty"` + LoadDuration time.Duration `json:"load_duration,omitempty"` + PromptEvalCount int `json:"prompt_eval_count,omitempty"` } // EmbeddingRequest is the request passed to [Client.Embeddings]. @@ -228,15 +296,17 @@ type EmbeddingResponse struct { // CreateRequest is the request passed to [Client.Create]. 
type CreateRequest struct { Model string `json:"model"` - Path string `json:"path"` Modelfile string `json:"modelfile"` Stream *bool `json:"stream,omitempty"` Quantize string `json:"quantize,omitempty"` - // Name is deprecated, see Model + // Deprecated: set the model name with Model instead Name string `json:"name"` - // Quantization is deprecated, see Quantize + // Deprecated: set the file content with Modelfile instead + Path string `json:"path"` + + // Deprecated: use Quantize instead Quantization string `json:"quantization,omitempty"` } @@ -244,20 +314,22 @@ type CreateRequest struct { type DeleteRequest struct { Model string `json:"model"` - // Name is deprecated, see Model + // Deprecated: set the model name with Model instead Name string `json:"name"` } // ShowRequest is the request passed to [Client.Show]. type ShowRequest struct { - Model string `json:"model"` - System string `json:"system"` + Model string `json:"model"` + System string `json:"system"` + + // Template is deprecated Template string `json:"template"` Verbose bool `json:"verbose"` Options map[string]interface{} `json:"options"` - // Name is deprecated, see Model + // Deprecated: set the model name with Model instead Name string `json:"name"` } @@ -289,7 +361,7 @@ type PullRequest struct { Password string `json:"password"` Stream *bool `json:"stream,omitempty"` - // Name is deprecated, see Model + // Deprecated: set the model name with Model instead Name string `json:"name"` } @@ -310,7 +382,7 @@ type PushRequest struct { Password string `json:"password"` Stream *bool `json:"stream,omitempty"` - // Name is deprecated, see Model + // Deprecated: set the model name with Model instead Name string `json:"name"` } @@ -345,6 +417,13 @@ type ProcessModelResponse struct { SizeVRAM int64 `json:"size_vram"` } +type RetrieveModelResponse struct { + Id string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + OwnedBy string `json:"owned_by"` +} + type TokenResponse struct { Token string `json:"token"` } @@ -427,7 +506,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error { for key, val := range m { opt, ok := jsonOpts[key] if !ok { - slog.Warn("invalid option provided", "option", opt.Name) + slog.Warn("invalid option provided", "option", key) continue } @@ -437,19 +516,6 @@ func (opts *Options) FromMap(m map[string]interface{}) error { continue } - if reflect.PointerTo(field.Type()) == reflect.TypeOf((*TriState)(nil)) { - val, ok := val.(bool) - if !ok { - return fmt.Errorf("option %q must be of type boolean", key) - } - if val { - field.SetInt(int64(TriStateTrue)) - } else { - field.SetInt(int64(TriStateFalse)) - } - continue - } - switch field.Kind() { case reflect.Int: switch t := val.(type) { @@ -496,6 +562,17 @@ func (opts *Options) FromMap(m map[string]interface{}) error { slice[i] = str } field.Set(reflect.ValueOf(slice)) + case reflect.Pointer: + var b bool + if field.Type() == reflect.TypeOf(&b) { + val, ok := val.(bool) + if !ok { + return fmt.Errorf("option %q must be of type boolean", key) + } + field.Set(reflect.ValueOf(&val)) + } else { + return fmt.Errorf("unknown type loading config params: %v %v", field.Kind(), field.Type()) + } default: return fmt.Errorf("unknown type loading config params: %v", field.Kind()) } @@ -538,8 +615,7 @@ func DefaultOptions() Options { LowVRAM: false, F16KV: true, UseMLock: false, - UseMMap: TriStateUndefined, - UseNUMA: false, + UseMMap: nil, }, } } @@ -608,19 +684,6 @@ func FormatParams(params map[string][]string) (map[string]interface{}, 
error) { } else { field := valueOpts.FieldByName(opt.Name) if field.IsValid() && field.CanSet() { - if reflect.PointerTo(field.Type()) == reflect.TypeOf((*TriState)(nil)) { - boolVal, err := strconv.ParseBool(vals[0]) - if err != nil { - return nil, fmt.Errorf("invalid bool value %s", vals) - } - if boolVal { - out[key] = TriStateTrue - } else { - out[key] = TriStateFalse - } - continue - } - switch field.Kind() { case reflect.Float32: floatVal, err := strconv.ParseFloat(vals[0], 32) @@ -648,6 +711,17 @@ func FormatParams(params map[string][]string) (map[string]interface{}, error) { case reflect.Slice: // TODO: only string slices are supported right now out[key] = vals + case reflect.Pointer: + var b bool + if field.Type() == reflect.TypeOf(&b) { + boolVal, err := strconv.ParseBool(vals[0]) + if err != nil { + return nil, fmt.Errorf("invalid bool value %s", vals) + } + out[key] = &boolVal + } else { + return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key) + } default: return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key) } diff --git a/api/types_test.go b/api/types_test.go index 8b6c60c62..a9de5a9a9 100644 --- a/api/types_test.go +++ b/api/types_test.go @@ -2,7 +2,7 @@ package api import ( "encoding/json" - "fmt" + "errors" "math" "testing" "time" @@ -108,25 +108,27 @@ func TestDurationMarshalUnmarshal(t *testing.T) { } func TestUseMmapParsingFromJSON(t *testing.T) { + tr := true + fa := false tests := []struct { name string req string - exp TriState + exp *bool }{ { name: "Undefined", req: `{ }`, - exp: TriStateUndefined, + exp: nil, }, { name: "True", req: `{ "use_mmap": true }`, - exp: TriStateTrue, + exp: &tr, }, { name: "False", req: `{ "use_mmap": false }`, - exp: TriStateFalse, + exp: &fa, }, } @@ -144,63 +146,88 @@ func TestUseMmapParsingFromJSON(t *testing.T) { } func TestUseMmapFormatParams(t *testing.T) { + tr := true + fa := false tests := []struct { name string req map[string][]string - exp TriState + exp *bool err error }{ { name: "True", req: map[string][]string{ - "use_mmap": []string{"true"}, + "use_mmap": {"true"}, }, - exp: TriStateTrue, + exp: &tr, err: nil, }, { name: "False", req: map[string][]string{ - "use_mmap": []string{"false"}, + "use_mmap": {"false"}, }, - exp: TriStateFalse, + exp: &fa, err: nil, }, { name: "Numeric True", req: map[string][]string{ - "use_mmap": []string{"1"}, + "use_mmap": {"1"}, }, - exp: TriStateTrue, + exp: &tr, err: nil, }, { name: "Numeric False", req: map[string][]string{ - "use_mmap": []string{"0"}, + "use_mmap": {"0"}, }, - exp: TriStateFalse, + exp: &fa, err: nil, }, { name: "invalid string", req: map[string][]string{ - "use_mmap": []string{"foo"}, + "use_mmap": {"foo"}, }, - exp: TriStateUndefined, - err: fmt.Errorf("invalid bool value [foo]"), + exp: nil, + err: errors.New("invalid bool value [foo]"), }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { resp, err := FormatParams(test.req) - require.Equal(t, err, test.err) + require.Equal(t, test.err, err) respVal, ok := resp["use_mmap"] - if test.exp != TriStateUndefined { + if test.exp != nil { assert.True(t, ok, "resp: %v", resp) - assert.Equal(t, test.exp, respVal) + assert.Equal(t, *test.exp, *respVal.(*bool)) } }) } } + +func TestMessage_UnmarshalJSON(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {`{"role": "USER", "content": "Hello!"}`, "user"}, + {`{"role": "System", "content": "Initialization complete."}`, "system"}, + {`{"role": "assistant", "content": "How can I help you?"}`, "assistant"}, + 
{`{"role": "TOOl", "content": "Access granted."}`, "tool"}, + } + + for _, test := range tests { + var msg Message + if err := json.Unmarshal([]byte(test.input), &msg); err != nil { + t.Errorf("Unexpected error: %v", err) + } + + if msg.Role != test.expected { + t.Errorf("role not lowercased: got %v, expected %v", msg.Role, test.expected) + } + } +} diff --git a/app/lifecycle/getstarted_nonwindows.go b/app/lifecycle/getstarted_nonwindows.go index c36d14c09..2af87ab92 100644 --- a/app/lifecycle/getstarted_nonwindows.go +++ b/app/lifecycle/getstarted_nonwindows.go @@ -2,8 +2,8 @@ package lifecycle -import "fmt" +import "errors" func GetStarted() error { - return fmt.Errorf("GetStarted not implemented") + return errors.New("not implemented") } diff --git a/app/lifecycle/getstarted_windows.go b/app/lifecycle/getstarted_windows.go index 092c3c17f..f39dc31c0 100644 --- a/app/lifecycle/getstarted_windows.go +++ b/app/lifecycle/getstarted_windows.go @@ -34,7 +34,6 @@ func GetStarted() error { Sys: &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false}, } proc, err := os.StartProcess(args[0], args, attrs) - if err != nil { return fmt.Errorf("unable to start getting started shell %w", err) } diff --git a/app/lifecycle/logging.go b/app/lifecycle/logging.go index a8f1f7cdf..9985fc3f8 100644 --- a/app/lifecycle/logging.go +++ b/app/lifecycle/logging.go @@ -14,7 +14,7 @@ import ( func InitLogging() { level := slog.LevelInfo - if envconfig.Debug { + if envconfig.Debug() { level = slog.LevelDebug } @@ -27,7 +27,7 @@ func InitLogging() { // TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion } else { rotateLogs(AppLogFile) - logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755) + logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755) if err != nil { slog.Error(fmt.Sprintf("failed to create server log %v", err)) return diff --git a/app/lifecycle/logging_nonwindows.go b/app/lifecycle/logging_nonwindows.go index 50b3a638c..205e47d77 100644 --- a/app/lifecycle/logging_nonwindows.go +++ b/app/lifecycle/logging_nonwindows.go @@ -5,5 +5,5 @@ package lifecycle import "log/slog" func ShowLogs() { - slog.Warn("ShowLogs not yet implemented") + slog.Warn("not implemented") } diff --git a/app/lifecycle/logging_test.go b/app/lifecycle/logging_test.go index a2157ca2c..8d5cdf6e7 100644 --- a/app/lifecycle/logging_test.go +++ b/app/lifecycle/logging_test.go @@ -17,7 +17,7 @@ func TestRotateLogs(t *testing.T) { // No log exists rotateLogs(logFile) - require.NoError(t, os.WriteFile(logFile, []byte("1"), 0644)) + require.NoError(t, os.WriteFile(logFile, []byte("1"), 0o644)) assert.FileExists(t, logFile) // First rotation rotateLogs(logFile) @@ -32,7 +32,7 @@ func TestRotateLogs(t *testing.T) { assert.NoFileExists(t, logFile) for i := 2; i <= LogRotationCount+1; i++ { - require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0644)) + require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0o644)) assert.FileExists(t, logFile) rotateLogs(logFile) assert.NoFileExists(t, logFile) diff --git a/app/lifecycle/server.go b/app/lifecycle/server.go index c178a1abf..37957399c 100644 --- a/app/lifecycle/server.go +++ b/app/lifecycle/server.go @@ -55,7 +55,7 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) { } rotateLogs(ServerLogFile) - logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755) + logFile, err := os.OpenFile(ServerLogFile, 
os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755) if err != nil { return nil, fmt.Errorf("failed to create server log: %w", err) } diff --git a/app/lifecycle/updater.go b/app/lifecycle/updater.go index b6d953309..4d3c7d8dc 100644 --- a/app/lifecycle/updater.go +++ b/app/lifecycle/updater.go @@ -15,6 +15,7 @@ import ( "path" "path/filepath" "runtime" + "strconv" "strings" "time" @@ -46,7 +47,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) { query.Add("os", runtime.GOOS) query.Add("arch", runtime.GOARCH) query.Add("version", version.Version) - query.Add("ts", fmt.Sprintf("%d", time.Now().Unix())) + query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10)) nonce, err := auth.NewNonce(rand.Reader, 16) if err != nil { diff --git a/app/lifecycle/updater_nonwindows.go b/app/lifecycle/updater_nonwindows.go index 0f213b34f..1d2dda801 100644 --- a/app/lifecycle/updater_nonwindows.go +++ b/app/lifecycle/updater_nonwindows.go @@ -4,9 +4,9 @@ package lifecycle import ( "context" - "fmt" + "errors" ) func DoUpgrade(cancel context.CancelFunc, done chan int) error { - return fmt.Errorf("DoUpgrade not yet implemented") + return errors.New("not implemented") } diff --git a/app/lifecycle/updater_windows.go b/app/lifecycle/updater_windows.go index 4053671a5..1d3830d4e 100644 --- a/app/lifecycle/updater_windows.go +++ b/app/lifecycle/updater_windows.go @@ -2,6 +2,7 @@ package lifecycle import ( "context" + "errors" "fmt" "log/slog" "os" @@ -15,7 +16,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error { return fmt.Errorf("failed to lookup downloads: %s", err) } if len(files) == 0 { - return fmt.Errorf("no update downloads found") + return errors.New("no update downloads found") } else if len(files) > 1 { // Shouldn't happen slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files)) @@ -64,7 +65,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error { } } else { // TODO - some details about why it didn't start, or is this a pedantic error case? - return fmt.Errorf("installer process did not start") + return errors.New("installer process did not start") } // TODO should we linger for a moment and check to make sure it's actually running by checking the pid? 
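
Editor's note: several files in this patch swap fmt.Errorf for errors.New wherever the message is a plain string. The distinction deserves a sketch: errors.New is the constructor for constant messages (it also sidesteps vet's complaint about non-constant format strings, which is what motivated the errors.New(errorResponse.Error) change in api/client.go), while fmt.Errorf earns its keep only when formatting verbs are involved, especially %w for wrapping. The open function and its message below are invented for illustration, not taken from the patch:

package main

import (
	"errors"
	"fmt"
)

// A fixed message carries no formatting verbs, so errors.New is the
// idiomatic constructor.
var errNotImplemented = errors.New("not implemented")

func open(path string) error {
	if path == "" {
		return errNotImplemented
	}
	// Dynamic context plus %w keeps the original error in the chain.
	return fmt.Errorf("open %s: %w", path, errNotImplemented)
}

func main() {
	err := open("config.json")
	fmt.Println(errors.Is(err, errNotImplemented)) // true: %w preserves the chain
}
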
diff --git a/app/ollama.iss b/app/ollama.iss index e6502abd3..63b5bdb0f 100644 --- a/app/ollama.iss +++ b/app/ollama.iss @@ -28,8 +28,8 @@ AppPublisher={#MyAppPublisher} AppPublisherURL={#MyAppURL} AppSupportURL={#MyAppURL} AppUpdatesURL={#MyAppURL} -ArchitecturesAllowed=x64 arm64 -ArchitecturesInstallIn64BitMode=x64 arm64 +ArchitecturesAllowed=x64compatible arm64 +ArchitecturesInstallIn64BitMode=x64compatible arm64 DefaultDirName={localappdata}\Programs\{#MyAppName} DefaultGroupName={#MyAppName} DisableProgramGroupPage=yes @@ -48,6 +48,7 @@ OutputDir=..\dist\ SetupLogging=yes CloseApplications=yes RestartApplications=no +RestartIfNeededByRun=no ; https://jrsoftware.org/ishelp/index.php?topic=setup_wizardimagefile WizardSmallImageFile=.\assets\setup.bmp @@ -86,21 +87,21 @@ Name: "english"; MessagesFile: "compiler:Default.isl" DialogFontSize=12 [Files] -Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit -Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit -Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs -Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion -Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion -#if DirExists("..\dist\windows-amd64\cuda") - Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs -#endif -#if DirExists("..\dist\windows-amd64\oneapi") - Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs -#endif -#if DirExists("..\dist\windows-amd64\rocm") - Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs +#if DirExists("..\dist\windows-amd64") +Source: "..\dist\windows-amd64-app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ;Check: not IsArm64(); Flags: ignoreversion 64bit +Source: "..\dist\windows-amd64\ollama.exe"; DestDir: "{app}"; Check: not IsArm64(); Flags: ignoreversion 64bit +Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Check: not IsArm64(); Flags: ignoreversion 64bit recursesubdirs #endif +#if DirExists("..\dist\windows-arm64") +Source: "..\dist\windows-arm64\vc_redist.arm64.exe"; DestDir: "{tmp}"; Check: IsArm64() and vc_redist_needed(); Flags: deleteafterinstall +Source: "..\dist\windows-arm64-app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ;Check: IsArm64(); Flags: ignoreversion 64bit +Source: "..\dist\windows-arm64\ollama.exe"; DestDir: "{app}"; Check: IsArm64(); Flags: ignoreversion 64bit +Source: "..\dist\windows-arm64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Check: IsArm64(); Flags: ignoreversion 64bit recursesubdirs +#endif + +Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion +Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion [Icons] Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico" @@ -108,6 +109,9 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico" [Run] +#if DirExists("..\dist\windows-arm64") +Filename: "{tmp}\vc_redist.arm64.exe"; Parameters: "/install /passive /norestart"; Check: IsArm64() and vc_redist_needed(); StatusMsg: "Installing VC++ Redistributables..."; Flags: waituntilterminated +#endif Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & 
""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden [UninstallRun] @@ -127,6 +131,10 @@ Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\models" Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\history" ; NOTE: if the user has a custom OLLAMA_MODELS it will be preserved +[InstallDelete] +Type: filesandordirs; Name: "{%TEMP}\ollama*" +Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama" + [Messages] WizardReady=Ollama Windows Preview ReadyLabel1=%nLet's get you up and running with your own large language models. @@ -134,7 +142,7 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi ;FinishedHeadingLabel=Run your first model -;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3 +;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3.1 ;ClickFinish=%n [Registry] @@ -159,3 +167,39 @@ begin { Pos() returns 0 if not found } Result := Pos(';' + ExpandConstant(Param) + ';', ';' + OrigPath + ';') = 0; end; + +{ --- VC Runtime libraries discovery code - Only install vc_redist if it isn't already installed ----- } +const VCRTL_MIN_V1 = 14; +const VCRTL_MIN_V2 = 40; +const VCRTL_MIN_V3 = 33807; +const VCRTL_MIN_V4 = 0; + + // check if the minimum required vc redist is installed (by looking the registry) +function vc_redist_needed (): Boolean; +var + sRegKey: string; + v1: Cardinal; + v2: Cardinal; + v3: Cardinal; + v4: Cardinal; +begin + sRegKey := 'SOFTWARE\WOW6432Node\Microsoft\VisualStudio\14.0\VC\Runtimes\arm64'; + if (RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'Major', v1) and + RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'Minor', v2) and + RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'Bld', v3) and + RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'RBld', v4)) then + begin + Log ('VC Redist version: ' + IntToStr (v1) + + '.' + IntToStr (v2) + '.' + IntToStr (v3) + + '.' + IntToStr (v4)); + { Version info was found. Return true if later or equal to our + minimal required version RTL_MIN_Vx } + Result := not ( + (v1 > VCRTL_MIN_V1) or ((v1 = VCRTL_MIN_V1) and + ((v2 > VCRTL_MIN_V2) or ((v2 = VCRTL_MIN_V2) and + ((v3 > VCRTL_MIN_V3) or ((v3 = VCRTL_MIN_V3) and + (v4 >= VCRTL_MIN_V4))))))); + end + else + Result := TRUE; +end; diff --git a/app/ollama_welcome.ps1 b/app/ollama_welcome.ps1 index 9af37a461..46777a3a6 100644 --- a/app/ollama_welcome.ps1 +++ b/app/ollama_welcome.ps1 @@ -4,5 +4,5 @@ write-host "Welcome to Ollama!" 
write-host "" write-host "Run your first model:" write-host "" -write-host "`tollama run llama3" +write-host "`tollama run llama3.1" write-host "" \ No newline at end of file diff --git a/app/tray/tray_nonwindows.go b/app/tray/tray_nonwindows.go index ae5572b23..a03d233ea 100644 --- a/app/tray/tray_nonwindows.go +++ b/app/tray/tray_nonwindows.go @@ -3,11 +3,11 @@ package tray import ( - "fmt" + "errors" "github.com/ollama/ollama/app/tray/commontray" ) func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) { - return nil, fmt.Errorf("NOT IMPLEMENTED YET") + return nil, errors.New("not implemented") } diff --git a/app/tray/wintray/eventloop.go b/app/tray/wintray/eventloop.go index 0f9448947..157828a36 100644 --- a/app/tray/wintray/eventloop.go +++ b/app/tray/wintray/eventloop.go @@ -11,9 +11,7 @@ import ( "golang.org/x/sys/windows" ) -var ( - quitOnce sync.Once -) +var quitOnce sync.Once func (t *winTray) Run() { nativeLoop() diff --git a/app/tray/wintray/menus.go b/app/tray/wintray/menus.go index 9cb3b8933..596244442 100644 --- a/app/tray/wintray/menus.go +++ b/app/tray/wintray/menus.go @@ -11,12 +11,12 @@ import ( ) const ( - updatAvailableMenuID = 1 - updateMenuID = updatAvailableMenuID + 1 - separatorMenuID = updateMenuID + 1 - diagLogsMenuID = separatorMenuID + 1 - diagSeparatorMenuID = diagLogsMenuID + 1 - quitMenuID = diagSeparatorMenuID + 1 + updateAvailableMenuID = 1 + updateMenuID = updateAvailableMenuID + 1 + separatorMenuID = updateMenuID + 1 + diagLogsMenuID = separatorMenuID + 1 + diagSeparatorMenuID = diagLogsMenuID + 1 + quitMenuID = diagSeparatorMenuID + 1 ) func (t *winTray) initMenus() error { @@ -35,7 +35,7 @@ func (t *winTray) initMenus() error { func (t *winTray) UpdateAvailable(ver string) error { if !t.updateNotified { slog.Debug("updating menu and sending notification for new update") - if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil { + if err := t.addOrUpdateMenuItem(updateAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil { return fmt.Errorf("unable to create menu entries %w", err) } if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil { diff --git a/app/tray/wintray/tray.go b/app/tray/wintray/tray.go index 027ec5a50..6f8278939 100644 --- a/app/tray/wintray/tray.go +++ b/app/tray/wintray/tray.go @@ -11,10 +11,12 @@ import ( "path/filepath" "sort" "sync" + "syscall" "unsafe" - "github.com/ollama/ollama/app/tray/commontray" "golang.org/x/sys/windows" + + "github.com/ollama/ollama/app/tray/commontray" ) // Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32 @@ -414,7 +416,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) { iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash) if _, err := os.Stat(iconFilePath); os.IsNotExist(err) { - if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil { + if err := os.WriteFile(iconFilePath, iconBytes, 0o644); err != nil { return "", err } } @@ -432,7 +434,12 @@ func (t *winTray) setIcon(src string) error { t.muNID.Lock() defer t.muNID.Unlock() t.nid.Icon = h - t.nid.Flags |= NIF_ICON + t.nid.Flags |= NIF_ICON | NIF_TIP + if toolTipUTF16, err := syscall.UTF16FromString(commontray.ToolTip); err == nil { + copy(t.nid.Tip[:], toolTipUTF16) + } else { + return err + } t.nid.Size = uint32(unsafe.Sizeof(*t.nid)) return t.nid.modify() diff --git a/app/tray/wintray/w32api.go b/app/tray/wintray/w32api.go index a1e0381de..7c7c0ac8a 100644 --- 
a/app/tray/wintray/w32api.go +++ b/app/tray/wintray/w32api.go @@ -61,6 +61,7 @@ const ( MIIM_SUBMENU = 0x00000004 MIM_APPLYTOSUBMENUS = 0x80000000 NIF_ICON = 0x00000002 + NIF_TIP = 0x00000004 NIF_INFO = 0x00000010 NIF_MESSAGE = 0x00000001 SW_HIDE = 0 diff --git a/auth/auth.go b/auth/auth.go index 026b2a2c7..e1d854124 100644 --- a/auth/auth.go +++ b/auth/auth.go @@ -5,6 +5,7 @@ import ( "context" "crypto/rand" "encoding/base64" + "errors" "fmt" "io" "log/slog" @@ -78,7 +79,7 @@ func Sign(ctx context.Context, bts []byte) (string, error) { publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey()) parts := bytes.Split(publicKey, []byte(" ")) if len(parts) < 2 { - return "", fmt.Errorf("malformed public key") + return "", errors.New("malformed public key") } signedData, err := privateKey.Sign(rand.Reader, bts) diff --git a/build/darwin/amd64/placeholder b/build/darwin/amd64/placeholder new file mode 100644 index 000000000..87dc27381 --- /dev/null +++ b/build/darwin/amd64/placeholder @@ -0,0 +1 @@ +This is here to make sure the build/ directory exists for the go:embed command diff --git a/build/darwin/arm64/placeholder b/build/darwin/arm64/placeholder new file mode 100644 index 000000000..87dc27381 --- /dev/null +++ b/build/darwin/arm64/placeholder @@ -0,0 +1 @@ +This is here to make sure the build/ directory exists for the go:embed command diff --git a/build/embed_darwin_amd64.go b/build/embed_darwin_amd64.go new file mode 100644 index 000000000..af1458ea9 --- /dev/null +++ b/build/embed_darwin_amd64.go @@ -0,0 +1,8 @@ +package build + +import "embed" + +// Darwin payloads separated by architecture to avoid duplicate payloads when cross compiling + +//go:embed darwin/amd64/* +var EmbedFS embed.FS diff --git a/build/embed_darwin_arm64.go b/build/embed_darwin_arm64.go new file mode 100644 index 000000000..d885365d0 --- /dev/null +++ b/build/embed_darwin_arm64.go @@ -0,0 +1,8 @@ +package build + +import "embed" + +// Darwin payloads separated by architecture to avoid duplicate payloads when cross compiling + +//go:embed darwin/arm64/* +var EmbedFS embed.FS diff --git a/build/embed_linux.go b/build/embed_linux.go new file mode 100644 index 000000000..4cf7be4c3 --- /dev/null +++ b/build/embed_linux.go @@ -0,0 +1,6 @@ +package build + +import "embed" + +//go:embed linux/* +var EmbedFS embed.FS diff --git a/build/embed_unused.go b/build/embed_unused.go new file mode 100644 index 000000000..00fbe02e8 --- /dev/null +++ b/build/embed_unused.go @@ -0,0 +1,8 @@ +//go:build !linux && !darwin + +package build + +import "embed" + +// unused on windows +var EmbedFS embed.FS diff --git a/build/linux/amd64/placeholder b/build/linux/amd64/placeholder new file mode 100644 index 000000000..87dc27381 --- /dev/null +++ b/build/linux/amd64/placeholder @@ -0,0 +1 @@ +This is here to make sure the build/ directory exists for the go:embed command diff --git a/build/linux/arm64/placeholder b/build/linux/arm64/placeholder new file mode 100644 index 000000000..87dc27381 --- /dev/null +++ b/build/linux/arm64/placeholder @@ -0,0 +1 @@ +This is here to make sure the build/ directory exists for the go:embed command diff --git a/cmd/cmd.go b/cmd/cmd.go index c898c7db6..3bb8b06ec 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -2,6 +2,7 @@ package cmd import ( "archive/zip" + "bufio" "bytes" "context" "crypto/ed25519" @@ -21,7 +22,9 @@ import ( "regexp" "runtime" "slices" + "strconv" "strings" + "sync/atomic" "syscall" "time" @@ -78,6 +81,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error { status := "transferring 
model data" spinner := progress.NewSpinner(status) p.Add(status, spinner) + defer p.Stop() for i := range modelfile.Commands { switch modelfile.Commands[i].Name { @@ -112,7 +116,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error { path = tempfile } - digest, err := createBlob(cmd, client, path) + digest, err := createBlob(cmd, client, path, spinner) if err != nil { return err } @@ -202,6 +206,12 @@ func tempZipFiles(path string) (string, error) { // safetensors files might be unresolved git lfs references; skip if they are // covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors files = append(files, st...) + } else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 { + // covers adapters.safetensors + files = append(files, st...) + } else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 { + // covers adapter_model.safetensors + files = append(files, st...) } else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 { // pytorch files might also be unresolved git lfs references; skip if they are // covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin @@ -221,6 +231,14 @@ func tempZipFiles(path string) (string, error) { } files = append(files, js...) + // bert models require a nested config.json + // TODO(mxyng): merge this with the glob above + js, err = glob(filepath.Join(path, "**/*.json"), "text/plain") + if err != nil { + return "", err + } + files = append(files, js...) + if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 { // add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob // tokenizer.model might be a unresolved git lfs reference; error if it is @@ -250,6 +268,11 @@ func tempZipFiles(path string) (string, error) { return "", err } + zfi.Name, err = filepath.Rel(path, file) + if err != nil { + return "", err + } + zf, err := zipfile.CreateHeader(zfi) if err != nil { return "", err @@ -263,13 +286,20 @@ func tempZipFiles(path string) (string, error) { return tempfile.Name(), nil } -func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) { +func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *progress.Spinner) (string, error) { bin, err := os.Open(path) if err != nil { return "", err } defer bin.Close() + // Get file info to retrieve the size + fileInfo, err := bin.Stat() + if err != nil { + return "", err + } + fileSize := fileInfo.Size() + hash := sha256.New() if _, err := io.Copy(hash, bin); err != nil { return "", err @@ -279,13 +309,76 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er return "", err } + var pw progressWriter + status := "transferring model data 0%" + spinner.SetMessage(status) + + done := make(chan struct{}) + defer close(done) + + go func() { + ticker := time.NewTicker(60 * time.Millisecond) + defer ticker.Stop() + for { + select { + case <-ticker.C: + spinner.SetMessage(fmt.Sprintf("transferring model data %d%%", int(100*pw.n.Load()/fileSize))) + case <-done: + spinner.SetMessage("transferring model data 100%") + return + } + } + }() + digest := fmt.Sprintf("sha256:%x", hash.Sum(nil)) - if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil { + if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil { return "", err } 
return digest, nil } +type progressWriter struct { + n atomic.Int64 +} + +func (w *progressWriter) Write(p []byte) (n int, err error) { + w.n.Add(int64(len(p))) + return len(p), nil +} + +func loadOrUnloadModel(cmd *cobra.Command, opts *runOptions) error { + p := progress.NewProgress(os.Stderr) + defer p.StopAndClear() + + spinner := progress.NewSpinner("") + p.Add("", spinner) + + client, err := api.ClientFromEnvironment() + if err != nil { + return err + } + + req := &api.GenerateRequest{ + Model: opts.Model, + KeepAlive: opts.KeepAlive, + } + + return client.Generate(cmd.Context(), req, func(api.GenerateResponse) error { return nil }) +} + +func StopHandler(cmd *cobra.Command, args []string) error { + opts := &runOptions{ + Model: args[0], + KeepAlive: &api.Duration{Duration: 0}, + } + if err := loadOrUnloadModel(cmd, opts); err != nil { + if strings.Contains(err.Error(), "not found") { + return fmt.Errorf("couldn't find model \"%s\" to stop", args[0]) + } + } + return nil +} + func RunHandler(cmd *cobra.Command, args []string) error { interactive := true @@ -362,9 +455,24 @@ func RunHandler(cmd *cobra.Command, args []string) error { opts.MultiModal = slices.Contains(info.Details.Families, "clip") opts.ParentModel = info.Details.ParentModel - opts.Messages = append(opts.Messages, info.Messages...) if interactive { + if err := loadOrUnloadModel(cmd, &opts); err != nil { + return err + } + + for _, msg := range info.Messages { + switch msg.Role { + case "user": + fmt.Printf(">>> %s\n", msg.Content) + case "assistant": + state := &displayResponseState{} + displayResponse(msg.Content, opts.WordWrap, state) + fmt.Println() + fmt.Println() + } + } + return generateInteractive(cmd, opts) } return generate(cmd, opts) @@ -505,7 +613,7 @@ func ListHandler(cmd *cobra.Command, args []string) error { table.SetHeaderLine(false) table.SetBorder(false) table.SetNoWhiteSpace(true) - table.SetTablePadding("\t") + table.SetTablePadding(" ") table.AppendBulk(data) table.Render() @@ -540,7 +648,15 @@ func ListRunningHandler(cmd *cobra.Command, args []string) error { cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100) procStr = fmt.Sprintf("%d%%/%d%% CPU/GPU", int(cpuPercent), int(100-cpuPercent)) } - data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, format.HumanTime(m.ExpiresAt, "Never")}) + + var until string + delta := time.Since(m.ExpiresAt) + if delta > 0 { + until = "Stopping..." 
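+		// ExpiresAt already in the past means the scheduler is unloading
+		// this model, so show a transitional status instead of a stale expiry.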
+ } else { + until = format.HumanTime(m.ExpiresAt, "Never") + } + data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, until}) } } @@ -551,7 +667,7 @@ func ListRunningHandler(cmd *cobra.Command, args []string) error { table.SetHeaderLine(false) table.SetBorder(false) table.SetNoWhiteSpace(true) - table.SetTablePadding("\t") + table.SetTablePadding(" ") table.AppendBulk(data) table.Render() @@ -647,122 +763,89 @@ func ShowHandler(cmd *cobra.Command, args []string) error { return nil } - showInfo(resp) - - return nil + return showInfo(resp, os.Stdout) } -func showInfo(resp *api.ShowResponse) { - arch := resp.ModelInfo["general.architecture"].(string) +func showInfo(resp *api.ShowResponse, w io.Writer) error { + tableRender := func(header string, rows func() [][]string) { + fmt.Fprintln(w, " ", header) + table := tablewriter.NewWriter(w) + table.SetAlignment(tablewriter.ALIGN_LEFT) + table.SetBorder(false) + table.SetNoWhiteSpace(true) + table.SetTablePadding(" ") - modelData := [][]string{ - {"arch", arch}, - {"parameters", resp.Details.ParameterSize}, - {"quantization", resp.Details.QuantizationLevel}, - {"context length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)].(float64))}, - {"embedding length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.embedding_length", arch)].(float64))}, + switch header { + case "Template", "System", "License": + table.SetColWidth(100) + } + + table.AppendBulk(rows()) + table.Render() + fmt.Fprintln(w) } - mainTableData := [][]string{ - {"Model"}, - {renderSubTable(modelData, false)}, - } + tableRender("Model", func() (rows [][]string) { + if resp.ModelInfo != nil { + arch := resp.ModelInfo["general.architecture"].(string) + rows = append(rows, []string{"", "architecture", arch}) + rows = append(rows, []string{"", "parameters", format.HumanNumber(uint64(resp.ModelInfo["general.parameter_count"].(float64)))}) + rows = append(rows, []string{"", "context length", strconv.FormatFloat(resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)].(float64), 'f', -1, 64)}) + rows = append(rows, []string{"", "embedding length", strconv.FormatFloat(resp.ModelInfo[fmt.Sprintf("%s.embedding_length", arch)].(float64), 'f', -1, 64)}) + } else { + rows = append(rows, []string{"", "architecture", resp.Details.Family}) + rows = append(rows, []string{"", "parameters", resp.Details.ParameterSize}) + } + rows = append(rows, []string{"", "quantization", resp.Details.QuantizationLevel}) + return + }) if resp.ProjectorInfo != nil { - projectorData := [][]string{ - {"arch", "clip"}, - {"parameters", format.HumanNumber(uint64(resp.ProjectorInfo["general.parameter_count"].(float64)))}, - } - - if projectorType, ok := resp.ProjectorInfo["clip.projector_type"]; ok { - projectorData = append(projectorData, []string{"projector type", projectorType.(string)}) - } - - projectorData = append(projectorData, - []string{"embedding length", fmt.Sprintf("%v", resp.ProjectorInfo["clip.vision.embedding_length"].(float64))}, - []string{"projection dimensionality", fmt.Sprintf("%v", resp.ProjectorInfo["clip.vision.projection_dim"].(float64))}, - ) - - mainTableData = append(mainTableData, - []string{"Projector"}, - []string{renderSubTable(projectorData, false)}, - ) + tableRender("Projector", func() (rows [][]string) { + arch := resp.ProjectorInfo["general.architecture"].(string) + rows = append(rows, []string{"", "architecture", arch}) + rows = append(rows, []string{"", "parameters", 
format.HumanNumber(uint64(resp.ProjectorInfo["general.parameter_count"].(float64)))}) + rows = append(rows, []string{"", "embedding length", strconv.FormatFloat(resp.ProjectorInfo[fmt.Sprintf("%s.vision.embedding_length", arch)].(float64), 'f', -1, 64)}) + rows = append(rows, []string{"", "dimensions", strconv.FormatFloat(resp.ProjectorInfo[fmt.Sprintf("%s.vision.projection_dim", arch)].(float64), 'f', -1, 64)}) + return + }) } if resp.Parameters != "" { - mainTableData = append(mainTableData, []string{"Parameters"}, []string{formatParams(resp.Parameters)}) + tableRender("Parameters", func() (rows [][]string) { + scanner := bufio.NewScanner(strings.NewReader(resp.Parameters)) + for scanner.Scan() { + if text := scanner.Text(); text != "" { + rows = append(rows, append([]string{""}, strings.Fields(text)...)) + } + } + return + }) + } + + head := func(s string, n int) (rows [][]string) { + scanner := bufio.NewScanner(strings.NewReader(s)) + for scanner.Scan() && (len(rows) < n || n < 0) { + if text := scanner.Text(); text != "" { + rows = append(rows, []string{"", strings.TrimSpace(text)}) + } + } + return } if resp.System != "" { - mainTableData = append(mainTableData, []string{"System"}, []string{renderSubTable(twoLines(resp.System), true)}) + tableRender("System", func() [][]string { + return head(resp.System, 2) + }) } if resp.License != "" { - mainTableData = append(mainTableData, []string{"License"}, []string{renderSubTable(twoLines(resp.License), true)}) + tableRender("License", func() [][]string { + return head(resp.License, 2) + }) } - table := tablewriter.NewWriter(os.Stdout) - table.SetAutoWrapText(false) - table.SetBorder(false) - table.SetAlignment(tablewriter.ALIGN_LEFT) - - for _, v := range mainTableData { - table.Append(v) - } - - table.Render() -} - -func renderSubTable(data [][]string, file bool) string { - var buf bytes.Buffer - table := tablewriter.NewWriter(&buf) - table.SetAutoWrapText(!file) - table.SetBorder(false) - table.SetNoWhiteSpace(true) - table.SetTablePadding("\t") - table.SetAlignment(tablewriter.ALIGN_LEFT) - - for _, v := range data { - table.Append(v) - } - - table.Render() - - renderedTable := buf.String() - lines := strings.Split(renderedTable, "\n") - for i, line := range lines { - lines[i] = "\t" + line - } - - return strings.Join(lines, "\n") -} - -func twoLines(s string) [][]string { - lines := strings.Split(s, "\n") - res := [][]string{} - - count := 0 - for _, line := range lines { - line = strings.TrimSpace(line) - if line != "" { - count++ - res = append(res, []string{line}) - if count == 2 { - return res - } - } - } - return res -} - -func formatParams(s string) string { - lines := strings.Split(s, "\n") - table := [][]string{} - - for _, line := range lines { - table = append(table, strings.Fields(line)) - } - return renderSubTable(table, false) + return nil } func CopyHandler(cmd *cobra.Command, args []string) error { @@ -843,7 +926,6 @@ type runOptions struct { WordWrap bool Format string System string - Template string Images []api.ImageData Options map[string]interface{} MultiModal bool @@ -1037,7 +1119,6 @@ func generate(cmd *cobra.Command, opts runOptions) error { Images: opts.Images, Format: opts.Format, System: opts.System, - Template: opts.Template, Options: opts.Options, KeepAlive: opts.KeepAlive, } @@ -1073,12 +1154,12 @@ func generate(cmd *cobra.Command, opts runOptions) error { return nil } -func RunServer(cmd *cobra.Command, _ []string) error { +func RunServer(_ *cobra.Command, _ []string) error { if err := initializeKeypair(); 
err != nil { return err } - ln, err := net.Listen("tcp", net.JoinHostPort(envconfig.Host.Host, envconfig.Host.Port)) + ln, err := net.Listen("tcp", envconfig.Host().Host) if err != nil { return err } @@ -1147,7 +1228,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error { return err } if err := startApp(cmd.Context(), client); err != nil { - return fmt.Errorf("could not connect to ollama app, is it running?") + return errors.New("could not connect to ollama app, is it running?") } } return nil @@ -1254,6 +1335,15 @@ func NewCLI() *cobra.Command { runCmd.Flags().Bool("insecure", false, "Use an insecure registry") runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically") runCmd.Flags().String("format", "", "Response format (e.g. json)") + + stopCmd := &cobra.Command{ + Use: "stop MODEL", + Short: "Stop a running model", + Args: cobra.ExactArgs(1), + PreRunE: checkServerHeartbeat, + RunE: StopHandler, + } + serveCmd := &cobra.Command{ Use: "serve", Aliases: []string{"start"}, @@ -1321,6 +1411,7 @@ func NewCLI() *cobra.Command { createCmd, showCmd, runCmd, + stopCmd, pullCmd, pushCmd, listCmd, @@ -1343,10 +1434,12 @@ func NewCLI() *cobra.Command { envVars["OLLAMA_NUM_PARALLEL"], envVars["OLLAMA_NOPRUNE"], envVars["OLLAMA_ORIGINS"], + envVars["OLLAMA_SCHED_SPREAD"], envVars["OLLAMA_TMPDIR"], envVars["OLLAMA_FLASH_ATTENTION"], envVars["OLLAMA_LLM_LIBRARY"], - envVars["OLLAMA_MAX_VRAM"], + envVars["OLLAMA_GPU_OVERHEAD"], + envVars["OLLAMA_LOAD_TIMEOUT"], }) default: appendEnvDocs(cmd, envs) @@ -1358,6 +1451,7 @@ func NewCLI() *cobra.Command { createCmd, showCmd, runCmd, + stopCmd, pullCmd, pushCmd, listCmd, diff --git a/cmd/cmd_test.go b/cmd/cmd_test.go new file mode 100644 index 000000000..0f8863cc7 --- /dev/null +++ b/cmd/cmd_test.go @@ -0,0 +1,206 @@ +package cmd + +import ( + "bytes" + "os" + "path/filepath" + "testing" + + "github.com/google/go-cmp/cmp" + + "github.com/ollama/ollama/api" +) + +func TestShowInfo(t *testing.T) { + t.Run("bare details", func(t *testing.T) { + var b bytes.Buffer + if err := showInfo(&api.ShowResponse{ + Details: api.ModelDetails{ + Family: "test", + ParameterSize: "7B", + QuantizationLevel: "FP16", + }, + }, &b); err != nil { + t.Fatal(err) + } + + expect := ` Model + architecture test + parameters 7B + quantization FP16 + +` + + if diff := cmp.Diff(expect, b.String()); diff != "" { + t.Errorf("unexpected output (-want +got):\n%s", diff) + } + }) + + t.Run("bare model info", func(t *testing.T) { + var b bytes.Buffer + if err := showInfo(&api.ShowResponse{ + ModelInfo: map[string]any{ + "general.architecture": "test", + "general.parameter_count": float64(7_000_000_000), + "test.context_length": float64(0), + "test.embedding_length": float64(0), + }, + Details: api.ModelDetails{ + Family: "test", + ParameterSize: "7B", + QuantizationLevel: "FP16", + }, + }, &b); err != nil { + t.Fatal(err) + } + + expect := ` Model + architecture test + parameters 7B + context length 0 + embedding length 0 + quantization FP16 + +` + if diff := cmp.Diff(expect, b.String()); diff != "" { + t.Errorf("unexpected output (-want +got):\n%s", diff) + } + }) + + t.Run("parameters", func(t *testing.T) { + var b bytes.Buffer + if err := showInfo(&api.ShowResponse{ + Details: api.ModelDetails{ + Family: "test", + ParameterSize: "7B", + QuantizationLevel: "FP16", + }, + Parameters: ` + stop never + stop gonna + stop give + stop you + stop up + temperature 99`, + }, &b); err != nil { + t.Fatal(err) + } + + expect := ` Model + architecture test + 
parameters 7B + quantization FP16 + + Parameters + stop never + stop gonna + stop give + stop you + stop up + temperature 99 + +` + if diff := cmp.Diff(expect, b.String()); diff != "" { + t.Errorf("unexpected output (-want +got):\n%s", diff) + } + }) + + t.Run("project info", func(t *testing.T) { + var b bytes.Buffer + if err := showInfo(&api.ShowResponse{ + Details: api.ModelDetails{ + Family: "test", + ParameterSize: "7B", + QuantizationLevel: "FP16", + }, + ProjectorInfo: map[string]any{ + "general.architecture": "clip", + "general.parameter_count": float64(133_700_000), + "clip.vision.embedding_length": float64(0), + "clip.vision.projection_dim": float64(0), + }, + }, &b); err != nil { + t.Fatal(err) + } + + expect := ` Model + architecture test + parameters 7B + quantization FP16 + + Projector + architecture clip + parameters 133.70M + embedding length 0 + dimensions 0 + +` + if diff := cmp.Diff(expect, b.String()); diff != "" { + t.Errorf("unexpected output (-want +got):\n%s", diff) + } + }) + + t.Run("system", func(t *testing.T) { + var b bytes.Buffer + if err := showInfo(&api.ShowResponse{ + Details: api.ModelDetails{ + Family: "test", + ParameterSize: "7B", + QuantizationLevel: "FP16", + }, + System: `You are a pirate! +Ahoy, matey! +Weigh anchor! + `, + }, &b); err != nil { + t.Fatal(err) + } + + expect := ` Model + architecture test + parameters 7B + quantization FP16 + + System + You are a pirate! + Ahoy, matey! + +` + if diff := cmp.Diff(expect, b.String()); diff != "" { + t.Errorf("unexpected output (-want +got):\n%s", diff) + } + }) + + t.Run("license", func(t *testing.T) { + var b bytes.Buffer + license, err := os.ReadFile(filepath.Join("..", "LICENSE")) + if err != nil { + t.Fatal(err) + } + + if err := showInfo(&api.ShowResponse{ + Details: api.ModelDetails{ + Family: "test", + ParameterSize: "7B", + QuantizationLevel: "FP16", + }, + License: string(license), + }, &b); err != nil { + t.Fatal(err) + } + + expect := ` Model + architecture test + parameters 7B + quantization FP16 + + License + MIT License + Copyright (c) Ollama + +` + if diff := cmp.Diff(expect, b.String()); diff != "" { + t.Errorf("unexpected output (-want +got):\n%s", diff) + } + }) +} diff --git a/cmd/interactive.go b/cmd/interactive.go index 9214f2db5..94578f11b 100644 --- a/cmd/interactive.go +++ b/cmd/interactive.go @@ -1,6 +1,7 @@ package cmd import ( + "cmp" "errors" "fmt" "io" @@ -9,14 +10,14 @@ import ( "path/filepath" "regexp" "slices" - "sort" "strings" "github.com/spf13/cobra" + "golang.org/x/exp/maps" "github.com/ollama/ollama/api" "github.com/ollama/ollama/envconfig" - "github.com/ollama/ollama/progress" + "github.com/ollama/ollama/parser" "github.com/ollama/ollama/readline" "github.com/ollama/ollama/types/errtypes" ) @@ -27,49 +28,9 @@ const ( MultilineNone MultilineState = iota MultilinePrompt MultilineSystem - MultilineTemplate ) -func loadModel(cmd *cobra.Command, opts *runOptions) error { - p := progress.NewProgress(os.Stderr) - defer p.StopAndClear() - - spinner := progress.NewSpinner("") - p.Add("", spinner) - - client, err := api.ClientFromEnvironment() - if err != nil { - return err - } - - chatReq := &api.ChatRequest{ - Model: opts.Model, - KeepAlive: opts.KeepAlive, - } - - return client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error { - p.StopAndClear() - for _, msg := range opts.Messages { - switch msg.Role { - case "user": - fmt.Printf(">>> %s\n", msg.Content) - case "assistant": - state := &displayResponseState{} - displayResponse(msg.Content, opts.WordWrap, 
state) - fmt.Println() - fmt.Println() - } - } - return nil - }) -} - func generateInteractive(cmd *cobra.Command, opts runOptions) error { - err := loadModel(cmd, &opts) - if err != nil { - return err - } - usage := func() { fmt.Fprintln(os.Stderr, "Available Commands:") fmt.Fprintln(os.Stderr, " /set Set session variables") @@ -94,7 +55,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { fmt.Fprintln(os.Stderr, "Available Commands:") fmt.Fprintln(os.Stderr, " /set parameter ... Set a parameter") fmt.Fprintln(os.Stderr, " /set system Set system message") - fmt.Fprintln(os.Stderr, " /set template Set prompt template") fmt.Fprintln(os.Stderr, " /set history Enable history") fmt.Fprintln(os.Stderr, " /set nohistory Disable history") fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap") @@ -140,6 +100,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { fmt.Fprintln(os.Stderr, " /set parameter num_predict Max number of tokens to predict") fmt.Fprintln(os.Stderr, " /set parameter top_k Pick from top k num of tokens") fmt.Fprintln(os.Stderr, " /set parameter top_p Pick token based on sum of probabilities") + fmt.Fprintln(os.Stderr, " /set parameter min_p Pick token based on top token probability * min_p") fmt.Fprintln(os.Stderr, " /set parameter num_ctx Set the context size") fmt.Fprintln(os.Stderr, " /set parameter temperature Set creativity level") fmt.Fprintln(os.Stderr, " /set parameter repeat_penalty How strongly to penalize repetitions") @@ -159,7 +120,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { return err } - if envconfig.NoHistory { + if envconfig.NoHistory() { scanner.HistoryDisable() } @@ -204,10 +165,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { opts.Messages = append(opts.Messages, api.Message{Role: "system", Content: opts.System}) fmt.Println("Set system message.") sb.Reset() - case MultilineTemplate: - opts.Template = sb.String() - fmt.Println("Set prompt template.") - sb.Reset() } multiline = MultilineNone @@ -239,7 +196,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { opts.Model = args[1] opts.Messages = []api.Message{} fmt.Printf("Loading model '%s'\n", opts.Model) - if err := loadModel(cmd, &opts); err != nil { + if err := loadOrUnloadModel(cmd, &opts); err != nil { return err } continue @@ -326,17 +283,13 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { } fmt.Printf("Set parameter '%s' to '%s'\n", args[2], strings.Join(params, ", ")) opts.Options[args[2]] = fp[args[2]] - case "system", "template": + case "system": if len(args) < 3 { usageSet() continue } - if args[1] == "system" { - multiline = MultilineSystem - } else if args[1] == "template" { - multiline = MultilineTemplate - } + multiline = MultilineSystem line := strings.Join(args[2:], " ") line, ok := strings.CutPrefix(line, `"""`) @@ -356,23 +309,17 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { continue } - if args[1] == "system" { - opts.System = sb.String() // for display in modelfile - newMessage := api.Message{Role: "system", Content: sb.String()} - // Check if the slice is not empty and the last message is from 'system' - if len(opts.Messages) > 0 && opts.Messages[len(opts.Messages)-1].Role == "system" { - // Replace the last message - opts.Messages[len(opts.Messages)-1] = newMessage - } else { - opts.Messages = append(opts.Messages, newMessage) - } - fmt.Println("Set system message.") - sb.Reset() - } else if args[1] == "template" 
{ - opts.Template = sb.String() - fmt.Println("Set prompt template.") - sb.Reset() + opts.System = sb.String() // for display in modelfile + newMessage := api.Message{Role: "system", Content: sb.String()} + // Check if the slice is not empty and the last message is from 'system' + if len(opts.Messages) > 0 && opts.Messages[len(opts.Messages)-1].Role == "system" { + // Replace the last message + opts.Messages[len(opts.Messages)-1] = newMessage + } else { + opts.Messages = append(opts.Messages, newMessage) } + fmt.Println("Set system message.") + sb.Reset() sb.Reset() continue @@ -391,10 +338,9 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { return err } req := &api.ShowRequest{ - Name: opts.Model, - System: opts.System, - Template: opts.Template, - Options: opts.Options, + Name: opts.Model, + System: opts.System, + Options: opts.Options, } resp, err := client.Show(cmd.Context(), req) if err != nil { @@ -404,7 +350,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { switch args[1] { case "info": - showInfo(resp) + _ = showInfo(resp, os.Stderr) case "license": if resp.License == "" { fmt.Println("No license was specified for this model.") @@ -437,12 +383,9 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { fmt.Println("No system message was specified for this model.") } case "template": - switch { - case opts.Template != "": - fmt.Println(opts.Template + "\n") - case resp.Template != "": + if resp.Template != "" { fmt.Println(resp.Template) - default: + } else { fmt.Println("No prompt template was specified for this model.") } default: @@ -526,35 +469,35 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { } func buildModelfile(opts runOptions) string { - var mf strings.Builder - model := opts.ParentModel - if model == "" { - model = opts.Model - } - fmt.Fprintf(&mf, "FROM %s\n", model) + var f parser.File + f.Commands = append(f.Commands, parser.Command{Name: "model", Args: cmp.Or(opts.ParentModel, opts.Model)}) + if opts.System != "" { - fmt.Fprintf(&mf, "SYSTEM \"\"\"%s\"\"\"\n", opts.System) + f.Commands = append(f.Commands, parser.Command{Name: "system", Args: opts.System}) } - if opts.Template != "" { - fmt.Fprintf(&mf, "TEMPLATE \"\"\"%s\"\"\"\n", opts.Template) - } - - keys := make([]string, 0) - for k := range opts.Options { - keys = append(keys, k) - } - sort.Strings(keys) + keys := maps.Keys(opts.Options) + slices.Sort(keys) for _, k := range keys { - fmt.Fprintf(&mf, "PARAMETER %s %v\n", k, opts.Options[k]) + v := opts.Options[k] + var cmds []parser.Command + switch t := v.(type) { + case []string: + for _, s := range t { + cmds = append(cmds, parser.Command{Name: k, Args: s}) + } + default: + cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", t)}) + } + + f.Commands = append(f.Commands, cmds...) 
} - fmt.Fprintln(&mf) for _, msg := range opts.Messages { - fmt.Fprintf(&mf, "MESSAGE %s \"\"\"%s\"\"\"\n", msg.Role, msg.Content) + f.Commands = append(f.Commands, parser.Command{Name: "message", Args: fmt.Sprintf("%s: %s", msg.Role, msg.Content)}) } - return mf.String() + return f.String() } func normalizeFilePath(fp string) string { @@ -640,7 +583,7 @@ func getImageData(filePath string) ([]byte, error) { // Check if the file size exceeds 100MB var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes if info.Size() > maxSize { - return nil, fmt.Errorf("file size exceeds maximum limit (100MB)") + return nil, errors.New("file size exceeds maximum limit (100MB)") } buf = make([]byte, info.Size()) diff --git a/cmd/interactive_test.go b/cmd/interactive_test.go index d9af01eb8..bb7e0abaf 100644 --- a/cmd/interactive_test.go +++ b/cmd/interactive_test.go @@ -1,12 +1,10 @@ package cmd import ( - "bytes" "testing" - "text/template" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "github.com/ollama/ollama/api" ) @@ -57,61 +55,53 @@ d:\path with\spaces\seven.svg inbetween7 c:\users\jdoe\eight.png inbetween8 func TestModelfileBuilder(t *testing.T) { opts := runOptions{ - Model: "hork", - System: "You are part horse and part shark, but all hork. Do horklike things", - Template: "This is a template.", + Model: "hork", + System: "You are part horse and part shark, but all hork. Do horklike things", Messages: []api.Message{ {Role: "user", Content: "Hey there hork!"}, {Role: "assistant", Content: "Yes it is true, I am half horse, half shark."}, }, - Options: map[string]interface{}{}, + Options: map[string]any{ + "temperature": 0.9, + "seed": 42, + "penalize_newline": false, + "stop": []string{"hi", "there"}, + }, } - opts.Options["temperature"] = 0.9 - opts.Options["seed"] = 42 - opts.Options["penalize_newline"] = false - opts.Options["stop"] = []string{"hi", "there"} - - mf := buildModelfile(opts) - expectedModelfile := `FROM {{.Model}} -SYSTEM """{{.System}}""" -TEMPLATE """{{.Template}}""" + t.Run("model", func(t *testing.T) { + expect := `FROM hork +SYSTEM You are part horse and part shark, but all hork. Do horklike things PARAMETER penalize_newline false PARAMETER seed 42 -PARAMETER stop [hi there] +PARAMETER stop hi +PARAMETER stop there PARAMETER temperature 0.9 - -MESSAGE user """Hey there hork!""" -MESSAGE assistant """Yes it is true, I am half horse, half shark.""" +MESSAGE user Hey there hork! +MESSAGE assistant Yes it is true, I am half horse, half shark. ` - tmpl, err := template.New("").Parse(expectedModelfile) - require.NoError(t, err) + actual := buildModelfile(opts) + if diff := cmp.Diff(expect, actual); diff != "" { + t.Errorf("mismatch (-want +got):\n%s", diff) + } + }) - var buf bytes.Buffer - err = tmpl.Execute(&buf, opts) - require.NoError(t, err) - assert.Equal(t, buf.String(), mf) - - opts.ParentModel = "horseshark" - mf = buildModelfile(opts) - expectedModelfile = `FROM {{.ParentModel}} -SYSTEM """{{.System}}""" -TEMPLATE """{{.Template}}""" + t.Run("parent model", func(t *testing.T) { + opts.ParentModel = "horseshark" + expect := `FROM horseshark +SYSTEM You are part horse and part shark, but all hork. Do horklike things PARAMETER penalize_newline false PARAMETER seed 42 -PARAMETER stop [hi there] +PARAMETER stop hi +PARAMETER stop there PARAMETER temperature 0.9 - -MESSAGE user """Hey there hork!""" -MESSAGE assistant """Yes it is true, I am half horse, half shark.""" +MESSAGE user Hey there hork! 
+MESSAGE assistant Yes it is true, I am half horse, half shark. ` - - tmpl, err = template.New("").Parse(expectedModelfile) - require.NoError(t, err) - - var parentBuf bytes.Buffer - err = tmpl.Execute(&parentBuf, opts) - require.NoError(t, err) - assert.Equal(t, parentBuf.String(), mf) + actual := buildModelfile(opts) + if diff := cmp.Diff(expect, actual); diff != "" { + t.Errorf("mismatch (-want +got):\n%s", diff) + } + }) } diff --git a/cmd/start_darwin.go b/cmd/start_darwin.go index 82b09ad62..1a9a1ae87 100644 --- a/cmd/start_darwin.go +++ b/cmd/start_darwin.go @@ -2,7 +2,7 @@ package cmd import ( "context" - "fmt" + "errors" "os" "os/exec" "strings" @@ -20,7 +20,7 @@ func startApp(ctx context.Context, client *api.Client) error { return err } if !strings.Contains(link, "Ollama.app") { - return fmt.Errorf("could not find ollama app") + return errors.New("could not find ollama app") } path := strings.Split(link, "Ollama.app") if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil { diff --git a/cmd/start_default.go b/cmd/start_default.go index c9d6137b9..5eabb2862 100644 --- a/cmd/start_default.go +++ b/cmd/start_default.go @@ -4,11 +4,11 @@ package cmd import ( "context" - "fmt" + "errors" "github.com/ollama/ollama/api" ) func startApp(ctx context.Context, client *api.Client) error { - return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it") + return errors.New("could not connect to ollama server, run 'ollama serve' to start it") } diff --git a/cmd/start_windows.go b/cmd/start_windows.go index 6024a2352..5bca24331 100644 --- a/cmd/start_windows.go +++ b/cmd/start_windows.go @@ -31,7 +31,7 @@ func startApp(ctx context.Context, client *api.Client) error { // Finally look in the path appExe, err = exec.LookPath(AppName) if err != nil { - return fmt.Errorf("could not locate ollama app") + return errors.New("could not locate ollama app") } } } diff --git a/convert/convert.go b/convert/convert.go index 103de457c..44783b6e8 100644 --- a/convert/convert.go +++ b/convert/convert.go @@ -1,200 +1,232 @@ package convert import ( - "cmp" - "encoding/binary" "encoding/json" + "errors" "fmt" "io" + "io/fs" "log/slog" - "os" - "path/filepath" - "slices" "strings" - "google.golang.org/protobuf/proto" - - "github.com/ollama/ollama/convert/sentencepiece" "github.com/ollama/ollama/llm" ) -const ( - _ int32 = iota - tokenTypeNormal - tokenTypeUnknown - tokenTypeControl - tokenTypeUserDefined - tokenTypeUnused - tokenTypeByte -) - -type Params struct { - Architectures []string `json:"architectures"` - VocabSize int `json:"vocab_size"` - HiddenSize int `json:"hidden_size"` // n_embd - HiddenLayers int `json:"num_hidden_layers"` // n_layer - ContextSize int `json:"max_position_embeddings"` - IntermediateSize int `json:"intermediate_size"` - AttentionHeads int `json:"num_attention_heads"` // n_head - KeyValHeads int `json:"num_key_value_heads"` - NormEPS float64 `json:"rms_norm_eps"` - BoSTokenID int `json:"bos_token_id"` - EoSTokenID int `json:"eos_token_id"` - HeadDimension int `json:"head_dim"` - PaddingTokenID int `json:"pad_token_id"` - RopeFrequencyBase float64 `json:"rope_theta"` - - Experts int `json:"num_local_experts"` - ExpertsUsed int `json:"num_experts_per_tok"` - - PreTokenizer string - - ByteOrder +type ModelParameters struct { + Architectures []string `json:"architectures"` + VocabSize uint32 `json:"vocab_size"` } -type ByteOrder interface { - binary.ByteOrder - binary.AppendByteOrder +type AdapterParameters struct { + Alpha uint32 
`json:"lora_alpha"` + LoraLayers uint32 `json:"lora_layers"` + LoraParameters struct { + Rank uint32 `json:"rank"` + Alpha float32 `json:"alpha"` + Scale float32 `json:"scale"` + } `json:"lora_parameters"` } -type ModelArch interface { - GetTensors() error - LoadVocab() error - WriteGGUF(io.WriteSeeker) error +func (ModelParameters) KV(t *Tokenizer) llm.KV { + kv := llm.KV{ + "general.file_type": uint32(1), + "general.quantization_version": uint32(2), + "tokenizer.ggml.pre": t.Pre, + "tokenizer.ggml.model": t.Vocabulary.Model, + "tokenizer.ggml.tokens": t.Vocabulary.Tokens, + "tokenizer.ggml.scores": t.Vocabulary.Scores, + "tokenizer.ggml.token_type": t.Vocabulary.Types, + } + + if len(t.Merges) > 0 { + kv["tokenizer.ggml.merges"] = t.Merges + } + + if t.Template != "" { + kv["tokenizer.chat_template"] = t.Template + } + + for _, sv := range t.SpecialVocabulary { + kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID) + kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken + } + + return kv } -type ModelFormat interface { - GetLayerName(string) (string, error) - GetTensors(string, *Params) ([]llm.Tensor, error) - GetParams(string) (*Params, error) - GetModelArch(string, string, *Params) (ModelArch, error) +func (p AdapterParameters) KV() llm.KV { + var alpha float32 + if p.LoraParameters.Alpha == 0 { + alpha = float32(p.Alpha) + } else { + alpha = p.LoraParameters.Alpha + } + + kv := llm.KV{ + "adapter.lora.alpha": alpha, + "adapter.type": "lora", + "general.file_type": uint32(1), + "general.type": "adapter", + "general.version": "v0.2", + } + + return kv } -type ModelData struct { - Path string - Name string - Params *Params - Vocab *Vocab - Tensors []llm.Tensor - Format ModelFormat +func (ModelParameters) specialTokenTypes() []string { + return []string{ + "bos", "eos", "unk", "sep", "pad", "cls", "mask", + } } -func GetModelFormat(dirname string) (ModelFormat, error) { - files, err := filepath.Glob(filepath.Join(dirname, "*")) +func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error { + return llm.WriteGGUF(ws, kv, ts) +} + +func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error { + return llm.WriteGGUF(ws, kv, ts) +} + +type ModelConverter interface { + // KV maps parameters to LLM key-values + KV(*Tokenizer) llm.KV + // Tensors maps input tensors to LLM tensors. Model specific modifications can be done here. + Tensors([]Tensor) []llm.Tensor + // Replacements returns a list of string pairs to replace in tensor names. + // See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details + Replacements() []string + + // specialTokenTypes returns any special token types the model uses + specialTokenTypes() []string + // writeFile writes the model to the provided io.WriteSeeker + writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error +} + +type moreParser interface { + parseMore(fs.FS) error +} + +type AdapterConverter interface { + // KV maps parameters to LLM key-values + KV(llm.KV) llm.KV + // Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here. + Tensors([]Tensor) []llm.Tensor + // Replacements returns a list of string pairs to replace in tensor names. 
+ // See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details + Replacements() []string + + writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error +} + +func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error { + bts, err := fs.ReadFile(fsys, "adapter_config.json") if err != nil { - return nil, err + return err } - for _, fn := range files { - if strings.HasSuffix(fn, ".safetensors") { - return &SafetensorFormat{}, nil - } else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") { - slog.Debug("model is torch") - return &TorchFormat{}, nil - } + var p AdapterParameters + if err := json.Unmarshal(bts, &p); err != nil { + return err } - return nil, fmt.Errorf("couldn't determine model format") -} + arch, ok := baseKV["general.architecture"] + if !ok { + return errors.New("architecture not set for the base model") + } -// Details on gguf's tokenizer can be found at: -// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer -type Vocab struct { - Tokens []string - Scores []float32 - Types []int32 - Merges []string -} + var conv AdapterConverter + switch arch { + case "llama": + conv = &llamaAdapter{} + case "gemma2": + conv = &gemma2Adapter{} + default: + return errors.New("unsupported architecture") + } -func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) { - slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model"))) - in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model")) + ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...)) if err != nil { - return nil, err + return err } - // To regenerate sentencepiece from the protobufs use: - // protoc -I=./ --go_out=./ sentencepiece_model.proto - modelProto := &sentencepiece.ModelProto{} - if err := proto.Unmarshal(in, modelProto); err != nil { - return nil, err + if err := json.Unmarshal(bts, conv); err != nil { + return err } - v := &Vocab{ - Tokens: make([]string, 0), - Scores: make([]float32, 0), - Types: make([]int32, 0), - } - - pieces := modelProto.GetPieces() - for _, p := range pieces { - v.Tokens = append(v.Tokens, p.GetPiece()) - v.Scores = append(v.Scores, p.GetScore()) - t := p.GetType() - switch t { - case sentencepiece.ModelProto_SentencePiece_UNKNOWN: - case sentencepiece.ModelProto_SentencePiece_CONTROL: - case sentencepiece.ModelProto_SentencePiece_UNUSED: - case sentencepiece.ModelProto_SentencePiece_BYTE: - default: - t = sentencepiece.ModelProto_SentencePiece_NORMAL - } - v.Types = append(v.Types, int32(t)) - } - - slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens))) - - // add any additional tokens - addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json")) - if os.IsNotExist(err) { - return v, nil - } else if err != nil { - return nil, err - } - - slog.Info("reading user defined tokens") - - var extraTokenData map[string]int - if err := json.Unmarshal(addIn, &extraTokenData); err != nil { - return nil, err - } - - type token struct { - key string - pos int - } - - extraTokens := make([]token, 0) - for k, id := range extraTokenData { - extraTokens = append(extraTokens, token{k, id}) - } - - slices.SortFunc(extraTokens, func(a, b token) int { - return cmp.Compare(a.pos, b.pos) - }) - - numToks := len(v.Tokens) - - for cnt, t := range extraTokens { - // the token id should match the specific index for the total number of tokens - if t.pos != cnt+numToks { - return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key) - } - v.Tokens = 
append(v.Tokens, t.key) - v.Scores = append(v.Scores, -1000.0) - v.Types = append(v.Types, tokenTypeUserDefined) - } - slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens))) - - if params.VocabSize > len(v.Tokens) { - missingTokens := params.VocabSize - len(v.Tokens) - slog.Warn(fmt.Sprintf("vocab is missing %d tokens", missingTokens)) - for cnt := range missingTokens { - v.Tokens = append(v.Tokens, fmt.Sprintf("", cnt+1)) - v.Scores = append(v.Scores, -1) - v.Types = append(v.Types, tokenTypeUserDefined) - } - } - - return v, nil + return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts)) +} + +// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations +// and files it finds in the input path. +// Supported input model formats include safetensors. +// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model. +func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error { + bts, err := fs.ReadFile(fsys, "config.json") + if err != nil { + return err + } + + var p ModelParameters + if err := json.Unmarshal(bts, &p); err != nil { + return err + } + + if len(p.Architectures) < 1 { + return errors.New("unknown architecture") + } + + var conv ModelConverter + switch p.Architectures[0] { + case "LlamaForCausalLM", "MistralForCausalLM": + conv = &llamaModel{} + case "MixtralForCausalLM": + conv = &mixtralModel{} + case "GemmaForCausalLM": + conv = &gemmaModel{} + case "Gemma2ForCausalLM": + conv = &gemma2Model{} + case "Phi3ForCausalLM": + conv = &phi3Model{} + case "BertModel": + conv = &bertModel{} + default: + return errors.New("unsupported architecture") + } + + if err := json.Unmarshal(bts, conv); err != nil { + return err + } + + if t, ok := conv.(moreParser); ok { + if err := t.parseMore(fsys); err != nil { + return err + } + } + + t, err := parseTokenizer(fsys, conv.specialTokenTypes()) + if err != nil { + return err + } + + vocabSize := int(p.VocabSize) + switch { + case vocabSize > len(t.Vocabulary.Tokens): + slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens)) + for i := range vocabSize - len(t.Vocabulary.Tokens) { + t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i)) + t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1) + t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined) + } + case vocabSize < len(t.Vocabulary.Tokens): + return fmt.Errorf("vocabulary is larger than expected '%d' instead of '%d'", len(t.Vocabulary.Tokens), vocabSize) + default: + slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens)) + } + + ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...)) + if err != nil { + return err + } + + return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts)) } diff --git a/convert/convert_bert.go b/convert/convert_bert.go new file mode 100644 index 000000000..ea5facaa5 --- /dev/null +++ b/convert/convert_bert.go @@ -0,0 +1,174 @@ +package convert + +import ( + "cmp" + "encoding/json" + "io/fs" + "path/filepath" + "slices" + "strings" + + "github.com/ollama/ollama/llm" +) + +type bertModel struct { + ModelParameters + NLayers uint32 `json:"n_layers"` + NumHiddenLayers uint32 `json:"num_hidden_layers"` + NLayer uint32 `json:"n_layer"` + MaxPositionEmbeddings uint32 `json:"max_position_embeddings"` + NCtx uint32 `json:"n_ctx"` + HiddenSize uint32 `json:"hidden_size"` + NEmbd uint32 `json:"n_embd"` + IntermediateSize uint32 `json:"intermediate_size"` + NInner 
uint32 `json:"n_inner"` + NumAttentionHeads uint32 `json:"num_attention_heads"` + NHead uint32 `json:"n_head"` + NumKeyValueHeads uint32 `json:"num_key_value_heads"` + LayerNormEPS float32 `json:"layer_norm_eps"` + LayerNormEpsilon float32 `json:"layer_norm_epsilon"` + NormEpsilon float32 `json:"norm_epsilon"` + + PoolingType uint32 +} + +var ( + _ ModelConverter = (*bertModel)(nil) + _ moreParser = (*bertModel)(nil) +) + +func (p *bertModel) parseMore(fsys fs.FS) error { + bts, err := fs.ReadFile(fsys, "modules.json") + if err != nil { + return err + } + + var modules []struct { + Type string `json:"type"` + Path string `json:"path"` + } + + if err := json.Unmarshal(bts, &modules); err != nil { + return err + } + + var pooling string + for _, m := range modules { + if m.Type == "sentence_transformers.models.Pooling" { + pooling = m.Path + break + } + } + + if pooling != "" { + bts, err := fs.ReadFile(fsys, filepath.Join(pooling, "config.json")) + if err != nil { + return err + } + + var pc struct { + PoolingModeCLSToken bool `json:"pooling_mode_cls_token"` + PoolingModeMeanTokens bool `json:"pooling_mode_mean_tokens"` + } + + if err := json.Unmarshal(bts, &pc); err != nil { + return err + } + + if pc.PoolingModeMeanTokens { + p.PoolingType = 1 + } else if pc.PoolingModeCLSToken { + p.PoolingType = 2 + } + } + + return nil +} + +func (p *bertModel) KV(t *Tokenizer) llm.KV { + kv := p.ModelParameters.KV(t) + kv["general.architecture"] = "bert" + kv["bert.attention.causal"] = false + kv["bert.pooling_type"] = p.PoolingType + + kv["bert.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer) + + if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 { + kv["bert.context_length"] = contextLength + } + + if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 { + kv["bert.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd) + } + + if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 { + kv["bert.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner) + } + + if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 { + kv["bert.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead) + } + + if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 { + kv["bert.attention.layer_norm_epsilon"] = layerNormEpsilon + } + + kv["tokenizer.ggml.model"] = "bert" + kv["tokenizer.ggml.token_type_count"] = uint32(2) + + // convert to phantom space tokens + for i, e := range t.Tokens { + if strings.HasPrefix(e, "[") && strings.HasSuffix(e, "]") { + // noop + } else if strings.HasPrefix(e, "##") { + t.Tokens[i] = e[2:] + } else { + t.Tokens[i] = "\u2581" + e + } + } + + kv["tokenizer.ggml.tokens"] = t.Tokens + + return kv +} + +func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor { + var out []llm.Tensor + for _, t := range ts { + if slices.Contains([]string{ + "embeddings.position_ids", + "pooler.dense.weight", + "pooler.dense.bias", + }, t.Name()) { + continue + } + + out = append(out, llm.Tensor{ + Name: t.Name(), + Kind: t.Kind(), + Shape: t.Shape(), + WriterTo: t, + }) + } + + return out +} + +func (bertModel) Replacements() []string { + return []string{ + "encoder.layer", "blk", + "encoder.layers", "blk", + "embeddings.word_embeddings", "token_embd", + "embeddings.token_type_embeddings", "token_types", + "embeddings.LayerNorm", "token_embd_norm", + "embeddings.position_embeddings", "position_embd", + "attention.self.query", "attn_q", + "attention.self.key", 
"attn_k", + "attention.self.value", "attn_v", + "attention.output.dense", "attn_output", + "attention.output.LayerNorm", "attn_output_norm", + "intermediate.dense", "ffn_up", + "output.dense", "ffn_down", + "output.LayerNorm", "layer_output_norm", + } +} diff --git a/convert/convert_gemma.go b/convert/convert_gemma.go new file mode 100644 index 000000000..b88652947 --- /dev/null +++ b/convert/convert_gemma.go @@ -0,0 +1,100 @@ +package convert + +import ( + "strings" + + "github.com/pdevine/tensor" + "github.com/pdevine/tensor/native" + + "github.com/ollama/ollama/llm" +) + +type gemmaModel struct { + ModelParameters + MaxPositionEmbeddings uint32 `json:"max_position_embeddings"` + HiddenSize uint32 `json:"hidden_size"` + HiddenLayers uint32 `json:"num_hidden_layers"` + IntermediateSize uint32 `json:"intermediate_size"` + NumAttentionHeads uint32 `json:"num_attention_heads"` + NumKeyValueHeads uint32 `json:"num_key_value_heads"` + RMSNormEPS float32 `json:"rms_norm_eps"` + HeadDim uint32 `json:"head_dim"` +} + +var _ ModelConverter = (*gemmaModel)(nil) + +func (p *gemmaModel) KV(t *Tokenizer) llm.KV { + kv := p.ModelParameters.KV(t) + kv["general.architecture"] = "gemma" + kv["gemma.context_length"] = p.MaxPositionEmbeddings + kv["gemma.embedding_length"] = p.HiddenSize + kv["gemma.block_count"] = p.HiddenLayers + kv["gemma.feed_forward_length"] = p.IntermediateSize + kv["gemma.attention.head_count"] = p.NumAttentionHeads + kv["gemma.attention.head_count_kv"] = p.NumKeyValueHeads + kv["gemma.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS + kv["gemma.attention.key_length"] = p.HeadDim + kv["gemma.attention.value_length"] = p.HeadDim + kv["tokenizer.ggml.eot_token_id"] = uint32(107) + kv["tokenizer.ggml.middle_token_id"] = uint32(68) + kv["tokenizer.ggml.prefix_token_id"] = uint32(67) + kv["tokenizer.ggml.suffix_token_id"] = uint32(69) + return kv +} + +func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor { + var out []llm.Tensor + for _, t := range ts { + if strings.HasSuffix(t.Name(), "_norm.weight") { + t.SetRepacker(p.addOne) + } + + out = append(out, llm.Tensor{ + Name: t.Name(), + Kind: t.Kind(), + Shape: t.Shape(), + WriterTo: t, + }) + } + + return out +} + +func (p *gemmaModel) Replacements() []string { + return []string{ + "model.embed_tokens", "token_embd", + "model.norm", "output_norm", + "model.layers", "blk", + "input_layernorm", "attn_norm", + "self_attn.q_proj", "attn_q", + "self_attn.k_proj", "attn_k", + "self_attn.v_proj", "attn_v", + "self_attn.o_proj", "attn_output", + "mlp.gate_proj", "ffn_gate", + "mlp.down_proj", "ffn_down", + "mlp.up_proj", "ffn_up", + "post_attention_layernorm", "ffn_norm", + } +} + +func (*gemmaModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) { + n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data)) + ones := tensor.Ones(tensor.Float32, int(shape[0])) + + n, err := n.Add(ones) + if err != nil { + return nil, err + } + + ts, err := native.SelectF32(n, 0) + if err != nil { + return nil, err + } + + var f32s []float32 + for _, t := range ts { + f32s = append(f32s, t...) 
+ } + + return f32s, nil +} diff --git a/convert/convert_gemma2.go b/convert/convert_gemma2.go new file mode 100644 index 000000000..0f98c1e38 --- /dev/null +++ b/convert/convert_gemma2.go @@ -0,0 +1,53 @@ +package convert + +import ( + "github.com/ollama/ollama/llm" +) + +type gemma2Model struct { + gemmaModel + SlidingWindow uint32 `json:"sliding_window"` + AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"` + FinalLogitSoftcap float32 `json:"final_logit_softcapping"` +} + +func (p *gemma2Model) KV(t *Tokenizer) llm.KV { + kv := p.ModelParameters.KV(t) + kv["general.architecture"] = "gemma2" + kv["gemma2.context_length"] = p.MaxPositionEmbeddings + kv["gemma2.embedding_length"] = p.HiddenSize + kv["gemma2.block_count"] = p.HiddenLayers + kv["gemma2.feed_forward_length"] = p.IntermediateSize + kv["gemma2.attention.head_count"] = p.NumAttentionHeads + kv["gemma2.attention.head_count_kv"] = p.NumKeyValueHeads + kv["gemma2.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS + kv["gemma2.attention.key_length"] = p.HeadDim + kv["gemma2.attention.value_length"] = p.HeadDim + kv["gemma2.attention.sliding_window"] = p.SlidingWindow + kv["gemma2.attn_logit_softcapping"] = p.AttentionLogitSoftcap + kv["gemma2.final_logit_softcapping"] = p.FinalLogitSoftcap + kv["tokenizer.ggml.eot_token_id"] = uint32(107) + kv["tokenizer.ggml.middle_token_id"] = uint32(68) + kv["tokenizer.ggml.prefix_token_id"] = uint32(67) + kv["tokenizer.ggml.suffix_token_id"] = uint32(69) + return kv +} + +func (p *gemma2Model) Replacements() []string { + return []string{ + "model.embed_tokens", "token_embd", + "model.norm", "output_norm", + "model.layers", "blk", + "input_layernorm", "attn_norm", + "self_attn.q_proj", "attn_q", + "self_attn.k_proj", "attn_k", + "self_attn.v_proj", "attn_v", + "self_attn.o_proj", "attn_output", + "mlp.gate_proj", "ffn_gate", + "mlp.down_proj", "ffn_down", + "mlp.up_proj", "ffn_up", + "post_attention_layernorm", "post_attention_norm", + "pre_feedforward_layernorm", "ffn_norm", + "post_feedforward_layernorm", "post_ffw_norm", + } +} diff --git a/convert/convert_gemma2_adapter.go b/convert/convert_gemma2_adapter.go new file mode 100644 index 000000000..a89a25f4c --- /dev/null +++ b/convert/convert_gemma2_adapter.go @@ -0,0 +1,91 @@ +package convert + +import ( + "strings" + + "github.com/pdevine/tensor" + "github.com/pdevine/tensor/native" + + "github.com/ollama/ollama/llm" +) + +type gemma2Adapter struct { + AdapterParameters +} + +var _ AdapterConverter = (*gemma2Adapter)(nil) + +func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV { + kv := p.AdapterParameters.KV() + kv["general.architecture"] = "gemma2" + return kv +} + +func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor { + var out []llm.Tensor + for _, t := range ts { + shape := t.Shape() + if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) || + (strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) { + shape[0], shape[1] = shape[1], shape[0] + t.SetRepacker(p.repack) + } + + out = append(out, llm.Tensor{ + Name: t.Name(), + Kind: t.Kind(), + Shape: t.Shape(), + WriterTo: t, + }) + } + + return out +} + +func (p *gemma2Adapter) Replacements() []string { + return []string{ + "base_model.model.", "", + "model.layers", "blk", + "self_attn.q_proj", "attn_q", + "self_attn.k_proj", "attn_k", + "self_attn.v_proj", "attn_v", + "self_attn.o_proj", "attn_output", + "mlp.gate_proj", "ffn_gate", + "mlp.down_proj", "ffn_down", + "mlp.up_proj", "ffn_up", + "lora_A.weight", "weight.lora_a", + "lora_B.weight", 
"weight.lora_b", + "lora_a", "weight.lora_a", + "lora_b", "weight.lora_b", + } +} + +func (p *gemma2Adapter) repack(name string, data []float32, shape []uint64) ([]float32, error) { + dims := []int{int(shape[1]), int(shape[0])} + + n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data)) + + if err := n.T(1, 0); err != nil { + return nil, err + } + + if err := n.Reshape(dims...); err != nil { + return nil, err + } + + if err := n.Transpose(); err != nil { + return nil, err + } + + ts, err := native.SelectF32(n, 1) + if err != nil { + return nil, err + } + + var f32s []float32 + for _, t := range ts { + f32s = append(f32s, t...) + } + + return f32s, nil +} diff --git a/convert/convert_llama.go b/convert/convert_llama.go new file mode 100644 index 000000000..5dedb829d --- /dev/null +++ b/convert/convert_llama.go @@ -0,0 +1,213 @@ +package convert + +import ( + "cmp" + "fmt" + "math" + "strings" + + "github.com/pdevine/tensor" + "github.com/pdevine/tensor/native" + + "github.com/ollama/ollama/llm" +) + +type llamaModel struct { + ModelParameters + NLayers uint32 `json:"n_layers"` + NumHiddenLayers uint32 `json:"num_hidden_layers"` + NLayer uint32 `json:"n_layer"` + MaxPositionEmbeddings uint32 `json:"max_position_embeddings"` + NCtx uint32 `json:"n_ctx"` + HiddenSize uint32 `json:"hidden_size"` + NEmbd uint32 `json:"n_embd"` + IntermediateSize uint32 `json:"intermediate_size"` + NInner uint32 `json:"n_inner"` + NumAttentionHeads uint32 `json:"num_attention_heads"` + NHead uint32 `json:"n_head"` + NumKeyValueHeads uint32 `json:"num_key_value_heads"` + RopeTheta float32 `json:"rope_theta"` + RopeScaling struct { + Type string `json:"type"` + RopeType string `json:"rope_type"` + Factor float32 `json:"factor"` + LowFrequencyFactor float32 `json:"low_freq_factor"` + HighFrequencyFactor float32 `json:"high_freq_factor"` + OriginalMaxPositionalEmbeddings uint32 `json:"original_max_positional_embeddings"` + + factors ropeFactor + } `json:"rope_scaling"` + RMSNormEPS float32 `json:"rms_norm_eps"` + LayerNormEPS float32 `json:"layer_norm_eps"` + LayerNormEpsilon float32 `json:"layer_norm_epsilon"` + NormEpsilon float32 `json:"norm_epsilon"` + HeadDim uint32 `json:"head_dim"` +} + +var _ ModelConverter = (*llamaModel)(nil) + +func (p *llamaModel) KV(t *Tokenizer) llm.KV { + kv := p.ModelParameters.KV(t) + kv["general.architecture"] = "llama" + kv["llama.vocab_size"] = p.VocabSize + + kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer) + + if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 { + kv["llama.context_length"] = contextLength + } + + if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 { + kv["llama.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd) + } + + if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 { + kv["llama.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner) + } + + if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 { + kv["llama.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead) + kv["llama.rope.dimension_count"] = p.HiddenSize / headCount + } + + if p.RopeTheta > 0 { + kv["llama.rope.freq_base"] = p.RopeTheta + } + + if p.RopeScaling.Type == "linear" { + kv["llama.rope.scaling.type"] = p.RopeScaling.Type + kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor + } else if p.RopeScaling.RopeType == "llama3" { + dim := p.HiddenSize / p.NumAttentionHeads + for i := uint32(0); i < dim; i += 2 { + factor := cmp.Or(p.RopeScaling.Factor, 
8.0) + factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0) + factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0) + + original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192) + lambdaLow := float32(original) / factorLow + lambdaHigh := float32(original) / factorHigh + + lambda := 2 * math.Pi * math.Pow(float64(p.RopeTheta), float64(i)/float64(dim)) + if lambda < float64(lambdaHigh) { + p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0) + } else if lambda > float64(lambdaLow) { + p.RopeScaling.factors = append(p.RopeScaling.factors, factor) + } else { + smooth := (float32(original)/float32(lambda) - factorLow) / (factorHigh - factorLow) + p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0/((1-smooth)/factor+smooth)) + } + } + } + + if p.NumKeyValueHeads > 0 { + kv["llama.attention.head_count_kv"] = p.NumKeyValueHeads + } + + if p.RMSNormEPS > 0 { + kv["llama.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS + } + + if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 { + kv["llama.attention.layer_norm_epsilon"] = layerNormEpsilon + } + + if p.HeadDim > 0 { + kv["llama.attention.key_length"] = p.HeadDim + kv["llama.attention.value_length"] = p.HeadDim + } + + return kv +} + +func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor { + var out []llm.Tensor + + if p.RopeScaling.factors != nil { + out = append(out, llm.Tensor{ + Name: "rope_freqs.weight", + Kind: 0, + Shape: []uint64{uint64(len(p.RopeScaling.factors))}, + WriterTo: p.RopeScaling.factors, + }) + } + + for _, t := range ts { + if strings.HasSuffix(t.Name(), "attn_q.weight") || + strings.HasSuffix(t.Name(), "attn_k.weight") { + t.SetRepacker(p.repack) + } + + out = append(out, llm.Tensor{ + Name: t.Name(), + Kind: t.Kind(), + Shape: t.Shape(), + WriterTo: t, + }) + } + + return out +} + +func (p *llamaModel) Replacements() []string { + return []string{ + "lm_head", "output", + "model.embed_tokens", "token_embd", + "model.norm", "output_norm", + "model.layers", "blk", + "input_layernorm", "attn_norm", + "self_attn.q_proj", "attn_q", + "self_attn.k_proj", "attn_k", + "self_attn.v_proj", "attn_v", + "self_attn.o_proj", "attn_output", + "mlp.gate_proj", "ffn_gate", + "mlp.down_proj", "ffn_down", + "mlp.up_proj", "ffn_up", + "post_attention_layernorm", "ffn_norm", + } +} + +func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) { + var dims []int + for _, dim := range shape { + dims = append(dims, int(dim)) + } + + var heads uint32 + if strings.HasSuffix(name, "attn_q.weight") { + heads = p.NumAttentionHeads + } else if strings.HasSuffix(name, "attn_k.weight") { + heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads) + } else { + return nil, fmt.Errorf("unknown tensor for repack: %s", name) + } + + n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data)) + if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil { + return nil, err + } + + if err := n.T(0, 2, 1, 3); err != nil { + return nil, err + } + + if err := n.Reshape(dims...); err != nil { + return nil, err + } + + if err := n.Transpose(); err != nil { + return nil, err + } + + ts, err := native.SelectF32(n, 1) + if err != nil { + return nil, err + } + + var f32s []float32 + for _, t := range ts { + f32s = append(f32s, t...) 
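// This is the usual llama.cpp-style rotary permute: HF checkpoints
// interleave the two rotary halves within each attention head, so Q/K
// weights are reshaped to [heads, 2, head_dim/2, ...], axes 1 and 2 are
// swapped, and the result is flattened back into the layout GGML's RoPE
// kernels expect.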
+ } + + return f32s, nil +} diff --git a/convert/convert_llama_adapter.go b/convert/convert_llama_adapter.go new file mode 100644 index 000000000..08ddee10a --- /dev/null +++ b/convert/convert_llama_adapter.go @@ -0,0 +1,169 @@ +package convert + +import ( + "cmp" + "strings" + + "github.com/pdevine/tensor" + "github.com/pdevine/tensor/native" + + "github.com/ollama/ollama/llm" +) + +type llamaAdapter struct { + AdapterParameters + NumAttentionHeads uint32 `json:"num_attention_heads"` + NumKeyValueHeads uint32 `json:"num_key_value_heads"` +} + +var _ AdapterConverter = (*llamaAdapter)(nil) + +func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV { + kv := p.AdapterParameters.KV() + kv["general.architecture"] = "llama" + kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"] + kv["llama.attention.head_count_kv"] = baseKV["llama.attention.head_count_kv"] + + p.NumAttentionHeads = baseKV["llama.attention.head_count"].(uint32) + + return kv +} + +func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor { + var out []llm.Tensor + for _, t := range ts { + shape := t.Shape() + if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) || + (strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) { + shape[0], shape[1] = shape[1], shape[0] + t.SetRepacker(p.repackAndTranspose) + } else { + t.SetRepacker(p.repack) + } + + out = append(out, llm.Tensor{ + Name: t.Name(), + Kind: t.Kind(), + Shape: shape, + WriterTo: t, + }) + } + + return out +} + +func (p *llamaAdapter) Replacements() []string { + return []string{ + "base_model.model.", "", + "model.layers", "blk", + "self_attn.q_proj", "attn_q", + "self_attn.k_proj", "attn_k", + "self_attn.v_proj", "attn_v", + "self_attn.o_proj", "attn_output", + "mlp.gate_proj", "ffn_gate", + "mlp.down_proj", "ffn_down", + "mlp.up_proj", "ffn_up", + "lora_A.weight", "weight.lora_a", + "lora_B.weight", "weight.lora_b", + "lora_a", "weight.lora_a", + "lora_b", "weight.lora_b", + } +} + +func (p *llamaAdapter) repack(name string, data []float32, shape []uint64) ([]float32, error) { + dims := []int{int(shape[1]), int(shape[0])} + + var heads uint32 + if strings.HasSuffix(name, "attn_q.weight.lora_a") { + heads = p.NumAttentionHeads + } else if strings.HasSuffix(name, "attn_k.weight.lora_a") { + heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads) + } else { + return data, nil + } + + n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data)) + + if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil { + return nil, err + } + + if err := n.T(0, 2, 1, 3); err != nil { + return nil, err + } + + if err := n.Reshape(dims...); err != nil { + return nil, err + } + + if err := n.Transpose(); err != nil { + return nil, err + } + + ts, err := native.SelectF32(n, 1) + if err != nil { + return nil, err + } + + var f32s []float32 + for _, t := range ts { + f32s = append(f32s, t...) 
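// Q/K lora_a factors get the same rotary permute as the base model's Q/K
// weights (see llamaModel.repack), keeping the low-rank update aligned
// with the repacked base layout; all other tensors pass through unchanged
// via the early "return data, nil" above.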
+ } + + return f32s, nil +} + +func (p *llamaAdapter) repackAndTranspose(name string, data []float32, shape []uint64) ([]float32, error) { + dims := []int{int(shape[1]), int(shape[0])} + + n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data)) + + var heads uint32 + if strings.HasSuffix(name, "attn_q.weight.lora_a") { + heads = p.NumAttentionHeads + } else if strings.HasSuffix(name, "attn_k.weight.lora_a") { + heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads) + } + + if heads > 0 { + if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil { + return nil, err + } + + if err := n.T(0, 2, 1, 3); err != nil { + return nil, err + } + + if err := n.Reshape(dims...); err != nil { + return nil, err + } + + if err := n.Transpose(); err != nil { + return nil, err + } + } + + if err := n.T(1, 0); err != nil { + return nil, err + } + + if err := n.Reshape(dims...); err != nil { + return nil, err + } + + if err := n.Transpose(); err != nil { + return nil, err + } + + ts, err := native.SelectF32(n, 1) + if err != nil { + return nil, err + } + + var f32s []float32 + for _, t := range ts { + f32s = append(f32s, t...) + } + + return f32s, nil +} diff --git a/convert/convert_mixtral.go b/convert/convert_mixtral.go new file mode 100644 index 000000000..43b7c8b10 --- /dev/null +++ b/convert/convert_mixtral.go @@ -0,0 +1,94 @@ +package convert + +import ( + "fmt" + "io" + "slices" + "strings" + + "github.com/ollama/ollama/llm" +) + +type mixtralModel struct { + llamaModel + NumLocalExperts uint32 `json:"num_local_experts"` + NumExpertsPerToken uint32 `json:"num_experts_per_tok"` +} + +func (p *mixtralModel) KV(t *Tokenizer) llm.KV { + kv := p.llamaModel.KV(t) + + if p.NumLocalExperts > 0 { + kv["llama.expert_count"] = p.NumLocalExperts + } + + if p.NumExpertsPerToken > 0 { + kv["llama.expert_used_count"] = p.NumExpertsPerToken + } + + return kv +} + +func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor { + oldnew := []string{ + "model.layers", "blk", + "w1", "ffn_gate_exps", + "w2", "ffn_down_exps", + "w3", "ffn_up_exps", + } + + for i := range p.NumLocalExperts { + oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".") + } + + // group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor + namer := strings.NewReplacer(oldnew...) + experts := make(map[string]experts) + + // merge experts into a single tensor while removing them from ts + ts = slices.DeleteFunc(ts, func(t Tensor) bool { + if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") { + return false + } + + name := namer.Replace(t.Name()) + experts[name] = append(experts[name], t) + return true + }) + + var out []llm.Tensor + for n, e := range experts { + // TODO(mxyng): sanity check experts + out = append(out, llm.Tensor{ + Name: n, + Kind: e[0].Kind(), + Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...), + WriterTo: e, + }) + } + + return append(out, p.llamaModel.Tensors(ts)...) +} + +func (p *mixtralModel) Replacements() []string { + return append( + p.llamaModel.Replacements(), + "block_sparse_moe.gate", "ffn_gate_inp", + ) +} + +type experts []Tensor + +func (e experts) WriteTo(w io.Writer) (int64, error) { + // TODO(mxyng): experts _should_ be numerically sorted by expert but this should check + for _, t := range e { + // the canonical merged experts tensor stacks all experts along a new, 0 axis, + // e.g. 
`tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers + // this accomplishes the same thing by writing each expert tensor in sequence + if _, err := t.WriteTo(w); err != nil { + return 0, err + } + } + + return 0, nil +} diff --git a/convert/convert_phi3.go b/convert/convert_phi3.go new file mode 100644 index 000000000..3de0d4049 --- /dev/null +++ b/convert/convert_phi3.go @@ -0,0 +1,123 @@ +package convert + +import ( + "cmp" + "encoding/binary" + "io" + "math" + "strings" + "sync" + + "github.com/ollama/ollama/llm" +) + +type phi3Model struct { + ModelParameters + NumHiddenLayers uint32 `json:"num_hidden_layers"` + NLayers uint32 `json:"n_layers"` + HiddenSize uint32 `json:"hidden_size"` + NEmbd uint32 `json:"n_embd"` + IntermediateSize uint32 `json:"intermediate_size"` + NumAttentionHeads uint32 `json:"num_attention_heads"` + NHead uint32 `json:"n_head"` + NumKeyValueHeads uint32 `json:"num_key_value_heads"` + NHeadKV uint32 `json:"n_head_kv"` + RopeTheta float32 `json:"rope_theta"` + RopeScaling struct { + Type string `json:"type"` + LongFactor ropeFactor `json:"long_factor"` + ShortFactor ropeFactor `json:"short_factor"` + } `json:"rope_scaling"` + RMSNormEPS float32 `json:"rms_norm_eps"` + NPositions uint32 `json:"n_positions"` + MaxPositionEmbeddings uint32 `json:"max_position_embeddings"` + OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"` + SlidingWindow uint32 `json:"sliding_window"` +} + +var _ ModelConverter = (*phi3Model)(nil) + +func (p *phi3Model) KV(t *Tokenizer) llm.KV { + kv := p.ModelParameters.KV(t) + kv["general.architecture"] = "phi3" + kv["phi3.context_length"] = p.MaxPositionEmbeddings + kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd) + kv["phi3.feed_forward_length"] = p.IntermediateSize + kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers) + kv["phi3.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead) + kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV) + kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS + kv["phi3.rope.dimension_count"] = p.HiddenSize / cmp.Or(p.NumAttentionHeads, p.NHead) + kv["phi3.rope.freq_base"] = p.RopeTheta + kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings + kv["phi3.attention.sliding_window"] = p.SlidingWindow + + scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings) + + switch p.RopeScaling.Type { + case "": + // no scaling + case "su", "longrope": + kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0)) + case "yarn": + kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0)) + default: + panic("unknown rope scaling type") + } + + return kv +} + +func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor { + var addRopeFactors sync.Once + + out := make([]llm.Tensor, 0, len(ts)+2) + for _, t := range ts { + if strings.HasPrefix(t.Name(), "blk.0.") { + addRopeFactors.Do(func() { + out = append(out, llm.Tensor{ + Name: "rope_factors_long.weight", + Kind: 0, + Shape: []uint64{uint64(len(p.RopeScaling.LongFactor))}, + WriterTo: p.RopeScaling.LongFactor, + }, llm.Tensor{ + Name: "rope_factors_short.weight", + Kind: 0, + Shape: []uint64{uint64(len(p.RopeScaling.ShortFactor))}, + WriterTo: p.RopeScaling.ShortFactor, + }) + }) + } + + out = append(out, llm.Tensor{ + Name: t.Name(), + Kind: t.Kind(), + Shape: t.Shape(), + WriterTo: t, + }) + } + + return out +} + 
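// A quick numeric check of the "su"/"longrope" attn_factor above, using
// Phi-3-mini-128k's published values (max_position_embeddings = 131072,
// original_max_position_embeddings = 4096, hence scale = 32):
//
//	attn := math.Sqrt(1 + math.Log(32)/math.Log(4096))
//	// = sqrt(1 + 3.466/8.318) ≈ 1.19
//
// The max(..., 1.0) clamp only engages when scale <= 1, i.e. when the
// context window was not extended.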
+func (p *phi3Model) Replacements() []string { + return []string{ + "lm_head", "output", + "model.embed_tokens", "token_embd", + "model.norm", "output_norm", + "model.layers", "blk", + "input_layernorm", "attn_norm", + "self_attn.qkv_proj", "attn_qkv", + "self_attn.o_proj", "attn_output", + "mlp.down_proj", "ffn_down", + "mlp.gate_up_proj", "ffn_up", + "post_attention_layernorm", "ffn_norm", + } +} + +type ropeFactor []float32 + +func (r ropeFactor) WriteTo(w io.Writer) (int64, error) { + err := binary.Write(w, binary.LittleEndian, r) + return 0, err +} diff --git a/convert/convert_test.go b/convert/convert_test.go index 6aa33a49c..2969673d5 100644 --- a/convert/convert_test.go +++ b/convert/convert_test.go @@ -1,48 +1,44 @@ -//go:build slow - package convert import ( + "bytes" + "crypto/sha256" + "encoding/binary" + "encoding/hex" + "encoding/json" + "flag" + "fmt" + "io" + "io/fs" + "log/slog" + "math" "os" "path/filepath" + "slices" + "strings" "testing" + "golang.org/x/exp/maps" + "github.com/ollama/ollama/llm" ) -func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) { +type tensorData struct { + Offsets []int `json:"data_offsets"` + Type string `json:"dtype"` + Shape []int `json:"shape"` +} + +func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) { t.Helper() - mf, err := GetModelFormat(p) - if err != nil { - t.Fatal(err) - } - - params, err := mf.GetParams(p) - if err != nil { - t.Fatal(err) - } - - arch, err := mf.GetModelArch("", p, params) - if err != nil { - t.Fatal(err) - } - - if err := arch.LoadVocab(); err != nil { - t.Fatal(err) - } - - if err := arch.GetTensors(); err != nil { - t.Fatal(err) - } - f, err := os.CreateTemp(t.TempDir(), "f16") if err != nil { t.Fatal(err) } defer f.Close() - if err := arch.WriteGGUF(f); err != nil { + if err := ConvertModel(fsys, f); err != nil { t.Fatal(err) } @@ -50,54 +46,431 @@ func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) { if err != nil { t.Fatal(err) } - defer r.Close() + t.Cleanup(func() { r.Close() }) - m, _, err := llm.DecodeGGML(r) + m, _, err := llm.DecodeGGML(r, math.MaxInt) if err != nil { t.Fatal(err) } - return m.KV(), m.Tensors() -} - -func TestConvertFull(t *testing.T) { - cases := []struct { - path string - arch string - tensors int - layers int - }{ - {"Meta-Llama-3-8B-Instruct", "llama", 291, 35}, - {"Mistral-7B-Instruct-v0.2", "llama", 291, 35}, - {"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35}, - {"gemma-2b-it", "gemma", 164, 20}, + if _, err := r.Seek(0, io.SeekStart); err != nil { + t.Fatal(err) } - for _, tt := range cases { - t.Run(tt.path, func(t *testing.T) { - p := filepath.Join("testdata", tt.path) - if _, err := os.Stat(p); err != nil { + return r, m.KV(), m.Tensors() +} + +func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors llm.Tensors) map[string]string { + actual := make(map[string]string) + for k, v := range kv { + if s, ok := v.(json.Marshaler); !ok { + actual[k] = fmt.Sprintf("%v", v) + } else { + bts, err := json.Marshal(s) + if err != nil { + t.Fatal(err) + } + + actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts)) + } + } + + for _, tensor := range tensors.Items { + sha256sum := sha256.New() + sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size())) + if _, err := io.Copy(sha256sum, sr); err != nil { + t.Fatal(err) + } + + actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil)) + } + + return actual +} + +func TestMain(m *testing.M) { + var level slog.Level + flag.TextVar(&level, "level", slog.LevelInfo, 
"log level") + flag.Parse() + slog.SetLogLoggerLevel(level) + os.Exit(m.Run()) +} + +func TestConvertModel(t *testing.T) { + cases := []string{ + "Meta-Llama-3-8B-Instruct", + "Meta-Llama-3.1-8B-Instruct", + "Mistral-7B-Instruct-v0.2", + "Mixtral-8x7B-Instruct-v0.1", + "gemma-2b-it", + "gemma-2-2b-it", + // microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8 + "Phi-3-mini-128k-instruct", + "all-MiniLM-L6-v2", + "gemma-2-9b-it", + } + + for i := range cases { + tt := cases[i] + t.Run(tt, func(t *testing.T) { + t.Parallel() + + p := filepath.Join("testdata", tt) + if testing.Short() { + t.Skip("skipping in short mode") + } else if _, err := os.Stat(p); err != nil { t.Skipf("%s not found", p) } - kv, tensors := convertFull(t, p) + f, kv, tensors := convertFull(t, os.DirFS(p)) + actual := generateResultsJSON(t, f, kv, tensors) - if kv.Architecture() != tt.arch { - t.Fatalf("expected llama, got %s", kv.Architecture()) + expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt))) + if err != nil { + t.Fatal(err) } - if kv.FileType().String() != "F16" { - t.Fatalf("expected F16, got %s", kv.FileType()) + var expect map[string]string + if err := json.NewDecoder(expectFile).Decode(&expect); err != nil { + t.Fatal(err) } - if len(tensors) != tt.tensors { - t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors)) - } - - layers := tensors.Layers() - if len(layers) != tt.layers { - t.Fatalf("expected %d layers, got %d", tt.layers, len(layers)) + keys := maps.Keys(expect) + slices.Sort(keys) + for _, k := range keys { + if v, ok := actual[k]; !ok { + t.Errorf("missing %s", k) + } else if v != expect[k] { + t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v) + } } }) } } + +func TestConvertInvalidTensorNames(t *testing.T) { + f, err := os.CreateTemp(t.TempDir(), "testmodel") + if err != nil { + t.Fatal(err) + } + defer f.Close() + + tempDir := t.TempDir() + + td := map[string]*tensorData{} + offset := 4096 + + td["model.layers.0.self_attn.q_proj.weight"] = &tensorData{ + Offsets: []int{0, offset}, + Type: "F32", + Shape: []int{4096, 4096}, + } + td["blk.0.attn_q.weight"] = &tensorData{ + Offsets: []int{offset, offset * 2}, + Type: "F32", + Shape: []int{4096, 4096}, + } + generateSafetensorTestData(t, tempDir, td) + + err = ConvertModel(os.DirFS(tempDir), f) + if err == nil || !strings.HasPrefix(err.Error(), "duplicate tensor name") { + t.Errorf("expected error but didn't get one") + } +} + +func TestConvertInvalidDatatype(t *testing.T) { + f, err := os.CreateTemp(t.TempDir(), "testmodel") + if err != nil { + t.Fatal(err) + } + defer f.Close() + + tempDir := t.TempDir() + + td := map[string]*tensorData{} + offset := 4096 * 14336 + + td["model.layers.0.mlp.down_proj.weight"] = &tensorData{ + Offsets: []int{0, offset}, + Type: "I8", + Shape: []int{4096, 14336}, + } + td["model.layers.0.mlp.down_proj.weight_format"] = &tensorData{ + Offsets: []int{offset, offset}, + Type: "U8", + Shape: []int{}, + } + generateSafetensorTestData(t, tempDir, td) + + err = ConvertModel(os.DirFS(tempDir), f) + if err == nil || err.Error() != "unsupported safetensors model" { + t.Errorf("expected error but didn't get one") + } +} + +func generateSafetensorTestData(t *testing.T, tempDir string, tensorData map[string]*tensorData) { + data, err := json.Marshal(tensorData) + if err != nil { + t.Fatal(err) + } + + var buf bytes.Buffer + + l := int64(len(data)) + err = binary.Write(&buf, binary.LittleEndian, l) + if err != nil { + t.Fatal(err) + } + + _, err = 
buf.Write(data) + if err != nil { + t.Fatal(err) + } + + fdata, err := os.Create(filepath.Join(tempDir, "model-00001-of-00001.safetensors")) + if err != nil { + t.Fatal(err) + } + defer fdata.Close() + + _, err = fdata.Write(buf.Bytes()) + if err != nil { + t.Fatal(err) + } + + configData := ` +{ + "architectures": [ + "LlamaForCausalLM" + ] +} +` + + f, err := os.Create(filepath.Join(tempDir, "config.json")) + if err != nil { + t.Fatal(err) + } + defer f.Close() + + _, err = f.WriteString(configData) + if err != nil { + t.Fatal(err) + } + + tokenizerData := ` +{ +} +` + + f, err = os.Create(filepath.Join(tempDir, "tokenizer.json")) + if err != nil { + t.Fatal(err) + } + defer f.Close() + + _, err = f.WriteString(tokenizerData) + if err != nil { + t.Fatal(err) + } +} + +func TestConvertAdapter(t *testing.T) { + type AdapterCase struct { + Name string + BaseKV map[string]any + Expected map[string]string + } + + cases := []AdapterCase{ + { + Name: "discollama", + BaseKV: map[string]any{ + "general.architecture": "llama", + "llama.attention.head_count": uint32(32), + "llama.attention.head_count_kv": uint32(8), + }, + Expected: map[string]string{ + "general.architecture": "llama", + "general.file_type": "1", + "general.parameter_count": "106496", + "general.type": "adapter", + "general.version": "v0.2", + "adapter.lora.alpha": "16", + "adapter.type": "lora", + "llama.attention.head_count": "32", + "llama.attention.head_count_kv": "8", + "blk.31.attn_q.weight.lora_a": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50", + "blk.31.attn_q.weight.lora_b": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50", + "blk.31.attn_v.weight.lora_a": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50", + "blk.31.attn_v.weight.lora_b": "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857", + }, + }, + } + + for _, c := range cases { + t.Run(c.Name, func(t *testing.T) { + t.Parallel() + + f, err := os.CreateTemp(t.TempDir(), "f16") + if err != nil { + t.Fatal(err) + } + defer f.Close() + + tempDir := t.TempDir() + generateLoraTestData(t, tempDir) + + if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil { + t.Fatal(err) + } + + r, err := os.Open(f.Name()) + if err != nil { + t.Fatal(err) + } + defer r.Close() + + m, _, err := llm.DecodeGGML(r, math.MaxInt) + if err != nil { + t.Fatal(err) + } + + if _, err := r.Seek(0, io.SeekStart); err != nil { + t.Fatal(err) + } + + actual := generateResultsJSON(t, r, m.KV(), m.Tensors()) + + keys := maps.Keys(c.Expected) + slices.Sort(keys) + for _, k := range keys { + if v, ok := actual[k]; !ok { + t.Errorf("missing %s", k) + } else if v != c.Expected[k] { + t.Errorf("unexpected %s: want %s, got %s", k, c.Expected[k], v) + } + } + }) + } +} + +func generateLoraTestData(t *testing.T, tempDir string) { + offset := 4096 * 8 * 4 + + td := map[string]*tensorData{"__metadata__": nil} + td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{ + Offsets: []int{0, offset}, + Type: "F32", + Shape: []int{4096, 8}, + } + td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{ + Offsets: []int{offset, offset * 2}, + Type: "F32", + Shape: []int{8, 4096}, + } + td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{ + Offsets: []int{offset * 2, offset * 3}, + Type: "F32", + Shape: []int{4096, 8}, + } + td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{ + Offsets: []int{offset * 3, offset*3 + 8*1024*4}, + Type: "F32", + Shape: []int{8, 1024}, + } + + data, err := json.Marshal(td) + if err != 
nil { + t.Fatal(err) + } + + var buf bytes.Buffer + + l := int64(len(data)) + err = binary.Write(&buf, binary.LittleEndian, l) + if err != nil { + t.Fatal(err) + } + + _, err = buf.Write(data) + if err != nil { + t.Fatal(err) + } + + // write some data for the tensors + + ones := make([]float32, 4096*8) + for i := range ones { + ones[i] = float32(1) + } + + for range 3 { + err = binary.Write(&buf, binary.LittleEndian, ones) + if err != nil { + t.Fatal(err) + } + } + + ones = make([]float32, 1024*8) + for i := range ones { + ones[i] = float32(1) + } + + err = binary.Write(&buf, binary.LittleEndian, ones) + if err != nil { + t.Fatal(err) + } + + fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors")) + if err != nil { + t.Fatal(err) + } + defer fdata.Close() + + _, err = fdata.Write(buf.Bytes()) + if err != nil { + t.Fatal(err) + } + + configData := ` +{ + "adapter_path": "adapters-test", + "batch_size": 8, + "config": "config-tiny.json", + "data": "../discollama-completion", + "grad_checkpoint": null, + "iters": 1000, + "learning_rate": 1e-05, + "lora_layers": 1, + "lora_parameters": { + "rank": 8, + "alpha": 16, + "dropout": 0.0, + "scale": 2.0 + }, + "lr_schedule": null, + "max_seq_length": 2048, + "model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct", + "resume_adapter_file": null, + "save_every": 100, + "seed": 0, + "steps_per_eval": 200, + "steps_per_report": 10, + "test": false, + "test_batches": 500, + "train": true, + "use_dora": false, + "val_batches": 25 +} +` + f, err := os.Create(filepath.Join(tempDir, "adapter_config.json")) + if err != nil { + t.Fatal(err) + } + defer f.Close() + + _, err = f.WriteString(configData) + if err != nil { + t.Fatal(err) + } +} diff --git a/convert/fs.go b/convert/fs.go new file mode 100644 index 000000000..31132dbe7 --- /dev/null +++ b/convert/fs.go @@ -0,0 +1,58 @@ +package convert + +import ( + "archive/zip" + "errors" + "io" + "io/fs" + "os" + "path/filepath" +) + +type ZipReader struct { + r *zip.Reader + p string + + // limit is the maximum size of a file that can be read directly + // from the zip archive. 
Files larger than this size will be extracted + limit int64 +} + +func NewZipReader(r *zip.Reader, p string, limit int64) fs.FS { + return &ZipReader{r, p, limit} +} + +func (z *ZipReader) Open(name string) (fs.File, error) { + r, err := z.r.Open(name) + if err != nil { + return nil, err + } + defer r.Close() + + if fi, err := r.Stat(); err != nil { + return nil, err + } else if fi.Size() < z.limit { + return r, nil + } + + if !filepath.IsLocal(name) { + return nil, zip.ErrInsecurePath + } + + n := filepath.Join(z.p, name) + if _, err := os.Stat(n); errors.Is(err, os.ErrNotExist) { + w, err := os.Create(n) + if err != nil { + return nil, err + } + defer w.Close() + + if _, err := io.Copy(w, r); err != nil { + return nil, err + } + } else if err != nil { + return nil, err + } + + return os.Open(n) +} diff --git a/convert/gemma.go b/convert/gemma.go deleted file mode 100644 index d01ffedf1..000000000 --- a/convert/gemma.go +++ /dev/null @@ -1,102 +0,0 @@ -package convert - -import ( - "fmt" - "io" - "log/slog" - "strings" - - "github.com/pdevine/tensor" - "github.com/pdevine/tensor/native" - - "github.com/ollama/ollama/llm" -) - -type GemmaModel struct { - ModelData -} - -func addOnes(data []float32, vectorSize int) ([]float32, error) { - n := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data)) - ones := tensor.Ones(tensor.Float32, vectorSize) - - n, err := n.Add(ones) - if err != nil { - return nil, err - } - - ts, err := native.SelectF32(n, 0) - if err != nil { - return nil, err - } - - var f32s []float32 - for _, t := range ts { - f32s = append(f32s, t...) - } - - return f32s, nil -} - -func (m *GemmaModel) GetTensors() error { - t, err := m.Format.GetTensors(m.Path, m.Params) - if err != nil { - return err - } - - slog.Debug(fmt.Sprintf("Total tensors: %d", len(t))) - for _, l := range t { - if strings.HasSuffix(l.Name, "norm.weight") { - wt := l.WriterTo.(safetensorWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - } - m.Tensors = append(m.Tensors, l) - } - - return nil -} - -func (m *GemmaModel) LoadVocab() error { - v, err := LoadSentencePieceTokens(m.Path, m.Params) - if err != nil { - return err - } - m.Vocab = v - return nil -} - -func (m *GemmaModel) Repack(_ string, data []float32, shape []uint64) ([]float32, error) { - return addOnes(data, int(shape[0])) -} - -func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error { - kv := llm.KV{ - "general.architecture": "gemma", - "general.name": m.Name, - "gemma.context_length": uint32(m.Params.ContextSize), - "gemma.embedding_length": uint32(m.Params.HiddenSize), - "gemma.block_count": uint32(m.Params.HiddenLayers), - "gemma.feed_forward_length": uint32(m.Params.IntermediateSize), - "gemma.attention.head_count": uint32(m.Params.AttentionHeads), - "gemma.attention.head_count_kv": uint32(m.Params.KeyValHeads), - "gemma.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), - "gemma.attention.key_length": uint32(m.Params.HeadDimension), - "gemma.attention.value_length": uint32(m.Params.HeadDimension), - "general.file_type": uint32(1), - "tokenizer.ggml.model": "llama", - - "tokenizer.ggml.tokens": m.Vocab.Tokens, - "tokenizer.ggml.scores": m.Vocab.Scores, - "tokenizer.ggml.token_type": m.Vocab.Types, - - "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), - "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), - "tokenizer.ggml.padding_token_id": uint32(m.Params.PaddingTokenID), - "tokenizer.ggml.unknown_token_id": uint32(3), - "tokenizer.ggml.add_bos_token": true, - "tokenizer.ggml.add_eos_token": 
false, - } - - return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) -} diff --git a/convert/llama.go b/convert/llama.go deleted file mode 100644 index b4211b02d..000000000 --- a/convert/llama.go +++ /dev/null @@ -1,159 +0,0 @@ -package convert - -import ( - "cmp" - "errors" - "fmt" - "io" - "os" - "path/filepath" - "regexp" - "strings" - - "github.com/pdevine/tensor" - "github.com/pdevine/tensor/native" - - "github.com/ollama/ollama/llm" -) - -type LlamaModel struct { - ModelData -} - -func (m *LlamaModel) GetTensors() error { - t, err := m.Format.GetTensors(m.Path, m.Params) - if err != nil { - return err - } - - pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$` - re, err := regexp.Compile(pattern) - if err != nil { - return err - } - - for _, l := range t { - matches := re.FindAllStringSubmatch(l.Name, -1) - if len(matches) > 0 { - switch m.Format.(type) { - case *TorchFormat: - wt := l.WriterTo.(torchWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - case *SafetensorFormat: - wt := l.WriterTo.(safetensorWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - } - } - m.Tensors = append(m.Tensors, l) - } - - return nil -} - -func (m *LlamaModel) LoadVocab() (err error) { - pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json")) - if errors.Is(err, os.ErrNotExist) { - return nil - } else if err != nil { - return err - } - - m.Vocab = &Vocab{} - for _, t := range ts { - m.Vocab.Tokens = append(m.Vocab.Tokens, t.Content) - m.Vocab.Types = append(m.Vocab.Types, t.Type()) - } - - m.Vocab.Merges = merges - m.Params.PreTokenizer = pre - return nil -} - -func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error { - kv := llm.KV{ - "general.architecture": "llama", - "general.name": m.Name, - "llama.vocab_size": uint32(len(m.Vocab.Tokens)), - "llama.context_length": uint32(m.Params.ContextSize), - "llama.embedding_length": uint32(m.Params.HiddenSize), - "llama.block_count": uint32(m.Params.HiddenLayers), - "llama.feed_forward_length": uint32(m.Params.IntermediateSize), - "llama.rope.freq_base": float32(m.Params.RopeFrequencyBase), - "llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads), - "llama.attention.head_count": uint32(m.Params.AttentionHeads), - "llama.attention.head_count_kv": uint32(m.Params.KeyValHeads), - "llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), - "general.file_type": uint32(1), - "tokenizer.ggml.model": "gpt2", - - "tokenizer.ggml.pre": m.Params.PreTokenizer, - "tokenizer.ggml.tokens": m.Vocab.Tokens, - "tokenizer.ggml.token_type": m.Vocab.Types, - - "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), - "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), - "tokenizer.ggml.unknown_token_id": uint32(0), - } - - if len(m.Vocab.Merges) > 0 { - kv["tokenizer.ggml.merges"] = m.Vocab.Merges - } else { - kv["tokenizer.ggml.scores"] = m.Vocab.Scores - } - - return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) -} - -func (m *LlamaModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) { - return llamaRepack(name, m.Params, data, shape) -} - -func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([]float32, error) { - var dims []int - for _, dim := range shape { - if dim != 0 { - dims = append(dims, int(dim)) - } - } - - var heads int - switch { - case strings.HasSuffix(name, "attn_q.weight"): - heads = params.AttentionHeads - case strings.HasSuffix(name, "attn_k.weight"): - heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
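The reshape/transpose sequence that follows implements the usual HF-to-GGUF rope permutation for attention Q/K weights: within each head, the rows of the first and second half-blocks are interleaved. A plain-slice sketch of the same row permutation (the permuteQK helper and the rows-by-cols framing are illustrative assumptions, not code from this diff):

```go
package main

import "fmt"

// permuteQK interleaves each head's two half-blocks of rows, matching
// the Reshape(heads, 2, headDim/2, ...) followed by T(0, 2, 1, 3) in
// llamaRepack: row i and row half+i of a head become rows 2i and 2i+1.
func permuteQK(w []float32, heads, rows, cols int) []float32 {
	headDim := rows / heads
	half := headDim / 2
	out := make([]float32, len(w))
	for h := 0; h < heads; h++ {
		for i := 0; i < half; i++ {
			dst1 := (h*headDim + 2*i) * cols
			dst2 := (h*headDim + 2*i + 1) * cols
			src1 := (h*headDim + i) * cols
			src2 := (h*headDim + half + i) * cols
			copy(out[dst1:dst1+cols], w[src1:src1+cols])
			copy(out[dst2:dst2+cols], w[src2:src2+cols])
		}
	}
	return out
}

func main() {
	// one head of dimension 4 with one column: rows 0,1,2,3 -> 0,2,1,3
	fmt.Println(permuteQK([]float32{0, 1, 2, 3}, 1, 4, 1))
}
```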
- default: - return nil, fmt.Errorf("unknown tensor name: %s", name) - } - - n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data)) - if err := n.Reshape(append([]int{heads, 2, dims[0] / heads / 2}, dims[1:]...)...); err != nil { - return nil, err - } - - if err := n.T(0, 2, 1, 3); err != nil { - return nil, err - } - - if err := n.Reshape(dims...); err != nil { - return nil, err - } - - if err := n.Transpose(); err != nil { - return nil, err - } - - ts, err := native.SelectF32(n, 1) - if err != nil { - return nil, err - } - - var f32s []float32 - for _, t := range ts { - f32s = append(f32s, t...) - } - - return f32s, nil -} diff --git a/convert/mistral.go b/convert/mistral.go deleted file mode 100644 index da6874cfd..000000000 --- a/convert/mistral.go +++ /dev/null @@ -1,79 +0,0 @@ -package convert - -import ( - "io" - "regexp" - - "github.com/ollama/ollama/llm" -) - -type MistralModel struct { - ModelData -} - -func (m *MistralModel) GetTensors() error { - t, err := m.Format.GetTensors(m.Path, m.Params) - if err != nil { - return err - } - - pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$` - re, err := regexp.Compile(pattern) - if err != nil { - return err - } - - for _, l := range t { - matches := re.FindAllStringSubmatch(l.Name, -1) - if len(matches) > 0 { - wt := l.WriterTo.(safetensorWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - } - m.Tensors = append(m.Tensors, l) - } - - return nil -} - -func (m *MistralModel) LoadVocab() error { - v, err := LoadSentencePieceTokens(m.Path, m.Params) - if err != nil { - return err - } - m.Vocab = v - return nil -} - -func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error { - kv := llm.KV{ - "general.architecture": "llama", - "general.name": m.Name, - "llama.context_length": uint32(m.Params.ContextSize), - "llama.embedding_length": uint32(m.Params.HiddenSize), - "llama.block_count": uint32(m.Params.HiddenLayers), - "llama.feed_forward_length": uint32(m.Params.IntermediateSize), - "llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads), - "llama.attention.head_count": uint32(m.Params.AttentionHeads), - "llama.attention.head_count_kv": uint32(m.Params.KeyValHeads), - "llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), - "general.file_type": uint32(1), - "tokenizer.ggml.model": "llama", - - "tokenizer.ggml.tokens": m.Vocab.Tokens, - "tokenizer.ggml.scores": m.Vocab.Scores, - "tokenizer.ggml.token_type": m.Vocab.Types, - - "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), - "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), - "tokenizer.ggml.add_bos_token": true, - "tokenizer.ggml.add_eos_token": false, - "tokenizer.ggml.unknown_token_id": uint32(0), - } - - return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) -} - -func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) { - return llamaRepack(name, m.Params, data, shape) -} diff --git a/convert/mixtral.go b/convert/mixtral.go deleted file mode 100644 index baea68cd3..000000000 --- a/convert/mixtral.go +++ /dev/null @@ -1,87 +0,0 @@ -package convert - -import ( - "io" - "regexp" - - "github.com/ollama/ollama/llm" -) - -type MixtralModel struct { - ModelData -} - -func (m *MixtralModel) GetTensors() error { - t, err := m.Format.GetTensors(m.Path, m.Params) - if err != nil { - return err - } - - pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$` - re, err := regexp.Compile(pattern) - if err != nil { - return err - } - - for _, l := range t { - matches :=
re.FindAllStringSubmatch(l.Name, -1) - if len(matches) > 0 { - wt := l.WriterTo.(safetensorWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - } - m.Tensors = append(m.Tensors, l) - } - - return nil -} - -func (m *MixtralModel) LoadVocab() error { - v, err := LoadSentencePieceTokens(m.Path, m.Params) - if err != nil { - return err - } - m.Vocab = v - return nil -} - -func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error { - kv := llm.KV{ - "general.architecture": "llama", - "general.name": m.Name, - "llama.block_count": uint32(m.Params.HiddenLayers), - "llama.context_length": uint32(m.Params.ContextSize), - "llama.embedding_length": uint32(m.Params.HiddenSize), - "llama.feed_forward_length": uint32(m.Params.IntermediateSize), - "llama.attention.head_count": uint32(m.Params.AttentionHeads), - "llama.attention.head_count_kv": uint32(m.Params.KeyValHeads), - - "llama.rope.freq_base": float32(m.Params.RopeFrequencyBase), - "llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), - - "llama.expert_count": uint32(m.Params.Experts), - "llama.expert_used_count": uint32(m.Params.ExpertsUsed), - - "llama.vocab_size": uint32(len(m.Vocab.Tokens)), - "llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads), - - "general.file_type": uint32(1), - "tokenizer.ggml.model": "llama", - - "tokenizer.ggml.tokens": m.Vocab.Tokens, - "tokenizer.ggml.scores": m.Vocab.Scores, - "tokenizer.ggml.token_type": m.Vocab.Types, - - "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), - "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), - "tokenizer.ggml.unknown_token_id": uint32(0), - "tokenizer.ggml.add_bos_token": true, - "tokenizer.ggml.add_eos_token": false, - } - - return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) -} - -func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) { - return llamaRepack(name, m.Params, data, shape) -} diff --git a/convert/reader.go b/convert/reader.go new file mode 100644 index 000000000..c1218e66d --- /dev/null +++ b/convert/reader.go @@ -0,0 +1,86 @@ +package convert + +import ( + "errors" + "io" + "io/fs" + "strings" +) + +type Tensor interface { + Name() string + Shape() []uint64 + Kind() uint32 + SetRepacker(repacker) + WriteTo(io.Writer) (int64, error) +} + +type tensorBase struct { + name string + shape []uint64 + repacker +} + +func (t tensorBase) Name() string { + return t.name +} + +func (t tensorBase) Shape() []uint64 { + return t.shape +} + +const ( + tensorKindF32 uint32 = iota + tensorKindF16 +) + +func (t tensorBase) Kind() uint32 { + if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") || + t.name == "token_types.weight" { + // these tensors are always F32 + return tensorKindF32 + } + + switch len(t.shape) { + case 0: + panic("invalid tensor shape") + case 1: + return tensorKindF32 + default: + return tensorKindF16 + } +} + +func (t *tensorBase) SetRepacker(fn repacker) { + t.repacker = fn +} + +type repacker func(string, []float32, []uint64) ([]float32, error) + +func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) { + patterns := []struct { + Pattern string + Func func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error) + }{ + {"model-*-of-*.safetensors", parseSafetensors}, + {"model.safetensors", parseSafetensors}, + {"adapters.safetensors", parseSafetensors}, + {"adapter_model.safetensors", parseSafetensors}, + {"pytorch_model-*-of-*.bin", parseTorch}, + {"pytorch_model.bin", parseTorch}, + {"consolidated.*.pth", parseTorch}, + } + + for _,
pattern := range patterns { + matches, err := fs.Glob(fsys, pattern.Pattern) + if err != nil { + return nil, err + } + + if len(matches) > 0 { + return pattern.Func(fsys, replacer, matches...) + } + } + + return nil, errors.New("unknown tensor format") +} diff --git a/convert/reader_safetensors.go b/convert/reader_safetensors.go new file mode 100644 index 000000000..b21d219c2 --- /dev/null +++ b/convert/reader_safetensors.go @@ -0,0 +1,163 @@ +package convert + +import ( + "bytes" + "encoding/binary" + "encoding/json" + "errors" + "fmt" + "io" + "io/fs" + "slices" + "strings" + + "github.com/d4l3k/go-bfloat16" + "github.com/x448/float16" + "golang.org/x/exp/maps" +) + +type safetensorMetadata struct { + Type string `json:"dtype"` + Shape []uint64 `json:"shape"` + Offsets []int64 `json:"data_offsets"` +} + +func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) { + var ts []Tensor + for _, p := range ps { + f, err := fsys.Open(p) + if err != nil { + return nil, err + } + defer f.Close() + + var n int64 + if err := binary.Read(f, binary.LittleEndian, &n); err != nil { + return nil, err + } + + b := bytes.NewBuffer(make([]byte, 0, n)) + if _, err = io.CopyN(b, f, n); err != nil { + return nil, err + } + + var headers map[string]safetensorMetadata + if err := json.NewDecoder(b).Decode(&headers); err != nil { + return nil, err + } + + keys := maps.Keys(headers) + slices.Sort(keys) + + names := make(map[string]struct{}, len(keys)) + + for _, key := range keys { + if value := headers[key]; value.Type != "" { + // bitsandbytes quantized models are unsupported + if len(value.Shape) == 0 { + return nil, errors.New("unsupported safetensors model") + } + ggufName := replacer.Replace(key) + if _, ok := names[ggufName]; ok { + return nil, fmt.Errorf("duplicate tensor name '%s' was found for this model", ggufName) + } + names[ggufName] = struct{}{} + ts = append(ts, safetensor{ + fs: fsys, + path: p, + dtype: value.Type, + offset: safetensorsPad(n, value.Offsets[0]), + size: safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]), + tensorBase: &tensorBase{ + name: ggufName, + shape: value.Shape, + }, + }) + } + } + } + + return ts, nil +} + +// safetensorsPad returns the absolute file offset of a tensor's data given the JSON header length n and the tensor's relative data offset +func safetensorsPad(n, offset int64) int64 { + return 8 + n + offset +} + +type safetensor struct { + fs fs.FS + path string + dtype string + offset int64 + size int64 + *tensorBase +} + +func (st safetensor) WriteTo(w io.Writer) (int64, error) { + f, err := st.fs.Open(st.path) + if err != nil { + return 0, err + } + defer f.Close() + + if seeker, ok := f.(io.Seeker); ok { + if _, err := seeker.Seek(st.offset, io.SeekStart); err != nil { + return 0, err + } + } else { + if _, err := io.CopyN(io.Discard, f, st.offset); err != nil { + return 0, err + } + } + + var f32s []float32 + switch st.dtype { + case "F32": + f32s = make([]float32, st.size/4) + if err = binary.Read(f, binary.LittleEndian, f32s); err != nil { + return 0, err + } + case "F16": + u16s := make([]uint16, st.size/2) + if err = binary.Read(f, binary.LittleEndian, u16s); err != nil { + return 0, err + } + + f32s = make([]float32, len(u16s)) + for i := range u16s { + f32s[i] = float16.Frombits(u16s[i]).Float32() + } + + case "BF16": + u8s := make([]uint8, st.size) + if err = binary.Read(f, binary.LittleEndian, u8s); err != nil { + return 0, err + } + + f32s = bfloat16.DecodeFloat32(u8s) + default: + return 0, fmt.Errorf("unknown data type: %s", st.dtype)
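The arithmetic in safetensorsPad mirrors the layout parseSafetensors reads: an 8-byte little-endian header length n, the JSON header itself, then raw tensor bytes whose data_offsets are relative to the end of the header, so a tensor's absolute position is 8 + n + offset. A minimal sketch that writes such a file by hand, much like the adapter test fixture earlier in this diff (the output file name is arbitrary):

```go
package main

import (
	"bytes"
	"encoding/binary"
	"log"
	"os"
)

func main() {
	// one F32 tensor "t" of shape 2x2; its 16 bytes of data start at
	// offset 0 relative to the end of the JSON header
	header := []byte(`{"t":{"dtype":"F32","shape":[2,2],"data_offsets":[0,16]}}`)

	var buf bytes.Buffer
	// 8-byte little-endian header length, as read by parseSafetensors
	if err := binary.Write(&buf, binary.LittleEndian, int64(len(header))); err != nil {
		log.Fatal(err)
	}
	buf.Write(header)
	// tensor data follows immediately after the header
	if err := binary.Write(&buf, binary.LittleEndian, []float32{1, 2, 3, 4}); err != nil {
		log.Fatal(err)
	}

	if err := os.WriteFile("tiny.safetensors", buf.Bytes(), 0o644); err != nil {
		log.Fatal(err)
	}
}
```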
+ } + + if st.repacker != nil { + f32s, err = st.repacker(st.Name(), f32s, st.Shape()) + if err != nil { + return 0, err + } + } + + switch st.Kind() { + case tensorKindF32: + return 0, binary.Write(w, binary.LittleEndian, f32s) + case tensorKindF16: + f16s := make([]uint16, len(f32s)) + for i := range f32s { + f16s[i] = float16.Fromfloat32(f32s[i]).Bits() + } + + return 0, binary.Write(w, binary.LittleEndian, f16s) + default: + return 0, fmt.Errorf("unknown storage type: %d", st.Kind()) + } +} diff --git a/convert/reader_torch.go b/convert/reader_torch.go new file mode 100644 index 000000000..1b3e1c9f1 --- /dev/null +++ b/convert/reader_torch.go @@ -0,0 +1,48 @@ +package convert + +import ( + "io" + "io/fs" + "strings" + + "github.com/nlpodyssey/gopickle/pytorch" + "github.com/nlpodyssey/gopickle/types" +) + +func parseTorch(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) { + var ts []Tensor + for _, p := range ps { + pt, err := pytorch.Load(p) + if err != nil { + return nil, err + } + + for _, k := range pt.(*types.Dict).Keys() { + t := pt.(*types.Dict).MustGet(k) + + var shape []uint64 + for _, dim := range t.(*pytorch.Tensor).Size { + shape = append(shape, uint64(dim)) + } + + ts = append(ts, torch{ + storage: t.(*pytorch.Tensor).Source, + tensorBase: &tensorBase{ + name: replacer.Replace(k.(string)), + shape: shape, + }, + }) + } + } + + return ts, nil +} + +type torch struct { + storage pytorch.StorageInterface + *tensorBase +} + +func (pt torch) WriteTo(w io.Writer) (int64, error) { + // writing torch tensor data is not yet implemented; nothing is streamed here + return 0, nil +} diff --git a/convert/safetensors.go b/convert/safetensors.go deleted file mode 100644 index f45687f10..000000000 --- a/convert/safetensors.go +++ /dev/null @@ -1,309 +0,0 @@ -package convert - -import ( - "bytes" - "encoding/binary" - "encoding/json" - "fmt" - "io" - "os" - "path/filepath" - "regexp" - "slices" - "strings" - - "github.com/d4l3k/go-bfloat16" - "github.com/x448/float16" - - "github.com/ollama/ollama/llm" -) - -type safetensorWriterTo struct { - t *llm.Tensor - - params *Params - bo ByteOrder - - filename string - dtype string - - offset, size int64 - repacker func(string, []float32, []uint64) ([]float32, error) -} - -type safetensorMetadata struct { - Type string `json:"dtype"` - Shape []uint64 `json:"shape"` - Offsets []int64 `json:"data_offsets"` -} - -type SafetensorFormat struct{} - -func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) { - var tensors []llm.Tensor - matches, err := filepath.Glob(filepath.Join(dirpath, "*.safetensors")) - if err != nil { - return nil, err - } - - var offset uint64 - for _, f := range matches { - var t []llm.Tensor - var err error - t, offset, err = m.readTensors(f, offset, params) - if err != nil { - return nil, err - } - - tensors = append(tensors, t...)
- } - return tensors, nil -} - -func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) { - f, err := os.Open(fn) - if err != nil { - return nil, 0, err - } - defer f.Close() - - var n int64 - if err := binary.Read(f, binary.LittleEndian, &n); err != nil { - return nil, 0, err - } - - b := bytes.NewBuffer(make([]byte, 0, n)) - if _, err = io.CopyN(b, f, n); err != nil { - return nil, 0, err - } - - var headers map[string]safetensorMetadata - if err := json.NewDecoder(b).Decode(&headers); err != nil { - return nil, 0, err - } - - var keys []string - for key := range headers { - if !strings.HasSuffix(key, "self_attn.rotary_embd.inv_freq") { - keys = append(keys, key) - } - } - - slices.Sort(keys) - - var tensors []llm.Tensor - for _, key := range keys { - value := headers[key] - - var kind uint32 - switch len(value.Shape) { - case 0: - // valuedata - continue - case 2: - kind = 1 - } - - name, err := m.GetLayerName(key) - if err != nil { - return nil, 0, err - } - - shape := make([]uint64, len(value.Shape)) - copy(shape, value.Shape) - - pad := func(s int64) int64 { - return 8 + n + s - } - - t := llm.Tensor{ - Name: name, - Kind: kind, - Offset: offset, - Shape: shape, - } - - t.WriterTo = safetensorWriterTo{ - t: &t, - params: params, - bo: params.ByteOrder, - filename: fn, - dtype: value.Type, - offset: pad(value.Offsets[0]), - size: pad(value.Offsets[1]) - pad(value.Offsets[0]), - } - - offset += t.Size() - tensors = append(tensors, t) - } - - return tensors, offset, nil -} - -func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) { - f, err := os.Open(filepath.Join(dirpath, "config.json")) - if err != nil { - return nil, err - } - defer f.Close() - - var params Params - - if err := json.NewDecoder(f).Decode(¶ms); err != nil { - return nil, err - } - - params.ByteOrder = binary.LittleEndian - return ¶ms, nil -} - -func (m *SafetensorFormat) GetLayerName(n string) (string, error) { - directMap := map[string]string{ - "model.embed_tokens.weight": "token_embd.weight", - "lm_head.weight": "output.weight", - "model.norm.weight": "output_norm.weight", - } - - tMap := map[string]string{ - "model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight", - "model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight", - "model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight", - "model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight", - "model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight", - "model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight", - "model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight", - "model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight", - "model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight", - "model.layers.(\\d+).block_sparse_moe.gate.weight": "blk.$1.ffn_gate_inp.weight", - "model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight": "blk.$1.ffn_gate.$2.weight", - "model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight": "blk.$1.ffn_down.$2.weight", - "model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight": "blk.$1.ffn_up.$2.weight", - } - - v, ok := directMap[n] - if ok { - return v, nil - } - - // quick hack to rename the layers to gguf format - for k, v := range tMap { - re := regexp.MustCompile(k) - newName := re.ReplaceAllString(n, v) - if newName != n { - return newName, nil - } - } - - return "", fmt.Errorf("couldn't find a layer name for '%s'", n) 
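The regex tables in GetLayerName above are what gives way to a caller-supplied *strings.Replacer in the new reader.go: each converter provides plain substring pairs, and layer indices pass through unchanged. A small sketch of that renaming (these two pairs are illustrative only; the real per-architecture pairs live with the model converters that call parseTensors):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// substring pairs stand in for the per-model replacements;
	// the layer number "0" survives untouched between them
	r := strings.NewReplacer(
		"model.layers.", "blk.",
		".self_attn.q_proj", ".attn_q",
	)
	fmt.Println(r.Replace("model.layers.0.self_attn.q_proj.weight"))
	// Output: blk.0.attn_q.weight
}
```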
-} - -func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) { - f, err := os.Open(r.filename) - if err != nil { - return 0, err - } - defer f.Close() - - if _, err = f.Seek(r.offset, io.SeekStart); err != nil { - return 0, err - } - - var f32s []float32 - switch r.dtype { - case "F32": - f32s = make([]float32, r.size/4) - if err = binary.Read(f, r.bo, f32s); err != nil { - return 0, err - } - case "F16": - u16s := make([]uint16, r.size/2) - if err = binary.Read(f, r.bo, u16s); err != nil { - return 0, err - } - - for _, b := range u16s { - f32s = append(f32s, float16.Frombits(b).Float32()) - } - - case "BF16": - u8s := make([]uint8, r.size) - if err = binary.Read(f, r.bo, u8s); err != nil { - return 0, err - } - - f32s = bfloat16.DecodeFloat32(u8s) - default: - return 0, fmt.Errorf("unknown data type: %s", r.dtype) - } - - if r.repacker != nil { - f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape) - if err != nil { - return 0, err - } - } - - switch r.t.Kind { - case 0: - return 0, binary.Write(w, r.bo, f32s) - case 1: - f16s := make([]uint16, len(f32s)) - for i := range f32s { - f16s[i] = float16.Fromfloat32(f32s[i]).Bits() - } - - return 0, binary.Write(w, r.bo, f16s) - default: - return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind) - } -} - -func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) { - switch len(params.Architectures) { - case 0: - return nil, fmt.Errorf("No architecture specified to convert") - case 1: - switch params.Architectures[0] { - case "LlamaForCausalLM": - return &LlamaModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - case "MistralForCausalLM": - return &MistralModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - case "MixtralForCausalLM": - return &MixtralModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - case "GemmaForCausalLM": - return &GemmaModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - default: - return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0]) - } - } - - return nil, fmt.Errorf("Unknown error") -} diff --git a/convert/testdata/Meta-Llama-3-8B-Instruct.json b/convert/testdata/Meta-Llama-3-8B-Instruct.json new file mode 100644 index 000000000..808826bb6 --- /dev/null +++ b/convert/testdata/Meta-Llama-3-8B-Instruct.json @@ -0,0 +1,313 @@ +{ + "general.architecture": "llama", + "general.file_type": "1", + "general.quantization_version": "2", + "llama.block_count": "32", + "llama.context_length": "8192", + "llama.embedding_length": "4096", + "llama.feed_forward_length": "14336", + "llama.rope.dimension_count": "128", + "llama.rope.freq_base": "500000", + "llama.vocab_size": "128256", + "llama.attention.head_count": "32", + "llama.attention.head_count_kv": "8", + "llama.attention.layer_norm_rms_epsilon": "1e-05", + "tokenizer.ggml.model": "gpt2", + "tokenizer.ggml.pre": "llama-bpe", + "tokenizer.ggml.bos_token_id": "128000", + "tokenizer.ggml.eos_token_id": "128009", + "tokenizer.ggml.merges": "d0cbac1fcc9dcf03724b8db5c9bfb593ae1cf68fb9bc72eb1d15274dcbbf618b", + "tokenizer.ggml.token_type": "d70a88809fd7da6f1f028622685cd64268a7a922c5d343c96f25b66327358978", + "tokenizer.ggml.tokens": "765b529dbcbc42dd202ce657341c63807b51f3b07e09898f6aa6196326865d5a", + "token_embd.weight": "b53102a11d9064bbd404833e3464b1b13e08ce73300b442312cccde2f19b2698", + "blk.0.attn_norm.weight": 
"7318df3cca9e8d153ff0a503026a1265e63d20b2a8c1dd7a2769585082b5d1ee", + "blk.0.ffn_down.weight": "b950806a1fc722c9fad7fd0b20c3c0a7fb50f14395e1e7663a590bfd62e20900", + "blk.0.ffn_gate.weight": "e73e580af6d4f08e060a74a3c25efdf5d3bed99e183d95a5a85ae859014839fd", + "blk.0.ffn_up.weight": "c8158af679ef99746da1befb67eebb19489e0bbe6ce7d97e13e348508244e516", + "blk.0.ffn_norm.weight": "7ec69c3c31e95e49a3359003b0033f6b9e85561a3e3fd83e7476661ecdd756bb", + "blk.0.attn_k.weight": "2732303257bac969b4964e0e32ec08b5a7f5c031bb02bf6ac4467b3ea0ebcf1e", + "blk.0.attn_output.weight": "ecda1d43b4ccc91cd5b366d7e7a275353990ac78561a07c83d9c77031aba12dc", + "blk.0.attn_q.weight": "569b1f5faf92b6f00910cf7effb2d5862f91038ce5c3b0019fc10e5d79fbd5e1", + "blk.0.attn_v.weight": "aa8416c5ef7e32fb54a1f20d6ac651656845d4af240564b397c39bd83e06e3b8", + "blk.1.attn_norm.weight": "03327e02862908c2a44b2f52decdb924bf4201f400b46f8037a9cb2e1d7a61ff", + "blk.1.ffn_down.weight": "5a83a87603f38c99f8e1e370a2d5f967bb45ac51d881a609304a7811027321e0", + "blk.1.ffn_gate.weight": "31da0572c79e655186c721c231376f85e56cdcc6257c28d08c8c5b40d5c22b40", + "blk.1.ffn_up.weight": "e0c811d64ca155c8de10a868e72015d43888834804614ee1aa2953129ffbc90f", + "blk.1.ffn_norm.weight": "5861f313d6137d6f0f904d423df47fffc6069e224ff746e1b637ac9c7f0af862", + "blk.1.attn_k.weight": "5fbbec0acca6457b9416ebdcd90e526885d0224537b7628f6be376a7f275313d", + "blk.1.attn_output.weight": "b237c9763fa3f75166a6f70b70f1566e77d0d89dfa164ed1b3137393e90575c3", + "blk.1.attn_q.weight": "c0a9cf4a98b4882b16f3eb2b49d933793dcc5357abb246fd3fe3134ed2b12e1c", + "blk.1.attn_v.weight": "96867111727200cac1af7865189dd41fd62b47584e5e5f33a91f1d34509cbd40", + "blk.2.attn_norm.weight": "f392f8a88ee3a95b1cc19c40dd4ef66317037b0faaa1800f610779e129ee0539", + "blk.2.ffn_down.weight": "73823eef46632aedcc8c1cb08a736b6aa97ca97842cd1fdfc5567d8dec459662", + "blk.2.ffn_gate.weight": "f4909ae19fc3848b00bb8b9050122e74f8e903b89e22937036f4cc9fea20a718", + "blk.2.ffn_up.weight": "16f4904a3d814ea68f00519724fc4943e48444a84c786bda39aa5efc298a7d84", + "blk.2.ffn_norm.weight": "e3ccdf56e75cb969f6f69c39caf6daf7c4e70e89e25df0f4d2e4bc60e159aafe", + "blk.2.attn_k.weight": "c3beb1e0a11bcf007ef0f0d8f6bdd3082d8b29090cd29597846b5d51e308a8e5", + "blk.2.attn_output.weight": "bb9f66c32cff51154fea92933c2cd62549236f8cb1a767f9ef28d3f99809b343", + "blk.2.attn_q.weight": "8eba394132eef2a05c5a92d62d2376000f7948448d7a2dc74e6b608203add20d", + "blk.2.attn_v.weight": "88f61f77c53567c617db3eef8f30621109a750e679f6784f7911739bd42c2f02", + "blk.3.attn_norm.weight": "7b996675b7ca75fa24107b3ebe0788653ede0f49ac83b8659d71ff54d591f81a", + "blk.3.ffn_down.weight": "2cb332bc05e4821962fdc9dcbcc7cc12630f32117711b687d18fb53c0bc4fbf4", + "blk.3.ffn_gate.weight": "340b387c7f208c8f0a6db904ef8d87c1e84b7d6ad57177abd32d86c8d18b760f", + "blk.3.ffn_up.weight": "07484433f8a7ee061c55aa0de2ecc009f769b0617c9c0ec096e9bb2946df9f0e", + "blk.3.ffn_norm.weight": "4f1a4ade36b393af341240bc894a2aab09cff7e4d56dc4658445deb107f9371b", + "blk.3.attn_k.weight": "483dcd96acb4528df84b9842970994630dbd82b8715ace394aa8b39fcf8d6291", + "blk.3.attn_output.weight": "beaff0810687923585642ee11d929cbf3b43dc6f87f30ddb552c222ab57bdbb3", + "blk.3.attn_q.weight": "0739355002f6fce520863add697e0ff25fc88215322dc3f993be7bb68dcce7e8", + "blk.3.attn_v.weight": "c216d17b6d90ee3e07f82598b8161fae34de2f392dbb0f745b682b578c324767", + "blk.4.attn_norm.weight": "91ab405bc4ba15bf63af233f266aa43aaab43789a9e6596e14a357c2ac7df217", + "blk.4.ffn_down.weight": 
"620f34ee75cdc73aecb8949af5fbb0d2437fd81422b6d8eb7acfc52addb9fc68", + "blk.4.ffn_gate.weight": "f6feec7bc9acadf35ec22532f8998d8e50f31afedabb19263590dcf8b9a92eee", + "blk.4.ffn_up.weight": "4a72af7cd28fd07b038f6cc4406678d120517280236ea85d9e76eff40ab2cc22", + "blk.4.ffn_norm.weight": "1805b37b44d5d682bdbd2fadeafb763ee001617d7870848cc487079ee34b21f9", + "blk.4.attn_k.weight": "a1e4f9d97cdf4c1b0d177cf00c4e32d1be30c1984a239b3c9bd73f8848888853", + "blk.4.attn_output.weight": "a1547e2497c423b0aff0eee71d9300d6fdf4e4986679418b6e637b69a9a6720b", + "blk.4.attn_q.weight": "0677483a9264ea6803d03d304d87a54632242cb516e8b76b6e3e8284c2f4de04", + "blk.4.attn_v.weight": "02691ba3af344fcc1969428ab0df811ac94aaa2fd91b0dc4ec1ac0a58806980d", + "blk.5.attn_norm.weight": "ba9c028335e5c895b87a5bd1448ca429248f9746ed97bdcb8679923206117156", + "blk.5.ffn_down.weight": "ccfdc9006acad1940a6bc05042a3947f1066acd671e0bb53b7684e9eea9ef5c9", + "blk.5.ffn_gate.weight": "623157679f1e742ccc3807c0b0153ddc8450104de75ec62f1370ec3807c09cf4", + "blk.5.ffn_up.weight": "05748804c65091f963729b58b085f58351891cac8a2861f5eae26b06aa60b2a0", + "blk.5.ffn_norm.weight": "84bae55af2efc8b8429f09056c8c04990c466dae31cb3f9356038b8957f1b406", + "blk.5.attn_k.weight": "8c766180c726b037d587fc52371de6e3307140c52409011609d1225624b6a3eb", + "blk.5.attn_output.weight": "490b582b3b1dc151ae55aee8b6743dad6c01fb49e43afefb6e68394b74be3d73", + "blk.5.attn_q.weight": "6f7b8ca4d9025ec836a44bbcca46be30c66b471a9fb62943ddff8288b3731409", + "blk.5.attn_v.weight": "9f70df3ba00c9e723214b3da83ff435a2163fff5915f75515c9664c05c866c27", + "blk.6.attn_norm.weight": "1a4a66613a682df6f061fc7c4d986f9f7e9175b62f0c42fc1ef31db536bd5942", + "blk.6.ffn_down.weight": "c56f25e4e49b443dbc82d88311ee63bc1f5002cc67e52f4787fd5f003aedeac1", + "blk.6.ffn_gate.weight": "31a5cf1aa9b831a81588d508550f51fc425f9517c43254d4ef7096d38029cf04", + "blk.6.ffn_up.weight": "ce135f3a1163e0c9297a615bdbe68a67ead21edce8debbfa9f6e15e6af8d4c94", + "blk.6.ffn_norm.weight": "4e328ce0648c94e732bc40501858ef6262ad1161e2e407b0cdcf4813fa9d45d8", + "blk.6.attn_k.weight": "1eb1c4c9f9c4c7ff7f5429075e0dc6a7782bed55109fa88df209a817dd8ef960", + "blk.6.attn_output.weight": "3d32986b56873b88655ee1edabdd413fdd9ab18b82108c9ce90bdbc2d3a6f3a3", + "blk.6.attn_q.weight": "8432f583b3a2809c99c393f9beb077cb0534dd5d247c17108f2986cadc6651f6", + "blk.6.attn_v.weight": "5045381513815bb91839dbac8335ffe49bbc7b0008369de7ea97eb676c5e2b36", + "blk.7.attn_norm.weight": "3dabd003638ec2499bfc8a48c49eef34276caab4fe76894eb963207848c2fdaf", + "blk.7.ffn_down.weight": "194fae858608bdcffd235be59ab119d0b91c8549f864ea06dae69249e099935f", + "blk.7.ffn_gate.weight": "00b24c29c30246892bce0791be804a89701d4c1332777e0bcdad5d9d5666604f", + "blk.7.ffn_up.weight": "44d7082a5280080c90cef9e19d410391de34f212ca0736377769b8ddd0c82d5e", + "blk.7.ffn_norm.weight": "21fe8a7fd6911c64e0d15a788b3b4cb6d71dd6ec51de65f760ee89afbb6ae53e", + "blk.7.attn_k.weight": "57a149eec5f6744a9526cd3925ac073f9d12db0fbcb5afe042ef4dc846458c44", + "blk.7.attn_output.weight": "0e9c28a3e81a2880251ce5eed77bcb8be8aaa1a51c9cb6de820b47ed83849fc2", + "blk.7.attn_q.weight": "15ee75263ee4e2a43eb322bc159ae004bb7d77e3a7e63ee4ddab700430693fff", + "blk.7.attn_v.weight": "440aa970bba4bff429fd7b7b1de21f2ad14fb2952b776cfa4acee68d7c6e9b8f", + "blk.8.attn_norm.weight": "af5b44825633c42c1ae964c82bb2be6a242d3a751f0a91f1bae4f593e8f5b6ec", + "blk.8.ffn_down.weight": "b11c14c76adca94fa200496dd2c10743becb23aab6642443ef1ae6d8710edbc1", + "blk.8.ffn_gate.weight": 
"7bb03d3325bf8637ae2fa1296b0651356515578d46a7c5ca65c7a923d7de27bc", + "blk.8.ffn_up.weight": "b956ef0a0669b5a9c9bf3a8da2d1c24f52d331cfb7354f6d7c51bd65be355e30", + "blk.8.ffn_norm.weight": "c78c3d748302edfef76f71ea5cb2055c94352122eee8b9b1173779a1814d224e", + "blk.8.attn_k.weight": "c0fba6a596ed9c1c32a7055c31a935a8b31e42b77282ee47c1f03ee3bde736b5", + "blk.8.attn_output.weight": "83cf9947080c5d8d571f04a842bc3dcfe7bbb0195fb25b346e22635e8649f2d4", + "blk.8.attn_q.weight": "47409350a576b333d97b7c877d69f47f46df504f3765102dfc0be9e521c7ecd6", + "blk.8.attn_v.weight": "1999dff91404fdcf1ecb34d9eaaaa9244ec7658a74dec8feb7cfd1fddba0347e", + "blk.9.attn_norm.weight": "1e6e29d5c3889ab4e1b0a5b9998cba60179b0f1fca133515df49cbc19d092593", + "blk.9.ffn_down.weight": "acb898a6490adff592e10b4c62d70edc5941661ee6da44658500e9205357c8e9", + "blk.9.ffn_gate.weight": "4cff63013593aadc3ffbaaa6ed70ffdba1224cd43c3644bf6f4162b5ac1ab542", + "blk.9.ffn_up.weight": "f985b5a2d6cf4fe32c7256301c3c89b8ad22b59e516342c52da42d8110766a4e", + "blk.9.ffn_norm.weight": "0d659c538bc6b21ed0018f107ab674a7424a00a42946c80e07208b479b21918f", + "blk.9.attn_k.weight": "f67611d888780d1b38c1c146b361c65310c8183bdf64fd73e2259985c6e8517f", + "blk.9.attn_output.weight": "f12ca1fa62a02ddc3f77f798bfb5707e0c50bf18ee0eaa67025521a98355f26b", + "blk.9.attn_q.weight": "3865185f4361a645b086ad47b72904c095313fb1c624e511647bf1a7dfc1c476", + "blk.9.attn_v.weight": "92125bbfed63544ab56052bd1e4aa453bbf34c795249ee54cde54907c8c6d1d3", + "blk.10.attn_norm.weight": "5d6bfbe545bcc2fcb2fc75c68f64b1f4c918badaf53e0156fe2d88aa977b2f94", + "blk.10.ffn_down.weight": "1dd9da8b0d2696ab5531fbca8a29c7d67567620a9d3e5fc2a19ec5d7e4c6cc8a", + "blk.10.ffn_gate.weight": "6e55e7f014edaebda0ac6819a426221d3b025c27312a2e18cc5806f31e3db226", + "blk.10.ffn_up.weight": "d80dde54af5db51241345ee8d64c1972608644f4deeac1e8195dc423bf27474a", + "blk.10.ffn_norm.weight": "f6ca65951d58ae3379eee8247bec34ebd0db05674cc9295593573841b8a55df3", + "blk.10.attn_k.weight": "b58e350bd6b49aba0fba4e4dd6865de3a2a0651ab865dbf2419b627b53ffc187", + "blk.10.attn_output.weight": "6b26a986e12fe66ec286a21d7d5af5eaa1bfe6f2bf502165d270e4497235a54a", + "blk.10.attn_q.weight": "3440e0e5b7e0d1e426424ae5a33f4e057be623249e9035ea12e57dbe5d3893c4", + "blk.10.attn_v.weight": "ebfadcfe14bcd6dee933053df0a67e12e7a196d5cc45728c1ffb2a2daedd5ca2", + "blk.11.attn_norm.weight": "3ed057b9576cd2de84507ef64c7646dc478c651efca4c2024cbe91a4f3fbf0bc", + "blk.11.ffn_down.weight": "8ff1c2487d22f5c499761e4eb721418f141f960160d0bab779595a34e4d68898", + "blk.11.ffn_gate.weight": "9c74e4507c7e45bf39b7cc7402198cd1dd77e3fff8c625b0413acaeb16efeb9f", + "blk.11.ffn_up.weight": "4367158007161d29939e00a322bb6776016e43f648a94f9b08a96a477aae75be", + "blk.11.ffn_norm.weight": "1cc0288c1491072121f4c9a0af20be0e13af49895696a3320e4fcac608768de3", + "blk.11.attn_k.weight": "066f5b3c144fce1366835e1ebf376f768b333b8ae29f5b478c42d1d0c809c855", + "blk.11.attn_output.weight": "e0d9f3d3f2c54aed59c02713ea4fb562799ddbacbe67ca3998dfc887bc44e47b", + "blk.11.attn_q.weight": "28d3ecc8a88cb3815e89a7f7a7d043da7a71f702b337a126e4d3a2ac1cd6370f", + "blk.11.attn_v.weight": "7c5cdef10ee73bca0a3b9f6ece5f0a0155664e0ce3d8de90ccdccfab5545e5e7", + "blk.12.attn_norm.weight": "973b133301a1af760cd7b3a7955371ea0a750808b442deb6adaf7b98482bd0c6", + "blk.12.ffn_down.weight": "d6c87b4b4ca03f75546ddd6a9e7fca720585a309188723c1ace8122438d4b200", + "blk.12.ffn_gate.weight": "2189a6e0cab1540bd05d6089b922aa8fd694be51255654933c165f302a0c955f", + "blk.12.ffn_up.weight": 
"5affbec19b58d092b9305721e3552481fe2eff51269ea3ed91cda3b9ef84d4df", + "blk.12.ffn_norm.weight": "f650fd42a34e950f758b4a130e7b8b1a712b1dcbede0291bb8edde47aaed0ef6", + "blk.12.attn_k.weight": "59b1e86f10450a7cc188beefc0856d2dcf44e8d7fdd9cd8859c30ec1ebaf24b6", + "blk.12.attn_output.weight": "446b0d36b2f66bd72a2323f4f4e9d85a0f621e9a58872e89a27248d6b1123238", + "blk.12.attn_q.weight": "3ed6bfd39f040301ed99fad882d3e569769d594259f9948445bef0e44ec881fb", + "blk.12.attn_v.weight": "e73652cd5d0029b1931be3ba9d82508f6696dce5a29d085476a54fb7a2ddbabc", + "blk.13.attn_norm.weight": "491b85278c0bd67bd31b9b8a9720902c244bd067e53a4a03641b7c0994782e82", + "blk.13.ffn_down.weight": "ad71cc248a85e9ced49307a24a9bfae01d387e979a7689c82ff59998e09741f3", + "blk.13.ffn_gate.weight": "0a55984d53971fab97575ee0ef5882013be7fdecfa76e3fbebb5dc85a07a14d4", + "blk.13.ffn_up.weight": "378b697b35e2e53c0de98e8e29b73d42ae3ec112ec16129aa5997a9e2f3b5943", + "blk.13.ffn_norm.weight": "f8aff2f69ab286210fad45a62b03f8d10b38f96a420d7baadf6b95d7b0b0bcd2", + "blk.13.attn_k.weight": "25ceb841afb1034831bea7f4d6a6c578def2ce4d4c412c780ef147dc9a598360", + "blk.13.attn_output.weight": "a242b322889c6bdaa14b67a7bab593db39df8eea3721638ef639abbb74d482e3", + "blk.13.attn_q.weight": "d80be9945a369439e835c55cfb0e97828b8a66bb7ced534d9059c92487bf20a9", + "blk.13.attn_v.weight": "ac33274cf9b67979d9ecdc967a55175afe0c9c4aeeff6391433cd9840c818706", + "blk.14.attn_norm.weight": "12a1e1091de5b2da12c9e7c0b1c8e6f09ce2a749733cf7d5240445b8e21cd093", + "blk.14.ffn_down.weight": "cfd41965c88266e32bc2dcdadda512499c35519e8686fefb9a7f249ab2291eb5", + "blk.14.ffn_gate.weight": "8dcfe774f07a095c7c6cf0a901c9df70d938bad7b5ba347fbc8f694e7603c0d1", + "blk.14.ffn_up.weight": "c7995577fe4a72ea0fb17c4a7b6b87b959072bbfdd5edacc6c367d43465809ae", + "blk.14.ffn_norm.weight": "81c41ebde41739e7016ffec31d2256217b825dc3cae049a935f5f61a60d22003", + "blk.14.attn_k.weight": "fb708bdebe4384f5c4b479c110028554f4d122f166b8091eda7d8d65e6780eb8", + "blk.14.attn_output.weight": "f5295caf2dfdc60553dcabe17537a80577e8b153c902247daac058df23542514", + "blk.14.attn_q.weight": "c12b7a3601c68c63ab5dc9d2599ebf3f3a10abc2c59d3a2126fffd5818f2763b", + "blk.14.attn_v.weight": "1ce968d9149bf0d5e237d52cc6d6433565b4bbf03252a736262bb00a2b34a687", + "blk.15.attn_norm.weight": "266fd2c36d7dcefc6b6bb7f1c9374c41f2bab5d6c84a063b6f91c4f682dad3c4", + "blk.15.ffn_down.weight": "6154886e9ef0a6cc08ab0d264a35f497e6f0987efdac992ed04e87088bea7801", + "blk.15.ffn_gate.weight": "183d9fd3c1b5657840099053d2fd3f72ad953b1de523296159b7761f20491a76", + "blk.15.ffn_up.weight": "51546d4498842ae2340ee226a0888d5f61e7d2ca4d052dfa06a77b0451242d3d", + "blk.15.ffn_norm.weight": "ef7378091a41a25a5f58bf1bf9d3bc64ea562e7f421e1c232b1f177c30fd3500", + "blk.15.attn_k.weight": "8d556ab8d9639324141774999b6eed0e91d7ee645bf3e7a3dcd200b2e7a00751", + "blk.15.attn_output.weight": "54aa6ba87def7cbe18b0c6ab3aff5c351cb3b6ca4a0d7b2cd5f75a1312991429", + "blk.15.attn_q.weight": "10731b0dc031ea8e0ef37bd7f010e0a78518a10a6df05a8bae48e3148b73ef3e", + "blk.15.attn_v.weight": "cbbe50c2ed7224866d3cf9b489c599f3ec41a4ea1aa3181e9f4e87e1fa0cefec", + "blk.16.attn_norm.weight": "387058eb39d4b28c04cf1368247417f1faeae8ae79d894c9f293457e0eaa00b0", + "blk.16.ffn_down.weight": "2cb26ccee585e933401ad5c82ed36ddacb3289efa0b28f8cf91b020ffbd9c333", + "blk.16.ffn_gate.weight": "d745985efb5bab42304e5d509024631efe35f92f2b2ec4931ead6db97ca9727e", + "blk.16.ffn_up.weight": "7a67bd195e0642828ca36eb7818149bb70c2c25f82de07e2b5807c520daf540e", + "blk.16.ffn_norm.weight": 
"7cefd061c8182482a89272f8a4e88a954b12609a62716923ca1cb3593b1c1651", + "blk.16.attn_k.weight": "d7968a2de67e755b4533e061aaad1cb62f8882af92dcad67f99d6d5112513439", + "blk.16.attn_output.weight": "9e9ab5788272ca3394ea89eadbce8c86ecc3fd75b7899184d6191c134ad9aae0", + "blk.16.attn_q.weight": "ef81c261b536c1a3a093b33f44cf2d42b86e5aa2d821674f07a0c80e992ed925", + "blk.16.attn_v.weight": "aef38e7958301b4a437cbdd2fbae6197f677b09269ec1eaf63188cd5da428d25", + "blk.17.attn_norm.weight": "28f6b289f1bc3131041e9f791b7a2a3a48baee0dfea27bf7051ebbb7ed364d80", + "blk.17.ffn_down.weight": "1a502829aafc6a9bd6bc81f12573bf8632d5c8c659f0dfb13c8b2411f3b1ec05", + "blk.17.ffn_gate.weight": "ddfd8aa0eb98846ebc9afe31366249159f46ae9815199dd70161527ed241ac4d", + "blk.17.ffn_up.weight": "4211a3cc247071bd361b30de2131d02382f552855062bf3b3e004c17992e5d09", + "blk.17.ffn_norm.weight": "647e5fa99a5b0d232af36d15816539f4d27e60a50a341b00aa88bb6e4474f8b9", + "blk.17.attn_k.weight": "d9125ff33a19c502c0f8846433ffc24395048582fc2f463d34a0301a82156f02", + "blk.17.attn_output.weight": "3d64fbb1cfef04444827f37c35fd9ad3413eb2165094d339ef89f00503f09de4", + "blk.17.attn_q.weight": "e5b29424028f578beca385fd82e29f37adedf3037cd51e5889d5a1ffb0428ca7", + "blk.17.attn_v.weight": "1809c5aaf2ac04c5d65539097564ad62796e87d24bb8b9ce5b095561a61d908a", + "blk.18.attn_norm.weight": "99daca58d001c627523d3adfbca1d95f04e590382a326866544d57989d5f4835", + "blk.18.ffn_down.weight": "84f30231ce6ca0f10227541dfc602d6418c1a210386b0c4926ef1656e7d4635c", + "blk.18.ffn_gate.weight": "ca5bbe4468b541740e54f69b9e08fcc8e478c344b70551dab21b1206acfbaadb", + "blk.18.ffn_up.weight": "0b3067b9dded31686dcfdc1e247eae3974a28a61ac59e9862758dbfaad64e8f7", + "blk.18.ffn_norm.weight": "8154a102232dbc0f90ce77ae5c1ff8f26f8b6e4dcf326e9ec1645749669e7960", + "blk.18.attn_k.weight": "25abb26021ccc481471a30e0d4cbeb7e1db29828417ec5136edeb93fecf09ac4", + "blk.18.attn_output.weight": "d87d481d9b046b68efa06ccdd4ed8cbf61e692d61114b75b7fad5ed75f5d87b2", + "blk.18.attn_q.weight": "cc6400379e15766992ff1293be79dc67682c28e9e15155a78109f4b64653b164", + "blk.18.attn_v.weight": "45c75cb1dd496aea3173aafe2575b841dd1d02cbe010b3198099731eb98f531c", + "blk.19.attn_norm.weight": "65389efc75297684773284ef8e5f8789a4504b636c9f33b8a32e0ee42499fa72", + "blk.19.ffn_down.weight": "4eefab7e939f64a17e4a214ca3c77a6fa110d94f677e2d6401086f70fc538b04", + "blk.19.ffn_gate.weight": "f1c0a59cafda66f466ab585b0b8b4861b58abe87a67cea1f6a488492242edfdf", + "blk.19.ffn_up.weight": "c42d045eef588db4a0e56960a57e110e1ff92eb8041107d19899165fd3b90f17", + "blk.19.ffn_norm.weight": "a8f33eda6d5d62ff5f333ad9771783caff556641f4e7df713451385676f441fa", + "blk.19.attn_k.weight": "0bab5d9e9083492bfb05a5a3bb23b79c0e7b99ef6a6644817b4d57d5c453b8a5", + "blk.19.attn_output.weight": "c99c551d70eafad0f7aea98fb6f9251635897168eb3895f76abf0d4ea3b3aa6f", + "blk.19.attn_q.weight": "c98bde95627c3b54c9443813ca50b4e14f518319681db6bbf7b2332ba26e9a60", + "blk.19.attn_v.weight": "ff3a490518cf64904db89ce0dc7d6eb89e870f1440e41883c6b55a221f82de84", + "blk.20.ffn_gate.weight": "761f0e317229cafe9d3754048ab038a0a84e9a287b196ab65f633139f2d29aba", + "blk.20.attn_k.weight": "45d13439b41066d282e8490a726785abf513605f46c79bd0c840f6419d27e790", + "blk.20.attn_output.weight": "a3b958d84b4a097844179b7d55c18fd0e4f319cb15e918c6fde33b68de1bcac6", + "blk.20.attn_q.weight": "127ab8e7d8c3f882874904196a02712bab42e6744fde45871b67350609d19f5e", + "blk.20.attn_v.weight": "5f0ad2d14a8ae42dd3bbeccfb33295687a14055fa92c54bc946249373c1c9f17", + "blk.20.attn_norm.weight": 
"77300b1755edc8c70089e0f45efa646056b9add7d8568b2324d2f3e62b64971a", + "blk.20.ffn_down.weight": "ab93d0e075b42e9017b701a070d561e698050d90aac4b4b9919256fbe50c3204", + "blk.20.ffn_up.weight": "4fd6628a07acc57a48d1ef83f81b7d7aa0bce569c1160a99d307284f8821322c", + "blk.20.ffn_norm.weight": "2a9e46b9e48e8e55215de56592e1f189530037c1c94a1428e3d6f106c7f26fb2", + "blk.21.attn_norm.weight": "4b3b5912c7bc61eb9da8e47d4651f896e85d9e59c4ecaa65df7acf3c21737298", + "blk.21.ffn_down.weight": "7146f931663d93b8771cd84405cd4802ea6560d0729b0d6d44588203c095bc53", + "blk.21.ffn_gate.weight": "b44ec5d64388fa40b90b3e9976d97a8b6800fa3b97584f32e64b03daffb8601f", + "blk.21.ffn_up.weight": "0cf3643fd23c685e17062cd11e116e17ce57a405e5e78953bab94cd62fe48789", + "blk.21.ffn_norm.weight": "4ef2cdb53da166df70b39f3e6b17af51848cfa5ea3c27ad6a1ae2a1bb1da1ce9", + "blk.21.attn_k.weight": "5d40f32a706f670c19972b14176bf660d5b045e3637b110dbf8d7de4ff32101a", + "blk.21.attn_output.weight": "18afaa916752ce16c9653ec0ec7e2fe60be55faa2aa5025d147be184adb75cac", + "blk.21.attn_q.weight": "2621daa5f858931514a4b2f0fe8d81cf9b96f541e6af99bfa7539e9bde8e34ee", + "blk.21.attn_v.weight": "63226dafc54c899bbce4aa49efceeedd8908e94faa613450fdda91f332b62864", + "blk.22.attn_norm.weight": "cf3058daab4d2c04387e7d169d1553bb8e7358eea66285ec067703f6ce62043a", + "blk.22.ffn_down.weight": "6a58d5fd220abdbac6cee7ba048abab794731af318f04982c2506df59413d0b3", + "blk.22.ffn_gate.weight": "d5614535324b03c7b91727a903b2a72f8d07ad17f7aa8b61ea173cf9b895069e", + "blk.22.ffn_up.weight": "ec20da3949566e93f66cabb67f8cd7eab399047ec6ebf5d43edfaf3669b82296", + "blk.22.ffn_norm.weight": "84c82f38f53a649972a44466fc476bf764e064ce18de870291edc302f3700e28", + "blk.22.attn_k.weight": "a3d2ecc37fde7c201176bb8abadf27f0d8ede9679a6034913e03d9db924fda12", + "blk.22.attn_output.weight": "5a3b8bb433f43a387df43dd371bdf80ddfac986dfeaf38e9bac1d7a0ec6628de", + "blk.22.attn_q.weight": "3a875cec661b4859f30a8fd2c866811184b25b68c9e36fe2663d299caf8b59c6", + "blk.22.attn_v.weight": "8717a83b79035058dcfd3ef6f8e5b36e71d77379e5a239e1899eef8766fb7703", + "blk.23.attn_norm.weight": "2b4a68a0a2f023dd646e4755c9bef17c2f631901154afd839edac7ac006ec99c", + "blk.23.ffn_down.weight": "29499b1586c6fc4883c9b7a9c8cf388035146b5aecf90c5c4c8c8e082c71e7d7", + "blk.23.ffn_gate.weight": "7d6554036d21c587b9b556428054f9c15cbef96d24b257f906fcef4ae38bd9c8", + "blk.23.ffn_up.weight": "19761ecb288d6ebd44b681c4535661583b1e19dc29e96d0c007333cd8f00aacf", + "blk.23.ffn_norm.weight": "37dc35500790a4ca33807b39cf7af65065e535dc25b9e94f3ed2759f61887ac9", + "blk.23.attn_k.weight": "717547d00323817b0cb40a72ec5f8cf42ecd1f9e3e42715c2cc5e38f07fffffe", + "blk.23.attn_output.weight": "a24786feb6a905fdf166d7500133757cbe494779d4ebcba9eb03046b319557df", + "blk.23.attn_q.weight": "6a2c4a98f138b928d22136efa163562691d3b4ed526d52d46a2fa2694a8f3965", + "blk.23.attn_v.weight": "c6e6081eb9c38a7fda023085957b460e9ea321e1fff408b38c2b58595c39979c", + "blk.24.attn_norm.weight": "5e6283f891e538670425f3e244b08dc6f96f33dfa4aefa913f8eb17212421850", + "blk.24.ffn_down.weight": "e09eb170f389deea0a4a1cbfdb52c12490768a2c60491b7bef8a4c445e2a08f5", + "blk.24.ffn_gate.weight": "af29d815cf49a38fc2ebd0bf9b2dd9933d023a29f2d766981acb9a1b53f09117", + "blk.24.ffn_up.weight": "36ccd9333426666de9d3088bd4dcdf5b624b09dca9e3a83a22fc0383f2d950fa", + "blk.24.ffn_norm.weight": "a88e1692318826db6ac42582d182e51a3c698c655d0e21e04fa086318832d07b", + "blk.24.attn_k.weight": "f7d61d6d1225289bcc502e3bbb0168b4584add0253218c1b77ac92ccef9a1c2e", + "blk.24.attn_output.weight": 
"85a1363b3ccc87312094c2195022687c16b0dad7fafb9e80bb4ec474d53c29ac", + "blk.24.attn_q.weight": "53482a2c008f42f4fad779ca323addc3712040149dfc12f782417756388a72bb", + "blk.24.attn_v.weight": "67498272369af7dd10097c73b07f731b565cfc9a559e711cc0d526389e7b44e2", + "blk.25.attn_norm.weight": "98dd617def5cb7825ee4833132ca2da2121245921585e1d9e36b93344adc321b", + "blk.25.ffn_down.weight": "7fd477d6c50aed5f424a878dd284343379cffbee8a34c0b6e55100c8305fa13f", + "blk.25.ffn_gate.weight": "f892c9806c8ec22e8aa746734ac9213428c534921cf161239e1d249fdb5d1ec0", + "blk.25.ffn_up.weight": "528bed14c9bf9762f790525ee40412545221f4321d2a2323fa8e73c58b7643c5", + "blk.25.ffn_norm.weight": "ca5831966672e7be6a578feeb631ec3570d3b5afe12860819ccb96e896ffc346", + "blk.25.attn_k.weight": "610d3068cc9b20401f0c3a0efea39a279dd9f564fde19baf3403b2ec2319e4c4", + "blk.25.attn_output.weight": "798aaf702e53b657265ac3b5e6caf3a0ab515bdadfeb1a3a156b4f3bfba76666", + "blk.25.attn_q.weight": "8a7fa25248de83029fb97b51d036a01baebe31fcb4be121ab00dd8b7de209b10", + "blk.25.attn_v.weight": "2a53d5e9f8a1218c66958c6388d3b37400a9af7956c785024ca44bfbc3c7d371", + "blk.26.attn_norm.weight": "5f44fc043481eb0771f3e6d2420bcbcf73140afb9a9feb8eddb6575452acebee", + "blk.26.ffn_down.weight": "944a60a409d0d5b6a851e33c69aca152454b691711a8b96f5bcc488772ab2833", + "blk.26.ffn_gate.weight": "2a0ca4abb3de5593e6693d8be69b63d6d1a639855ac8332a75f520353f030c62", + "blk.26.ffn_up.weight": "0b1df496163f9ac07bf89375d3eb441b51a81d41b47d769a04a61efc18dbe35b", + "blk.26.ffn_norm.weight": "56b8dd046e9be6ea71f7efd80dbd14e7fb1aa020d3cd38e063275f3873fd12f8", + "blk.26.attn_k.weight": "b1dabfabb970e6971c7ea6e53c63cf7ef56341e6a2edd9cf177785cad9af2f9a", + "blk.26.attn_output.weight": "39532c7e836baad164a655fb97ec5114ea4da37ffba9fdea2684f6e4450e6f84", + "blk.26.attn_q.weight": "8f48bf6aaa1252bc149e98af2be1777a5c0d2c3274c6d314171ea9344a41b604", + "blk.26.attn_v.weight": "02fb145f7fd905133750e90571effacadddfd3f4966552dc59982ac3900ab8c4", + "blk.27.attn_norm.weight": "654d168fc3cab716d91261f5719f180b7d697218401633b4878a759f1b5283f2", + "blk.27.ffn_down.weight": "2823272bec3a1c12f02cc4cb24aa4031abd7e9dbe0b02676e2305b21671818f0", + "blk.27.ffn_gate.weight": "b1a1d40cd02f97182cac17a79971d1934ee0daf3aa0bf11303568c636e208a64", + "blk.27.ffn_up.weight": "ed62ec72a020d070e64eb7b50237b32213944727b5b2427f45d989f50df5fb2a", + "blk.27.ffn_norm.weight": "c69649ac65d694b306a905dee8b03b89eec1ed188b1eaaf38f8e29d4b12e38a0", + "blk.27.attn_k.weight": "cc57bbf413f1fd227128dc66efc8590c73634cbd6f96d01ec4878b5e7ca6a925", + "blk.27.attn_output.weight": "cac407ad02361d53207b3c7e25ceab84dcb4347b8087055162e2efe14d11d84a", + "blk.27.attn_q.weight": "0af18e07cee12015761c07c94407024f4f4d77d97bdb24163db0e16669e2cef3", + "blk.27.attn_v.weight": "a1d08fbdfa40af773c5adcf93bd68b78a44ed144e3fc6bbeb8af02e937527eb6", + "blk.28.attn_norm.weight": "f39a51f814512b040a1082143150e4a49ff730f85cef49d7f77fc79d83e91f40", + "blk.28.ffn_down.weight": "74f29ed51055d1c1adb8f0660bbe538a27e016c65650f2d67efc6f1c84fa1b45", + "blk.28.ffn_gate.weight": "ae48bb16487ded6781c60aafc0bf738fb4ae15729952906f247d216592ce249a", + "blk.28.ffn_up.weight": "543009727718ac22f11ee4b17815f68ea6f15ba1f3e7ed5ecdb755cf6417565b", + "blk.28.ffn_norm.weight": "b8f9e54c322079ff20a82b88948cdc2916c22c7db40b9a9ed6d3cbe89efb727e", + "blk.28.attn_k.weight": "55d055ba653b728d6e784f9e013786fed07115c9fdf23367e3941386d5e77db8", + "blk.28.attn_output.weight": "155101c03ddbf18f4fd0694bfc982f33c7bae25c9b087d6f5273c2bfbffcf2c9", + "blk.28.attn_q.weight": 
"1ed19bfdd22e9c14eca014739982492e9516d411515a8585f65cf754d849e53f", + "blk.28.attn_v.weight": "11ba854dd575c025d37256eee9041f6d1bd2b549a083d6409a09bfc1542913f3", + "blk.29.attn_norm.weight": "02b0bf5e2fcefd11a153cc988c81ba672682e4844fcf6442423e21a0e10d566d", + "blk.29.ffn_down.weight": "594bb692ec2779938721ff4748666ca8370e0e4fe85229503f616438b8884f5f", + "blk.29.ffn_gate.weight": "8bedcf47e91dcb2cf4093de56b048ee411faab6ff472f89ab2c9c113a08e6967", + "blk.29.ffn_up.weight": "e241a547b5fd6dfca8200b8141e21c1c487a96cbc4e5855f181a7ed1be91b642", + "blk.29.ffn_norm.weight": "e63eba5e4c6b288bfd9f15e46e236086456c8b7f1f9c732c0b5de84962a2e7cc", + "blk.29.attn_k.weight": "afe5979d5bcf211aebb526620f5974bcb0a2c39c8be71e815575c55d6385e3aa", + "blk.29.attn_output.weight": "9c944ed44b124b014906fc240afd3b90aed56bbd9567f2eddfd5b7a685b3cb48", + "blk.29.attn_q.weight": "e234e08e5c1bd9245a2edc8d63e9933b6b879f97c01392209cad4f55f05f3ada", + "blk.29.attn_v.weight": "5cb8e3e5f954e775c5a5e4de7a9a62b17e9c6931bb0ff0e2f82c4126fd3e1a1c", + "blk.30.attn_norm.weight": "a65483ee51a0b214144ec8a14f28ea5437586e9e12ebe342a57d1f8627ee12af", + "blk.30.ffn_down.weight": "417959da77ceb33ead4271cbb9428b195196173a893c44e52880a7ec61b4856b", + "blk.30.ffn_gate.weight": "a0d503ffcbe45dc927600bb98c9f6082487e65cb577ab545add400d666a87638", + "blk.30.ffn_up.weight": "f8ab957b82ffcd10b21303cb5e866209b6fe95f827b1b94e9a949207952d12c0", + "blk.30.ffn_norm.weight": "210c7ceb0514a9ef27b5d4d1b3aff6dde43f1af0345a050d71097940e0e73e03", + "blk.30.attn_k.weight": "16861b9abcf5a3fe73c93d977ca45a1e6daa65be0fd85c2cff53486ce2033afa", + "blk.30.attn_output.weight": "ca541fb2e57e2257118c35784845b0c731278af8db3036ac53d71aa1681fdbdc", + "blk.30.attn_q.weight": "f7834917748e26bb456b945e230bc926c228e93696bc01fbc2b134bdeeac71a1", + "blk.30.attn_v.weight": "9292783171dbe5eb689d17c9bda11e537f0e9b328fced6986c938d61ed590e81", + "blk.31.ffn_gate.weight": "e4766a04bcd8f937ba883c6a144101e546747804ca66c35c97281d6ccb47b566", + "blk.31.ffn_up.weight": "cc1e666116f7e6b06736db4aa4b81003c583f54f4d9200bfa48842249940e16a", + "blk.31.attn_k.weight": "fc80b57557687504efae7d24265cb7dc39b8f826bb3d897a11783012dbedc44f", + "blk.31.attn_output.weight": "215617f50a1f5d9b2250b82f3652b35a9e9aa0ad9ef2b485d73965a14b2b872a", + "blk.31.attn_q.weight": "274b4f1dfb0bdec28632705677049fb3e327ce6d9e1f3baaad1560439039982f", + "blk.31.attn_v.weight": "e641b8b926f9dfcbbf6b6da1c02555525ac4b1c306d96f20cfbba7d6662c4e56", + "blk.31.attn_norm.weight": "b3243c361d4041ddb892ce6862dd5091f57d87357e3c67e177451b85d8baf34d", + "blk.31.ffn_down.weight": "0a00cd3ecd5e91624a27f9e239b1de425d5ba3cfff82c256a11a4ad434abf3c2", + "blk.31.ffn_norm.weight": "2a0d67ea2bb1303975712243f07273c92fce83baa11b1cd6d8e42e74ea3c810b", + "output.weight": "768615f077fb797967844571c58b94d7c399d884d115be3ab4b0154504cae892", + "output_norm.weight": "7cc5b7ce10e5082000fa00bfa68af8c7c5da218e59e2c41cf2f1499d40ca229e" +} diff --git a/convert/testdata/Meta-Llama-3.1-8B-Instruct.json b/convert/testdata/Meta-Llama-3.1-8B-Instruct.json new file mode 100644 index 000000000..ad7cd20ac --- /dev/null +++ b/convert/testdata/Meta-Llama-3.1-8B-Instruct.json @@ -0,0 +1,3 @@ +{ + "rope_freqs.weight": "80fd5efb2f729381785b293a091a268cfeceb0079167f6ece9b07070e662b222" +} diff --git a/convert/testdata/Mistral-7B-Instruct-v0.2.json b/convert/testdata/Mistral-7B-Instruct-v0.2.json new file mode 100644 index 000000000..88d447b3a --- /dev/null +++ b/convert/testdata/Mistral-7B-Instruct-v0.2.json @@ -0,0 +1,313 @@ +{ + "general.architecture": "llama", + 
"general.file_type": "1", + "general.quantization_version": "2", + "llama.block_count": "32", + "llama.context_length": "32768", + "llama.embedding_length": "4096", + "llama.feed_forward_length": "14336", + "llama.attention.head_count": "32", + "llama.attention.head_count_kv": "8", + "llama.attention.layer_norm_rms_epsilon": "1e-05", + "llama.rope.dimension_count": "128", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.add_bos_token": "true", + "tokenizer.ggml.add_eos_token": "false", + "tokenizer.ggml.bos_token_id": "1", + "tokenizer.ggml.eos_token_id": "2", + "tokenizer.ggml.unknown_token_id": "0", + "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676", + "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e", + "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6", + "token_embd.weight": "cde834ccac5e94324b25cb81b02d27312cac0c551b55a7e1d555d90bf6cb6e81", + "blk.0.attn_k.weight": "458bfdd9715c66e017c2447b1ed3c582963a3111479314e664faad8c914f42be", + "blk.0.attn_norm.weight": "e1fd60b95f713bae7b7e3ca933c64ae6c9cd1e8d808000204bbfdc19f0ba635b", + "blk.0.attn_output.weight": "df13b6a157d9d4f96c53b012b3b9bcd207d0c94144cbd22ae3ec13bb07d6c373", + "blk.0.attn_q.weight": "13b4126b4245bf06c915a93317c42b8174e05053535ec99dc576541e4cec7c25", + "blk.0.attn_v.weight": "5b1781d3a341214511b27eb4e268674ea3ea829dbdf8ae5a6bb89b3c0b33fafd", + "blk.0.ffn_down.weight": "49186f5d8148d316b07458841d13a2e66587f4af69b776188a809591ed9c070d", + "blk.0.ffn_gate.weight": "4397e30ece09136f00f4ff84ff49e5241b765a374deb8c5a12e897e2bf73473e", + "blk.0.ffn_norm.weight": "43260589aac3850a779bca3f9649f793bbfbe5db538361cb743b3830217f8287", + "blk.0.ffn_up.weight": "fd7ac918240a07566f6967527ffca58fcf433a30b78fdd6d84b2136d4ebd9987", + "blk.1.attn_k.weight": "209839566c7d235bdc20565a4766378b6ee8553133a5a3315abe8a85baa80712", + "blk.1.attn_norm.weight": "58c52986f7c69784ba327cb7f350923420782bee17fa39b1fbd13839d4005357", + "blk.1.attn_output.weight": "5067cc628449682665dfcf59b16e58fe2a9d2a81cb099f0fcd42f4f8670c6740", + "blk.1.attn_q.weight": "f410f9f0dd5edc09401af597d02e2a4c727f1502ec3ec3898321617b36c6df6b", + "blk.1.attn_v.weight": "d40fa49e07c102c0644e130e7909eaa93ed0d54e2edddc0759e721d58a4e4f5e", + "blk.1.ffn_down.weight": "594b1eff6ed4defbdd819fabbe2d48764984f08878a860bdb808511d5a25b8db", + "blk.1.ffn_gate.weight": "4cda97541e388a5bb607ce4cc8b3db1da7045830a630e7ba4d17807befcff346", + "blk.1.ffn_norm.weight": "66c13d7481be65b97aa474735ddc9674f33d512ddda76fa6fb45c7464b09f1ed", + "blk.1.ffn_up.weight": "1adc6de288ba4cc1237833ca8b4eb81107149842e38bc452e18e5cfe284338a2", + "blk.2.attn_k.weight": "5420423559f236ab22d85a00849f31e0cc6e9c7dd879de724393d8cd2b379153", + "blk.2.attn_norm.weight": "495fe1ab40cc52aa054ddd4f0c2d2790f4326c8d103296b1b38f3b1060db2a24", + "blk.2.attn_output.weight": "ccb83e7085381f558bfd65588c525ad2671feddcbc3887afb4038ad9c7aac348", + "blk.2.attn_q.weight": "2e8f77478392bc93c2a391f2e0f4a173a952bbab88a7aca099c6ee909726409a", + "blk.2.attn_v.weight": "d64512590f3b7ebbb9e77c2eb97fbda90b00d45c944f2b174f03a2cb11007567", + "blk.2.ffn_down.weight": "1de5084a05dcaa6b1bd926e83517dbe9ebe7fde79235fe56018b3028b1aa6397", + "blk.2.ffn_gate.weight": "cbea526b557f49aad8c976973cf367fcd12175b900f551984f498b9e07e4b7fd", + "blk.2.ffn_norm.weight": "530aa49b10c7eae08899d143409240deb95dae4e1d5bf78cea3b26393cff3ba1", + "blk.2.ffn_up.weight": "13a5fc19b96b4dcc1e9bd01998c8272ebe52034c1933ed123a506b711fae9a5c", + 
"blk.3.attn_k.weight": "1913b63a73305941d8cdc472e7f101c633d3357a78602eac0a4b49a744261075", + "blk.3.attn_norm.weight": "9c11bed5ab41f4adbfdae4ead65b525c8f19443e656a8c61ba412a4e1ad1193b", + "blk.3.attn_output.weight": "bb0b42c1d34779c5943272ed71f1dbb31ad8edd75f8bcd5c868f88505ac3a610", + "blk.3.attn_q.weight": "3461a1fe4e49f5319ea047cae98ccdb46528a3ec23831183fe87610b48c94948", + "blk.3.attn_v.weight": "82aa30be6a61526a41fb79bb28a2617416f5909f0477aa9e95e16be9370fcb38", + "blk.3.ffn_down.weight": "68521011ae03f5e3b0966127111afa8ee9f2eaeeef8d3a0b86b633e0332e9fbf", + "blk.3.ffn_gate.weight": "1e89e26338fd364bb679695968c65106382f15ad55c95cbb5ec9bdfeb766f432", + "blk.3.ffn_norm.weight": "c81932529a5a8c417c27b888dbe95fff8b447c2ea5f6f560444ec5d50b93832c", + "blk.3.ffn_up.weight": "305021735afd8669afefd713f56137248d5e817e60471a112ad06b7fa07ffe88", + "blk.4.attn_k.weight": "cc26ba5c5c28082a79e6abfe61186029e80b145252ca6a7924c437f0bcf2d51b", + "blk.4.attn_norm.weight": "302d251fdcc91f7468cf33f80b49484251d8917d7018ad264ab3a85c8ecf9ddd", + "blk.4.attn_output.weight": "a012f5bee3520cd4ce51f0076c132ebc3653309f304032ad051aa308f55f36de", + "blk.4.attn_q.weight": "3c8d607e447f5ef21e73af71e3c0d32fae16f91f31faae34ff06912cf9cb68fa", + "blk.4.attn_v.weight": "49f6c81a634ce46d71c2350206ecbd231b1732af96e4e4e67693c41a07e007d8", + "blk.4.ffn_down.weight": "e89504f311a4a34dc819a67b761022f14d71c43df3ead4f892c87aaa8e9f0adf", + "blk.4.ffn_gate.weight": "18b22f079a2fbaefe3572eec61fdcd996fd747724e2f0ff4f08cfcb43eb7bfb6", + "blk.4.ffn_norm.weight": "22415a492c168a0878912b05c854a631228b01c3ea8842e1d75989ec46c18a65", + "blk.4.ffn_up.weight": "f57379eae2874d8853f14ddf0f0fcc4ff1338574d5ed5d7e88331d5fb84f5642", + "blk.5.attn_k.weight": "d627af853c40bddf9762ce3988008c1ff17f2686fa8f73a0b5da38010147c316", + "blk.5.attn_norm.weight": "9ce01092c7f7f1c3ef72d6b794da12d77aa1f6a24fb96ba1b9bd5a0bcc3e2443", + "blk.5.attn_output.weight": "0388da8064c4b6b795ce2d8079e8a36535e82b2c9cf794e38ce8ae460aae726d", + "blk.5.attn_q.weight": "039b7ce1c909761fdf475c06cf14cabe5a90199282c89e4dcf460e95a4b6275d", + "blk.5.attn_v.weight": "c47bfd8d2496bdb6e00e03b903e15fd0ee806a515094ec257e43cc433147ab7e", + "blk.5.ffn_down.weight": "1d62e6708974bae318cbf00a8bf621d9ba0537e549ce4710a536520a8d14168e", + "blk.5.ffn_gate.weight": "8b42b1b11c92db19985094cbb50434e3a7c9cfea71ee6f21ea79eae7c49284a5", + "blk.5.ffn_norm.weight": "e0bc520f1505e687ec391d632a381d38d8ebcdec19f614a11a2000ab573e8b7b", + "blk.5.ffn_up.weight": "8cdcd17d2ea89bb9ab902dbc6bf3f827fa4ee029c6bf19eecbdefd146d8b6f2f", + "blk.6.attn_k.weight": "5dc6bcff89794d1756bf57ec665b58622d9352130d31082a6c66e1a079f99932", + "blk.6.attn_norm.weight": "13b26008abe0f119b5104b9d78ebd5e797d3cdd68122b93d73a3b4831a54d085", + "blk.6.attn_output.weight": "f5a49917ea70c3fb311ccfffbfafa63ab18416a5d55e5429b70ce8bfba57c075", + "blk.6.attn_q.weight": "d9c2f652c87dbd09ec3822e12876648fa32e86553ac25afab723b1cd9f8cef90", + "blk.6.attn_v.weight": "5ecc5fe67609a35151011cb526f45c56fc0a999079ae0ff37c755ca03c68c555", + "blk.6.ffn_down.weight": "0ec125ae0ecb2d9277fdb1b04f17efee94e37d0ae37311057c212ca2db3fe6d1", + "blk.6.ffn_gate.weight": "fa4d6d38355ee8aa3b80b476d65ae7e343c9b7770d7b097fc848ee8a6e091d1f", + "blk.6.ffn_norm.weight": "30e8f7defc627532e1739dc76d31223d45767391a431f925b63dabe334b0f392", + "blk.6.ffn_up.weight": "6b97cc32b290fa9087806b5d65aa6dc1760737730c8c71394cc4f30c2157f9ab", + "blk.7.attn_k.weight": "0231cb127cb7c3714cd72b8f39343891d7715a9bab2237ade9e7bc5f4ed2e68a", + "blk.7.attn_norm.weight": 
"7c3187f07eead7d219d98ab2daf87905e88d5f1ace109b6f5fa55dce3914981f", + "blk.7.attn_output.weight": "2f30ad972c284ae7c8eb0482053433495ebe8fe9c5ee2c28b4bc4ed1f33050fe", + "blk.7.attn_q.weight": "3a2b4b8d61cc9956d304fa9f82a9e65b4bb9fda2196670b16df7e0d8c43eff2c", + "blk.7.attn_v.weight": "d2aab97d0dcf0f61dd2f32848f7a8a99c423a4948a660a660a03a546972b8db8", + "blk.7.ffn_down.weight": "2270d520468c5549cd30023ff9c452a277058310104c4239a616373fc5a94387", + "blk.7.ffn_gate.weight": "4134a3ef71b3eac8f76b6f1a2e58625b3bae48081f175994bc3ed7d8b0d4f2d0", + "blk.7.ffn_norm.weight": "42df4abd4b8769b16f3930068f96960af1b061f1aeb7505384f272233b2badff", + "blk.7.ffn_up.weight": "c920549054ec16ff8c73a72f5d837cf4e11885e44db57c1c1c584c18fbd7a9a5", + "blk.8.attn_k.weight": "01c609bd3bf31ce65688f1f640ee413740e821330134d4ed1877a3065d1527d5", + "blk.8.attn_norm.weight": "48857411f769b00290f4e4f2e593e092781fdc2503f80c1e3eeda1b85a20f74d", + "blk.8.attn_output.weight": "90fb273f8df83744554bd59236515c16c5a5a698ca3fbedc17cc89ddcee354ff", + "blk.8.attn_q.weight": "ade617ac4653c7f00593dbb51837a468afef20a14eaab3780fb96ac3d6714369", + "blk.8.attn_v.weight": "c2c37496494864fee5c527d1fe1f88529d31c73f9cbd02ef9b2e9b23611ea50f", + "blk.8.ffn_down.weight": "2da58572e9ad79087c03cbb0c23c9ef69f93ec221fd5fe4ed92fb93871d23ffa", + "blk.8.ffn_gate.weight": "4483294e628edaa4901708e73e92c917bdd93b780fa01aa74aed57166f2bbf0a", + "blk.8.ffn_norm.weight": "c0cbb7a4f8123b62f0c4652a687f3b394802bc32870dc446eefb709e42043a7f", + "blk.8.ffn_up.weight": "9eaf8a2060cb9224cd585997cd671866c4051ad885c2c6d9fdc7056c2a5c0d89", + "blk.9.attn_k.weight": "5dd36c45fbc9c50fd35c36cd75576288506971eac5c5311d4f5c16ef60099645", + "blk.9.attn_norm.weight": "3c8ca64f2f75ed7c8fc1da010c23be787648139a96ca0ef3ad10be7b14942b8d", + "blk.9.attn_output.weight": "6277e1f833024f53c409be919ec76d34464a78b278c8f9dbf79e777746e3b995", + "blk.9.attn_q.weight": "87352b70d9e328c2d51d59090cf5ea5a046529864a890d0bc8986447a0a5c006", + "blk.9.attn_v.weight": "2efdf01161d7a82a9117cc2d87d37dba5ffefcf730781cb94fcc95130e48ff9e", + "blk.9.ffn_down.weight": "e7658a2ca984961c7ace16acb679387bedb1fef656b5330bbbf588db19673a75", + "blk.9.ffn_gate.weight": "773cd330d4ff5d64be8af00adf2e2722fae4e33fc26bb9d03549f6f4b3b0fe57", + "blk.9.ffn_norm.weight": "c8b86cd5c43b332f72060b807091c33a258e5dac01358ff4733b916cd34c9c97", + "blk.9.ffn_up.weight": "d8cc3bcff18bd46124ba2aa7caacc71220b44eeef6fccb993b4c6cb53e8f2c3a", + "blk.10.attn_k.weight": "964bdf3b4e77b915a216f750ff7b0f2eb1dd6bfa071358aef21010b90111044d", + "blk.10.attn_norm.weight": "59ed411d91d14775764eb514acb0895a75a10cbbfbc1c15d453bc50f8046cb7f", + "blk.10.attn_output.weight": "4d35a2a44cfe4ac0a83fd3ab0dcf1f5a0bf54cdb3b7be9fc353ed32c8a3eb81c", + "blk.10.attn_q.weight": "defff5339450dd881ac352f5c459293f39e07b9619ebd10ed632d79a3f310278", + "blk.10.attn_v.weight": "b9803e8d6a54acea58f662d4c0a5c8ebdf986676de7dfe12d4b288937881ce93", + "blk.10.ffn_down.weight": "eba856be64e4be20b92fb4639a783454dd92427250759df92a337e39f1971c08", + "blk.10.ffn_gate.weight": "2d5c509b066584db4de3632b01234e86edcde35409c5ebce18957dc80fe465e3", + "blk.10.ffn_norm.weight": "ecb9a8679945ff0273856624ce435dd250ffe5a440ea0861a5c84f0e4c44d2c6", + "blk.10.ffn_up.weight": "e76ec7e993f399af02958778c643aa78368e3067846714165eb5aba9d5f547f5", + "blk.11.attn_k.weight": "29c6d1f34bd3ba2f0904e57b32a5bf8dcb2834d439159a33edf234ce0b775677", + "blk.11.attn_norm.weight": "b5817b275149cd2abe18a6a10e19854605fc58fd364666744362ceee8cfe49f4", + "blk.11.attn_output.weight": 
"1e05653220e237cbe0cc770033e183c9a0eed5680510997409b16186c6691950", + "blk.11.attn_q.weight": "03db725ae669151e4d536e50285b3b047ad097f52475df208ed3e790e31a44be", + "blk.11.attn_v.weight": "27cdf1d4e971326c451a4615a0b79a8c7fe9508f9b76c0d52fa01971fc7eb403", + "blk.11.ffn_down.weight": "176938cd7c2966094f614cace8ba568b10532e45a0d438f80eccd19b6c2a7f87", + "blk.11.ffn_gate.weight": "9782339915dd6fa70013628a01524ee1d01ad8beab04068da7ac6a5ee7603a60", + "blk.11.ffn_norm.weight": "8245f6391e3be97811c0ff27f0d8f484ecc82a468a837c893f059745bfcd95eb", + "blk.11.ffn_up.weight": "15616ddde096d0d25e906375c548b6de4bd5576d1f6b68eefdc29f14e183af42", + "blk.12.attn_k.weight": "66dd21604993edd1b1fe547bcaa06f5bb7e31c9204902d147a227e4badf7feec", + "blk.12.attn_norm.weight": "23a69f85dd8a0904b9839cc5d0afcda299b74e82ae2642106224a1c820f2b761", + "blk.12.attn_output.weight": "4a98d132e376beb274a39d4ea9b6a1b870ad5c66625439d7ff6f45c229c3ca04", + "blk.12.attn_q.weight": "1c6c309d63afcfde32fe37257e300a78e25d01117e33490801107c0e75d1ea66", + "blk.12.attn_v.weight": "723d9e4ebe4e2b1974afa01d8f512b52933698fa36717dd47b37b07760c50a10", + "blk.12.ffn_down.weight": "00e0fb09e1f1fbbf3803f1dee373eaae7a93756b6e13063ab77f9927bc6f996a", + "blk.12.ffn_gate.weight": "89159f7f97aefb1e100107e3ac2d694e1008ad873f79bb953d60c2c1bb22724d", + "blk.12.ffn_norm.weight": "5f70aebd0e43a39d6373d8658cc670c13aadd7818831d3d84f761d5f688442f0", + "blk.12.ffn_up.weight": "faec21b446f061eb4dca561a3180712724347b77a71eb312e7afe9be9e89fa04", + "blk.13.attn_k.weight": "3d440825d19eac3b1753b34d94fee2b3a3cb6636c10b2703ffcf688d3c1eded3", + "blk.13.attn_norm.weight": "47b575e57e410738ad13fd3c74bb49c06b3d31030910834ece509cd1a5c6d9be", + "blk.13.attn_output.weight": "05436d8e613f4475741c1798a7c371b53d61b229507fa04fe23c504ba1f0e12a", + "blk.13.attn_q.weight": "002b5024ce520da41256e3ded5cdc60e5ae07ad9b202cb19d76ab511efd02b1b", + "blk.13.attn_v.weight": "c1f2d6763587c50312cee0d7140fa2c7ee326f5b172bc99b2d8946e08329cabd", + "blk.13.ffn_down.weight": "b5c4e0d8a3ff96cd76a135e415b89f02d28c28f7f3c16a36af31ef0ab8773da5", + "blk.13.ffn_gate.weight": "ae06e9e3d2e1f64c7ad23a4009dc904c2eccd7241f9f91c4974ab2504f116be0", + "blk.13.ffn_norm.weight": "e44a22321bcbcb4a3c345b504e939e8071370f54a8cd702fabdb40b97e0d7683", + "blk.13.ffn_up.weight": "7e6f366d538e21ad431264b12c011892d0be9dfe4c4da9f730af677f920641ba", + "blk.14.attn_k.weight": "95492d6417952ec24b2cab87bceb750fc7e95ac6b1944fc328a3852d980164be", + "blk.14.attn_norm.weight": "6b7b09e1c51addcdbb160ea59edf032531421c520ec5645fe1ff9ca4180cef54", + "blk.14.attn_output.weight": "75887474e4d72c218e6ab0f69f1bf3ec3dc414d51b36fc59df00cdb23421bb6a", + "blk.14.attn_q.weight": "940e33f76e48c21215d19e8a21234c8246d4d084381a7d9806aecb24b071d5bd", + "blk.14.attn_v.weight": "c58601cf5a9833f80f7f9a5b2656e8eab5eb133211446ebd48f8be15fed4ebb9", + "blk.14.ffn_down.weight": "f9f886e7f9b2a54d717b08947a25a0a93e8c2a5b8bcd5a907c06817c8ee3ac11", + "blk.14.ffn_gate.weight": "727ed0ee68594a3f59d704ed3240b6929f083b9c36650fb848d182315737245c", + "blk.14.ffn_norm.weight": "bd2471008ff1b2bae9aa26bea019393fb2bbc5b9493b8cec3ebd2c280fca24ca", + "blk.14.ffn_up.weight": "b006446769f51e4f93b503c4727deae897bc1fc7f4fad49f85024b63c4548d38", + "blk.15.attn_k.weight": "23bb70f9035356624039547a603e46be7d1e4403616eafc2451cc09c5373d522", + "blk.15.attn_norm.weight": "718cb371ca052eeb3bfac6ac506abb887df125271821fd171797a7f2d8dd6313", + "blk.15.attn_output.weight": "c76a2695a204b43a8e5acfa5720590b5d449a9ad9e082cbe3e80fab5903ea16a", + "blk.15.attn_q.weight": 
"2b3e4037b9e91bdd26d6e8d904cf39f948192dcf09bb6445cb55ca058d4f4626", + "blk.15.attn_v.weight": "7c15e89b6acafc8619e86aa9d412f5893ab17843ff2cfaf40eea9637b24910c6", + "blk.15.ffn_down.weight": "e16fd4bdc6d1c1209c6b633454df4992870c8cefb2cb0e8c92a7e489e9fb5d19", + "blk.15.ffn_gate.weight": "95a46bea366c260337c537fde06b4cbeaeec52484a69c3390bb1d178eb0525c9", + "blk.15.ffn_norm.weight": "37730293f704da265dc6d1896b3be00c39c0a41dab07f573af39dc30a481d623", + "blk.15.ffn_up.weight": "ba74a199da2d0875d7410824238c4ffafbda3993568812284a72b8800df91f15", + "blk.16.attn_k.weight": "f58f79a2a91c9a763adefce0c53a71eb5ce6bd8442f4af554b04b58083bff27e", + "blk.16.attn_norm.weight": "0c16e41b95e81978e0e0e3b338e2afe2d297426578cacee94de15df74e94eaad", + "blk.16.attn_output.weight": "ead22fc337514e4add49aee19720008558e52090466866e849671953a1fccba4", + "blk.16.attn_q.weight": "ef59c4e8fe8918c1add43d7e9c6fb3ef799dd3e1bdd731ec7b6a4a6f97c86048", + "blk.16.attn_v.weight": "902e6b84c2b64241470b13e6f412f859f66b4b223bcfb9c15d5cb1106b07ef3b", + "blk.16.ffn_down.weight": "2ad6e9eb4d8372c32a554395d460d17cfb02d6dbcb757cc962b6bfa36db4f5ee", + "blk.16.ffn_gate.weight": "825b2d50fcce3dbe6a5d8d8a50a95466f83ca4a10343efe67894c20b4628fb15", + "blk.16.ffn_norm.weight": "3bf6ac90befb0e17e077c8ea9454a8485a30f89f2d761ec7751b60c90aed1af9", + "blk.16.ffn_up.weight": "9fbdd08739b32411f5ab0252174d386bab19eb0b17884862f760429b7d41d78c", + "blk.17.attn_k.weight": "4033398718bf3674830ed1b73071ed8482b6dd4ef27f31a6c5fbb998321b6c07", + "blk.17.attn_norm.weight": "714f2e8ac9592966a0f1c02ee979eee8f84586405b992e8ee9543e840199ffa1", + "blk.17.attn_output.weight": "b6bbb618597d767b8f535117be68f92911e4a71d4eb4d8b5d943444151445ece", + "blk.17.attn_q.weight": "b84a0dc00ceb515faa2628125dcec502eed923077b21cfe900a4ff16c2e5f9ed", + "blk.17.attn_v.weight": "4387c7d6a17da9cc7a6bca8f4a75618b20407d570792056283a8e93b6ec65f18", + "blk.17.ffn_down.weight": "47db95c6f1e12b399c3eaf9ddba261782dd71173dd163b52af96541cf87b5196", + "blk.17.ffn_gate.weight": "59abaded0aedfd12f01df81f7a811e84db6a227f51b60abe9a247ca726e87392", + "blk.17.ffn_norm.weight": "b7e86445be5c7b722e01ddb98d5c7527ca86cb827ce0354f2c269e0f2558751e", + "blk.17.ffn_up.weight": "8e31c293bac649d2f60da4b3fc4a3acdce1111ec6058d8805eeeb242443011de", + "blk.18.attn_k.weight": "5ce762ab7b032511c131df81093b587871718c7097f79d8e07d707571f18a47b", + "blk.18.attn_norm.weight": "1f52cdc7af1f4dc1f0ef6ad1ad02e18cda32133654e57cfa9c72ada9c0b1d995", + "blk.18.attn_output.weight": "6486957f30bf8a88516e25772c6650f98b13923f490a2865a8752e36439d1cfa", + "blk.18.attn_q.weight": "93621c8abf69d2ca29c5207180eb628fb2b544d89de6c4a7fb0699be95534899", + "blk.18.attn_v.weight": "11604083b5a74828ac1d226af015ad5dc0215a1fdca44fa7131c2163c02d8156", + "blk.18.ffn_down.weight": "8f9997feb94385f106915df810239c9753b31efda2bf14bdf18a9fbbeec8233d", + "blk.18.ffn_gate.weight": "427c213b3a4e94af703429daf2f65766f70424d8230c123e7e712a18bceb5ecb", + "blk.18.ffn_norm.weight": "c45d305c4ea6a54013ba112f12dafaade064a32cf01317373464a3618d8ba44a", + "blk.18.ffn_up.weight": "a2811f2e73ac9eb9cce91a21a454e84e230a155244e2cd73f2c12aad3c9b8cfd", + "blk.19.attn_k.weight": "b2daed159925eac58c291e2f1e2000beed21002b03c9e1bc7e7a52e22240666c", + "blk.19.attn_norm.weight": "6307306ede2ab5bffa1bcac3f8b139354678c0376b1d9f5530c1fcb4268cfeb4", + "blk.19.attn_output.weight": "ebb98218b2a9c84d3fb6baeb02c5df264b7ab80d994d1098ba1cd47aa398effe", + "blk.19.attn_q.weight": "4f10df2ad09177e7528e9456039b670d07db22940a49417101b725d239c16724", + "blk.19.attn_v.weight": 
"30f1efc5114badaeaafa91fa466dc7fa14b1616db433c6f563ab851f7333a5dd", + "blk.19.ffn_down.weight": "be5ec7fe6b48855cd0015b0e430d1b70c620de87a7ff188c7c1afef546d7b6bd", + "blk.19.ffn_gate.weight": "10dffea4213881f8a9b583ee0fd370e033756d32255ed15053f794375b9400e9", + "blk.19.ffn_norm.weight": "e75cd24ade45dca78fdb0cbcaaa2d4a17d83a5a73dcc94ce0ec2d68fbdb2a881", + "blk.19.ffn_up.weight": "63e81bdb951410ffa81bcfba1b94a679ec9ebae59cd1623ce2651ed5d4c78bfd", + "blk.20.attn_k.weight": "c2fc5ad39e9bdd45e73c6e54aecc474388d944c4be1ee1921b7fcd035bad02e0", + "blk.20.attn_norm.weight": "aaa9169171937bdce20c1f057e94e9252f221cabacf1ced12e11b9586f23d308", + "blk.20.attn_output.weight": "a9f4fb496e4bc053e3f6cf2e72e22d4cd2b545ef6c32f7e782c2ef6ebcc21d4b", + "blk.20.attn_q.weight": "5a07ac619ed251494170b213921ef3fcc4c2712839da262516d9d5b8ea1ff185", + "blk.20.attn_v.weight": "d6689473105d241eacb17f09f06000ee237336916cf5ec4f48271c5b41bcb8e7", + "blk.20.ffn_down.weight": "74be38db51df736f26ede7c6b52ea787e385f181cb66231e2cced4556a25c9b8", + "blk.20.ffn_gate.weight": "ea91e06dc3d051c0ba0243b5a8bb40edbf254eadfb54fda7247e05cfdd88cbe2", + "blk.20.ffn_norm.weight": "5fbd357b3d6f44a7a91e8a4fc246b24303891b7957e0f3c32818ae5dc16ddd8d", + "blk.20.ffn_up.weight": "fe3290333e056af4ed12942ac72aeba97a6b562e2db05e79cd35dd07eab5b101", + "blk.21.attn_k.weight": "201ec6ee95f06ea5eb80fe86fd07bd016d3ae9ab6abd25d631834414e14a010e", + "blk.21.attn_norm.weight": "ea8154f93e06485828475a00b98cc397ac84768dd70e06ecc0c075b5712d7276", + "blk.21.attn_output.weight": "9f8af74d531478fd304723fd8e4e01578db598441b80dc7c960cb801dbbc501e", + "blk.21.attn_q.weight": "277de9953a8d3cff894ffd06c15ad0ee1407e319df0c1a693d4f45fa9c74ac7f", + "blk.21.attn_v.weight": "6bfdc16cfb898909b7788ddd39dd04b928f31d6732772195d53c558004638dca", + "blk.21.ffn_down.weight": "173877146cb94801157796ee9e5eecf3f46acb3b5e797f90b83a3fc22395eb30", + "blk.21.ffn_gate.weight": "53146713e2ca1be80496024077a028f6b6d749b02e71003c349e113b436f48f4", + "blk.21.ffn_norm.weight": "b28b97e18ab20a5c553ba422f7d7f6014f5902f1d62a69abd20d9fe19a5f9462", + "blk.21.ffn_up.weight": "5c39d0ac4d602b8ec8909dade93b2efcd6b6d9d84a19b252d76bb66dcfaab87c", + "blk.22.attn_k.weight": "01f26272c82917a87a3ccf922fa1d521a952b05de878241b7efe3525b617ac87", + "blk.22.attn_norm.weight": "5ffc96249d8873b506e9eb7158bdfd07fa1429e53c1951430ca7505d25f11c76", + "blk.22.attn_output.weight": "9c2201569358f720244b9c9497e4da02585a167b1414c8a506b85ad75ba990d0", + "blk.22.attn_q.weight": "906036eb4ddf027f6d920f9356a6a2a5e529b96f4e1231a0496d46b4434a5842", + "blk.22.attn_v.weight": "30ede8b0d166003a4b8a81fc99437f557719fc36e5c4dd510c9f161f36a47e73", + "blk.22.ffn_down.weight": "d04c164beabab30e1837b843e18852260efccfbb9d96a34ddd816e6fb3ba23c5", + "blk.22.ffn_gate.weight": "19c889db6b19179f0a62d5981a1506592c65de83760d67afbe00d202202750a8", + "blk.22.ffn_norm.weight": "4885eff2d851b32dbd306bd632c725857e6d164f0fa8b3d5857e572e6ef98ee9", + "blk.22.ffn_up.weight": "365594d8db8e95cf87cc33ac23947942dc326110175cc8ec5a07b5c7059089a7", + "blk.23.attn_k.weight": "badfea1569da0fc6ab817c5727ca3a69b07d9cfd622fb8be5e66678d5b3f7ae2", + "blk.23.attn_norm.weight": "8968f78a379ac3ca5458b4ed4251e8d9112aca6d6dd1ef6440b4bb0b380375a4", + "blk.23.attn_output.weight": "93e43393c03956287b1fe31e9735ff1cfe84f4ae56b83dbaebe96275e4e11831", + "blk.23.attn_q.weight": "aaff73c725a8700ae66bf26ac8869dfe96738eff23a8ff340de2ab53400a5795", + "blk.23.attn_v.weight": "3a86a8dcf14a746ed1411f5a7e634064bc4dfd6511c24cfeccfb2c9ebb6b4101", + "blk.23.ffn_down.weight": 
"d4da6f37bd7ef69bb203f7b0dd59f50bce37432c70627e6cf274ab81548af5cf", + "blk.23.ffn_gate.weight": "5b6072936c4a693923bb4e3d1473fd45545cb02fc07799aca458ef0449a04061", + "blk.23.ffn_norm.weight": "cd76e37025f84773180298ddb15e0d4ba9cfc7d832e19c791049daa47c6d9c10", + "blk.23.ffn_up.weight": "cde43b99b83124a13b2e4753d12674b3a61dfb34c04703007ced3e8e2aee1801", + "blk.24.attn_k.weight": "457379edc4cce4cbbe107385079019bc922264fdfc7bd1d1ae84343a81460c66", + "blk.24.attn_norm.weight": "0ce0dfab2edeede5da419fa7833db78e36222cf25c358d08f3ec664310f031fb", + "blk.24.attn_output.weight": "0cf91c2fd40c204d2fd4b9c85b69281e5ad4ea8442972fcd44b5fc8e835ffdf8", + "blk.24.attn_q.weight": "87ede30c09eafec6a4e6285674c1bc4637140b168b2da4ed34f36fdb6e176cc9", + "blk.24.attn_v.weight": "4c0b078b2798ca35d6d2c2258fe499820d2bc88700654ba4016e4b028f563590", + "blk.24.ffn_down.weight": "cdb8540c32b1ab988f984484928d39f6841f2131c1cebe90ad9456737fccbcaf", + "blk.24.ffn_gate.weight": "da2e0e913648b5526bd2bbb344038dd067639343aed3b413662b064b0db7556e", + "blk.24.ffn_norm.weight": "8940bd781c610d75eb2be63cfc8d869a3af05e53c963dc7fd4c6f653df5a80ab", + "blk.24.ffn_up.weight": "90cbac2a58801abe11ed6c24560aa4acb949f79429f2aa8ff129ac05868bb87d", + "blk.25.attn_k.weight": "90607131e36998e990ce718ad05cbecd1bcaed010931401ce6baa3b0d93ebce6", + "blk.25.attn_norm.weight": "fbf679c85656c04a6cf8fedd5412c1ace22960e6c2d47f2d43997827811fbb97", + "blk.25.attn_output.weight": "08412724ee7a2086514406e6f68fb9f622e10bac25b0c373b294709f4b09bd2b", + "blk.25.attn_q.weight": "9c1238e98a2747654a0d4371d3e7ea8b979867f609dc42482544f25591e85c7f", + "blk.25.attn_v.weight": "a57796a535c6cb09581cbafd6a91dc14adc8cca2a2465a7ffd0aec546cd84074", + "blk.25.ffn_down.weight": "f7e34e8a6391b480da08b52640613ccadce268373934b409759743a1735b74d6", + "blk.25.ffn_gate.weight": "b8d0b2f4612678b5ce42bd4a683f8024514b75fb5ebf6b22c600811e95582ee4", + "blk.25.ffn_norm.weight": "cde1fdba2369d315f3c6940a997c471ec891924e642505db580d732763bd7b75", + "blk.25.ffn_up.weight": "72e700c32ac8b9c47559c2222e45888a480b527ea512075423c5dc01678e2bb3", + "blk.26.attn_k.weight": "6ac83b3414ae75bf3a9055c32e49d2c40fe611ab21f8444f03d2f465d18122c9", + "blk.26.attn_norm.weight": "55f9d6dc9d75973dc75136ecb9d991b4398097ac133070873fb96ec76a6f60bc", + "blk.26.attn_output.weight": "ebc4fcbd15b33263e50ed2ad45740867cce15bc90e1216623babcb1820734509", + "blk.26.attn_q.weight": "080f057521073e412936fe3fee64fd574c8128fa4a148b879d3e598fe4954581", + "blk.26.attn_v.weight": "0fa2830d6746487ac91b243716e4302361f891e4e008eddd14abec47c7809d5e", + "blk.26.ffn_down.weight": "cb2ab8af1653adc57111ada49d2825c6995e338c8208455b92de10e580f60f31", + "blk.26.ffn_gate.weight": "231ce30966086bce2dc0e0afd34a22a1958cfda7a57c41b3b8e9444c5dfde8a6", + "blk.26.ffn_norm.weight": "35d959d25d17b00617590f5d5831bf705c385c51e46297a14375a700effca6af", + "blk.26.ffn_up.weight": "367680c8d332538b467d1ef87cfeb36cc5c6af564c5023c5fb50e728e3438287", + "blk.27.attn_k.weight": "0bfcb351c6d17aeac5b55a915074fbdf00f11c4bda98babb196ac8804805746b", + "blk.27.attn_norm.weight": "5d598a88c2e75ba59dd7ba4fee940bdec92d72038f1286536d2dfb71d008a09c", + "blk.27.attn_output.weight": "23a9da7347336479f6a10ded14cb3f46e06b5bd56dc4b0fbc526c688552ec840", + "blk.27.attn_q.weight": "b83319dba9055f069208e9c9d66da08bc6874f23e575288fcd81697d1777aa54", + "blk.27.attn_v.weight": "36ed34ccb2f36fdf16b2c2dd225a98ea6b7b0e376e7791191136ccd7bd7a4add", + "blk.27.ffn_down.weight": "5488e1d3a58c71b5e9ddda430540b4776b268cfe1457cbc1c2622dedd9e4526e", + "blk.27.ffn_gate.weight": 
"4ff48011ee0bac39af704849d9132a2410392c87a509c684f2062f6b76b498fb", + "blk.27.ffn_norm.weight": "32afe99675983da3de2961d1b5ca41c98970a356823597fe29e91f6e86abf0e8", + "blk.27.ffn_up.weight": "1eae3088a75629571fdbf6a20f141bc2bb2ed3f5ba2b9fd1d949f80695e442a1", + "blk.28.attn_k.weight": "c4e80af714962d6f9040d2c09f316f4a1cbc3a2e994e19902d7c653cf3c73dba", + "blk.28.attn_norm.weight": "c1ecf85dedc1c83d5d402bb7c94fb8b9c11f1a3e5f64e7680f80912d4a560794", + "blk.28.attn_output.weight": "72ba47c061b21f5ebc5213a455eaf6fc49c8f8e04ff9ce37e6ed4921b629161d", + "blk.28.attn_q.weight": "c4abc47234307f44b8ca789aa6668e298158fa4b459b2c1e84bd581806591cc1", + "blk.28.attn_v.weight": "aeba950799d4950e491ad0fcbe30334e39b8975177990a2cb339031c45ac153c", + "blk.28.ffn_down.weight": "4e84ce382a37b994fb8608df451a60040559e3f4f3241c3b3cb8989a3ed50d83", + "blk.28.ffn_gate.weight": "04df157acdc8e8534ad60acc2d2a4dd3a7a6610f6382535ec728994fa6f83f83", + "blk.28.ffn_norm.weight": "4d0386dae2bd1c1a9d0f9730718333e3a486c3bc6a5c5d482193c75d39832c80", + "blk.28.ffn_up.weight": "fec60bb0a3daf182a14bd8311fe6dd1e3fd020c5fc273e2549cdb1a2d6b79b05", + "blk.29.attn_k.weight": "b0532a263aa5a4e2a7a80adc83fc5dec974493bd18da7f953e7ebfc3f3a19aae", + "blk.29.attn_norm.weight": "593fc3b4000c35b7a59dace09ca1756c08be0105b2edd354a0e1c16c82898859", + "blk.29.attn_output.weight": "315b896f9f0cbacd0ca8937384c3a3a227efa908cb8c3a9125ec00c480e32b9b", + "blk.29.attn_q.weight": "d482d45386d4ad3394f08e9dff233ee3a70d0427d65c0b8fa05905da7e25ca53", + "blk.29.attn_v.weight": "cd3b5a6e2852da796902930a6a84bc87fc6a7c7bf51f8fc23758d12a39013b36", + "blk.29.ffn_down.weight": "5b3dba6f9753bd1b1ebcba65ef5373dd62c38e755c44b7231b95d93d45761f89", + "blk.29.ffn_gate.weight": "8610d9d2db15c256243ffcca3ffd31786d0ada0af0e7c7aa3fd20524370ab036", + "blk.29.ffn_norm.weight": "1a2ef2d38b7ac3e51190b9ccb8b6552ba83ab290e523356a7f851ddb35dedca2", + "blk.29.ffn_up.weight": "a5fdd15811bde16dc27677cf1a4c97daab4c28cb12a9530f1a0e573134fdb69c", + "blk.30.attn_k.weight": "1efeb0b5f4b45a85cdf47300f892ac77ac1f38000ec3653565d1303d1fb8c743", + "blk.30.attn_norm.weight": "c73934c182c7fe80838ec1d0b92f50a583f75f7a3d78d822f009b58ad2c80e65", + "blk.30.attn_output.weight": "3a0fd89de2d274614750345d827a9c886a4f97b343a13cdf680390505df596a3", + "blk.30.attn_q.weight": "711e113362bdb067db843c66236704eb1cd3fc5f40e3767143e96d510686ef4e", + "blk.30.attn_v.weight": "82b12a9a74fd3d91b73cc2e841e2b3f0a5197ccd2998afa17020995f880d2267", + "blk.30.ffn_down.weight": "af9f4b1287c0d824ae22d6e335d19e04a70135b835be7caa2435f1d85e931993", + "blk.30.ffn_gate.weight": "e2ab3e6f15f5c50fca66c084cb6a57a2b6b82406d65150e82ea0437b93dd9a46", + "blk.30.ffn_norm.weight": "c1b9c325c83f00e177386a4d7e769945f2995e60950c4a576c0a2c4ab9703d04", + "blk.30.ffn_up.weight": "9b94a21efd419715d82071b490d3b635cf1e8da080620dcc39e5bde976d7e9a6", + "blk.31.attn_k.weight": "0db0d82e3ddcc2c06209f5f013e1d72a84a996c40bf00186be485b909cc268e8", + "blk.31.attn_norm.weight": "2b8b7239471f57140c5cdfe06bd224a4f6326282f99736e44fba4c7b120ac101", + "blk.31.attn_output.weight": "a310b048840cc3ff2be4b84796340e8e2cdf05ec89d14bd3655c109b2bfa9fcd", + "blk.31.attn_q.weight": "f45e0cd95645175ea82813455356d171838539bc3f7676d877c698f2af0a0eda", + "blk.31.attn_v.weight": "8bde008e809112aa7e7c23e9c3099087bcc557313b01306c87efa0a4a30805ba", + "blk.31.ffn_down.weight": "8266fec7e203fbfad7033120861e44984581ff8b6851d01dfb7b81c5d8fa90ec", + "blk.31.ffn_gate.weight": "b73bc0aa5baf006d9ef6403104891b8133671b0992398fe038380b67e0d7e2cf", + "blk.31.ffn_norm.weight": 
"9c62cc27a7b6017c1df8ad49bff249a8245e8895c6754f402cd44623fda83268", + "blk.31.ffn_up.weight": "5b970a4694ea3171a0167f6e1636d9f00268bc1c9640430ffc35218494884adb", + "output.weight": "74fa0ef08c57a30e633e7117b1e9c805f833e2e5e21434bc79ddf9c92c6d7330", + "output_norm.weight": "59b8a59fd3fbf39353506116e43e5e76edd0cbf2a2873d869da4cf27a04997c3" +} diff --git a/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json b/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json new file mode 100644 index 000000000..a15965324 --- /dev/null +++ b/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json @@ -0,0 +1,348 @@ +{ + "general.architecture": "llama", + "general.file_type": "1", + "general.quantization_version": "2", + "llama.block_count": "32", + "llama.context_length": "32768", + "llama.embedding_length": "4096", + "llama.feed_forward_length": "14336", + "llama.rope.dimension_count": "128", + "llama.rope.freq_base": "1e+06", + "llama.attention.head_count": "32", + "llama.attention.head_count_kv": "8", + "llama.attention.layer_norm_rms_epsilon": "1e-05", + "llama.expert_count": "8", + "llama.expert_used_count": "2", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.add_bos_token": "true", + "tokenizer.ggml.add_eos_token": "false", + "tokenizer.ggml.bos_token_id": "1", + "tokenizer.ggml.eos_token_id": "2", + "tokenizer.ggml.unknown_token_id": "0", + "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676", + "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e", + "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6", + "token_embd.weight": "1d1d1d39a867d5a4bfb32792a47247d2638c10c95a6259391d02843583505cc4", + "blk.0.ffn_gate_exps.weight": "2e5cd43ac3f26c44f071926ff6c3f239ecc52a34bc9a5b5906d3d4c1bf2fbbfa", + "blk.0.ffn_down_exps.weight": "a4dfc7e7c96e7402eb70279601675b956bb7331da8101e63fe5c0a611b6972e5", + "blk.0.ffn_up_exps.weight": "2d5d87b378b2319c344ed2c642598b6f7cb6beeb582a8ea51abc9ae690d473c3", + "blk.0.ffn_gate_inp.weight": "a46aaf5aba7401ce6e41f158242b4879d34901661f3ede85496cbd0ce79d6314", + "blk.0.attn_norm.weight": "3fe37d913bdd2b65076bcdd6efe64a37b0b03cacbb1b80b9f7089068aa35f38c", + "blk.0.ffn_norm.weight": "5e14308a3c894734eb204c8f558bdc817e94bbd5b4e9cb4094e91ba388c8f7f2", + "blk.0.attn_k.weight": "73d943dcac0911e87bd771f4aa1c901e1bfe1aed293af06e1a67812159859f67", + "blk.0.attn_output.weight": "4c5f754c855e262e8d4c94c6fbbb57af06399dc0e170d7d99a1a17fc9aab9227", + "blk.0.attn_q.weight": "d6fd7403c873d49c05f6f03208f30d99ad34cb3b71c9990c47334d502a8e4c7b", + "blk.0.attn_v.weight": "cf17cf64b2d683bd9de6cebaf60e5c264df6fdc38fe719dde9d54c80334f6366", + "blk.1.ffn_gate_inp.weight": "0d524de81cd915816b4e714bf595ad6946a9130b3de731cd89428b2781230809", + "blk.1.attn_k.weight": "2ea47f412992b374c70674730fe84700e0c8cce177086ce9b6635e42408964bd", + "blk.1.attn_output.weight": "b4b2520794d54113e86c8ff678eacfc62e35be4395a594a6c8c22b4383ebcc0c", + "blk.1.attn_q.weight": "5db930c98c4f91f6eab57eb974c72210b158e366d23d6d2890b2759c053bee33", + "blk.1.attn_v.weight": "079bdde09668394bf7af9f8bc175017b4f48f0ab64e6dd855a4d7561d1693c0f", + "blk.1.ffn_gate_exps.weight": "146a62de19f9ab093deb101f9640534ffc3dc40d69f508be12fc0475d01b0c7a", + "blk.1.ffn_down_exps.weight": "949da94a3c0f375160672a979e85f7def284264b10d48d038238aad5f5ece793", + "blk.1.ffn_up_exps.weight": "7016a3f467d9e3f2f4b4019579ed86b757469cd367f2b225483305376b4bb3c1", + "blk.1.attn_norm.weight": 
"1614d1e6ed537737275eb888666c7bac533f4eefbe73dec92b591045ca9e1afd", + "blk.1.ffn_norm.weight": "405a455fa7d1ec36894652ceb554bbcb09a07fd6405f42741e66dc4a4665c19c", + "blk.2.ffn_gate_exps.weight": "90d5003fc7421f44220c0842d43128955e91488f6f785fe570b62d81b719e964", + "blk.2.ffn_down_exps.weight": "ecdc2b5a8b504ef0a7833acff47d69b0c1fa9c22126de1bb120ff5e48c3d6e2c", + "blk.2.ffn_up_exps.weight": "2cbd9485a32460d315eb50a2f3b00863fd77245bfe885b7565efac1cdb1f191e", + "blk.2.ffn_gate_inp.weight": "0d0a17a1a2c7a61f2cca49ecbb479154dc93a870873257bc4f225e7607f2e2c2", + "blk.2.attn_norm.weight": "b2e4c5a977f87a6f880896bd73596234c9b83622fa0d7add5892501e3155913c", + "blk.2.ffn_norm.weight": "0ab875b4280afa922376cfc7b9aa3f7071c9432ea1254091ce7de3749df0e8e6", + "blk.2.attn_k.weight": "bb884af51fb51550acfef54ccf1b58ce8284e587806e6a2f88c8265e1ad05a5e", + "blk.2.attn_output.weight": "0f03099ba1ef342ea61af9cd71d028123bbd8b1dd7d7fd9b509aef77815427d9", + "blk.2.attn_q.weight": "8fad0d29eb4c9d24e564774ee3316b9eb7a4c4985e4567111d2c836c830f6cf3", + "blk.2.attn_v.weight": "fe04c847ff677632401a94e7b6b6fdca60391ab21cb23bd791533115de6303a1", + "blk.3.ffn_gate_inp.weight": "29e3aaa724590c070e614af8288939603d2641b0ef11e8c0f476bebb2776673c", + "blk.3.attn_k.weight": "231cc5631def10f7f292d8862d6125ff555164cd70480ac76362149fad204497", + "blk.3.attn_output.weight": "86467a605c62852e05fda1a7ef43150df2cf715fe59785dbcba09f1c27cfa086", + "blk.3.attn_q.weight": "901822402453922225c2d6ac79616691d48217635d5ff7338daa971d5ddee210", + "blk.3.attn_v.weight": "27030784f44375720df2f090933645a31a022d3fb3b14573e5ca0b78f44070c1", + "blk.3.ffn_gate_exps.weight": "231ba59cc0b988d125d77bf627aa3f04636684870af88f081f3944b48a160d86", + "blk.3.ffn_down_exps.weight": "530c3ab44ae4d66e8afa4d10c153ba5dfcdfb7321989a988e62e9d12e7234625", + "blk.3.ffn_up_exps.weight": "b85c2d4d9d11332e702b3c0a6610d4f525f9a93e5d12f5c7c55c592c40755e75", + "blk.3.attn_norm.weight": "05dbb6d88cfa6b199f9d705ccbda97c0ef13f9ec875c595398a1a42d009a4555", + "blk.3.ffn_norm.weight": "6880b1c27d46969ce36fac049c05dc8b89e4bb47dc89df357e32df7e18fc512e", + "blk.4.ffn_gate_exps.weight": "a883b4f225b760c5a2f6605dc5e2167ab85bb398c70bf64ceb539fcbd6128dcd", + "blk.4.ffn_down_exps.weight": "d291bb656aae77947d4b525e2819bf4112afece53ff31de9dab999af1f65f9c4", + "blk.4.ffn_up_exps.weight": "38592afb8ba3dcfb26970f906174f7d3fa62da44fa4be4fc6912a19030ea9164", + "blk.4.ffn_gate_inp.weight": "1596cb74e8fd6c3080b937b06468bb397b0dbb661e6d180a6bcbdc43e8bfd0c6", + "blk.4.attn_norm.weight": "f90c83c5ff4366281d283384efc941620542b9cfdea160d678dc54a75e33f758", + "blk.4.ffn_norm.weight": "d28d8c49d1746b7cc085562d1074905fd14023844de823dc4fb22202bb280790", + "blk.4.attn_k.weight": "792bbf412cc357140fdaba543e547a9b2f7582919e307bbd9a80c7d6d8f5f1f9", + "blk.4.attn_output.weight": "d98e4a062d2631d9c315f1990d5f6ca9a88e7e0e46387f611ccb0353f876aa12", + "blk.4.attn_q.weight": "1a11a55a91d9f748a72176ff6b1c174844df406e00d1b66b9aa64dc6ee4bcd1d", + "blk.4.attn_v.weight": "04cb3c02b12a6313c7ac7044513441083d534fb4c5a3f63bbaa58f7edbd2fadb", + "blk.5.ffn_gate_inp.weight": "cbd5cdf015d33a2da6703eb74c22fcb97581fb9175435173b6dc4f9e8364320d", + "blk.5.attn_k.weight": "4fdf3405e4d657403f5647b51233521310ee984b4b81bbcd901cb3e6ab76b7ff", + "blk.5.attn_output.weight": "4a25662c46979a29600ed77e1907cf81fb16ef30e724c155444e54ccb76af481", + "blk.5.attn_q.weight": "e2acb30e30b97300039bb20ad0878f05159d5657fa811748a51d5b6fb35d631e", + "blk.5.attn_v.weight": "306504b6a26aa123c63dbbed3f4ced0ed2ee8fb6a30bf0093539b817539f5ece", + 
"blk.5.ffn_gate_exps.weight": "7e34df9b9944dbeea5e8565786d3aa6937314a4b87acd4d0874687877c5a39fd", + "blk.5.ffn_down_exps.weight": "c4b7a57a42b5ac0a8ae27dcd5cb2646d7a7cc7123126d44a56ab128e85f60b13", + "blk.5.ffn_up_exps.weight": "09d47593b6dd6c664a9155bff02fc2eb7ac4a70219a88162d05c802a01d3c6ba", + "blk.5.attn_norm.weight": "58804a036d6ac4c1fe357b8b6a97a5c37cae1c2f06ee0086c041d449c1c6ef6a", + "blk.5.ffn_norm.weight": "d872dee6789f0826211aa46ca9d0869e3e96bcace9e77d6559a7b6f3e524f3ca", + "blk.6.ffn_gate_inp.weight": "fb1eae732e974d6c1d020a5b4ef98c5f33016f984701bcea656f999a99daad66", + "blk.6.attn_k.weight": "55e9c59c5051ab5519b3a7962e1b5fa96a3c0251cb6200dc2f177885ad2de470", + "blk.6.attn_output.weight": "f3c834a8d0027370350e2b6294d95434d31432e57be6313b013c15a56303d61c", + "blk.6.attn_q.weight": "efaefe5f11c2140dc7cb532b0832c2a0b363a165cbda21f00fadae77efca377b", + "blk.6.attn_v.weight": "900bd734d75616d846a90a121c97e081c956a3d1ab012f66dd0bc62c43e1ec3c", + "blk.6.ffn_gate_exps.weight": "312a99661b1468fcaed2474621116f1681432755e973f3ee79d01912974fd424", + "blk.6.ffn_down_exps.weight": "ac9cd7db67a2ef0d2b5def86873673d05e48d49d147dd944469dbb8e2d4c46f6", + "blk.6.ffn_up_exps.weight": "57613e7e09579400a1a09fee4445acfbfe83f2f327fdf317877787d96ada6b84", + "blk.6.attn_norm.weight": "0e8801e09885c633bc01a9a5b85d4e878d30158a4eb41a937dc5b760ebd044cb", + "blk.6.ffn_norm.weight": "b8c58062ac93072f878446b0e7f958c737aa47fb769fc3a8f593133d12db2dd1", + "blk.7.ffn_gate_exps.weight": "1ef611732ff13edfa8d30981ed9dac00c15ceba9fc012ed0b199e9280a849948", + "blk.7.ffn_down_exps.weight": "856c6811945c7b0fa461ca17811cfa43436b4cdf5326bad23cbc30883486d7cc", + "blk.7.ffn_up_exps.weight": "6725e3e33994302ee13fa5ec163631ce2dcaa08aadde8fc166c2265d4561c5c5", + "blk.7.ffn_gate_inp.weight": "36b49d7f80c1003dc392b2c1b9960cd49889dd69e77b26b9e4b13d01f3d0a32a", + "blk.7.attn_norm.weight": "7a0ec49acc5e20ee71c6f80ca02f4f1e564c485e0ae0621309e7c2eb0c616cf0", + "blk.7.ffn_norm.weight": "eeae035c39ab6e64bc06a4baa1bf6e50d4c8b8797cb0ad8abd48be86974802c0", + "blk.7.attn_k.weight": "e8f78c1def01a7a38d2d9bf7becb17755e28fefe4927856f7890fbee52840187", + "blk.7.attn_output.weight": "5367f05ac3bb49ef8745ba5902e1bdd4442415a3ebff2c7e1a3918d7be6fe948", + "blk.7.attn_q.weight": "37c95fc5acc55a4f6e5f02cab9be60e4fe54c08b65f98f4455741b4aa542ff4e", + "blk.7.attn_v.weight": "c89f1343486ba55814233511e94090f7365662a8a4214aa4c278cdadc79196c2", + "blk.8.ffn_gate_inp.weight": "4e239afe8c7afb8de3a005757c887cf14b1622ca2d224227591cb0e5301f4c17", + "blk.8.attn_k.weight": "2ad0229f30fdcc1e85ce64e00d8f75902238294844a81d5af43e14ba75c02983", + "blk.8.attn_output.weight": "2e44a4722acb3b521b81d0b910f8ca2f6c286d874a92ddd02150566454061699", + "blk.8.attn_q.weight": "1cd2b09cb2f43e08de776b5f7eac197a5a6d4ffdfd52b21baa36319450147bd0", + "blk.8.attn_v.weight": "5a22c57ebfd33ac500cbcfd321d5b5b1783f8728801db6f3f8bed51c7183e4db", + "blk.8.ffn_gate_exps.weight": "91063fe56cb4f3ff3b41052bb5046fcf8ef61516a603ee90aab893a9d68c15a7", + "blk.8.ffn_down_exps.weight": "d4c3abc8f1d1b462f67f70bd8f404b3fcf45dceeaa8527fa120527254c383c90", + "blk.8.ffn_up_exps.weight": "76a1a1f08ec577716a2e7027b45293e9205751126424f1bebe1de89c78f087d5", + "blk.8.attn_norm.weight": "f980d774da39eb76c52358afac3e38cb4c81cb323deaabbe5c41822e3f17a98e", + "blk.8.ffn_norm.weight": "1c937658cf90f1a85db9a5f26e077730fdd4b694607dbeeb825c5fb2bc407e0b", + "blk.9.ffn_gate_exps.weight": "a2532471ecb7896d5c78e5a34e10cfaf4125265e1595166c8d0d0dfbe2a3187f", + "blk.9.ffn_down_exps.weight": 
"b47921a28412d48fee450b8b9d97cee42344a2e69f06d407fd9523d7adf13333", + "blk.9.ffn_up_exps.weight": "7c461bd1b2a73b439cff6a10d94afa01e8b06f7e6f09d9a6f28e3876aef48bce", + "blk.9.ffn_gate_inp.weight": "1648dfb08b5c06d7953a5a97ecb764995fae9487fb729a1c867023b2538149d0", + "blk.9.attn_norm.weight": "8635db0f299882a63b7cfcd1d4259c9e53fab22c31d3d054de36b1001380b31b", + "blk.9.ffn_norm.weight": "f9309aa323062d174c463613afef9b0a33501b510bfaa58a8e0e866d12ffef3c", + "blk.9.attn_k.weight": "dfe62030441e947a588512d18d9c6e4ed72c2f71c227d622c095e4263b23dadf", + "blk.9.attn_output.weight": "1977beb75c6349c50ba7dd3865d7c0a9c5c5ddc854413147b0eec98ac4fda351", + "blk.9.attn_q.weight": "eb132596719605cd6bd1782487f121994629e115190edd69240b12af66e734f5", + "blk.9.attn_v.weight": "9e708f15d332d7c5187b0693b1a977eb30a2fa10bf7df48ed9d7537c0aa6ed99", + "blk.10.ffn_gate_inp.weight": "97503a5d166c1925f9b65c0eed980753d411714d66896f3d0fad5286c7aba702", + "blk.10.attn_k.weight": "1ebdd222336bd25b48df1b138cdbe09021c4a5562ea7cb78cadd1255d2be3a39", + "blk.10.attn_output.weight": "5e98faa38e9d514b9057e1c8342c509cbe1083defd518e506f6bad89117d1f5a", + "blk.10.attn_q.weight": "3323a26c87d936d1dd87c577d0b763459fced726679612c874b3de5fc6d969c5", + "blk.10.attn_v.weight": "d5fa73cb56aca388e205f44455e4b4f676fdc12ed7fac4542fbb3b41ecea59ad", + "blk.10.ffn_gate_exps.weight": "225021b53782800906cd13b70be3a4161e8b300b97f984a959ccad6a6e8adcbd", + "blk.10.ffn_down_exps.weight": "f08eb91526bd22f5fd0402fe925d6141cdbb308a1ced0330858d0c85c71f5ef3", + "blk.10.ffn_up_exps.weight": "a9f688350c3b53eaada5103b5848bd9a3d7d6b327a70fa16c24bf28ece933eac", + "blk.10.attn_norm.weight": "5ba426c9dfc79805015ccd76cd1068b0ad3bb7a8453e14bb1d35486f122d8f95", + "blk.10.ffn_norm.weight": "98891d6acbc3986b2581b7a3af9f5946a392d9188972c6a8b15d4e745a4f2482", + "blk.11.ffn_gate_inp.weight": "b2365a60566e7dace892e1cb0e62eb73ce387352601723e847052b34874feaa6", + "blk.11.attn_k.weight": "0efbc1d1430505543ff71532a4fcda821aeac616ef6c1dca40e00d4f2ff70bea", + "blk.11.attn_output.weight": "3d5bd4d9a41236f30d4293edb9ae27beaa113ffb31b4fbfadff3a4c370dfd3e6", + "blk.11.attn_q.weight": "aa11e9db14dd9c77951511443077c2a1a78070753d7bd3d9811038473f69e325", + "blk.11.attn_v.weight": "5adc567f377aa11d1763d35f50e53fb2896a8b03b623ac36acc45efa2486d512", + "blk.11.ffn_gate_exps.weight": "71d07d982aabfab9eed3c733d49c20f023bf475368fc71db5084d91beadc4b47", + "blk.11.ffn_down_exps.weight": "9a06e61461e48b3925a9f7d9cca634d048c8b62163d7bc5c43e35899f959319e", + "blk.11.ffn_up_exps.weight": "bc05494d0dcec61021b3ac0c5bc1bf502736cadf48224e213bc139d562699a89", + "blk.11.attn_norm.weight": "a5758a10bdd0404ae1470e8e9db903985d4d07f60553c5001a5e7b660d4f7ada", + "blk.11.ffn_norm.weight": "814ae037563aad3771787316bec4806c95bf6f5991dd6474b4b1e5cc13dc18ee", + "blk.12.ffn_gate_exps.weight": "3a68b831ba1606fb9ef6dffed4732032447ecef23ea563ff4e79317586c7eb49", + "blk.12.ffn_down_exps.weight": "268b25e13f4b7beab08686e83705a41b21d15251809ee4784526f78a580da829", + "blk.12.ffn_up_exps.weight": "9105751a5b5b42ca2614d0456f24f779d2e2ac8cdff0f96842aa7ae2b70f341e", + "blk.12.ffn_gate_inp.weight": "d0de1558cc1d458c5c504f63ddc59785c323df7330474bb0644c346104b40a3a", + "blk.12.attn_norm.weight": "859a4c8113678e2e202d10299850e0cfb52eb11ea50bcbf4fe3ff39bdd394154", + "blk.12.ffn_norm.weight": "7fbf4c459c1760218877e9ee3f5ad49e960956a4369bcfe96c143f04ff9ddf97", + "blk.12.attn_k.weight": "0a7e254fdf3730a57372b6ff421a613eabaea68cdefd64800857941411318374", + "blk.12.attn_output.weight": 
"ceb763fc15d88af149d8fb78e82db2b7dab3aeae584af8cf7611a12356a397e5", + "blk.12.attn_q.weight": "a43402d23c46cb2d3cb3c2a98c81b19d10026b7e6742370fed6b2880b6e049b5", + "blk.12.attn_v.weight": "3bc24f2c0480ce91ef72993ee8f1cf962f7359e12183424583ffa1246bf3db52", + "blk.13.ffn_gate_inp.weight": "a6d68c82bfe66d8bab68f980f5f18268a9e2c0cd6b8832ed39010e0de198ae05", + "blk.13.attn_k.weight": "0166c39546b37dc2e01b2b396ba43e183f797dd04eaa51a6d103d8b58ee4bace", + "blk.13.attn_output.weight": "2ce5eb198deab9557475a58b69b11e9874b547e05c23f223c6e42fa35ddca069", + "blk.13.attn_q.weight": "745c1bbdf434284a7fae98f45e821c076dd9c2a2467dba6a9d8cf0041e419dbc", + "blk.13.attn_v.weight": "9ece68d5ac64d1421ea7aa32e1cff9cc1fecf5175f4c4da858dd31d8633e3337", + "blk.13.ffn_gate_exps.weight": "ccfdcb4670b131689de12d396a010b5ea737795cf5c15a14a304d720b3c7c899", + "blk.13.ffn_down_exps.weight": "8b8fb328664764f1aaa5cbdec336d5654e981e965a02ef622bde5f07ea1c164d", + "blk.13.ffn_up_exps.weight": "d2ace0236c2fb3365fdc85499d676a7f65813c48e5085348b1df1799922766ec", + "blk.13.attn_norm.weight": "1ed29d7d89ce52d7cb4d57e895ff7115430466e917136c049c385c030ed44e9c", + "blk.13.ffn_norm.weight": "a194fc542597a4dcfdfaec5e3cba2a2b2b21b21edfc87c39c0d7f7651355bc4d", + "blk.14.ffn_gate_exps.weight": "a625e3574e5e740e7f8e2f9c40390f2f382c720aab5b10534e298002dd8d1fb9", + "blk.14.ffn_down_exps.weight": "bc366f015b83c865946afd74c8a884943e0ea2c671314a0b7bb72f21a44d2f78", + "blk.14.ffn_up_exps.weight": "ee3199bf2086de77b49f57f487676be8ee70e102a2fb5a5ef8ddbbc28a9eff41", + "blk.14.ffn_gate_inp.weight": "2b437870c850fa2e2044d032bb02908af634356e37466fdae260b933e48ee8b4", + "blk.14.attn_norm.weight": "cd8344d193a1cbd42bd898e17f4bcb1ca0b2918420fbdafa9249a6f2b7f4ae06", + "blk.14.ffn_norm.weight": "70eec40374e558fed5b07257283cf36342b6b0129285a00007deb59c32c9f7c8", + "blk.14.attn_k.weight": "4053bdb507e0543d724b632570bac86b31707696d90a0db44c49b2a082e0d599", + "blk.14.attn_output.weight": "0182632cb0e06a07241b8293d25d109fbc1862e1e337d435f908e8681e2eb1ab", + "blk.14.attn_q.weight": "ffc7794a4c1b6f793c842dba969435330a7a80b9212e457b4b2ac33e68b41241", + "blk.14.attn_v.weight": "6411805292d528e61bbaad8f9aab9dd073529a17946c057fb06864fad9cf3211", + "blk.15.ffn_gate_inp.weight": "77d0744567c76e6abb67f81ba9c715b2b544841186d5b948309571eff213bafb", + "blk.15.attn_k.weight": "1f7957954ea4c6521c257b35a360e868ffa02bdb3de91f146d5e06bb4a545c98", + "blk.15.attn_output.weight": "d7809d36bd8d3342240c46fd87bcc7f9821a222f48d9a95e45ae50460265d3cf", + "blk.15.attn_q.weight": "25f509313ae4d8401b871904059f472a26f5714e7c791c725de77a1a522c976e", + "blk.15.attn_v.weight": "96fedf5a591fc0f020e6de10fd72ff12b3ef9cf70cd21dabaa0d3e7b06f54e73", + "blk.15.ffn_gate_exps.weight": "8f950d976b2fd9a3d213b84123cf114c1377efde9352767fb2ddee89e177c8ef", + "blk.15.ffn_down_exps.weight": "6fd09d1557bb94b06efbd4f6a1ca4be532a202ba290e9315bc8da3d12a5c4c4a", + "blk.15.ffn_up_exps.weight": "cbeb59ae7b0266a928dc7e3a6e70a9330b92f9ee1b17ee1ed91022108204a33c", + "blk.15.attn_norm.weight": "2005330911ac2edc7b6d27aca021c67d30d16eb632e49b1a13f30fdb2717aed0", + "blk.15.ffn_norm.weight": "0e9198f3b548eb78acc8961f2b3350d238d26cec110933ba753a8cf0035c501c", + "blk.16.ffn_gate_inp.weight": "a41d1f99d739c8b150c3945b6949763988d0c6a4c5a2b5855592ca1a48ed23d5", + "blk.16.attn_k.weight": "b624e2ec88c2d3047f60530fb87e72cb4a5e655a9663f6f3e9b09e5ad32cddaa", + "blk.16.attn_output.weight": "687759ea75e45108526ffc1573d6fdf084728079bfc2dc89b9979e76280f43c4", + "blk.16.attn_q.weight": 
"beff3a45c7e9ec82ffc6d3c701126be28654d10aabd747d03441210491fd31b6", + "blk.16.attn_v.weight": "43a349b13f0b9d040cacecd942bcb168c030fef8c75c987d59a4fce6c14e855b", + "blk.16.ffn_gate_exps.weight": "793406d6c13d727c82bb7b692ca98d65ca975baee69fc57be5378d77c5a19b62", + "blk.16.ffn_down_exps.weight": "9bad3dd150d0230404b7f886ac7ff8803225757e813f195cdb26bad245243b4d", + "blk.16.ffn_up_exps.weight": "7449d663023fea3496475bf0a9c1de7272ad0ce9adcb3265e8e424badaa674dc", + "blk.16.attn_norm.weight": "a424ce34c195a401df1ce37ac4f2794e8a6720b1ee8acb21428e2b68c65e0125", + "blk.16.ffn_norm.weight": "405a68bb8e16e1064df2de55ca3cd9ceddda1d9fc0af007a9bd7cad4b2676248", + "blk.17.ffn_gate_exps.weight": "97c6e5321491ca5dc039ee88da0eb0e78f347372785411809af84b3298cb19dd", + "blk.17.ffn_down_exps.weight": "1617ac19788a1be19bac69277408761e6bdf5719d63a8c7fea14d41cc27641b5", + "blk.17.ffn_up_exps.weight": "4ead1c365f112581c10610ea3f63d2a1474311d2503d2060fed4b458ef337f5d", + "blk.17.ffn_gate_inp.weight": "ed4b3393f2523f2b5e0fc7680a1caa2842e605728a529b5af68a7fa8d7abf940", + "blk.17.attn_norm.weight": "beac17ef86a7fb2b5840cc72f7a95a5e3d6bd24e7fa698e0b0ebb9bdac45c561", + "blk.17.ffn_norm.weight": "81cb58ec6d6dc02a0b4ede10adc336dc865fa76f982d4eab0e4a37b40f5b0fac", + "blk.17.attn_k.weight": "eab569e5ea8c8b05e5a6a209fba031129453c2e28181eee3e736b3b04b36bbec", + "blk.17.attn_output.weight": "f85b70f01438ce8fe5d10599b113f30bf18dee2bbae0657d3eba295870001db3", + "blk.17.attn_q.weight": "887ceebfbf6a2b94b43d2df4439ac3a5bbc29311d4b28addc04d525546032047", + "blk.17.attn_v.weight": "2df9414d65014c06a93da22ba3a668be7b83e2e8008e98d7771f7dfebed98298", + "blk.18.ffn_gate_inp.weight": "9b07741a0950fc667e5fd25937e33bc22e1f764f80eb4ff3119f005327ae0f6e", + "blk.18.attn_k.weight": "8649598dbb63938744c39bcda5ce8c31773e29c573be8d4d2c114f5030f8d3e8", + "blk.18.attn_output.weight": "f8e391adb92622298ca834d5d1eda48b69c3b1c51c5a584ef6c54a725c298d75", + "blk.18.attn_q.weight": "84bf8708a2eed618f48f69c178ed7dd11fa4c468102376e72e910ebd037d131f", + "blk.18.attn_v.weight": "31db3cd773f09548c2c1b1eac2718e46364a7810970fe9c433fad9d8de5397eb", + "blk.18.ffn_gate_exps.weight": "be2a2ba378002f1b61f86c273a69eede9b93786d5ce96b4fee1861f730dca4c4", + "blk.18.ffn_down_exps.weight": "d35196159e37705db50a5343e3989f7335477f1a4add67ef42ad64a638cd07ae", + "blk.18.ffn_up_exps.weight": "c6ceedd86e97913a6dcadc838e7abb762d629fb8dd55f15cf02fd9bd66d2ba78", + "blk.18.attn_norm.weight": "41f0b1ad83d6e3cb9fbe0d27878c2e7ad4a351b9f554a6bc9117c01745cdf6e5", + "blk.18.ffn_norm.weight": "96646204bd0d82f25dc77faba4dbd86b1332e449313e6684e00122da8be99057", + "blk.19.ffn_gate_exps.weight": "c6eb7f61e7938bda0492dbc05e51e8f631c99224fe18e99861fc4fc53ba9e9ff", + "blk.19.ffn_down_exps.weight": "4384803da3a3a3d44120d7dd192fe2c9bbd9a1a0cb492dbec1fdd7565230f1e8", + "blk.19.ffn_up_exps.weight": "22d73de2fbb8bb0f1bd2caf17fad8a355c47d914143f7f6e6d0128f66f074a60", + "blk.19.ffn_gate_inp.weight": "9a0cc4a2301a5634022fbce41189021bf0d1a961792d2d9330fd35556d18e5bd", + "blk.19.attn_norm.weight": "c5cc56ec5df9a1f7d5ad71fbda49f1433132e58895d45cb44c73420bd61ebd6b", + "blk.19.ffn_norm.weight": "77e17de741742ef2482fc7872fd423c8e3c1454dc4d2be89ee939084b6d78bc0", + "blk.19.attn_k.weight": "a92ea36ce2e3569656306aeefb835ccd5d1b03b33a86e0d3d030644cc923b813", + "blk.19.attn_output.weight": "5e2a912b37855f84ea964907a1a86d609cbdd79efa0c93c3e8e2fc07caf7c226", + "blk.19.attn_q.weight": "4ef3a5913292ac3c1a6fd3e9e53d011021f2b41d0276cf849706d1ca925cf7a7", + "blk.19.attn_v.weight": 
"42981b75b68ae852cee638b5433605c147da4392aaa6d7a06e756115b0171f39", + "blk.20.ffn_gate_inp.weight": "71381b9879a7c80b9f7b475abc0aa31b8cd71ccc00856ebe89764a2acb9df2dc", + "blk.20.attn_k.weight": "1928b7ebc054eb3967929ed6fb446314d5352f4aaf8b475ce55c6345019f2ea4", + "blk.20.attn_output.weight": "6071ecd9ca91af0d2ba93fef4a1a56f3b243dd70f862a21a2d164d56f386043b", + "blk.20.attn_q.weight": "002e95042a40f36ceed5829e3d0c8072e5f5e4ee86a089e2902b2348fed24dd5", + "blk.20.attn_v.weight": "42f509cdb1c0e298f89f896e349be86952c5168e49b3f83bb17badbcb7596d57", + "blk.20.ffn_gate_exps.weight": "a684a3ffe4b0a57c819a5fa9cb3521de223f392732927271e97ce925b6e33765", + "blk.20.ffn_down_exps.weight": "e3081a7bc7ba750d8a4886bc8ca4f231b55db4ca082b54b4106c7531964725cb", + "blk.20.ffn_up_exps.weight": "fad0fd5eca36ab154788da28be8ec25bb5d6db06c9d133db89e96df358a2f6a2", + "blk.20.attn_norm.weight": "c3e3f2429715ae95e884ef1246b0b461b23c5cc0ed08beecf70a14cddd184820", + "blk.20.ffn_norm.weight": "ff31f609dda65ca496b0584fabea6550e42edd05ebf229812aa6b7bb5ede15e6", + "blk.21.ffn_gate_exps.weight": "366f09ef0ecfb86808eb3296cc9abdb957951d27f6533c03f1422b54061da660", + "blk.21.ffn_down_exps.weight": "3fc495947d27fcca7fc0893c8a96e5d48ba27b2c8c58f8fcfb8dcfcd5539741c", + "blk.21.ffn_up_exps.weight": "6713ed51410bcc8283cbb001c4ad784098f25701e8021f4fa4f411e186859c4a", + "blk.21.ffn_gate_inp.weight": "6d4c92c01ec801647134d907bf1108878156df266a6107abc10526332b328b93", + "blk.21.attn_norm.weight": "27605719ae2df24f4f2e85a730927cab20367631612cb501631f6bbf38eb1209", + "blk.21.ffn_norm.weight": "ca80ee8177db185b15a4a378c1cb6f7143c76546a7f1726bda23f329323d4ffa", + "blk.21.attn_k.weight": "9e49f743d4a5bda9b4bd9c40c2ca37cdae5aec7e54cb193897ac8b4945ada14d", + "blk.21.attn_output.weight": "ab923540879753feaed152f5950f69cdd83d8f2413ca873f5f038b63ab0aea12", + "blk.21.attn_q.weight": "62617fc3f1c9d2aa672a4d91a121c7a91b92d145b65e75f0b06b4bb7c825dc36", + "blk.21.attn_v.weight": "15f8b2e72f8e8e992f2f6b3e93238a9d7be7bd6136f91c9d04b4b4cd0cd60369", + "blk.22.ffn_gate_inp.weight": "3ddb1773d9257b68add7a2a4e94dad25ed926803e02707863dd742ab9b2dc179", + "blk.22.attn_k.weight": "680e45a9e8d5feddee5266e119dc053bf80718fa9af1cf6803e6f493b265f1eb", + "blk.22.attn_output.weight": "0d5fae3402fb2c5aa3a860010e3973fc8e3168d1015f7a76b7b2964681693206", + "blk.22.attn_q.weight": "eee7e3d426ab533bd18d62c9aa142eedbde394bed07db58313e0fccc82a23237", + "blk.22.attn_v.weight": "26b5be1fe3c2b6824c5a648a3e4bdf17691904526fca158fbc3ebb627b67e2f4", + "blk.22.ffn_gate_exps.weight": "32ab7a7735313d60f6a75229b1aeee940b6aee176c9648536bf5921b0dc2929a", + "blk.22.ffn_down_exps.weight": "67590808f6a67777d3eb7976c31fe616d388b98fecbb12253b72d1241d70753f", + "blk.22.ffn_up_exps.weight": "fc245c0183e6d90829ff5e71a4ec93e4860b3d4c1a17b9dda2fb64f5f5c9ed32", + "blk.22.attn_norm.weight": "128e99d206d4d6724758ec97468af767fa0aea592149c324b731659c1e74a1a8", + "blk.22.ffn_norm.weight": "e45f498033f0cffa15da0eff2c47b4472e43fcf8921729fc4eeb2e3a6b3c78e2", + "blk.23.ffn_gate_inp.weight": "d63e686f5325fbc89fa242c2c52a3b8ff54f867dca914c9ae6eea13e9d6f46e5", + "blk.23.attn_k.weight": "f71f5a577f46ea12b1818f3a5ff4b85ddc45f9a2afb0fa2e041d71a3e31c6779", + "blk.23.attn_output.weight": "92b13563c1e0eac0d748fb67b235dfd7a64c8f16e2dafb316885744582e23b4b", + "blk.23.attn_q.weight": "2f9b9c35dc4f912f3f51c06e2d68f417b51a0de0a84aac530a64f9d3d7b0a2dd", + "blk.23.attn_v.weight": "268e40813806e74a5c364b19556d087bf8374e76e7b6fcf55c381eb7da13ccd1", + "blk.23.ffn_gate_exps.weight": 
"12f857e7a7ce228afac34d99b602c8d6fe96984f2a21118f459a58cb767ee65e", + "blk.23.ffn_down_exps.weight": "cdb082c16599c3bb36a28066dcc122d9529b54fa91b6cf0153437ec960a5e16d", + "blk.23.ffn_up_exps.weight": "f4b99f6f44d7b8b5a305894e88633bf5938fc1f6303a2b2092399da9c8b64d7c", + "blk.23.attn_norm.weight": "a691392210383915916b4d3886d5e4d56e7855e27e37e414fbd73bf66b3712e6", + "blk.23.ffn_norm.weight": "0c3dc72f667e5ae19b69bfa9f2bd2a01a57681f89ef9527bad4eb0d8c7b70da8", + "blk.24.ffn_gate_exps.weight": "86baca2a3157994df7fd8ced5e08436d5c1810dc29c0715637c36de723e0e7d1", + "blk.24.ffn_down_exps.weight": "ac5d559562b35c34993e34b071f66d15c65be5907797078c2d2a49aba54e3192", + "blk.24.ffn_up_exps.weight": "fce0a099cf09777f44fbab3606ceb75f7fae6f0b80725f9e871654b8cdf9262a", + "blk.24.ffn_gate_inp.weight": "e7c6800c0cfc56b565b2d35ad6f1dbfdb70dd0b05b338bc8da2286ffc3678d79", + "blk.24.attn_norm.weight": "dc6cc18ec52d102d015153c4a1132f9d7a504e29cbdec81c5edbf3b9e65815e1", + "blk.24.ffn_norm.weight": "480d5a1397af5e0e657f1e67d20ec0cdef5724e71246a326843321b87ffabd33", + "blk.24.attn_k.weight": "338c0597954a9b95a782545b2fe36469553e73f86ae2d2b5697767b28e1c7daa", + "blk.24.attn_output.weight": "a77d23b79933c67e52f1eef7f83a3dff4f767ce0bbcc39572f8cec4acd457643", + "blk.24.attn_q.weight": "45c9478593002be1998e96e70668aafa2dd3972380fbc1df12fb05c24ba959e0", + "blk.24.attn_v.weight": "515729420885408a6a9614bc27cda393ed907521318d14d21335d39a3eff0b61", + "blk.25.ffn_gate_inp.weight": "aae4ac40e9ab3925241f9d784b54b38851d9bc999a6c3bc03fc3f17c9b28a67c", + "blk.25.attn_k.weight": "4ab4808d02396c35b00b426f536015673b71c17ae6cd55bbc2e6bfe7a4c59d0c", + "blk.25.attn_output.weight": "1990bb982b77e0c947cd1a8ef0b36227ee1259e6dbbc2829e5c136edf88675eb", + "blk.25.attn_q.weight": "a1490f3048e8c0ec8784f8550c43adf5cc8d0f2f90131c934713fe4b1b015bd7", + "blk.25.attn_v.weight": "f15e53c6d45b3b6f58808fa968425d65e0b26b7f9b268127a77abb1227c67431", + "blk.25.ffn_gate_exps.weight": "656662447ff54f56ee80f78a1b9483f7efdc40f7375d0cd8a9c72ccf21f77e7b", + "blk.25.ffn_down_exps.weight": "db06f101bccbaef19cced0f6c185166e18202465f4a42cddfd535fbe5cbabb4a", + "blk.25.ffn_up_exps.weight": "584a7b02456f27fe1d8d3c7ccd21d426b6ea887795a3ed77f704596a1e3841d7", + "blk.25.attn_norm.weight": "8f0f3597982930fd237e9d609776c64f2b909a455b21678f83a7ebd4bbb83e64", + "blk.25.ffn_norm.weight": "3e7079c32582afba0c55e032f254adc18d2997705eec860185e9a6dd3d82f07e", + "blk.26.ffn_gate_exps.weight": "e70341691b583b86489812b29b77aa41eb658b1865733d6118da54c66e3bfcc6", + "blk.26.ffn_down_exps.weight": "5c1b812d11dfb064af816ced5ab6463bf9722eefdfc341b8a93705d5038fd781", + "blk.26.ffn_up_exps.weight": "e18118362ae54ef7432781c83884f9fb230a9d934e342aabeda8822ea5f71fb6", + "blk.26.ffn_gate_inp.weight": "cd1c5f6710166b9567c6b74c97b2348b191c60aa860958c6bc264ab095261dff", + "blk.26.attn_norm.weight": "71d087531af2520bda2e676c489e8529cef5db8aeea1eec0a937a8b4f2fa2e54", + "blk.26.ffn_norm.weight": "7f704e936fda28eb5c2cc339f0f6a5f78170b5aa43c01265b21668870d819c82", + "blk.26.attn_k.weight": "1cc62a0ce0ae251275d898c52c4a9fba5995fca10955d2011d10dd1a59e1afb8", + "blk.26.attn_output.weight": "636e881b1505f9cef656a4be98bec6a4765321d51f9bf1dac8933397cf44b765", + "blk.26.attn_q.weight": "89a3c4d202d7d6adebb9e0c1bcfd8b775f6456386f1be25e86e43acc949c1e16", + "blk.26.attn_v.weight": "ff2cc963b597cdf1a21703f3e7022af3bb4c65a34a19e19d9309a7c5e198b5bd", + "blk.27.ffn_gate_inp.weight": "6150139498fefe380bb99d11e72028da47a15ecb73dfc5b2774f726f4bed8f9e", + "blk.27.attn_k.weight": 
"f286eb9e5c56c7b801a497aedc40158c2a27877d7f9fb59b3fc67834798902d2", + "blk.27.attn_output.weight": "5dc3d3a05f9f7729509147fd09c16fb53f85f520cdab5cb69abf4bae3fd460c7", + "blk.27.attn_q.weight": "8462e40f86b24251960d6f35a9ea99b8793a01937faf1aec2859f2e5395dbb61", + "blk.27.attn_v.weight": "bac1a99e38e25953f8315f7212eb9777dc216cadb09b959977885ae62724ceca", + "blk.27.ffn_gate_exps.weight": "6a15eca7f0f6ecfd93db2e55c63875348ec4a78c4ff643ec46df9e958c0101e4", + "blk.27.ffn_down_exps.weight": "2e1c91247c4359e2073a8e5f26fd7f6426da7be3ed5bc65dcfff701f0a5022b2", + "blk.27.ffn_up_exps.weight": "65d6f5c553c9332085eae4aeadf25090b5d7768212ea7b08ed698102c21b29a1", + "blk.27.attn_norm.weight": "7fab8ae63ec8e91ce625cd130ab96d8427dad3a7413bb21b25ec5f408c5b9f5a", + "blk.27.ffn_norm.weight": "532720546b0fdcd423a02ca6e3e9d8aacb84b1b3e8269968f88a47fe2a69bab4", + "blk.28.ffn_gate_inp.weight": "a305ea58d98962d9dcf0c53ad2389b7acc8936fb35a0e3fc9410e7767cd49dea", + "blk.28.attn_k.weight": "8315e8a2e4f78dfdf36d4fc18fffc74bc95fe42c3ae4f9af2b6c874612c0f71b", + "blk.28.attn_output.weight": "9b5fdedd32d39ef46a22cca7cd5355d7b93bd07ea305f466a8aad6ca5a4f3778", + "blk.28.attn_q.weight": "4e8fb96997c30e231c437130f410d7c91d541a816f6c568b5f3bfdb4b8dece74", + "blk.28.attn_v.weight": "1fec739cf3bd7b4913f72ca358d4cf31391c304de44ac0ae31ecb825beaa7cfd", + "blk.28.ffn_gate_exps.weight": "9f259789d535e09268266b9a8020f32d6a6779966c909d91d3a10574f06238a2", + "blk.28.ffn_down_exps.weight": "516d3f8abaedb01b9916a4b67d4672159769138ef2850158bc1b32c41e31f0e8", + "blk.28.ffn_up_exps.weight": "f2f1d88d2c31ed588806fb5ad981d68f5134d7284c4fc022fd018de2eef437fc", + "blk.28.attn_norm.weight": "960fd005598deadaebd969996f4367a9dbfad90539a863674fe95730935acc64", + "blk.28.ffn_norm.weight": "e1993b37ced93d4049e9af2c47b0d9207d8f7e6f2cc3a52f57bef30bc806d805", + "blk.29.ffn_gate_exps.weight": "58927146338f443513337476b3cd30e6341742f096c2beb5890d400f10121298", + "blk.29.ffn_down_exps.weight": "03a3386e4f0b75a28c5608e23b2de8f0de25f21954e4aa7fc343431bde9db07e", + "blk.29.ffn_up_exps.weight": "6916b7490a7ae7b04a5d81cc1e7ac9b20c483434f3b186b12d87fe176bf1567b", + "blk.29.ffn_gate_inp.weight": "98e710e467a3d567abe4ce29d78b8e8dc033148762290c0c5e1ae4d78efd8c78", + "blk.29.attn_norm.weight": "4e64cb307d37be20d55f38c94faf7e451d11df5e60df347906cbaf9c5441be71", + "blk.29.ffn_norm.weight": "696c23a52f742679bd44440d687a4c44b4302d57f1e9dc5610d23374336187e7", + "blk.29.attn_k.weight": "e85253652fd6120c623634ba66b725bf7cd491318b54ccdad2c7df8851d64c0a", + "blk.29.attn_output.weight": "4f650a71efb150d1f24cd4d114d4187bf570ac424da3b92ea6455abdf1aea705", + "blk.29.attn_q.weight": "69fa7da901026ebcbbbc848455b425458b7e3295007d7fc093acf4b38e2166ea", + "blk.29.attn_v.weight": "17e2e7590b317b21f106de546aafd955579703d1e95d6aea044ee72ec3a514c9", + "blk.30.ffn_gate_inp.weight": "3a03284b4aa60d59d4a2ec86253469b61fc656372afca427cb77a5332fbcc62c", + "blk.30.attn_k.weight": "d518cfd0db9708e769eb1399e87ee49357dc54d5afdbac3d4c0ca46c64e789eb", + "blk.30.attn_output.weight": "9b44378714d784c5ef9ab604359091baca4e0ec222afa139b7f840eaefb371fd", + "blk.30.attn_q.weight": "cbb95365bbfbcad0c9cd99b4eebb5a5d32de68ce08e4063b5ec3e792b7548044", + "blk.30.attn_v.weight": "e7985c04fe1740e35a9598f43b67b0922b4fc2d00b68a92a9f917b82c3248de1", + "blk.30.ffn_gate_exps.weight": "8ac4bbd07935d98f895ba94dc174e5ad5046c3c222b53729d60f987c05e7eb70", + "blk.30.ffn_down_exps.weight": "dd672cc71e82abf05064a18121b8e55fe1a4f19bc1d7cb9a142f4add54bc336e", + "blk.30.ffn_up_exps.weight": 
"12282f664a2a12aa25e2deac58946108715ebb978bafed5274cef24569107646", + "blk.30.attn_norm.weight": "1a33458fee054c6c9c896a4bb0a4e1fbfa0293b2408c7dd2b81d692e966e7273", + "blk.30.ffn_norm.weight": "311e33b68051f507f1478ed8f2693fddb846170ddb7285a91be43f795c2ce31e", + "blk.31.ffn_gate_exps.weight": "8af43d9867a51cd8392fb48b981b0ceee0ae979c491c07d711b3b56b5162c786", + "blk.31.ffn_down_exps.weight": "5579cb7758c1600b19d1f540deffe081b575962e37437b3b2efb2fb0a2924e40", + "blk.31.ffn_up_exps.weight": "f2e7c005276b3a001fb40753f027fa10b4d5a346f43cf4b4bbdeec6e74e1cf6a", + "blk.31.ffn_gate_inp.weight": "89885dc0e30b6b16a90c0331d7fa3174671e941364e8102d934f02132237e61b", + "blk.31.attn_norm.weight": "99e4e9bf86a9edf8c404153a7e8a82324ba79da462622196e2faba161bd95172", + "blk.31.ffn_norm.weight": "55335997cf6de781bf332b943de96ff4646966b05d9fee86b76ea897e27b6ca7", + "blk.31.attn_k.weight": "cee570762b78da6316b637892cc4b080e40f57af5551ffb1866b9a8e80e96628", + "blk.31.attn_output.weight": "fa321ff55ec7819ead7b819fd45215262f39744569765ba2113c989c03588802", + "blk.31.attn_q.weight": "9e2c409b878f8a2a1436874abf428fceb1c534b21f9ad4dd6f532b8a469007f0", + "blk.31.attn_v.weight": "a845d0be68ba537b4a775bfba4d897faf7c82a811a2612b0b7420cc4f3574cb8", + "output.weight": "16101cbb74b54cda9ebc07ca3c762e3263a56efb3cc011156184b95807d7cf13", + "output_norm.weight": "d7aa61585baedd60157aafe157930785742c55989c288573566a971b02423564" +} diff --git a/convert/testdata/Phi-3-mini-128k-instruct.json b/convert/testdata/Phi-3-mini-128k-instruct.json new file mode 100644 index 000000000..19296f5a9 --- /dev/null +++ b/convert/testdata/Phi-3-mini-128k-instruct.json @@ -0,0 +1,225 @@ +{ + "general.architecture": "phi3", + "general.file_type": "1", + "general.quantization_version": "2", + "phi3.block_count": "32", + "phi3.context_length": "131072", + "phi3.embedding_length": "3072", + "phi3.feed_forward_length": "8192", + "phi3.rope.scaling.original_context_length": "4096", + "phi3.rope.dimension_count": "96", + "phi3.rope.freq_base": "10000", + "phi3.rope.scaling.attn_factor": "1.1902381", + "phi3.attention.head_count": "32", + "phi3.attention.head_count_kv": "32", + "phi3.attention.layer_norm_rms_epsilon": "1e-05", + "phi3.attention.sliding_window": "262144", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.pre": "default", + "tokenizer.ggml.add_bos_token": "false", + "tokenizer.ggml.add_eos_token": "false", + "tokenizer.ggml.bos_token_id": "1", + "tokenizer.ggml.eos_token_id": "32000", + "tokenizer.ggml.unknown_token_id": "0", + "tokenizer.ggml.padding_token_id": "32000", + "tokenizer.ggml.scores": "6e37bcde2adc7e350e87c496eddd7a2124329c1dc66c5bf3ad3997253e4f7a62", + "tokenizer.ggml.token_type": "b6ecf55ec64ee67d87750bdb8d757a2c58bf78377e9f4219f5689a6c4dea57ce", + "tokenizer.ggml.tokens": "d168da3ddd3eee820916945fcb9baf24dd3cde42f606cffa2d19e7c8a8743918", + "blk.0.attn_norm.weight": "216aeb2c9e0c271f899e1ef2a63cceeb8f41e97642e84fada54b1d3c1c11cf25", + "blk.0.attn_output.weight": "b597d56f7188ffc1fafc273fadc59d41738cffd677ae98c61a62c3285b3a3099", + "blk.0.attn_qkv.weight": "d28a6b44e13f59be5483e4be2bedb544e346168d720aca27f47d1a5a722be91e", + "blk.0.ffn_down.weight": "4a691370e5a61fcbbf540fbcbf4c0f1d15dec0364528c0e916d0744f6262b63b", + "blk.0.ffn_norm.weight": "0c00af2b4a3128bec64a0cbb1084b042fdbe13d9ad0d03bd577f9449dfead338", + "blk.0.ffn_up.weight": "b32b52f790c1c083bfb8a3126dc1111cfeeb28dc8c584a930a1e5334cb176bf4", + "blk.1.attn_norm.weight": "68748011503c6c029e8e69a84a8e5a89338f378769627b6dbf7f93d715c292e1", + 
"blk.1.attn_output.weight": "2267344add13b048ca59e4377c86dc512be8046a57156901fa32a20fa74e4ee0", + "blk.1.attn_qkv.weight": "9109d2e3d7a2eacfda5226587b8be124a3bf44b972da7ebb17aa15795897eacc", + "blk.1.ffn_down.weight": "d675df4df4dd039c0c339ad6445d39eddd2004db6bf35bed6314c7497245a633", + "blk.1.ffn_norm.weight": "3b5767ae977bc8baaa06b06efdbea193b6b3ba605ce76d77a76ce317e935500c", + "blk.1.ffn_up.weight": "80dfd6d9d234b00334c89b8e0a02f81899c2efd377321c34ba5ba51a5f61b5ff", + "blk.2.attn_norm.weight": "6a6743b057e5088f145bc179e92c9bfb41163e7295d7b81c62e23dd89d2b59c4", + "blk.2.attn_output.weight": "bc5491ea54e0db81462d7d9b7d25cbdda380c2db8de041bd1c4ab7b76a1d19c3", + "blk.2.attn_qkv.weight": "a61287a9852e2f5aca9c100b471d98398b2913a3497c743de3c70ec9ddd7087f", + "blk.2.ffn_down.weight": "4fddcc382c8dceeab027fe43d8d44e67edb5e8ce4b9a1b7f773c87770380ade1", + "blk.2.ffn_norm.weight": "07e05f82b3f63f711db3b684ca79aed25c0657917e66f88af47348a82065c227", + "blk.2.ffn_up.weight": "4835a682ef1826c12df01ae7663fc45f9c82bc8e64b665f13fb7da8e201ec0fb", + "blk.3.attn_norm.weight": "f22aba7c03999ba7136f39cda747a39715e498699dc1716cd97fc5dfc58d1b1c", + "blk.3.attn_output.weight": "53b579855366fd786c5126b2b30aac4d583ca7bda56833c4865f5cadb5c18c6d", + "blk.3.attn_qkv.weight": "bb56aba78158123140fcea59c69ac562ca208f6d3086819417cdad8c50f333ad", + "blk.3.ffn_down.weight": "97280897a7cd86db2830c004bccc5bc094f50e293baded0189159a2019145a6e", + "blk.3.ffn_norm.weight": "10a8c99f8b57a960e8e0a1133c4a26f9148403d1b9bff2eff114917de996f3b5", + "blk.3.ffn_up.weight": "7324046c915e75d621b2043597a245a428d8eea31869135e6257a861491d8dcc", + "blk.4.attn_norm.weight": "507d8e164de94646edbfe33def8e8fbf7c9a6ee3fbaedb5000f72d9f51ec5e36", + "blk.4.attn_output.weight": "bbb3429e6efa98c150e0fdbf48c16180cbf0d0cbc1b3c253c6c319d78f4593a2", + "blk.4.attn_qkv.weight": "b95ee5be0786d3901273d806c339fe6c20e6bfffd2a20672a9f56af80921e8ab", + "blk.4.ffn_down.weight": "806bbf91df92a5a22bd5aa1ffb7fc2869f7293ffc7704771c290ecc583b27975", + "blk.4.ffn_norm.weight": "cfc2930a81df7aee3a5e7f726a15c1182233e868bf0d9d37f6b6ae6d8c15c234", + "blk.4.ffn_up.weight": "c3390c69533de2c8424e8069323ccc5d0c4543111535da04cf2c7d26745576aa", + "blk.5.attn_norm.weight": "0d71c4fbcefabbd021569442853d2fe90668b19409ae2805a718a829ca60beab", + "blk.5.attn_output.weight": "10ebd93629112bf2df5c30dd0953a4a5e9020306768283181ed426934d47e14f", + "blk.5.attn_qkv.weight": "5cb05633369f12d4b00e0ff787736bd846856682115720ebc6cce05270c334f6", + "blk.5.ffn_down.weight": "e28bcc5094212eafc7476dbc5b7a520d25b79578cbf4229d698e2655956a80ad", + "blk.5.ffn_norm.weight": "b6f2c4cf9f34bb4d59989f96165c14a67dc1e266ad0a6d0fcc49f1add929e6ff", + "blk.5.ffn_up.weight": "0f9ef99423cc07ebedc0e9cfa95809f2d7108d910bb4ef97ebc0b0309c440750", + "blk.6.attn_norm.weight": "b3edcc47a42218234f7564d7470611b49401a41ae8cd42123f86557c69f5d7f2", + "blk.6.attn_output.weight": "eb9b7d257b388bb5b8fe0515e5c6873317239cb94cda236e4b6ada2a6c57c65c", + "blk.6.attn_qkv.weight": "eb968081f478c52f07bd9c2761741e982dba33cc4eeadeea3557d391b9ac2106", + "blk.6.ffn_down.weight": "1b8588bb7463206290322695577dcfced300895d6e6f4b26966c53a9ae2f0f84", + "blk.6.ffn_norm.weight": "1219c04b7770983c77814200eefe743f46d15328ea2b12711e44f8103eab08d3", + "blk.6.ffn_up.weight": "197ef287239fec47c55677f0fbb66eaf0644f775bc382de843971730721394f6", + "blk.7.attn_norm.weight": "b630ad08c80d564ed1c024384818e9fd3f22a36cd7a14aa96e7e2759a8285099", + "blk.7.attn_output.weight": "970255aa750828a47d6b9d399f9612b5bf25aefe7dadbcba41fc416d0d4067c1", + "blk.7.attn_qkv.weight": 
"ebb157c880293e6de8d629f263ba8853ed1dbdc02c311d43432bb8cfbb310739", + "blk.7.ffn_down.weight": "24bcd4db4cba844c89f878b81843c373dbbc0675e889d32c5b12e63384a7b670", + "blk.7.ffn_norm.weight": "b9c6f71001808ee873ce7db8056e4b53fb4cccec8b7f0f312899b575fae39d39", + "blk.7.ffn_up.weight": "979f1828d227455c26015a2a11afe9dd05f2bb97a8ba6b38c8dab3f50e627401", + "blk.8.attn_norm.weight": "4e8e347e3775010b7112ee630f2f4f2383be7ff64e6ca6154b9b22566552eaa6", + "blk.8.attn_output.weight": "65a44babf44a435a1829945211b3168f9ec78ac3cb7a049a733e93d11f0d6659", + "blk.8.attn_qkv.weight": "343ed07671da400b040812a4058482fa38284b5d9af9becfed07417fe26ce747", + "blk.8.ffn_down.weight": "7fb7e073e3c2c503c4e9d60efa0988fed7398d900cc003695fe3fffd3e188b82", + "blk.8.ffn_norm.weight": "b07c1f655d8593e3892a2cf73f8a0c19ce8e5cb613fafbe7cbd430da8ce4c57d", + "blk.8.ffn_up.weight": "8b26e14de54b3fdc2e2d3ea41720f9d9c236a93688c3b7fd7bf43f5fbb327c9b", + "blk.9.attn_norm.weight": "46394d408a8e316916177e6aa261de32e137a82d729c0b1800b072f0c38c39b6", + "blk.9.attn_output.weight": "d57f3d46107947a7073373a0b35d6ecf7759b5df15406f4a3590a60666af6b16", + "blk.9.attn_qkv.weight": "14bb8ace8c5453148f4b536e9f4279c813f31136716947256f5cca333448639c", + "blk.9.ffn_down.weight": "2b8d98e2b5ed68338f6e4de43bf7de0c4858cc69103cd5177725f7444eec7694", + "blk.9.ffn_norm.weight": "41a499dfd418cc4c6b8c12313f673f7e2cd4a3f9c4065eb6c4feb5eed02fb542", + "blk.9.ffn_up.weight": "143aab7533a64b17fbe201490a6f674bc7f0bd370c094500b2e100419073d1c2", + "blk.10.attn_norm.weight": "ebb670aafd36816a794347287269d8f1a5b19c1e3c0a1e38023bc19fdba9b073", + "blk.10.attn_output.weight": "b5d65bbc0ed5e49fdd9d754bc18163cd042a285024d0cf6f954c503bc8c877cb", + "blk.10.attn_qkv.weight": "f06b15bac88da798fa34a62b03eaac0dbe8b846020516603c387541f2d8dd672", + "blk.10.ffn_down.weight": "fb091fcd1b4de25d1bea94d1755e255cb02914a030d23e3a234e57b8d46bde6e", + "blk.10.ffn_norm.weight": "eb347bdf9c40414af87e13a8e72e40b31f004b50f7cb366f1a219ced60a61355", + "blk.10.ffn_up.weight": "ed2d52fc881a173f404fe8a1067862c9856d6c3e0d2e90a330a7aa394e3f84d1", + "blk.11.attn_norm.weight": "64e252603cf010a0e502ca39fdf8d0a196a79aec67c0d2bb9213fc0cb80c47d4", + "blk.11.attn_output.weight": "228e33e21c69f52efc74fdfc831bc9af271e44b2a29a3dced1d64e667ce36eb5", + "blk.11.attn_qkv.weight": "ab9ce6d4ef9e42ee0da3f20a7708a3bbc5e79e967b05fa86ba946a05e2eb63eb", + "blk.11.ffn_down.weight": "0ca133b7835c98dc77c25d64e4eb7873778bdb5e4d22d8b80f920f46865b43bd", + "blk.11.ffn_norm.weight": "02455741a0dfd161c79aa1ecc381901721f229fdcda5615622a629631fb61cfd", + "blk.11.ffn_up.weight": "9fecdcc099fbb8e23c6b1ea9294702a027f4a58d265543ec5e7be79b8f63b354", + "blk.12.attn_norm.weight": "783bb459911b1b3609a9b2bdfe272f1670add73b5471da738e07ac47e2e07dfd", + "blk.12.attn_output.weight": "1e1a914c9e48b857206ac5a1f7cead994bc1ea91d5d4fff8c834d73f2e38ef5d", + "blk.12.attn_qkv.weight": "5953e7185ccb87fb4dae8f9426ec86315d4c7794326e8ab59b3a95d4af2189f0", + "blk.12.ffn_down.weight": "a3eecf0f394f86e2cfb48a5940a5c50ca86d71883b2f79fcc642a935fabce0d4", + "blk.12.ffn_norm.weight": "0a4272e41373c23bd72f10d2d82930aa3a1480aac75832bfbf01cebf0b86b6a4", + "blk.12.ffn_up.weight": "06f42776de3a7ceac3025f26a7a8bd20e062233cce2bdaa2183470dc4b30b87d", + "blk.13.attn_norm.weight": "5915da60fb03e201fa649faba780e5fdf1c761c262b206e5415cf83181f65780", + "blk.13.attn_output.weight": "4dbf6eab074fa3835fd32bd631a8208e511037d5056d2fd3015735cca7674ef7", + "blk.13.attn_qkv.weight": "d3d8339a1c4782d9e73d77fdebe154d3c5b83ac40c9175b3e91a4977d08f876b", + "blk.13.ffn_down.weight": 
"de6772b46a55e1fd42b007637dfbf68b6598e5d5b61622da0935002e1e192d3a", + "blk.13.ffn_norm.weight": "5a640ea3b8c7be49c95a58a2327e10d8e8d9d142504bde5c8091613e5b961d7a", + "blk.13.ffn_up.weight": "f35e3545e4bd3531b2e843b5efd31dee0c13c807ee6386e65473ba67bbec30d0", + "blk.14.attn_norm.weight": "9b34986450b7c98b4927e81e61a816f9e84b1addc7c14926402100037aad6678", + "blk.14.attn_output.weight": "155d52efb23d366016d861a251d4d1f4a0c13699188c50d50dba016a0d8bfcd9", + "blk.14.attn_qkv.weight": "8e1415084e1f33c73a777f19e752489f4dd312cca047733e5ea643cd4a955e04", + "blk.14.ffn_down.weight": "a2a142226b94baa01ccb65bdea2b7418e49085c1d9c3c63e544e3112c58a25da", + "blk.14.ffn_norm.weight": "8aecfd9b0ae6affaea31a80c5c9a4a14b31deaa0db7bd8f6da2a64d23447921c", + "blk.14.ffn_up.weight": "0c1407237b8c1bd02f193346b5681926fe698a5055eac6a7450451b0f991707c", + "blk.15.attn_norm.weight": "e037bd19880bfa83d983200fb0c7866f8ad16c3ff5cc4b4f3a37ca7373870ff6", + "blk.15.attn_output.weight": "045fe4fc95cc129a1b92771b179c11b12845c4c088786c607f17bd98857e68e1", + "blk.15.attn_qkv.weight": "7621b7559705cab1d4dea1c69f76dbf9dc1c8837a203b656f484703b9c1b70ce", + "blk.15.ffn_down.weight": "7e5ac20e290bc60761e1cd972354fde225b7fa861048d44d9a0dd9b046d55f58", + "blk.15.ffn_norm.weight": "b6d830d88f1db1825687973c8c2b1a24c6fa84f07af8d0e3ef9c86009baca0b2", + "blk.15.ffn_up.weight": "dcda0957cd04fc45476774dba2bbf9aa89d6b05d5ca7b10ae6f73ad2c49b1cd3", + "blk.16.attn_norm.weight": "4ee9b70ba15cb2a08240f93990e90f5068c48fceb481f8e2186bec8b7214eb3f", + "blk.16.attn_output.weight": "315cfe5536658d2498192b2980eade15b2c9a4ff220e4011911457b1727fa103", + "blk.16.attn_qkv.weight": "3c8122e3ad637583b9dcde8ff3a323267d3014bb1f0f9771e5322260ca9ecc8d", + "blk.16.ffn_down.weight": "3b5fbebd5ee2b86cad96fb8a9b45a8770d08f82c1c8b74d7061e866f7020a18d", + "blk.16.ffn_norm.weight": "ffab69f20bda372de6e5878f0539163e2fc6ba113621ded95705fc3b1465c9f0", + "blk.16.ffn_up.weight": "0935ea3d258da42d6258406365f39f58ddaabfe97ea5977580db3635188f24a1", + "blk.17.attn_norm.weight": "f030441733f3d147b4a06a1eb4aeb8465c7c24d9c53bf4c48fe7e134d3629803", + "blk.17.attn_output.weight": "07a955ef09e8dc766ac0df647d0b2c69f23c4c69a7137654b4aad80303ed0eda", + "blk.17.attn_qkv.weight": "1c10688061e21e2fe12ad0cb54bf03895c1f83c3b0df743a42f548b52cbca1b2", + "blk.17.ffn_down.weight": "ebb9cc9836f41d88fdae2aa9a4355514e4edaec8d1577ffeb947a35204e77f52", + "blk.17.ffn_norm.weight": "50aff44f6528b13db5389f2ddcdb7676244947610bd7ffbff3f881c968c2a0d4", + "blk.17.ffn_up.weight": "d716537949582be33bde6b02e38f5a70081c9642a9fb05a61312126718b8d148", + "blk.18.attn_norm.weight": "0ea695c4e53d637902f46663a6ee42adc493c36794476acc7dbddaa05b13840d", + "blk.18.attn_output.weight": "5fd35b500221a612eb4f4bddf0e9b6b7db4d7733032a75f8802fb2d884647c2e", + "blk.18.attn_qkv.weight": "b0da37fd030fe69581f990bf23bfd35467a1bbe558af6de7c0924f6b72e92317", + "blk.18.ffn_down.weight": "b355c33f44b328f4bb977567de8f7544db4b005d7a8fbded658518ecf3c5a153", + "blk.18.ffn_norm.weight": "58b3fe9094079989a86e0387143259e1cc35952d24dc3df290c4ba6df44f5c51", + "blk.18.ffn_up.weight": "2ce530954c342c30ed2ead5353f931960bfae1d278868504c0efb973560fabbe", + "blk.19.attn_norm.weight": "533e9aed66feea8f0392aa81f9e293240e1f009a5334253915fb60c2749b615d", + "blk.19.attn_output.weight": "84f2d00f98a4113a779d3b5d1c3e7c914eb47784d3ab13b290367c124c2994aa", + "blk.19.attn_qkv.weight": "fbe6b9f53b07fa7537d3b3d452d20a9bc666f9fd41ec2091dd28bc2f70fc668f", + "blk.19.ffn_down.weight": "b30199e098c8bb3f890183d8b18471e80b62b604729b277ad62488dd71e1206b", + 
"blk.19.ffn_norm.weight": "c81373e41cd340b7badb19f9517c77c4250b4eb9a02dc758b8b49b652487d7ff", + "blk.19.ffn_up.weight": "5a5cb083ca7725720e3a890f7fa46354760e8007a8188849a092e305694a75e3", + "blk.20.attn_norm.weight": "4953091b4477e354357a8e743ba0a1900633e52f1599ee082a0c9b0b2b5cd978", + "blk.20.attn_output.weight": "62d54f7749cd6856097b2632066a322b0296df915fe66f382c5b5981be0d4f23", + "blk.20.attn_qkv.weight": "406de9e35b0729ebe902d7a47905cc7fb29a921431ed35dbef0c03e5690a1329", + "blk.20.ffn_down.weight": "62fb678b0d1261e19a4903a2b347d67afcc8acff01feb33a687a35a2d1e6f9a5", + "blk.20.ffn_norm.weight": "cd9d36b7e71e55c8925b97bb09c28219f182626bcff094878ae39c3db887a14b", + "blk.20.ffn_up.weight": "b9276771d79d3e932e73ccc520c3f8476342b9ef312ed2ee1e0da822e6e3ad18", + "blk.21.attn_norm.weight": "66d8c8a35e13ce9c2a0e75b670150e2c31484a55c2316df46075312196178ed3", + "blk.21.attn_output.weight": "12ab46c9382648f9b3350fdd92a6be6352743d62d6b520d7e2024e0c838588f5", + "blk.21.attn_qkv.weight": "a7909676ee1675ca23cd29a5fdd226df8dd9d68f94c6c9bbb51dd9fd38504008", + "blk.21.ffn_down.weight": "6fb317279c6542e82f97d5a12a60fac1bd0fa0405154f9fbe265e2fe39bd49cc", + "blk.21.ffn_norm.weight": "c0f703eb3ff161b5ba4490d87d8684b8a6c47a8f433e12f418333b9db439010a", + "blk.21.ffn_up.weight": "6dbdb80ef0c35e364bbce12d40d5e74c7963c7b55d58d9579567a07ffce7b863", + "blk.22.attn_norm.weight": "f94237433bf03d675cb2f655b81ca91a1ce2447bc6b00b13d6b0ccfe2d411eff", + "blk.22.attn_output.weight": "e821f95995ce497c01e63ca64f737713b1b65f11df1903e51d444aa516f33f71", + "blk.22.attn_qkv.weight": "1b0f717c73afb5eb4c82a1708c4e85c969e8a2a8770d9ddb78b1870a2d8a781e", + "blk.22.ffn_down.weight": "0f33f7a3cdc685484be99aa0c03642b0b20850a27d1fddbe054b13a9382f3ccb", + "blk.22.ffn_norm.weight": "9df285cf211ddd7df2b36a50489af574755c7d4d98b29a05cd04566ae613c8dc", + "blk.22.ffn_up.weight": "63ac300e1efb34041dd0136cf43ea622fac6f0caccce1cd9262f5e08d2cf179c", + "blk.23.attn_norm.weight": "5f72d9e88689b4027b28f5f8f26cd3abb03635ceea7ec98a4c91a9fc691f6707", + "blk.23.attn_output.weight": "6ecf04ff61125c5fc768f8656497152149373daf321ee9c957e8f7245a1184d1", + "blk.23.attn_qkv.weight": "a9d9978806724c2959f2cf386c233831f08e1e933dbf2b32665e788d9d512ea4", + "blk.23.ffn_down.weight": "72c7d17886a3da17fa0daa456aa5e877b2ef5b8b403182b870d9ca5ca9c70347", + "blk.23.ffn_norm.weight": "971e4b712e3025a13419b5b57d674b5e4ab7f18f74b57b9afc4671623da90c4b", + "blk.23.ffn_up.weight": "df2b5c7dbd5834545b815073af0c7355b065124e6d6f0fee78d8fa5b2076dc3e", + "blk.24.attn_norm.weight": "c41957c4a79ad3b16f6e11daec1c7f530b9f3f4b618e1e4367c3b67787ac4ab6", + "blk.24.attn_output.weight": "ef7d61f5fc88ac6f31bf60cb5f4d2d6b8df42d38825807112361a7224b0dee3b", + "blk.24.attn_qkv.weight": "3e6a58fe7d49c90bb6971efbad3371c32256881173ea5aee4b0c296cb206490f", + "blk.24.ffn_down.weight": "f43619144047de42fed81dfa495f1815d3cb771330e574043e2b67620819292c", + "blk.24.ffn_norm.weight": "5501d4a2a98c8ca6b42e77b53b221dbc08f530f6a067256d787534ec6fe028bd", + "blk.24.ffn_up.weight": "d64c8b0e509e2b1118f6000176f8956cacecdbb200c7e95ed93fb78b6e26c84a", + "blk.25.attn_norm.weight": "502fa3c302d371f61c5791f4615b73018ffb1daa09b6499b227116581244c5d4", + "blk.25.attn_output.weight": "ad8391d4e9c980856f2547aa945b2b6a407a6382158dc1ddd4f08d94ecc24be6", + "blk.25.attn_qkv.weight": "42e8983780d4a01a02c54ad23d4df21eea437f119a10af5a9c12a76a42d308c1", + "blk.25.ffn_down.weight": "302dd010d4e0ab4eeaee89090409ea0dddeeeed3236415eb8f97c942497eea91", + "blk.25.ffn_norm.weight": 
"fb34c1ee5bca96986c08834df0a0c047ba041c1123ac1f563e9d64312bf82d6a", + "blk.25.ffn_up.weight": "10739a8de156816d93c92b935386540bfa976bdbef204f0312960f6fc657582f", + "blk.26.attn_norm.weight": "7036c711609128c4e55968ff3681d3043338879a5737efd6c2ac9e1a2a61f1a0", + "blk.26.attn_output.weight": "db5db45dead5cb911fa01da59832f121b7c18b2d167bf53741c40819f24d346c", + "blk.26.attn_qkv.weight": "cae34c6b7f82ed14348d5ed30a79919c383737c1694a9cb9c0de609d3b0c1d0a", + "blk.26.ffn_down.weight": "491ec3a4da9b4f49f8ebc6be658ce397a9b801ae9fb35e82177e47808c65e5d0", + "blk.26.ffn_norm.weight": "fd7059d75d7f0e5288511ddeeb0f772eb3cae3ccfe4226b877015834edc3c386", + "blk.26.ffn_up.weight": "ea1ee1274c56458ce056d2205e5bb6e5422ce4cb0ad58006b8141749b97a0c39", + "blk.27.attn_norm.weight": "cc362c9a937609265052cd38544af17a1a7448cea086d4c801139e1fc865832d", + "blk.27.attn_output.weight": "ba757a81dabde9cb1b069d1bb616fe79649a1724f756567ec61caed1304fe6cf", + "blk.27.attn_qkv.weight": "1ab8d7d02d87756c12c2275636823aa5ede3d683178225c4cac4bd892c319bd4", + "blk.27.ffn_down.weight": "deb1c711c8a66acf4dcd2d088e1548f8e08f296f755e4067d6557fa55afde88c", + "blk.27.ffn_norm.weight": "fc6242d8cb8a4a37a8ddb7e41e7e60a63d4a89edf36acb35df052f10b9c91ece", + "blk.27.ffn_up.weight": "8df39b09c4801f343aca78f2918a1f6db78c8c55e591eda4c69eadb74c26e180", + "blk.28.attn_norm.weight": "75b539308f77e3cefdc6d98484d8b5cbf0538f0c2869a77b7373a145a18bc850", + "blk.28.attn_output.weight": "ae128940eb60a6d2e121762ef4b3e9dcf9eb3e105b249507fa7f12de0e19822c", + "blk.28.attn_qkv.weight": "bdda781c288e9326c240e33905f8e621b6a2ad902e620739d34f93fcd6f933de", + "blk.28.ffn_down.weight": "f1d6e6d1c286b1138bfd7e53fe477f399ae93bc2c04e35416f84218ed7247965", + "blk.28.ffn_norm.weight": "3f837ce82c8b9bde0d61d08b6f5fe5574886ea5328dbdc53f2929f18da8b4087", + "blk.28.ffn_up.weight": "2af027002e31d1b6cfedbdb30a2b9d7213f3aa691167c353913adfd48fda31e4", + "blk.29.attn_norm.weight": "61e8003b5329462ffe0fe172f2b160260de006aed858332d49d75504b6b6aa7a", + "blk.29.attn_output.weight": "ca44542a72a37476dc73dbdcc01f5b7497cb3ebc4ea230a55c9634ccd8e56ad4", + "blk.29.attn_qkv.weight": "abb3d9d6abe57872ae3daa51935d43264093ded5ce63b49d1e280ee5758be0e4", + "blk.29.ffn_down.weight": "6764b895fce881df097489c263446f0106de36217997660c15984b3ee22a5a06", + "blk.29.ffn_norm.weight": "89e03e9a33fc0e6e31ba9f0c2bd7c5734a118c5602bb90148793e08a80e8d0ae", + "blk.29.ffn_up.weight": "fa7ad57a84954f4121653152efed1a871d8adb20a1ea9086e3e849ce359d7d2e", + "blk.30.attn_norm.weight": "91a697aca1e42af54f806a20211031c3369e8d0bd58df1b0147fe24954e1f5a4", + "blk.30.attn_output.weight": "36063fcf766c89ac75be56f688cc63cefe5f2c733fbf4378ea9956ad386fa148", + "blk.30.attn_qkv.weight": "2cacd1161f1121a2c0b979930134f4666f73fb8d7237b3b0659ae091b15955a6", + "blk.30.ffn_down.weight": "9f3fcb6217100595850c05dc98f9ab2a263afdb6ab28df2fcb08aeff512057d7", + "blk.30.ffn_norm.weight": "6c600bc1fc7de39d4f8917b81fc7d1d5ed2a9b56492234c13a4bd6028c30d880", + "blk.30.ffn_up.weight": "73cabd1bb011956b2689ea3338bb76642ef3a57c197377d666d2ab5f56317668", + "blk.31.attn_norm.weight": "72d3e1cc771380645fa75a899858c95f39857a4f3f1ed60fe1578df383b8bc53", + "blk.31.attn_output.weight": "40089cdd29994dc19a1d89fa15902a89cfeca3540f12dc9bf4d00ef82506e456", + "blk.31.attn_qkv.weight": "1d0bb40e9258071ae14290a53c619a8e331dda07354d2a02ef45766c029ae5e4", + "blk.31.ffn_down.weight": "8defa0e06335b793fa8be03883f0a322d6c5b33f52c69c943c35c60d16e42c0a", + "blk.31.ffn_norm.weight": "33c55d9d0c496ccfb130361fe131649346e098abaaac39c0519507e5d846721d", + 
"blk.31.ffn_up.weight": "599f6503f61c692c1f82001973d35119f9688db5e6be9d9c298411491c93f09b", + "output.weight": "14b8dc662bfa3308ebb2e102c562d8e52c15670e538f20f3216a9c310ca9dd41", + "output_norm.weight": "7f2294ba94ce65681df6c7ddd8698799199b9d77dc83c10bdad5c3999f0fdb82", + "rope_factors_long.weight": "e34d378664e354652c38f47d10dafb0498ccc2fb042d39ff7fef768146fff22b", + "rope_factors_short.weight": "9379146a4988f373d362fe47b06c75e7fe7c54aa4dc9558758df79b7a87471fd", + "token_embd.weight": "19a03c1fb5ac0baee93b0a7d8b0f26e9a9b011e229b694afc50ebfc13d84f8bf" +} diff --git a/convert/testdata/all-MiniLM-L6-v2.json b/convert/testdata/all-MiniLM-L6-v2.json new file mode 100644 index 000000000..15c8f039c --- /dev/null +++ b/convert/testdata/all-MiniLM-L6-v2.json @@ -0,0 +1,124 @@ +{ + "general.architecture": "bert", + "general.file_type": "1", + "general.quantization_version": "2", + "bert.attention.causal": "false", + "bert.attention.head_count": "12", + "bert.attention.layer_norm_epsilon": "1e-12", + "bert.block_count": "6", + "bert.context_length": "512", + "bert.embedding_length": "384", + "bert.feed_forward_length": "1536", + "bert.pooling_type": "1", + "tokenizer.ggml.model": "bert", + "tokenizer.ggml.padding_token_id": "0", + "tokenizer.ggml.unknown_token_id": "100", + "tokenizer.ggml.cls_token_id": "101", + "tokenizer.ggml.seperator_token_id": "102", + "tokenizer.ggml.mask_token_id": "103", + "tokenizer.ggml.token_type_count": "2", + "tokenizer.ggml.scores": "6db964fe67338aca57790481a390121ff3dd643eebe49f7dd308029ad99abb6f", + "tokenizer.ggml.token_type": "98d247c5404b6b18f05f133b92dd56edf6efefefac326794b00d7b351f6c5aa1", + "tokenizer.ggml.tokens": "9efe405e229a45ff9916f54c475d151d2200cd2ab0006f347abfb069cf096c86", + "token_embd.weight": "8c1ee80a9ea4f65aa385ba30112010068af3d209bebc6e149d3d4589c2cd0a5a", + "position_embd.weight": "6c516f0b1c4e2388ab90394dd80ad69e4e4509b890982fc3408108ae66210eb6", + "token_types.weight": "f879f8e422ed211948f28b560d3c5e17aae7993f063b51196a28cf5c0fb3da21", + "token_embd_norm.weight": "75076e095d717aab96f8b6beeee503c27940d9a76f2b891a0e3de72f8a6043e4", + "token_embd_norm.bias": "298735285ffe944e1bf03e5d35c7280326b85cf121bde9874f1af5dc51ab939d", + "blk.0.attn_q.weight": "ab0923ce4c1549175112dcdfcc860fe30137f991e03ea6857fb5993670adaf6c", + "blk.0.attn_q.bias": "a3ec29551dabf976e1d34256b8ab5ab7b758f3ed9742c3cafdbd984d5441df62", + "blk.0.attn_k.weight": "4c1038a6d035c3e9ffed7fa672b614627814752503755fbad0cfb76a41ad71ba", + "blk.0.attn_k.bias": "e0363930eb588d91816aa3d230bb03b6e2551c165117b80b8d60397413819ef9", + "blk.0.attn_v.weight": "425e2e53e3f00ce98d29c3e6a161eb55d3e6ae0d96fdb9f6242d1c4fd6eef4b3", + "blk.0.attn_v.bias": "6579173a1e65ee124fbd0bd53cbdca4225515b4f2c5f18fb1bfd000f5978f9bb", + "blk.0.attn_output.weight": "a6d70a08cd7164de5d12af65d86d657c3db35aaecde778b2b3fda9193c4c9802", + "blk.0.attn_output.bias": "2b8d12c4f9a9c5bfaa29c597839568f6e0525cb41eeaf64ddeb6bd84dfeb9701", + "blk.0.attn_output_norm.weight": "bbe6e502a473228b525aeed26cc31b7db123ad63bdc5a6eebac6ea70b8b51d62", + "blk.0.attn_output_norm.bias": "36eaacaf0007c5c62daea97aab0115390c0682914f78482e37eb76885f4b7a50", + "blk.0.ffn_up.weight": "24654561c76ce387d125759ba843f06b904ef721fcceaeff6ccc62180a48e874", + "blk.0.ffn_up.bias": "fd3f0126aa1d95768fa60eb6f4ab8a2763cfcb7e5405f35b92353031d86f4d34", + "blk.0.ffn_down.weight": "97a829763a6a5bf3329ceb4d39c424ba4787d61653a5b0bbd1f84782e4d4e0ca", + "blk.0.ffn_down.bias": "7aa980c30ae8b4ee7f69df28808dbf5c431f56ccc4a80340f644a0419f16c054", + 
"blk.0.layer_output_norm.weight": "ef30dad4c2a083ae1ff5039a2a6cda60ecc89bf1e486a6f8c0d15f50589603f8", + "blk.0.layer_output_norm.bias": "8b1b77e67568b1bce43fc476de1b177c53ff688d66beb66995e8eb3dc290da8a", + "blk.1.attn_q.weight": "284331622a1f6f9b87ccee4f652bd66a394ca493c4d93be4d1844e4f6159ad10", + "blk.1.attn_q.bias": "e24ebd4860330e08f6bfdd077a82db0bee33f4c8846cf1db26327a34754c7069", + "blk.1.attn_k.weight": "729dd0d555544b5bd0f7580b3c8b384256b974605f0e7487b95f295aa032997d", + "blk.1.attn_k.bias": "2aa51a828a858f35473f54477583fea54ce2ccc34ea60fbd1d228fbe9bca827f", + "blk.1.attn_v.weight": "6be304671cc311d5ca5c103f2b51467ee800c589bc5b8101e09ff5aed1f68c21", + "blk.1.attn_v.bias": "43bcbab78a8819e07f723bc9e5b737b71e87a7594f15234e882b63e327a64199", + "blk.1.attn_output.weight": "15ec8a1a12b26c9976445308a09f748ab0e4bef0f583d13ab08c3129f8738d73", + "blk.1.attn_output.bias": "dac2146f4baa6ed16f6c0dc7443831fb7ec79bedcceafd80d1a4b628a1bb072d", + "blk.1.attn_output_norm.weight": "d2151eb33bffac536787a4c9a5d2b31c7a80b17c4611877842a3cce2cd6e98d8", + "blk.1.attn_output_norm.bias": "31e1b779716dafb855d2cf5631ee168a0ccf372eb9c6ea6091f66fa97a9b9d2d", + "blk.1.ffn_up.weight": "a57547fc3fc3b77406f5cdcb0c87af9bc184701f175c39c1f35297826fce3cc7", + "blk.1.ffn_up.bias": "123be6d541d086202913c75d878c54d59a749f3af7b58f7ef9eb9e7c62a24c9a", + "blk.1.ffn_down.weight": "cfdb79788377e5cbded8790cd41b9e66c397ecab75474071fcd7cf32d30f9613", + "blk.1.ffn_down.bias": "bcb58315519a573097960891c9ae41cf4c685ab78c3e0e77471471758a7eae88", + "blk.1.layer_output_norm.weight": "819b554271452bfb1d84c2603b90377b2e41a0ac1e3aa8b417ccf9dce63375bd", + "blk.1.layer_output_norm.bias": "47a3433ac27f5ce8947fb38dd491f3706df4ef6adb0ddf74612bf0f54b19e164", + "blk.2.attn_q.weight": "1557a9ea852b1880551f7290e00aded4f35e6c4180fdcbed1b0039bf805f639e", + "blk.2.attn_q.bias": "c3bfe5f3066f655fd36b055530997b59ff33ef013563aaeb3cb8ff07dabd59a9", + "blk.2.attn_k.weight": "cfd08eb69c61ae2f9f14f9b7ff5c5394ca264b1a9f3d48156677f90dd1766289", + "blk.2.attn_k.bias": "9b839bc0e79974a0b3f5d1895972bc6f5c9a1bc16052e1af786e6a530758152d", + "blk.2.attn_v.weight": "02b26b1208480eaeeb00e7b4cf8b690006ca14759357fc44ed4a2a8924ead993", + "blk.2.attn_v.bias": "e7e6f0089fded1659a867ab736c220d9653ea7da6b1b94baf5c8d30a748b63ab", + "blk.2.attn_output.weight": "a1db121c7d33806b349cadd050300a57db49fdc91224fd07c9ac43bf4299dc79", + "blk.2.attn_output.bias": "7675128b6a92555cd955c820311e91e9417d31f48848f45d047b4100c62148b3", + "blk.2.attn_output_norm.weight": "5b4595e0fbcba67a700c4331adf746d2fba3546364a4db5607ae241947bb1a21", + "blk.2.attn_output_norm.bias": "7b8e16826ea30e5a2ba0b02e0095a901775981a296e98819625320e983060d08", + "blk.2.ffn_up.weight": "a0d815d946ac07a65095c4ae4df77b818845e6d97795c7d82f55e689d944db59", + "blk.2.ffn_up.bias": "ce37c0a4174d6bf773ded7bd016ede627ad3bdb8bc99b9992a18dc8e8898f252", + "blk.2.ffn_down.weight": "f6231d2a25426fbd45b9f1160aa484220eb227ceef0348c4a6a6de890606e5ef", + "blk.2.ffn_down.bias": "429e00556e8dc63a785238b309b9d83738500c1ef6d736fe6526ad88ea496d27", + "blk.2.layer_output_norm.weight": "651457a573adf3f7dd9ee5dfe1c8e89389e94443993aab77ec6a0b05aa621e35", + "blk.2.layer_output_norm.bias": "41fbbeda7fd89b0cef5f945ae44011c316982390401d6f75ba8c6d365e185247", + "blk.3.attn_q.weight": "95a43f32949d2cb8d22815bb27a44abfc6665ba96221af817dfe058cb6ca72c6", + "blk.3.attn_q.bias": "f4e34385e75d8108b6b3bd336106e2133a8c9be0cc343dfe5dc48c32a823c7cb", + "blk.3.attn_k.weight": "6b892da6a17d4d3265265a15f695864a31813ee8c8e710ae9bc9e1adbc6c9a18", + 
"blk.3.attn_k.bias": "40b8067b641a56014cee42548240aa8930820958b1933004892b5f04fbaef39e", + "blk.3.attn_v.weight": "9fcd5922319dd2a461082a5ce040c1dfe65d87d70ca6547dd0b46eeecc3eeb2b", + "blk.3.attn_v.bias": "b528c56212e66931fdbe267ac327a9c2f87cd03baff3ea719e30afe681da15f1", + "blk.3.attn_output.weight": "e3b178c1b03981e75510e0d277af23ea59cc404b5394e61bd32291825719b502", + "blk.3.attn_output.bias": "712c84d39a6a5a9c06a09da8fd9939ba0d5525524a4bba61ea4de09b48f45cae", + "blk.3.attn_output_norm.weight": "d1ffac88e675592ff72f8a617be32b4a381d443b2f8f2645dbe44a1e5745aac0", + "blk.3.attn_output_norm.bias": "ea31a1c73146234c50e0e43f485c458413714867b8e2703af66482f7db2d6c40", + "blk.3.ffn_up.weight": "4ef4f3b9a1ea6ab2ef2eb6e8b008e06a44790d099d97482a05a51e39a29afac0", + "blk.3.ffn_up.bias": "06a4296dda16f452675c51f108079fe7722552d6521c737d97734943818b9a2b", + "blk.3.ffn_down.weight": "f114b2bebe392c7d80433bb880c6730293aa4561b0b0370dcdaf7472daebd847", + "blk.3.ffn_down.bias": "2c8e67831d28a3bf613fc7912ae3259b63d72abcaf4d30efd8800758400158de", + "blk.3.layer_output_norm.weight": "a1dfeb7b5a51dd56447312ca41e2ad2f361a3ea12ddc355127f5f4219fb0a482", + "blk.3.layer_output_norm.bias": "1ed630021b25c6c6fc93fd32988b9907df966d4982a93081f639aac3044618ab", + "blk.4.attn_q.weight": "b5fae4c1f9a5f33a2a2e816ac0c01c25f422e4efdd59ef1ed93da2610e5370fc", + "blk.4.attn_q.bias": "c2e376524ea98ac3b10d9eee19ecb1b1e261fa5149efe0232844c923dfb428fb", + "blk.4.attn_k.weight": "a4632f5ebf9321d9d08f9112a4e5dda2efe5671df4a4e67fee24845f5b14af16", + "blk.4.attn_k.bias": "a9a02ffb8b8b4f6dfe487a7e0341f1d5318c9d2b793a688f34cb1b22fc66ef60", + "blk.4.attn_v.weight": "10ad8deb81d9fa093b1e5c0f24ea82aa7df43e6aca49e260fcbea56eab8cc86a", + "blk.4.attn_v.bias": "7326813e181e021130bd33ac136293fcffccce2d1d8cb59041e5b13a8cceacf6", + "blk.4.attn_output.weight": "c92573088c7437c2b3cda51490e152c27fb19e5468df591eabba5a49d5398d44", + "blk.4.attn_output.bias": "14e10b419e5859af1eb685af5c330aee67048cd704dcead9217840c6f5393222", + "blk.4.attn_output_norm.weight": "02b6831c0e0fb0edbc579a92812a1dd972cb15d14fcd382d4427c5a7b300ac44", + "blk.4.attn_output_norm.bias": "7eed5cd503bb6bb6ceb1bc8b07cc077903a4f14fb8b9d6cdf39644815ecf1374", + "blk.4.ffn_up.weight": "8d0c91d62e74d6431321116a37cf3339e630bd50ba164d3304fc4fe8dd831223", + "blk.4.ffn_up.bias": "d325f07f73c005a273c484c7be8e7abb4d6e8a5c4fd093f5869133b97629d017", + "blk.4.ffn_down.weight": "7ba7bd81143f40537b84f938e403e19f30e4928625eb371de052b9025beb4d21", + "blk.4.ffn_down.bias": "2853d9c2a75288214a4bf4907dc19d04d01926f4913d302b1aa7bdbfcce0f7a1", + "blk.4.layer_output_norm.weight": "a4ed1885fa77b90fed5300c355ef0aa0c876a8c747151d9d790939d464d57d4f", + "blk.4.layer_output_norm.bias": "62142a81e813a9e636333b2b805d6bc3b17c5e7cd4b15adce1ada6bc9a32563c", + "blk.5.attn_q.weight": "afc1dff080a72c3daad01384b1448d476aaf789871017c8ff8e144788887995d", + "blk.5.attn_q.bias": "748a820371c1d4f872c84545b36358d239c35bf6c99e2812c237d88c3292763b", + "blk.5.attn_k.weight": "59e30c1ed8acd2cbb01de5f62e7804015b9ecf98ba157d98cab016344639eda5", + "blk.5.attn_k.bias": "f839520078f9e589496e982e86d0126c7aa14196047339abffcf49a696229f77", + "blk.5.attn_v.weight": "3e21fb874e21b90308e1f46af034a3c32d3eba1628d62ae5f2246d6af5818923", + "blk.5.attn_v.bias": "5cd4852bf95c1444d10d756750f6bf49f842c0b39e9953c7f408bb67c325ac8c", + "blk.5.attn_output.weight": "636ce6a7752895f204b9d01ba0aedd9a294f908b42f372c22a16d9dd590d7471", + "blk.5.attn_output.bias": "82d924d4b0d2b94f2bbff91619216d6967a3541ce9b1531a6a60457a67b5d219", + 
"blk.5.attn_output_norm.weight": "5e7bd0a8d3396080f3360d7c4700bf094a06216431bd014c4479eef72ecf4271", + "blk.5.attn_output_norm.bias": "66c6de5edda5466d029c6753780be81ccd4218bf8bc00680000e0f06856ab712", + "blk.5.ffn_up.weight": "5bbf6e7ea380e216e33f8bee06d25f2265359d3876a300e92bc6e41d48e33430", + "blk.5.ffn_up.bias": "9d795388bb36fb33ad3a37fea3ccb4937838e02800a608fb47d363cd06b47370", + "blk.5.ffn_down.weight": "2fd628974e7f075479dd227b46fbd48ae8d3ca34d735b36f391ac06410730368", + "blk.5.ffn_down.bias": "cd213ba9eaa75fa541648097fbe9c96e58077e6c3ad6ad2fb1f21f8350f44291", + "blk.5.layer_output_norm.weight": "159a9df41d15b7022d136f86a2a2631c4635f9816e957472217077b522bcf52a", + "blk.5.layer_output_norm.bias": "24c1f27ffd1eb4e5be7e3a2909943e6f0980635d761fa1efdd0c19645da23766" +} diff --git a/convert/testdata/gemma-2-2b-it.json b/convert/testdata/gemma-2-2b-it.json new file mode 100644 index 000000000..8f34e6672 --- /dev/null +++ b/convert/testdata/gemma-2-2b-it.json @@ -0,0 +1,312 @@ +{ + "general.architecture": "gemma2", + "general.file_type": "1", + "general.quantization_version": "2", + "gemma2.block_count": "26", + "gemma2.context_length": "8192", + "gemma2.embedding_length": "2304", + "gemma2.feed_forward_length": "9216", + "gemma2.attention.head_count": "8", + "gemma2.attention.head_count_kv": "4", + "gemma2.attention.key_length": "256", + "gemma2.attention.value_length": "256", + "gemma2.attention.layer_norm_rms_epsilon": "1e-06", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.add_bos_token": "true", + "tokenizer.ggml.add_eos_token": "false", + "tokenizer.ggml.bos_token_id": "2", + "tokenizer.ggml.eos_token_id": "1", + "tokenizer.ggml.padding_token_id": "0", + "tokenizer.ggml.unknown_token_id": "3", + "tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8", + "tokenizer.ggml.token_type": "8d40143b3477df77beea4139420335ede458bf5e14102f01b0170197b55da8d8", + "tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda", + "token_embd.weight": "64a9d30707e659e2e673656d71f5aef7a9fb9fd83bb9a77558dfc5abbe218a05", + "blk.0.attn_k.weight": "d8b4437c5edb3cddf6af9987038e1bb2b191c4f0fce0e160d2abace717f5d5d7", + "blk.0.attn_norm.weight": "1eb73e3f7aa8e502f6ca31cd19efbb8e4fd9a89692e13e48ac8205545a7fa7e8", + "blk.0.attn_output.weight": "39e7b78e57d356a22dd89ce1c4d7163b970712ba756545e1703f97866cd2192e", + "blk.0.attn_q.weight": "795058e23b6109febd9d55c89e1eebe6af0714ec8c56fd86a160876a6135ffe8", + "blk.0.attn_v.weight": "0cd6e583d1887c020472e961bbb113fe5a0d23ae2f1c2c876fc366cdb7692b52", + "blk.0.ffn_down.weight": "51eb4d962189e945a84e94e0dc1aad3f8f90cc1a11e18029670afcd0ea0acb1b", + "blk.0.ffn_gate.weight": "9811a29b8ad48432925897ab21dfcb13c5cbd372aeccbbefca9b7866883b4ce3", + "blk.0.ffn_norm.weight": "92cbf4652ef503c1de5b10f2be00b3fcf00100980cb3baa8f3013a8d8bf3d851", + "blk.0.ffn_up.weight": "af87de21746879483ed1b374cdd76b19ba11ca2b6dbb1beba98efdf3be3e8077", + "blk.0.post_attention_norm.weight": "32e135f1f258ffe407018899e39af1725d59d66d60022b9a21575ba160e0357a", + "blk.0.post_ffw_norm.weight": "ba286f5ac11b07fbc986173708c66f1920427be5a6d108af38fa0a837c1c8eb6", + "blk.1.attn_k.weight": "51584435552051f7fade76beca582b3f7190cf7fc07adcf527c2774d4b1c3901", + "blk.1.attn_norm.weight": "6833104c7fbf35a7e799ae56c262b97fffa14789642aee14381b25acd21ed80a", + "blk.1.attn_output.weight": "14c39481369087bf292ac9a3ab2ef166f9fe376a9f90c246653213ef264febdc", + "blk.1.attn_q.weight": "443f64ae2229f857c69d6bebb7800b685786cb77884c3ae19d4286aeed081325", + 
"blk.1.attn_v.weight": "0df482de2038f1e4c8a7733ac0ddb69ad90759dab5968b942af0155588de4c4a", + "blk.1.ffn_down.weight": "66f30763a8bbbcaea609a0087ed75fadb5e771c06378dd2cea94cf17e492e8cf", + "blk.1.ffn_gate.weight": "a7151bff00a545fa18b2c92dcd2a14572ccf9beb957a6c494f1374e8ebe174c9", + "blk.1.ffn_norm.weight": "e197d71ea11b5276bc0167d2663b88089b3ff42b47ba91e85f6c5d95f6306435", + "blk.1.ffn_up.weight": "57c182e0b14cccd1350d388f0c616991702e74281db54637451b70f4ccc24f9b", + "blk.1.post_attention_norm.weight": "3c56f837168d784c2d8bac247c130bdca6610c095c8da4558c536ccad7605609", + "blk.1.post_ffw_norm.weight": "d2a51d320fd01069dd7ccaa7082f16a7faeb671885607d7900b10a89c354d0fa", + "blk.2.attn_k.weight": "bc103c818192de7ce36caaf89dc117be4df13fb902e0bd9a23c64edace5df9b6", + "blk.2.attn_norm.weight": "0f2503aa126083a5d6ac72481be1ef66c6014705b573682b35bd864e4749a3d5", + "blk.2.attn_output.weight": "05fcd4a1226e482f91803a266f72caca887a93e63c2d2ba5611ab3c68d38743a", + "blk.2.attn_q.weight": "6a10b5c2fd423d1e4c4fd60fa8c154a0159b6b2501ea79cae2ef19f45a674e5e", + "blk.2.attn_v.weight": "3cf891945a1f8ae7cc908a5c6b729ff5b70f4436c5ffdbf245cc0ed4cc19cd1b", + "blk.2.ffn_down.weight": "ea204fd04e0d2fc728a9861a459216bbfec629c152004ba625f52cd8837bd51e", + "blk.2.ffn_gate.weight": "3a3518729f1b8b64a82b8792f33987db5418fdb094be0263c68f146a5c38de54", + "blk.2.ffn_norm.weight": "754ede678b725de41a34b82f0edf7688b5c065be7c0d46df6f7ad9430d986884", + "blk.2.ffn_up.weight": "ffdcb88439f5828ffbd9fc844b03ff91637b790b9838097258cc3ae75935720c", + "blk.2.post_attention_norm.weight": "4b3f53b7ba26e8c36b2dfda3b7e5fc4b1065257cefdea235fc7df9af130ac2fd", + "blk.2.post_ffw_norm.weight": "e550369e26b8485e2b54ad34b34bc98af5494287dcc513c2c39cf1eaa5b89d07", + "blk.3.attn_k.weight": "89f24ea450e37d9e95757651a83205c085d81b354ee9489dd6310a391d8409f3", + "blk.3.attn_norm.weight": "24e2ea662b7cb822b4ca5cd61bc17f2709f406d990ec3b4a0dac1cc112db45cf", + "blk.3.attn_output.weight": "ac4dad69473c6e3fac56669212cadd8c34ecc5973d945972e974d94805334967", + "blk.3.attn_q.weight": "b6a9c9a7d4722b9096631c65de62228dfddca6e26edfe6af7fce01e116ef0f4c", + "blk.3.attn_v.weight": "f272a960a40093942309bc342a379984cbacec2d7bc64428db3f64e6b1887ed4", + "blk.3.ffn_down.weight": "c0188ba50d8228805982029c277fc0e87aa57473b8363037c648f6d006ff828a", + "blk.3.ffn_gate.weight": "a04aec1561ee6c0fbb18c3db49dc62fb533619cf697fd548cbf2279761aaec3b", + "blk.3.ffn_norm.weight": "bc053837d44087ec05eb5d9458357b2a5be787789b19cdbbdc694b57697f99a6", + "blk.3.ffn_up.weight": "b3ce8b274f20796d3b1a7c08ba27a919066f9de89a782faa544c4a8d6bea1382", + "blk.3.post_attention_norm.weight": "9c922dee7a7df5667289e2788e60170238239cee2dfdbbd9e435763f9f416718", + "blk.3.post_ffw_norm.weight": "b682544ac953ad2e0b49027ed8916f2e9d1aba5d1587bb4127ac703570c7a03a", + "blk.4.attn_k.weight": "143b0cbb4b787b95c2b6212374410e32173ccef2adb914908a2f89a7916de512", + "blk.4.attn_norm.weight": "5668f60491b780273745192662d02c9a92a4f692b29d16aa0bbc7413fec4f85b", + "blk.4.attn_output.weight": "b9f2bdb68be1e0cf66dd19f8fa2afb105910ad2ef394864cb32cea8f8944e0d5", + "blk.4.attn_q.weight": "ddcf1343dafbc2dfcd0b8741225af22fe4b54b2becce29240bd01c34265d126c", + "blk.4.attn_v.weight": "6dc7074366e7ed52d9f48c594dcc85bef738e096276cb99d28228c89eecc5b9c", + "blk.4.ffn_down.weight": "30334ffc59ce343cf2a1b973174acb7722823463adc07e19a99bd0f404bc9906", + "blk.4.ffn_gate.weight": "890f7c8af208d63b28db52c4b8c16c2288a382d87ff5a6a6d6b0a5b3bf27e6cd", + "blk.4.ffn_norm.weight": "ff0316cc7847221eb86a90c1ab441d4ee61553d410c66414a7755021b3b12448", + 
"blk.4.ffn_up.weight": "6af97d113f91564c636734f215e25ee602d48eb045458f300b3ec7582be0f41d", + "blk.4.post_attention_norm.weight": "69438f231e105e68216b078bdeb35a7cdc8b12c4e2845e18ecf4c8d361d6a321", + "blk.4.post_ffw_norm.weight": "0fd535da78bcf2b32c95b05b2b83dc49817393765be90d8cc1ed3d56f47b68ec", + "blk.5.attn_k.weight": "0166eb3c6d20dcf3d3c169e94caa8dee057535bb525e29f698fb6f8844f18a6c", + "blk.5.attn_norm.weight": "a7808f27f164023d5cde2be00fc23cac6c71aa0ddeb60bc23e12411b80087672", + "blk.5.attn_output.weight": "8b65b2027a0842b68c5308f91d6a31de9599d794157d77df8418b19f9e0d9334", + "blk.5.attn_q.weight": "966bc626ef2c2394d872087a41c126bb1b67d1d5f6de920204ef5e5b16c34003", + "blk.5.attn_v.weight": "9a362aef3f4437fbf0ef6e1ba785f3329c3db2960f93fe36547d2795e9c254ea", + "blk.5.ffn_down.weight": "63e53541d34197720c06f297aa8142ac6b6eec002c7987b296f26e8b1400f931", + "blk.5.ffn_gate.weight": "d9591fdd32f783e0fc26e20d5d587ee8971ac8ae2e4c818c6eac1c125c7c7f37", + "blk.5.ffn_norm.weight": "677334cc60ecce3a7f4ab3acda15d359353d7358872f614ad8914e3780e9fc6e", + "blk.5.ffn_up.weight": "a63764110e1c655ffbd55af0669b2dfe4cc29d0e198d33a8e5426461b08a85f7", + "blk.5.post_attention_norm.weight": "c55499f859b2c0a7f5cabceaae47309a5ad38bc29d0f4a8db81f1357023162a9", + "blk.5.post_ffw_norm.weight": "82752754665f842418f3e302cb5f43d1e0504dcd124c4b8ddb77018b2c793837", + "blk.6.attn_k.weight": "e20a5f0d6c807273c8d491439566b428497ac02097cf0aa55e33748c28e14be6", + "blk.6.attn_norm.weight": "2c6ba42fd3c73d72073ced03a32dd28d70a89ed9bbbc8fea1ba03a7ade951e6c", + "blk.6.attn_output.weight": "4de7c5c2f4a133a266e17ed8c14c52959466b54cc7ab9e19f789a33b4850f284", + "blk.6.attn_q.weight": "56462d921800e6b8cd2213fef04c4ff16d728905cb2f4c58e966d0a053a3b0ae", + "blk.6.attn_v.weight": "b758dcbff769d6240c2245ede1dbc62c4170a67c77458e866312589220fe29af", + "blk.6.ffn_down.weight": "582247fb3c2bf687cbe9413fe18d18ad47bef4b65df7d78905e10335c6134764", + "blk.6.ffn_gate.weight": "3035444d5286aefb7a6d04e55bc27e1fac7cf895cd5be02319a431b8e047b4ae", + "blk.6.ffn_norm.weight": "e582d24c66e01b96faa20ce6adfda3d8583b11e809bff89969927398175e369a", + "blk.6.ffn_up.weight": "6f4b7bbfedeacf61a4866ae0616c4ba6c9e856662e8f00ae6aaec7f52c53e7b4", + "blk.6.post_attention_norm.weight": "8fe51b50bd677d21586aecab0b565c4bf9fa68ad50bfe366f45e8fea3c657ca8", + "blk.6.post_ffw_norm.weight": "81ba3cb4c2bf5c546b86855b7a885d3fafededc67eb3a35cd3598b03c9e26e65", + "blk.7.attn_k.weight": "2e044179cdcae0946708c86bfea7aa0391e1f7e2a09b33fca035d384cc3ca758", + "blk.7.attn_norm.weight": "94b48c546b046803c60e75a3acb17a356b710735989938021b565f68df9b4985", + "blk.7.attn_output.weight": "65709b4ad7a581f4d75793d39d4032a359f6bcc0c3835205242a0b99e5b66824", + "blk.7.attn_q.weight": "8ded993c95d1f7caf201ceb6fa035cd6ed6d351b50b999fa9355dfee9486cb5b", + "blk.7.attn_v.weight": "c92d5e2d2d48397542bc03bea25bf39154075e66c5bb1ead85188505aa04ae91", + "blk.7.ffn_down.weight": "e8ba8fb57208805ef1dc23cd7c86e9a2d1fb7c52c3940d292cd5bb2eb24b3fac", + "blk.7.ffn_gate.weight": "f0f06d6a2e06c5ac252083bc61d05c814e6289d3f4e4a87d2f06918254c02c36", + "blk.7.ffn_norm.weight": "ebf8ef775f72624148e09d68a4332187a7a5020c521fe0623da1cd3485ad33e0", + "blk.7.ffn_up.weight": "a554adc4fc7122c247c77670e169916ba1794c787b5be30a2b36705138f1f746", + "blk.7.post_attention_norm.weight": "3aa6bc21d85c3a0c12b964e82b12feaedfdd13130c3cd2229228e24e0967ebdf", + "blk.7.post_ffw_norm.weight": "508bc7b19ee8ff08f0007c890133a462fc57c7e72b16ee8f6dd64def264ef876", + "blk.8.attn_k.weight": 
"363c8e74056642fe9e7c2f3f9769d57319cd3fa0a6022810189ab8d894322885", + "blk.8.attn_norm.weight": "685b49a1f1acb169f4df0bdd8e3de6943f3033cebad14b898a72000595610d92", + "blk.8.attn_output.weight": "7bde571e4efef1c6a6143f0526721dfb59e0a0ea0e1a3616a322b2eb937efa48", + "blk.8.attn_q.weight": "fc993dbc1074c28a0e1d85e5ab2f4ea6a9c6c1affe7ee56027000a275daed9b6", + "blk.8.attn_v.weight": "281e8791d3aef9b3864f1cb054da0ae0c2fef4ce0a58b1bad8bc136b2fa0f62b", + "blk.8.ffn_down.weight": "b1164a2578a7f87ed99c2bbc76c5dfbbbc6a1a803605391acc3f320fc989ffd7", + "blk.8.ffn_gate.weight": "6b39a3b3aaaa79aee61416b54d62160b9258042650e61c6b47bc77c2dd17daf3", + "blk.8.ffn_norm.weight": "17ea1362c72da27f12bc936500492035bdef3fd8f940cb12b57f37d42ba8ecb1", + "blk.8.ffn_up.weight": "bc3a7c47afc440d2bdf8fbe9ddf2c9220467472c60c8b4ded8c0f181470ec96c", + "blk.8.post_attention_norm.weight": "5c506204e00411ef9c8b4134d40eedcc19fffe68dd0af7d7cc49dcabf2dfac7e", + "blk.8.post_ffw_norm.weight": "002faec235c3678864e2901eed275ce4e9dc229164a91c9cd4c965142ba62305", + "blk.9.attn_k.weight": "0bab39d8c237f1b6d0010db40467142625a9e6f2e0e4c49a56c12b41e4e0b1fa", + "blk.9.attn_norm.weight": "de5f38e873b17f07aa7598831b89cc1cae2c9bc3eb2e042ee9af059d2563e84e", + "blk.9.attn_output.weight": "8a8184702c25a62df9ff309c0c7badc8587208523b2be3e8fa90ce7080573e6f", + "blk.9.attn_q.weight": "7c961b2431b09ddf95377acd07201cb91bf13d9cd3ae0f2c25c7d6a0358d9f50", + "blk.9.attn_v.weight": "e22d240cb4743067033e659cbf210ebe2ebbab3e1dea6ccbe5eaa982382ca038", + "blk.9.ffn_down.weight": "a426f81210f03d6ad53277416e1fdcdf37d8065e4817613edaf6c67a343426be", + "blk.9.ffn_gate.weight": "a82eba825cb77b8e64f85ff99ede2fc71bc9b01751eeb17e9e6c246ee12ea62e", + "blk.9.ffn_norm.weight": "1a97f9b1302a3a326d534c5c3fed2db6db0ae45fd0edd381a3e4fc1c75d81030", + "blk.9.ffn_up.weight": "5f20bac2bbf03bb42adb92fbf99561651e1edda57e0b61935ac7f6c08c0ed7cb", + "blk.9.post_attention_norm.weight": "9f9866d13988e1946b1e1c80d9374a92a6e3be33748f8eaed3e126d1e1a4c796", + "blk.9.post_ffw_norm.weight": "a6896dbf698db4dbbe5dbf12417d4fd80e9cad0c539c858892ec0aa5b046bb58", + "blk.10.attn_k.weight": "ca8446e5d21ecd4e6a70dca8d321be480be4fba94d70cba065205436feb44270", + "blk.10.attn_norm.weight": "4f41fe290e8f21f63b82151b6cce94bf7318d121468816b0c58af0ff7c1658ab", + "blk.10.attn_output.weight": "c626d2e9681c5c941bbde43dddfae1a8d4986bf2be4470857bc8e8bd7f869044", + "blk.10.attn_q.weight": "1e61b210a13a429977325cf15d781ab77d604cfa862f4270329cbd94237d5835", + "blk.10.attn_v.weight": "8ff8d3e3f058ec3b35ada1057f2ed59c06494d0e0be6a8dc3ff9edf9f0e1a115", + "blk.10.ffn_down.weight": "bcebc04219f8081a5f483e58103c0ddbbbc631a0a54fd6dd9d55778e041f70ee", + "blk.10.ffn_gate.weight": "7a23a1e620ef871384ddf9611ccdcfb893fbf013cc203ac8e72f745420f1eea0", + "blk.10.ffn_norm.weight": "e3a375e43c349a1c6c66c22328e513cc1af3137fe839e43dc8e9be2f65914fd7", + "blk.10.ffn_up.weight": "5d182e7c94369194fca5f19cbbe668a999911e57f3d363bc7fb6088428700cb9", + "blk.10.post_attention_norm.weight": "b841c6308296e8984f3c5f549c6e3a242f4b3e19141e1f54cc08de9c46759c09", + "blk.10.post_ffw_norm.weight": "9d66fa05b5c940208f634f5053d809094c99a2a10a1d1e8847c8281fbd99fb49", + "blk.11.attn_k.weight": "14adf24ebb2bb17b336ca81cec3e690fd854782f4440ca6c66cc1d7e7bf1c850", + "blk.11.attn_norm.weight": "2d2213f311f50414702b5b34f22aafb9d9a0b6787243e7578562583dc40ad195", + "blk.11.attn_output.weight": "de1f14cc2a7fff00cf11b229f0576999205f17b9536e97abc9d6de3cc79a7884", + "blk.11.attn_q.weight": "2bcc5c147524003109ece0be08b89ac8b25baa71416ffa76573c6c052ffc6eea", + 
"blk.11.attn_v.weight": "2e6ab8573070c22dc1e0d7aebe4d52123226dacf7822dcce06fadbb38fb036a4", + "blk.11.ffn_down.weight": "1b86902f4e36868421e5228b9445051f8290b292df22a6d1af836dcecc1f25c3", + "blk.11.ffn_gate.weight": "e756e8081bd0a16aea4a9ef5076ad102113524f7a3d50a3a77aaa7f7938b63e8", + "blk.11.ffn_norm.weight": "6913887267be227cf9d1991a3dd8db2e7e74bb9b5fbdfcb9ac954fd7d7b95b3b", + "blk.11.ffn_up.weight": "619a3ac0609ebdf42c3fb2b6e4b1db48df79e6dd8418d7ab8f1bbff13d8a6a50", + "blk.11.post_attention_norm.weight": "e4b4ba92cef7b6a78407e8ab1b0307d47dac6c3df7b6817e28038317ff662d7e", + "blk.11.post_ffw_norm.weight": "40aceeec58cb855f0c158c9cc217168fcd5d0e735567d587217b1d78df17bc5f", + "blk.12.attn_k.weight": "c54c5a4d4892522022d1aa2204cfc624f0b4042caa536e678967316293fe5cb1", + "blk.12.attn_norm.weight": "7cd2ef58298569ffdf244d9b390f3917245276c8206e5780af5f96d8c0bbb446", + "blk.12.attn_output.weight": "85495ef9cc8b3deb21f741bde463ff6493acae2be51f02ecdeef952cbdec3375", + "blk.12.attn_q.weight": "d19383f83fd119bfb8c0280c9515705c11d8e7d502019fcf8f49efeef0d106d0", + "blk.12.attn_v.weight": "869ac669ba49531d9128892a0e27cef15de508ff40cdf80cc1681dde50d09204", + "blk.12.ffn_down.weight": "578f39f8f9fc2f09138afc884a952d7cc3a9a31de4216acd10e88e19e0b75f8c", + "blk.12.ffn_gate.weight": "e29a0186bc6c4a0720246306e922d3a83f777dadcf4ac80bad468287031cc8b5", + "blk.12.ffn_norm.weight": "e1ee95c6584b5cb57fcf1db8ce2bcc03aff91eb389238c094a61c00dde93d1f2", + "blk.12.ffn_up.weight": "2a826f06d7cdfb3edc6ae250ff44363ef77a2a9cdf96313e23a331b99ebfa17d", + "blk.12.post_attention_norm.weight": "4bafc7699b948d5cbc0d3e09b418b06c6abc4651a61ada9609d9a2f21c7e5607", + "blk.12.post_ffw_norm.weight": "bbb8c34a7176bb1a49f9fe2bacca0bd26b673d52c0835b2e90fa11f2962f077f", + "blk.13.attn_k.weight": "ffeefccfe8255d1b694382012ff4134eee5fec9d9491c8d0ff0a13832d1a37e8", + "blk.13.attn_norm.weight": "35713726529e3887c4135a88e86e8a4d7270ba5b9f2d1ab462622fbf40a7cdce", + "blk.13.attn_output.weight": "0d60b7c5cd71190a9ef4b873b0f516be15447c32d83914db2794b14592b0b460", + "blk.13.attn_q.weight": "8296069e65bef794cefc61257fc65789b3cb22955e30f3df129205e5041b2222", + "blk.13.attn_v.weight": "ca0f4ab9d16a748fc643a5c0c7a19826a811bf2a4e7316a8c935d4bf0ce8abc6", + "blk.13.ffn_down.weight": "d5514e0c8e7b3ed1cbcc1605eb5be1733b6ab3514cf8a0508fc72f7d05ed8bcb", + "blk.13.ffn_gate.weight": "8108e517a82e08a3aefbbd267bfa50a1668f92a76273280ce8a6bc1f6dd61521", + "blk.13.ffn_norm.weight": "5fcb6132d2134bf1f835b904a99820fa501dbc57d2224129f7098bf3cabc1d36", + "blk.13.ffn_up.weight": "6d744b7cd390a3cae3aa350dd379b81246acd056a2259996b6aaadece8465ccc", + "blk.13.post_attention_norm.weight": "e08b14698912509790e9575b8676971fbb0a4d82d719367e3756c0d0c4ab8cc0", + "blk.13.post_ffw_norm.weight": "2b196e4450fc5f1e7367b2cf7fe33a15fe919fbcdd861d11002346f16e980535", + "blk.14.attn_k.weight": "120e5f48d7268dfd9ab5f4bc9cc57a7cec63ea9635f56b80d435eb22936e9483", + "blk.14.attn_norm.weight": "146367bcce4db72cc894419a2e0145a6f533507dd68e4739c10ee480308c401f", + "blk.14.attn_output.weight": "720fa0165e756876c5cb6ad9e2780dd910390933f3f8849e5add5da04266650b", + "blk.14.attn_q.weight": "f5183466f56219ca1aca52d8b82c2d966a4198fea40fdd6b39f4d8b06ca2a6dd", + "blk.14.attn_v.weight": "24f8ea3d5512cd37c43c8329cb0da0c90d1895aef763ac2dcee3fe5157ec50a2", + "blk.14.ffn_down.weight": "e29960965b384ae5ab3d898a4dbaa8fddd28fa0e477ac28bcac49dec12a5ac67", + "blk.14.ffn_gate.weight": "6d0d6a74bfe9692e8f8eedff0fc34fc4fa1c8687794f35f2e2b033ab2d7510b8", + "blk.14.ffn_norm.weight": 
"f7036c1a9a71e046c9d2af16e9218fda5dbb0f7241ab44747abed1f0f9d602ca", + "blk.14.ffn_up.weight": "7d69ea1424007ffc9c12247dd0308c616e93ac02a59ec341cfa48f92d6ce3b10", + "blk.14.post_attention_norm.weight": "65b9712834d9445d4236bec362f3fb795c20d60c541b3dc6dbb7914d9b493e41", + "blk.14.post_ffw_norm.weight": "9c6a8da2e4e437d5cfdf3b9097e9f8b64bf07946a048badec20f4d374613f38f", + "blk.15.attn_k.weight": "864bc618303a0e4ee67fb1d5e751de61e936cd51e96669dd86f8cd08f2305045", + "blk.15.attn_norm.weight": "f9f4187da6eeadc2fc5921d8fe669741697d16c13d71e4aaeb73b82f50dc577e", + "blk.15.attn_output.weight": "ce2419a0b097036b2a31f2f4ad731d5814bcc2ef4c511786e24471e5eefd273b", + "blk.15.attn_q.weight": "9539db5a970d11ebe99722d1e13fcd635e250033630811efe583d2f97778e4a9", + "blk.15.attn_v.weight": "1c834b48ccd88adaeabb7d8bcb6be0bcd6d5ac1354ce88fc28f19a1a96b81ab3", + "blk.15.ffn_down.weight": "bc1f97a65dde6fa2c1e5397afb612266944b343f2eaa868b635ddd25829f8a42", + "blk.15.ffn_gate.weight": "1b14529d57056b79037f6cb5008132e62cc35992353b38dda59572274623103b", + "blk.15.ffn_norm.weight": "9af77458de9ee55c66f93865759f9c2c398557f94f3fa8fa6af30543d7339cde", + "blk.15.ffn_up.weight": "41d524a26b61a9595816b4fd53cf57ef50a702e4ef32933ff6136dca9136a267", + "blk.15.post_attention_norm.weight": "c60a03cd0e63a7db5c80015e58e9b97ba2208caa19f66a6fef5c4447eca900ce", + "blk.15.post_ffw_norm.weight": "34f7f9f96769215bbc3d17084df091864aef96a6645b7d0b3b7d9bd92f1a4b0b", + "blk.16.attn_k.weight": "7e27240d9f3a8c6cf0f4a980113d43234f514eadc3e3e1792b86efb29ffb1a6d", + "blk.16.attn_norm.weight": "af798acc0899282a30448edec48223b3e8efda177090273e612d8eca5e377301", + "blk.16.attn_output.weight": "79df39a3709d3d53e84146291e0944a7a653d06705293d9ccb5648dceadb432c", + "blk.16.attn_q.weight": "db58a1c3b83ad294804e5fd7321005719e200659173466df5a52a182b80b7165", + "blk.16.attn_v.weight": "2af6d48cbaeb225b5c1a704f76abd89c8ab1521417695b112b4dcc2cbd39b74d", + "blk.16.ffn_down.weight": "fc1c813eb5e7da3d6194569d6cb21602fc6eff2dc8e1b0eb753f2d5df148189c", + "blk.16.ffn_gate.weight": "7a80bcbc42464bd55df4814a6edbd7b5c153e0428323bbe49de55e2d2add33e7", + "blk.16.ffn_norm.weight": "2041685ee926d30f3f2ae4ec35b5688f1cd834167a6359a7d4057eac804c58b2", + "blk.16.ffn_up.weight": "8da4b718973ac1d43b928829bc45e062fd101984d6c98dd825bd7c5d08ebfbe3", + "blk.16.post_attention_norm.weight": "975c48fe680a6167438a106140a8872eee7765191f152d80e3b8ddf47693e095", + "blk.16.post_ffw_norm.weight": "4de2d4d483acfe4fc77860ea929025df2f4e15c10729413f36a18c94eaa6d689", + "blk.17.attn_k.weight": "f937e61f0af8c4cd98ee742648eb60e02e579683e21d421071295a3b70aebaad", + "blk.17.attn_norm.weight": "c3270583ed28b7e423f5b170c59113234f258169b93a867d9274f4c10b7cb115", + "blk.17.attn_output.weight": "b8c1150e81e685e539a5dcf2c19047a24eba2b281fabe166674b1d71ef4612ea", + "blk.17.attn_q.weight": "c255100ae2011e7dc7e3bf3bc3ccd96d859fbb98581cae993d7b82c1ba8e8b39", + "blk.17.attn_v.weight": "5830bb0a555984c6485348067f70b5d22ae337c011aa9248dac2ff4c95944551", + "blk.17.ffn_down.weight": "8ff9a7cccaa3776434a9d895aae4fb5c36c736bf2ec98784226b4c234940fbb0", + "blk.17.ffn_gate.weight": "1b52876739712831c272911533da206f407b46034a1a4ae8a88c1f96b6bd5747", + "blk.17.ffn_norm.weight": "d0e16ba5e87c91b545334e022058c7d03849665c3b1a6298771b656531366b66", + "blk.17.ffn_up.weight": "4dd6211d01dbebbe21052708eddc242b082a58b5f18ed16479e17987c1d3432e", + "blk.17.post_attention_norm.weight": "6f49c775c7417dade77ba8268a0f8441c1e5ec28b5d7e4dc5ed07a04d04600c8", + "blk.17.post_ffw_norm.weight": 
"b91a0bb2e6679e9c9be06ad323adae441d00a3d673efb19d7c4954be2aa84b27", + "blk.18.attn_k.weight": "22b565ace1b4da8b33865a58625be1d90beea9891f29686a69fa9cf7c93217db", + "blk.18.attn_norm.weight": "3e0160d7063c8753de65d2356a66648e47d921efdc5c917efb8209892120f8db", + "blk.18.attn_output.weight": "e3180f0bb4ca90b31e9b08158db38e332de62dfbaefe34aa94cc316409331e09", + "blk.18.attn_q.weight": "f3a5a83614c3ba7ea41cdd5b1b0819a241ee2a951a381ce4a9e001d3f700ed8f", + "blk.18.attn_v.weight": "f3350a5984fb951fc738adcf78147e6d812ff1c576670c460cafc99c253c1654", + "blk.18.ffn_down.weight": "9e9d09b13a33525e14bdaee6efc65c551ac7cf7680e534b940ab122a3a7c1ac9", + "blk.18.ffn_gate.weight": "ebaec8b4b578a2e8d815baac12f1675c208f80c68074d5a18288a2e1a60680ee", + "blk.18.ffn_norm.weight": "33e7687c53a242f2f8dc7093a491c97b18d4a5a8c14d183f02bd586a770f05aa", + "blk.18.ffn_up.weight": "78a1816662378ce56cc870e705174492781897b3afd2d4d97a51f10f2f2987c1", + "blk.18.post_attention_norm.weight": "a58dde3f12df3e94cbc27d87c8ea86f89af8a388a506446ff6758f05399b05fc", + "blk.18.post_ffw_norm.weight": "cebf90cc143577d483cca27b032dfd82031ee59bdf17c0e2cf60a0a3ad5bf996", + "blk.19.attn_k.weight": "4683375d0599ac9e2232196aae1e90af13a14cae26e865465de5c8e257bb2055", + "blk.19.attn_norm.weight": "f3eba936bfb1814bbcb0a1d62739eb66daac839df8c9c836fe0e94860df88525", + "blk.19.attn_output.weight": "51c0f01d38a9dcfe9bdbc4643576fab164c1d9e4b7168b7695c0ee55e6965667", + "blk.19.attn_q.weight": "28d15b69b8416f2e7ddc88fe381cb1e2ef2ad705fb1c268139ba96498cc74848", + "blk.19.attn_v.weight": "6860f1cd720638e63a981fa2c0b4db900129826bcb9823c9ddf9fb8b1b9f3383", + "blk.19.ffn_down.weight": "bc7f2d7827ee01c2dd41401c7b3b1700ad3a4ff620e8bb734f92630d342dcc7f", + "blk.19.ffn_gate.weight": "54d03ef69ba373fc410fbca8f1e34a565d58e4296d9a035ff7e48340b9c848e7", + "blk.19.ffn_norm.weight": "9178fc796a340ee6e8128ca74c0cb6203d1adbed6927af4e5ac7863da57affc7", + "blk.19.ffn_up.weight": "a77bd708026c6e83ad5c79c223278e74621bcf74a9641c7818d96b595daaad20", + "blk.19.post_attention_norm.weight": "ae94aa26f4c411bf9496a6fd4a6df64ee589ee1ae9a04b531d45acc95721e582", + "blk.19.post_ffw_norm.weight": "9ad210700edeef12133bdcff04bf1c7f62b49f6f4a9ba483c7cdc59857c24a5c", + "blk.20.attn_k.weight": "e35bce1e9f4a7a09ef34721f57ea38cfca68c272f52d923fe50af8308f66cfaa", + "blk.20.attn_norm.weight": "644800f6926fd34f233795c4dec1151a295d2138ca8cac33e3e48167d26f8b41", + "blk.20.attn_output.weight": "8d3758cd236471741e1ad66c0710cb79077dc8c7a3a292d35bc551c0c5abe627", + "blk.20.attn_q.weight": "c333b1f0f6f956b5d73891df10b1a0321e55fc31c40d623a24e1f52caa6a998b", + "blk.20.attn_v.weight": "8562b418d0c4868a050fb19fa3fcaf50a8cf1c669f537d666c80c7b3a04714e1", + "blk.20.ffn_down.weight": "97efb608ac44cc804198faec3ee66eafe56ced6b7ca5359700c6f1df75b7205e", + "blk.20.ffn_gate.weight": "5c61151d86f28415c73c73d90ec088c646cbe5c1640197caf58eb501ba7db293", + "blk.20.ffn_norm.weight": "24bbe0a701afd4bbeea65b3edde712b3cbb2281043bbc43dbf250582453116ed", + "blk.20.ffn_up.weight": "e170cf68e249566aa99eb6f6b265679bf9a5a6b76830ba24e7e130c2515910c4", + "blk.20.post_attention_norm.weight": "e092d751cfe20dbf2d348358f3b38397bd83e4ed94d6bbaa6bbaddcd902b2ac4", + "blk.20.post_ffw_norm.weight": "219a18a47dcba76e669e4322223a5a9227bd3db1de3fbd3d3cfb22e54a783c5a", + "blk.21.attn_k.weight": "c3a095ebddb42c63824f1c98da65263dc88e4d790a26aa1632840b44f5cc7cb1", + "blk.21.attn_norm.weight": "ef8bbaded5fbc45ad9cf3985ae02174524e7090fe6362811124f942ef643bec7", + "blk.21.attn_output.weight": 
"668f018aba72baac6252aa3ad58569ddd55ab751a0dd8d7bcc9fb9b6efb4bf53", + "blk.21.attn_q.weight": "e759c65663089f3bbbd51847934c185e680c82f1249065d5d487da638e519e6d", + "blk.21.attn_v.weight": "2ff57762686cf9ba1f5a6be76503454b97556ce67f4ac98254bd0562231197ba", + "blk.21.ffn_down.weight": "3fd106556fb721b1c28ae3f4026bc83eb1b08ed910f2ba5f466c6b5f327d91cb", + "blk.21.ffn_gate.weight": "338022d882f4b6619e8054a6fb909696fa3eef3013cf69b65c3cacdfc5b9e42c", + "blk.21.ffn_norm.weight": "1e77660c23a3f9653ee721a863d1960f773d87437cabc4dc0a6e17ee3d4e5e44", + "blk.21.ffn_up.weight": "7d31b20fbc2e6eba8f350f170069dc36f0cb12f68fbc4206ec5022a74085ebcb", + "blk.21.post_attention_norm.weight": "9638bae8d8bdcd7ed68da282979cd84a07c41ff9cabcaea94ebc846a1803db23", + "blk.21.post_ffw_norm.weight": "d622ef11115fe0cbe04b727d5a3b6371e7f39bf08c8d5eb9bc6da52e3f3cfb9d", + "blk.22.attn_k.weight": "5c321cb29deffbe57de200dd206a62005f1e80acb86c4fd2349dd44c8d3594fd", + "blk.22.attn_norm.weight": "198d949705d7170a331d75889d8c7500c3635254dac2cc6aa4dc35d556584536", + "blk.22.attn_output.weight": "19805cd5d7025b457e5d41d70db8b3fd63c2dd0e4a94d3ef1704d50ef4e749e8", + "blk.22.attn_q.weight": "177836cd583fc87405975ddc21ebfebdaa090a0363799664c72caa3da851ae2c", + "blk.22.attn_v.weight": "fea255692483e30d0108f9e4e250eb3ed7dbda8d83f499b06519b8c223ae6096", + "blk.22.ffn_down.weight": "00cb8939f03e5817d6d412de8cf2c923c9568d5493e382cec7faf5718fb034eb", + "blk.22.ffn_gate.weight": "b0591065b91281b2fbd8a9567f3568d40479f680e1f0a29e27ae213f37642489", + "blk.22.ffn_norm.weight": "96b5c5d0737c2ceb8fc869f54adb9e5f46e28cb7b177c40f49fa926b923c00f8", + "blk.22.ffn_up.weight": "81f472185b24344ab0594ea8246cc6e200e0dc1cab4943e74fbe4ca19d5a9701", + "blk.22.post_attention_norm.weight": "27fa9aa6260aa3071e0391e1a1d49322dcb6e8072315b8a9b7064087108dbd06", + "blk.22.post_ffw_norm.weight": "f37e1dcd7f643d9545675ffe9dc527a11eba86eb204989c2f44f636b266d896a", + "blk.23.attn_k.weight": "5d82f36658a56c3f94d0bb2d61f65509c966fa6568f81812e0d3e338b380ef8c", + "blk.23.attn_norm.weight": "b7983f88d9cad88bc88a528923e6da592ad20e699965b223ebc10840fe1f4fec", + "blk.23.attn_output.weight": "59f97f80f430d71606aab0158a195aed29ccd3405e6c0a5c41c809be8eb01898", + "blk.23.attn_q.weight": "53ac4789fe958919cc02ea4222bcd64c0ea1b4baa54304bff46635bdf42f7490", + "blk.23.attn_v.weight": "ec8abe09b9e84dbb52c7a068094657c6d3c62fe551ba8d7c3a3f23da622e9756", + "blk.23.ffn_down.weight": "3cf547eccb1b82aa64f208cee9682d7f558ca84e0aead7d9d3d1420d90f3d992", + "blk.23.ffn_gate.weight": "366aa2486d911ba81eb519119e13807deacf7e9908bc1975a2a63e00d6b10124", + "blk.23.ffn_norm.weight": "6d1d4a4af34bb7dc090ac87d6457d398c3e0fb68bd2e2b60b099dc318b6cfac3", + "blk.23.ffn_up.weight": "53f76692e253f5d2420b3f200c731b9f3b7a83e379920b4a067c729b4674aa4d", + "blk.23.post_attention_norm.weight": "7c952fa0efa76b3f048c8c4c9e8dcb5e3724d231327eda6423a34d3f3d3367de", + "blk.23.post_ffw_norm.weight": "7ab188cfe61f0a91b40309a0ab6bfa99f19d0ff2a37b6ac10e5f0c7f44eb5270", + "blk.24.attn_k.weight": "225798792f9bfdd10eff0505ebe61e0aad0209c17b431f6044ee7968ffe8c198", + "blk.24.attn_norm.weight": "635e3c1ebf5219bbebfc40ef164bc32d2b726ef595a94da64ac524ae878e2915", + "blk.24.attn_output.weight": "482f5bb2db8d9ed22b253d9a3296333b239efe698e5992e5d77e7e12dc2a5cf5", + "blk.24.attn_q.weight": "43805bbccddb65d58fffc4be9b5c374d4e1df1395ec1e1ffb4bcff03e98d5adb", + "blk.24.attn_v.weight": "fa741af54b4a3b1775d32f59134756090c5df2e7345a12a2d8db94fe289667a7", + "blk.24.ffn_down.weight": 
"83c6351e3162626b276f524a57836144625c2556dbe321b57cbd8fd486a68fab", + "blk.24.ffn_gate.weight": "fbe66be0d84d12cea5176cc7eaef64382ffc7324cd9d6266a3342dc43442f2ac", + "blk.24.ffn_norm.weight": "77c1445a8639ad24938bdf0280233eea2362d47391421833dfa72ec756dfc1e8", + "blk.24.ffn_up.weight": "78235ac729ee23c1cf1ae543751e3af32776d8808cee6e529c2a625a1f027654", + "blk.24.post_attention_norm.weight": "161f71b6d07628d43e4ae51a4c9088ec6ca2db123a17986a14505d83fdd04dad", + "blk.24.post_ffw_norm.weight": "cf1ba692aa683368b02ac413e69b2521b98c69a5274eacbb54165b53bf38a8b2", + "blk.25.attn_k.weight": "057a56bd8c8d2b41608d1f71faa3052902152ddf85e47669ad950c1c3e77c33f", + "blk.25.attn_norm.weight": "b7179fe02c334da556ddcf6c1b502245639a728c4cbba8b552d8e1df4565ee9d", + "blk.25.attn_output.weight": "4fed8b05b08a0ff75ffd022701bbeb52f17b23d09332a1ddcba737244bd0d3b0", + "blk.25.attn_q.weight": "c52e99f5d38bf7538d6106a0bbf38ac6dc6296bca9a3f849afa384ea67b4af01", + "blk.25.attn_v.weight": "c49c23d8e1cfa6a8eb971eb69942204890c6d7d830dc8774c84b108a80598912", + "blk.25.ffn_down.weight": "c08d4dc8412b19fdc870c164b83c341b236ec6fe7bb4a9bcfe0dc100faa20286", + "blk.25.ffn_gate.weight": "1a4cb3f36735d59181721471452807903006539e5e1b5ceb4f72d1d7ae134127", + "blk.25.ffn_norm.weight": "8fd6bd0dcec5198761525a36992a57c9ec5e9da60a22092839a84ae8c4e87f26", + "blk.25.ffn_up.weight": "3a00f39bdd5f31dc5e3b281d2002e1ac4f2475d49a0ac1d7720a25b377dcd04a", + "blk.25.post_attention_norm.weight": "e5f31a648612c859b6d21c9ee426e87a86cb1973dfdd86276c767371d9cef5ad", + "blk.25.post_ffw_norm.weight": "553c3bd774922c99c2384380a142d019881d30dbf0fe3bf9430dabfb3f6cbd33", + "output_norm.weight": "49445c4585ab0a8135717a0bdb1cda4a062a030177d0119561d91542aec5744b" +} diff --git a/convert/testdata/gemma-2-9b-it.json b/convert/testdata/gemma-2-9b-it.json new file mode 100644 index 000000000..90cdbee47 --- /dev/null +++ b/convert/testdata/gemma-2-9b-it.json @@ -0,0 +1,6 @@ +{ + "general.architecture": "gemma2", + "gemma2.attention.sliding_window": "4096", + "gemma2.attn_logit_softcapping": "50", + "gemma2.final_logit_softcapping": "30" +} diff --git a/convert/testdata/gemma-2b-it.json b/convert/testdata/gemma-2b-it.json new file mode 100644 index 000000000..0482f1e14 --- /dev/null +++ b/convert/testdata/gemma-2b-it.json @@ -0,0 +1,188 @@ +{ + "general.architecture": "gemma", + "general.file_type": "1", + "general.quantization_version": "2", + "gemma.block_count": "18", + "gemma.context_length": "8192", + "gemma.embedding_length": "2048", + "gemma.feed_forward_length": "16384", + "gemma.attention.head_count": "8", + "gemma.attention.head_count_kv": "1", + "gemma.attention.key_length": "256", + "gemma.attention.value_length": "256", + "gemma.attention.layer_norm_rms_epsilon": "1e-06", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.add_bos_token": "true", + "tokenizer.ggml.add_eos_token": "false", + "tokenizer.ggml.bos_token_id": "2", + "tokenizer.ggml.eos_token_id": "1", + "tokenizer.ggml.padding_token_id": "0", + "tokenizer.ggml.unknown_token_id": "3", + "tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8", + "tokenizer.ggml.token_type": "485e40bf3d715a4764818fc097d6a2a41db872d82ee714bc500872a3437ff48d", + "tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda", + "token_embd.weight": "17b87ab2c01c80657855a5413d0457b4a041afaeda0cc785080e44e2f04acf07", + "blk.0.attn_k.weight": "28ac0da05754ad2714ae95da28a5ad191192140b30b8fd22d108d4700c9d989f", + "blk.0.attn_norm.weight": 
"3f9d5675d1ab0eb8a816719dac9fab81f2e95c52be02c34263339acbc087febb", + "blk.0.attn_output.weight": "703295c2c63990ff896778685c678f145298886f680f3ed5dc2a7ad54c293265", + "blk.0.attn_q.weight": "69c2d0e4870e9d722a190d356203c9605575a16863466c3d1747966ef1cf5791", + "blk.0.attn_v.weight": "95219c9c07b5ffe9a9a01e456d845eef2b11f4fc12c93dbbba479db395444c13", + "blk.0.ffn_down.weight": "a2feb5eb3d572c57c5bafbf0ab506862df1160fe40965dcfe4b9fd855c08bed7", + "blk.0.ffn_gate.weight": "fcca072c445c31f4dc4d5dfaa785b1bdf7271342442099b74fd17268b5829fbf", + "blk.0.ffn_norm.weight": "7621f95dbd245cade6fffd6b08797d69d8e3954e960f0b5551b90d967ab95448", + "blk.0.ffn_up.weight": "14a9bcdd451403c67136391e1b6e53b3b1830f00199bd911dbcc56d8749c14f4", + "blk.1.attn_k.weight": "c70f73c5df20579cb44d971164b48b5f0d8d5abdb38b381e7a8b880ba12aa406", + "blk.1.attn_norm.weight": "88b6b91f93a1ef83425a7c7dc2a2fbd3b22704a04c64a80061df376ac8c33626", + "blk.1.attn_output.weight": "f031a537490c452be3b3bb51e6b7949a636405756e160976a1c070a792ea00ee", + "blk.1.attn_q.weight": "bdb23214b1cf9cfd30f863a0a5868e52c6809d93b7e8f44df096a94204d9896a", + "blk.1.attn_v.weight": "e9bbc0b05f2c872fb1403f8f938cd1612b502229ee401f12593b1164c61acc00", + "blk.1.ffn_down.weight": "5ff53811038b661a7b8f2bfdf213bebfb185ec1a6060b662f063714f33584d79", + "blk.1.ffn_gate.weight": "205085c8c951a5c7543b1495183cd96028fb49f67464b3e9862a2693a6077a33", + "blk.1.ffn_norm.weight": "798f354fc85afce9625f5d10093a585a966831698a0560e6c9b97ce659eb4b22", + "blk.1.ffn_up.weight": "db92dc5684cb6e90940e13f4d1da555ed20ba4f8cab1e990ddfd7553e2e91315", + "blk.2.attn_k.weight": "ef5ce360c4eed6d00d03ca4761e0f8e4b0af4509978468314be14f3d46621044", + "blk.2.attn_norm.weight": "6dadbc05dbd0d3fabb4216affa60a3de1378a82d2859dc90b338cbe70f50d455", + "blk.2.attn_output.weight": "6bbf87a966f691bbfd7c8d25629aa4e6710107bd431a667434861febb391edc5", + "blk.2.attn_q.weight": "4e575c09ae2de417ce9057ce8b073680e860a24aae13a472b68f101b760752e5", + "blk.2.attn_v.weight": "cd33f7f01141e9439afdaf2ea1aaced9feaa335e32a58daa136ebd555d4d96f4", + "blk.2.ffn_down.weight": "b970ff1b0b6494165defe2fbfa1d31425766ed71e64de9ec4e66ac3955c8bc5f", + "blk.2.ffn_gate.weight": "dbb3e1360402e0e369b101995bb686b73f95d4a7673f061be85d64d15dfb0061", + "blk.2.ffn_norm.weight": "bfb7980105d8ac9647710454f57a5cdac50598a0f6f4884e16f1d94b00844687", + "blk.2.ffn_up.weight": "50ef89339b275a438b664686f6227dd9b6e43853ed6856ec9e33ef4bbd90bda1", + "blk.3.attn_k.weight": "be942ea98151434eebcd2c1da4b00e0146152fe524a530689b1fd491cb833d21", + "blk.3.attn_norm.weight": "0df2f218daf609c289fb7c60c5f375fa99c0d4e04381ad5a494a19144edd8e20", + "blk.3.attn_output.weight": "c2184aaf86aa2cb8f47be49f60b165834e97205f39c6ee1dfd19fd4411a156ce", + "blk.3.attn_q.weight": "4f86e2a0a4221c1c84ff9c409ac89893cb95d7208cf65bf1e98e24e01125f991", + "blk.3.attn_v.weight": "abfdb8a60c349dadde641d1afc9542025e24fbf41a3238bfa9675e0b1f1e4b68", + "blk.3.ffn_down.weight": "58821a8d87008d47d122427911c6fad5272aca70c448bbae223256a74bacd07e", + "blk.3.ffn_gate.weight": "776e051f1a0ddd5c4934e69186683a75ca9a3c8c0f61911bba321fed1dd287d2", + "blk.3.ffn_norm.weight": "7f380f29335e28be90bfcfae6f6d69fdf5751211b36d2dd62aa5541ed113e4f2", + "blk.3.ffn_up.weight": "fc5ae8d488894cbd4951059675468d227da27871d26e925c9941863841c097ee", + "blk.4.attn_k.weight": "14833b078cc4c5137bdd5fdc0538047974ca147a99b0282e1b144440c78bc1db", + "blk.4.attn_norm.weight": "0a69957d4a15599fb80ad4753558020804925221457d9a5052926754d3768065", + "blk.4.attn_output.weight": 
"887a49b6130fb6297cf10767207c3dd97191b2cf63723449af9c27bca8dbeda0", + "blk.4.attn_q.weight": "51fd577b76764824dd6f0d4891c137ebe4736f591b5ca2793c5fff2be49abbde", + "blk.4.attn_v.weight": "1a623c43cf9c509d1b7ea0d1a5c04d0af4809665f9f9e93b7d6dba8c5df178fa", + "blk.4.ffn_down.weight": "5d61e8856d8941d2b1fd138116d015f63840d0fa1e31e20e20a5ceca1536ceec", + "blk.4.ffn_gate.weight": "06640f7273764f8ca5df7e386547417916b6cd7d565a8343153113239a94b0a1", + "blk.4.ffn_norm.weight": "91a6c6c41b894228e361435ecbc5058dca34d4911a23da5b56de219299c964d3", + "blk.4.ffn_up.weight": "d016dac1055e36d6a10b6317e57f98a904709ea892ef3194342f4d2f6326561e", + "blk.5.attn_k.weight": "987146afe124131500808cc0da33c06d207433656d41df6e6d8c99118a83bac5", + "blk.5.attn_norm.weight": "6b354938966f2608a2fb8d0f5b363ed0d8b0967c2ec8d0abd5c625b413042ded", + "blk.5.attn_output.weight": "cdcbfe02c6ff79d5326882b017a02099f5af71beedf6b1b3eb4de01e3a844536", + "blk.5.attn_q.weight": "b910d0cff781d3efb42eab0a302f46f286b2de717079175680d5b42bf8c309c8", + "blk.5.attn_v.weight": "66d3a279f747412f9f4b0e8abad44540c122ab2e811a7ee74c1f33bc36caade9", + "blk.5.ffn_down.weight": "c9b0efd2212981f16d956d8571f054b68780ad01f4917033647e359b557a4653", + "blk.5.ffn_gate.weight": "fe96b94109ca141c01f6a04788e20783019ca6ec334aa1f3134810bdb499e557", + "blk.5.ffn_norm.weight": "aa7b016e832e7055a36c6e20de58ea1936f995f390401fff1c5fc65906064e49", + "blk.5.ffn_up.weight": "555ce27c4873d3375394f38ad3b45e3d8848f9d5642dc1602383d0f0a33c2a14", + "blk.6.attn_k.weight": "88280d461db324c4f36475ce396793063e61a27283ec64511b0480890fb5b3b4", + "blk.6.attn_norm.weight": "af8f460c411f660d33196286d208f1845fd5a2b45f7b56549a4df31e7515447a", + "blk.6.attn_output.weight": "dd9996fb0a256e8375ad3917705258a33fce006bcea0f536caae420a77974d8b", + "blk.6.attn_q.weight": "7a4841541191e037cfb9b07930c4d8cab451809658b182f0ada6ccde9615c003", + "blk.6.attn_v.weight": "ae81e6a592b64d701a9d40233e986039a56cba8d8d24f61aea93c6393cf3078a", + "blk.6.ffn_down.weight": "622dd1ce1706355cbc659a8ab2c4509678ffe0f3ad34258e5e25ed2a5d951bcd", + "blk.6.ffn_gate.weight": "8389a735c0bd5591010f8ced9805a2a12c749f6df0d3c18ad4d05c2a302e7168", + "blk.6.ffn_norm.weight": "621f5346400382474d61358397bd58fb1459b07c53e376e4bca15e08b3f9b3fb", + "blk.6.ffn_up.weight": "8d834e4c42f13c251dfee36cf89e12f1bd400680d00d5c2e6cac0459e9ce2f7f", + "blk.7.attn_k.weight": "8bd0412de65a3e64901ef8fe6a28c95e116bf39dc9aa22f0126b9d36688e5ea7", + "blk.7.attn_norm.weight": "056d8e56be4e87d6dc6f900762f0dc6fde07bfdc50dd85bfc510415e2bba3f3d", + "blk.7.attn_output.weight": "27972eda51da53d416ff95aed78149a2c5a287b47d2cd46f2f544ca692ecb3bb", + "blk.7.attn_q.weight": "41eca977b9371f7932800c11a9c45b931310196919e2a0651b847703b180fc7f", + "blk.7.attn_v.weight": "13c74fd7e07f08883a09fb070a1fe5bbdd2341b4cb8d1cac07c4b637049b5774", + "blk.7.ffn_down.weight": "9e75db42468800849a9a7da603d0072c5e86c8ed2b4d8b20a312a51fb86a7a10", + "blk.7.ffn_gate.weight": "db6bdc3117f910088aaf7db51f2da63ea5bd933de36af5599c215bfb26f7db2b", + "blk.7.ffn_norm.weight": "48bb82b49bfc8679a1e77f282ee182d952db7a3c11be7ef9a102ee2ddd8011e2", + "blk.7.ffn_up.weight": "feebea87175817a0f3585ec0af09dc873d94c203581ae97a712eb356d3b49efe", + "blk.8.attn_k.weight": "d5640ad71b6af68d88e17bf8e7fc26c907d2262605457a84247dd9afc2884d69", + "blk.8.attn_norm.weight": "75b850c481a69083ae09d0207ba7317b37c735a39fcf5fef5400e6c84fb1257f", + "blk.8.attn_output.weight": "cbd669dbdea2bdd90f9f0cc97566b3dffff3c56cecb4f47290ceef30da83b2d6", + "blk.8.attn_q.weight": 
"9edcb63087a431bac361822497e6ecdaa06d9ea4a1a754e36da7ba9f8db81c7c", + "blk.8.attn_v.weight": "3fb72c2c4f95a83626aa3e30062f9450b09ab37c7871e229f18bbc5cf744633c", + "blk.8.ffn_down.weight": "bd69d2c9172974fff154441b237b4787fb53b2d185325442d5048130ef5bc4ef", + "blk.8.ffn_gate.weight": "d04689c80553edd011d1cbaa5d570fffa7fa91e88b66cf1352d89ab60b72f908", + "blk.8.ffn_norm.weight": "e49984183b735b7f2c4e4730c289eed9394056d2e283a00fd83ea0915df31a73", + "blk.8.ffn_up.weight": "8fe62a1ce8e847e567add6c6f6bf2922bc467495b5eb4c116b3cb85b85b3b211", + "blk.9.attn_k.weight": "d90904959e5004cf0d6e729c6bff18cc33c094798b802473c1ec55ab8d276183", + "blk.9.attn_norm.weight": "79277f290cc07411115d8fa138045edf4a17b3416ab2145409cbe8ab829fd4ee", + "blk.9.attn_output.weight": "5a21bf2e1f09a81405025f96d4153ffb630158e17269cff8ffff935c38ceb1a7", + "blk.9.attn_q.weight": "51b1d0febc3b350945be4504f55afa4347517bde0f710e1a4b88e6b17e71e7c7", + "blk.9.attn_v.weight": "aab7e1db0a8b50a03036356791ffce736ab010d15674c96eaef8049d80076054", + "blk.9.ffn_down.weight": "cbf43ec84becb40c9359a181ab0e641fd7faae7d34b549501f7cfb7afdc3d764", + "blk.9.ffn_gate.weight": "dce0e8661c778327bed7f03b6790d26710764188aed9dc746e6e05863891fa57", + "blk.9.ffn_norm.weight": "6d41642104f995c77bf31122b13237caebda3e7fcccb1367ce91db36b015e923", + "blk.9.ffn_up.weight": "82fe4c67bf24e7b2d6f6e05f7b1234c2bf90c3932951091a9066211b8e15ecbb", + "blk.10.attn_k.weight": "f6a9ed8fd8d3229b5d03175c413ffc56a07f2ce7236271986361dd3d8993f9aa", + "blk.10.attn_norm.weight": "cebbef89f0326ca8e02df3867a571e4d61c20c2a12f295f98ae590d62bc86010", + "blk.10.attn_output.weight": "34f5efb86accb4f06347d83a32558ea8eab3039d128969161a741ebacbb656ff", + "blk.10.attn_q.weight": "1e0efe27df2d5d50f7157253ba2cfd436d6781c3dc78ca176d0c16a210b5b763", + "blk.10.attn_v.weight": "8f085bf50a2b0f83cd6cdda3c8ef5a9e204a36348ed95871aac725d1f68640cf", + "blk.10.ffn_down.weight": "bf3b3cb4cace435809ac7b4cc933f20853af12f1f272d3dcefe7f19c0f203b8b", + "blk.10.ffn_gate.weight": "d3df7a1413b1c5adf1a1dcda9e5225a15c89874bae53bb6137ad1ea42fca2d34", + "blk.10.ffn_norm.weight": "a1da603b0480471b5ed8e862148cecd5fed918f8304d6933ab0bdb25b8d2fb8f", + "blk.10.ffn_up.weight": "bffbba605922e972dc47dda88a0b4659aa52236c76e5fe861a949e6d9a367492", + "blk.11.attn_k.weight": "9f31c63d66cd32c29b1eb8bb829d0c8525ce2ae936e0eefdaab6335a2d12a3df", + "blk.11.attn_norm.weight": "0bde1a266d8b2e8f202bb7e2e88b19147ca83021901f6d3cae77a4df5548c754", + "blk.11.attn_output.weight": "e10725c7cf746ed4a7e472cf7aea6cb564e5db6a1d5197adc980d650a387ccea", + "blk.11.attn_q.weight": "05ee758a7d065802630f8c65dca424364c1c8825e389aa33f9405c45e8a50cce", + "blk.11.attn_v.weight": "0c3ae7090f11775d24c51120db6e305db6aff706493e7ee123dcab74485ba789", + "blk.11.ffn_down.weight": "7ba40b8e12c09c5fb2006b77a771cb01ce894e88a3b3e1877f927a5b89c91709", + "blk.11.ffn_gate.weight": "db76388a023b98097972d354ba1c6a5e26efdeb1c596b9c28bf2cd8f6596975e", + "blk.11.ffn_norm.weight": "a38c3ae1b89a68ddc7b72c99c5b28be7fe3787c4fad9904d0c43d64eaf00c474", + "blk.11.ffn_up.weight": "13c8142f9cf1eddc658babf978daf3515c4ccc45f849f3e7e3930aa18a8480a0", + "blk.12.attn_k.weight": "f03241c36ac87cb57429a2ef22186b8d7d0b590a8b173beb01fa13d93772f3b1", + "blk.12.attn_norm.weight": "4568f654e6d65104d586e7c16ba960c83428698ce103022b7e0be15e2884e13b", + "blk.12.attn_output.weight": "04867603f82f91e41306e09b33ecda0104b3ee4834061f2c0bbdc8da33c72509", + "blk.12.attn_q.weight": "70fe04b9a8e08b6100cc8d6b58bf4cbbad15ca1de82d63baca5d352ba6c4cbae", + "blk.12.attn_v.weight": 
"15cb28db61a86c98687991d7e611bc92a1fcc6007f3432149cfb5fe518a4f65e", + "blk.12.ffn_down.weight": "6d10c790a4e3dc44c2dc36d96251ae97cdf30a4fa04d4c43e31bfbd038e6a7b7", + "blk.12.ffn_gate.weight": "3462a2d8f6b4743b25e24da51b90018ac2858d05ac7e582bcb69063cfdac1104", + "blk.12.ffn_norm.weight": "1f96392c1faa34e34ae5dea55a6a86c5aa4c79758952075d53d28de89dd88456", + "blk.12.ffn_up.weight": "d22eacc612a7411953d948483c5fb201e11722955ee0754da866e7bec578ac6d", + "blk.13.attn_k.weight": "5864977e6b733ea942647d6feed5c76156c48c200649c22e4e11b9e5860e57f3", + "blk.13.attn_norm.weight": "87e053535144723db4145aa5402acc54331b7696752d852bb9fc542ff33f0fb5", + "blk.13.attn_output.weight": "078145f5ad83f8b14f97a869346f7fd1583b24d1e3edadaa95d3da4242973f8f", + "blk.13.attn_q.weight": "3b8caf35504cbc4d1a7dd6e011a95760703b7f71e2218b030b1254f811362dd7", + "blk.13.attn_v.weight": "4fdf8365a603e043e5b40c4a21c84ac167f9be62794178f9d8a608dfe5653bf9", + "blk.13.ffn_down.weight": "a07d3abbfcacf48ba028df2cab895be32cc15022d23389a745286e79c1b1d1fd", + "blk.13.ffn_gate.weight": "1d2ab39666aa2909acc96787432a3ed13b19d25170f74665fadff9b17bbaffb1", + "blk.13.ffn_norm.weight": "4f2e809fda5f3eadf52578ee50e0ba36e53be91e55dce418c12dfe595f5f18e7", + "blk.13.ffn_up.weight": "8783d2720c2c37ca176a5801e0b3ef1f9cc9cf3ef1cd37af423aaf6b2a27e2bd", + "blk.14.attn_k.weight": "ce9428e2b55d43ae0c6690dbd56182f99adc427694ba8236b405cc8ea5035e86", + "blk.14.attn_norm.weight": "6abb35f9db8251d6ae954bda147c6ada2371b0574d11702e828f3c6ac99b7cc0", + "blk.14.attn_output.weight": "fe3880916d0ceb5bff672c88bbefb7060a545be609bf049beb2024b38221836d", + "blk.14.attn_q.weight": "7c8ad81be6f4a350931fd108b5f7c9e366e8c26ef62d1d85ffef5dca8fd893f8", + "blk.14.attn_v.weight": "e4bdedffacbebe38567a0734dfd67db90e911d9a9669fcde9a7c4ad8a0066c52", + "blk.14.ffn_down.weight": "ef6694dff1e05820aac0cd2b22f39ac7788b4967afc9250775575554c66aab2c", + "blk.14.ffn_gate.weight": "db63c4179e2db704bc505e2b4696e055b593e295a1b7c4c586fc793bdd5aab19", + "blk.14.ffn_norm.weight": "2796a62d832a9710148f95d533320492a33e712b2e5218659c548705bd11684d", + "blk.14.ffn_up.weight": "3f78c78d8c2d54df45f799d4ff902316628af296834afe4ceed63d4a324ff03e", + "blk.15.attn_k.weight": "6e810ee3859e07695645ee0c9a5efc7962668984a5f0a9325f47e462743b447c", + "blk.15.attn_norm.weight": "0956b576ae96db0b28cb09f761f801cfd9281432284664f0fe181c8d9c55d1ec", + "blk.15.attn_output.weight": "03a17f7e94208177aace5cc41b7f54670ba57873b7274ff6e23caf58cce110ca", + "blk.15.attn_q.weight": "b8edafe7d2216a6f8b4ae4905a906475490e6ea418f6e1d3cec563dbdc6fab91", + "blk.15.attn_v.weight": "f8ae8cae0f4cfa34a459824eba57350c3c248104ba5607e7d9dc7d7c39aaf4a6", + "blk.15.ffn_down.weight": "8d02eb439da852246d2ca67e9b7b6de0b090b80744355e64728a23e41926505b", + "blk.15.ffn_gate.weight": "ed5bf361c67db8731f186b775826f21c33bdb521111fd2d922539719a770239f", + "blk.15.ffn_norm.weight": "5942ca3c73209ac9a0c8bfd9b4aab7f7be7aee9aa12d9c35833493b44af76767", + "blk.15.ffn_up.weight": "f4bebf4ad99ec5f911327dec347be6c595814885309c7bc5647ce28c7f4d1cf5", + "blk.16.attn_k.weight": "756a534c19364448e0958b8948fe33891c6ccda0fbb4dfa2024e1f532a87804b", + "blk.16.attn_norm.weight": "386b7b9e4e6509f6af9c022d942b6c6c6cc136aeed8751ecb037c74d7c4bfb93", + "blk.16.attn_output.weight": "3ba1a766a25830b84d7c22178203635f9c5624caad290bc5e5d73da5d5e7a2ec", + "blk.16.attn_q.weight": "d39b0c91e1fda7685d50a0f7cc8d18c44b5bdc90a142c7fda0bc329cca1afa74", + "blk.16.attn_v.weight": "98b33fcb0ee3483cff1b06ecb44d7b7ffb4d34c268248e4d73dfdf82b2065b2f", + "blk.16.ffn_down.weight": 
"14006f5e4acb2f9416271ae562e299359cd2585739c7fc77ccbca54495563948", + "blk.16.ffn_gate.weight": "12f8abae2d301d8f88bedb6af98b1daecc7b0b8d05148594f931f30958d77aca", + "blk.16.ffn_norm.weight": "129a15a046ee96d06de288bd43c80f77a6b0fb3a159c7367154c6e4aaf362672", + "blk.16.ffn_up.weight": "b4a5911a45f3871ef1d4efb7dc7108645a564b70f818eccf45beebef2e844ee9", + "blk.17.attn_k.weight": "5e1bfcff0146ebdde3817b656952892eb671e14e75afc92fa53f84f8eecbec4c", + "blk.17.attn_norm.weight": "60bc988fab7c4b29ee9de599df41a8de00caa94fcd74677da011fac82f60f465", + "blk.17.attn_output.weight": "ba49b40d6a0b5685f749c24b0edbed3adc44dbe13b5d5e5fa1e56169fc746555", + "blk.17.attn_q.weight": "82bb415d24efcd14d03ace03f907bb70db6a204c76a0bdd1892e0fba165db87d", + "blk.17.attn_v.weight": "73dbe54beb91a899884e275ea81ffc5187a20cb7d5b68d5c299b783096999d94", + "blk.17.ffn_down.weight": "7c086166241e0664f8963fd1ca4ed74c737abfb2525ec20f8435821ff50158f3", + "blk.17.ffn_gate.weight": "51a32f78244d42a539f619c5ce661db9e6cf41636280a826d439b5444edcd28c", + "blk.17.ffn_norm.weight": "c4bb247fccd1ecc84875028af63dd20aaf5cbd17eb94a9bc36679c09285dccab", + "blk.17.ffn_up.weight": "b5886182790bc6fbadd63de9bc4ffee416f3b69a66280d197ab8c18edf769abf", + "output_norm.weight": "481f3097d0a20412e35b3a739b1b958487bcd41ff67744baa3c9acbddd2ee4d4" +} diff --git a/convert/tokenizer.go b/convert/tokenizer.go index fd6df5f53..14d6ba66c 100644 --- a/convert/tokenizer.go +++ b/convert/tokenizer.go @@ -1,10 +1,12 @@ package convert import ( - "cmp" "crypto/sha256" + "encoding/hex" "encoding/json" + "errors" "fmt" + "io/fs" "log/slog" "os" "slices" @@ -12,10 +14,152 @@ import ( "golang.org/x/exp/maps" ) +const ( + _ int32 = iota + tokenTypeNormal + tokenTypeUnknown + tokenTypeControl + tokenTypeUserDefined + tokenTypeUnused + tokenTypeByte +) + type Tokenizer struct { - Version string `json:"version"` - AddedTokens []Token `json:"added_tokens"` - Model TokenizerModel `json:"model"` + *Vocabulary + SpecialVocabulary []*SpecialVocabulary + Merges []string + + Pre string + Template string +} + +func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error) { + v, err := parseVocabulary(fsys) + if err != nil { + return nil, err + } + + t := &Tokenizer{ + Vocabulary: v, + Pre: "default", + } + + addedTokens := make(map[string]token) + if f, err := fsys.Open("tokenizer.json"); errors.Is(err, os.ErrNotExist) { + } else if err != nil { + return nil, err + } else { + defer f.Close() + + var tt tokenizer + if err := json.NewDecoder(f).Decode(&tt); err != nil { + return nil, err + } + + for _, t := range tt.AddedTokens { + addedTokens[t.Content] = t + } + + t.Merges = tt.Model.Merges + + sha256sum := sha256.New() + for _, pt := range tt.PreTokenizer.PreTokenizers { + switch pt.Type { + case "Split": + if pt.Pattern.Regex != "" { + // create a checksum of all Split pretokenizers which should be sufficient + // to identify the pretokenizer + sha256sum.Write([]byte(pt.Pattern.Regex)) + } + } + } + + switch digest := hex.EncodeToString(sha256sum.Sum(nil)); digest { + case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f": + t.Pre = "llama-bpe" + case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02": + t.Pre = "deepseek-llm" + case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e": + t.Pre = "deepseek-coder" + case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855": + // noop, empty pretokenizer + default: + slog.Warn("unknown pretokenizer, using default", "digest", digest) + } + } + + if f, 
err := fsys.Open("tokenizer_config.json"); errors.Is(err, os.ErrNotExist) { + } else if err != nil { + return nil, err + } else { + defer f.Close() + + var p map[string]json.RawMessage + if err := json.NewDecoder(f).Decode(&p); err != nil { + return nil, err + } + + if template, ok := p["chat_template"]; ok { + var s []struct { + Name string `json:"name"` + Template string `json:"template"` + } + if err := json.Unmarshal(template, &t.Template); err == nil { + // noop + } else if err := json.Unmarshal(template, &s); err == nil { + for _, e := range s { + if e.Name == "default" { + t.Template = e.Template + break + } + } + } else { + return nil, fmt.Errorf("invalid chat_template: %w", err) + } + } + + for _, st := range specialTokenTypes { + sv := SpecialVocabulary{Type: st} + if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok { + if err := json.Unmarshal(bts, &sv.AddToken); err != nil { + return nil, err + } + } + + if bts, ok := p[fmt.Sprintf("%s_token", st)]; ok { + var content string + if err := json.Unmarshal(bts, &content); err != nil { + var mm map[string]any + if err := json.Unmarshal(bts, &mm); err != nil { + continue + } + + content, ok = mm["content"].(string) + if !ok { + continue + } + } + + sv.Content = content + } + + if id, ok := addedTokens[sv.Content]; ok { + sv.ID = id.ID + t.SpecialVocabulary = append(t.SpecialVocabulary, &sv) + } + } + } + + return t, nil +} + +type tokenizer struct { + AddedTokens []token `json:"added_tokens"` + Model struct { + Type string `json:"type"` + Vocab map[string]int `json:"vocab"` + Merges []string `json:"merges"` + } `json:"model"` PreTokenizer struct { PreTokenizers []struct { @@ -27,80 +171,108 @@ type Tokenizer struct { } `json:"pre_tokenizer"` } -type TokenizerModel struct { - Type string `json:"type"` - Vocab map[string]int `json:"vocab"` - Merges []string `json:"merges"` - Tokens []Token -} - -type Token struct { +type token struct { ID int `json:"id"` Content string `json:"content"` Special bool `json:"special"` UserDefined bool } -func (t *Token) Type() int32 { - switch { - case t.Special: - return tokenTypeControl - case t.UserDefined: - return tokenTypeUserDefined - default: - return tokenTypeNormal - } +type Vocabulary struct { + Model string + Tokens []string + Scores []float32 + Types []int32 } -func (t *Tokenizer) maxID() int { - return max( - slices.Max(maps.Values(t.Model.Vocab)), - slices.MaxFunc(t.AddedTokens, func(a, b Token) int { - return cmp.Compare(a.ID, b.ID) - }).ID, - ) -} - -func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) { - f, err := os.Open(dirpath) +func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) { + f, err := fsys.Open("tokenizer.json") if err != nil { - panic(err) + return nil, err } defer f.Close() - var t Tokenizer + var t tokenizer if err := json.NewDecoder(f).Decode(&t); err != nil { - return "", nil, nil, err + return nil, err } - tokens = make([]Token, t.maxID()+1) + tokens := make(map[int]token, len(t.Model.Vocab)) for k, v := range t.Model.Vocab { - tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false} - } - - for _, v := range t.AddedTokens { - v.UserDefined = true - tokens[v.ID] = v - } - - sha256sum := sha256.New() - for _, pt := range t.PreTokenizer.PreTokenizers { - if pt.Type == "Split" && pt.Pattern.Regex != "" { - sha256sum.Write([]byte(pt.Pattern.Regex)) + tokens[v] = token{ + ID: v, + Content: k, } } - switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest { - case 
"d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f": - pre = "llama-bpe" - case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02": - pre = "deepseek-llm" - case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e": - pre = "deepseek-coder" - default: - slog.Warn("unknown pretokenizer, using default", "digest", digest) - pre = "default" + for _, token := range t.AddedTokens { + token.UserDefined = true + tokens[token.ID] = token } - return pre, tokens, t.Model.Merges, nil + keys := maps.Keys(tokens) + slices.Sort(keys) + + v := Vocabulary{Model: "gpt2"} + for _, k := range keys { + token := tokens[k] + v.Tokens = append(v.Tokens, token.Content) + v.Scores = append(v.Scores, float32(token.ID)) + + switch { + case token.Special: + v.Types = append(v.Types, tokenTypeControl) + case token.UserDefined: + v.Types = append(v.Types, tokenTypeUserDefined) + default: + v.Types = append(v.Types, tokenTypeNormal) + } + } + + return &v, nil +} + +func parseVocabulary(fsys fs.FS) (*Vocabulary, error) { + patterns := []struct { + Pattern string + Func func(fs.FS) (*Vocabulary, error) + }{ + {"tokenizer.model", parseSentencePiece}, + {"tokenizer.json", parseVocabularyFromTokenizer}, + } + + for _, pattern := range patterns { + if _, err := fs.Stat(fsys, pattern.Pattern); errors.Is(err, os.ErrNotExist) { + continue + } else if err != nil { + return nil, err + } + + return pattern.Func(fsys) + } + + return nil, errors.New("unknown tokenizer format") +} + +type SpecialVocabulary struct { + Type string + ID int + Content string + AddToken bool +} + +func (sv SpecialVocabulary) Key() string { + switch t := sv.Type; t { + case "bos", "eos", "cls", "mask": + return t + case "unk": + return "unknown" + case "sep": + //nolint:misspell // this is an upstream typo + return "seperator" + case "pad": + return "padding" + } + + panic("unknown special vocabulary type") } diff --git a/convert/tokenizer_spm.go b/convert/tokenizer_spm.go new file mode 100644 index 000000000..5e506087c --- /dev/null +++ b/convert/tokenizer_spm.go @@ -0,0 +1,113 @@ +package convert + +import ( + "cmp" + "encoding/json" + "errors" + "fmt" + "io/fs" + "os" + "slices" + + "google.golang.org/protobuf/proto" + + "github.com/ollama/ollama/convert/sentencepiece" +) + +func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) { + ast, err := parseAdditionalSpecialTokens(fsys) + if err != nil { + return nil, err + } + + bts, err := fs.ReadFile(fsys, "tokenizer.model") + if err != nil { + return nil, err + } + + var spm sentencepiece.ModelProto + if err := proto.Unmarshal(bts, &spm); err != nil { + return nil, err + } + + v := Vocabulary{Model: "llama"} + for _, piece := range spm.GetPieces() { + v.Tokens = append(v.Tokens, piece.GetPiece()) + v.Scores = append(v.Scores, piece.GetScore()) + + switch t := piece.GetType(); t { + case sentencepiece.ModelProto_SentencePiece_UNKNOWN, + sentencepiece.ModelProto_SentencePiece_CONTROL, + sentencepiece.ModelProto_SentencePiece_UNUSED, + sentencepiece.ModelProto_SentencePiece_BYTE: + v.Types = append(v.Types, int32(t)) + default: + tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL) + if slices.Contains(ast, piece.GetPiece()) { + tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL) + } + + v.Types = append(v.Types, tt) + } + } + + f, err := fsys.Open("added_tokens.json") + if errors.Is(err, os.ErrNotExist) { + return &v, nil + } else if err != nil { + return nil, err + } + defer f.Close() + + var atm map[string]int + if err := 
json.NewDecoder(f).Decode(&atm); err != nil { + return nil, err + } + + type t struct { + id int + content string + } + + var ts []t + for content, id := range atm { + ts = append(ts, t{id, content}) + } + + slices.SortFunc(ts, func(i, j t) int { + return cmp.Compare(i.id, j.id) + }) + + n := len(v.Tokens) + for i, t := range ts { + if t.id != i+n { + return nil, fmt.Errorf("invalid token id: %d", t.id) + } + + v.Tokens = append(v.Tokens, t.content) + v.Scores = append(v.Scores, -1000.0) + v.Types = append(v.Types, tokenTypeUserDefined) + } + + return &v, nil +} + +func parseAdditionalSpecialTokens(fsys fs.FS) ([]string, error) { + f, err := fsys.Open("special_tokens_map.json") + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } else if err != nil { + return nil, err + } + defer f.Close() + + var m struct { + AdditionalSpecialTokens []string `json:"additional_special_tokens"` + } + + if err := json.NewDecoder(f).Decode(&m); err != nil { + return nil, err + } + + return m.AdditionalSpecialTokens, nil +} diff --git a/convert/tokenizer_test.go b/convert/tokenizer_test.go new file mode 100644 index 000000000..d9550e095 --- /dev/null +++ b/convert/tokenizer_test.go @@ -0,0 +1,208 @@ +package convert + +import ( + "io" + "io/fs" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" +) + +func createTokenizerFS(t *testing.T, dir string, files map[string]io.Reader) fs.FS { + t.Helper() + + for k, v := range files { + if err := func() error { + f, err := os.Create(filepath.Join(dir, k)) + if err != nil { + return err + } + defer f.Close() + + if _, err := io.Copy(f, v); err != nil { + return err + } + + return nil + }(); err != nil { + t.Fatalf("unexpected error: %v", err) + } + } + + return os.DirFS(dir) +} + +func TestParseTokenizer(t *testing.T) { + cases := []struct { + name string + fsys fs.FS + specialTokenTypes []string + want *Tokenizer + }{ + { + name: "string chat template", + fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{ + "tokenizer.json": strings.NewReader(`{}`), + "tokenizer_config.json": strings.NewReader(`{ + "chat_template": "" + }`), + }), + want: &Tokenizer{ + Vocabulary: &Vocabulary{Model: "gpt2"}, + Pre: "default", + Template: "", + }, + }, + { + name: "list chat template", + fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{ + "tokenizer.json": strings.NewReader(`{}`), + "tokenizer_config.json": strings.NewReader(`{ + "chat_template": [ + { + "name": "default", + "template": "" + }, + { + "name": "tools", + "template": "" + } + ] + }`), + }), + want: &Tokenizer{ + Vocabulary: &Vocabulary{Model: "gpt2"}, + Pre: "default", + Template: "", + }, + }, + { + name: "added tokens", + fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{ + "tokenizer.json": strings.NewReader(`{ + "added_tokens": [ + { + "id": 999, + "content": "", + "special": false + } + ] + }`), + }), + want: &Tokenizer{ + Vocabulary: &Vocabulary{ + Model: "gpt2", + Tokens: []string{""}, + Scores: []float32{999}, + Types: []int32{4}, + }, + Pre: "default", + }, + }, + { + name: "added tokens overlap vocab", + fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{ + "tokenizer.json": strings.NewReader(`{ + "added_tokens": [ + { + "id": 0, + "content": "", + "special": true + } + ], + "model": { + "vocab": { + "": 0 + } + } + }`), + }), + want: &Tokenizer{ + Vocabulary: &Vocabulary{ + Model: "gpt2", + Tokens: []string{""}, + Scores: []float32{0}, + Types: []int32{3}, + }, + Pre: "default", + }, + }, + { + name: "special token types", + 
fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{ + "tokenizer.json": strings.NewReader(`{ + "added_tokens": [ + { + "id": 0, + "content": "", + "special": true + }, + { + "id": 1, + "content": "", + "special": true + }, + { + "id": 2, + "content": "", + "special": true + }, + { + "id": 3, + "content": "", + "special": true + } + ], + "model": { + "vocab": { + "": 0, + "": 1, + "": 2, + "": 3 + } + } + }`), + "tokenizer_config.json": strings.NewReader(`{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": "", + "eos_token": "", + "pad_token": "", + "unk_token": "" + }`), + }), + specialTokenTypes: []string{"pad", "eos", "bos", "unk"}, + want: &Tokenizer{ + Vocabulary: &Vocabulary{ + Model: "gpt2", + Tokens: []string{"", "", "", ""}, + Scores: []float32{0, 1, 2, 3}, + Types: []int32{3, 3, 3, 3}, + }, + SpecialVocabulary: []*SpecialVocabulary{ + {Type: "pad", Content: "", ID: 0, AddToken: false}, + {Type: "eos", Content: "", ID: 1, AddToken: false}, + {Type: "bos", Content: "", ID: 2, AddToken: true}, + {Type: "unk", Content: "", ID: 3, AddToken: false}, + }, + Pre: "default", + }, + }, + } + + for _, tt := range cases { + t.Run(tt.name, func(t *testing.T) { + tokenizer, err := parseTokenizer(tt.fsys, tt.specialTokenTypes) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if diff := cmp.Diff(tt.want, tokenizer); diff != "" { + t.Errorf("unexpected tokenizer (-want +got):\n%s", diff) + } + }) + } +} diff --git a/convert/torch.go b/convert/torch.go deleted file mode 100644 index 55414adc6..000000000 --- a/convert/torch.go +++ /dev/null @@ -1,287 +0,0 @@ -package convert - -import ( - "encoding/binary" - "encoding/json" - "fmt" - "io" - "log/slog" - "os" - "path/filepath" - "regexp" - "strings" - - "github.com/nlpodyssey/gopickle/pytorch" - "github.com/nlpodyssey/gopickle/types" - "github.com/x448/float16" - - "github.com/ollama/ollama/llm" -) - -type torchWriterTo struct { - t *llm.Tensor - - params *Params - bo ByteOrder - - storage pytorch.StorageInterface - repacker func(string, []float32, []uint64) ([]float32, error) -} - -type TorchFormat struct{} - -func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) { - slog.Debug("getting torch tensors") - - var files []string - if pt, _ := filepath.Glob(filepath.Join(dirpath, "consolidated*.pth")); len(pt) > 0 { - files = append(files, pt...) - } else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 { - files = append(files, pt...) 
- } - - var offset uint64 - var tensors []llm.Tensor - for _, fn := range files { - m, err := pytorch.Load(fn) - if err != nil { - slog.Error(fmt.Sprintf("error unpickling: %q", err)) - return []llm.Tensor{}, err - } - - for _, k := range m.(*types.Dict).Keys() { - if strings.HasSuffix(k.(string), "self_attn.rotary_emb.inv_freq") { - continue - } - - t, _ := m.(*types.Dict).Get(k) - tshape := t.(*pytorch.Tensor).Size - - var size uint64 - var kind uint32 - switch len(tshape) { - case 0: - continue - case 1: - // convert to float32 - kind = 0 - size = uint64(tshape[0] * 4) - case 2: - // convert to float16 - kind = 1 - size = uint64(tshape[0] * tshape[1] * 2) - } - - ggufName, err := tf.GetLayerName(k.(string)) - if err != nil { - slog.Error(err.Error()) - return nil, err - } - slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", k.(string), ggufName, size, tshape)) - - shape := []uint64{0, 0, 0, 0} - for i := range tshape { - shape[i] = uint64(tshape[i]) - } - - tensor := llm.Tensor{ - Name: ggufName, - Kind: kind, - Offset: offset, // calculate the offset - Shape: shape, - } - - tensor.WriterTo = torchWriterTo{ - t: &tensor, - params: params, - bo: params.ByteOrder, - storage: t.(*pytorch.Tensor).Source, - } - - tensors = append(tensors, tensor) - offset += size - } - } - - return tensors, nil -} - -func getAltParams(dirpath string) (*Params, error) { - f, err := os.Open(filepath.Join(dirpath, "params.json")) - if err != nil { - slog.Error("no params.json") - return nil, err - } - defer f.Close() - - type TorchParams struct { - HiddenSize int `json:"dim"` - AttentionHeads int `json:"n_heads"` - KeyValHeads int `json:"n_kv_heads"` - HiddenLayers int `json:"n_layers"` - RopeTheta float64 `json:"rope_theta"` - NormEPS float64 `json:"norm_eps"` - } - - var tparams TorchParams - - d := json.NewDecoder(f) - err = d.Decode(&tparams) - if err != nil { - return nil, err - } - - params := &Params{ - Architectures: []string{"LlamaForCausalLM"}, - HiddenSize: tparams.HiddenSize, - AttentionHeads: tparams.AttentionHeads, - KeyValHeads: tparams.KeyValHeads, - HiddenLayers: tparams.HiddenLayers, - NormEPS: tparams.NormEPS, - } - - switch { - case tparams.RopeTheta == 1000000: - // Codellama - params.ContextSize = 16384 - case tparams.NormEPS == 1e-06: - // llama2 - slog.Debug("Found llama2 - setting context size to 4096") - params.ContextSize = 4096 - default: - params.ContextSize = 2048 - } - - params.ByteOrder = binary.LittleEndian - return params, nil -} - -func (m *TorchFormat) GetParams(dirpath string) (*Params, error) { - f, err := os.Open(filepath.Join(dirpath, "config.json")) - if err != nil { - if os.IsNotExist(err) { - // try params.json instead - return getAltParams(dirpath) - } else { - return nil, err - } - } - - var params Params - d := json.NewDecoder(f) - err = d.Decode(¶ms) - if err != nil { - return nil, err - } - - params.ByteOrder = binary.LittleEndian - return ¶ms, nil -} - -func (m *TorchFormat) GetLayerName(n string) (string, error) { - directMap := map[string]string{ - "tok_embeddings.weight": "token_embd.weight", - "output.weight": "output.weight", - "norm.weight": "output_norm.weight", - "rope.freqs": "rope_freqs.weight", - "model.embed_tokens.weight": "token_embd.weight", - "lm_head.weight": "output.weight", - "model.norm.weight": "output_norm.weight", - } - - lMap := map[string]string{ - "layers.(\\d+).attention_norm.weight": "blk.$1.attn_norm.weight", - "layers.(\\d+).attention_output_norm.weight": "blk.$1.attn_norm.weight", - "layers.(\\d+).feed_forward.w2.weight": 
"blk.$1.ffn_down.weight", - "layers.(\\d+).feed_forward.w1.weight": "blk.$1.ffn_gate.weight", - "layers.(\\d+).feed_forward.w3.weight": "blk.$1.ffn_up.weight", - "layers.(\\d+).ffn_norm.weight": "blk.$1.ffn_norm.weight", - "layers.(\\d+).attention.wk.weight": "blk.$1.attn_k.weight", - "layers.(\\d+).attention.wo.weight": "blk.$1.attn_output.weight", - "layers.(\\d+).attention.wq.weight": "blk.$1.attn_q.weight", - "layers.(\\d+).attention.wv.weight": "blk.$1.attn_v.weight", - "model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight", - "model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight", - "model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight", - "model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight", - "model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight", - "model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight", - "model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight", - "model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight", - "model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight", - } - - v, ok := directMap[n] - if ok { - return v, nil - } - - // quick hack to rename the layers to gguf format - for k, v := range lMap { - re := regexp.MustCompile(k) - newName := re.ReplaceAllString(n, v) - if newName != n { - return newName, nil - } - } - - return "", fmt.Errorf("couldn't find a layer name for '%s'", n) -} - -func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) { - var f32s []float32 - switch s := r.storage.(type) { - case *pytorch.FloatStorage: - f32s = s.Data - case *pytorch.HalfStorage: - f32s = s.Data - case *pytorch.BFloat16Storage: - f32s = s.Data - default: - return 0, fmt.Errorf("unknown data type: %T", s) - } - - if r.repacker != nil { - f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape) - if err != nil { - return 0, err - } - } - - switch r.t.Kind { - case 0: - return 0, binary.Write(w, r.bo, f32s) - case 1: - f16s := make([]uint16, len(f32s)) - for i := range f32s { - f16s[i] = float16.Fromfloat32(f32s[i]).Bits() - } - - return 0, binary.Write(w, r.bo, f16s) - default: - return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind) - } -} - -func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) { - switch len(params.Architectures) { - case 0: - return nil, fmt.Errorf("No architecture specified to convert") - case 1: - switch params.Architectures[0] { - case "LlamaForCausalLM": - return &LlamaModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - default: - return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0]) - } - } - - return nil, fmt.Errorf("Unknown error") -} diff --git a/docs/api.md b/docs/api.md index c577bb1a5..95e79e007 100644 --- a/docs/api.md +++ b/docs/api.md @@ -40,6 +40,7 @@ Generate a response for a given prompt with a provided model. This is a streamin - `model`: (required) the [model name](#model-names) - `prompt`: the prompt to generate a response for +- `suffix`: the text after the model response - `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`) Advanced parameters (optional): @@ -57,7 +58,8 @@ Advanced parameters (optional): Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below. 
-> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace. +> [!IMPORTANT] +> It's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace. ### Examples @@ -67,7 +69,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3", + "model": "llama3.1", "prompt": "Why is the sky blue?" }' ``` A stream of JSON objects is returned: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T08:52:19.385406455-07:00", "response": "The", "done": false } @@ -100,7 +102,7 @@ To calculate how fast the response is generated in tokens per second (token/s), ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T19:22:45.499127Z", "response": "", "done": true, @@ -122,7 +124,7 @@ A response can be received in one reply when streaming is off. ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3", + "model": "llama3.1", "prompt": "Why is the sky blue?", "stream": false }' @@ -134,7 +136,7 @@ If `stream` is set to `false`, the response will be a single JSON object: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T19:22:45.499127Z", "response": "The sky is blue because it is the color of the sky.", "done": true, @@ -148,15 +150,51 @@ If `stream` is set to `false`, the response will be a single JSON object: } ``` +#### Request (with suffix) + +##### Request + +```shell +curl http://localhost:11434/api/generate -d '{ + "model": "codellama:code", + "prompt": "def compute_gcd(a, b):", + "suffix": " return result", + "options": { + "temperature": 0 + }, + "stream": false +}' +``` + +##### Response + +```json +{ + "model": "codellama:code", + "created_at": "2024-07-22T20:47:51.147561Z", + "response": "\n if a == 0:\n return b\n else:\n return compute_gcd(b % a, a)\n\ndef compute_lcm(a, b):\n result = (a * b) / compute_gcd(a, b)\n", + "done": true, + "done_reason": "stop", + "context": [...], + "total_duration": 1162761250, + "load_duration": 6683708, + "prompt_eval_count": 17, + "prompt_eval_duration": 201222000, + "eval_count": 63, + "eval_duration": 953997000 +} +``` + #### Request (JSON mode) +> [!IMPORTANT] > When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON. ##### Request ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3", + "model": "llama3.1", "prompt": "What color is the sky at different times of the day? 
Respond using JSON", "format": "json", "stream": false @@ -167,7 +205,7 @@ curl http://localhost:11434/api/generate -d '{ ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-11-09T21:07:55.186497Z", "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n", "done": true, @@ -289,7 +327,7 @@ If you want to set custom options for the model at runtime rather than in the Mo ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3", + "model": "llama3.1", "prompt": "Why is the sky blue?", "stream": false, "options": { @@ -298,6 +336,7 @@ curl http://localhost:11434/api/generate -d '{ "num_predict": 100, "top_k": 20, "top_p": 0.9, + "min_p": 0.0, "tfs_z": 0.5, "typical_p": 0.7, "repeat_last_n": 33, @@ -329,7 +368,7 @@ curl http://localhost:11434/api/generate -d '{ ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T19:22:45.499127Z", "response": "The sky is blue because it is the color of the sky.", "done": true, @@ -351,7 +390,7 @@ If an empty prompt is provided, the model will be loaded into memory. ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3" + "model": "llama3.1" }' ``` @@ -361,13 +400,40 @@ A single JSON object is returned: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-12-18T19:52:07.071755Z", "response": "", "done": true } ``` +#### Unload a model + +If an empty prompt is provided and the `keep_alive` parameter is set to `0`, a model will be unloaded from memory. + +##### Request + +```shell +curl http://localhost:11434/api/generate -d '{ + "model": "llama3.1", + "keep_alive": 0 +}' +``` + +##### Response + +A single JSON object is returned: + +```json +{ + "model": "llama3.1", + "created_at": "2024-09-12T03:54:03.516566Z", + "response": "", + "done": true, + "done_reason": "unload" +} +``` + ## Generate a chat completion ```shell @@ -380,12 +446,14 @@ Generate the next message in a chat with a provided model. This is a streaming e - `model`: (required) the [model name](#model-names) - `messages`: the messages of the chat, this can be used to keep a chat memory +- `tools`: tools for the model to use if supported. Requires `stream` to be set to `false` The `message` object has the following fields: -- `role`: the role of the message, either `system`, `user` or `assistant` +- `role`: the role of the message, either `system`, `user`, `assistant`, or `tool` - `content`: the content of the message - `images` (optional): a list of images to include in the message (for multimodal models such as `llava`) +- `tool_calls` (optional): a list of tools the model wants to use Advanced parameters (optional): @@ -404,7 +472,7 @@ Send a chat message with a streaming response. 
```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3", + "model": "llama3.1", "messages": [ { "role": "user", @@ -420,7 +488,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T08:52:19.385406455-07:00", "message": { "role": "assistant", @@ -435,7 +503,7 @@ Final response: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T19:22:45.499127Z", "done": true, "total_duration": 4883583458, @@ -453,7 +521,7 @@ Final response: ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3", + "model": "llama3.1", "messages": [ { "role": "user", @@ -468,7 +536,7 @@ curl http://localhost:11434/api/chat -d '{ ```json { - "model": "registry.ollama.ai/library/llama3:latest", + "model": "llama3.1", "created_at": "2023-12-12T14:13:43.416799Z", "message": { "role": "assistant", @@ -492,7 +560,7 @@ Send a chat message with a conversation history. You can use this same approach ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3", + "model": "llama3.1", "messages": [ { "role": "user", @@ -516,7 +584,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T08:52:19.385406455-07:00", "message": { "role": "assistant", @@ -530,7 +598,7 @@ Final response: ```json { - "model": "llama3", + "model": "llama3.1", "created_at": "2023-08-04T19:22:45.499127Z", "done": true, "total_duration": 8113331500, @@ -546,7 +614,7 @@ Final response: ##### Request -Send a chat message with a conversation history. +Send a chat message with images. The images should be provided as an array, with the individual images encoded in Base64. ```shell curl http://localhost:11434/api/chat -d '{ @@ -588,7 +656,7 @@ curl http://localhost:11434/api/chat -d '{ ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3", + "model": "llama3.1", "messages": [ { "role": "user", @@ -606,7 +674,7 @@ curl http://localhost:11434/api/chat -d '{ ```json { - "model": "registry.ollama.ai/library/llama3:latest", + "model": "llama3.1", "created_at": "2023-12-12T14:13:43.416799Z", "message": { "role": "assistant", @@ -622,6 +690,137 @@ curl http://localhost:11434/api/chat -d '{ } ``` +#### Chat request (with tools) + +##### Request + +``` +curl http://localhost:11434/api/chat -d '{ + "model": "llama3.1", + "messages": [ + { + "role": "user", + "content": "What is the weather today in Paris?" + } + ], + "stream": false, + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The location to get the weather for, e.g. San Francisco, CA" + }, + "format": { + "type": "string", + "description": "The format to return the weather in, e.g. 
'celsius' or 'fahrenheit'", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["location", "format"] + } + } + } + ] +}' +``` + +##### Response + +```json +{ + "model": "llama3.1", + "created_at": "2024-07-22T20:33:28.123648Z", + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "function": { + "name": "get_current_weather", + "arguments": { + "format": "celsius", + "location": "Paris, FR" + } + } + } + ] + }, + "done_reason": "stop", + "done": true, + "total_duration": 885095291, + "load_duration": 3753500, + "prompt_eval_count": 122, + "prompt_eval_duration": 328493000, + "eval_count": 33, + "eval_duration": 552222000 +} +``` + +#### Load a model + +If the messages array is empty, the model will be loaded into memory. + +##### Request + +``` +curl http://localhost:11434/api/chat -d '{ + "model": "llama3.1", + "messages": [] +}' +``` + +##### Response +```json +{ + "model": "llama3.1", + "created_at":"2024-09-12T21:17:29.110811Z", + "message": { + "role": "assistant", + "content": "" + }, + "done_reason": "load", + "done": true +} +``` + +#### Unload a model + +If the messages array is empty and the `keep_alive` parameter is set to `0`, a model will be unloaded from memory. + +##### Request + +``` +curl http://localhost:11434/api/chat -d '{ + "model": "llama3.1", + "messages": [], + "keep_alive": 0 +}' +``` + +##### Response + +A single JSON object is returned: + +```json +{ + "model": "llama3.1", + "created_at":"2024-09-12T21:33:17.547535Z", + "message": { + "role": "assistant", + "content": "" + }, + "done_reason": "unload", + "done": true +} +``` + ## Create a Model ```shell @@ -790,7 +989,7 @@ Show information about a model including details, modelfile, template, parameter ```shell curl http://localhost:11434/api/show -d '{ - "name": "llama3" + "name": "llama3.1" }' ``` @@ -851,7 +1050,7 @@ Copy a model. Creates a model with another name from an existing model. ```shell curl http://localhost:11434/api/copy -d '{ - "source": "llama3", + "source": "llama3.1", "destination": "llama3-backup" }' ``` @@ -906,7 +1105,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where ```shell curl http://localhost:11434/api/pull -d '{ - "name": "llama3" + "name": "llama3.1" }' ``` @@ -1026,7 +1225,7 @@ If `stream` is set to `false`, then the response is a single JSON object: ## Generate Embeddings ```shell -POST /api/embeddings +POST /api/embed ``` Generate embeddings from a model @@ -1034,10 +1233,11 @@ Generate embeddings from a model ### Parameters - `model`: name of model to generate embeddings from -- `prompt`: text to generate embeddings for +- `input`: text or list of text to generate embeddings for Advanced parameters: +- `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true` - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature` - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`) @@ -1046,9 +1246,9 @@ Advanced parameters: #### Request ```shell -curl http://localhost:11434/api/embeddings -d '{ +curl http://localhost:11434/api/embed -d '{ "model": "all-minilm", - "prompt": "Here is an article about llamas..." + "input": "Why is the sky blue?" 
}' ``` @@ -1056,10 +1256,38 @@ curl http://localhost:11434/api/embeddings -d '{ ```json { - "embedding": [ - 0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313, - 0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281 - ] + "model": "all-minilm", + "embeddings": [[ + 0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814, + 0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348 + ]], + "total_duration": 14143917, + "load_duration": 1019500, + "prompt_eval_count": 8 +} +``` + +#### Request (Multiple input) + +```shell +curl http://localhost:11434/api/embed -d '{ + "model": "all-minilm", + "input": ["Why is the sky blue?", "Why is the grass green?"] +}' +``` + +#### Response + +```json +{ + "model": "all-minilm", + "embeddings": [[ + 0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814, + 0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348 + ],[ + -0.0098027075, 0.06042469, 0.025257962, -0.006364387, 0.07272725, + 0.017194884, 0.09032035, -0.051705178, 0.09951512, 0.09072481 + ]] } ``` @@ -1106,3 +1334,45 @@ A single JSON object will be returned. ] } ``` + +## Generate Embedding + +> Note: this endpoint has been superseded by `/api/embed` + +```shell +POST /api/embeddings +``` + +Generate embeddings from a model + +### Parameters + +- `model`: name of model to generate embeddings from +- `prompt`: text to generate embeddings for + +Advanced parameters: + +- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature` +- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`) + +### Examples + +#### Request + +```shell +curl http://localhost:11434/api/embeddings -d '{ + "model": "all-minilm", + "prompt": "Here is an article about llamas..." +}' +``` + +#### Response + +```json +{ + "embedding": [ + 0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313, + 0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281 + ] +} +``` diff --git a/docs/development.md b/docs/development.md index 2a6886a43..e67689abc 100644 --- a/docs/development.md +++ b/docs/development.md @@ -104,7 +104,7 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H, you might use: ``` -OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./... +OLLAMA_CUSTOM_CPU_DEFS="-DGGML_AVX=on -DGGML_AVX2=on -DGGML_F16C=on -DGGML_FMA=on" go generate ./... go build . ``` @@ -148,3 +148,22 @@ In addition to the common Windows development tools described above, install AMD - [Strawberry Perl](https://strawberryperl.com/) Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`). + +#### Windows arm64 + +The default `Developer PowerShell for VS 2022` may default to x86 which is not what you want. 
To ensure you get an arm64 development environment, start a plain PowerShell terminal and run: + +```powershell +import-module 'C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\Common7\\Tools\\Microsoft.VisualStudio.DevShell.dll' +Enter-VsDevShell -Arch arm64 -vsinstallpath 'C:\\Program Files\\Microsoft Visual Studio\\2022\\Community' -skipautomaticlocation +``` + +You can confirm the target architecture with `write-host $env:VSCMD_ARG_TGT_ARCH`. + +Follow the instructions at https://www.msys2.org/wiki/arm64/ to set up an arm64 msys2 environment. Ollama requires gcc and mingw32-make to compile, which are not currently available on Windows arm64, but a gcc compatibility adapter is available via `mingw-w64-clang-aarch64-gcc-compat`. At a minimum you will need to install the following: + +``` +pacman -S mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-gcc-compat mingw-w64-clang-aarch64-make make +``` + +You will need to ensure your PATH includes go, cmake, gcc, clang, and mingw32-make (typically found in `C:\msys64\clangarm64\bin\`) to build ollama from source. \ No newline at end of file diff --git a/docs/docker.md b/docs/docker.md index 0b58562b7..314666b26 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -1,71 +1,71 @@ -# Ollama Docker image - -### CPU only - -```bash -docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama -``` - -### Nvidia GPU -Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation). - -#### Install with Apt -1. Configure the repository -```bash -curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \ - | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg -curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \ - | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \ - | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list -sudo apt-get update -``` -2. Install the NVIDIA Container Toolkit packages -```bash -sudo apt-get install -y nvidia-container-toolkit -``` - -#### Install with Yum or Dnf -1. Configure the repository - -```bash -curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \ - | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo -``` - -2. Install the NVIDIA Container Toolkit packages - -```bash -sudo yum install -y nvidia-container-toolkit -``` - -#### Configure Docker to use Nvidia driver -``` -sudo nvidia-ctk runtime configure --runtime=docker -sudo systemctl restart docker -``` - -#### Start the container - -```bash -docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama -``` - -### AMD GPU - -To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command: - -``` -docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm -``` - -### Run model locally - -Now you can run a model: - -``` -docker exec -it ollama ollama run llama3 -``` - -### Try different models - -More models can be found on the [Ollama library](https://ollama.com/library). +# Ollama Docker image + +### CPU only + +```bash +docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama +``` + +### Nvidia GPU +Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation). 
+ +#### Install with Apt +1. Configure the repository +```bash +curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \ + | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg +curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \ + | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \ + | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list +sudo apt-get update +``` +2. Install the NVIDIA Container Toolkit packages +```bash +sudo apt-get install -y nvidia-container-toolkit +``` + +#### Install with Yum or Dnf +1. Configure the repository + +```bash +curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \ + | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo +``` + +2. Install the NVIDIA Container Toolkit packages + +```bash +sudo yum install -y nvidia-container-toolkit +``` + +#### Configure Docker to use Nvidia driver +``` +sudo nvidia-ctk runtime configure --runtime=docker +sudo systemctl restart docker +``` + +#### Start the container + +```bash +docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama +``` + +### AMD GPU + +To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command: + +``` +docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm +``` + +### Run model locally + +Now you can run a model: + +``` +docker exec -it ollama ollama run llama3.1 +``` + +### Try different models + +More models can be found on the [Ollama library](https://ollama.com/library). diff --git a/docs/faq.md b/docs/faq.md index 841f1d13d..b2b1ca304 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -32,7 +32,7 @@ When using the API, specify the `num_ctx` parameter: ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3", + "model": "llama3.1", "prompt": "Why is the sky blue?", "options": { "num_ctx": 4096 @@ -111,7 +111,10 @@ On Windows, Ollama inherits your user and system environment variables. ## How do I use Ollama behind a proxy? -Ollama is compatible with proxy servers if `HTTP_PROXY` or `HTTPS_PROXY` are configured. When using either variables, ensure it is set where `ollama serve` can access the values. When using `HTTPS_PROXY`, ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform. +Ollama pulls models from the Internet and may require a proxy server to access the models. Use `HTTPS_PROXY` to redirect outbound requests through the proxy. Ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform. + +> [!NOTE] +> Avoid setting `HTTP_PROXY`. Ollama does not use HTTP for model pulls, only HTTPS. Setting `HTTP_PROXY` may interrupt client connections to the server. ### How do I use Ollama behind a proxy in Docker? @@ -191,6 +194,8 @@ Refer to the section [above](#how-do-i-configure-ollama-server) for how to set e If a different directory needs to be used, set the environment variable `OLLAMA_MODELS` to the chosen directory. +> Note: on Linux using the standard installer, the `ollama` user needs read and write access to the specified directory. To assign the directory to the `ollama` user run `sudo chown -R ollama:ollama <directory>`. 
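For tooling that needs to locate the models directory programmatically, the following is a minimal Go sketch of the `OLLAMA_MODELS` convention described above. The `~/.ollama/models` fallback is an assumption for illustration and may not match the default on every platform.

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// modelsDir resolves the model storage directory: OLLAMA_MODELS wins when
// set; otherwise we assume a per-user default under the home directory.
func modelsDir() (string, error) {
	if dir := os.Getenv("OLLAMA_MODELS"); dir != "" {
		return dir, nil
	}
	home, err := os.UserHomeDir()
	if err != nil {
		return "", err
	}
	// Assumed fallback for illustration; check your platform's documentation.
	return filepath.Join(home, ".ollama", "models"), nil
}

func main() {
	dir, err := modelsDir()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(dir)
}
```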
+ Refer to the section [above](#how-do-i-configure-ollama-server) for how to set environment variables on your platform. ## How can I use Ollama in Visual Studio Code? @@ -227,14 +232,18 @@ curl http://localhost:11434/api/chat -d '{"model": "mistral"}' To preload a model using the CLI, use the command: ```shell -ollama run llama3 "" +ollama run llama3.1 "" ``` ## How do I keep a model loaded in memory or make it unload immediately? -By default models are kept in memory for 5 minutes before being unloaded. This allows for quicker response times if you are making numerous requests to the LLM. You may, however, want to free up the memory before the 5 minutes have elapsed or keep the model loaded indefinitely. Use the `keep_alive` parameter with either the `/api/generate` and `/api/chat` API endpoints to control how long the model is left in memory. +By default models are kept in memory for 5 minutes before being unloaded. This allows for quicker response times if you're making numerous requests to the LLM. If you want to immediately unload a model from memory, use the `ollama stop` command: -The `keep_alive` parameter can be set to: +```shell +ollama stop llama3.1 +``` + +If you're using the API, use the `keep_alive` parameter with the `/api/generate` and `/api/chat` endpoints to set the amount of time that a model stays in memory. The `keep_alive` parameter can be set to: * a duration string (such as "10m" or "24h") * a number in seconds (such as 3600) * any negative number which will keep the model loaded in memory (e.g. -1 or "-1m") @@ -242,17 +251,17 @@ The `keep_alive` parameter can be set to: For example, to preload a model and leave it in memory use: ```shell -curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": -1}' +curl http://localhost:11434/api/generate -d '{"model": "llama3.1", "keep_alive": -1}' ``` To unload the model and free up memory use: ```shell -curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": 0}' +curl http://localhost:11434/api/generate -d '{"model": "llama3.1", "keep_alive": 0}' ``` -Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable. +Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to the section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable. -If you wish to override the `OLLAMA_KEEP_ALIVE` setting, use the `keep_alive` API parameter with the `/api/generate` or `/api/chat` API endpoints. +The `keep_alive` API parameter with the `/api/generate` and `/api/chat` API endpoints will override the `OLLAMA_KEEP_ALIVE` setting. ## How do I manage the maximum number of requests the Ollama server can queue? @@ -266,8 +275,14 @@ If there is insufficient available memory to load a new model request while one Parallel request processing for a given model results in increasing the context size by the number of parallel requests. 
 For example, a 2K context with 4 parallel requests will result in an 8K context and additional memory allocation.
 
-The following server settings may be used to adjust how Ollama handles concurrent requests:
+The following server settings may be used to adjust how Ollama handles concurrent requests on most platforms:
 
 - `OLLAMA_MAX_LOADED_MODELS` - The maximum number of models that can be loaded concurrently provided they fit in available memory. The default is 3 * the number of GPUs or 3 for CPU inference.
 - `OLLAMA_NUM_PARALLEL` - The maximum number of parallel requests each model will process at the same time. The default will auto-select either 4 or 1 based on available memory.
 - `OLLAMA_MAX_QUEUE` - The maximum number of requests Ollama will queue when busy before rejecting additional requests. The default is 512.
+
+Note: Windows with Radeon GPUs currently defaults to a maximum of 1 model due to limitations in ROCm v5.7 for available VRAM reporting. Once ROCm v6.2 is available, Windows Radeon will follow the defaults above. You may enable concurrent model loads on Radeon on Windows, but ensure you don't load more models than will fit into your GPUs' VRAM.
+
+## How does Ollama load models on multiple GPUs?
+
+Installing multiple GPUs of the same brand can be a great way to increase your available VRAM to load larger models. When you load a new model, Ollama evaluates the required VRAM for the model against what is currently available. If the model will entirely fit on any single GPU, Ollama will load the model on that GPU. This typically provides the best performance as it reduces the amount of data transferring across the PCI bus during inference. If the model does not fit entirely on one GPU, then it will be spread across all the available GPUs.
diff --git a/docs/gpu.md b/docs/gpu.md
index 80f276c3b..2913a2e27 100644
--- a/docs/gpu.md
+++ b/docs/gpu.md
@@ -10,7 +10,7 @@ Check your compute compatibility to see if your card is supported:
 | 9.0 | NVIDIA | `H100` |
 | 8.9 | GeForce RTX 40xx | `RTX 4090` `RTX 4080 SUPER` `RTX 4080` `RTX 4070 Ti SUPER` `RTX 4070 Ti` `RTX 4070 SUPER` `RTX 4070` `RTX 4060 Ti` `RTX 4060` |
 | | NVIDIA Professional | `L4` `L40` `RTX 6000` |
-| 8.6 | GeForce RTX 30xx | `RTX 3090 Ti` `RTX 3090` `RTX 3080 Ti` `RTX 3080` `RTX 3070 Ti` `RTX 3070` `RTX 3060 Ti` `RTX 3060` |
+| 8.6 | GeForce RTX 30xx | `RTX 3090 Ti` `RTX 3090` `RTX 3080 Ti` `RTX 3080` `RTX 3070 Ti` `RTX 3070` `RTX 3060 Ti` `RTX 3060` `RTX 3050 Ti` `RTX 3050` |
 | | NVIDIA Professional | `A40` `RTX A6000` `RTX A5000` `RTX A4000` `RTX A3000` `RTX A2000` `A10` `A16` `A2` |
 | 8.0 | NVIDIA | `A100` `A30` |
 | 7.5 | GeForce GTX/RTX | `GTX 1650 Ti` `TITAN RTX` `RTX 2080 Ti` `RTX 2080` `RTX 2070` `RTX 2060` |
@@ -46,13 +46,24 @@ sudo modprobe nvidia_uvm`
 ## AMD Radeon
 Ollama supports the following AMD GPUs:
+
+### Linux Support
 | Family | Cards and accelerators |
 | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
 | AMD Radeon RX | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900XT` `6800 XT` `6800` `Vega 64` `Vega 56` |
 | AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` `V420` `V340` `V320` `Vega II Duo` `Vega II` `VII` `SSG` |
 | AMD Instinct | `MI300X` `MI300A` `MI300` `MI250X` `MI250` `MI210` `MI200` `MI100` `MI60` `MI50` |
 
-### Overrides
+### Windows Support
+With ROCm v6.1, the following GPUs are supported on Windows.
+
+| Family | Cards and accelerators |
+| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
+| AMD Radeon RX | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900XT` `6800 XT` `6800` |
+| AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` |
+
+
+### Overrides on Linux
 Ollama leverages the AMD ROCm library, which does not support all
 AMD GPUs. In some cases you can force the system to try to use a
 similar LLVM target that is close. For example The Radeon RX 5400 is
 `gfx1034` (also known as 10.3.4)
@@ -63,7 +74,7 @@ would set `HSA_OVERRIDE_GFX_VERSION="10.3.0"` as an environment variable
 for the server. If you have an unsupported AMD GPU you can experiment using the list of
 supported types below.
 
-At this time, the known supported GPU types are the following LLVM Targets.
+At this time, the known supported GPU types on Linux are the following LLVM Targets.
 This table shows some example GPUs that map to these LLVM targets:
 | **LLVM Target** | **An Example GPU** |
 |-----------------|---------------------|
diff --git a/docs/images/ollama-keys.png b/docs/images/ollama-keys.png
new file mode 100644
index 000000000..942079a86
Binary files /dev/null and b/docs/images/ollama-keys.png differ
diff --git a/docs/images/signup.png b/docs/images/signup.png
new file mode 100644
index 000000000..e80bb4e7e
Binary files /dev/null and b/docs/images/signup.png differ
diff --git a/docs/import.md b/docs/import.md
index f34f09ace..2346886f1 100644
--- a/docs/import.md
+++ b/docs/import.md
@@ -1,42 +1,129 @@
-# Import
+# Importing a model
 
-GGUF models and select Safetensors models can be imported directly into Ollama.
+## Table of Contents
 
-## Import GGUF
+  * [Importing a Safetensors adapter](#Importing-a-fine-tuned-adapter-from-Safetensors-weights)
+  * [Importing a Safetensors model](#Importing-a-model-from-Safetensors-weights)
+  * [Importing a GGUF file](#Importing-a-GGUF-based-model-or-adapter)
+  * [Sharing models on ollama.com](#Sharing-your-model-on-ollamacom)
 
-A binary GGUF file can be imported directly into Ollama through a Modelfile.
+## Importing a fine tuned adapter from Safetensors weights
+
+First, create a `Modelfile` with a `FROM` command pointing at the base model you used for fine tuning, and an `ADAPTER` command which points to the directory with your Safetensors adapter:
 
 ```dockerfile
-FROM /path/to/file.gguf
+FROM <base model name>
+ADAPTER /path/to/safetensors/adapter/directory
 ```
 
-## Import Safetensors
+Make sure that you use the same base model in the `FROM` command as you used to create the adapter, otherwise you will get erratic results. Most frameworks use different quantization methods, so it's best to use non-quantized (i.e. non-QLoRA) adapters. If your adapter is in the same directory as your `Modelfile`, use `ADAPTER .` to specify the adapter path.
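+
+For example, a minimal `Modelfile` for a hypothetical adapter fine tuned from Llama 3.1 might look like:
+
+```dockerfile
+FROM llama3.1
+ADAPTER ./my-adapter
+```
+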
-If the model being imported is one of these architectures, it can be imported directly into Ollama through a Modelfile:
+Now run `ollama create` from the directory where the `Modelfile` was created:
 
- - LlamaForCausalLM
- - MistralForCausalLM
- - GemmaForCausalLM
+```bash
+ollama create my-model
+```
+
+Lastly, test the model:
+
+```bash
+ollama run my-model
+```
+
+Ollama supports importing adapters based on several different model architectures including:
+
+  * Llama (including Llama 2, Llama 3, and Llama 3.1);
+  * Mistral (including Mistral 1, Mistral 2, and Mixtral); and
+  * Gemma (including Gemma 1 and Gemma 2)
+
+You can create the adapter using a fine tuning framework or tool which can output adapters in the Safetensors format, such as:
+
+  * Hugging Face [fine tuning framework](https://huggingface.co/docs/transformers/en/training)
+  * [Unsloth](https://github.com/unslothai/unsloth)
+  * [MLX](https://github.com/ml-explore/mlx)
+
+
+## Importing a model from Safetensors weights
+
+First, create a `Modelfile` with a `FROM` command which points to the directory containing your Safetensors weights:
 
 ```dockerfile
 FROM /path/to/safetensors/directory
 ```
 
-For architectures not directly convertable by Ollama, see llama.cpp's [guide](https://github.com/ggerganov/llama.cpp/blob/master/README.md#prepare-and-quantize) on conversion. After conversion, see [Import GGUF](#import-gguf).
+If you create the Modelfile in the same directory as the weights, you can use the command `FROM .`.
 
-## Automatic Quantization
+Now run the `ollama create` command from the directory where you created the `Modelfile`:
 
-> [!NOTE]
-> Automatic quantization requires v0.1.35 or higher.
+```shell
+ollama create my-model
+```
 
-Ollama is capable of quantizing FP16 or FP32 models to any of the supported quantizations with the `-q/--quantize` flag in `ollama create`.
+
+Lastly, test the model:
+
+```shell
+ollama run my-model
+```
+
+Ollama supports importing models for several different architectures including:
+
+  * Llama (including Llama 2, Llama 3, and Llama 3.1);
+  * Mistral (including Mistral 1, Mistral 2, and Mixtral);
+  * Gemma (including Gemma 1 and Gemma 2); and
+  * Phi3
+
+This includes importing foundation models as well as any fine tuned models which have been _fused_ with a foundation model.
+
+
+## Importing a GGUF based model or adapter
+
+If you have a GGUF based model or adapter, it is possible to import it into Ollama. You can obtain a GGUF model or adapter by:
+
+  * converting a Safetensors model with the `convert_hf_to_gguf.py` from Llama.cpp;
+  * converting a Safetensors adapter with the `convert_lora_to_gguf.py` from Llama.cpp; or
+  * downloading a model or adapter from a place such as HuggingFace
+
+To import a GGUF model, create a `Modelfile` containing:
+
+```dockerfile
+FROM /path/to/file.gguf
+```
+
+For a GGUF adapter, create the `Modelfile` with:
+
+```dockerfile
+FROM <model name>
+ADAPTER /path/to/file.gguf
+```
+
+When importing a GGUF adapter, it's important to use the same base model that the adapter was created with. You can use:
+
+  * a model from Ollama
+  * a GGUF file
+  * a Safetensors based model
+
+Once you have created your `Modelfile`, use the `ollama create` command to build the model.
+
+```shell
+ollama create my-model
+```
+
+## Quantizing a Model
+
+Quantizing a model allows you to run models faster and with less memory consumption, but at reduced accuracy. This allows you to run a model on more modest hardware.
+ +Ollama can quantize FP16 and FP32 based models into different quantization levels using the `-q/--quantize` flag with the `ollama create` command. + +First, create a Modelfile with the FP16 or FP32 based model you wish to quantize. ```dockerfile FROM /path/to/my/gemma/f16/model ``` +Use `ollama create` to then create the quantized model. + ```shell -$ ollama create -q Q4_K_M mymodel +$ ollama create --quantize q4_K_M mymodel transferring model data quantizing F16 model to Q4_K_M creating new layer sha256:735e246cc1abfd06e9cdcf95504d6789a6cd1ad7577108a70d9902fef503c1bd @@ -47,42 +134,53 @@ success ### Supported Quantizations -- `Q4_0` -- `Q4_1` -- `Q5_0` -- `Q5_1` -- `Q8_0` +- `q4_0` +- `q4_1` +- `q5_0` +- `q5_1` +- `q8_0` #### K-means Quantizations -- `Q3_K_S` -- `Q3_K_M` -- `Q3_K_L` -- `Q4_K_S` -- `Q4_K_M` -- `Q5_K_S` -- `Q5_K_M` -- `Q6_K` +- `q3_K_S` +- `q3_K_M` +- `q3_K_L` +- `q4_K_S` +- `q4_K_M` +- `q5_K_S` +- `q5_K_M` +- `q6_K` -## Template Detection -> [!NOTE] -> Template detection requires v0.1.42 or higher. +## Sharing your model on ollama.com -Ollama uses model metadata, specifically `tokenizer.chat_template`, to automatically create a template appropriate for the model you're importing. +You can share any model you have created by pushing it to [ollama.com](https://ollama.com) so that other users can try it out. -```dockerfile -FROM /path/to/my/gemma/model -``` +First, use your browser to go to the [Ollama Sign-Up](https://ollama.com/signup) page. If you already have an account, you can skip this step. + +Sign-Up + +The `Username` field will be used as part of your model's name (e.g. `jmorganca/mymodel`), so make sure you are comfortable with the username that you have selected. + +Now that you have created an account and are signed-in, go to the [Ollama Keys Settings](https://ollama.com/settings/keys) page. + +Follow the directions on the page to determine where your Ollama Public Key is located. + +Ollama Keys + +Click on the `Add Ollama Public Key` button, and copy and paste the contents of your Ollama Public Key into the text field. + +To push a model to [ollama.com](https://ollama.com), first make sure that it is named correctly with your username. You may have to use the `ollama cp` command to copy +your model to give it the correct name. Once you're happy with your model's name, use the `ollama push` command to push it to [ollama.com](https://ollama.com). ```shell -$ ollama create mymodel -transferring model data -using autodetected template gemma-instruct -creating new layer sha256:baa2a0edc27d19cc6b7537578a9a7ba1a4e3214dc185ed5ae43692b319af7b84 -creating new layer sha256:ba66c3309914dbef07e5149a648fd1877f030d337a4f240d444ea335008943cb -writing manifest -success +ollama cp mymodel myuser/mymodel +ollama push myuser/mymodel +``` + +Once your model has been pushed, other users can pull and run it by using the command: + +```shell +ollama run myuser/mymodel ``` -Defining a template in the Modelfile will disable this feature which may be useful if you want to use a different template than the autodetected one. 
diff --git a/docs/linux.md b/docs/linux.md index ec7306560..0eec014f4 100644 --- a/docs/linux.md +++ b/docs/linux.md @@ -1,40 +1,59 @@ -# Ollama on Linux +# Linux ## Install -Install Ollama running this one-liner: +To install Ollama, run the following command: -> - -```bash +```shell curl -fsSL https://ollama.com/install.sh | sh ``` -## AMD Radeon GPU support - -While AMD has contributed the `amdgpu` driver upstream to the official linux -kernel source, the version is older and may not support all ROCm features. We -recommend you install the latest driver from -https://www.amd.com/en/support/linux-drivers for best support of your Radeon -GPU. - ## Manual install -### Download the `ollama` binary +Download and extract the package: -Ollama is distributed as a self-contained binary. Download it to a directory in your PATH: +```shell +curl -L https://ollama.com/download/ollama-linux-amd64.tgz -o ollama-linux-amd64.tgz +sudo tar -C /usr -xzf ollama-linux-amd64.tgz +``` -```bash -sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama -sudo chmod +x /usr/bin/ollama +Start Ollama: + +```shell +ollama serve +``` + +In another terminal, verify that Ollama is running: + +```shell +ollama -v +``` + +### AMD GPU install + +If you have an AMD GPU, also download and extract the additional ROCm package: + +```shell +curl -L https://ollama.com/download/ollama-linux-amd64-rocm.tgz -o ollama-linux-amd64-rocm.tgz +sudo tar -C /usr -xzf ollama-linux-amd64-rocm.tgz +``` + +### ARM64 install + +Download and extract the ARM64-specific package: + +```shell +curl -L https://ollama.com/download/ollama-linux-arm64.tgz -o ollama-linux-arm64.tgz +sudo tar -C /usr -xzf ollama-linux-arm64.tgz ``` ### Adding Ollama as a startup service (recommended) -Create a user for Ollama: +Create a user and group for Ollama: -```bash -sudo useradd -r -s /bin/false -m -d /usr/share/ollama ollama +```shell +sudo useradd -r -s /bin/false -U -m -d /usr/share/ollama ollama +sudo usermod -a -G ollama $(whoami) ``` Create a service file in `/etc/systemd/system/ollama.service`: @@ -50,6 +69,7 @@ User=ollama Group=ollama Restart=always RestartSec=3 +Environment="PATH=$PATH" [Install] WantedBy=default.target @@ -57,47 +77,54 @@ WantedBy=default.target Then start the service: -```bash +```shell sudo systemctl daemon-reload sudo systemctl enable ollama ``` -### Install CUDA drivers (optional – for Nvidia GPUs) +### Install CUDA drivers (optional) [Download and install](https://developer.nvidia.com/cuda-downloads) CUDA. Verify that the drivers are installed by running the following command, which should print details about your GPU: -```bash +```shell nvidia-smi ``` -### Install ROCm (optional - for Radeon GPUs) -[Download and Install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html) +### Install AMD ROCm drivers (optional) -Make sure to install ROCm v6 +[Download and Install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html) ROCm v6. ### Start Ollama -Start Ollama using `systemd`: +Start Ollama and verify it is running: -```bash +```shell sudo systemctl start ollama +sudo systemctl status ollama ``` -## Update +> [!NOTE] +> While AMD has contributed the `amdgpu` driver upstream to the official linux +> kernel source, the version is older and may not support all ROCm features. We +> recommend you install the latest driver from +> https://www.amd.com/en/support/linux-drivers for best support of your Radeon +> GPU. 
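+
+For example, to confirm which GPU Ollama detected at startup (assuming the systemd service set up above), you can search the service logs:
+
+```shell
+journalctl -u ollama | grep -i -e gpu -e rocm -e cuda
+```
+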
-Update ollama by running the install script again: +## Updating -```bash +Update Ollama by running the install script again: + +```shell curl -fsSL https://ollama.com/install.sh | sh ``` -Or by downloading the ollama binary: +Or by re-downloading Ollama: -```bash -sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama -sudo chmod +x /usr/bin/ollama +```shell +curl -L https://ollama.com/download/ollama-linux-amd64.tgz -o ollama-linux-amd64.tgz +sudo tar -C /usr -xzf ollama-linux-amd64.tgz ``` ## Installing specific versions @@ -106,15 +133,15 @@ Use `OLLAMA_VERSION` environment variable with the install script to install a s For example: -``` -curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.1.32 sh +```shell +curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.3.9 sh ``` ## Viewing logs To view logs of Ollama running as a startup service, run: -```bash +```shell journalctl -e -u ollama ``` @@ -122,7 +149,7 @@ journalctl -e -u ollama Remove the ollama service: -```bash +```shell sudo systemctl stop ollama sudo systemctl disable ollama sudo rm /etc/systemd/system/ollama.service @@ -130,13 +157,13 @@ sudo rm /etc/systemd/system/ollama.service Remove the ollama binary from your bin directory (either `/usr/local/bin`, `/usr/bin`, or `/bin`): -```bash +```shell sudo rm $(which ollama) ``` Remove the downloaded models and Ollama service user and group: -```bash +```shell sudo rm -r /usr/share/ollama sudo userdel ollama sudo groupdel ollama diff --git a/docs/modelfile.md b/docs/modelfile.md index 21ee1826e..a33f180b7 100644 --- a/docs/modelfile.md +++ b/docs/modelfile.md @@ -1,6 +1,7 @@ # Ollama Model File -> Note: `Modelfile` syntax is in development +> [!NOTE] +> `Modelfile` syntax is in development A model file is the blueprint to create and share models with Ollama. @@ -10,8 +11,9 @@ A model file is the blueprint to create and share models with Ollama. - [Examples](#examples) - [Instructions](#instructions) - [FROM (Required)](#from-required) - - [Build from llama3](#build-from-llama3) - - [Build from a bin file](#build-from-a-bin-file) + - [Build from existing model](#build-from-existing-model) + - [Build from a Safetensors model](#build-from-a-safetensors-model) + - [Build from a GGUF file](#build-from-a-gguf-file) - [PARAMETER](#parameter) - [Valid Parameters and Values](#valid-parameters-and-values) - [TEMPLATE](#template) @@ -48,7 +50,7 @@ INSTRUCTION arguments An example of a `Modelfile` creating a mario blueprint: ```modelfile -FROM llama3 +FROM llama3.1 # sets the temperature to 1 [higher is more creative, lower is more coherent] PARAMETER temperature 1 # sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token @@ -70,10 +72,10 @@ More examples are available in the [examples directory](../examples). To view the Modelfile of a given model, use the `ollama show --modelfile` command. ```bash - > ollama show --modelfile llama3 + > ollama show --modelfile llama3.1 # Modelfile generated by "ollama show" # To build a new Modelfile based on this one, replace the FROM line with: - # FROM llama3:latest + # FROM llama3.1:latest FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29 TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|> @@ -98,22 +100,39 @@ The `FROM` instruction defines the base model to use when creating a model. 
 FROM <model name>:<tag>
 ```
 
-#### Build from llama3
+#### Build from existing model
 
 ```modelfile
-FROM llama3
+FROM llama3.1
 ```
 
 A list of available base models:
 <https://github.com/ollama/ollama#model-library>
+Additional models can be found at:
+<https://ollama.com/library>
 
-#### Build from a `bin` file
+#### Build from a Safetensors model
 
 ```modelfile
-FROM ./ollama-model.bin
+FROM <model directory>
 ```
 
-This bin file location should be specified as an absolute path or relative to the `Modelfile` location.
+The model directory should contain the Safetensors weights for a supported architecture.
+
+Currently supported model architectures:
+  * Llama (including Llama 2, Llama 3, and Llama 3.1)
+  * Mistral (including Mistral 1, Mistral 2, and Mixtral)
+  * Gemma (including Gemma 1 and Gemma 2)
+  * Phi3
+
+#### Build from a GGUF file
+
+```modelfile
+FROM ./ollama-model.gguf
+```
+
+The GGUF file location should be specified as an absolute path or relative to the `Modelfile` location.
+
 
 ### PARAMETER
 
@@ -140,6 +159,7 @@ PARAMETER <parameter> <parametervalue>
 | num_predict | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) | int | num_predict 42 |
 | top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 |
 | top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 |
+| min_p | Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0) | float | min_p 0.05 |
 
 ### TEMPLATE
 
@@ -172,10 +192,23 @@ SYSTEM """<system message>"""
 
 ### ADAPTER
 
-The `ADAPTER` instruction is an optional instruction that specifies any LoRA adapter that should apply to the base model. The value of this instruction should be an absolute path or a path relative to the Modelfile and the file must be in a GGML file format. The adapter should be tuned from the base model otherwise the behaviour is undefined.
+The `ADAPTER` instruction specifies a fine tuned LoRA adapter that should apply to the base model. The value of the adapter should be an absolute path or a path relative to the Modelfile. The base model should be specified with a `FROM` instruction. If the base model is not the same as the base model that the adapter was tuned from, the behaviour will be erratic.
+ +#### Safetensor adapter ```modelfile -ADAPTER ./ollama-lora.bin +ADAPTER +``` + +Currently supported Safetensor adapters: + * Llama (including Llama 2, Llama 3, and Llama 3.1) + * Mistral (including Mistral 1, Mistral 2, and Mixtral) + * Gemma (including Gemma 1 and Gemma 2) + +#### GGUF adapter + +```modelfile +ADAPTER ./ollama-lora.gguf ``` ### LICENSE diff --git a/docs/openai.md b/docs/openai.md index 81b967eb7..c6df0fecb 100644 --- a/docs/openai.md +++ b/docs/openai.md @@ -25,7 +25,38 @@ chat_completion = client.chat.completions.create( 'content': 'Say this is a test', } ], - model='llama3', + model='llama3.1', +) + +response = client.chat.completions.create( + model="llava", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image_url", + "image_url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdq
a84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC", + }, + ], + } + ], + max_tokens=300, +) + +completion = client.completions.create( + model="llama3.1", + prompt="Say this is a test", +) + +list_completion = client.models.list() + +model = client.models.retrieve("llama3.1") + +embeddings = client.embeddings.create( + model="all-minilm", + input=["why is the sky blue?", "why is the grass green?"], ) ``` @@ -42,18 +73,48 @@ const openai = new OpenAI({ }) const chatCompletion = await openai.chat.completions.create({ - messages: [{ role: 'user', content: 'Say this is a test' }], - model: 'llama3', + messages: [{ role: 'user', content: 'Say this is a test' }], + model: 'llama3.1', +}) + +const response = await openai.chat.completions.create({ + model: "llava", + messages: [ + { + role: "user", + content: [ + { type: "text", text: "What's in this image?" 
}, + { + type: "image_url", + image_url: "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggI
rXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC", + }, + ], + }, + ], +}) + +const completion = await openai.completions.create({ + model: "llama3.1", + prompt: "Say this is a test.", +}) + +const listCompletion = await openai.models.list() + +const model = await openai.models.retrieve("llama3.1") + +const embedding = await openai.embeddings.create({ + model: "all-minilm", + input: ["why is the sky blue?", "why is the grass green?"], }) ``` ### `curl` -``` +``` shell curl http://localhost:11434/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "llama3", + "model": "llama3.1", "messages": [ { "role": "system", @@ -65,6 +126,48 @@ curl http://localhost:11434/v1/chat/completions \ } ] }' + +curl http://localhost:11434/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "llava", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What'\''s in this image?" 
+ }, + { + "type": "image_url", + "image_url": { + "url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroW
j4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC" + } + } + ] + } + ], + "max_tokens": 300 + }' + +curl http://localhost:11434/v1/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "llama3.1", + "prompt": "Say this is a test" + }' + +curl http://localhost:11434/v1/models + +curl http://localhost:11434/v1/models/llama3.1 + +curl http://localhost:11434/v1/embeddings \ + -H "Content-Type: application/json" \ + -d '{ + "model": "all-minilm", + "input": ["why is the sky blue?", "why is the grass green?"] + }' ``` ## Endpoints @@ -77,8 +180,8 @@ curl http://localhost:11434/v1/chat/completions \ - [x] Streaming - [x] JSON mode - [x] Reproducible outputs -- [ ] Vision -- [ ] Function calling +- [x] Vision +- [x] Tools (streaming support coming soon) - [ ] Logprobs #### Supported request fields @@ -86,7 +189,10 @@ curl http://localhost:11434/v1/chat/completions \ - [x] `model` - [x] `messages` - [x] Text `content` - - [ ] Array of `content` parts + - [x] Image `content` + - [x] Base64 encoded image + - [ ] Image URL + - [x] Array of `content` parts - [x] `frequency_penalty` - [x] `presence_penalty` - [x] `response_format` @@ -96,22 +202,79 @@ curl http://localhost:11434/v1/chat/completions \ - [x] `temperature` - [x] `top_p` - [x] `max_tokens` -- [ ] `logit_bias` -- [ ] `tools` +- [x] `tools` - [ ] `tool_choice` +- [ ] `logit_bias` +- [ ] `user` +- [ ] `n` + +### `/v1/completions` + +#### Supported features + +- [x] Completions +- [x] Streaming +- [x] JSON mode +- [x] Reproducible outputs +- [ ] Logprobs + +#### Supported request fields + +- [x] `model` +- [x] `prompt` +- [x] `frequency_penalty` +- [x] `presence_penalty` +- [x] `seed` +- [x] `stop` +- [x] `stream` +- [x] `temperature` +- [x] `top_p` +- [x] `max_tokens` +- [x] `suffix` +- [ ] `best_of` +- [ ] `echo` +- [ ] `logit_bias` - [ ] `user` - [ ] `n` #### Notes -- `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached +- `prompt` currently only accepts a string + +### `/v1/models` + +#### Notes + +- `created` corresponds to when the model was last modified +- `owned_by` corresponds to the ollama username, defaulting to `"library"` + +### `/v1/models/{model}` + +#### Notes + +- `created` corresponds 
to when the model was last modified
+- `owned_by` corresponds to the ollama username, defaulting to `"library"`
+
+### `/v1/embeddings`
+
+#### Supported request fields
+
+- [x] `model`
+- [x] `input`
+  - [x] string
+  - [x] array of strings
+  - [ ] array of tokens
+  - [ ] array of token arrays
+- [ ] `encoding_format`
+- [ ] `dimensions`
+- [ ] `user`
 
 ## Models
 
 Before using a model, pull it locally `ollama pull`:
 
 ```shell
-ollama pull llama3
+ollama pull llama3.1
 ```
 
 ### Default model names
 
@@ -119,7 +282,7 @@ For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name:
 
 ```
-ollama cp llama3 gpt-3.5-turbo
+ollama cp llama3.1 gpt-3.5-turbo
 ```
 
 Afterwards, this new model name can be specified the `model` field:
 
@@ -137,3 +300,28 @@ curl http://localhost:11434/v1/chat/completions \
     ]
 }'
 ```
+
+### Setting the context size
+
+The OpenAI API does not have a way of setting the context size for a model. If you need to change the context size, create a `Modelfile` which looks like:
+
+```modelfile
+FROM <some model>
+PARAMETER num_ctx <context size>
+```
+
+Use the `ollama create mymodel` command to create a new model with the updated context size. Call the API with the updated model name:
+
+```shell
+curl http://localhost:11434/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "mymodel",
+    "messages": [
+      {
+        "role": "user",
+        "content": "Hello!"
+      }
+    ]
+  }'
+```
diff --git a/docs/template.md b/docs/template.md
new file mode 100644
index 000000000..192d878d4
--- /dev/null
+++ b/docs/template.md
@@ -0,0 +1,167 @@
+# Template
+
+Ollama provides a powerful templating engine backed by Go's built-in templating engine to construct prompts for your large language model. This feature is a valuable tool to get the most out of your models.
+
+## Basic Template Structure
+
+A basic Go template consists of three main parts:
+
+* **Layout**: The overall structure of the template.
+* **Variables**: Placeholders for dynamic data that will be replaced with actual values when the template is rendered.
+* **Functions**: Custom functions or logic that can be used to manipulate the template's content.
+
+Here's an example of a simple chat template:
+
+```gotmpl
+{{- range .Messages }}
+{{ .Role }}: {{ .Content }}
+{{- end }}
+```
+
+In this example, we have:
+
+* A basic messages structure (layout)
+* Three variables: `Messages`, `Role`, and `Content` (variables)
+* A custom function (action) that iterates over an array of items (`range .Messages`) and displays each item
+
+## Adding templates to your model
+
+By default, models imported into Ollama have a default template of `{{ .Prompt }}`, i.e. user inputs are sent verbatim to the LLM. This is appropriate for text or code completion models but lacks essential markers for chat or instruction models.
+
+Omitting a template in these models puts the responsibility of correctly templating input onto the user. Adding a template allows users to easily get the best results from the model.
+
+To add templates to your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3.
+
+```dockerfile
+FROM llama3.1
+
+TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>
+
+{{ .System }}<|eot_id|>
+{{- end }}
+{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
+
+{{ .Content }}<|eot_id|>
+{{- end }}<|start_header_id|>assistant<|end_header_id|>
+
+"""
+```
+
+## Variables
+
+`System` (string): system prompt
+
+`Prompt` (string): user prompt
+
+`Response` (string): assistant response
+
+`Suffix` (string): text inserted after the assistant's response
+
+`Messages` (list): list of messages
+
+`Messages[].Role` (string): role which can be one of `system`, `user`, `assistant`, or `tool`
+
+`Messages[].Content` (string): message content
+
+`Messages[].ToolCalls` (list): list of tools the model wants to call
+
+`Messages[].ToolCalls[].Function` (object): function to call
+
+`Messages[].ToolCalls[].Function.Name` (string): function name
+
+`Messages[].ToolCalls[].Function.Arguments` (map): mapping of argument name to argument value
+
+`Tools` (list): list of tools the model can access
+
+`Tools[].Type` (string): schema type. `type` is always `function`
+
+`Tools[].Function` (object): function definition
+
+`Tools[].Function.Name` (string): function name
+
+`Tools[].Function.Description` (string): function description
+
+`Tools[].Function.Parameters` (object): function parameters
+
+`Tools[].Function.Parameters.Type` (string): schema type. `type` is always `object`
+
+`Tools[].Function.Parameters.Required` (list): list of required properties
+
+`Tools[].Function.Parameters.Properties` (map): mapping of property name to property definition
+
+`Tools[].Function.Parameters.Properties[].Type` (string): property type
+
+`Tools[].Function.Parameters.Properties[].Description` (string): property description
+
+`Tools[].Function.Parameters.Properties[].Enum` (list): list of valid values
+
+## Tips and Best Practices
+
+Keep the following tips and best practices in mind when working with Go templates:
+
+* **Be mindful of dot**: Control flow structures like `range` and `with` change the value of `.`
+* **Out-of-scope variables**: Use `$.` to reference variables not currently in scope, starting from the root
+* **Whitespace control**: Use `-` to trim leading (`{{-`) and trailing (`-}}`) whitespace
+
+## Examples
+
+### Example Messages
+
+#### ChatML
+
+ChatML is a popular template format. It can be used for models such as Databricks' DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
+
+```gotmpl
+{{- range .Messages }}<|im_start|>{{ .Role }}
+{{ .Content }}<|im_end|>
+{{ end }}<|im_start|>assistant
+```
+
+### Example Tools
+
+Tools support can be added to a model by adding a `{{ .Tools }}` node to the template. This feature is useful for models trained to call external tools and can be a powerful tool for retrieving real-time data or performing complex tasks.
+
+#### Mistral
+
+Mistral v0.3 and Mixtral 8x22B support tool calling.
+ +```gotmpl +{{- range $index, $_ := .Messages }} +{{- if eq .Role "user" }} +{{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS] +{{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }} + +{{ end }}{{ .Content }}[/INST] +{{- else if eq .Role "assistant" }} +{{- if .Content }} {{ .Content }} +{{- else if .ToolCalls }}[TOOL_CALLS] [ +{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}} +{{- end }}] +{{- end }} +{{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS] +{{- end }} +{{- end }} +``` + +### Example Fill-in-Middle + +Fill-in-middle support can be added to a model by adding a `{{ .Suffix }}` node to the template. This feature is useful for models that are trained to generate text in the middle of user input, such as code completion models. + +#### CodeLlama + +CodeLlama [7B](https://ollama.com/library/codellama:7b-code) and [13B](https://ollama.com/library/codellama:13b-code) code completion models support fill-in-middle. + +```gotmpl +
+<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
+```
+
+> [!NOTE]
+> CodeLlama 34B and 70B code completion and all instruct and Python fine-tuned models do not support fill-in-middle.
+
+#### Codestral
+
+Codestral [22B](https://ollama.com/library/codestral:22b) supports fill-in-middle.
+
+```gotmpl
+[SUFFIX]{{ .Suffix }}[PREFIX] {{ .Prompt }}
+```
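+
+As a rough sketch of how fill-in-middle is exercised at the API level, the `suffix` field of the `/api/generate` endpoint carries the text after the insertion point (this assumes a fill-in-middle capable model such as `codestral` has been pulled locally):
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "codestral",
+  "prompt": "def fib(n):",
+  "suffix": "    return result",
+  "stream": false
+}'
+```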
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index de29b344c..0a89b87f9 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -9,7 +9,7 @@ cat ~/.ollama/logs/server.log
 On **Linux** systems with systemd, the logs can be found with this command:
 
 ```shell
-journalctl -u ollama
+journalctl -u ollama --no-pager
 ```
 
 When you run Ollama in a **container**, the logs go to stdout/stderr in the container:
@@ -70,14 +70,18 @@ curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION="0.1.29" sh
 
 If your system is configured with the "noexec" flag where Ollama stores its temporary executable files, you can specify an alternate location by setting OLLAMA_TMPDIR to a location writable by the user ollama runs as. For example OLLAMA_TMPDIR=/usr/share/ollama/
 
-## Container fails to run on NVIDIA GPU
+## NVIDIA GPU Discovery
 
-Make sure you've set up the container runtime first as described in [docker.md](./docker.md)
+When Ollama starts up, it takes inventory of the GPUs present in the system to determine compatibility and how much VRAM is available.  Sometimes this discovery can fail to find your GPUs.  In general, running the latest driver will yield the best results.
 
-Sometimes the container runtime can have difficulties initializing the GPU. When you check the server logs, this can show up as various error codes, such as "3" (not initialized), "46" (device unavailable), "100" (no device), "999" (unknown), or others. The following troubleshooting techniques may help resolve the problem
+### Linux NVIDIA Troubleshooting
 
-- Is the container runtime working?  Try `docker run --gpus all ubuntu nvidia-smi` - if this doesn't work, Ollama wont be able to see your NVIDIA GPU.
-- Is the uvm driver not loaded? `sudo nvidia-modprobe -u`
+If you are using a container to run Ollama, make sure you've set up the container runtime first as described in [docker.md](./docker.md).
+
+Sometimes Ollama can have difficulty initializing the GPU. When you check the server logs, this can show up as various error codes, such as "3" (not initialized), "46" (device unavailable), "100" (no device), "999" (unknown), or others. The following troubleshooting techniques may help resolve the problem:
+
+- If you are using a container, is the container runtime working?  Try `docker run --gpus all ubuntu nvidia-smi` - if this doesn't work, Ollama won't be able to see your NVIDIA GPU.
+- Is the uvm driver loaded? `sudo nvidia-modprobe -u`
 - Try reloading the nvidia_uvm driver - `sudo rmmod nvidia_uvm` then `sudo modprobe nvidia_uvm`
 - Try rebooting
 - Make sure you're running the latest nvidia drivers
@@ -85,3 +89,19 @@ Sometimes the container runtime can have difficulties initializing the GPU. When
 If none of those resolve the problem, gather additional information and file an issue:
 - Set `CUDA_ERROR_LEVEL=50` and try again to get more diagnostic logs
 - Check dmesg for any errors `sudo dmesg | grep -i nvrm` and `sudo dmesg | grep -i nvidia`
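+
+For example, a one-off verbose run (assuming Ollama is started manually rather than via the service) might look like:
+
+```shell
+CUDA_ERROR_LEVEL=50 ollama serve 2>server-debug.log
+```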
+
+
+## AMD GPU Discovery
+
+On Linux, AMD GPU access typically requires `video` and/or `render` group membership to access the `/dev/kfd` device.  If permissions are not set up correctly, Ollama will detect this and report an error in the server log.
+
+When running in a container, in some Linux distributions and container runtimes, the ollama process may be unable to access the GPU.  Use `ls -ld /dev/kfd /dev/dri /dev/dri/*` on the host system to determine the group assignments on your system, and pass additional `--group-add ...` arguments to the container so it can access the required devices.
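+
+For example, if `ls -ln` on a hypothetical host shows `/dev/kfd` owned by group id 110 and the render devices by group id 44, the groups can be passed numerically:
+
+```shell
+docker run -d --device /dev/kfd --device /dev/dri \
+  --group-add 110 --group-add 44 \
+  -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
+```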
+
+If you are experiencing problems getting Ollama to correctly discover or use your GPU for inference, the following may help isolate the failure.
+- `AMD_LOG_LEVEL=3` Enable info log levels in the AMD HIP/ROCm libraries.  This can surface more detailed error codes that help when troubleshooting problems.
+- `OLLAMA_DEBUG=1` Report additional information during GPU discovery.
+- Check dmesg for any errors from amdgpu or kfd drivers `sudo dmesg | grep -i amdgpu` and `sudo dmesg | grep -i kfd`
+
+## Windows Terminal Errors
+
+Older versions of Windows 10 (e.g., 21H1) are known to have a bug where the standard terminal program does not display control characters correctly.  This can result in long strings of characters like `←[?25h←[?25l` being displayed, sometimes erroring with `The parameter is incorrect`.  To resolve this problem, please update to Win 10 22H1 or newer.
diff --git a/docs/tutorials/langchainjs.md b/docs/tutorials/langchainjs.md
index 4d60afb64..f925869b5 100644
--- a/docs/tutorials/langchainjs.md
+++ b/docs/tutorials/langchainjs.md
@@ -15,7 +15,7 @@ import { Ollama } from "@langchain/community/llms/ollama";
 
 const ollama = new Ollama({
   baseUrl: "http://localhost:11434",
-  model: "llama3",
+  model: "llama3.1",
 });
 
 const answer = await ollama.invoke(`why is the sky blue?`);
@@ -23,7 +23,7 @@ const answer = await ollama.invoke(`why is the sky blue?`);
 console.log(answer);
 ```
 
-That will get us the same thing as if we ran `ollama run llama3 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
+That will get us the same thing as if we ran `ollama run llama3.1 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
 
 ```bash
 npm install cheerio
diff --git a/docs/windows.md b/docs/windows.md
index abc0eb300..372a35aa8 100644
--- a/docs/windows.md
+++ b/docs/windows.md
@@ -19,15 +19,17 @@ Logs will often be helpful in diagnosing the problem (see
 
 ## System Requirements
 
-* Windows 10 or newer, Home or Pro
+* Windows 10 22H2 or newer, Home or Pro
 * NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
 * AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
 
+Ollama uses Unicode characters for progress indication, which may render as unknown squares in some older terminal fonts in Windows 10. If you see this, try changing your terminal font settings.
+
 ## API Access
 
 Here's a quick example showing API access from `powershell`
 ```powershell
-(Invoke-WebRequest -method POST -Body '{"model":"llama3", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
+(Invoke-WebRequest -method POST -Body '{"model":"llama3.1", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
 ```
 
 ## Troubleshooting
@@ -46,6 +48,9 @@ the explorer window by hitting `<cmd>+R` and type in:
 - `explorer %HOMEPATH%\.ollama` contains models and configuration
 - `explorer %TEMP%` contains temporary executable files in one or more `ollama*` directories
 
+## Uninstall
+
+The Ollama Windows installer registers an uninstaller application. You can uninstall Ollama under `Add or remove programs` in Windows Settings.
 
 ## Standalone CLI
 
diff --git a/envconfig/config.go b/envconfig/config.go
index c02c4878e..9c1490a93 100644
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -1,77 +1,226 @@
 package envconfig
 
 import (
-	"errors"
 	"fmt"
 	"log/slog"
+	"math"
 	"net"
+	"net/url"
 	"os"
 	"path/filepath"
 	"runtime"
 	"strconv"
 	"strings"
+	"time"
 )
 
-type OllamaHost struct {
-	Scheme string
-	Host   string
-	Port   string
+// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
+// Default is scheme "http" and host "127.0.0.1:11434"
+func Host() *url.URL {
+	defaultPort := "11434"
+
+	s := strings.TrimSpace(Var("OLLAMA_HOST"))
+	scheme, hostport, ok := strings.Cut(s, "://")
+	switch {
+	case !ok:
+		scheme, hostport = "http", s
+	case scheme == "http":
+		defaultPort = "80"
+	case scheme == "https":
+		defaultPort = "443"
+	}
+
+	hostport, path, _ := strings.Cut(hostport, "/")
+	host, port, err := net.SplitHostPort(hostport)
+	if err != nil {
+		host, port = "127.0.0.1", defaultPort
+		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
+			host = ip.String()
+		} else if hostport != "" {
+			host = hostport
+		}
+	}
+
+	if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
+		slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
+		port = defaultPort
+	}
+
+	return &url.URL{
+		Scheme: scheme,
+		Host:   net.JoinHostPort(host, port),
+		Path:   path,
+	}
 }
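+
+// For example, with OLLAMA_HOST unset, Host().String() is
+// "http://127.0.0.1:11434"; with OLLAMA_HOST=https://example.com/ollama it is
+// "https://example.com:443/ollama".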
 
-func (o OllamaHost) String() string {
-	return fmt.Sprintf("%s://%s:%s", o.Scheme, o.Host, o.Port)
+// Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
+func Origins() (origins []string) {
+	if s := Var("OLLAMA_ORIGINS"); s != "" {
+		origins = strings.Split(s, ",")
+	}
+
+	for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
+		origins = append(origins,
+			fmt.Sprintf("http://%s", origin),
+			fmt.Sprintf("https://%s", origin),
+			fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
+			fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
+		)
+	}
+
+	origins = append(origins,
+		"app://*",
+		"file://*",
+		"tauri://*",
+	)
+
+	return origins
 }
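+
+// For example, OLLAMA_ORIGINS=http://10.0.0.1 yields that origin in addition
+// to the defaults (localhost, 127.0.0.1, 0.0.0.0, and the app/file/tauri
+// schemes).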
 
-var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
+// Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
+// Default is $HOME/.ollama/models
+func Models() string {
+	if s := Var("OLLAMA_MODELS"); s != "" {
+		return s
+	}
+
+	home, err := os.UserHomeDir()
+	if err != nil {
+		panic(err)
+	}
+
+	return filepath.Join(home, ".ollama", "models")
+}
+
+// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
+// Negative values are treated as infinite. Zero is treated as no keep alive.
+// Default is 5 minutes.
+func KeepAlive() (keepAlive time.Duration) {
+	keepAlive = 5 * time.Minute
+	if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
+		if d, err := time.ParseDuration(s); err == nil {
+			keepAlive = d
+		} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
+			keepAlive = time.Duration(n) * time.Second
+		}
+	}
+
+	if keepAlive < 0 {
+		return time.Duration(math.MaxInt64)
+	}
+
+	return keepAlive
+}
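+
+// For example: OLLAMA_KEEP_ALIVE=10m keeps models loaded for ten minutes,
+// OLLAMA_KEEP_ALIVE=600 does the same (bare integers are seconds), and
+// OLLAMA_KEEP_ALIVE=-1 keeps models loaded indefinitely.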
+
+// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
+// Zero or negative values are treated as infinite.
+// Default is 5 minutes.
+func LoadTimeout() (loadTimeout time.Duration) {
+	loadTimeout = 5 * time.Minute
+	if s := Var("OLLAMA_LOAD_TIMEOUT"); s != "" {
+		if d, err := time.ParseDuration(s); err == nil {
+			loadTimeout = d
+		} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
+			loadTimeout = time.Duration(n) * time.Second
+		}
+	}
+
+	if loadTimeout <= 0 {
+		return time.Duration(math.MaxInt64)
+	}
+
+	return loadTimeout
+}
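+
+// For example: OLLAMA_LOAD_TIMEOUT=10m allows a load to stall for ten minutes,
+// OLLAMA_LOAD_TIMEOUT=600 does the same (bare integers are seconds), and
+// OLLAMA_LOAD_TIMEOUT=0 disables stall detection entirely.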
+
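+// Bool returns a function that reports whether the named environment variable
+// is truthy. Unset or empty is false; a non-empty value that fails
+// strconv.ParseBool (e.g. "on") is treated as true.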
+func Bool(k string) func() bool {
+	return func() bool {
+		if s := Var(k); s != "" {
+			b, err := strconv.ParseBool(s)
+			if err != nil {
+				return true
+			}
+
+			return b
+		}
+
+		return false
+	}
+}
 
 var (
-	// Set via OLLAMA_ORIGINS in the environment
-	AllowOrigins []string
-	// Set via OLLAMA_DEBUG in the environment
-	Debug bool
-	// Experimental flash attention
-	FlashAttention bool
-	// Set via OLLAMA_HOST in the environment
-	Host *OllamaHost
-	// Set via OLLAMA_KEEP_ALIVE in the environment
-	KeepAlive string
-	// Set via OLLAMA_LLM_LIBRARY in the environment
-	LLMLibrary string
-	// Set via OLLAMA_MAX_LOADED_MODELS in the environment
-	MaxRunners int
-	// Set via OLLAMA_MAX_QUEUE in the environment
-	MaxQueuedRequests int
-	// Set via OLLAMA_MODELS in the environment
-	ModelsDir string
-	// Set via OLLAMA_MAX_VRAM in the environment
-	MaxVRAM uint64
-	// Set via OLLAMA_NOHISTORY in the environment
-	NoHistory bool
-	// Set via OLLAMA_NOPRUNE in the environment
-	NoPrune bool
-	// Set via OLLAMA_NUM_PARALLEL in the environment
-	NumParallel int
-	// Set via OLLAMA_RUNNERS_DIR in the environment
-	RunnersDir string
-	// Set via OLLAMA_SCHED_SPREAD in the environment
-	SchedSpread bool
-	// Set via OLLAMA_TMPDIR in the environment
-	TmpDir string
-	// Set via OLLAMA_INTEL_GPU in the environment
-	IntelGpu bool
-
-	// Set via CUDA_VISIBLE_DEVICES in the environment
-	CudaVisibleDevices string
-	// Set via HIP_VISIBLE_DEVICES in the environment
-	HipVisibleDevices string
-	// Set via ROCR_VISIBLE_DEVICES in the environment
-	RocrVisibleDevices string
-	// Set via GPU_DEVICE_ORDINAL in the environment
-	GpuDeviceOrdinal string
-	// Set via HSA_OVERRIDE_GFX_VERSION in the environment
-	HsaOverrideGfxVersion string
+	// Debug enables additional debug information.
+	Debug = Bool("OLLAMA_DEBUG")
+	// FlashAttention enables the experimental flash attention feature.
+	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
+	// NoHistory disables readline history.
+	NoHistory = Bool("OLLAMA_NOHISTORY")
+	// NoPrune disables pruning of model blobs on startup.
+	NoPrune = Bool("OLLAMA_NOPRUNE")
+	// SchedSpread allows scheduling models across all GPUs.
+	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
+	// IntelGPU enables experimental Intel GPU detection.
+	IntelGPU = Bool("OLLAMA_INTEL_GPU")
 )
 
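+// String returns a function that reads the named environment variable,
+// stripped of quotes and spaces via Var.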
+func String(s string) func() string {
+	return func() string {
+		return Var(s)
+	}
+}
+
+var (
+	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
+	TmpDir     = String("OLLAMA_TMPDIR")
+
+	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
+	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
+	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
+	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
+	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
+)
+
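+// Uint returns a function that parses the named environment variable as a
+// uint, logging a warning and returning defaultValue on invalid input.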
+func Uint(key string, defaultValue uint) func() uint {
+	return func() uint {
+		if s := Var(key); s != "" {
+			if n, err := strconv.ParseUint(s, 10, 64); err != nil {
+				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
+			} else {
+				return uint(n)
+			}
+		}
+
+		return defaultValue
+	}
+}
+
+var (
+	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
+	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
+	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
+	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
+	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
+	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
+	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
+	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
+)
+
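+// Uint64 returns a function that parses the named environment variable as a
+// uint64, logging a warning and returning defaultValue on invalid input.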
+func Uint64(key string, defaultValue uint64) func() uint64 {
+	return func() uint64 {
+		if s := Var(key); s != "" {
+			if n, err := strconv.ParseUint(s, 10, 64); err != nil {
+				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
+			} else {
+				return n
+			}
+		}
+
+		return defaultValue
+	}
+}
+
+// GpuOverhead reserves a portion of VRAM per GPU (bytes). GpuOverhead can be configured via the OLLAMA_GPU_OVERHEAD environment variable.
+var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
+
 type EnvVar struct {
 	Name        string
 	Value       any
@@ -80,31 +229,45 @@ type EnvVar struct {
 
 func AsMap() map[string]EnvVar {
 	ret := map[string]EnvVar{
-		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
-		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
-		"OLLAMA_HOST":              {"OLLAMA_HOST", Host, "IP Address for the ollama server (default 127.0.0.1:11434)"},
-		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
-		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
-		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models per GPU"},
-		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
-		"OLLAMA_MAX_VRAM":          {"OLLAMA_MAX_VRAM", MaxVRAM, "Maximum VRAM"},
-		"OLLAMA_MODELS":            {"OLLAMA_MODELS", ModelsDir, "The path to the models directory"},
-		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"},
-		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"},
-		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests"},
-		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
-		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"},
-		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread, "Always schedule model across all GPUs"},
-		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
+		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
+		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enable flash attention"},
+		"OLLAMA_GPU_OVERHEAD":      {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
+		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
+		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
+		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
+		"OLLAMA_LOAD_TIMEOUT":      {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
+		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
+		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
+		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
+		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
+		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
+		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
+		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
+		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
+		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
+
+		// Informational
+		"HTTP_PROXY":  {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},
+		"HTTPS_PROXY": {"HTTPS_PROXY", String("HTTPS_PROXY")(), "HTTPS proxy"},
+		"NO_PROXY":    {"NO_PROXY", String("NO_PROXY")(), "No proxy"},
 	}
+
+	if runtime.GOOS != "windows" {
+		// Windows environment variables are case-insensitive so there's no need to duplicate them
+		ret["http_proxy"] = EnvVar{"http_proxy", String("http_proxy")(), "HTTP proxy"}
+		ret["https_proxy"] = EnvVar{"https_proxy", String("https_proxy")(), "HTTPS proxy"}
+		ret["no_proxy"] = EnvVar{"no_proxy", String("no_proxy")(), "No proxy"}
+	}
+
 	if runtime.GOOS != "darwin" {
-		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices, "Set which NVIDIA devices are visible"}
-		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices, "Set which AMD devices are visible"}
-		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices, "Set which AMD devices are visible"}
-		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal, "Set which AMD devices are visible"}
-		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion, "Override the gfx used for all detected AMD GPUs"}
-		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGpu, "Enable experimental Intel GPU detection"}
+		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
+		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
+		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
+		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
+		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
+		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
 	}
+
 	return ret
 }
 
@@ -116,231 +279,16 @@ func Values() map[string]string {
 	return vals
 }
 
-var defaultAllowOrigins = []string{
-	"localhost",
-	"127.0.0.1",
-	"0.0.0.0",
+// Var returns an environment variable stripped of leading and trailing quotes or spaces
+func Var(key string) string {
+	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
 }
 
-// Clean quotes and spaces from the value
-func clean(key string) string {
-	return strings.Trim(os.Getenv(key), "\"' ")
-}
-
-func init() {
-	// default values
-	NumParallel = 0 // Autoselect
-	MaxRunners = 0  // Autoselect
-	MaxQueuedRequests = 512
-
-	LoadConfig()
-}
-
-func LoadConfig() {
-	if debug := clean("OLLAMA_DEBUG"); debug != "" {
-		d, err := strconv.ParseBool(debug)
-		if err == nil {
-			Debug = d
-		} else {
-			Debug = true
-		}
-	}
-
-	if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
-		d, err := strconv.ParseBool(fa)
-		if err == nil {
-			FlashAttention = d
-		}
-	}
-
-	RunnersDir = clean("OLLAMA_RUNNERS_DIR")
-	if runtime.GOOS == "windows" && RunnersDir == "" {
-		// On Windows we do not carry the payloads inside the main executable
-		appExe, err := os.Executable()
-		if err != nil {
-			slog.Error("failed to lookup executable path", "error", err)
-		}
-
-		cwd, err := os.Getwd()
-		if err != nil {
-			slog.Error("failed to lookup working directory", "error", err)
-		}
-
-		var paths []string
-		for _, root := range []string{filepath.Dir(appExe), cwd} {
-			paths = append(paths,
-				root,
-				filepath.Join(root, "windows-"+runtime.GOARCH),
-				filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
-			)
-		}
-
-		// Try a few variations to improve developer experience when building from source in the local tree
-		for _, p := range paths {
-			candidate := filepath.Join(p, "ollama_runners")
-			_, err := os.Stat(candidate)
-			if err == nil {
-				RunnersDir = candidate
-				break
-			}
-		}
-		if RunnersDir == "" {
-			slog.Error("unable to locate llm runner directory.  Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
-		}
-	}
-
-	TmpDir = clean("OLLAMA_TMPDIR")
-
-	userLimit := clean("OLLAMA_MAX_VRAM")
-	if userLimit != "" {
-		avail, err := strconv.ParseUint(userLimit, 10, 64)
-		if err != nil {
-			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
-		} else {
-			MaxVRAM = avail
-		}
-	}
-
-	LLMLibrary = clean("OLLAMA_LLM_LIBRARY")
-
-	if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" {
-		val, err := strconv.Atoi(onp)
-		if err != nil {
-			slog.Error("invalid setting, ignoring", "OLLAMA_NUM_PARALLEL", onp, "error", err)
-		} else {
-			NumParallel = val
-		}
-	}
-
-	if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
-		NoHistory = true
-	}
-
-	if spread := clean("OLLAMA_SCHED_SPREAD"); spread != "" {
-		s, err := strconv.ParseBool(spread)
-		if err == nil {
-			SchedSpread = s
-		} else {
-			SchedSpread = true
-		}
-	}
-
-	if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
-		NoPrune = true
-	}
-
-	if origins := clean("OLLAMA_ORIGINS"); origins != "" {
-		AllowOrigins = strings.Split(origins, ",")
-	}
-	for _, allowOrigin := range defaultAllowOrigins {
-		AllowOrigins = append(AllowOrigins,
-			fmt.Sprintf("http://%s", allowOrigin),
-			fmt.Sprintf("https://%s", allowOrigin),
-			fmt.Sprintf("http://%s", net.JoinHostPort(allowOrigin, "*")),
-			fmt.Sprintf("https://%s", net.JoinHostPort(allowOrigin, "*")),
-		)
-	}
-
-	AllowOrigins = append(AllowOrigins,
-		"app://*",
-		"file://*",
-		"tauri://*",
-	)
-
-	maxRunners := clean("OLLAMA_MAX_LOADED_MODELS")
-	if maxRunners != "" {
-		m, err := strconv.Atoi(maxRunners)
-		if err != nil {
-			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
-		} else {
-			MaxRunners = m
-		}
-	}
-
-	if onp := os.Getenv("OLLAMA_MAX_QUEUE"); onp != "" {
-		p, err := strconv.Atoi(onp)
-		if err != nil || p <= 0 {
-			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_QUEUE", onp, "error", err)
-		} else {
-			MaxQueuedRequests = p
-		}
-	}
-
-	KeepAlive = clean("OLLAMA_KEEP_ALIVE")
-
-	var err error
-	ModelsDir, err = getModelsDir()
-	if err != nil {
-		slog.Error("invalid setting", "OLLAMA_MODELS", ModelsDir, "error", err)
-	}
-
-	Host, err = getOllamaHost()
-	if err != nil {
-		slog.Error("invalid setting", "OLLAMA_HOST", Host, "error", err, "using default port", Host.Port)
-	}
-
-	if set, err := strconv.ParseBool(clean("OLLAMA_INTEL_GPU")); err == nil {
-		IntelGpu = set
-	}
-
-	CudaVisibleDevices = clean("CUDA_VISIBLE_DEVICES")
-	HipVisibleDevices = clean("HIP_VISIBLE_DEVICES")
-	RocrVisibleDevices = clean("ROCR_VISIBLE_DEVICES")
-	GpuDeviceOrdinal = clean("GPU_DEVICE_ORDINAL")
-	HsaOverrideGfxVersion = clean("HSA_OVERRIDE_GFX_VERSION")
-}
-
-func getModelsDir() (string, error) {
-	if models, exists := os.LookupEnv("OLLAMA_MODELS"); exists {
-		return models, nil
-	}
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return "", err
-	}
-	return filepath.Join(home, ".ollama", "models"), nil
-}
-
-func getOllamaHost() (*OllamaHost, error) {
-	defaultPort := "11434"
-
-	hostVar := os.Getenv("OLLAMA_HOST")
-	hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
-
-	scheme, hostport, ok := strings.Cut(hostVar, "://")
-	switch {
-	case !ok:
-		scheme, hostport = "http", hostVar
-	case scheme == "http":
-		defaultPort = "80"
-	case scheme == "https":
-		defaultPort = "443"
-	}
-
-	// trim trailing slashes
-	hostport = strings.TrimRight(hostport, "/")
-
-	host, port, err := net.SplitHostPort(hostport)
-	if err != nil {
-		host, port = "127.0.0.1", defaultPort
-		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
-			host = ip.String()
-		} else if hostport != "" {
-			host = hostport
-		}
-	}
-
-	if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
-		return &OllamaHost{
-			Scheme: scheme,
-			Host:   host,
-			Port:   defaultPort,
-		}, ErrInvalidHostPort
-	}
-
-	return &OllamaHost{
-		Scheme: scheme,
-		Host:   host,
-		Port:   port,
-	}, nil
+// LibRelativeToExe returns the relative path from the executable to the lib
+// directory. On Windows the binary is kept in the top-level directory, so this
+// returns "."; other platforms use a "bin" directory, so this returns "..".
+func LibRelativeToExe() string {
+	if runtime.GOOS == "windows" {
+		return "."
+	}
+	return ".."
 }
diff --git a/envconfig/config_test.go b/envconfig/config_test.go
index 7d923d629..7ac7c53e3 100644
--- a/envconfig/config_test.go
+++ b/envconfig/config_test.go
@@ -1,70 +1,269 @@
 package envconfig
 
 import (
-	"fmt"
-	"net"
+	"math"
 	"testing"
+	"time"
 
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
+	"github.com/google/go-cmp/cmp"
 )
 
-func TestConfig(t *testing.T) {
-	Debug = false // Reset whatever was loaded in init()
-	t.Setenv("OLLAMA_DEBUG", "")
-	LoadConfig()
-	require.False(t, Debug)
-	t.Setenv("OLLAMA_DEBUG", "false")
-	LoadConfig()
-	require.False(t, Debug)
-	t.Setenv("OLLAMA_DEBUG", "1")
-	LoadConfig()
-	require.True(t, Debug)
-	t.Setenv("OLLAMA_FLASH_ATTENTION", "1")
-	LoadConfig()
-	require.True(t, FlashAttention)
-}
-
-func TestClientFromEnvironment(t *testing.T) {
-	type testCase struct {
+func TestHost(t *testing.T) {
+	cases := map[string]struct {
 		value  string
 		expect string
-		err    error
+	}{
+		"empty":               {"", "http://127.0.0.1:11434"},
+		"only address":        {"1.2.3.4", "http://1.2.3.4:11434"},
+		"only port":           {":1234", "http://:1234"},
+		"address and port":    {"1.2.3.4:1234", "http://1.2.3.4:1234"},
+		"hostname":            {"example.com", "http://example.com:11434"},
+		"hostname and port":   {"example.com:1234", "http://example.com:1234"},
+		"zero port":           {":0", "http://:0"},
+		"too large port":      {":66000", "http://:11434"},
+		"too small port":      {":-1", "http://:11434"},
+		"ipv6 localhost":      {"[::1]", "http://[::1]:11434"},
+		"ipv6 world open":     {"[::]", "http://[::]:11434"},
+		"ipv6 no brackets":    {"::1", "http://[::1]:11434"},
+		"ipv6 + port":         {"[::1]:1337", "http://[::1]:1337"},
+		"extra space":         {" 1.2.3.4 ", "http://1.2.3.4:11434"},
+		"extra quotes":        {"\"1.2.3.4\"", "http://1.2.3.4:11434"},
+		"extra space+quotes":  {" \" 1.2.3.4 \" ", "http://1.2.3.4:11434"},
+		"extra single quotes": {"'1.2.3.4'", "http://1.2.3.4:11434"},
+		"http":                {"http://1.2.3.4", "http://1.2.3.4:80"},
+		"http port":           {"http://1.2.3.4:4321", "http://1.2.3.4:4321"},
+		"https":               {"https://1.2.3.4", "https://1.2.3.4:443"},
+		"https port":          {"https://1.2.3.4:4321", "https://1.2.3.4:4321"},
+		"proxy path":          {"https://example.com/ollama", "https://example.com:443/ollama"},
 	}
 
-	hostTestCases := map[string]*testCase{
-		"empty":               {value: "", expect: "127.0.0.1:11434"},
-		"only address":        {value: "1.2.3.4", expect: "1.2.3.4:11434"},
-		"only port":           {value: ":1234", expect: ":1234"},
-		"address and port":    {value: "1.2.3.4:1234", expect: "1.2.3.4:1234"},
-		"hostname":            {value: "example.com", expect: "example.com:11434"},
-		"hostname and port":   {value: "example.com:1234", expect: "example.com:1234"},
-		"zero port":           {value: ":0", expect: ":0"},
-		"too large port":      {value: ":66000", err: ErrInvalidHostPort},
-		"too small port":      {value: ":-1", err: ErrInvalidHostPort},
-		"ipv6 localhost":      {value: "[::1]", expect: "[::1]:11434"},
-		"ipv6 world open":     {value: "[::]", expect: "[::]:11434"},
-		"ipv6 no brackets":    {value: "::1", expect: "[::1]:11434"},
-		"ipv6 + port":         {value: "[::1]:1337", expect: "[::1]:1337"},
-		"extra space":         {value: " 1.2.3.4 ", expect: "1.2.3.4:11434"},
-		"extra quotes":        {value: "\"1.2.3.4\"", expect: "1.2.3.4:11434"},
-		"extra space+quotes":  {value: " \" 1.2.3.4 \" ", expect: "1.2.3.4:11434"},
-		"extra single quotes": {value: "'1.2.3.4'", expect: "1.2.3.4:11434"},
-	}
-
-	for k, v := range hostTestCases {
-		t.Run(k, func(t *testing.T) {
-			t.Setenv("OLLAMA_HOST", v.value)
-			LoadConfig()
-
-			oh, err := getOllamaHost()
-			if err != v.err {
-				t.Fatalf("expected %s, got %s", v.err, err)
-			}
-
-			if err == nil {
-				host := net.JoinHostPort(oh.Host, oh.Port)
-				assert.Equal(t, v.expect, host, fmt.Sprintf("%s: expected %s, got %s", k, v.expect, host))
+	for name, tt := range cases {
+		t.Run(name, func(t *testing.T) {
+			t.Setenv("OLLAMA_HOST", tt.value)
+			if host := Host(); host.String() != tt.expect {
+				t.Errorf("%s: expected %s, got %s", name, tt.expect, host.String())
+			}
+		})
+	}
+}
+
+func TestOrigins(t *testing.T) {
+	cases := []struct {
+		value  string
+		expect []string
+	}{
+		{"", []string{
+			"http://localhost",
+			"https://localhost",
+			"http://localhost:*",
+			"https://localhost:*",
+			"http://127.0.0.1",
+			"https://127.0.0.1",
+			"http://127.0.0.1:*",
+			"https://127.0.0.1:*",
+			"http://0.0.0.0",
+			"https://0.0.0.0",
+			"http://0.0.0.0:*",
+			"https://0.0.0.0:*",
+			"app://*",
+			"file://*",
+			"tauri://*",
+		}},
+		{"http://10.0.0.1", []string{
+			"http://10.0.0.1",
+			"http://localhost",
+			"https://localhost",
+			"http://localhost:*",
+			"https://localhost:*",
+			"http://127.0.0.1",
+			"https://127.0.0.1",
+			"http://127.0.0.1:*",
+			"https://127.0.0.1:*",
+			"http://0.0.0.0",
+			"https://0.0.0.0",
+			"http://0.0.0.0:*",
+			"https://0.0.0.0:*",
+			"app://*",
+			"file://*",
+			"tauri://*",
+		}},
+		{"http://172.16.0.1,https://192.168.0.1", []string{
+			"http://172.16.0.1",
+			"https://192.168.0.1",
+			"http://localhost",
+			"https://localhost",
+			"http://localhost:*",
+			"https://localhost:*",
+			"http://127.0.0.1",
+			"https://127.0.0.1",
+			"http://127.0.0.1:*",
+			"https://127.0.0.1:*",
+			"http://0.0.0.0",
+			"https://0.0.0.0",
+			"http://0.0.0.0:*",
+			"https://0.0.0.0:*",
+			"app://*",
+			"file://*",
+			"tauri://*",
+		}},
+		{"http://totally.safe,http://definitely.legit", []string{
+			"http://totally.safe",
+			"http://definitely.legit",
+			"http://localhost",
+			"https://localhost",
+			"http://localhost:*",
+			"https://localhost:*",
+			"http://127.0.0.1",
+			"https://127.0.0.1",
+			"http://127.0.0.1:*",
+			"https://127.0.0.1:*",
+			"http://0.0.0.0",
+			"https://0.0.0.0",
+			"http://0.0.0.0:*",
+			"https://0.0.0.0:*",
+			"app://*",
+			"file://*",
+			"tauri://*",
+		}},
+	}
+	for _, tt := range cases {
+		t.Run(tt.value, func(t *testing.T) {
+			t.Setenv("OLLAMA_ORIGINS", tt.value)
+
+			if diff := cmp.Diff(Origins(), tt.expect); diff != "" {
+				t.Errorf("%s: mismatch (-want +got):\n%s", tt.value, diff)
+			}
+		})
+	}
+}
+
+func TestBool(t *testing.T) {
+	cases := map[string]bool{
+		"":      false,
+		"true":  true,
+		"false": false,
+		"1":     true,
+		"0":     false,
+		// invalid values
+		"random":    true,
+		"something": true,
+	}
+
+	for k, v := range cases {
+		t.Run(k, func(t *testing.T) {
+			t.Setenv("OLLAMA_BOOL", k)
+			if b := Bool("OLLAMA_BOOL")(); b != v {
+				t.Errorf("%s: expected %t, got %t", k, v, b)
+			}
+		})
+	}
+}
+
+func TestUint(t *testing.T) {
+	cases := map[string]uint{
+		"0":    0,
+		"1":    1,
+		"1337": 1337,
+		// default values
+		"":       11434,
+		"-1":     11434,
+		"0o10":   11434,
+		"0x10":   11434,
+		"string": 11434,
+	}
+
+	for k, v := range cases {
+		t.Run(k, func(t *testing.T) {
+			t.Setenv("OLLAMA_UINT", k)
+			if i := Uint("OLLAMA_UINT", 11434)(); i != v {
+				t.Errorf("%s: expected %d, got %d", k, v, i)
+			}
+		})
+	}
+}
+
+func TestKeepAlive(t *testing.T) {
+	cases := map[string]time.Duration{
+		"":       5 * time.Minute,
+		"1s":     time.Second,
+		"1m":     time.Minute,
+		"1h":     time.Hour,
+		"5m0s":   5 * time.Minute,
+		"1h2m3s": 1*time.Hour + 2*time.Minute + 3*time.Second,
+		"0":      time.Duration(0),
+		"60":     60 * time.Second,
+		"120":    2 * time.Minute,
+		"3600":   time.Hour,
+		"-0":     time.Duration(0),
+		"-1":     time.Duration(math.MaxInt64),
+		"-1m":    time.Duration(math.MaxInt64),
+		// invalid values
+		" ":   5 * time.Minute,
+		"???": 5 * time.Minute,
+		"1d":  5 * time.Minute,
+		"1y":  5 * time.Minute,
+		"1w":  5 * time.Minute,
+	}
+
+	for tt, expect := range cases {
+		t.Run(tt, func(t *testing.T) {
+			t.Setenv("OLLAMA_KEEP_ALIVE", tt)
+			if actual := KeepAlive(); actual != expect {
+				t.Errorf("%s: expected %s, got %s", tt, expect, actual)
+			}
+		})
+	}
+}
+
+func TestLoadTimeout(t *testing.T) {
+	defaultTimeout := 5 * time.Minute
+	cases := map[string]time.Duration{
+		"":       defaultTimeout,
+		"1s":     time.Second,
+		"1m":     time.Minute,
+		"1h":     time.Hour,
+		"5m0s":   defaultTimeout,
+		"1h2m3s": 1*time.Hour + 2*time.Minute + 3*time.Second,
+		"0":      time.Duration(math.MaxInt64),
+		"60":     60 * time.Second,
+		"120":    2 * time.Minute,
+		"3600":   time.Hour,
+		"-0":     time.Duration(math.MaxInt64),
+		"-1":     time.Duration(math.MaxInt64),
+		"-1m":    time.Duration(math.MaxInt64),
+		// invalid values
+		" ":   defaultTimeout,
+		"???": defaultTimeout,
+		"1d":  defaultTimeout,
+		"1y":  defaultTimeout,
+		"1w":  defaultTimeout,
+	}
+
+	for tt, expect := range cases {
+		t.Run(tt, func(t *testing.T) {
+			t.Setenv("OLLAMA_LOAD_TIMEOUT", tt)
+			if actual := LoadTimeout(); actual != expect {
+				t.Errorf("%s: expected %s, got %s", tt, expect, actual)
+			}
+		})
+	}
+}
+
+func TestVar(t *testing.T) {
+	cases := map[string]string{
+		"value":       "value",
+		" value ":     "value",
+		" 'value' ":   "value",
+		` "value" `:   "value",
+		" ' value ' ": " value ",
+		` " value " `: " value ",
+	}
+
+	for k, v := range cases {
+		t.Run(k, func(t *testing.T) {
+			t.Setenv("OLLAMA_VAR", k)
+			if s := Var("OLLAMA_VAR"); s != v {
+				t.Errorf("%s: expected %q, got %q", k, v, s)
 			}
 		})
 	}
diff --git a/examples/go-chat/main.go b/examples/go-chat/main.go
index 5266f03e9..7663fb8f4 100644
--- a/examples/go-chat/main.go
+++ b/examples/go-chat/main.go
@@ -35,7 +35,7 @@ func main() {
 
 	ctx := context.Background()
 	req := &api.ChatRequest{
-		Model:    "llama3",
+		Model:    "llama3.1",
 		Messages: messages,
 	}
 
diff --git a/examples/go-generate-streaming/main.go b/examples/go-generate-streaming/main.go
index 494033511..3acfb22a9 100644
--- a/examples/go-generate-streaming/main.go
+++ b/examples/go-generate-streaming/main.go
@@ -16,7 +16,7 @@ func main() {
 
 	// By default, GenerateRequest is streaming.
 	req := &api.GenerateRequest{
-		Model:  "gemma",
+		Model:  "gemma2",
 		Prompt: "how many planets are there?",
 	}
 
diff --git a/examples/go-generate/main.go b/examples/go-generate/main.go
index 50fbf64b7..2fe28742b 100644
--- a/examples/go-generate/main.go
+++ b/examples/go-generate/main.go
@@ -15,7 +15,7 @@ func main() {
 	}
 
 	req := &api.GenerateRequest{
-		Model:  "gemma",
+		Model:  "gemma2",
 		Prompt: "how many planets are there?",
 
 		// set streaming to false
diff --git a/examples/go-http-generate/README.md b/examples/go-http-generate/README.md
deleted file mode 100644
index e69de29bb..000000000
diff --git a/examples/langchain-python-rag-document/README.md b/examples/langchain-python-rag-document/README.md
index 20a73a883..e2f3bc028 100644
--- a/examples/langchain-python-rag-document/README.md
+++ b/examples/langchain-python-rag-document/README.md
@@ -4,6 +4,14 @@ This example provides an interface for asking questions to a PDF document.
 
 ## Setup
 
+1. Ensure you have the `llama3.1` model installed:
+
+```
+ollama pull llama3.1
+```
+
+2. Install the Python Requirements.
+
 ```
 pip install -r requirements.txt
 ```
diff --git a/examples/langchain-python-rag-document/main.py b/examples/langchain-python-rag-document/main.py
index 3ed9499f2..6f7cec9be 100644
--- a/examples/langchain-python-rag-document/main.py
+++ b/examples/langchain-python-rag-document/main.py
@@ -51,7 +51,7 @@ while True:
         template=template,
     )
 
-    llm = Ollama(model="llama3:8b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+    llm = Ollama(model="llama3.1", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
     qa_chain = RetrievalQA.from_chain_type(
         llm,
         retriever=vectorstore.as_retriever(),
diff --git a/examples/langchain-python-rag-privategpt/requirements.txt b/examples/langchain-python-rag-privategpt/requirements.txt
index 0aad1fe55..4f2cee250 100644
--- a/examples/langchain-python-rag-privategpt/requirements.txt
+++ b/examples/langchain-python-rag-privategpt/requirements.txt
@@ -1,6 +1,6 @@
 langchain==0.0.274
 gpt4all==1.0.8
-chromadb==0.4.7
+chromadb==0.5.0
 llama-cpp-python==0.1.81
 urllib3==2.0.4
 PyMuPDF==1.23.5
@@ -12,4 +12,4 @@ pandoc==2.3
 pypandoc==1.11
 tqdm==4.66.1
 sentence_transformers==2.2.2
-numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability
\ No newline at end of file
+numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/examples/langchain-python-rag-websummary/README.md b/examples/langchain-python-rag-websummary/README.md
index 3f3b98733..29c706a39 100644
--- a/examples/langchain-python-rag-websummary/README.md
+++ b/examples/langchain-python-rag-websummary/README.md
@@ -4,10 +4,10 @@ This example summarizes the website, [https://ollama.com/blog/run-llama2-uncenso
 
 ## Running the Example
 
-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3.1` model installed:
 
    ```bash
-   ollama pull llama2
+   ollama pull llama3.1
    ```
 
 2. Install the Python Requirements.
diff --git a/examples/langchain-python-rag-websummary/main.py b/examples/langchain-python-rag-websummary/main.py
index d1b05ba8a..77b09fbbc 100644
--- a/examples/langchain-python-rag-websummary/main.py
+++ b/examples/langchain-python-rag-websummary/main.py
@@ -5,8 +5,8 @@ from langchain.chains.summarize import load_summarize_chain
 loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally")
 docs = loader.load()
 
-llm = Ollama(model="llama3")
+llm = Ollama(model="llama3.1")
 chain = load_summarize_chain(llm, chain_type="stuff")
 
-result = chain.invoke(docs) 
+result = chain.invoke(docs)
 print(result)
diff --git a/examples/langchain-python-simple/README.md b/examples/langchain-python-simple/README.md
index d4102dec7..60db2c8c3 100644
--- a/examples/langchain-python-simple/README.md
+++ b/examples/langchain-python-simple/README.md
@@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama.
 
 ## Running the Example
 
-1. Ensure you have the `llama3` model installed:
+1. Ensure you have the `llama3.1` model installed:
 
    ```bash
-   ollama pull llama3
+   ollama pull llama3.1
    ```
 
 2. Install the Python Requirements.
diff --git a/examples/langchain-python-simple/main.py b/examples/langchain-python-simple/main.py
index 7cb652867..a7ed81d67 100644
--- a/examples/langchain-python-simple/main.py
+++ b/examples/langchain-python-simple/main.py
@@ -1,6 +1,6 @@
 from langchain.llms import Ollama
 
 input = input("What is your question?")
-llm = Ollama(model="llama3")
+llm = Ollama(model="llama3.1")
 res = llm.predict(input)
 print (res)
diff --git a/examples/modelfile-mario/Modelfile b/examples/modelfile-mario/Modelfile
index 33d5952b1..a37470864 100644
--- a/examples/modelfile-mario/Modelfile
+++ b/examples/modelfile-mario/Modelfile
@@ -1,4 +1,4 @@
-FROM llama3
+FROM llama3.1
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from super mario bros, acting as an assistant.
diff --git a/examples/modelfile-mario/readme.md b/examples/modelfile-mario/readme.md
index e4f0d4172..c3f34197a 100644
--- a/examples/modelfile-mario/readme.md
+++ b/examples/modelfile-mario/readme.md
@@ -2,12 +2,12 @@
 
 # Example character: Mario
 
-This example shows how to create a basic character using Llama3 as the base model.
+This example shows how to create a basic character using Llama3.1 as the base model.
 
 To run this example:
 
 1. Download the Modelfile
-2. `ollama pull llama3` to get the base model used in the model file.
+2. `ollama pull llama3.1` to get the base model used in the model file.
 3. `ollama create NAME -f ./Modelfile`
 4. `ollama run NAME`
 
@@ -18,7 +18,7 @@ Ask it some questions like "Who are you?" or "Is Peach in trouble again?"
 What the model file looks like:
 
 ```
-FROM llama3
+FROM llama3.1
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from Super Mario Bros, acting as an assistant.
diff --git a/examples/python-dockerit/dockerit.py b/examples/python-dockerit/dockerit.py
index b013102fa..6a288d906 100644
--- a/examples/python-dockerit/dockerit.py
+++ b/examples/python-dockerit/dockerit.py
@@ -4,7 +4,7 @@ imageName = input("Enter the name of the image: ")
 client = docker.from_env()
 s = requests.Session()
 output=""
-with s.post('http://localhost:11434/api/generate', json={'model': 'dockerit', 'prompt': inputDescription}, stream=True) as r:
+with s.post('http://localhost:11434/api/generate', json={'model': 'mattw/dockerit', 'prompt': inputDescription}, stream=True) as r:
   for line in r.iter_lines():
     if line:
       j = json.loads(line)
diff --git a/examples/python-grounded-factuality-rag-check/README.md b/examples/python-grounded-factuality-rag-check/README.md
new file mode 100644
index 000000000..5c9817526
--- /dev/null
+++ b/examples/python-grounded-factuality-rag-check/README.md
@@ -0,0 +1,93 @@
+# RAG Hallucination Checker using Bespoke-Minicheck
+
+This example allows the user to ask questions related to a document, which can be specified via an article URL. Relevant chunks are retrieved from the document and given to `llama3.1` as context to answer the question. Then each sentence in the answer is checked against the retrieved chunks using `bespoke-minicheck` to ensure that the answer does not contain hallucinations.
+
+## Running the Example
+
+1. Ensure the `all-minilm` (embedding), `llama3.1` (chat), and `bespoke-minicheck` (check) models are installed:
+
+   ```bash
+   ollama pull all-minilm
+   ollama pull llama3.1
+   ollama pull bespoke-minicheck
+   ```
+
+2. Install the dependencies.
+
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+3. Run the example:
+
+   ```bash
+   python main.py
+   ```
+
+## Expected Output
+
+```text
+Enter the URL of an article you want to chat with, or press Enter for default example:
+
+Loaded, chunked, and embedded text from https://www.theverge.com/2024/9/12/24242439/openai-o1-model-reasoning-strawberry-chatgpt.
+
+Enter your question or type quit: Who is the CEO of openai?
+
+Retrieved chunks:
+OpenAI is releasing a new model called o1 , the first in a planned series of “ reasoning ” models that have been trained to answer more complex questions , faster than a human can . It ’ s being released alongside o1-mini , a smaller , cheaper version . And yes , if you ’ re steeped in AI rumors : this is , in fact , the extremely hyped Strawberry model . For OpenAI , o1 represents a step toward its broader goal of human-like artificial intelligence .
+
+OpenAI is releasing a new model called o1 , the first in a planned series of “ reasoning ” models that have been trained to answer more complex questions , faster than a human can . It ’ s being released alongside o1-mini , a smaller , cheaper version . And yes , if you ’ re steeped in AI rumors : this is , in fact , the extremely hyped Strawberry model . For OpenAI , o1 represents a step toward its broader goal of human-like artificial intelligence . More practically , it does a better job at writing code and solving multistep problems than previous models . But it ’ s also more expensive and slower to use than GPT-4o . OpenAI is calling this release of o1 a “ preview ” to emphasize how nascent it is . ChatGPT Plus and Team users get access to both o1-preview and o1-mini starting today , while Enterprise and Edu users will get access early next week .
+
+More practically , it does a better job at writing code and solving multistep problems than previous models . But it ’ s also more expensive and slower to use than GPT-4o . OpenAI is calling this release of o1 a “ preview ” to emphasize how nascent it is . ChatGPT Plus and Team users get access to both o1-preview and o1-mini starting today , while Enterprise and Edu users will get access early next week . OpenAI says it plans to bring o1-mini access to all the free users of ChatGPT but hasn ’ t set a release date yet . Developer access to o1 is really expensive : In the API , o1-preview is $ 15 per 1 million input tokens , or chunks of text parsed by the model , and $ 60 per 1 million output tokens . For comparison , GPT-4o costs $ 5 per 1 million input tokens and $ 15 per 1 million output tokens .
+
+OpenAI says it plans to bring o1-mini access to all the free users of ChatGPT but hasn ’ t set a release date yet . Developer access to o1 is really expensive : In the API , o1-preview is $ 15 per 1 million input tokens , or chunks of text parsed by the model , and $ 60 per 1 million output tokens . For comparison , GPT-4o costs $ 5 per 1 million input tokens and $ 15 per 1 million output tokens . The training behind o1 is fundamentally different from its predecessors , OpenAI ’ s research lead , Jerry Tworek , tells me , though the company is being vague about the exact details . He says o1 “ has been trained using a completely new optimization algorithm and a new training dataset specifically tailored for it. ” Image : OpenAI OpenAI taught previous GPT models to mimic patterns from its training data .
+
+LLM Answer:
+The text does not mention the CEO of OpenAI. It only discusses the release of a new model called o1 and some details about it, but does not provide information on the company's leadership.
+
+LLM Claim: The text does not mention the CEO of OpenAI.
+Is this claim supported by the context according to bespoke-minicheck? Yes
+
+LLM Claim: It only discusses the release of a new model called o1 and some details about it, but does not provide information on the company's leadership.
+Is this claim supported by the context according to bespoke-minicheck? No
+```
+
+The second claim is unsupported since the text mentions the research lead. 
+
+Another tricky example:
+
+```text
+
+Enter your question or type quit: what sets o1 apart from gpt-4o?
+
+Retrieved chunks: 
+OpenAI says it plans to bring o1-mini access to all the free users of ChatGPT but hasn ’ t set a release date yet . Developer access to o1 is really expensive : In the API , o1-preview is $ 15 per 1 million input tokens , or chunks of text parsed by the model , and $ 60 per 1 million output tokens . For comparison , GPT-4o costs $ 5 per 1 million input tokens and $ 15 per 1 million output tokens . The training behind o1 is fundamentally different from its predecessors , OpenAI ’ s research lead , Jerry Tworek , tells me , though the company is being vague about the exact details . He says o1 “ has been trained using a completely new optimization algorithm and a new training dataset specifically tailored for it. ” Image : OpenAI OpenAI taught previous GPT models to mimic patterns from its training data .
+
+He says OpenAI also tested o1 against a qualifying exam for the International Mathematics Olympiad , and while GPT-4o only correctly solved only 13 percent of problems , o1 scored 83 percent . “ We can ’ t say we solved hallucinations ” In online programming contests known as Codeforces competitions , this new model reached the 89th percentile of participants , and OpenAI claims the next update of this model will perform “ similarly to PhD students on challenging benchmark tasks in physics , chemistry and biology. ” At the same time , o1 is not as capable as GPT-4o in a lot of areas . It doesn ’ t do as well on factual knowledge about the world .
+
+More practically , it does a better job at writing code and solving multistep problems than previous models . But it ’ s also more expensive and slower to use than GPT-4o . OpenAI is calling this release of o1 a “ preview ” to emphasize how nascent it is . ChatGPT Plus and Team users get access to both o1-preview and o1-mini starting today , while Enterprise and Edu users will get access early next week . OpenAI says it plans to bring o1-mini access to all the free users of ChatGPT but hasn ’ t set a release date yet . Developer access to o1 is really expensive : In the API , o1-preview is $ 15 per 1 million input tokens , or chunks of text parsed by the model , and $ 60 per 1 million output tokens . For comparison , GPT-4o costs $ 5 per 1 million input tokens and $ 15 per 1 million output tokens .
+
+“ We can ’ t say we solved hallucinations ” In online programming contests known as Codeforces competitions , this new model reached the 89th percentile of participants , and OpenAI claims the next update of this model will perform “ similarly to PhD students on challenging benchmark tasks in physics , chemistry and biology. ” At the same time , o1 is not as capable as GPT-4o in a lot of areas . It doesn ’ t do as well on factual knowledge about the world . It also doesn ’ t have the ability to browse the web or process files and images . Still , the company believes it represents a brand-new class of capabilities . It was named o1 to indicate “ resetting the counter back to 1. ” “ I ’ m gon na be honest : I think we ’ re terrible at naming , traditionally , ” McGrew says .
+LLM Answer: According to the text, several things set o1 apart from GPT-4o:
+
+* In online programming contests (Codeforces competitions), o1 scored 83% correct solutions compared to GPT-4o's 13%.
+* The training behind o1 is "fundamentally different" from its predecessors, including a completely new optimization algorithm and a new training dataset specifically tailored for it.
+* o1 has been shown to perform similarly to PhD students on challenging benchmark tasks in physics, chemistry, and biology, while GPT-4o does not have this level of performance.
+* However, o1 is also less capable than GPT-4o in some areas, particularly with regard to factual knowledge about the world.
+
+LLM Claim: According to the text, several things set o1 apart from GPT-4o:
+
+* In online programming contests (Codeforces competitions), o1 scored 83% correct solutions compared to GPT-4o's 13%.
+Is this claim supported by the context according to bespoke-minicheck? Yes
+
+LLM Claim: * The training behind o1 is "fundamentally different" from its predecessors, including a completely new optimization algorithm and a new training dataset specifically tailored for it.
+Is this claim supported by the context according to bespoke-minicheck? Yes
+
+LLM Claim: * o1 has been shown to perform similarly to PhD students on challenging benchmark tasks in physics, chemistry, and biology, while GPT-4o does not have this level of performance.
+Is this claim supported by the context according to bespoke-minicheck? No
+
+LLM Claim: * However, o1 is also less capable than GPT-4o in some areas, particularly with regard to factual knowledge about the world.
+Is this claim supported by the context according to bespoke-minicheck? Yes
+```
+
+We see that the third claim, "o1 has been shown to perform similarly to PhD students on challenging benchmark tasks in physics, chemistry, and biology, while GPT-4o does not have this level of performance," is not supported by the context. This is because the context only says that OpenAI *claims* the next update will perform at that level, which is different from "has been shown to perform".
diff --git a/examples/python-grounded-factuality-rag-check/main.py b/examples/python-grounded-factuality-rag-check/main.py
new file mode 100644
index 000000000..f4d562d5f
--- /dev/null
+++ b/examples/python-grounded-factuality-rag-check/main.py
@@ -0,0 +1,137 @@
+import ollama
+import warnings
+from mattsollamatools import chunker
+from newspaper import Article
+import numpy as np
+from sklearn.neighbors import NearestNeighbors
+import nltk
+
+warnings.filterwarnings(
+    "ignore", category=FutureWarning, module="transformers.tokenization_utils_base"
+)
+nltk.download("punkt_tab", quiet=True)
+
+
+def getArticleText(url):
+    """Gets the text of an article from a URL.
+
+    Often there are a bunch of ads and menus on pages for a news article.
+    This uses newspaper3k to get just the text of the article.
+    """
+    article = Article(url)
+    article.download()
+    article.parse()
+    return article.text
+
+
+def knn_search(question_embedding, embeddings, k=5):
+    """Performs K-nearest neighbors (KNN) search"""
+    X = np.array(
+        [item["embedding"] for article in embeddings for item in article["embeddings"]]
+    )
+    source_texts = [
+        item["source"] for article in embeddings for item in article["embeddings"]
+    ]
+
+    # Fit a KNN model on the embeddings
+    knn = NearestNeighbors(n_neighbors=k, metric="cosine")
+    knn.fit(X)
+
+    # Find the indices of the k-nearest neighbors (distances are discarded).
+    _, indices = knn.kneighbors(question_embedding, n_neighbors=k)
+
+    # Get the indices and source texts of the best matches
+    best_matches = [(indices[0][i], source_texts[indices[0][i]]) for i in range(k)]
+
+    return best_matches
+
+
+def check(document, claim):
+    """Checks if the claim is supported by the document by calling bespoke-minicheck.
+
+    Returns Yes/yes if the claim is supported by the document, No/no otherwise.
+    Support for logits will be added in the future.
+
+    bespoke-minicheck's system prompt is defined as:
+      'Determine whether the provided claim is consistent with the corresponding
+      document. Consistency in this context implies that all information presented in the claim
+      is substantiated by the document. If not, it should be considered inconsistent. Please
+      assess the claim's consistency with the document by responding with either "Yes" or "No".'
+
+    bespoke-minicheck's user prompt is defined as:
+      "Document: {document}\nClaim: {claim}"
+    """
+    prompt = f"Document: {document}\nClaim: {claim}"
+    response = ollama.generate(
+        model="bespoke-minicheck", prompt=prompt, options={"num_predict": 2, "temperature": 0.0}
+    )
+    return response["response"].strip()
+
+
+if __name__ == "__main__":
+    allEmbeddings = []
+    default_url = "https://www.theverge.com/2024/9/12/24242439/openai-o1-model-reasoning-strawberry-chatgpt"
+    user_input = input(
+        "Enter the URL of an article you want to chat with, or press Enter for default example: "
+    )
+    article_url = user_input.strip() if user_input.strip() else default_url
+    article = {}
+    article["embeddings"] = []
+    article["url"] = article_url
+    text = getArticleText(article_url)
+    chunks = chunker(text)
+
+    # Embed (batch) chunks using ollama
+    embeddings = ollama.embed(model="all-minilm", input=chunks)["embeddings"]
+
+    for chunk, embedding in zip(chunks, embeddings):
+        item = {}
+        item["source"] = chunk
+        item["embedding"] = embedding
+        item["sourcelength"] = len(chunk)
+        article["embeddings"].append(item)
+
+    allEmbeddings.append(article)
+
+    print(f"\nLoaded, chunked, and embedded text from {article_url}.\n")
+
+    while True:
+        # Input a question from the user
+        # For example, "Who is the chief research officer?"
+        question = input("Enter your question or type quit: ")
+
+        if question.lower() == "quit":
+            break
+
+        # Embed the user's question using ollama.embed
+        question_embedding = ollama.embed(model="all-minilm", input=question)[
+            "embeddings"
+        ]
+
+        # Perform KNN search to find the best matches (indices and source text)
+        best_matches = knn_search(question_embedding, allEmbeddings, k=4)
+
+        sourcetext = "\n\n".join([source_text for (_, source_text) in best_matches])
+
+        print(f"\nRetrieved chunks: \n{sourcetext}\n")
+
+        # Give the retrieved chunks and question to the chat model
+        system_prompt = f"Only use the following information to answer the question. Do not use anything else: {sourcetext}"
+
+        ollama_response = ollama.generate(
+            model="llama3.1",
+            prompt=question,
+            system=system_prompt,
+            stream=False,
+        )
+
+        answer = ollama_response["response"]
+        print(f"LLM Answer:\n{answer}\n")
+
+        # Check each sentence in the response for grounded factuality
+        if answer:
+            for claim in nltk.sent_tokenize(answer):
+                print(f"LLM Claim: {claim}")
+                print(
+                    f"Is this claim supported by the context according to bespoke-minicheck? {check(sourcetext, claim)}\n"
+                )
diff --git a/examples/python-grounded-factuality-rag-check/requirements.txt b/examples/python-grounded-factuality-rag-check/requirements.txt
new file mode 100644
index 000000000..d4bd6df34
--- /dev/null
+++ b/examples/python-grounded-factuality-rag-check/requirements.txt
@@ -0,0 +1,8 @@
+ollama
+lxml==5.3.0
+lxml_html_clean==0.2.2
+mattsollamatools==0.0.25
+newspaper3k==0.2.8
+nltk==3.9.1
+numpy==1.26.4
+scikit-learn==1.5.2
\ No newline at end of file
diff --git a/examples/python-grounded-factuality-simple-check/main.py b/examples/python-grounded-factuality-simple-check/main.py
new file mode 100644
index 000000000..0204f3b39
--- /dev/null
+++ b/examples/python-grounded-factuality-simple-check/main.py
@@ -0,0 +1,53 @@
+"""Simple example to demonstrate how to use the bespoke-minicheck model."""
+
+import ollama
+
+# NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
+
+
+def check(document, claim):
+    """Checks if the claim is supported by the document by calling bespoke-minicheck.
+
+    Returns Yes/yes if the claim is supported by the document, No/no otherwise.
+    Support for logits will be added in the future.
+
+    bespoke-minicheck's system prompt is defined as:
+      'Determine whether the provided claim is consistent with the corresponding
+      document. Consistency in this context implies that all information presented in the claim
+      is substantiated by the document. If not, it should be considered inconsistent. Please
+      assess the claim's consistency with the document by responding with either "Yes" or "No".'
+
+    bespoke-minicheck's user prompt is defined as:
+      "Document: {document}\nClaim: {claim}"
+    """
+    prompt = f"Document: {document}\nClaim: {claim}"
+    response = ollama.generate(
+        model="bespoke-minicheck", prompt=prompt, options={"num_predict": 2, "temperature": 0.0}
+    )
+    return response["response"].strip()
+
+
+def get_user_input(prompt):
+    user_input = input(prompt)
+    if not user_input:
+        exit()
+    print()
+    return user_input
+
+
+def main():
+    while True:
+        # Get a document from the user (e.g. "Ryan likes running and biking.")
+        document = get_user_input("Enter a document: ")
+        # Get a claim from the user (e.g. "Ryan likes to run.")
+        claim = get_user_input("Enter a claim: ")
+        # Check if the claim is supported by the document
+        grounded_factuality_check = check(document, claim)
+        print(
+            f"Is the claim supported by the document according to bespoke-minicheck? {grounded_factuality_check}"
+        )
+        print("\n\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/python-grounded-factuality-simple-check/readme.md b/examples/python-grounded-factuality-simple-check/readme.md
new file mode 100644
index 000000000..b164b5eb9
--- /dev/null
+++ b/examples/python-grounded-factuality-simple-check/readme.md
@@ -0,0 +1,54 @@
+# Simple Bespoke-Minicheck Example
+
+`bespoke-minicheck` is a model for checking if a claim is supported by a document. It is used through the **generate** endpoint, which is called in this example with a `prompt` that includes the expected formatting of the user input. 
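+
+As a rough sketch of the same call made directly against the REST API (the document and claim here are placeholders):
+
+```bash
+curl http://localhost:11434/api/generate -d '{
+  "model": "bespoke-minicheck",
+  "prompt": "Document: Roses are red.\nClaim: Roses are blue.",
+  "stream": false
+}'
+```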
+
+## Running the Example
+
+1. Ensure you have the `bespoke-minicheck` model installed:
+
+   ```bash
+   ollama pull bespoke-minicheck
+   ```
+
+2. Install the dependencies:
+
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+3. Run the program:
+
+   ```bash
+   python main.py
+   ```
+
+4. Enter a document and a claim when prompted:
+
+   ```bash
+   Enter a document: Roses are red.
+
+   Enter a claim: Roses are blue. 
+   ```
+
+   The claim and document are then given to the `bespoke-minicheck` model as inputs, and it generates a response (Yes or No) on whether the claim is supported by the document.
+
+   ```bash
+   Is the claim supported by the document according to bespoke-minicheck? No
+   ```
+
+## More Examples
+
+Document ([source](https://en.wikipedia.org/wiki/Apple_I)): 
+> The Apple Computer 1 (Apple-1[a]), later known predominantly as the Apple I (written with a Roman numeral),[b] is an 8-bit motherboard-only personal computer designed by Steve Wozniak[5][6] and released by the Apple Computer Company (now Apple Inc.) in 1976. The company was initially formed to sell the Apple I – its first product – and would later become the world's largest technology company.[7] The idea of starting a company and selling the computer came from Wozniak's friend and Apple co-founder Steve Jobs.[8][9] One of the main innovations of the Apple I was that it included video display terminal circuitry on its circuit board, allowing it to connect to a low-cost composite video monitor or television, instead of an expensive computer terminal, compared to most existing computers at the time.
+
+Claim: 
+>The Apple I is a 16-bit computer.
+
+Expected output:
+>Is the claim supported by the document according to bespoke-minicheck? **No**
+
+Claim: 
+>Apple was originally called the Apple Computer Company.
+
+Expected output:
+>Is the claim supported by the document according to bespoke-minicheck? **Yes**
diff --git a/examples/python-grounded-factuality-simple-check/requirements.txt b/examples/python-grounded-factuality-simple-check/requirements.txt
new file mode 100644
index 000000000..403abba66
--- /dev/null
+++ b/examples/python-grounded-factuality-simple-check/requirements.txt
@@ -0,0 +1 @@
+ollama
diff --git a/examples/python-json-datagenerator/predefinedschema.py b/examples/python-json-datagenerator/predefinedschema.py
index 1fd54892b..68090ad79 100644
--- a/examples/python-json-datagenerator/predefinedschema.py
+++ b/examples/python-json-datagenerator/predefinedschema.py
@@ -2,7 +2,7 @@ import requests
 import json
 import random
 
-model = "llama3"
+model = "llama3.1"
 template = {
   "firstName": "",
   "lastName": "",
diff --git a/examples/python-json-datagenerator/randomaddresses.py b/examples/python-json-datagenerator/randomaddresses.py
index 72b1fefb9..878c98037 100644
--- a/examples/python-json-datagenerator/randomaddresses.py
+++ b/examples/python-json-datagenerator/randomaddresses.py
@@ -12,7 +12,7 @@ countries = [
     "France",
 ]
 country = random.choice(countries)
-model = "llama3"
+model = "llama3.1"
 
 prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
 
diff --git a/examples/python-json-datagenerator/readme.md b/examples/python-json-datagenerator/readme.md
index 883570443..5b444dff1 100644
--- a/examples/python-json-datagenerator/readme.md
+++ b/examples/python-json-datagenerator/readme.md
@@ -6,10 +6,10 @@ There are two python scripts in this example. `randomaddresses.py` generates ran
 
 ## Running the Example
 
-1. Ensure you have the `llama3` model installed:
+1. Ensure you have the `llama3.1` model installed:
 
    ```bash
-   ollama pull llama3
+   ollama pull llama3.1
    ```
 
 2. Install the Python Requirements.
diff --git a/examples/python-loganalysis/Modelfile b/examples/python-loganalysis/Modelfile
index 5237cb6eb..b28aa0c0f 100644
--- a/examples/python-loganalysis/Modelfile
+++ b/examples/python-loganalysis/Modelfile
@@ -4,5 +4,5 @@ SYSTEM """
 You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer.
 """
 
-PARAMETER TEMPERATURE 0.3
+PARAMETER temperature 0.3
 
diff --git a/examples/python-loganalysis/readme.md b/examples/python-loganalysis/readme.md
index 4be0baaaf..03bab672e 100644
--- a/examples/python-loganalysis/readme.md
+++ b/examples/python-loganalysis/readme.md
@@ -21,6 +21,8 @@ You can try this with the `logtest.logfile` file included in this directory.
 2. Install the Python Requirements.
 
    ```bash
+   python3 -m venv .venv
+   source .venv/bin/activate
    pip install -r requirements.txt
    ```
 
diff --git a/examples/python-loganalysis/requirements.txt b/examples/python-loganalysis/requirements.txt
index 9688b8ec0..e7cb17efb 100644
--- a/examples/python-loganalysis/requirements.txt
+++ b/examples/python-loganalysis/requirements.txt
@@ -1 +1 @@
-Requests==2.31.0
+Requests>=2.32.3
diff --git a/examples/python-simplechat/client.py b/examples/python-simplechat/client.py
index f82a16b3e..85043d5f4 100644
--- a/examples/python-simplechat/client.py
+++ b/examples/python-simplechat/client.py
@@ -2,7 +2,7 @@ import json
 import requests
 
 # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
-model = "llama3"  # TODO: update this for whatever model you wish to use
+model = "llama3.1"  # TODO: update this for whatever model you wish to use
 
 
 def chat(messages):
diff --git a/examples/python-simplechat/readme.md b/examples/python-simplechat/readme.md
index dd2576bc1..4c2ded4d8 100644
--- a/examples/python-simplechat/readme.md
+++ b/examples/python-simplechat/readme.md
@@ -4,10 +4,10 @@ The **chat** endpoint is one of two ways to generate text from an LLM with Ollam
 
 ## Running the Example
 
-1. Ensure you have the `llama3` model installed:
+1. Ensure you have the `llama3.1` model installed:
 
    ```bash
-   ollama pull llama3
+   ollama pull llama3.1
    ```
 
 2. Install the Python Requirements.
diff --git a/examples/typescript-simplechat/client.ts b/examples/typescript-simplechat/client.ts
index a1e0eea37..8ad113b12 100644
--- a/examples/typescript-simplechat/client.ts
+++ b/examples/typescript-simplechat/client.ts
@@ -1,6 +1,6 @@
 import * as readline from "readline";
 
-const model = "llama3";
+const model = "llama3.1";
 type Message = {
   role: "assistant" | "user" | "system";
   content: string;
diff --git a/format/format.go b/format/format.go
index 31059578f..ac50570df 100644
--- a/format/format.go
+++ b/format/format.go
@@ -3,6 +3,7 @@ package format
 import (
 	"fmt"
 	"math"
+	"strconv"
 )
 
 const (
@@ -28,6 +29,6 @@ func HumanNumber(b uint64) string {
 	case b >= Thousand:
 		return fmt.Sprintf("%.0fK", float64(b)/Thousand)
 	default:
-		return fmt.Sprintf("%d", b)
+		return strconv.FormatUint(b, 10)
 	}
 }
diff --git a/go.mod b/go.mod
index 6807b9b48..6e437c730 100644
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
 module github.com/ollama/ollama
 
-go 1.22.0
+go 1.22.5
 
 require (
 	github.com/containerd/console v1.0.3
@@ -18,6 +18,7 @@ require (
 require (
 	github.com/agnivade/levenshtein v1.1.1
 	github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
+	github.com/google/go-cmp v0.6.0
 	github.com/mattn/go-runewidth v0.0.14
 	github.com/nlpodyssey/gopickle v0.3.0
 	github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
@@ -71,7 +72,7 @@ require (
 	golang.org/x/net v0.25.0 // indirect
 	golang.org/x/sys v0.20.0
 	golang.org/x/term v0.20.0
-	golang.org/x/text v0.15.0 // indirect
+	golang.org/x/text v0.15.0
 	google.golang.org/protobuf v1.34.1
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
diff --git a/gpu/amd_common.go b/gpu/amd_common.go
index 27a81e3f8..2894ac2c4 100644
--- a/gpu/amd_common.go
+++ b/gpu/amd_common.go
@@ -3,12 +3,14 @@
 package gpu
 
 import (
-	"fmt"
+	"errors"
 	"log/slog"
 	"os"
 	"path/filepath"
 	"runtime"
 	"strings"
+
+	"github.com/ollama/ollama/envconfig"
 )
 
 // Determine if the given ROCm lib directory is usable by checking for existence of some glob patterns
@@ -49,9 +51,17 @@ func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
 }
 
 func commonAMDValidateLibDir() (string, error) {
-	// We try to favor system paths first, so that we can wire up the subprocess to use
-	// the system version.  Only use our bundled version if the system version doesn't work
-	// This gives users a more recovery options if versions have subtle problems at runtime
+	// Favor our bundled version
+
+	// Installer payload location if we're running the installed binary
+	exe, err := os.Executable()
+	if err == nil {
+		rocmTargetDir := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
+		if rocmLibUsable(rocmTargetDir) {
+			slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
+			return rocmTargetDir, nil
+		}
+	}
 
 	// Prefer explicit HIP env var
 	hipPath := os.Getenv("HIP_PATH")
@@ -87,14 +97,5 @@ func commonAMDValidateLibDir() (string, error) {
 		}
 	}
 
-	// Installer payload location if we're running the installed binary
-	exe, err := os.Executable()
-	if err == nil {
-		rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm")
-		if rocmLibUsable(rocmTargetDir) {
-			slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
-			return rocmTargetDir, nil
-		}
-	}
-	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
+	return "", errors.New("no suitable rocm found, falling back to CPU")
 }
diff --git a/gpu/amd_hip_windows.go b/gpu/amd_hip_windows.go
index 8572a24c5..2cea28242 100644
--- a/gpu/amd_hip_windows.go
+++ b/gpu/amd_hip_windows.go
@@ -1,6 +1,7 @@
 package gpu
 
 import (
+	"errors"
 	"fmt"
 	"log/slog"
 	"syscall"
@@ -33,9 +34,10 @@ type HipLib struct {
 }
 
 func NewHipLib() (*HipLib, error) {
-	h, err := windows.LoadLibrary("amdhip64.dll")
+	// At runtime we depend on v6, so discover GPUs with the same library for a consistent set of GPUs
+	h, err := windows.LoadLibrary("amdhip64_6.dll")
 	if err != nil {
-		return nil, fmt.Errorf("unable to load amdhip64.dll: %w", err)
+		return nil, fmt.Errorf("unable to load amdhip64_6.dll, please make sure to upgrade to the latest amd driver: %w", err)
 	}
 	hl := &HipLib{}
 	hl.dll = h
@@ -75,7 +77,7 @@ func (hl *HipLib) Release() {
 
 func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
 	if hl.dll == 0 {
-		return 0, 0, fmt.Errorf("dll has been unloaded")
+		return 0, 0, errors.New("dll has been unloaded")
 	}
 	var version int
 	status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
@@ -84,9 +86,8 @@ func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
 	}
 
 	slog.Debug("hipDriverGetVersion", "version", version)
-	// TODO - this isn't actually right, but the docs claim hipDriverGetVersion isn't accurate anyway...
-	driverMajor = version / 1000
-	driverMinor = (version - (driverMajor * 1000)) / 10
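+	// the raw value packs the version as major*10000000 + minor*100000 (plus a remainder); e.g. 60140252 decodes to driver 6.1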
+	driverMajor = version / 10000000
+	driverMinor = (version - (driverMajor * 10000000)) / 100000
 
 	return driverMajor, driverMinor, nil
 }
@@ -110,7 +111,7 @@ func (hl *HipLib) HipGetDeviceCount() int {
 
 func (hl *HipLib) HipSetDevice(device int) error {
 	if hl.dll == 0 {
-		return fmt.Errorf("dll has been unloaded")
+		return errors.New("dll has been unloaded")
 	}
 	status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
 	if status != hipSuccess {
@@ -121,7 +122,7 @@ func (hl *HipLib) HipSetDevice(device int) error {
 
 func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
 	if hl.dll == 0 {
-		return nil, fmt.Errorf("dll has been unloaded")
+		return nil, errors.New("dll has been unloaded")
 	}
 	var props hipDevicePropMinimal
 	status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
@@ -134,7 +135,7 @@ func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, err
 // free, total, err
 func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
 	if hl.dll == 0 {
-		return 0, 0, fmt.Errorf("dll has been unloaded")
+		return 0, 0, errors.New("dll has been unloaded")
 	}
 	var totalMemory uint64
 	var freeMemory uint64
diff --git a/gpu/amd_linux.go b/gpu/amd_linux.go
index 15b6fc61f..d3f5b9fc6 100644
--- a/gpu/amd_linux.go
+++ b/gpu/amd_linux.go
@@ -5,11 +5,13 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"io/fs"
 	"log/slog"
 	"os"
 	"path/filepath"
 	"regexp"
 	"slices"
+	"sort"
 	"strconv"
 	"strings"
 
@@ -60,9 +62,9 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 
 	// Determine if the user has already pre-selected which GPUs to look at, then ignore the others
 	var visibleDevices []string
-	hipVD := envconfig.HipVisibleDevices   // zero based index only
-	rocrVD := envconfig.RocrVisibleDevices // zero based index or UUID, but consumer cards seem to not support UUID
-	gpuDO := envconfig.GpuDeviceOrdinal    // zero based index
+	hipVD := envconfig.HipVisibleDevices()   // zero based index only
+	rocrVD := envconfig.RocrVisibleDevices() // zero based index or UUID, but consumer cards seem to not support UUID
+	gpuDO := envconfig.GpuDeviceOrdinal()    // zero based index
 	switch {
 	// TODO is this priority order right?
 	case hipVD != "":
@@ -75,13 +77,27 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 		visibleDevices = strings.Split(gpuDO, ",")
 	}
 
-	gfxOverride := envconfig.HsaOverrideGfxVersion
+	gfxOverride := envconfig.HsaOverrideGfxVersion()
 	var supported []string
 	libDir := ""
 
 	// The amdgpu driver always exposes the host CPU(s) first, but we have to skip them and subtract
 	// from the other IDs to get alignment with the HIP libraries expectations (zero is the first GPU, not the CPU)
 	matches, _ := filepath.Glob(GPUPropertiesFileGlob)
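+	// filepath.Glob returns lexically sorted paths (nodes/10 before nodes/2), so re-sort numerically by node ID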
+	sort.Slice(matches, func(i, j int) bool {
+		// /sys/class/kfd/kfd/topology/nodes/<number>/properties
+		a, err := strconv.ParseInt(filepath.Base(filepath.Dir(matches[i])), 10, 64)
+		if err != nil {
+			slog.Debug("parse err", "error", err, "match", matches[i])
+			return false
+		}
+		b, err := strconv.ParseInt(filepath.Base(filepath.Dir(matches[j])), 10, 64)
+		if err != nil {
+			slog.Debug("parse err", "error", err, "match", matches[i])
+			return false
+		}
+		return a < b
+	})
 	cpuCount := 0
 	for _, match := range matches {
 		slog.Debug("evaluating amdgpu node " + match)
@@ -344,6 +360,10 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 	if len(resp) == 0 {
 		slog.Info("no compatible amdgpu devices detected")
 	}
+	if err := verifyKFDDriverAccess(); err != nil {
+		slog.Error("amdgpu devices detected but permission problems block access", "error", err)
+		return nil
+	}
 	return resp
 }
 
@@ -378,7 +398,7 @@ func AMDValidateLibDir() (string, error) {
 
 	// If we still haven't found a usable rocm, the user will have to install it on their own
 	slog.Warn("amdgpu detected, but no compatible rocm library found.  Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install")
-	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
+	return "", errors.New("no suitable rocm found, falling back to CPU")
 }
 
 func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
@@ -440,3 +460,19 @@ func getFreeMemory(usedFile string) (uint64, error) {
 	}
 	return usedMemory, nil
 }
+
+func verifyKFDDriverAccess() error {
+	// Verify we have permissions - either running as root, or we have group access to the driver
+	fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0o666)
+	if err != nil {
+		if errors.Is(err, fs.ErrPermission) {
+			return fmt.Errorf("permissions not set up properly.  Either run ollama as root, or add you user account to the render group. %w", err)
+		} else if errors.Is(err, fs.ErrNotExist) {
+			// Container runtime failure?
+			return fmt.Errorf("kfd driver not loaded.  If running in a container, remember to include '--device /dev/kfd --device /dev/dri'")
+		}
+		return fmt.Errorf("failed to check permission on /dev/kfd: %w", err)
+	}
+	fd.Close()
+	return nil
+}
diff --git a/gpu/amd_windows.go b/gpu/amd_windows.go
index 8b6fabebb..ef6bf830c 100644
--- a/gpu/amd_windows.go
+++ b/gpu/amd_windows.go
@@ -2,7 +2,7 @@ package gpu
 
 import (
 	"bytes"
-	"fmt"
+	"errors"
 	"log/slog"
 	"os"
 	"path/filepath"
@@ -22,8 +22,8 @@ const (
 
 var (
 	// Used to validate if the given ROCm lib is usable
-	ROCmLibGlobs          = []string{"hipblas.dll", "rocblas"}                 // TODO - probably include more coverage of files here...
-	RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\5.7\\bin"} // TODO glob?
+	ROCmLibGlobs          = []string{"hipblas.dll", "rocblas"}                 // This is not sufficient to discern v5 vs v6
+	RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\6.1\\bin"} // TODO glob?
 )
 
 func AMDGetGPUInfo() []RocmGPUInfo {
@@ -35,12 +35,11 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 	}
 	defer hl.Release()
 
-	// TODO - this reports incorrect version information, so omitting for now
-	// driverMajor, driverMinor, err := hl.AMDDriverVersion()
-	// if err != nil {
-	// 	// For now this is benign, but we may eventually need to fail compatibility checks
-	// 	slog.Debug("error looking up amd driver version", "error", err)
-	// }
+	driverMajor, driverMinor, err := hl.AMDDriverVersion()
+	if err != nil {
+		// For now this is benign, but we may eventually need to fail compatibility checks
+		slog.Debug("error looking up amd driver version", "error", err)
+	}
 
 	// Note: the HIP library automatically handles subsetting to any HIP_VISIBLE_DEVICES the user specified
 	count := hl.HipGetDeviceCount()
@@ -54,7 +53,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 	}
 
 	var supported []string
-	gfxOverride := envconfig.HsaOverrideGfxVersion
+	gfxOverride := envconfig.HsaOverrideGfxVersion()
 	if gfxOverride == "" {
 		supported, err = GetSupportedGFX(libDir)
 		if err != nil {
@@ -86,14 +85,15 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 		n = bytes.IndexByte(props.GcnArchName[:], 0)
 		gfx := string(props.GcnArchName[:n])
 		slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
-		//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY!  Always 0
+		// slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY!  Always 0
 		// TODO  Why isn't props.iGPU accurate!?
 		if strings.EqualFold(name, iGPUName) {
 			slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
 			continue
 		}
 		if gfxOverride == "" {
-			if !slices.Contains[[]string, string](supported, gfx) {
+			// Strip off Target Features when comparing
+			if !slices.Contains[[]string, string](supported, strings.Split(gfx, ":")[0]) {
 				slog.Warn("amdgpu is not supported", "gpu", i, "gpu_type", gfx, "library", libDir, "supported_types", supported)
 				// TODO - consider discrete markdown just for ROCM troubleshooting?
 				slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
@@ -132,10 +132,8 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 				MinimumMemory:  rocmMinimumMemory,
 				Name:           name,
 				Compute:        gfx,
-
-				// TODO - this information isn't accurate on windows, so don't report it until we find the right way to retrieve
-				// DriverMajor:    driverMajor,
-				// DriverMinor:    driverMinor,
+				DriverMajor:    driverMajor,
+				DriverMinor:    driverMinor,
 			},
 			index: i,
 		}
@@ -155,7 +153,7 @@ func AMDValidateLibDir() (string, error) {
 	// Installer payload (if we're running from some other location)
 	localAppData := os.Getenv("LOCALAPPDATA")
 	appDir := filepath.Join(localAppData, "Programs", "Ollama")
-	rocmTargetDir := filepath.Join(appDir, "rocm")
+	rocmTargetDir := filepath.Join(appDir, envconfig.LibRelativeToExe(), "lib", "ollama")
 	if rocmLibUsable(rocmTargetDir) {
 		slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
 		return rocmTargetDir, nil
@@ -163,7 +161,7 @@ func AMDValidateLibDir() (string, error) {
 
 	// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
 	slog.Warn("amdgpu detected, but no compatible rocm library found.  Please install ROCm")
-	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
+	return "", errors.New("no suitable rocm found, falling back to CPU")
 }
 
 func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
diff --git a/gpu/assets.go b/gpu/assets.go
deleted file mode 100644
index 073d2e813..000000000
--- a/gpu/assets.go
+++ /dev/null
@@ -1,145 +0,0 @@
-package gpu
-
-import (
-	"errors"
-	"fmt"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"runtime"
-	"strconv"
-	"strings"
-	"sync"
-	"syscall"
-	"time"
-
-	"github.com/ollama/ollama/envconfig"
-)
-
-var (
-	lock        sync.Mutex
-	payloadsDir = ""
-)
-
-func PayloadsDir() (string, error) {
-	lock.Lock()
-	defer lock.Unlock()
-	var err error
-	if payloadsDir == "" {
-		runnersDir := envconfig.RunnersDir
-
-		if runnersDir != "" {
-			payloadsDir = runnersDir
-			return payloadsDir, nil
-		}
-
-		// The remainder only applies on non-windows where we still carry payloads in the main executable
-		cleanupTmpDirs()
-		tmpDir := envconfig.TmpDir
-		if tmpDir == "" {
-			tmpDir, err = os.MkdirTemp("", "ollama")
-			if err != nil {
-				return "", fmt.Errorf("failed to generate tmp dir: %w", err)
-			}
-		} else {
-			err = os.MkdirAll(tmpDir, 0755)
-			if err != nil {
-				return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err)
-			}
-		}
-
-		// Track our pid so we can clean up orphaned tmpdirs
-		pidFilePath := filepath.Join(tmpDir, "ollama.pid")
-		pidFile, err := os.OpenFile(pidFilePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm)
-		if err != nil {
-			return "", err
-		}
-		if _, err := pidFile.Write([]byte(fmt.Sprint(os.Getpid()))); err != nil {
-			return "", err
-		}
-
-		// We create a distinct subdirectory for payloads within the tmpdir
-		// This will typically look like /tmp/ollama3208993108/runners on linux
-		payloadsDir = filepath.Join(tmpDir, "runners")
-	}
-	return payloadsDir, nil
-}
-
-// Best effort to clean up prior tmpdirs
-func cleanupTmpDirs() {
-	dirs, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*"))
-	if err != nil {
-		return
-	}
-	for _, d := range dirs {
-		info, err := os.Stat(d)
-		if err != nil || !info.IsDir() {
-			continue
-		}
-		raw, err := os.ReadFile(filepath.Join(d, "ollama.pid"))
-		if err != nil {
-			slog.Warn("failed to read ollama.pid", "path", d, "error", err)
-			// No pid, ignore this tmpdir
-			continue
-		}
-
-		pid, err := strconv.Atoi(string(raw))
-		if err != nil {
-			slog.Warn("failed to parse pid", "path", d, "error", err)
-			continue
-		}
-
-		proc, err := os.FindProcess(pid)
-		if err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
-			slog.Warn("found running ollama", "pid", pid, "path", d)
-			// Another running ollama, ignore this tmpdir
-			continue
-		}
-
-		if err := os.Remove(d); err != nil {
-			slog.Warn("unable to cleanup stale tmpdir", "path", d, "error", err)
-		}
-	}
-}
-
-func Cleanup() {
-	lock.Lock()
-	defer lock.Unlock()
-	runnersDir := envconfig.RunnersDir
-	if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" {
-		// We want to fully clean up the tmpdir parent of the payloads dir
-		tmpDir := filepath.Clean(filepath.Join(payloadsDir, ".."))
-		slog.Debug("cleaning up", "dir", tmpDir)
-		err := os.RemoveAll(tmpDir)
-		if err != nil {
-			// On windows, if we remove too quickly the llama.dll may still be in-use and fail to remove
-			time.Sleep(1000 * time.Millisecond)
-			err = os.RemoveAll(tmpDir)
-			if err != nil {
-				slog.Warn("failed to clean up", "dir", tmpDir, "err", err)
-			}
-		}
-	}
-}
-
-func UpdatePath(dir string) {
-	if runtime.GOOS == "windows" {
-		tmpDir := filepath.Dir(dir)
-		pathComponents := strings.Split(os.Getenv("PATH"), ";")
-		i := 0
-		for _, comp := range pathComponents {
-			if strings.EqualFold(comp, dir) {
-				return
-			}
-			// Remove any other prior paths to our temp dir
-			if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) {
-				pathComponents[i] = comp
-				i++
-			}
-		}
-		newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
-		slog.Info("updating", "PATH", newPath)
-		os.Setenv("PATH", newPath)
-	}
-	// linux and darwin rely on rpath
-}
diff --git a/gpu/cpu_common.go b/gpu/cpu_common.go
index 63e88f25b..34edcdc5a 100644
--- a/gpu/cpu_common.go
+++ b/gpu/cpu_common.go
@@ -1,6 +1,11 @@
 package gpu
 
 import (
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+
 	"golang.org/x/sys/cpu"
 )
 
@@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
 	// else LCD
 	return CPUCapabilityNone
 }
+
+func IsNUMA() bool {
+	if runtime.GOOS != "linux" {
+		// numa support in llama.cpp is linux only
+		return false
+	}
+	ids := map[string]interface{}{}
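+	// collect the distinct physical package (socket) IDs; more than one socket implies a NUMA topology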
+	packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
+	for _, packageId := range packageIds {
+		id, err := os.ReadFile(packageId)
+		if err == nil {
+			ids[strings.TrimSpace(string(id))] = struct{}{}
+		}
+	}
+	return len(ids) > 1
+}
diff --git a/gpu/cuda_common.go b/gpu/cuda_common.go
index c90a644c4..aceec70af 100644
--- a/gpu/cuda_common.go
+++ b/gpu/cuda_common.go
@@ -4,9 +4,17 @@ package gpu
 
 import (
 	"log/slog"
+	"os"
+	"regexp"
+	"runtime"
+	"strconv"
 	"strings"
 )
 
+// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
+// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
+var CudaTegra string = os.Getenv("JETSON_JETPACK")
+
 func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
 	ids := []string{}
 	for _, info := range gpuInfo {
@@ -19,3 +27,38 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
 	}
 	return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
 }
+
+func cudaVariant(gpuInfo CudaGPUInfo) string {
+	if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
+		if CudaTegra != "" {
+			ver := strings.Split(CudaTegra, ".")
+			if len(ver) > 0 {
+				return "jetpack" + ver[0]
+			}
+		} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
+			r := regexp.MustCompile(` R(\d+) `)
+			m := r.FindSubmatch(data)
+			if len(m) != 2 {
+				slog.Info("Unexpected format for /etc/nv_tegra_release.  Set JETSON_JETPACK to select version")
+			} else {
+				if l4t, err := strconv.Atoi(string(m[1])); err == nil {
+					// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
+					// https://developer.nvidia.com/embedded/jetpack-archive
+					switch l4t {
+					case 35:
+						return "jetpack5"
+					case 36:
+						return "jetpack6"
+					default:
+						slog.Info("unsupported L4T version", "nv_tegra_release", string(data))
+					}
+				}
+			}
+		}
+	}
+
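+	// GPUs below compute capability 6.0 and drivers that predate CUDA 12.1 need the v11 runner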
+	if gpuInfo.computeMajor < 6 || gpuInfo.DriverMajor < 12 || (gpuInfo.DriverMajor == 12 && gpuInfo.DriverMinor == 0) {
+		return "v11"
+	}
+	return "v12"
+}
diff --git a/gpu/gpu.go b/gpu/gpu.go
index 0120d4271..69279867e 100644
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -7,9 +7,9 @@ package gpu
 #cgo windows LDFLAGS: -lpthread
 
 #include "gpu_info.h"
-
 */
 import "C"
+
 import (
 	"fmt"
 	"log/slog"
@@ -72,13 +72,8 @@ var RocmComputeMin = 9
 // TODO find a better way to detect iGPU instead of minimum memory
 const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
 
-// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
-// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
-var CudaTegra string = os.Getenv("JETSON_JETPACK")
-
 // Note: gpuMutex must already be held
 func initCudaHandles() *cudaHandles {
-
 	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
 
 	cHandles := &cudaHandles{}
@@ -106,10 +101,9 @@ func initCudaHandles() *cudaHandles {
 		localAppData := os.Getenv("LOCALAPPDATA")
 		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
 	}
-	tmpDir, _ := PayloadsDir()
-	if tmpDir != "" {
-		// TODO - add "payloads" for subprocess
-		cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)}
+	libDir := LibraryDir()
+	if libDir != "" {
+		cudartMgmtPatterns = []string{filepath.Join(libDir, CudartMgmtName)}
 	}
 	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
 
@@ -236,7 +230,7 @@ func GetGPUInfo() GpuInfoList {
 	}()
 
 	if !bootstrapped {
-		slog.Debug("Detecting GPUs")
+		slog.Info("looking for compatible GPUs")
 		needRefresh = false
 		cpuCapability = GetCPUCapability()
 		var memInfo C.mem_info_t
@@ -245,14 +239,19 @@ func GetGPUInfo() GpuInfoList {
 		if err != nil {
 			slog.Warn("error looking up system memory", "error", err)
 		}
-		cpus = []CPUInfo{CPUInfo{
-			GpuInfo: GpuInfo{
-				memInfo: mem,
-				Library: "cpu",
-				Variant: cpuCapability,
-				ID:      "0",
+		depPath := LibraryDir()
+
+		cpus = []CPUInfo{
+			{
+				GpuInfo: GpuInfo{
+					memInfo:        mem,
+					Library:        "cpu",
+					Variant:        cpuCapability.String(),
+					ID:             "0",
+					DependencyPath: depPath,
+				},
 			},
-		}}
+		}
 
 		// Fallback to CPU mode if we're lacking required vector extensions on x86
 		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
@@ -262,12 +261,6 @@ func GetGPUInfo() GpuInfoList {
 			return GpuInfoList{cpus[0].GpuInfo}
 		}
 
-		// On windows we bundle the nvidia library one level above the runner dir
-		depPath := ""
-		if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
-			depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "cuda")
-		}
-
 		// Load ALL libraries
 		cHandles = initCudaHandles()
 
@@ -302,11 +295,45 @@ func GetGPUInfo() GpuInfoList {
 				gpuInfo.FreeMemory = uint64(memInfo.free)
 				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
 				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
+				gpuInfo.computeMajor = int(memInfo.major)
+				gpuInfo.computeMinor = int(memInfo.minor)
 				gpuInfo.MinimumMemory = cudaMinimumMemory
-				gpuInfo.DependencyPath = depPath
-				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
 				gpuInfo.DriverMajor = driverMajor
 				gpuInfo.DriverMinor = driverMinor
+				variant := cudaVariant(gpuInfo)
+				if depPath != "" {
+					gpuInfo.DependencyPath = depPath
+					// Check for variant specific directory
+					if variant != "" {
+						if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
+							gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant)
+						}
+					}
+				}
+				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
+				gpuInfo.Variant = variant
+
+				// query the management library as well so we can record any skew between the two
+				// which represents overhead on the GPU we must set aside on subsequent updates
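+				// (e.g. if the management library reports 8.2 GiB free but the driver reports 8.0 GiB, 0.2 GiB is recorded as overhead)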
+				if cHandles.nvml != nil {
+					C.nvml_get_free(*cHandles.nvml, C.int(gpuInfo.index), &memInfo.free, &memInfo.total, &memInfo.used)
+					if memInfo.err != nil {
+						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
+						C.free(unsafe.Pointer(memInfo.err))
+					} else {
+						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
+							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
+							slog.Info("detected OS VRAM overhead",
+								"id", gpuInfo.ID,
+								"library", gpuInfo.Library,
+								"compute", gpuInfo.Compute,
+								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
+								"name", gpuInfo.Name,
+								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
+							)
+						}
+					}
+				}
 
 				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 				cudaGPUs = append(cudaGPUs, gpuInfo)
@@ -314,40 +341,36 @@ func GetGPUInfo() GpuInfoList {
 		}
 
 		// Intel
-		if envconfig.IntelGpu {
+		if envconfig.IntelGPU() {
 			oHandles = initOneAPIHandles()
-			// On windows we bundle the oneapi library one level above the runner dir
-			depPath = ""
-			if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
-				depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "oneapi")
-			}
-
-			for d := range oHandles.oneapi.num_drivers {
-				if oHandles.oneapi == nil {
-					// shouldn't happen
-					slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
-					continue
-				}
-				devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
-				for i := range devCount {
-					gpuInfo := OneapiGPUInfo{
-						GpuInfo: GpuInfo{
-							Library: "oneapi",
-						},
-						driverIndex: int(d),
-						gpuIndex:    int(i),
+			if oHandles != nil && oHandles.oneapi != nil {
+				for d := range oHandles.oneapi.num_drivers {
+					if oHandles.oneapi == nil {
+						// shouldn't happen
+						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
+						continue
+					}
+					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
+					for i := range devCount {
+						gpuInfo := OneapiGPUInfo{
+							GpuInfo: GpuInfo{
+								Library: "oneapi",
+							},
+							driverIndex: int(d),
+							gpuIndex:    int(i),
+						}
+						// TODO - split bootstrapping from updating free memory
+						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
+						// TODO - convert this to MinimumMemory based on testing...
+						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
+						memInfo.free = C.uint64_t(totalFreeMem)
+						gpuInfo.TotalMemory = uint64(memInfo.total)
+						gpuInfo.FreeMemory = uint64(memInfo.free)
+						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
+						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
+						gpuInfo.DependencyPath = depPath
+						oneapiGPUs = append(oneapiGPUs, gpuInfo)
 					}
-					// TODO - split bootstrapping from updating free memory
-					C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
-					// TODO - convert this to MinimumMemory based on testing...
-					var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
-					memInfo.free = C.uint64_t(totalFreeMem)
-					gpuInfo.TotalMemory = uint64(memInfo.total)
-					gpuInfo.FreeMemory = uint64(memInfo.free)
-					gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
-					gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
-					gpuInfo.DependencyPath = depPath
-					oneapiGPUs = append(oneapiGPUs, gpuInfo)
 				}
 			}
 		}
@@ -387,6 +410,9 @@ func GetGPUInfo() GpuInfoList {
 
 		rocmGPUs = AMDGetGPUInfo()
 		bootstrapped = true
+		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
+			slog.Info("no compatible GPUs were discovered")
+		}
 	}
 
 	// For detected GPUs, load library if not loaded
@@ -402,14 +428,17 @@ func GetGPUInfo() GpuInfoList {
 					"before",
 					"total", format.HumanBytes2(cpus[0].TotalMemory),
 					"free", format.HumanBytes2(cpus[0].FreeMemory),
+					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
 				),
 				slog.Group(
 					"now",
 					"total", format.HumanBytes2(mem.TotalMemory),
 					"free", format.HumanBytes2(mem.FreeMemory),
+					"free_swap", format.HumanBytes2(mem.FreeSwap),
 				),
 			)
 			cpus[0].FreeMemory = mem.FreeMemory
+			cpus[0].FreeSwap = mem.FreeSwap
 		}
 
 		var memInfo C.mem_info_t
@@ -438,9 +467,14 @@ func GetGPUInfo() GpuInfoList {
 				slog.Warn("error looking up nvidia GPU memory")
 				continue
 			}
+			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
+				// When using the management library update based on recorded overhead
+				memInfo.free -= C.uint64_t(gpu.OSOverhead)
+			}
 			slog.Debug("updating cuda memory data",
 				"gpu", gpu.ID,
 				"name", gpu.Name,
+				"overhead", format.HumanBytes2(gpu.OSOverhead),
 				slog.Group(
 					"before",
 					"total", format.HumanBytes2(gpu.TotalMemory),
@@ -516,10 +550,12 @@ func GetGPUInfo() GpuInfoList {
 func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
 	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
 	var ldPaths []string
-	var patterns []string
 	gpuLibPaths := []string{}
 	slog.Debug("Searching for GPU library", "name", baseLibName)
 
+	// Start with our bundled libraries
+	patterns := []string{filepath.Join(LibraryDir(), baseLibName)}
+
 	switch runtime.GOOS {
 	case "windows":
 		ldPaths = strings.Split(os.Getenv("PATH"), ";")
@@ -528,13 +564,14 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
 	default:
 		return gpuLibPaths
 	}
-	// Start with whatever we find in the PATH/LD_LIBRARY_PATH
+
+	// Then with whatever we find in the PATH/LD_LIBRARY_PATH
 	for _, ldPath := range ldPaths {
 		d, err := filepath.Abs(ldPath)
 		if err != nil {
 			continue
 		}
-		patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
+		patterns = append(patterns, filepath.Join(d, baseLibName))
 	}
 	patterns = append(patterns, defaultPatterns...)
 	slog.Debug("gpu library search", "globs", patterns)
@@ -600,7 +637,23 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
 		defer C.free(unsafe.Pointer(lib))
 		C.nvcuda_init(lib, &resp)
 		if resp.err != nil {
-			slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
+			// Decide what log level based on the type of error message to help users understand why
+			msg := C.GoString(resp.err)
+			switch resp.cudaErr {
+			case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
+				slog.Warn("version mismatch between driver and cuda driver library - reboot or upgrade may be required", "library", libPath, "error", msg)
+			case C.CUDA_ERROR_NO_DEVICE:
+				slog.Info("no nvidia devices detected", "library", libPath)
+			case C.CUDA_ERROR_UNKNOWN:
+				slog.Warn("unknown error initializing cuda driver library", "library", libPath, "error", msg)
+				slog.Warn("see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information")
+			default:
+				if strings.Contains(msg, "wrong ELF class") {
+					slog.Debug("skipping 32bit library", "library", libPath)
+				} else {
+					slog.Info("unable to load cuda driver library", "library", libPath, "error", msg)
+				}
+			}
 			C.free(unsafe.Pointer(resp.err))
 		} else {
 			return int(resp.num_devices), &resp.ch, libPath
@@ -671,7 +724,7 @@ func LoadVulkanMgmt(vulkanLibPaths []string, capLibPaths []string) (int, *C.vk_h
 }
 
 func getVerboseState() C.uint16_t {
-	if envconfig.Debug {
+	if envconfig.Debug() {
 		return C.uint16_t(1)
 	}
 	return C.uint16_t(0)
@@ -699,3 +752,31 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
 		return "", ""
 	}
 }
+
+func LibraryDir() string {
+	// On Windows/linux we bundle the dependencies at the same level as the executable
+	appExe, err := os.Executable()
+	if err != nil {
+		slog.Warn("failed to lookup executable path", "error", err)
+	}
+	cwd, err := os.Getwd()
+	if err != nil {
+		slog.Warn("failed to lookup working directory", "error", err)
+	}
+	// Scan for any of our dependencies and pick the first match
+	for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe()), cwd} {
+		libDep := filepath.Join("lib", "ollama")
+		if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
+			return filepath.Join(root, libDep)
+		}
+		// Developer mode, local build
+		if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
+			return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)
+		}
+		if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
+			return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)
+		}
+	}
+	slog.Warn("unable to locate gpu dependency libraries")
+	return ""
+}
diff --git a/gpu/gpu_darwin.go b/gpu/gpu_darwin.go
index f26d23c12..417b48dfa 100644
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -8,6 +8,7 @@ package gpu
 #include "gpu_info_darwin.h"
 */
 import "C"
+
 import (
 	"runtime"
 
@@ -24,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
 		return []GpuInfo{
 			{
 				Library: "cpu",
-				Variant: GetCPUCapability(),
+				Variant: GetCPUCapability().String(),
 				memInfo: mem,
 			},
 		}
@@ -47,7 +48,7 @@ func GetCPUInfo() GpuInfoList {
 	return []GpuInfo{
 		{
 			Library: "cpu",
-			Variant: GetCPUCapability(),
+			Variant: GetCPUCapability().String(),
 			memInfo: mem,
 		},
 	}
@@ -56,7 +57,8 @@ func GetCPUInfo() GpuInfoList {
 func GetCPUMem() (memInfo, error) {
 	return memInfo{
 		TotalMemory: uint64(C.getPhysicalMemory()),
-		FreeMemory:  0,
+		FreeMemory:  uint64(C.getFreeMemory()),
+		// FreeSwap omitted as Darwin uses dynamic paging
 	}, nil
 }
 
diff --git a/gpu/gpu_info.h b/gpu/gpu_info.h
index a0b4c96ee..d36aff253 100644
--- a/gpu/gpu_info.h
+++ b/gpu/gpu_info.h
@@ -68,4 +68,4 @@ void cpu_check_ram(mem_info_t *resp);
 #include "gpu_info_vulkan.h"
 
 #endif  // __GPU_INFO_H__
-#endif  // __APPLE__
\ No newline at end of file
+#endif  // __APPLE__
diff --git a/gpu/gpu_info_darwin.h b/gpu/gpu_info_darwin.h
index 3edca237c..415e7922d 100644
--- a/gpu/gpu_info_darwin.h
+++ b/gpu/gpu_info_darwin.h
@@ -2,3 +2,4 @@
 #include <stdint.h>
 uint64_t getRecommendedMaxVRAM();
 uint64_t getPhysicalMemory();
+uint64_t getFreeMemory();
diff --git a/gpu/gpu_info_darwin.m b/gpu/gpu_info_darwin.m
index a145ac076..5ca139e0b 100644
--- a/gpu/gpu_info_darwin.m
+++ b/gpu/gpu_info_darwin.m
@@ -1,4 +1,5 @@
-// go:build darwin
+#import <Foundation/Foundation.h>
+#import <mach/mach.h>
 #include "gpu_info_darwin.h"
 
 uint64_t getRecommendedMaxVRAM() {
@@ -8,6 +9,27 @@ uint64_t getRecommendedMaxVRAM() {
   return result;
 }
 
+// getPhysicalMemory returns the total physical memory in bytes
 uint64_t getPhysicalMemory() {
-  return [[NSProcessInfo processInfo] physicalMemory];
+  return [NSProcessInfo processInfo].physicalMemory;
+}
+
+// getFreeMemory returns the total free memory in bytes, including inactive
+// memory that can be reclaimed by the system.
+uint64_t getFreeMemory() {
+  mach_port_t host_port = mach_host_self();
+  mach_msg_type_number_t host_size = sizeof(vm_statistics64_data_t) / sizeof(integer_t);
+  vm_size_t pagesize;
+  vm_statistics64_data_t vm_stat;
+
+  host_page_size(host_port, &pagesize);
+  if (host_statistics64(host_port, HOST_VM_INFO64, (host_info64_t)&vm_stat, &host_size) != KERN_SUCCESS) {
+    return 0;
+  }
+
+  uint64_t free_memory = (uint64_t)vm_stat.free_count * pagesize;
+  free_memory += (uint64_t)vm_stat.speculative_count * pagesize;
+  free_memory += (uint64_t)vm_stat.inactive_count * pagesize;
+
+  return free_memory;
 }
diff --git a/gpu/gpu_info_nvcuda.c b/gpu/gpu_info_nvcuda.c
index abe140844..a1a38bfc2 100644
--- a/gpu/gpu_info_nvcuda.c
+++ b/gpu/gpu_info_nvcuda.c
@@ -7,6 +7,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
   CUresult ret;
   resp->err = NULL;
   resp->num_devices = 0;
+  resp->cudaErr = CUDA_SUCCESS;
   const int buflen = 256;
   char buf[buflen + 1];
   int i;
@@ -38,6 +39,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
             nvcuda_lib_path, msg);
     free(msg);
     resp->err = strdup(buf);
+    resp->cudaErr = -1;
     return;
   }
 
@@ -52,6 +54,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
               msg);
       free(msg);
       resp->err = strdup(buf);
+      resp->cudaErr = -1;
       return;
     }
   }
@@ -61,12 +64,9 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
     LOG(resp->ch.verbose, "cuInit err: %d\n", ret);
     UNLOAD_LIBRARY(resp->ch.handle);
     resp->ch.handle = NULL;
-    if (ret == CUDA_ERROR_INSUFFICIENT_DRIVER) {
-      resp->err = strdup("your nvidia driver is too old or missing.  If you have a CUDA GPU please upgrade to run ollama");
-      return;
-    }
-    snprintf(buf, buflen, "nvcuda init failure: %d", ret);
+    snprintf(buf, buflen, "cuda driver library init failure: %d", ret);
     resp->err = strdup(buf);
+    resp->cudaErr = ret;
     return;
   }
 
@@ -91,6 +91,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
     resp->ch.handle = NULL;
     snprintf(buf, buflen, "unable to get device count: %d", ret);
     resp->err = strdup(buf);
+    resp->cudaErr = ret;
     return;
   }
 }
@@ -106,13 +107,13 @@ void nvcuda_bootstrap(nvcuda_handle_t h, int i, mem_info_t *resp) {
   CUuuid uuid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 
   if (h.handle == NULL) {
-    resp->err = strdup("nvcuda handle isn't initialized");
+    resp->err = strdup("cuda driver library handle isn't initialized");
     return;
   }
 
   ret = (*h.cuDeviceGet)(&device, i);
   if (ret != CUDA_SUCCESS) {
-    snprintf(buf, buflen, "nvcuda device failed to initialize");
+    snprintf(buf, buflen, "cuda driver library device failed to initialize");
     resp->err = strdup(buf);
     return;
   }
@@ -168,14 +169,14 @@ void nvcuda_bootstrap(nvcuda_handle_t h, int i, mem_info_t *resp) {
   // To get memory we have to set (and release) a context
   ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device);
   if (ret != CUDA_SUCCESS) {
-    snprintf(buf, buflen, "nvcuda failed to get device context %d", ret);
+    snprintf(buf, buflen, "cuda driver library failed to get device context %d", ret);
     resp->err = strdup(buf);
     return;
   }
 
   ret = (*h.cuMemGetInfo_v2)(&memInfo.free, &memInfo.total);
   if (ret != CUDA_SUCCESS) {
-    snprintf(buf, buflen, "nvcuda device memory info lookup failure %d", ret);
+    snprintf(buf, buflen, "cuda driver library device memory info lookup failure %d", ret);
     resp->err = strdup(buf);
     // Best effort on failure...
     (*h.cuCtxDestroy)(ctx);
@@ -193,7 +194,7 @@ void nvcuda_bootstrap(nvcuda_handle_t h, int i, mem_info_t *resp) {
 
   ret = (*h.cuCtxDestroy)(ctx);
   if (ret != CUDA_SUCCESS) {
-    LOG(1, "nvcuda failed to release device context %d", ret);
+    LOG(1, "cuda driver library failed to release device context %d", ret);
   }
 }
 
@@ -206,7 +207,7 @@ void nvcuda_get_free(nvcuda_handle_t h, int i, uint64_t *free, uint64_t *total)
 
   ret = (*h.cuDeviceGet)(&device, i);
   if (ret != CUDA_SUCCESS) {
-    LOG(1, "nvcuda device failed to initialize");
+    LOG(1, "cuda driver library device failed to initialize");
     return;
   }
 
@@ -214,13 +215,13 @@ void nvcuda_get_free(nvcuda_handle_t h, int i, uint64_t *free, uint64_t *total)
   // To get memory we have to set (and release) a context
   ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device);
   if (ret != CUDA_SUCCESS) {
-    LOG(1, "nvcuda failed to get device context %d", ret);
+    LOG(1, "cuda driver library failed to get device context %d", ret);
     return;
   }
 
   ret = (*h.cuMemGetInfo_v2)(free, total);
   if (ret != CUDA_SUCCESS) {
-    LOG(1, "nvcuda device memory info lookup failure %d", ret);
+    LOG(1, "cuda driver library device memory info lookup failure %d", ret);
     // Best effort on failure...
     (*h.cuCtxDestroy)(ctx);
     return;
@@ -228,12 +229,12 @@ void nvcuda_get_free(nvcuda_handle_t h, int i, uint64_t *free, uint64_t *total)
 
   ret = (*h.cuCtxDestroy)(ctx);
   if (ret != CUDA_SUCCESS) {
-    LOG(1, "nvcuda failed to release device context %d", ret);
+    LOG(1, "cuda driver library failed to release device context %d", ret);
   }
 }
 
 void nvcuda_release(nvcuda_handle_t h) {
-  LOG(h.verbose, "releasing nvcuda library\n");
+  LOG(h.verbose, "releasing cuda driver library\n");
   UNLOAD_LIBRARY(h.handle);
   // TODO and other context release logic?
   h.handle = NULL;
diff --git a/gpu/gpu_info_nvcuda.h b/gpu/gpu_info_nvcuda.h
index f9654f641..ef2fe8a30 100644
--- a/gpu/gpu_info_nvcuda.h
+++ b/gpu/gpu_info_nvcuda.h
@@ -7,9 +7,12 @@
 typedef enum cudaError_enum {
   CUDA_SUCCESS = 0,
   CUDA_ERROR_INVALID_VALUE = 1,
-  CUDA_ERROR_MEMORY_ALLOCATION = 2,
+  CUDA_ERROR_OUT_OF_MEMORY = 2,
   CUDA_ERROR_NOT_INITIALIZED = 3,
   CUDA_ERROR_INSUFFICIENT_DRIVER = 35,
+  CUDA_ERROR_NO_DEVICE = 100,
+  CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803,
+  CUDA_ERROR_UNKNOWN = 999,
   // Other values omitted for now...
 } CUresult;
 
@@ -64,6 +67,7 @@ typedef struct nvcuda_init_resp {
   char *err;  // If err is non-null handle is invalid
   nvcuda_handle_t ch;
   int num_devices;
+  CUresult cudaErr;
 } nvcuda_init_resp_t;
 
 void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp);
diff --git a/gpu/gpu_linux.go b/gpu/gpu_linux.go
index 2e723c4da..76df63268 100644
--- a/gpu/gpu_linux.go
+++ b/gpu/gpu_linux.go
@@ -43,6 +43,15 @@ var OneapiGlobs = []string{
 	"/usr/lib*/libze_intel_gpu.so*",
 }
 
+var (
+	CudartMgmtName = "libcudart.so*"
+	NvcudaMgmtName = "libcuda.so*"
+	NvmlMgmtName   = "" // not currently wired on linux
+	OneapiMgmtName = "libze_intel_gpu.so*"
+	VulkanMgmtName = "libvulkan.so*"
+	libcapMgmtName = "libcap.so*"
+)
+
 var VulkanGlobs = []string{
 	"/usr/lib/x86_64-linux-gnu/libvulkan.so*",
 	"/usr/lib*/libvulkan.so*",
@@ -53,20 +62,13 @@ var capLinuxGlobs = []string{
 	"/usr/lib*/libcap.so*",
 }
 
-var CudartMgmtName = "libcudart.so*"
-var NvcudaMgmtName = "libcuda.so*"
-var NvmlMgmtName = "" // not currently wired on linux
-var OneapiMgmtName = "libze_intel_gpu.so"
-var VulkanMgmtName = "libvulkan.so*"
-var libcapMgmtName = "libcap.so*"
-
 func FindLibCapLibs() []string {
 	return FindGPULibs(libcapMgmtName, capLinuxGlobs)
 }
 
 func GetCPUMem() (memInfo, error) {
 	var mem memInfo
-	var total, available, free, buffers, cached uint64
+	var total, available, free, buffers, cached, freeSwap uint64
 	f, err := os.Open("/proc/meminfo")
 	if err != nil {
 		return mem, err
@@ -86,20 +88,21 @@ func GetCPUMem() (memInfo, error) {
 			_, err = fmt.Sscanf(line, "Buffers:%d", &buffers)
 		case strings.HasPrefix(line, "Cached:"):
 			_, err = fmt.Sscanf(line, "Cached:%d", &cached)
+		case strings.HasPrefix(line, "SwapFree:"):
+			_, err = fmt.Sscanf(line, "SwapFree:%d", &freeSwap)
 		default:
 			continue
 		}
 		if err != nil {
 			return mem, err
 		}
-
-		if total > 0 && available > 0 {
-			mem.TotalMemory = total * format.KibiByte
-			mem.FreeMemory = available * format.KibiByte
-			return mem, nil
-		}
 	}
 	mem.TotalMemory = total * format.KibiByte
-	mem.FreeMemory = (free + buffers + cached) * format.KibiByte
+	mem.FreeSwap = freeSwap * format.KibiByte
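+	// prefer the kernel's MemAvailable estimate (Linux 3.14+); otherwise approximate it as free + buffers + cached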
+	if available > 0 {
+		mem.FreeMemory = available * format.KibiByte
+	} else {
+		mem.FreeMemory = (free + buffers + cached) * format.KibiByte
+	}
 	return mem, nil
 }
diff --git a/gpu/gpu_test.go b/gpu/gpu_test.go
index 46d3201e1..13a3f5442 100644
--- a/gpu/gpu_test.go
+++ b/gpu/gpu_test.go
@@ -32,4 +32,29 @@ func TestCPUMemInfo(t *testing.T) {
 	}
 }
 
+func TestByLibrary(t *testing.T) {
+	type testCase struct {
+		input  []GpuInfo
+		expect int
+	}
+
+	testCases := map[string]*testCase{
+		"empty":                    {input: []GpuInfo{}, expect: 0},
+		"cpu":                      {input: []GpuInfo{{Library: "cpu"}}, expect: 1},
+		"cpu + GPU":                {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda"}}, expect: 2},
+		"cpu + 2 GPU no variant":   {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda"}, {Library: "cuda"}}, expect: 2},
+		"cpu + 2 GPU same variant": {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda", Variant: "v11"}, {Library: "cuda", Variant: "v11"}}, expect: 2},
+		"cpu + 2 GPU diff variant": {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda", Variant: "v11"}, {Library: "cuda", Variant: "v12"}}, expect: 3},
+	}
+
+	for k, v := range testCases {
+		t.Run(k, func(t *testing.T) {
+			resp := (GpuInfoList)(v.input).ByLibrary()
+			if len(resp) != v.expect {
+				t.Fatalf("expected length %d, got %d => %+v", v.expect, len(resp), resp)
+			}
+		})
+	}
+}
+
 // TODO - add some logic to figure out card type through other means and actually verify we got back what we expected
diff --git a/gpu/gpu_windows.go b/gpu/gpu_windows.go
index 328477440..5491da963 100644
--- a/gpu/gpu_windows.go
+++ b/gpu/gpu_windows.go
@@ -40,10 +40,12 @@ var OneapiGlobs = []string{
 	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
 }
 
-var CudartMgmtName = "cudart64_*.dll"
-var NvcudaMgmtName = "nvcuda.dll"
-var NvmlMgmtName = "nvml.dll"
-var OneapiMgmtName = "ze_intel_gpu64.dll"
+var (
+	CudartMgmtName = "cudart64_*.dll"
+	NvcudaMgmtName = "nvcuda.dll"
+	NvmlMgmtName   = "nvml.dll"
+	OneapiMgmtName = "ze_intel_gpu64.dll"
+)
 
 func FindLibCapLibs() []string {
 	return []string{""}
@@ -55,5 +57,5 @@ func GetCPUMem() (memInfo, error) {
 	if r1 == 0 {
 		return memInfo{}, fmt.Errorf("GlobalMemoryStatusEx failed: %w", err)
 	}
-	return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys}, nil
+	return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
 }
diff --git a/gpu/types.go b/gpu/types.go
index ae7fb853c..e623c3766 100644
--- a/gpu/types.go
+++ b/gpu/types.go
@@ -10,6 +10,7 @@ import (
 type memInfo struct {
 	TotalMemory uint64 `json:"total_memory,omitempty"`
 	FreeMemory  uint64 `json:"free_memory,omitempty"`
+	FreeSwap    uint64 `json:"free_swap,omitempty"`
 }
 
 // Beginning of an `ollama info` command
@@ -18,7 +19,7 @@ type GpuInfo struct {
 	Library string `json:"library,omitempty"`
 
 	// Optional variant to select (e.g. versions, cpu feature flags)
-	Variant CPUCapability `json:"variant"`
+	Variant string `json:"variant"`
 
 	// MinimumMemory represents the minimum memory required to use the GPU
 	MinimumMemory uint64 `json:"-"`
@@ -52,7 +53,10 @@ type CPUInfo struct {
 
 type CudaGPUInfo struct {
 	GpuInfo
-	index int //nolint:unused,nolintlint
+	OSOverhead   uint64 // Memory overhead between the driver library and management library
+	index        int    //nolint:unused,nolintlint
+	computeMajor int    //nolint:unused,nolintlint
+	computeMinor int    //nolint:unused,nolintlint
 }
 type CudaGPUInfoList []CudaGPUInfo
 
@@ -86,8 +90,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
 	for _, info := range l {
 		found := false
 		requested := info.Library
-		if info.Variant != CPUCapabilityNone {
-			requested += "_" + info.Variant.String()
+		if info.Variant != CPUCapabilityNone.String() {
+			requested += "_" + info.Variant
 		}
 		for i, lib := range libs {
 			if lib == requested {
@@ -97,7 +101,7 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
 			}
 		}
 		if !found {
-			libs = append(libs, info.Library)
+			libs = append(libs, requested)
 			resp = append(resp, []GpuInfo{info})
 		}
 	}
@@ -110,6 +114,7 @@ func (l GpuInfoList) LogDetails() {
 		slog.Info("inference compute",
 			"id", g.ID,
 			"library", g.Library,
+			"variant", g.Variant,
 			"compute", g.Compute,
 			"driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
 			"name", g.Name,
diff --git a/integration/basic_test.go b/integration/basic_test.go
index 6e632a1ce..8e35b5c5b 100644
--- a/integration/basic_test.go
+++ b/integration/basic_test.go
@@ -45,14 +45,7 @@ func TestUnicodeModelDir(t *testing.T) {
 	defer os.RemoveAll(modelDir)
 	slog.Info("unicode", "OLLAMA_MODELS", modelDir)
 
-	oldModelsDir := os.Getenv("OLLAMA_MODELS")
-	if oldModelsDir == "" {
-		defer os.Unsetenv("OLLAMA_MODELS")
-	} else {
-		defer os.Setenv("OLLAMA_MODELS", oldModelsDir)
-	}
-	err = os.Setenv("OLLAMA_MODELS", modelDir)
-	require.NoError(t, err)
+	t.Setenv("OLLAMA_MODELS", modelDir)
 
 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 	defer cancel()
diff --git a/integration/concurrency_test.go b/integration/concurrency_test.go
index d66ba9f00..42e9d0749 100644
--- a/integration/concurrency_test.go
+++ b/integration/concurrency_test.go
@@ -11,8 +11,10 @@ import (
 	"testing"
 	"time"
 
-	"github.com/ollama/ollama/api"
 	"github.com/stretchr/testify/require"
+
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/format"
 )
 
 func TestMultiModelConcurrency(t *testing.T) {
@@ -39,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
 			},
 		}
 		resp = [2][]string{
-			[]string{"sunlight"},
-			[]string{"england", "english", "massachusetts", "pilgrims", "british"},
+			{"sunlight"},
+			{"england", "english", "massachusetts", "pilgrims", "british"},
 		}
 	)
 	var wg sync.WaitGroup
@@ -69,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
 	reqLimit := len(req)
 	iterLimit := 5
 
-	vram := os.Getenv("OLLAMA_MAX_VRAM")
-	if vram != "" {
-		max, err := strconv.ParseUint(vram, 10, 64)
+	if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
+		maxVram, err := strconv.ParseUint(s, 10, 64)
 		require.NoError(t, err)
 		// Don't hammer on small VRAM cards...
-		if max < 4*1024*1024*1024 {
+		if maxVram < 4*format.GibiByte {
 			reqLimit = min(reqLimit, 2)
 			iterLimit = 2
 		}
@@ -106,13 +107,16 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
 
 // Stress the system if we know how much VRAM it has, and attempt to load more models than will fit
 func TestMultiModelStress(t *testing.T) {
-	vram := os.Getenv("OLLAMA_MAX_VRAM")
-	if vram == "" {
+	s := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
+	if s == "" {
 		t.Skip("OLLAMA_MAX_VRAM not specified, can't pick the right models for the stress test")
 	}
-	max, err := strconv.ParseUint(vram, 10, 64)
-	require.NoError(t, err)
-	const MB = uint64(1024 * 1024)
+
+	maxVram, err := strconv.ParseUint(s, 10, 64)
+	if err != nil {
+		t.Fatal(err)
+	}
+
 	type model struct {
 		name string
 		size uint64 // Approximate amount of VRAM they typically use when fully loaded in VRAM
@@ -121,83 +125,82 @@ func TestMultiModelStress(t *testing.T) {
 	smallModels := []model{
 		{
 			name: "orca-mini",
-			size: 2992 * MB,
+			size: 2992 * format.MebiByte,
 		},
 		{
 			name: "phi",
-			size: 2616 * MB,
+			size: 2616 * format.MebiByte,
 		},
 		{
 			name: "gemma:2b",
-			size: 2364 * MB,
+			size: 2364 * format.MebiByte,
 		},
 		{
 			name: "stable-code:3b",
-			size: 2608 * MB,
+			size: 2608 * format.MebiByte,
 		},
 		{
 			name: "starcoder2:3b",
-			size: 2166 * MB,
+			size: 2166 * format.MebiByte,
 		},
 	}
 	mediumModels := []model{
 		{
 			name: "llama2",
-			size: 5118 * MB,
+			size: 5118 * format.MebiByte,
 		},
 		{
 			name: "mistral",
-			size: 4620 * MB,
+			size: 4620 * format.MebiByte,
 		},
 		{
 			name: "orca-mini:7b",
-			size: 5118 * MB,
+			size: 5118 * format.MebiByte,
 		},
 		{
 			name: "dolphin-mistral",
-			size: 4620 * MB,
+			size: 4620 * format.MebiByte,
 		},
 		{
 			name: "gemma:7b",
-			size: 5000 * MB,
+			size: 5000 * format.MebiByte,
+		},
+		{
+			name: "codellama:7b",
+			size: 5118 * format.MebiByte,
 		},
-		// TODO - uncomment this once #3565 is merged and this is rebased on it
-		// {
-		// 	name: "codellama:7b",
-		// 	size: 5118 * MB,
-		// },
 	}
 
 	// These seem to be too slow to be useful...
 	// largeModels := []model{
 	// 	{
 	// 		name: "llama2:13b",
-	// 		size: 7400 * MB,
+	// 		size: 7400 * format.MebiByte,
 	// 	},
 	// 	{
 	// 		name: "codellama:13b",
-	// 		size: 7400 * MB,
+	// 		size: 7400 * format.MebiByte,
 	// 	},
 	// 	{
 	// 		name: "orca-mini:13b",
-	// 		size: 7400 * MB,
+	// 		size: 7400 * format.MebiByte,
 	// 	},
 	// 	{
 	// 		name: "gemma:7b",
-	// 		size: 5000 * MB,
+	// 		size: 5000 * format.MebiByte,
 	// 	},
 	// 	{
 	// 		name: "starcoder2:15b",
-	// 		size: 9100 * MB,
+	// 		size: 9100 * format.MebiByte,
 	// 	},
 	// }
 
 	var chosenModels []model
 	switch {
-	case max < 10000*MB:
+	case maxVram < 10000*format.MebiByte:
 		slog.Info("selecting small models")
 		chosenModels = smallModels
-	// case max < 30000*MB:
+	// case maxVram < 30000*format.MebiByte:
 	default:
 		slog.Info("selecting medium models")
 		chosenModels = mediumModels
@@ -226,15 +229,15 @@ func TestMultiModelStress(t *testing.T) {
 	}
 
 	var wg sync.WaitGroup
-	consumed := uint64(256 * MB) // Assume some baseline usage
+	consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
 	for i := 0; i < len(req); i++ {
 		// Always get at least 2 models, but don't overshoot VRAM too much or we'll take too long
-		if i > 1 && consumed > max {
-			slog.Info("achieved target vram exhaustion", "count", i, "vramMB", max/1024/1024, "modelsMB", consumed/1024/1024)
+		if i > 1 && consumed > maxVram {
+			slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
 			break
 		}
 		consumed += chosenModels[i].size
-		slog.Info("target vram", "count", i, "vramMB", max/1024/1024, "modelsMB", consumed/1024/1024)
+		slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
 
 		wg.Add(1)
 		go func(i int) {
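Replacing the local MB constant with the shared format package also buys readable log output: format.MebiByte and format.GibiByte are the standard binary multiples, and format.HumanBytes2 renders a byte count for the slog lines above. A self-contained sketch using local stand-ins for those values:

package main

import "fmt"

// Stand-ins for the format package's binary-size constants.
const (
	KibiByte = 1024
	MebiByte = 1024 * KibiByte
	GibiByte = 1024 * MebiByte
)

// humanGiB is a toy analogue of format.HumanBytes2 for GiB-scale values.
func humanGiB(b uint64) string {
	return fmt.Sprintf("%.1f GiB", float64(b)/float64(GibiByte))
}

func main() {
	consumed := uint64(256*MebiByte + 2992*MebiByte) // baseline + orca-mini
	fmt.Println(humanGiB(consumed))                  // 3.2 GiB
}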
diff --git a/integration/context_test.go b/integration/context_test.go
index 46fac5ead..f1342e16c 100644
--- a/integration/context_test.go
+++ b/integration/context_test.go
@@ -12,7 +12,7 @@ import (
 
 func TestContextExhaustion(t *testing.T) {
 	// Longer needed for small footprint GPUs
-	ctx, cancel := context.WithTimeout(context.Background(), 6*time.Minute)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
 	defer cancel()
 	// Set up the test data
 	req := api.GenerateRequest{
@@ -25,5 +25,10 @@ func TestContextExhaustion(t *testing.T) {
 			"num_ctx":     128,
 		},
 	}
-	GenerateTestHelper(ctx, t, req, []string{"once", "upon", "lived"})
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	if err := PullIfMissing(ctx, client, req.Model); err != nil {
+		t.Fatalf("PullIfMissing failed: %v", err)
+	}
+	DoGenerate(ctx, t, client, req, []string{"once", "upon", "lived"}, 120*time.Second, 10*time.Second)
 }
diff --git a/integration/embed_test.go b/integration/embed_test.go
new file mode 100644
index 000000000..4a68af68a
--- /dev/null
+++ b/integration/embed_test.go
@@ -0,0 +1,209 @@
+//go:build integration
+
+package integration
+
+import (
+	"context"
+	"math"
+	"testing"
+	"time"
+
+	"github.com/ollama/ollama/api"
+)
+
+func floatsEqual32(a, b float32) bool {
+	return math.Abs(float64(a-b)) <= 1e-4
+}
+
+func floatsEqual64(a, b float64) bool {
+	return math.Abs(a-b) <= 1e-4
+}
+
+func TestAllMiniLMEmbeddings(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+
+	req := api.EmbeddingRequest{
+		Model:  "all-minilm",
+		Prompt: "why is the sky blue?",
+	}
+
+	res, err := embeddingTestHelper(ctx, t, req)
+
+	if err != nil {
+		t.Fatalf("error: %v", err)
+	}
+
+	if len(res.Embedding) != 384 {
+		t.Fatalf("expected 384 floats, got %d", len(res.Embedding))
+	}
+
+	if !floatsEqual64(res.Embedding[0], 0.06642947345972061) {
+		t.Fatalf("expected 0.06642947345972061, got %.16f", res.Embedding[0])
+	}
+}
+
+func TestAllMiniLMEmbed(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+
+	req := api.EmbedRequest{
+		Model: "all-minilm",
+		Input: "why is the sky blue?",
+	}
+
+	res, err := embedTestHelper(ctx, t, req)
+
+	if err != nil {
+		t.Fatalf("error: %v", err)
+	}
+
+	if len(res.Embeddings) != 1 {
+		t.Fatalf("expected 1 embedding, got %d", len(res.Embeddings))
+	}
+
+	if len(res.Embeddings[0]) != 384 {
+		t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
+	}
+
+	if !floatsEqual32(res.Embeddings[0][0], 0.010071031) {
+		t.Fatalf("expected 0.010071031, got %.8f", res.Embeddings[0][0])
+	}
+
+	if res.PromptEvalCount != 6 {
+		t.Fatalf("expected 6 prompt tokens, got %d", res.PromptEvalCount)
+	}
+}
+
+func TestAllMiniLMBatchEmbed(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+
+	req := api.EmbedRequest{
+		Model: "all-minilm",
+		Input: []string{"why is the sky blue?", "why is the grass green?"},
+	}
+
+	res, err := embedTestHelper(ctx, t, req)
+
+	if err != nil {
+		t.Fatalf("error: %v", err)
+	}
+
+	if len(res.Embeddings) != 2 {
+		t.Fatalf("expected 2 embeddings, got %d", len(res.Embeddings))
+	}
+
+	if len(res.Embeddings[0]) != 384 {
+		t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
+	}
+
+	if !floatsEqual32(res.Embeddings[0][0], 0.010071031) || !floatsEqual32(res.Embeddings[1][0], -0.009802706) {
+		t.Fatalf("expected 0.010071031 and -0.009802706, got %.8f and %.8f", res.Embeddings[0][0], res.Embeddings[1][0])
+	}
+
+	if res.PromptEvalCount != 12 {
+		t.Fatalf("expected 12 prompt tokens, got %d", res.PromptEvalCount)
+	}
+}
+
+func TestAllMiniLMEmbedTruncate(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+
+	truncTrue, truncFalse := true, false
+
+	type testReq struct {
+		Name    string
+		Request api.EmbedRequest
+	}
+
+	reqs := []testReq{
+		{
+			Name: "Target Truncation",
+			Request: api.EmbedRequest{
+				Model: "all-minilm",
+				Input: "why",
+			},
+		},
+		{
+			Name: "Default Truncate",
+			Request: api.EmbedRequest{
+				Model:   "all-minilm",
+				Input:   "why is the sky blue?",
+				Options: map[string]any{"num_ctx": 1},
+			},
+		},
+		{
+			Name: "Explicit Truncate",
+			Request: api.EmbedRequest{
+				Model:    "all-minilm",
+				Input:    "why is the sky blue?",
+				Truncate: &truncTrue,
+				Options:  map[string]any{"num_ctx": 1},
+			},
+		},
+	}
+
+	res := make(map[string]*api.EmbedResponse)
+
+	for _, req := range reqs {
+		response, err := embedTestHelper(ctx, t, req.Request)
+		if err != nil {
+			t.Fatalf("error: %v", err)
+		}
+		res[req.Name] = response
+	}
+
+	if res["Target Truncation"].Embeddings[0][0] != res["Default Truncate"].Embeddings[0][0] {
+		t.Fatal("expected default request to truncate correctly")
+	}
+
+	if res["Default Truncate"].Embeddings[0][0] != res["Explicit Truncate"].Embeddings[0][0] {
+		t.Fatal("expected default request and truncate true request to be the same")
+	}
+
+	// check that truncate set to false returns an error if context length is exceeded
+	_, err := embedTestHelper(ctx, t, api.EmbedRequest{
+		Model:    "all-minilm",
+		Input:    "why is the sky blue?",
+		Truncate: &truncFalse,
+		Options:  map[string]any{"num_ctx": 1},
+	})
+
+	if err == nil {
+		t.Fatal("expected error, got nil")
+	}
+}
+
+func embeddingTestHelper(ctx context.Context, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	if err := PullIfMissing(ctx, client, req.Model); err != nil {
+		t.Fatalf("failed to pull model %s: %v", req.Model, err)
+	}
+
+	response, err := client.Embeddings(ctx, &req)
+
+	if err != nil {
+		return nil, err
+	}
+
+	return response, nil
+}
+
+func embedTestHelper(ctx context.Context, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	if err := PullIfMissing(ctx, client, req.Model); err != nil {
+		t.Fatalf("failed to pull model %s: %v", req.Model, err)
+	}
+
+	response, err := client.Embed(ctx, &req)
+
+	if err != nil {
+		return nil, err
+	}
+
+	return response, nil
+}
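Outside the test harness, the /api/embed endpoint these tests cover is reachable through the public Go client. A minimal sketch, assuming a server is running and the all-minilm model is available (ClientFromEnvironment honors OLLAMA_HOST):

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}
	resp, err := client.Embed(context.Background(), &api.EmbedRequest{
		Model: "all-minilm",
		Input: []string{"why is the sky blue?", "why is the grass green?"},
	})
	if err != nil {
		log.Fatal(err)
	}
	// Two inputs in, two 384-dimensional embeddings out.
	fmt.Println(len(resp.Embeddings), len(resp.Embeddings[0]))
}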
diff --git a/integration/llm_test.go b/integration/llm_test.go
index 4952b0726..398e0a03a 100644
--- a/integration/llm_test.go
+++ b/integration/llm_test.go
@@ -35,8 +35,8 @@ var (
 		},
 	}
 	resp = [2][]string{
-		[]string{"sunlight"},
-		[]string{"england", "english", "massachusetts", "pilgrims"},
+		{"sunlight"},
+		{"england", "english", "massachusetts", "pilgrims"},
 	}
 )
 
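The change above is purely syntactic: within a composite literal the element type may be elided, so each inner []string{...} shortens to {...} (gofmt -s applies the same simplification). For example:

package main

import "fmt"

func main() {
	// The inner element type []string is implied by the array type.
	resp := [2][]string{
		{"sunlight"},
		{"england", "english", "massachusetts", "pilgrims"},
	}
	fmt.Println(resp[1][0]) // england
}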
diff --git a/integration/max_queue_test.go b/integration/max_queue_test.go
index dfa5eae0c..ec9e085a5 100644
--- a/integration/max_queue_test.go
+++ b/integration/max_queue_test.go
@@ -5,7 +5,6 @@ package integration
 import (
 	"context"
 	"errors"
-	"fmt"
 	"log/slog"
 	"os"
 	"strconv"
@@ -14,8 +13,10 @@ import (
 	"testing"
 	"time"
 
-	"github.com/ollama/ollama/api"
 	"github.com/stretchr/testify/require"
+
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/envconfig"
 )
 
 func TestMaxQueue(t *testing.T) {
@@ -27,13 +28,10 @@ func TestMaxQueue(t *testing.T) {
 	// Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless you're on GPU
 	// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
 	threadCount := 32
-	mq := os.Getenv("OLLAMA_MAX_QUEUE")
-	if mq != "" {
-		var err error
-		threadCount, err = strconv.Atoi(mq)
-		require.NoError(t, err)
+	if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
+		threadCount = int(maxQueue)
 	} else {
-		os.Setenv("OLLAMA_MAX_QUEUE", fmt.Sprintf("%d", threadCount))
+		t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
 	}
 
 	req := api.GenerateRequest{
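Routing the lookup through envconfig keeps the parsing in one place, and the test now only pins the variable (scoped via t.Setenv) when nothing is configured. A hedged sketch of the accessor pattern, with maxQueue as a local stand-in for envconfig.MaxQueue (the real accessor's defaults may differ):

package main

import (
	"fmt"
	"os"
	"strconv"
)

// maxQueue parses OLLAMA_MAX_QUEUE, returning 0 when unset or invalid so
// callers can fall back to their own default.
func maxQueue() uint {
	n, err := strconv.ParseUint(os.Getenv("OLLAMA_MAX_QUEUE"), 10, 32)
	if err != nil {
		return 0
	}
	return uint(n)
}

func main() {
	threadCount := 32 // default, mirroring the test above
	if mq := maxQueue(); mq != 0 {
		threadCount = int(mq)
	}
	fmt.Println(threadCount)
}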
diff --git a/integration/utils_test.go b/integration/utils_test.go
index 7e1fcc10e..a60109958 100644
--- a/integration/utils_test.go
+++ b/integration/utils_test.go
@@ -162,7 +162,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
 	fn := func(resp api.ProgressResponse) error {
 		// fmt.Print(".")
 		if !stallTimer.Reset(stallDuration) {
-			return fmt.Errorf("stall was detected, aborting status reporting")
+			return errors.New("stall was detected, aborting status reporting")
 		}
 		return nil
 	}
@@ -180,7 +180,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
 
 	select {
 	case <-stallTimer.C:
-		return fmt.Errorf("download stalled")
+		return errors.New("download stalled")
 	case <-done:
 		return pullError
 	}
@@ -243,7 +243,7 @@ func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq ap
 		// fmt.Print(".")
 		buf.Write([]byte(response.Response))
 		if !stallTimer.Reset(streamTimeout) {
-			return fmt.Errorf("stall was detected while streaming response, aborting")
+			return errors.New("stall was detected while streaming response, aborting")
 		}
 		return nil
 	}
@@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 			},
 		},
 		[][]string{
-			[]string{"sunlight"},
-			[]string{"soil", "organic", "earth", "black", "tan"},
-			[]string{"england", "english", "massachusetts", "pilgrims", "british"},
-			[]string{"fourth", "july", "declaration", "independence"},
-			[]string{"nitrogen", "oxygen", "carbon", "dioxide"},
+			{"sunlight"},
+			{"soil", "organic", "earth", "black", "tan"},
+			{"england", "english", "massachusetts", "pilgrims", "british"},
+			{"fourth", "july", "declaration", "independence"},
+			{"nitrogen", "oxygen", "carbon", "dioxide"},
 		}
 }
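These swaps follow the usual Go guidance: errors.New for a constant message, fmt.Errorf only when there are verbs to interpolate or an error to wrap. For example:

package main

import (
	"errors"
	"fmt"
)

func main() {
	errStall := errors.New("download stalled") // constant message
	// fmt.Errorf earns its keep when wrapping or interpolating:
	wrapped := fmt.Errorf("pull %q failed: %w", "all-minilm", errStall)
	fmt.Println(errors.Is(wrapped, errStall)) // true
}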
diff --git a/llm/ext_server/CMakeLists.txt b/llm/ext_server/CMakeLists.txt
index db7d52dcc..517302451 100644
--- a/llm/ext_server/CMakeLists.txt
+++ b/llm/ext_server/CMakeLists.txt
@@ -1,14 +1,15 @@
-
 set(TARGET ollama_llama_server)
 option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
+set(LLAMA_SERVER_LDFLAGS $ENV{LLAMA_SERVER_LDFLAGS})
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
+add_executable(${TARGET} server.cpp utils.hpp httplib.h)
 install(TARGETS ${TARGET} RUNTIME)
 target_compile_definitions(${TARGET} PRIVATE
     SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
 )
-target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_SERVER_LDFLAGS})
 if (WIN32)
     TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
+    target_link_options(${TARGET} PRIVATE -municode -Wl,/subsystem:console)
 endif()
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
\ No newline at end of file
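Two notes on the CMake changes: reading LLAMA_SERVER_LDFLAGS from the environment at configure time lets the generate scripts inject extra linker inputs without patching this file (e.g., hypothetically, LLAMA_SERVER_LDFLAGS="-lfoo" cmake -B build), and linking ggml and llama explicitly alongside common and llava matches the upstream split of those libraries into separate targets. On Windows, -municode with -Wl,/subsystem:console keeps the server a console application with a wide-character entry point.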
diff --git a/llm/ext_server/json.hpp b/llm/ext_server/json.hpp
deleted file mode 100644
index ea945f346..000000000
--- a/llm/ext_server/json.hpp
+++ /dev/null
@@ -1,24596 +0,0 @@
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-/****************************************************************************\
- * Note on documentation: The source files contain links to the online      *
- * documentation of the public API at https://json.nlohmann.me. This URL    *
- * contains the most recent documentation and should also be applicable to  *
- * previous versions; documentation for deprecated functions is not         *
- * removed, but marked deprecated. See "Generate documentation" section in  *
- * file docs/README.md.                                                     *
-\****************************************************************************/
-
-#ifndef INCLUDE_NLOHMANN_JSON_HPP_
-#define INCLUDE_NLOHMANN_JSON_HPP_
-
-#include  // all_of, find, for_each
-#include  // nullptr_t, ptrdiff_t, size_t
-#include  // hash, less
-#include  // initializer_list
-#ifndef JSON_NO_IO
-    #include  // istream, ostream
-#endif  // JSON_NO_IO
-#include  // random_access_iterator_tag
-#include  // unique_ptr
-#include  // accumulate
-#include  // string, stoi, to_string
-#include  // declval, forward, move, pair, swap
-#include  // vector
-
-// #include 
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-#include 
-
-// #include 
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-// This file contains all macro definitions affecting or depending on the ABI
-
-#ifndef JSON_SKIP_LIBRARY_VERSION_CHECK
-    #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH)
-        #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 11 || NLOHMANN_JSON_VERSION_PATCH != 2
-            #warning "Already included a different version of the library!"
-        #endif
-    #endif
-#endif
-
-#define NLOHMANN_JSON_VERSION_MAJOR 3   // NOLINT(modernize-macro-to-enum)
-#define NLOHMANN_JSON_VERSION_MINOR 11  // NOLINT(modernize-macro-to-enum)
-#define NLOHMANN_JSON_VERSION_PATCH 2   // NOLINT(modernize-macro-to-enum)
-
-#ifndef JSON_DIAGNOSTICS
-    #define JSON_DIAGNOSTICS 0
-#endif
-
-#ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
-    #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0
-#endif
-
-#if JSON_DIAGNOSTICS
-    #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag
-#else
-    #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS
-#endif
-
-#if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
-    #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp
-#else
-    #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON
-#endif
-
-#ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION
-    #define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0
-#endif
-
-// Construct the namespace ABI tags component
-#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b
-#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \
-    NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b)
-
-#define NLOHMANN_JSON_ABI_TAGS                                       \
-    NLOHMANN_JSON_ABI_TAGS_CONCAT(                                   \
-            NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS,                       \
-            NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON)
-
-// Construct the namespace version component
-#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \
-    _v ## major ## _ ## minor ## _ ## patch
-#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \
-    NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch)
-
-#if NLOHMANN_JSON_NAMESPACE_NO_VERSION
-#define NLOHMANN_JSON_NAMESPACE_VERSION
-#else
-#define NLOHMANN_JSON_NAMESPACE_VERSION                                 \
-    NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \
-                                           NLOHMANN_JSON_VERSION_MINOR, \
-                                           NLOHMANN_JSON_VERSION_PATCH)
-#endif
-
-// Combine namespace components
-#define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b
-#define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \
-    NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b)
-
-#ifndef NLOHMANN_JSON_NAMESPACE
-#define NLOHMANN_JSON_NAMESPACE               \
-    nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \
-            NLOHMANN_JSON_ABI_TAGS,           \
-            NLOHMANN_JSON_NAMESPACE_VERSION)
-#endif
-
-#ifndef NLOHMANN_JSON_NAMESPACE_BEGIN
-#define NLOHMANN_JSON_NAMESPACE_BEGIN                \
-    namespace nlohmann                               \
-    {                                                \
-    inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \
-                NLOHMANN_JSON_ABI_TAGS,              \
-                NLOHMANN_JSON_NAMESPACE_VERSION)     \
-    {
-#endif
-
-#ifndef NLOHMANN_JSON_NAMESPACE_END
-#define NLOHMANN_JSON_NAMESPACE_END                                     \
-    }  /* namespace (inline namespace) NOLINT(readability/namespace) */ \
-    }  // namespace nlohmann
-#endif
-
-// #include 
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-#include  // transform
-#include  // array
-#include  // forward_list
-#include  // inserter, front_inserter, end
-#include  // map
-#include  // string
-#include  // tuple, make_tuple
-#include  // is_arithmetic, is_same, is_enum, underlying_type, is_convertible
-#include  // unordered_map
-#include  // pair, declval
-#include  // valarray
-
-// #include 
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-#include  // nullptr_t
-#include  // exception
-#include  // runtime_error
-#include  // to_string
-#include  // vector
-
-// #include 
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-#include  // array
-#include  // size_t
-#include  // uint8_t
-#include  // string
-
-// #include 
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-#include  // declval, pair
-// #include 
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-#include 
-
-// #include 
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-// #include 
-
-
-NLOHMANN_JSON_NAMESPACE_BEGIN
-namespace detail
-{
-
-template struct make_void
-{
-    using type = void;
-};
-template using void_t = typename make_void::type;
-
-}  // namespace detail
-NLOHMANN_JSON_NAMESPACE_END
-
-
-NLOHMANN_JSON_NAMESPACE_BEGIN
-namespace detail
-{
-
-// https://en.cppreference.com/w/cpp/experimental/is_detected
-struct nonesuch
-{
-    nonesuch() = delete;
-    ~nonesuch() = delete;
-    nonesuch(nonesuch const&) = delete;
-    nonesuch(nonesuch const&&) = delete;
-    void operator=(nonesuch const&) = delete;
-    void operator=(nonesuch&&) = delete;
-};
-
-template class Op,
-         class... Args>
-struct detector
-{
-    using value_t = std::false_type;
-    using type = Default;
-};
-
-template class Op, class... Args>
-struct detector>, Op, Args...>
-{
-    using value_t = std::true_type;
-    using type = Op;
-};
-
-template class Op, class... Args>
-using is_detected = typename detector::value_t;
-
-template class Op, class... Args>
-struct is_detected_lazy : is_detected { };
-
-template class Op, class... Args>
-using detected_t = typename detector::type;
-
-template class Op, class... Args>
-using detected_or = detector;
-
-template class Op, class... Args>
-using detected_or_t = typename detected_or::type;
-
-template class Op, class... Args>
-using is_detected_exact = std::is_same>;
-
-template class Op, class... Args>
-using is_detected_convertible =
-    std::is_convertible, To>;
-
-}  // namespace detail
-NLOHMANN_JSON_NAMESPACE_END
-
-// #include 
-
-
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-FileCopyrightText: 2016-2021 Evan Nemerson 
-// SPDX-License-Identifier: MIT
-
-/* Hedley - https://nemequ.github.io/hedley
- * Created by Evan Nemerson 
- */
-
-#if !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < 15)
-#if defined(JSON_HEDLEY_VERSION)
-    #undef JSON_HEDLEY_VERSION
-#endif
-#define JSON_HEDLEY_VERSION 15
-
-#if defined(JSON_HEDLEY_STRINGIFY_EX)
-    #undef JSON_HEDLEY_STRINGIFY_EX
-#endif
-#define JSON_HEDLEY_STRINGIFY_EX(x) #x
-
-#if defined(JSON_HEDLEY_STRINGIFY)
-    #undef JSON_HEDLEY_STRINGIFY
-#endif
-#define JSON_HEDLEY_STRINGIFY(x) JSON_HEDLEY_STRINGIFY_EX(x)
-
-#if defined(JSON_HEDLEY_CONCAT_EX)
-    #undef JSON_HEDLEY_CONCAT_EX
-#endif
-#define JSON_HEDLEY_CONCAT_EX(a,b) a##b
-
-#if defined(JSON_HEDLEY_CONCAT)
-    #undef JSON_HEDLEY_CONCAT
-#endif
-#define JSON_HEDLEY_CONCAT(a,b) JSON_HEDLEY_CONCAT_EX(a,b)
-
-#if defined(JSON_HEDLEY_CONCAT3_EX)
-    #undef JSON_HEDLEY_CONCAT3_EX
-#endif
-#define JSON_HEDLEY_CONCAT3_EX(a,b,c) a##b##c
-
-#if defined(JSON_HEDLEY_CONCAT3)
-    #undef JSON_HEDLEY_CONCAT3
-#endif
-#define JSON_HEDLEY_CONCAT3(a,b,c) JSON_HEDLEY_CONCAT3_EX(a,b,c)
-
-#if defined(JSON_HEDLEY_VERSION_ENCODE)
-    #undef JSON_HEDLEY_VERSION_ENCODE
-#endif
-#define JSON_HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision))
-
-#if defined(JSON_HEDLEY_VERSION_DECODE_MAJOR)
-    #undef JSON_HEDLEY_VERSION_DECODE_MAJOR
-#endif
-#define JSON_HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000)
-
-#if defined(JSON_HEDLEY_VERSION_DECODE_MINOR)
-    #undef JSON_HEDLEY_VERSION_DECODE_MINOR
-#endif
-#define JSON_HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000)
-
-#if defined(JSON_HEDLEY_VERSION_DECODE_REVISION)
-    #undef JSON_HEDLEY_VERSION_DECODE_REVISION
-#endif
-#define JSON_HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000)
-
-#if defined(JSON_HEDLEY_GNUC_VERSION)
-    #undef JSON_HEDLEY_GNUC_VERSION
-#endif
-#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__)
-    #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)
-#elif defined(__GNUC__)
-    #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0)
-#endif
-
-#if defined(JSON_HEDLEY_GNUC_VERSION_CHECK)
-    #undef JSON_HEDLEY_GNUC_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_GNUC_VERSION)
-    #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GNUC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_MSVC_VERSION)
-    #undef JSON_HEDLEY_MSVC_VERSION
-#endif
-#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL)
-    #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100)
-#elif defined(_MSC_FULL_VER) && !defined(__ICL)
-    #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10)
-#elif defined(_MSC_VER) && !defined(__ICL)
-    #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0)
-#endif
-
-#if defined(JSON_HEDLEY_MSVC_VERSION_CHECK)
-    #undef JSON_HEDLEY_MSVC_VERSION_CHECK
-#endif
-#if !defined(JSON_HEDLEY_MSVC_VERSION)
-    #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0)
-#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
-    #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch)))
-#elif defined(_MSC_VER) && (_MSC_VER >= 1200)
-    #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch)))
-#else
-    #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor)))
-#endif
-
-#if defined(JSON_HEDLEY_INTEL_VERSION)
-    #undef JSON_HEDLEY_INTEL_VERSION
-#endif
-#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL)
-    #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE)
-#elif defined(__INTEL_COMPILER) && !defined(__ICL)
-    #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0)
-#endif
-
-#if defined(JSON_HEDLEY_INTEL_VERSION_CHECK)
-    #undef JSON_HEDLEY_INTEL_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_INTEL_VERSION)
-    #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_INTEL_CL_VERSION)
-    #undef JSON_HEDLEY_INTEL_CL_VERSION
-#endif
-#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL)
-    #define JSON_HEDLEY_INTEL_CL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0)
-#endif
-
-#if defined(JSON_HEDLEY_INTEL_CL_VERSION_CHECK)
-    #undef JSON_HEDLEY_INTEL_CL_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_INTEL_CL_VERSION)
-    #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_CL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_PGI_VERSION)
-    #undef JSON_HEDLEY_PGI_VERSION
-#endif
-#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__)
-    #define JSON_HEDLEY_PGI_VERSION JSON_HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__)
-#endif
-
-#if defined(JSON_HEDLEY_PGI_VERSION_CHECK)
-    #undef JSON_HEDLEY_PGI_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_PGI_VERSION)
-    #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PGI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_SUNPRO_VERSION)
-    #undef JSON_HEDLEY_SUNPRO_VERSION
-#endif
-#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000)
-    #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10)
-#elif defined(__SUNPRO_C)
-    #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf)
-#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000)
-    #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10)
-#elif defined(__SUNPRO_CC)
-    #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf)
-#endif
-
-#if defined(JSON_HEDLEY_SUNPRO_VERSION_CHECK)
-    #undef JSON_HEDLEY_SUNPRO_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_SUNPRO_VERSION)
-    #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_SUNPRO_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION)
-    #undef JSON_HEDLEY_EMSCRIPTEN_VERSION
-#endif
-#if defined(__EMSCRIPTEN__)
-    #define JSON_HEDLEY_EMSCRIPTEN_VERSION JSON_HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__)
-#endif
-
-#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK)
-    #undef JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION)
-    #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_EMSCRIPTEN_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_ARM_VERSION)
-    #undef JSON_HEDLEY_ARM_VERSION
-#endif
-#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION)
-    #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100)
-#elif defined(__CC_ARM) && defined(__ARMCC_VERSION)
-    #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100)
-#endif
-
-#if defined(JSON_HEDLEY_ARM_VERSION_CHECK)
-    #undef JSON_HEDLEY_ARM_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_ARM_VERSION)
-    #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_ARM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_IBM_VERSION)
-    #undef JSON_HEDLEY_IBM_VERSION
-#endif
-#if defined(__ibmxl__)
-    #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__)
-#elif defined(__xlC__) && defined(__xlC_ver__)
-    #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff)
-#elif defined(__xlC__)
-    #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0)
-#endif
-
-#if defined(JSON_HEDLEY_IBM_VERSION_CHECK)
-    #undef JSON_HEDLEY_IBM_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_IBM_VERSION)
-    #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IBM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_TI_VERSION)
-    #undef JSON_HEDLEY_TI_VERSION
-#endif
-#if \
-    defined(__TI_COMPILER_VERSION__) && \
-    ( \
-      defined(__TMS470__) || defined(__TI_ARM__) || \
-      defined(__MSP430__) || \
-      defined(__TMS320C2000__) \
-    )
-#if (__TI_COMPILER_VERSION__ >= 16000000)
-    #define JSON_HEDLEY_TI_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))
-#endif
-#endif
-
-#if defined(JSON_HEDLEY_TI_VERSION_CHECK)
-    #undef JSON_HEDLEY_TI_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_TI_VERSION)
-    #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_TI_CL2000_VERSION)
-    #undef JSON_HEDLEY_TI_CL2000_VERSION
-#endif
-#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__)
-    #define JSON_HEDLEY_TI_CL2000_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))
-#endif
-
-#if defined(JSON_HEDLEY_TI_CL2000_VERSION_CHECK)
-    #undef JSON_HEDLEY_TI_CL2000_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_TI_CL2000_VERSION)
-    #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL2000_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_TI_CL430_VERSION)
-    #undef JSON_HEDLEY_TI_CL430_VERSION
-#endif
-#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__)
-    #define JSON_HEDLEY_TI_CL430_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))
-#endif
-
-#if defined(JSON_HEDLEY_TI_CL430_VERSION_CHECK)
-    #undef JSON_HEDLEY_TI_CL430_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_TI_CL430_VERSION)
-    #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL430_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_TI_ARMCL_VERSION)
-    #undef JSON_HEDLEY_TI_ARMCL_VERSION
-#endif
-#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__))
-    #define JSON_HEDLEY_TI_ARMCL_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))
-#endif
-
-#if defined(JSON_HEDLEY_TI_ARMCL_VERSION_CHECK)
-    #undef JSON_HEDLEY_TI_ARMCL_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_TI_ARMCL_VERSION)
-    #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_ARMCL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_TI_CL6X_VERSION)
-    #undef JSON_HEDLEY_TI_CL6X_VERSION
-#endif
-#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__)
-    #define JSON_HEDLEY_TI_CL6X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))
-#endif
-
-#if defined(JSON_HEDLEY_TI_CL6X_VERSION_CHECK)
-    #undef JSON_HEDLEY_TI_CL6X_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_TI_CL6X_VERSION)
-    #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL6X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_TI_CL7X_VERSION)
-    #undef JSON_HEDLEY_TI_CL7X_VERSION
-#endif
-#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__)
-    #define JSON_HEDLEY_TI_CL7X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))
-#endif
-
-#if defined(JSON_HEDLEY_TI_CL7X_VERSION_CHECK)
-    #undef JSON_HEDLEY_TI_CL7X_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_TI_CL7X_VERSION)
-    #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL7X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_TI_CLPRU_VERSION)
-    #undef JSON_HEDLEY_TI_CLPRU_VERSION
-#endif
-#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__)
-    #define JSON_HEDLEY_TI_CLPRU_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))
-#endif
-
-#if defined(JSON_HEDLEY_TI_CLPRU_VERSION_CHECK)
-    #undef JSON_HEDLEY_TI_CLPRU_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_TI_CLPRU_VERSION)
-    #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CLPRU_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_CRAY_VERSION)
-    #undef JSON_HEDLEY_CRAY_VERSION
-#endif
-#if defined(_CRAYC)
-    #if defined(_RELEASE_PATCHLEVEL)
-        #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL)
-    #else
-        #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0)
-    #endif
-#endif
-
-#if defined(JSON_HEDLEY_CRAY_VERSION_CHECK)
-    #undef JSON_HEDLEY_CRAY_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_CRAY_VERSION)
-    #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_CRAY_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_IAR_VERSION)
-    #undef JSON_HEDLEY_IAR_VERSION
-#endif
-#if defined(__IAR_SYSTEMS_ICC__)
-    #if __VER__ > 1000
-        #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000))
-    #else
-        #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0)
-    #endif
-#endif
-
-#if defined(JSON_HEDLEY_IAR_VERSION_CHECK)
-    #undef JSON_HEDLEY_IAR_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_IAR_VERSION)
-    #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IAR_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_TINYC_VERSION)
-    #undef JSON_HEDLEY_TINYC_VERSION
-#endif
-#if defined(__TINYC__)
-    #define JSON_HEDLEY_TINYC_VERSION JSON_HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100)
-#endif
-
-#if defined(JSON_HEDLEY_TINYC_VERSION_CHECK)
-    #undef JSON_HEDLEY_TINYC_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_TINYC_VERSION)
-    #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TINYC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_DMC_VERSION)
-    #undef JSON_HEDLEY_DMC_VERSION
-#endif
-#if defined(__DMC__)
-    #define JSON_HEDLEY_DMC_VERSION JSON_HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf)
-#endif
-
-#if defined(JSON_HEDLEY_DMC_VERSION_CHECK)
-    #undef JSON_HEDLEY_DMC_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_DMC_VERSION)
-    #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_DMC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_COMPCERT_VERSION)
-    #undef JSON_HEDLEY_COMPCERT_VERSION
-#endif
-#if defined(__COMPCERT_VERSION__)
-    #define JSON_HEDLEY_COMPCERT_VERSION JSON_HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100)
-#endif
-
-#if defined(JSON_HEDLEY_COMPCERT_VERSION_CHECK)
-    #undef JSON_HEDLEY_COMPCERT_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_COMPCERT_VERSION)
-    #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_COMPCERT_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_PELLES_VERSION)
-    #undef JSON_HEDLEY_PELLES_VERSION
-#endif
-#if defined(__POCC__)
-    #define JSON_HEDLEY_PELLES_VERSION JSON_HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0)
-#endif
-
-#if defined(JSON_HEDLEY_PELLES_VERSION_CHECK)
-    #undef JSON_HEDLEY_PELLES_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_PELLES_VERSION)
-    #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PELLES_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_MCST_LCC_VERSION)
-    #undef JSON_HEDLEY_MCST_LCC_VERSION
-#endif
-#if defined(__LCC__) && defined(__LCC_MINOR__)
-    #define JSON_HEDLEY_MCST_LCC_VERSION JSON_HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__)
-#endif
-
-#if defined(JSON_HEDLEY_MCST_LCC_VERSION_CHECK)
-    #undef JSON_HEDLEY_MCST_LCC_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_MCST_LCC_VERSION)
-    #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_MCST_LCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_GCC_VERSION)
-    #undef JSON_HEDLEY_GCC_VERSION
-#endif
-#if \
-    defined(JSON_HEDLEY_GNUC_VERSION) && \
-    !defined(__clang__) && \
-    !defined(JSON_HEDLEY_INTEL_VERSION) && \
-    !defined(JSON_HEDLEY_PGI_VERSION) && \
-    !defined(JSON_HEDLEY_ARM_VERSION) && \
-    !defined(JSON_HEDLEY_CRAY_VERSION) && \
-    !defined(JSON_HEDLEY_TI_VERSION) && \
-    !defined(JSON_HEDLEY_TI_ARMCL_VERSION) && \
-    !defined(JSON_HEDLEY_TI_CL430_VERSION) && \
-    !defined(JSON_HEDLEY_TI_CL2000_VERSION) && \
-    !defined(JSON_HEDLEY_TI_CL6X_VERSION) && \
-    !defined(JSON_HEDLEY_TI_CL7X_VERSION) && \
-    !defined(JSON_HEDLEY_TI_CLPRU_VERSION) && \
-    !defined(__COMPCERT__) && \
-    !defined(JSON_HEDLEY_MCST_LCC_VERSION)
-    #define JSON_HEDLEY_GCC_VERSION JSON_HEDLEY_GNUC_VERSION
-#endif
-
-#if defined(JSON_HEDLEY_GCC_VERSION_CHECK)
-    #undef JSON_HEDLEY_GCC_VERSION_CHECK
-#endif
-#if defined(JSON_HEDLEY_GCC_VERSION)
-    #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-    #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0)
-#endif
-
-#if defined(JSON_HEDLEY_HAS_ATTRIBUTE)
-    #undef JSON_HEDLEY_HAS_ATTRIBUTE
-#endif
-#if \
-  defined(__has_attribute) && \
-  ( \
-    (!defined(JSON_HEDLEY_IAR_VERSION) || JSON_HEDLEY_IAR_VERSION_CHECK(8,5,9)) \
-  )
-#  define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute)
-#else
-#  define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) (0)
-#endif
-
-#if defined(JSON_HEDLEY_GNUC_HAS_ATTRIBUTE)
-    #undef JSON_HEDLEY_GNUC_HAS_ATTRIBUTE
-#endif
-#if defined(__has_attribute)
-    #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute)
-#else
-    #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_GCC_HAS_ATTRIBUTE)
-    #undef JSON_HEDLEY_GCC_HAS_ATTRIBUTE
-#endif
-#if defined(__has_attribute)
-    #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute)
-#else
-    #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE)
-    #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE
-#endif
-#if \
-    defined(__has_cpp_attribute) && \
-    defined(__cplusplus) && \
-    (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0))
-    #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute)
-#else
-    #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0)
-#endif
-
-#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS)
-    #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS
-#endif
-#if !defined(__cplusplus) || !defined(__has_cpp_attribute)
-    #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0)
-#elif \
-    !defined(JSON_HEDLEY_PGI_VERSION) && \
-    !defined(JSON_HEDLEY_IAR_VERSION) && \
-    (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \
-    (!defined(JSON_HEDLEY_MSVC_VERSION) || JSON_HEDLEY_MSVC_VERSION_CHECK(19,20,0))
-    #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute)
-#else
-    #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0)
-#endif
-
-#if defined(JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE)
-    #undef JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE
-#endif
-#if defined(__has_cpp_attribute) && defined(__cplusplus)
-    #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute)
-#else
-    #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE)
-    #undef JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE
-#endif
-#if defined(__has_cpp_attribute) && defined(__cplusplus)
-    #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute)
-#else
-    #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_HAS_BUILTIN)
-    #undef JSON_HEDLEY_HAS_BUILTIN
-#endif
-#if defined(__has_builtin)
-    #define JSON_HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin)
-#else
-    #define JSON_HEDLEY_HAS_BUILTIN(builtin) (0)
-#endif
-
-#if defined(JSON_HEDLEY_GNUC_HAS_BUILTIN)
-    #undef JSON_HEDLEY_GNUC_HAS_BUILTIN
-#endif
-#if defined(__has_builtin)
-    #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin)
-#else
-    #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_GCC_HAS_BUILTIN)
-    #undef JSON_HEDLEY_GCC_HAS_BUILTIN
-#endif
-#if defined(__has_builtin)
-    #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin)
-#else
-    #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_HAS_FEATURE)
-    #undef JSON_HEDLEY_HAS_FEATURE
-#endif
-#if defined(__has_feature)
-    #define JSON_HEDLEY_HAS_FEATURE(feature) __has_feature(feature)
-#else
-    #define JSON_HEDLEY_HAS_FEATURE(feature) (0)
-#endif
-
-#if defined(JSON_HEDLEY_GNUC_HAS_FEATURE)
-    #undef JSON_HEDLEY_GNUC_HAS_FEATURE
-#endif
-#if defined(__has_feature)
-    #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature)
-#else
-    #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_GCC_HAS_FEATURE)
-    #undef JSON_HEDLEY_GCC_HAS_FEATURE
-#endif
-#if defined(__has_feature)
-    #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature)
-#else
-    #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_HAS_EXTENSION)
-    #undef JSON_HEDLEY_HAS_EXTENSION
-#endif
-#if defined(__has_extension)
-    #define JSON_HEDLEY_HAS_EXTENSION(extension) __has_extension(extension)
-#else
-    #define JSON_HEDLEY_HAS_EXTENSION(extension) (0)
-#endif
-
-#if defined(JSON_HEDLEY_GNUC_HAS_EXTENSION)
-    #undef JSON_HEDLEY_GNUC_HAS_EXTENSION
-#endif
-#if defined(__has_extension)
-    #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension)
-#else
-    #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_GCC_HAS_EXTENSION)
-    #undef JSON_HEDLEY_GCC_HAS_EXTENSION
-#endif
-#if defined(__has_extension)
-    #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension)
-#else
-    #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE)
-    #undef JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE
-#endif
-#if defined(__has_declspec_attribute)
-    #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute)
-#else
-    #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0)
-#endif
-
-#if defined(JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE)
-    #undef JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE
-#endif
-#if defined(__has_declspec_attribute)
-    #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute)
-#else
-    #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE)
-    #undef JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE
-#endif
-#if defined(__has_declspec_attribute)
-    #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute)
-#else
-    #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_HAS_WARNING)
-    #undef JSON_HEDLEY_HAS_WARNING
-#endif
-#if defined(__has_warning)
-    #define JSON_HEDLEY_HAS_WARNING(warning) __has_warning(warning)
-#else
-    #define JSON_HEDLEY_HAS_WARNING(warning) (0)
-#endif
-
-#if defined(JSON_HEDLEY_GNUC_HAS_WARNING)
-    #undef JSON_HEDLEY_GNUC_HAS_WARNING
-#endif
-#if defined(__has_warning)
-    #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning)
-#else
-    #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_GCC_HAS_WARNING)
-    #undef JSON_HEDLEY_GCC_HAS_WARNING
-#endif
-#if defined(__has_warning)
-    #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning)
-#else
-    #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if \
-    (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \
-    defined(__clang__) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \
-    JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
-    JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \
-    JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \
-    JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \
-    JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \
-    JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \
-    JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \
-    JSON_HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \
-    (JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR))
-    #define JSON_HEDLEY_PRAGMA(value) _Pragma(#value)
-#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0)
-    #define JSON_HEDLEY_PRAGMA(value) __pragma(value)
-#else
-    #define JSON_HEDLEY_PRAGMA(value)
-#endif
-
-#if defined(JSON_HEDLEY_DIAGNOSTIC_PUSH)
-    #undef JSON_HEDLEY_DIAGNOSTIC_PUSH
-#endif
-#if defined(JSON_HEDLEY_DIAGNOSTIC_POP)
-    #undef JSON_HEDLEY_DIAGNOSTIC_POP
-#endif
-#if defined(__clang__)
-    #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
-    #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop")
-#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)")
-    #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)")
-#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push")
-    #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop")
-#elif \
-    JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \
-    JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push))
-    #define JSON_HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop))
-#elif JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("push")
-    #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("pop")
-#elif \
-    JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \
-    JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \
-    JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push")
-    #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop")
-#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)")
-    #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)")
-#else
-    #define JSON_HEDLEY_DIAGNOSTIC_PUSH
-    #define JSON_HEDLEY_DIAGNOSTIC_POP
-#endif
-
-/* JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for
-   HEDLEY INTERNAL USE ONLY.  API subject to change without notice. */
-#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_)
-    #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_
-#endif
-#if defined(__cplusplus)
-#  if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat")
-#    if JSON_HEDLEY_HAS_WARNING("-Wc++17-extensions")
-#      if JSON_HEDLEY_HAS_WARNING("-Wc++1z-extensions")
-#        define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \
-    JSON_HEDLEY_DIAGNOSTIC_PUSH \
-    _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \
-    _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \
-    _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \
-    xpr \
-    JSON_HEDLEY_DIAGNOSTIC_POP
-#      else
-#        define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \
-    JSON_HEDLEY_DIAGNOSTIC_PUSH \
-    _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \
-    _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \
-    xpr \
-    JSON_HEDLEY_DIAGNOSTIC_POP
-#      endif
-#    else
-#      define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \
-    JSON_HEDLEY_DIAGNOSTIC_PUSH \
-    _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \
-    xpr \
-    JSON_HEDLEY_DIAGNOSTIC_POP
-#    endif
-#  endif
-#endif
-#if !defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x
-#endif
-
-#if defined(JSON_HEDLEY_CONST_CAST)
-    #undef JSON_HEDLEY_CONST_CAST
-#endif
-#if defined(__cplusplus)
-#  define JSON_HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr))
-#elif \
-  JSON_HEDLEY_HAS_WARNING("-Wcast-qual") || \
-  JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) || \
-  JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0)
-#  define JSON_HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \
-        JSON_HEDLEY_DIAGNOSTIC_PUSH \
-        JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \
-        ((T) (expr)); \
-        JSON_HEDLEY_DIAGNOSTIC_POP \
-    }))
-#else
-#  define JSON_HEDLEY_CONST_CAST(T, expr) ((T) (expr))
-#endif
-
-#if defined(JSON_HEDLEY_REINTERPRET_CAST)
-    #undef JSON_HEDLEY_REINTERPRET_CAST
-#endif
-#if defined(__cplusplus)
-    #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr))
-#else
-    #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr))
-#endif
-
-#if defined(JSON_HEDLEY_STATIC_CAST)
-    #undef JSON_HEDLEY_STATIC_CAST
-#endif
-#if defined(__cplusplus)
-    #define JSON_HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr))
-#else
-    #define JSON_HEDLEY_STATIC_CAST(T, expr) ((T) (expr))
-#endif
-
-#if defined(JSON_HEDLEY_CPP_CAST)
-    #undef JSON_HEDLEY_CPP_CAST
-#endif
-#if defined(__cplusplus)
-#  if JSON_HEDLEY_HAS_WARNING("-Wold-style-cast")
-#    define JSON_HEDLEY_CPP_CAST(T, expr) \
-    JSON_HEDLEY_DIAGNOSTIC_PUSH \
-    _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \
-    ((T) (expr)) \
-    JSON_HEDLEY_DIAGNOSTIC_POP
-#  elif JSON_HEDLEY_IAR_VERSION_CHECK(8,3,0)
-#    define JSON_HEDLEY_CPP_CAST(T, expr) \
-    JSON_HEDLEY_DIAGNOSTIC_PUSH \
-    _Pragma("diag_suppress=Pe137") \
-    JSON_HEDLEY_DIAGNOSTIC_POP
-#  else
-#    define JSON_HEDLEY_CPP_CAST(T, expr) ((T) (expr))
-#  endif
-#else
-#  define JSON_HEDLEY_CPP_CAST(T, expr) (expr)
-#endif
-
-#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED)
-    #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED
-#endif
-#if JSON_HEDLEY_HAS_WARNING("-Wdeprecated-declarations")
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"")
-#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)")
-#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786))
-#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445")
-#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444")
-#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
-#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996))
-#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444")
-#elif \
-    JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \
-    (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \
-    (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \
-    (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \
-    (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718")
-#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)")
-#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)")
-#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215")
-#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)")
-#else
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED
-#endif
-
-#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS)
-    #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS
-#endif
-#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas")
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"")
-#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)")
-#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161))
-#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675")
-#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"")
-#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068))
-#elif \
-    JSON_HEDLEY_TI_VERSION_CHECK(16,9,0) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163")
-#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163")
-#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161")
-#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161")
-#else
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS
-#endif
-
-#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES)
-    #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES
-#endif
-#if JSON_HEDLEY_HAS_WARNING("-Wunknown-attributes")
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"")
-#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
-#elif JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)")
-#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292))
-#elif JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030))
-#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098")
-#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097")
-#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)")
-#elif \
-    JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173")
-#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097")
-#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097")
-#else
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES
-#endif
-
-#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL)
-    #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
-#endif
-#if JSON_HEDLEY_HAS_WARNING("-Wcast-qual")
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"")
-#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)")
-#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
-#else
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
-#endif
-
-#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION)
-    #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION
-#endif
-#if JSON_HEDLEY_HAS_WARNING("-Wunused-function")
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"")
-#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"")
-#elif JSON_HEDLEY_MSVC_VERSION_CHECK(1,0,0)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505))
-#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142")
-#else
-    #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION
-#endif
-
-#if defined(JSON_HEDLEY_DEPRECATED)
-    #undef JSON_HEDLEY_DEPRECATED
-#endif
-#if defined(JSON_HEDLEY_DEPRECATED_FOR)
-    #undef JSON_HEDLEY_DEPRECATED_FOR
-#endif
-#if \
-    JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \
-    JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-    #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since))
-    #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement))
-#elif \
-    (JSON_HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(JSON_HEDLEY_IAR_VERSION)) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \
-    JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \
-    JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \
-    JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \
-    JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since)))
-    #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement)))
-#elif defined(__cplusplus) && (__cplusplus >= 201402L)
-    #define JSON_HEDLEY_DEPRECATED(since) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]])
-    #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]])
-#elif \
-    JSON_HEDLEY_HAS_ATTRIBUTE(deprecated) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
-    JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \
-    (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \
-    (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \
-    (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \
-    (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \
-    JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0)
-    #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__))
-    #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__))
-#elif \
-    JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \
-    JSON_HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \
-    JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-    #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated)
-    #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated)
-#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0)
-    #define JSON_HEDLEY_DEPRECATED(since) _Pragma("deprecated")
-    #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated")
-#else
-    #define JSON_HEDLEY_DEPRECATED(since)
-    #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement)
-#endif
-
-#if defined(JSON_HEDLEY_UNAVAILABLE)
-    #undef JSON_HEDLEY_UNAVAILABLE
-#endif
-#if \
-    JSON_HEDLEY_HAS_ATTRIBUTE(warning) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since)))
-#else
-    #define JSON_HEDLEY_UNAVAILABLE(available_since)
-#endif
-
-#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT)
-    #undef JSON_HEDLEY_WARN_UNUSED_RESULT
-#endif
-#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT_MSG)
-    #undef JSON_HEDLEY_WARN_UNUSED_RESULT_MSG
-#endif
-#if \
-    JSON_HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \
-    (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \
-    (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \
-    (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \
-    (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \
-    (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \
-    JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
-    #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__))
-#elif (JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L)
-    #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
-    #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]])
-#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard)
-    #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
-    #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
-#elif defined(_Check_return_) /* SAL */
-    #define JSON_HEDLEY_WARN_UNUSED_RESULT _Check_return_
-    #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_
-#else
-    #define JSON_HEDLEY_WARN_UNUSED_RESULT
-    #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg)
-#endif
-
-#if defined(JSON_HEDLEY_SENTINEL)
-    #undef JSON_HEDLEY_SENTINEL
-#endif
-#if \
-    JSON_HEDLEY_HAS_ATTRIBUTE(sentinel) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position)))
-#else
-    #define JSON_HEDLEY_SENTINEL(position)
-#endif
-
-#if defined(JSON_HEDLEY_NO_RETURN)
-    #undef JSON_HEDLEY_NO_RETURN
-#endif
-#if JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0)
-    #define JSON_HEDLEY_NO_RETURN __noreturn
-#elif \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__))
-#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
-    #define JSON_HEDLEY_NO_RETURN _Noreturn
-#elif defined(__cplusplus) && (__cplusplus >= 201103L)
-    #define JSON_HEDLEY_NO_RETURN JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]])
-#elif \
-    JSON_HEDLEY_HAS_ATTRIBUTE(noreturn) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(3,2,0) || \
-    JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
-    JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \
-    JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \
-    (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \
-    (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \
-    (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \
-    (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \
-    JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0)
-    #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__))
-#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0)
-    #define JSON_HEDLEY_NO_RETURN _Pragma("does_not_return")
-#elif \
-    JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \
-    JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-    #define JSON_HEDLEY_NO_RETURN __declspec(noreturn)
-#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus)
-    #define JSON_HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;")
-#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0)
-    #define JSON_HEDLEY_NO_RETURN __attribute((noreturn))
-#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0)
-    #define JSON_HEDLEY_NO_RETURN __declspec(noreturn)
-#else
-    #define JSON_HEDLEY_NO_RETURN
-#endif
-
-#if defined(JSON_HEDLEY_NO_ESCAPE)
-    #undef JSON_HEDLEY_NO_ESCAPE
-#endif
-#if JSON_HEDLEY_HAS_ATTRIBUTE(noescape)
-    #define JSON_HEDLEY_NO_ESCAPE __attribute__((__noescape__))
-#else
-    #define JSON_HEDLEY_NO_ESCAPE
-#endif
-
-#if defined(JSON_HEDLEY_UNREACHABLE)
-    #undef JSON_HEDLEY_UNREACHABLE
-#endif
-#if defined(JSON_HEDLEY_UNREACHABLE_RETURN)
-    #undef JSON_HEDLEY_UNREACHABLE_RETURN
-#endif
-#if defined(JSON_HEDLEY_ASSUME)
-    #undef JSON_HEDLEY_ASSUME
-#endif
-#if \
-    JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-    #define JSON_HEDLEY_ASSUME(expr) __assume(expr)
-#elif JSON_HEDLEY_HAS_BUILTIN(__builtin_assume)
-    #define JSON_HEDLEY_ASSUME(expr) __builtin_assume(expr)
-#elif \
-    JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0)
-    #if defined(__cplusplus)
-        #define JSON_HEDLEY_ASSUME(expr) std::_nassert(expr)
-    #else
-        #define JSON_HEDLEY_ASSUME(expr) _nassert(expr)
-    #endif
-#endif
-#if \
-    (JSON_HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(JSON_HEDLEY_ARM_VERSION))) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \
-    JSON_HEDLEY_PGI_VERSION_CHECK(18,10,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_IBM_VERSION_CHECK(13,1,5) || \
-    JSON_HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_UNREACHABLE() __builtin_unreachable()
-#elif defined(JSON_HEDLEY_ASSUME)
-    #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0)
-#endif
-#if !defined(JSON_HEDLEY_ASSUME)
-    #if defined(JSON_HEDLEY_UNREACHABLE)
-        #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (JSON_HEDLEY_UNREACHABLE(), 1)))
-    #else
-        #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, expr)
-    #endif
-#endif
-#if defined(JSON_HEDLEY_UNREACHABLE)
-    #if  \
-        JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \
-        JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0)
-        #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (JSON_HEDLEY_STATIC_CAST(void, JSON_HEDLEY_ASSUME(0)), (value))
-    #else
-        #define JSON_HEDLEY_UNREACHABLE_RETURN(value) JSON_HEDLEY_UNREACHABLE()
-    #endif
-#else
-    #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (value)
-#endif
-#if !defined(JSON_HEDLEY_UNREACHABLE)
-    #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0)
-#endif
-
-JSON_HEDLEY_DIAGNOSTIC_PUSH
-#if JSON_HEDLEY_HAS_WARNING("-Wpedantic")
-    #pragma clang diagnostic ignored "-Wpedantic"
-#endif
-#if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus)
-    #pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
-#endif
-#if JSON_HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0)
-    #if defined(__clang__)
-        #pragma clang diagnostic ignored "-Wvariadic-macros"
-    #elif defined(JSON_HEDLEY_GCC_VERSION)
-        #pragma GCC diagnostic ignored "-Wvariadic-macros"
-    #endif
-#endif
-#if defined(JSON_HEDLEY_NON_NULL)
-    #undef JSON_HEDLEY_NON_NULL
-#endif
-#if \
-    JSON_HEDLEY_HAS_ATTRIBUTE(nonnull) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0)
-    #define JSON_HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__)))
-#else
-    #define JSON_HEDLEY_NON_NULL(...)
-#endif
-JSON_HEDLEY_DIAGNOSTIC_POP
-
-#if defined(JSON_HEDLEY_PRINTF_FORMAT)
-    #undef JSON_HEDLEY_PRINTF_FORMAT
-#endif
-#if defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO)
-    #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check)))
-#elif defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO)
-    #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check)))
-#elif \
-    JSON_HEDLEY_HAS_ATTRIBUTE(format) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \
-    JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \
-    JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \
-    (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \
-    (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \
-    (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \
-    (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check)))
-#elif JSON_HEDLEY_PELLES_VERSION_CHECK(6,0,0)
-    #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check))
-#else
-    #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check)
-#endif
-
-#if defined(JSON_HEDLEY_CONSTEXPR)
-    #undef JSON_HEDLEY_CONSTEXPR
-#endif
-#if defined(__cplusplus)
-    #if __cplusplus >= 201103L
-        #define JSON_HEDLEY_CONSTEXPR JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr)
-    #endif
-#endif
-#if !defined(JSON_HEDLEY_CONSTEXPR)
-    #define JSON_HEDLEY_CONSTEXPR
-#endif
-
-#if defined(JSON_HEDLEY_PREDICT)
-    #undef JSON_HEDLEY_PREDICT
-#endif
-#if defined(JSON_HEDLEY_LIKELY)
-    #undef JSON_HEDLEY_LIKELY
-#endif
-#if defined(JSON_HEDLEY_UNLIKELY)
-    #undef JSON_HEDLEY_UNLIKELY
-#endif
-#if defined(JSON_HEDLEY_UNPREDICTABLE)
-    #undef JSON_HEDLEY_UNPREDICTABLE
-#endif
-#if JSON_HEDLEY_HAS_BUILTIN(__builtin_unpredictable)
-    #define JSON_HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr))
-#endif
-#if \
-  (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(JSON_HEDLEY_PGI_VERSION)) || \
-  JSON_HEDLEY_GCC_VERSION_CHECK(9,0,0) || \
-  JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-#  define JSON_HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability(  (expr), (value), (probability))
-#  define JSON_HEDLEY_PREDICT_TRUE(expr, probability)   __builtin_expect_with_probability(!!(expr),    1   , (probability))
-#  define JSON_HEDLEY_PREDICT_FALSE(expr, probability)  __builtin_expect_with_probability(!!(expr),    0   , (probability))
-#  define JSON_HEDLEY_LIKELY(expr)                      __builtin_expect                 (!!(expr),    1                  )
-#  define JSON_HEDLEY_UNLIKELY(expr)                    __builtin_expect                 (!!(expr),    0                  )
-#elif \
-  (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \
-  JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \
-  JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-  (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \
-  JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
-  JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \
-  JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \
-  JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \
-  JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \
-  JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \
-  JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \
-  JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-  JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \
-  JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \
-  JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \
-  JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-#  define JSON_HEDLEY_PREDICT(expr, expected, probability) \
-    (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (JSON_HEDLEY_STATIC_CAST(void, expected), (expr)))
-#  define JSON_HEDLEY_PREDICT_TRUE(expr, probability) \
-    (__extension__ ({ \
-        double hedley_probability_ = (probability); \
-        ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \
-    }))
-#  define JSON_HEDLEY_PREDICT_FALSE(expr, probability) \
-    (__extension__ ({ \
-        double hedley_probability_ = (probability); \
-        ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \
-    }))
-#  define JSON_HEDLEY_LIKELY(expr)   __builtin_expect(!!(expr), 1)
-#  define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
-#else
-#  define JSON_HEDLEY_PREDICT(expr, expected, probability) (JSON_HEDLEY_STATIC_CAST(void, expected), (expr))
-#  define JSON_HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr))
-#  define JSON_HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr))
-#  define JSON_HEDLEY_LIKELY(expr) (!!(expr))
-#  define JSON_HEDLEY_UNLIKELY(expr) (!!(expr))
-#endif
-#if !defined(JSON_HEDLEY_UNPREDICTABLE)
-    #define JSON_HEDLEY_UNPREDICTABLE(expr) JSON_HEDLEY_PREDICT(expr, 1, 0.5)
-#endif
-
-#if defined(JSON_HEDLEY_MALLOC)
-    #undef JSON_HEDLEY_MALLOC
-#endif
-#if \
-    JSON_HEDLEY_HAS_ATTRIBUTE(malloc) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
-    JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \
-    JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \
-    (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \
-    (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \
-    (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \
-    (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_MALLOC __attribute__((__malloc__))
-#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0)
-    #define JSON_HEDLEY_MALLOC _Pragma("returns_new_memory")
-#elif \
-    JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \
-    JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-    #define JSON_HEDLEY_MALLOC __declspec(restrict)
-#else
-    #define JSON_HEDLEY_MALLOC
-#endif
-
-#if defined(JSON_HEDLEY_PURE)
-    #undef JSON_HEDLEY_PURE
-#endif
-#if \
-  JSON_HEDLEY_HAS_ATTRIBUTE(pure) || \
-  JSON_HEDLEY_GCC_VERSION_CHECK(2,96,0) || \
-  JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-  JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \
-  JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
-  JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \
-  JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \
-  (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-  JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \
-  (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-  JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \
-  (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-  JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \
-  (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-  JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \
-  JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-  JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \
-  JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \
-  JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-#  define JSON_HEDLEY_PURE __attribute__((__pure__))
-#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0)
-#  define JSON_HEDLEY_PURE _Pragma("does_not_write_global_data")
-#elif defined(__cplusplus) && \
-    ( \
-      JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \
-      JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \
-      JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \
-    )
-#  define JSON_HEDLEY_PURE _Pragma("FUNC_IS_PURE;")
-#else
-#  define JSON_HEDLEY_PURE
-#endif
-
-#if defined(JSON_HEDLEY_CONST)
-    #undef JSON_HEDLEY_CONST
-#endif
-#if \
-    JSON_HEDLEY_HAS_ATTRIBUTE(const) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(2,5,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
-    JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \
-    JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \
-    (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \
-    (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \
-    (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \
-    (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \
-    JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_CONST __attribute__((__const__))
-#elif \
-    JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0)
-    #define JSON_HEDLEY_CONST _Pragma("no_side_effect")
-#else
-    #define JSON_HEDLEY_CONST JSON_HEDLEY_PURE
-#endif
-
-#if defined(JSON_HEDLEY_RESTRICT)
-    #undef JSON_HEDLEY_RESTRICT
-#endif
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus)
-    #define JSON_HEDLEY_RESTRICT restrict
-#elif \
-    JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \
-    JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
-    JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \
-    JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \
-    JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \
-    JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \
-    JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \
-    defined(__clang__) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_RESTRICT __restrict
-#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus)
-    #define JSON_HEDLEY_RESTRICT _Restrict
-#else
-    #define JSON_HEDLEY_RESTRICT
-#endif
-
-#if defined(JSON_HEDLEY_INLINE)
-    #undef JSON_HEDLEY_INLINE
-#endif
-#if \
-    (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \
-    (defined(__cplusplus) && (__cplusplus >= 199711L))
-    #define JSON_HEDLEY_INLINE inline
-#elif \
-    defined(JSON_HEDLEY_GCC_VERSION) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(6,2,0)
-    #define JSON_HEDLEY_INLINE __inline__
-#elif \
-    JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \
-    JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
-    JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \
-    JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \
-    JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_INLINE __inline
-#else
-    #define JSON_HEDLEY_INLINE
-#endif
-
-#if defined(JSON_HEDLEY_ALWAYS_INLINE)
-    #undef JSON_HEDLEY_ALWAYS_INLINE
-#endif
-#if \
-  JSON_HEDLEY_HAS_ATTRIBUTE(always_inline) || \
-  JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \
-  JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-  JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \
-  JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
-  JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \
-  JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \
-  (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-  JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \
-  (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-  JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \
-  (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-  JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \
-  (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-  JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \
-  JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-  JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \
-  JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \
-  JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0)
-#  define JSON_HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) JSON_HEDLEY_INLINE
-#elif \
-  JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \
-  JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-#  define JSON_HEDLEY_ALWAYS_INLINE __forceinline
-#elif defined(__cplusplus) && \
-    ( \
-      JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \
-      JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \
-      JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \
-      JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \
-      JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-      JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \
-    )
-#  define JSON_HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;")
-#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0)
-#  define JSON_HEDLEY_ALWAYS_INLINE _Pragma("inline=forced")
-#else
-#  define JSON_HEDLEY_ALWAYS_INLINE JSON_HEDLEY_INLINE
-#endif
-
-#if defined(JSON_HEDLEY_NEVER_INLINE)
-    #undef JSON_HEDLEY_NEVER_INLINE
-#endif
-#if \
-    JSON_HEDLEY_HAS_ATTRIBUTE(noinline) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
-    JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \
-    JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \
-    (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \
-    (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \
-    (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \
-    (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \
-    JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \
-    JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \
-    JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0)
-    #define JSON_HEDLEY_NEVER_INLINE __attribute__((__noinline__))
-#elif \
-    JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \
-    JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-    #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline)
-#elif JSON_HEDLEY_PGI_VERSION_CHECK(10,2,0)
-    #define JSON_HEDLEY_NEVER_INLINE _Pragma("noinline")
-#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus)
-    #define JSON_HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;")
-#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0)
-    #define JSON_HEDLEY_NEVER_INLINE _Pragma("inline=never")
-#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0)
-    #define JSON_HEDLEY_NEVER_INLINE __attribute((noinline))
-#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0)
-    #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline)
-#else
-    #define JSON_HEDLEY_NEVER_INLINE
-#endif
-
-#if defined(JSON_HEDLEY_PRIVATE)
-    #undef JSON_HEDLEY_PRIVATE
-#endif
-#if defined(JSON_HEDLEY_PUBLIC)
-    #undef JSON_HEDLEY_PUBLIC
-#endif
-#if defined(JSON_HEDLEY_IMPORT)
-    #undef JSON_HEDLEY_IMPORT
-#endif
-#if defined(_WIN32) || defined(__CYGWIN__)
-#  define JSON_HEDLEY_PRIVATE
-#  define JSON_HEDLEY_PUBLIC   __declspec(dllexport)
-#  define JSON_HEDLEY_IMPORT   __declspec(dllimport)
-#else
-#  if \
-    JSON_HEDLEY_HAS_ATTRIBUTE(visibility) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \
-    JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
-    JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \
-    ( \
-      defined(__TI_EABI__) && \
-      ( \
-        (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-        JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \
-      ) \
-    ) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-#    define JSON_HEDLEY_PRIVATE __attribute__((__visibility__("hidden")))
-#    define JSON_HEDLEY_PUBLIC  __attribute__((__visibility__("default")))
-#  else
-#    define JSON_HEDLEY_PRIVATE
-#    define JSON_HEDLEY_PUBLIC
-#  endif
-#  define JSON_HEDLEY_IMPORT    extern
-#endif
-
-#if defined(JSON_HEDLEY_NO_THROW)
-    #undef JSON_HEDLEY_NO_THROW
-#endif
-#if \
-    JSON_HEDLEY_HAS_ATTRIBUTE(nothrow) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_NO_THROW __attribute__((__nothrow__))
-#elif \
-    JSON_HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \
-    JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0)
-    #define JSON_HEDLEY_NO_THROW __declspec(nothrow)
-#else
-    #define JSON_HEDLEY_NO_THROW
-#endif
-
-#if defined(JSON_HEDLEY_FALL_THROUGH)
-    #undef JSON_HEDLEY_FALL_THROUGH
-#endif
-#if \
-    JSON_HEDLEY_HAS_ATTRIBUTE(fallthrough) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(7,0,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_FALL_THROUGH __attribute__((__fallthrough__))
-#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough)
-    #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]])
-#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough)
-    #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]])
-#elif defined(__fallthrough) /* SAL */
-    #define JSON_HEDLEY_FALL_THROUGH __fallthrough
-#else
-    #define JSON_HEDLEY_FALL_THROUGH
-#endif
-
-#if defined(JSON_HEDLEY_RETURNS_NON_NULL)
-    #undef JSON_HEDLEY_RETURNS_NON_NULL
-#endif
-#if \
-    JSON_HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__))
-#elif defined(_Ret_notnull_) /* SAL */
-    #define JSON_HEDLEY_RETURNS_NON_NULL _Ret_notnull_
-#else
-    #define JSON_HEDLEY_RETURNS_NON_NULL
-#endif
-
-#if defined(JSON_HEDLEY_ARRAY_PARAM)
-    #undef JSON_HEDLEY_ARRAY_PARAM
-#endif
-#if \
-    defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
-    !defined(__STDC_NO_VLA__) && \
-    !defined(__cplusplus) && \
-    !defined(JSON_HEDLEY_PGI_VERSION) && \
-    !defined(JSON_HEDLEY_TINYC_VERSION)
-    #define JSON_HEDLEY_ARRAY_PARAM(name) (name)
-#else
-    #define JSON_HEDLEY_ARRAY_PARAM(name)
-#endif
-
-#if defined(JSON_HEDLEY_IS_CONSTANT)
-    #undef JSON_HEDLEY_IS_CONSTANT
-#endif
-#if defined(JSON_HEDLEY_REQUIRE_CONSTEXPR)
-    #undef JSON_HEDLEY_REQUIRE_CONSTEXPR
-#endif
-/* JSON_HEDLEY_IS_CONSTEXPR_ is for
-   HEDLEY INTERNAL USE ONLY.  API subject to change without notice. */
-#if defined(JSON_HEDLEY_IS_CONSTEXPR_)
-    #undef JSON_HEDLEY_IS_CONSTEXPR_
-#endif
-#if \
-    JSON_HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \
-    JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \
-    JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-    JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \
-    JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
-    JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \
-    JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \
-    (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \
-    JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \
-    JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)
-    #define JSON_HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr)
-#endif
-#if !defined(__cplusplus)
-#  if \
-       JSON_HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \
-       JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \
-       JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-       JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \
-       JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \
-       JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \
-       JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,24)
-#if defined(__INTPTR_TYPE__)
-    #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*)
-#else
-    #include <stdint.h>
-    #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*)
-#endif
-#  elif \
-       ( \
-          defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \
-          !defined(JSON_HEDLEY_SUNPRO_VERSION) && \
-          !defined(JSON_HEDLEY_PGI_VERSION) && \
-          !defined(JSON_HEDLEY_IAR_VERSION)) || \
-       (JSON_HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(JSON_HEDLEY_IAR_VERSION)) || \
-       JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \
-       JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \
-       JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \
-       JSON_HEDLEY_ARM_VERSION_CHECK(5,3,0)
-#if defined(__INTPTR_TYPE__)
-    #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0)
-#else
-    #include <stdint.h>
-    #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0)
-#endif
-#  elif \
-       defined(JSON_HEDLEY_GCC_VERSION) || \
-       defined(JSON_HEDLEY_INTEL_VERSION) || \
-       defined(JSON_HEDLEY_TINYC_VERSION) || \
-       defined(JSON_HEDLEY_TI_ARMCL_VERSION) || \
-       JSON_HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \
-       defined(JSON_HEDLEY_TI_CL2000_VERSION) || \
-       defined(JSON_HEDLEY_TI_CL6X_VERSION) || \
-       defined(JSON_HEDLEY_TI_CL7X_VERSION) || \
-       defined(JSON_HEDLEY_TI_CLPRU_VERSION) || \
-       defined(__clang__)
-#    define JSON_HEDLEY_IS_CONSTEXPR_(expr) ( \
-        sizeof(void) != \
-        sizeof(*( \
-                  1 ? \
-                  ((void*) ((expr) * 0L) ) : \
-((struct { char v[sizeof(void) * 2]; } *) 1) \
-                ) \
-              ) \
-                                            )
-#  endif
-#endif
-#if defined(JSON_HEDLEY_IS_CONSTEXPR_)
-    #if !defined(JSON_HEDLEY_IS_CONSTANT)
-        #define JSON_HEDLEY_IS_CONSTANT(expr) JSON_HEDLEY_IS_CONSTEXPR_(expr)
-    #endif
-    #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (JSON_HEDLEY_IS_CONSTEXPR_(expr) ? (expr) : (-1))
-#else
-    #if !defined(JSON_HEDLEY_IS_CONSTANT)
-        #define JSON_HEDLEY_IS_CONSTANT(expr) (0)
-    #endif
-    #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (expr)
-#endif
-
-#if defined(JSON_HEDLEY_BEGIN_C_DECLS)
-    #undef JSON_HEDLEY_BEGIN_C_DECLS
-#endif
-#if defined(JSON_HEDLEY_END_C_DECLS)
-    #undef JSON_HEDLEY_END_C_DECLS
-#endif
-#if defined(JSON_HEDLEY_C_DECL)
-    #undef JSON_HEDLEY_C_DECL
-#endif
-#if defined(__cplusplus)
-    #define JSON_HEDLEY_BEGIN_C_DECLS extern "C" {
-    #define JSON_HEDLEY_END_C_DECLS }
-    #define JSON_HEDLEY_C_DECL extern "C"
-#else
-    #define JSON_HEDLEY_BEGIN_C_DECLS
-    #define JSON_HEDLEY_END_C_DECLS
-    #define JSON_HEDLEY_C_DECL
-#endif
-
-#if defined(JSON_HEDLEY_STATIC_ASSERT)
-    #undef JSON_HEDLEY_STATIC_ASSERT
-#endif
-#if \
-  !defined(__cplusplus) && ( \
-      (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \
-      (JSON_HEDLEY_HAS_FEATURE(c_static_assert) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \
-      JSON_HEDLEY_GCC_VERSION_CHECK(6,0,0) || \
-      JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
-      defined(_Static_assert) \
-    )
-#  define JSON_HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message)
-#elif \
-  (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
-  JSON_HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \
-  JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-#  define JSON_HEDLEY_STATIC_ASSERT(expr, message) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message))
-#else
-#  define JSON_HEDLEY_STATIC_ASSERT(expr, message)
-#endif
-
-#if defined(JSON_HEDLEY_NULL)
-    #undef JSON_HEDLEY_NULL
-#endif
-#if defined(__cplusplus)
-    #if __cplusplus >= 201103L
-        #define JSON_HEDLEY_NULL JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr)
-    #elif defined(NULL)
-        #define JSON_HEDLEY_NULL NULL
-    #else
-        #define JSON_HEDLEY_NULL JSON_HEDLEY_STATIC_CAST(void*, 0)
-    #endif
-#elif defined(NULL)
-    #define JSON_HEDLEY_NULL NULL
-#else
-    #define JSON_HEDLEY_NULL ((void*) 0)
-#endif
-
-#if defined(JSON_HEDLEY_MESSAGE)
-    #undef JSON_HEDLEY_MESSAGE
-#endif
-#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas")
-#  define JSON_HEDLEY_MESSAGE(msg) \
-    JSON_HEDLEY_DIAGNOSTIC_PUSH \
-    JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \
-    JSON_HEDLEY_PRAGMA(message msg) \
-    JSON_HEDLEY_DIAGNOSTIC_POP
-#elif \
-  JSON_HEDLEY_GCC_VERSION_CHECK(4,4,0) || \
-  JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0)
-#  define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message msg)
-#elif JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0)
-#  define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(_CRI message msg)
-#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0)
-#  define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg))
-#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,0,0)
-#  define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg))
-#else
-#  define JSON_HEDLEY_MESSAGE(msg)
-#endif
-
-#if defined(JSON_HEDLEY_WARNING)
-    #undef JSON_HEDLEY_WARNING
-#endif
-#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas")
-#  define JSON_HEDLEY_WARNING(msg) \
-    JSON_HEDLEY_DIAGNOSTIC_PUSH \
-    JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \
-    JSON_HEDLEY_PRAGMA(clang warning msg) \
-    JSON_HEDLEY_DIAGNOSTIC_POP
-#elif \
-  JSON_HEDLEY_GCC_VERSION_CHECK(4,8,0) || \
-  JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \
-  JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0)
-#  define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(GCC warning msg)
-#elif \
-  JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \
-  JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-#  define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(message(msg))
-#else
-#  define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_MESSAGE(msg)
-#endif
-
-#if defined(JSON_HEDLEY_REQUIRE)
-    #undef JSON_HEDLEY_REQUIRE
-#endif
-#if defined(JSON_HEDLEY_REQUIRE_MSG)
-    #undef JSON_HEDLEY_REQUIRE_MSG
-#endif
-#if JSON_HEDLEY_HAS_ATTRIBUTE(diagnose_if)
-#  if JSON_HEDLEY_HAS_WARNING("-Wgcc-compat")
-#    define JSON_HEDLEY_REQUIRE(expr) \
-    JSON_HEDLEY_DIAGNOSTIC_PUSH \
-    _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \
-    __attribute__((diagnose_if(!(expr), #expr, "error"))) \
-    JSON_HEDLEY_DIAGNOSTIC_POP
-#    define JSON_HEDLEY_REQUIRE_MSG(expr,msg) \
-    JSON_HEDLEY_DIAGNOSTIC_PUSH \
-    _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \
-    __attribute__((diagnose_if(!(expr), msg, "error"))) \
-    JSON_HEDLEY_DIAGNOSTIC_POP
-#  else
-#    define JSON_HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error")))
-#    define JSON_HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error")))
-#  endif
-#else
-#  define JSON_HEDLEY_REQUIRE(expr)
-#  define JSON_HEDLEY_REQUIRE_MSG(expr,msg)
-#endif
-
-#if defined(JSON_HEDLEY_FLAGS)
-    #undef JSON_HEDLEY_FLAGS
-#endif
-#if JSON_HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || JSON_HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion"))
-    #define JSON_HEDLEY_FLAGS __attribute__((__flag_enum__))
-#else
-    #define JSON_HEDLEY_FLAGS
-#endif
-
-#if defined(JSON_HEDLEY_FLAGS_CAST)
-    #undef JSON_HEDLEY_FLAGS_CAST
-#endif
-#if JSON_HEDLEY_INTEL_VERSION_CHECK(19,0,0)
-#  define JSON_HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \
-        JSON_HEDLEY_DIAGNOSTIC_PUSH \
-        _Pragma("warning(disable:188)") \
-        ((T) (expr)); \
-        JSON_HEDLEY_DIAGNOSTIC_POP \
-    }))
-#else
-#  define JSON_HEDLEY_FLAGS_CAST(T, expr) JSON_HEDLEY_STATIC_CAST(T, expr)
-#endif
-
-#if defined(JSON_HEDLEY_EMPTY_BASES)
-    #undef JSON_HEDLEY_EMPTY_BASES
-#endif
-#if \
-    (JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !JSON_HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \
-    JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)
-    #define JSON_HEDLEY_EMPTY_BASES __declspec(empty_bases)
-#else
-    #define JSON_HEDLEY_EMPTY_BASES
-#endif
-
-/* Remaining macros are deprecated. */
-
-#if defined(JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK)
-    #undef JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK
-#endif
-#if defined(__clang__)
-    #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0)
-#else
-    #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch)
-#endif
-
-#if defined(JSON_HEDLEY_CLANG_HAS_ATTRIBUTE)
-    #undef JSON_HEDLEY_CLANG_HAS_ATTRIBUTE
-#endif
-#define JSON_HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_ATTRIBUTE(attribute)
-
-#if defined(JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE)
-    #undef JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE
-#endif
-#define JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute)
-
-#if defined(JSON_HEDLEY_CLANG_HAS_BUILTIN)
-    #undef JSON_HEDLEY_CLANG_HAS_BUILTIN
-#endif
-#define JSON_HEDLEY_CLANG_HAS_BUILTIN(builtin) JSON_HEDLEY_HAS_BUILTIN(builtin)
-
-#if defined(JSON_HEDLEY_CLANG_HAS_FEATURE)
-    #undef JSON_HEDLEY_CLANG_HAS_FEATURE
-#endif
-#define JSON_HEDLEY_CLANG_HAS_FEATURE(feature) JSON_HEDLEY_HAS_FEATURE(feature)
-
-#if defined(JSON_HEDLEY_CLANG_HAS_EXTENSION)
-    #undef JSON_HEDLEY_CLANG_HAS_EXTENSION
-#endif
-#define JSON_HEDLEY_CLANG_HAS_EXTENSION(extension) JSON_HEDLEY_HAS_EXTENSION(extension)
-
-#if defined(JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE)
-    #undef JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE
-#endif
-#define JSON_HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute)
-
-#if defined(JSON_HEDLEY_CLANG_HAS_WARNING)
-    #undef JSON_HEDLEY_CLANG_HAS_WARNING
-#endif
-#define JSON_HEDLEY_CLANG_HAS_WARNING(warning) JSON_HEDLEY_HAS_WARNING(warning)
-
-#endif /* !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < X) */
-
-
-// This file contains all internal macro definitions (except those affecting ABI)
-// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them
-
-// #include 
-
-
-// exclude unsupported compilers
-#if !defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK)
-    #if defined(__clang__)
-        #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400
-            #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers"
-        #endif
-    #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER))
-        #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800
-            #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers"
-        #endif
-    #endif
-#endif
-
-// C++ language standard detection
-// if the user manually specified the used c++ version this is skipped
-#if !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11)
-    #if (defined(__cplusplus) && __cplusplus >= 202002L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
-        #define JSON_HAS_CPP_20
-        #define JSON_HAS_CPP_17
-        #define JSON_HAS_CPP_14
-    #elif (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464
-        #define JSON_HAS_CPP_17
-        #define JSON_HAS_CPP_14
-    #elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1)
-        #define JSON_HAS_CPP_14
-    #endif
-    // the cpp 11 flag is always specified because it is the minimal required version
-    #define JSON_HAS_CPP_11
-#endif
-
-#ifdef __has_include
-    #if __has_include(<version>)
-        #include <version>
-    #endif
-#endif
-
-#if !defined(JSON_HAS_FILESYSTEM) && !defined(JSON_HAS_EXPERIMENTAL_FILESYSTEM)
-    #ifdef JSON_HAS_CPP_17
-        #if defined(__cpp_lib_filesystem)
-            #define JSON_HAS_FILESYSTEM 1
-        #elif defined(__cpp_lib_experimental_filesystem)
-            #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1
-        #elif !defined(__has_include)
-            #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1
-        #elif __has_include(<filesystem>)
-            #define JSON_HAS_FILESYSTEM 1
-        #elif __has_include(<experimental/filesystem>)
-            #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1
-        #endif
-
-        // std::filesystem does not work on MinGW GCC 8: https://sourceforge.net/p/mingw-w64/bugs/737/
-        #if defined(__MINGW32__) && defined(__GNUC__) && __GNUC__ == 8
-            #undef JSON_HAS_FILESYSTEM
-            #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM
-        #endif
-
-        // no filesystem support before GCC 8: https://en.cppreference.com/w/cpp/compiler_support
-        #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8
-            #undef JSON_HAS_FILESYSTEM
-            #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM
-        #endif
-
-        // no filesystem support before Clang 7: https://en.cppreference.com/w/cpp/compiler_support
-        #if defined(__clang_major__) && __clang_major__ < 7
-            #undef JSON_HAS_FILESYSTEM
-            #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM
-        #endif
-
-        // no filesystem support before MSVC 19.14: https://en.cppreference.com/w/cpp/compiler_support
-        #if defined(_MSC_VER) && _MSC_VER < 1914
-            #undef JSON_HAS_FILESYSTEM
-            #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM
-        #endif
-
-        // no filesystem support before iOS 13
-        #if defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 130000
-            #undef JSON_HAS_FILESYSTEM
-            #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM
-        #endif
-
-        // no filesystem support before macOS Catalina
-        #if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101500
-            #undef JSON_HAS_FILESYSTEM
-            #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM
-        #endif
-    #endif
-#endif
-
-#ifndef JSON_HAS_EXPERIMENTAL_FILESYSTEM
-    #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 0
-#endif
-
-#ifndef JSON_HAS_FILESYSTEM
-    #define JSON_HAS_FILESYSTEM 0
-#endif
-
-#ifndef JSON_HAS_THREE_WAY_COMPARISON
-    #if defined(__cpp_impl_three_way_comparison) && __cpp_impl_three_way_comparison >= 201907L \
-        && defined(__cpp_lib_three_way_comparison) && __cpp_lib_three_way_comparison >= 201907L
-        #define JSON_HAS_THREE_WAY_COMPARISON 1
-    #else
-        #define JSON_HAS_THREE_WAY_COMPARISON 0
-    #endif
-#endif
-
-#ifndef JSON_HAS_RANGES
-    // ranges header shipping in GCC 11.1.0 (released 2021-04-27) has syntax error
-    #if defined(__GLIBCXX__) && __GLIBCXX__ == 20210427
-        #define JSON_HAS_RANGES 0
-    #elif defined(__cpp_lib_ranges)
-        #define JSON_HAS_RANGES 1
-    #else
-        #define JSON_HAS_RANGES 0
-    #endif
-#endif
-
-#ifdef JSON_HAS_CPP_17
-    #define JSON_INLINE_VARIABLE inline
-#else
-    #define JSON_INLINE_VARIABLE
-#endif
-
-#if JSON_HEDLEY_HAS_ATTRIBUTE(no_unique_address)
-    #define JSON_NO_UNIQUE_ADDRESS [[no_unique_address]]
-#else
-    #define JSON_NO_UNIQUE_ADDRESS
-#endif
-
-// disable documentation warnings on clang
-#if defined(__clang__)
-    #pragma clang diagnostic push
-    #pragma clang diagnostic ignored "-Wdocumentation"
-    #pragma clang diagnostic ignored "-Wdocumentation-unknown-command"
-#endif
-
-// allow disabling exceptions
-#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION)
-    #define JSON_THROW(exception) throw exception
-    #define JSON_TRY try
-    #define JSON_CATCH(exception) catch(exception)
-    #define JSON_INTERNAL_CATCH(exception) catch(exception)
-#else
-    #include <cstdlib> // std::abort
-    #define JSON_THROW(exception) std::abort()
-    #define JSON_TRY if(true)
-    #define JSON_CATCH(exception) if(false)
-    #define JSON_INTERNAL_CATCH(exception) if(false)
-#endif
-
-// override exception macros
-#if defined(JSON_THROW_USER)
-    #undef JSON_THROW
-    #define JSON_THROW JSON_THROW_USER
-#endif
-#if defined(JSON_TRY_USER)
-    #undef JSON_TRY
-    #define JSON_TRY JSON_TRY_USER
-#endif
-#if defined(JSON_CATCH_USER)
-    #undef JSON_CATCH
-    #define JSON_CATCH JSON_CATCH_USER
-    #undef JSON_INTERNAL_CATCH
-    #define JSON_INTERNAL_CATCH JSON_CATCH_USER
-#endif
-#if defined(JSON_INTERNAL_CATCH_USER)
-    #undef JSON_INTERNAL_CATCH
-    #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER
-#endif
-
-// allow overriding assert
-#if !defined(JSON_ASSERT)
-    #include <cassert> // assert
-    #define JSON_ASSERT(x) assert(x)
-#endif
-
-// allow to access some private functions (needed by the test suite)
-#if defined(JSON_TESTS_PRIVATE)
-    #define JSON_PRIVATE_UNLESS_TESTED public
-#else
-    #define JSON_PRIVATE_UNLESS_TESTED private
-#endif
-
-/*!
-@brief macro to briefly define a mapping between an enum and JSON
-@def NLOHMANN_JSON_SERIALIZE_ENUM
-@since version 3.4.0
-*/
-#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...)                                            \
-    template<typename BasicJsonType>                                                            \
-    inline void to_json(BasicJsonType& j, const ENUM_TYPE& e)                                   \
-    {                                                                                           \
-        static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!");          \
-        static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__;                     \
-        auto it = std::find_if(std::begin(m), std::end(m),                                      \
-                               [e](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool  \
-        {                                                                                       \
-            return ej_pair.first == e;                                                          \
-        });                                                                                     \
-        j = ((it != std::end(m)) ? it : std::begin(m))->second;                                 \
-    }                                                                                           \
-    template<typename BasicJsonType>                                                            \
-    inline void from_json(const BasicJsonType& j, ENUM_TYPE& e)                                 \
-    {                                                                                           \
-        static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!");          \
-        static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__;                     \
-        auto it = std::find_if(std::begin(m), std::end(m),                                      \
-                               [&j](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool \
-        {                                                                                       \
-            return ej_pair.second == j;                                                         \
-        });                                                                                     \
-        e = ((it != std::end(m)) ? it : std::begin(m))->first;                                  \
-    }
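-
-// Illustrative usage (TaskState is a hypothetical enum taken from the pattern
-// in the library documentation, not part of this header):
-//
-//   enum TaskState { TS_STOPPED, TS_RUNNING, TS_INVALID = -1 };
-//   NLOHMANN_JSON_SERIALIZE_ENUM(TaskState, {
-//       {TS_INVALID, nullptr},
-//       {TS_STOPPED, "stopped"},
-//       {TS_RUNNING, "running"},
-//   })
-//
-// json(TS_RUNNING) then serializes to "running"; values without a match fall
-// back to the first pair, so the default mapping should be listed first.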
-
-// Ugly macros to avoid uglier copy-paste when specializing basic_json. They
-// may be removed in the future once the class is split.
-
-#define NLOHMANN_BASIC_JSON_TPL_DECLARATION                                \
-    template<template<typename, typename, typename...> class ObjectType,   \
-             template<typename, typename...> class ArrayType,              \
-             class StringType, class BooleanType, class NumberIntegerType, \
-             class NumberUnsignedType, class NumberFloatType,              \
-             template<typename> class AllocatorType,                       \
-             template<typename, typename = void> class JSONSerializer,     \
-             class BinaryType>
-
-#define NLOHMANN_BASIC_JSON_TPL                                            \
-    basic_json<ObjectType, ArrayType, StringType, BooleanType,             \
-    NumberIntegerType, NumberUnsignedType, NumberFloatType,                \
-    AllocatorType, JSONSerializer, BinaryType>
-
-// Macros to simplify conversion from/to types
-
-#define NLOHMANN_JSON_EXPAND( x ) x
-#define NLOHMANN_JSON_GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, _64, NAME,...) NAME
-#define NLOHMANN_JSON_PASTE(...) NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_GET_MACRO(__VA_ARGS__, \
-        NLOHMANN_JSON_PASTE64, \
-        NLOHMANN_JSON_PASTE63, \
-        NLOHMANN_JSON_PASTE62, \
-        NLOHMANN_JSON_PASTE61, \
-        NLOHMANN_JSON_PASTE60, \
-        NLOHMANN_JSON_PASTE59, \
-        NLOHMANN_JSON_PASTE58, \
-        NLOHMANN_JSON_PASTE57, \
-        NLOHMANN_JSON_PASTE56, \
-        NLOHMANN_JSON_PASTE55, \
-        NLOHMANN_JSON_PASTE54, \
-        NLOHMANN_JSON_PASTE53, \
-        NLOHMANN_JSON_PASTE52, \
-        NLOHMANN_JSON_PASTE51, \
-        NLOHMANN_JSON_PASTE50, \
-        NLOHMANN_JSON_PASTE49, \
-        NLOHMANN_JSON_PASTE48, \
-        NLOHMANN_JSON_PASTE47, \
-        NLOHMANN_JSON_PASTE46, \
-        NLOHMANN_JSON_PASTE45, \
-        NLOHMANN_JSON_PASTE44, \
-        NLOHMANN_JSON_PASTE43, \
-        NLOHMANN_JSON_PASTE42, \
-        NLOHMANN_JSON_PASTE41, \
-        NLOHMANN_JSON_PASTE40, \
-        NLOHMANN_JSON_PASTE39, \
-        NLOHMANN_JSON_PASTE38, \
-        NLOHMANN_JSON_PASTE37, \
-        NLOHMANN_JSON_PASTE36, \
-        NLOHMANN_JSON_PASTE35, \
-        NLOHMANN_JSON_PASTE34, \
-        NLOHMANN_JSON_PASTE33, \
-        NLOHMANN_JSON_PASTE32, \
-        NLOHMANN_JSON_PASTE31, \
-        NLOHMANN_JSON_PASTE30, \
-        NLOHMANN_JSON_PASTE29, \
-        NLOHMANN_JSON_PASTE28, \
-        NLOHMANN_JSON_PASTE27, \
-        NLOHMANN_JSON_PASTE26, \
-        NLOHMANN_JSON_PASTE25, \
-        NLOHMANN_JSON_PASTE24, \
-        NLOHMANN_JSON_PASTE23, \
-        NLOHMANN_JSON_PASTE22, \
-        NLOHMANN_JSON_PASTE21, \
-        NLOHMANN_JSON_PASTE20, \
-        NLOHMANN_JSON_PASTE19, \
-        NLOHMANN_JSON_PASTE18, \
-        NLOHMANN_JSON_PASTE17, \
-        NLOHMANN_JSON_PASTE16, \
-        NLOHMANN_JSON_PASTE15, \
-        NLOHMANN_JSON_PASTE14, \
-        NLOHMANN_JSON_PASTE13, \
-        NLOHMANN_JSON_PASTE12, \
-        NLOHMANN_JSON_PASTE11, \
-        NLOHMANN_JSON_PASTE10, \
-        NLOHMANN_JSON_PASTE9, \
-        NLOHMANN_JSON_PASTE8, \
-        NLOHMANN_JSON_PASTE7, \
-        NLOHMANN_JSON_PASTE6, \
-        NLOHMANN_JSON_PASTE5, \
-        NLOHMANN_JSON_PASTE4, \
-        NLOHMANN_JSON_PASTE3, \
-        NLOHMANN_JSON_PASTE2, \
-        NLOHMANN_JSON_PASTE1)(__VA_ARGS__))
-#define NLOHMANN_JSON_PASTE2(func, v1) func(v1)
-#define NLOHMANN_JSON_PASTE3(func, v1, v2) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE2(func, v2)
-#define NLOHMANN_JSON_PASTE4(func, v1, v2, v3) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE3(func, v2, v3)
-#define NLOHMANN_JSON_PASTE5(func, v1, v2, v3, v4) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE4(func, v2, v3, v4)
-#define NLOHMANN_JSON_PASTE6(func, v1, v2, v3, v4, v5) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE5(func, v2, v3, v4, v5)
-#define NLOHMANN_JSON_PASTE7(func, v1, v2, v3, v4, v5, v6) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE6(func, v2, v3, v4, v5, v6)
-#define NLOHMANN_JSON_PASTE8(func, v1, v2, v3, v4, v5, v6, v7) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE7(func, v2, v3, v4, v5, v6, v7)
-#define NLOHMANN_JSON_PASTE9(func, v1, v2, v3, v4, v5, v6, v7, v8) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE8(func, v2, v3, v4, v5, v6, v7, v8)
-#define NLOHMANN_JSON_PASTE10(func, v1, v2, v3, v4, v5, v6, v7, v8, v9) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE9(func, v2, v3, v4, v5, v6, v7, v8, v9)
-#define NLOHMANN_JSON_PASTE11(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE10(func, v2, v3, v4, v5, v6, v7, v8, v9, v10)
-#define NLOHMANN_JSON_PASTE12(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE11(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11)
-#define NLOHMANN_JSON_PASTE13(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE12(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12)
-#define NLOHMANN_JSON_PASTE14(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE13(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13)
-#define NLOHMANN_JSON_PASTE15(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE14(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14)
-#define NLOHMANN_JSON_PASTE16(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE15(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15)
-#define NLOHMANN_JSON_PASTE17(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE16(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16)
-#define NLOHMANN_JSON_PASTE18(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE17(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17)
-#define NLOHMANN_JSON_PASTE19(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE18(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18)
-#define NLOHMANN_JSON_PASTE20(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE19(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19)
-#define NLOHMANN_JSON_PASTE21(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE20(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20)
-#define NLOHMANN_JSON_PASTE22(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE21(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21)
-#define NLOHMANN_JSON_PASTE23(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE22(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22)
-#define NLOHMANN_JSON_PASTE24(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE23(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23)
-#define NLOHMANN_JSON_PASTE25(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE24(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24)
-#define NLOHMANN_JSON_PASTE26(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE25(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25)
-#define NLOHMANN_JSON_PASTE27(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE26(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26)
-#define NLOHMANN_JSON_PASTE28(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE27(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27)
-#define NLOHMANN_JSON_PASTE29(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE28(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28)
-#define NLOHMANN_JSON_PASTE30(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE29(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29)
-#define NLOHMANN_JSON_PASTE31(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE30(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30)
-#define NLOHMANN_JSON_PASTE32(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE31(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31)
-#define NLOHMANN_JSON_PASTE33(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE32(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32)
-#define NLOHMANN_JSON_PASTE34(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE33(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33)
-#define NLOHMANN_JSON_PASTE35(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE34(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34)
-#define NLOHMANN_JSON_PASTE36(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE35(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35)
-#define NLOHMANN_JSON_PASTE37(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE36(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36)
-#define NLOHMANN_JSON_PASTE38(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE37(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37)
-#define NLOHMANN_JSON_PASTE39(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE38(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38)
-#define NLOHMANN_JSON_PASTE40(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE39(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39)
-#define NLOHMANN_JSON_PASTE41(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE40(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40)
-#define NLOHMANN_JSON_PASTE42(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE41(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41)
-#define NLOHMANN_JSON_PASTE43(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE42(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42)
-#define NLOHMANN_JSON_PASTE44(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE43(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43)
-#define NLOHMANN_JSON_PASTE45(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE44(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44)
-#define NLOHMANN_JSON_PASTE46(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE45(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45)
-#define NLOHMANN_JSON_PASTE47(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE46(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46)
-#define NLOHMANN_JSON_PASTE48(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE47(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47)
-#define NLOHMANN_JSON_PASTE49(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE48(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48)
-#define NLOHMANN_JSON_PASTE50(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE49(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49)
-#define NLOHMANN_JSON_PASTE51(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE50(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50)
-#define NLOHMANN_JSON_PASTE52(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE51(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51)
-#define NLOHMANN_JSON_PASTE53(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE52(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52)
-#define NLOHMANN_JSON_PASTE54(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE53(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53)
-#define NLOHMANN_JSON_PASTE55(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE54(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54)
-#define NLOHMANN_JSON_PASTE56(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE55(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55)
-#define NLOHMANN_JSON_PASTE57(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE56(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56)
-#define NLOHMANN_JSON_PASTE58(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE57(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57)
-#define NLOHMANN_JSON_PASTE59(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE58(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58)
-#define NLOHMANN_JSON_PASTE60(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE59(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59)
-#define NLOHMANN_JSON_PASTE61(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE60(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60)
-#define NLOHMANN_JSON_PASTE62(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE61(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61)
-#define NLOHMANN_JSON_PASTE63(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE62(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62)
-#define NLOHMANN_JSON_PASTE64(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE63(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63)
-
-#define NLOHMANN_JSON_TO(v1) nlohmann_json_j[#v1] = nlohmann_json_t.v1;
-#define NLOHMANN_JSON_FROM(v1) nlohmann_json_j.at(#v1).get_to(nlohmann_json_t.v1);
-#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) nlohmann_json_t.v1 = nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1);
-
-/*!
-@brief macro
-@def NLOHMANN_DEFINE_TYPE_INTRUSIVE
-@since version 3.9.0
-*/
-#define NLOHMANN_DEFINE_TYPE_INTRUSIVE(Type, ...)  \
-    friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \
-    friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) }
-
-#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Type, ...)  \
-    friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \
-    friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { Type nlohmann_json_default_obj; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) }
-
-/*!
-@brief macro
-@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE
-@since version 3.9.0
-*/
-#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Type, ...)  \
-    inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \
-    inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) }
-
-#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...)  \
-    inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \
-    inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { Type nlohmann_json_default_obj; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) }
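-
-// Illustrative usage of the non-intrusive variant (ns::person is a
-// hypothetical type, not part of this header):
-//
-//   namespace ns {
-//       struct person { std::string name; int age; };
-//       NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(person, name, age)
-//   }
-//
-// after which nlohmann::json j = ns::person{"Ann", 33}; and
-// j.get<ns::person>() both work; the INTRUSIVE variants go inside the class
-// body and can therefore reach private members.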
-
-
-// inspired from https://stackoverflow.com/a/26745591
-// allows to call any std function as if (e.g. with begin):
-// using std::begin; begin(x);
-//
-// it allows using the detected idiom to retrieve the return type
-// of such an expression
-#define NLOHMANN_CAN_CALL_STD_FUNC_IMPL(std_name)                                 \
-    namespace detail {                                                            \
-    using std::std_name;                                                          \
-    \
-    template<typename... T>                                                       \
-    using result_of_##std_name = decltype(std_name(std::declval<T>()...));        \
-    }                                                                             \
-    \
-    namespace detail2 {                                                           \
-    struct std_name##_tag                                                         \
-    {                                                                             \
-    };                                                                            \
-    \
-    template<typename... T>                                                       \
-    std_name##_tag std_name(T&&...);                                              \
-    \
-    template<typename... T>                                                       \
-    using result_of_##std_name = decltype(std_name(std::declval<T>()...));        \
-    \
-    template<typename... T>                                                       \
-    struct would_call_std_##std_name                                              \
-    {                                                                             \
-        static constexpr auto const value = ::nlohmann::detail::                  \
-                                            is_detected_exact<std_name##_tag, result_of_##std_name, T...>::value; \
-    };                                                                            \
-    } /* namespace detail2 */ \
-    \
-    template<typename... T>                                                       \
-    struct would_call_std_##std_name : detail2::would_call_std_##std_name<T...>   \
-    {                                                                             \
-    }
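-
-// Sketch of the resulting trait: NLOHMANN_CAN_CALL_STD_FUNC_IMPL(begin)
-// produces would_call_std_begin<T...>, whose ::value is true exactly when an
-// idiomatic `using std::begin; begin(x);` call would pick std::begin, i.e.
-// when ADL finds no competing overload and only the detail2 tag fallback
-// would otherwise match.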
-
-#ifndef JSON_USE_IMPLICIT_CONVERSIONS
-    #define JSON_USE_IMPLICIT_CONVERSIONS 1
-#endif
-
-#if JSON_USE_IMPLICIT_CONVERSIONS
-    #define JSON_EXPLICIT
-#else
-    #define JSON_EXPLICIT explicit
-#endif
-
-#ifndef JSON_DISABLE_ENUM_SERIALIZATION
-    #define JSON_DISABLE_ENUM_SERIALIZATION 0
-#endif
-
-#ifndef JSON_USE_GLOBAL_UDLS
-    #define JSON_USE_GLOBAL_UDLS 1
-#endif
-
-#if JSON_HAS_THREE_WAY_COMPARISON
-    #include <compare> // partial_ordering
-#endif
-
-NLOHMANN_JSON_NAMESPACE_BEGIN
-namespace detail
-{
-
-///////////////////////////
-// JSON type enumeration //
-///////////////////////////
-
-/*!
-@brief the JSON type enumeration
-
-This enumeration collects the different JSON types. It is internally used to
-distinguish the stored values, and the functions @ref basic_json::is_null(),
-@ref basic_json::is_object(), @ref basic_json::is_array(),
-@ref basic_json::is_string(), @ref basic_json::is_boolean(),
-@ref basic_json::is_number() (with @ref basic_json::is_number_integer(),
-@ref basic_json::is_number_unsigned(), and @ref basic_json::is_number_float()),
-@ref basic_json::is_discarded(), @ref basic_json::is_primitive(), and
-@ref basic_json::is_structured() rely on it.
-
-@note There are three enumeration entries (number_integer, number_unsigned, and
-number_float), because the library distinguishes these three types for numbers:
-@ref basic_json::number_unsigned_t is used for unsigned integers,
-@ref basic_json::number_integer_t is used for signed integers, and
-@ref basic_json::number_float_t is used for floating-point numbers or to
-approximate integers which do not fit in the limits of their respective type.
-
-@sa see @ref basic_json::basic_json(const value_t value_type) -- create a JSON
-value with the default value for a given type
-
-@since version 1.0.0
-*/
-enum class value_t : std::uint8_t
-{
-    null,             ///< null value
-    object,           ///< object (unordered set of name/value pairs)
-    array,            ///< array (ordered collection of values)
-    string,           ///< string value
-    boolean,          ///< boolean value
-    number_integer,   ///< number value (signed integer)
-    number_unsigned,  ///< number value (unsigned integer)
-    number_float,     ///< number value (floating-point)
-    binary,           ///< binary array (ordered collection of bytes)
-    discarded         ///< discarded by the parser callback function
-};
-
-/*!
-@brief comparison operator for JSON types
-
-Returns an ordering that is similar to Python:
-- order: null < boolean < number < object < array < string < binary
-- furthermore, each type is not smaller than itself
-- discarded values are not comparable
-- binary is represented as a b"" string in python and directly comparable to a
-  string; however, making a binary array directly comparable with a string would
-  be surprising behavior in a JSON file.
-
-@since version 1.0.0
-*/
-#if JSON_HAS_THREE_WAY_COMPARISON
-    inline std::partial_ordering operator<=>(const value_t lhs, const value_t rhs) noexcept // *NOPAD*
-#else
-    inline bool operator<(const value_t lhs, const value_t rhs) noexcept
-#endif
-{
-    static constexpr std::array<std::uint8_t, 9> order = {{
-            0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */,
-            1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */,
-            6 /* binary */
-        }
-    };
-
-    const auto l_index = static_cast<std::size_t>(lhs);
-    const auto r_index = static_cast<std::size_t>(rhs);
-#if JSON_HAS_THREE_WAY_COMPARISON
-    if (l_index < order.size() && r_index < order.size())
-    {
-        return order[l_index] <=> order[r_index]; // *NOPAD*
-    }
-    return std::partial_ordering::unordered;
-#else
-    return l_index < order.size() && r_index < order.size() && order[l_index] < order[r_index];
-#endif
-}
-
-// GCC selects the built-in operator< over an operator rewritten from
-// a user-defined spaceship operator
-// Clang, MSVC, and ICC select the rewritten candidate
-// (see GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105200)
-#if JSON_HAS_THREE_WAY_COMPARISON && defined(__GNUC__)
-inline bool operator<(const value_t lhs, const value_t rhs) noexcept
-{
-    return std::is_lt(lhs <=> rhs); // *NOPAD*
-}
-#endif
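-
-// Net effect of the table above (illustrative): null < boolean < all three
-// numeric types < object < array < string < binary, the numeric types compare
-// equal among themselves, and any comparison involving value_t::discarded is
-// false in both argument orders (unordered under operator<=>).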
-
-}  // namespace detail
-NLOHMANN_JSON_NAMESPACE_END
-
-// #include <nlohmann/detail/string_escape.hpp>
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-// #include 
-
-
-NLOHMANN_JSON_NAMESPACE_BEGIN
-namespace detail
-{
-
-/*!
-@brief replace all occurrences of a substring by another string
-
-@param[in,out] s  the string to manipulate; changed so that all
-               occurrences of @a f are replaced with @a t
-@param[in]     f  the substring to replace with @a t
-@param[in]     t  the string to replace @a f
-
-@pre The search string @a f must not be empty. **This precondition is
-enforced with an assertion.**
-
-@since version 2.0.0
-*/
-template<typename StringType>
-inline void replace_substring(StringType& s, const StringType& f,
-                              const StringType& t)
-{
-    JSON_ASSERT(!f.empty());
-    for (auto pos = s.find(f);                // find first occurrence of f
-            pos != StringType::npos;          // make sure f was found
-            s.replace(pos, f.size(), t),      // replace with t, and
-            pos = s.find(f, pos + t.size()))  // find next occurrence of f
-    {}
-}
-
-/*!
- * @brief string escaping as described in RFC 6901 (Sect. 4)
- * @param[in] s string to escape
- * @return    escaped string
- *
- * Note the order of escaping "~" to "~0" and "/" to "~1" is important.
- */
-template<typename StringType>
-inline StringType escape(StringType s)
-{
-    replace_substring(s, StringType{"~"}, StringType{"~0"});
-    replace_substring(s, StringType{"/"}, StringType{"~1"});
-    return s;
-}
-
-/*!
- * @brief string unescaping as described in RFC 6901 (Sect. 4)
- * @param[in] s string to unescape
- * @return    unescaped string
- *
- * Note the order of escaping "~1" to "/" and "~0" to "~" is important.
- */
-template<typename StringType>
-static void unescape(StringType& s)
-{
-    replace_substring(s, StringType{"~1"}, StringType{"/"});
-    replace_substring(s, StringType{"~0"}, StringType{"~"});
-}
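-
-// Round-trip sketch per RFC 6901: escape("a/b~c") returns "a~1b~0c" and
-// unescape of "a~1b~0c" restores "a/b~c"; escaping "~" before "/" (and
-// unescaping in the reverse order) is what keeps the mapping unambiguous.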
-
-}  // namespace detail
-NLOHMANN_JSON_NAMESPACE_END
-
-// #include <nlohmann/detail/input/position_t.hpp>
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-#include <cstddef> // size_t
-
-// #include <nlohmann/detail/abi_macros.hpp>
-
-
-NLOHMANN_JSON_NAMESPACE_BEGIN
-namespace detail
-{
-
-/// struct to capture the start position of the current token
-struct position_t
-{
-    /// the total number of characters read
-    std::size_t chars_read_total = 0;
-    /// the number of characters read in the current line
-    std::size_t chars_read_current_line = 0;
-    /// the number of lines read
-    std::size_t lines_read = 0;
-
-    /// conversion to size_t to preserve SAX interface
-    constexpr operator size_t() const
-    {
-        return chars_read_total;
-    }
-};
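-
-// E.g. after consuming the five bytes of "{}\n[]" a reader would report
-// chars_read_total == 5, chars_read_current_line == 2, lines_read == 1; the
-// size_t conversion lets SAX callbacks keep receiving chars_read_total.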
-
-}  // namespace detail
-NLOHMANN_JSON_NAMESPACE_END
-
-// #include <nlohmann/detail/macro_scope.hpp>
-
-// #include <nlohmann/detail/meta/cpp_future.hpp>
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-FileCopyrightText: 2018 The Abseil Authors
-// SPDX-License-Identifier: MIT
-
-
-
-#include <array> // array
-#include <cstddef> // size_t
-#include <type_traits> // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type
-#include <utility> // index_sequence, make_index_sequence, index_sequence_for
-
-// #include <nlohmann/detail/macro_scope.hpp>
-
-
-NLOHMANN_JSON_NAMESPACE_BEGIN
-namespace detail
-{
-
-template<typename T>
-using uncvref_t = typename std::remove_cv<typename std::remove_reference<T>::type>::type;
-
-#ifdef JSON_HAS_CPP_14
-
-// the following utilities are natively available in C++14
-using std::enable_if_t;
-using std::index_sequence;
-using std::make_index_sequence;
-using std::index_sequence_for;
-
-#else
-
-// alias templates to reduce boilerplate
-template<bool B, typename T = void>
-using enable_if_t = typename std::enable_if<B, T>::type;
-
-// The following code is taken from https://github.com/abseil/abseil-cpp/blob/10cb35e459f5ecca5b2ff107635da0bfa41011b4/absl/utility/utility.h
-// which is part of Google Abseil (https://github.com/abseil/abseil-cpp), licensed under the Apache License 2.0.
-
-//// START OF CODE FROM GOOGLE ABSEIL
-
-// integer_sequence
-//
-// Class template representing a compile-time integer sequence. An instantiation
-// of `integer_sequence<T, N>` has a sequence of integers encoded in its
-// type through its template arguments (which is a common need when
-// working with C++11 variadic templates). `absl::integer_sequence` is designed
-// to be a drop-in replacement for C++14's `std::integer_sequence`.
-//
-// Example:
-//
-//   template< class T, T... Ints >
-//   void user_function(integer_sequence<T, Ints...>);
-//
-//   int main()
-//   {
-//     // user_function's `T` will be deduced to `int` and `Ints...`
-//     // will be deduced to `0, 1, 2, 3, 4`.
-//     user_function(make_integer_sequence<int, 5>());
-//   }
-template <typename T, T... Ints>
-struct integer_sequence
-{
-    using value_type = T;
-    static constexpr std::size_t size() noexcept
-    {
-        return sizeof...(Ints);
-    }
-};
-
-// index_sequence
-//
-// A helper template for an `integer_sequence` of `size_t`,
-// `absl::index_sequence` is designed to be a drop-in replacement for C++14's
-// `std::index_sequence`.
-template <size_t... Ints>
-using index_sequence = integer_sequence<size_t, Ints...>;
-
-namespace utility_internal
-{
-
-template <typename Seq, size_t SeqSize, size_t Rem>
-struct Extend;
-
-// Note that SeqSize == sizeof...(Ints). It's passed explicitly for efficiency.
-template <typename T, T... Ints, size_t SeqSize>
-struct Extend<integer_sequence<T, Ints...>, SeqSize, 0>
-{
-    using type = integer_sequence < T, Ints..., (Ints + SeqSize)... >;
-};
-
-template <typename T, T... Ints, size_t SeqSize>
-struct Extend<integer_sequence<T, Ints...>, SeqSize, 1>
-{
-    using type = integer_sequence < T, Ints..., (Ints + SeqSize)..., 2 * SeqSize >;
-};
-
-// Recursion helper for 'make_integer_sequence<T, N>'.
-// 'Gen<T, N>::type' is an alias for 'integer_sequence<T, 0, 1, ... N-1>'.
-template <typename T, size_t N>
-struct Gen
-{
-    using type =
-        typename Extend < typename Gen < T, N / 2 >::type, N / 2, N % 2 >::type;
-};
-
-template <typename T>
-struct Gen<T, 0>
-{
-    using type = integer_sequence<T>;
-};
-
-}  // namespace utility_internal
-
-// Compile-time sequences of integers
-
-// make_integer_sequence
-//
-// This template alias is equivalent to
-// `integer_sequence<int, 0, 1, ..., N-1>`, and is designed to be a drop-in
-// replacement for C++14's `std::make_integer_sequence`.
-template <typename T, T N>
-using make_integer_sequence = typename utility_internal::Gen<T, N>::type;
-
-// make_index_sequence
-//
-// This template alias is equivalent to `index_sequence<0, 1, ..., N-1>`,
-// and is designed to be a drop-in replacement for C++14's
-// `std::make_index_sequence`.
-template <size_t N>
-using make_index_sequence = make_integer_sequence<size_t, N>;
-
-// index_sequence_for
-//
-// Converts a typename pack into an index sequence of the same length, and
-// is designed to be a drop-in replacement for C++14's
-// `std::index_sequence_for()`
-template <typename... Ts>
-using index_sequence_for = make_index_sequence<sizeof...(Ts)>;
-
-//// END OF CODE FROM GOOGLE ABSEIL
-
-#endif
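-
-// Illustrative instantiations (valid for either branch above):
-//   make_integer_sequence<int, 3>   is integer_sequence<int, 0, 1, 2>
-//   make_index_sequence<3>          is index_sequence<0, 1, 2>
-//   index_sequence_for<char, bool>  is index_sequence<0, 1>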
-
-// dispatch utility (taken from ranges-v3)
-template<unsigned N> struct priority_tag : priority_tag < N - 1 > {};
-template<> struct priority_tag<0> {};
-
-// taken from ranges-v3
-template<typename T>
-struct static_const
-{
-    static JSON_INLINE_VARIABLE constexpr T value{};
-};
-
-#ifndef JSON_HAS_CPP_17
-    template<typename T>
-    constexpr T static_const<T>::value;
-#endif
-
-template<typename T, typename... Args>
-inline constexpr std::array<T, sizeof...(Args)> make_array(Args&& ... args)
-{
-    return std::array<T, sizeof...(Args)> {{static_cast<T>(std::forward<Args>(args))...}};
-}
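-
-// E.g. make_array<long>(1, 2, 3) yields std::array<long, 3>{{1L, 2L, 3L}};
-// the explicit static_cast<T> avoids narrowing-conversion errors inside the
-// braced initializer when the arguments need converting.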
-
-}  // namespace detail
-NLOHMANN_JSON_NAMESPACE_END
-
-// #include <nlohmann/detail/meta/type_traits.hpp>
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-#include <limits> // numeric_limits
-#include <type_traits> // false_type, is_constructible, is_integral, is_same, true_type
-#include <utility> // declval
-#include <tuple> // tuple
-
-// #include <nlohmann/detail/iterators/iterator_traits.hpp>
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-#include <iterator> // random_access_iterator_tag
-
-// #include 
-
-// #include 
-
-// #include 
-
-
-NLOHMANN_JSON_NAMESPACE_BEGIN
-namespace detail
-{
-
-template<typename It, typename = void>
-struct iterator_types {};
-
-template<typename It>
-struct iterator_types <
-    It,
-    void_t<typename It::difference_type, typename It::value_type, typename It::pointer,
-    typename It::reference, typename It::iterator_category >>
-{
-    using difference_type = typename It::difference_type;
-    using value_type = typename It::value_type;
-    using pointer = typename It::pointer;
-    using reference = typename It::reference;
-    using iterator_category = typename It::iterator_category;
-};
-
-// This is required as some compilers implement std::iterator_traits in a way that
-// doesn't work with SFINAE. See https://github.com/nlohmann/json/issues/1341.
-template<typename T, typename = void>
-struct iterator_traits
-{
-};
-
-template<typename T>
-struct iterator_traits < T, enable_if_t < !std::is_pointer<T>::value >>
-            : iterator_types<T>
-{
-};
-
-template<typename T>
-struct iterator_traits<T*, enable_if_t<std::is_object<T>::value>>
-{
-    using iterator_category = std::random_access_iterator_tag;
-    using value_type = T;
-    using difference_type = ptrdiff_t;
-    using pointer = T*;
-    using reference = T&;
-};
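-
-// So iterator_traits<int*>::iterator_category is random_access_iterator_tag
-// even though int* has no nested typedefs, while iterator_traits<int> (not an
-// iterator at all) is simply empty instead of being a hard error, which keeps
-// it usable inside SFINAE contexts.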
-
-}  // namespace detail
-NLOHMANN_JSON_NAMESPACE_END
-
-// #include <nlohmann/detail/macro_scope.hpp>
-
-// #include <nlohmann/detail/meta/call_std/begin.hpp>
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-// #include <nlohmann/detail/macro_scope.hpp>
-
-
-NLOHMANN_JSON_NAMESPACE_BEGIN
-
-NLOHMANN_CAN_CALL_STD_FUNC_IMPL(begin);
-
-NLOHMANN_JSON_NAMESPACE_END
-
-// #include <nlohmann/detail/meta/call_std/end.hpp>
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-
-
-// #include <nlohmann/detail/macro_scope.hpp>
-
-
-NLOHMANN_JSON_NAMESPACE_BEGIN
-
-NLOHMANN_CAN_CALL_STD_FUNC_IMPL(end);
-
-NLOHMANN_JSON_NAMESPACE_END
-
-// #include <nlohmann/detail/meta/cpp_future.hpp>
-
-// #include <nlohmann/detail/meta/detected.hpp>
-
-// #include <nlohmann/json_fwd.hpp>
-//     __ _____ _____ _____
-//  __|  |   __|     |   | |  JSON for Modern C++
-// |  |  |__   |  |  | | | |  version 3.11.2
-// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
-//
-// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann 
-// SPDX-License-Identifier: MIT
-
-#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_
-    #define INCLUDE_NLOHMANN_JSON_FWD_HPP_
-
-    #include <cstdint> // int64_t, uint64_t
-    #include <map> // map
-    #include <memory> // allocator
-    #include <string> // string
-    #include <vector> // vector
-
-    // #include <nlohmann/detail/abi_macros.hpp>
-
-
-    /*!
-    @brief namespace for Niels Lohmann
-    @see https://github.com/nlohmann
-    @since version 1.0.0
-    */
-    NLOHMANN_JSON_NAMESPACE_BEGIN
-
-    /*!
-    @brief default JSONSerializer template argument
-
-    This serializer ignores the template arguments and uses ADL
-    ([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl))
-    for serialization.
-    */
-    template<typename T = void, typename SFINAE = void>
-    struct adl_serializer;
-
-    /// a class to store JSON values
-    /// @sa https://json.nlohmann.me/api/basic_json/
-    template<template<typename U, typename V, typename... Args> class ObjectType =
-    std::map,
-    template<typename U, typename... Args> class ArrayType = std::vector,
-    class StringType = std::string, class BooleanType = bool,
-    class NumberIntegerType = std::int64_t,
-    class NumberUnsignedType = std::uint64_t,
-    class NumberFloatType = double,
-    template<typename U> class AllocatorType = std::allocator,
-    template<typename T, typename SFINAE = void> class JSONSerializer =
-    adl_serializer,
-    class BinaryType = std::vector<std::uint8_t>>
-    class basic_json;
-
-    /// @brief JSON Pointer defines a string syntax for identifying a specific value within a JSON document
-    /// @sa https://json.nlohmann.me/api/json_pointer/
-    template<typename RefStringType>
-    class json_pointer;
-
-    /*!
-    @brief default specialization
-    @sa https://json.nlohmann.me/api/json/
-    */
-    using json = basic_json<>;
-
-    /// @brief a minimal map-like container that preserves insertion order
-    /// @sa https://json.nlohmann.me/api/ordered_map/
-    template<class Key, class T, class IgnoredLess, class Allocator>
-    struct ordered_map;
-
-    /// @brief specialization that maintains the insertion order of object keys
-    /// @sa https://json.nlohmann.me/api/ordered_json/
-    using ordered_json = basic_json<nlohmann::ordered_map>;
-
-    NLOHMANN_JSON_NAMESPACE_END
-
-#endif  // INCLUDE_NLOHMANN_JSON_FWD_HPP_
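// The forward declarations above are what separate the two public aliases:
// `json` sorts object keys (std::map), while `ordered_json` preserves
// insertion order via ordered_map. A small usage sketch -- the keys are
// arbitrary examples:
#include <iostream>
#include <nlohmann/json.hpp>

int main()
{
    nlohmann::json sorted;
    nlohmann::ordered_json in_order;
    for (auto key : {"zeta", "alpha", "mu"})
    {
        sorted[key] = 1;    // stored in lexicographic key order
        in_order[key] = 1;  // stored in insertion order
    }
    std::cout << sorted.dump() << '\n';    // {"alpha":1,"mu":1,"zeta":1}
    std::cout << in_order.dump() << '\n';  // {"zeta":1,"alpha":1,"mu":1}
}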
-
-
-NLOHMANN_JSON_NAMESPACE_BEGIN
-/*!
-@brief detail namespace with internal helper functions
-
-This namespace collects functions that should not be exposed,
-implementations of some @ref basic_json methods, and meta-programming helpers.
-
-@since version 2.1.0
-*/
-namespace detail
-{
-
-/////////////
-// helpers //
-/////////////
-
-// Note to maintainers:
-//
-// Every trait in this file expects a non CV-qualified type.
-// The only exceptions are in the 'aliases for detected' section
-// (i.e. those of the form: decltype(T::member_function(std::declval<T>())))
-//
-// In this case, T has to be properly CV-qualified to constraint the function arguments
-// (e.g. to_json(BasicJsonType&, const T&))
-
-template<typename> struct is_basic_json : std::false_type {};
-
-NLOHMANN_BASIC_JSON_TPL_DECLARATION
-struct is_basic_json<NLOHMANN_BASIC_JSON_TPL> : std::true_type {};
-
-// used by exceptions create() member functions
-// true_type for pointer to possibly cv-qualified basic_json or std::nullptr_t
-// false_type otherwise
-template<typename BasicJsonContext>
-struct is_basic_json_context :
-    std::integral_constant < bool,
-    is_basic_json<typename std::remove_cv<typename std::remove_pointer<BasicJsonContext>::type>::type>::value
-    || std::is_same<BasicJsonContext, std::nullptr_t>::value >
-{};
-
-//////////////////////
-// json_ref helpers //
-//////////////////////
-
-template<typename>
-class json_ref;
-
-template<typename>
-struct is_json_ref : std::false_type {};
-
-template<typename T>
-struct is_json_ref<json_ref<T>> : std::true_type {};
-
-//////////////////////////
-// aliases for detected //
-//////////////////////////
-
-template<typename T>
-using mapped_type_t = typename T::mapped_type;
-
-template<typename T>
-using key_type_t = typename T::key_type;
-
-template<typename T>
-using value_type_t = typename T::value_type;
-
-template<typename T>
-using difference_type_t = typename T::difference_type;
-
-template<typename T>
-using pointer_t = typename T::pointer;
-
-template<typename T>
-using reference_t = typename T::reference;
-
-template<typename T>
-using iterator_category_t = typename T::iterator_category;
-
-template<typename T, typename... Args>
-using to_json_function = decltype(T::to_json(std::declval<Args>()...));
-
-template<typename T, typename... Args>
-using from_json_function = decltype(T::from_json(std::declval<Args>()...));
-
-template<typename T, typename U>
-using get_template_function = decltype(std::declval<T>().template get<U>());
-
-// trait checking if JSONSerializer<T>::from_json(json const&, udt&) exists
-template<typename BasicJsonType, typename T, typename = void>
-struct has_from_json : std::false_type {};
-
-// trait checking if j.get<T> is valid
-// use this trait instead of std::is_constructible or std::is_convertible,
-// both rely on, or make use of implicit conversions, and thus fail when T
-// has several constructors/operator= (see https://github.com/nlohmann/json/issues/958)
-template <typename BasicJsonType, typename T>
-struct is_getable
-{
-    static constexpr bool value = is_detected<get_template_function, const BasicJsonType&, T>::value;
-};
-
-template<typename BasicJsonType, typename T>
-struct has_from_json < BasicJsonType, T, enable_if_t < !is_basic_json<T>::value >>
-{
-    using serializer = typename BasicJsonType::template json_serializer<T, void>;
-
-    static constexpr bool value =
-        is_detected_exact<void, from_json_function, serializer,
-        const BasicJsonType&, T&>::value;
-};
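// What these detection traits actually gate: whether a user-defined type can
// be pulled out of a json value. A minimal sketch -- the `point` struct and
// its from_json overload are invented here for illustration:
#include <nlohmann/json.hpp>

struct point { double x; double y; };

// found via ADL; this is the signature has_from_json detects for point
void from_json(const nlohmann::json& j, point& p)
{
    j.at("x").get_to(p.x);
    j.at("y").get_to(p.y);
}

int main()
{
    nlohmann::json j = {{"x", 1.0}, {"y", 2.0}};
    auto p = j.get<point>();  // compiles because is_getable/has_from_json hold
    return p.x < p.y ? 0 : 1;
}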
-
-// This trait checks if JSONSerializer<T>::from_json(json const&) exists
-// this overload is used for non-default-constructible user-defined-types
-template<typename BasicJsonType, typename T, typename = void>
-struct has_non_default_from_json : std::false_type {};
-
-template<typename BasicJsonType, typename T>
-struct has_non_default_from_json < BasicJsonType, T, enable_if_t < !is_basic_json<T>::value >>
-{
-    using serializer = typename BasicJsonType::template json_serializer<T, void>;
-
-    static constexpr bool value =
-        is_detected_exact<T, from_json_function, serializer,
-        const BasicJsonType&>::value;
-};
-
-// This trait checks if BasicJsonType::json_serializer<T>::to_json exists
-// Do not evaluate the trait when T is a basic_json type, to avoid template instantiation infinite recursion.
-template<typename BasicJsonType, typename T, typename = void>
-struct has_to_json : std::false_type {};
-
-template<typename BasicJsonType, typename T>
-struct has_to_json < BasicJsonType, T, enable_if_t < !is_basic_json<T>::value >>
-{
-    using serializer = typename BasicJsonType::template json_serializer<T, void>;
-
-    static constexpr bool value =
-        is_detected_exact<void, to_json_function, serializer, BasicJsonType&,
-        T>::value;
-};
-
-template<typename T>
-using detect_key_compare = typename T::key_compare;
-
-template<typename T>
-struct has_key_compare : std::integral_constant<bool, is_detected<detect_key_compare, T>::value> {};
-
-// obtains the actual object key comparator
-template<typename BasicJsonType>
-struct actual_object_comparator
-{
-    using object_t = typename BasicJsonType::object_t;
-    using object_comparator_t = typename BasicJsonType::default_object_comparator_t;
-    using type = typename std::conditional < has_key_compare<object_t>::value,
-          typename object_t::key_compare, object_comparator_t>::type;
-};
-
-template<typename BasicJsonType>
-using actual_object_comparator_t = typename actual_object_comparator<BasicJsonType>::type;
-
-///////////////////
-// is_ functions //
-///////////////////
-
-// https://en.cppreference.com/w/cpp/types/conjunction
-template<class...> struct conjunction : std::true_type { };
-template<class B> struct conjunction<B> : B { };
-template<class B, class... Bn>
-struct conjunction<B, Bn...>
-: std::conditional<static_cast<bool>(B::value), conjunction<Bn...>, B>::type {};
-
-// https://en.cppreference.com/w/cpp/types/negation
-template<class B> struct negation : std::integral_constant < bool, !B::value > { };
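// conjunction mirrors C++17 std::conjunction, including short-circuiting:
// once one trait is false, the remaining traits are never instantiated. A
// sketch under that assumption; `incomplete` is invented for this example:
#include <type_traits>
#include <nlohmann/json.hpp>

struct incomplete;  // instantiating std::is_default_constructible<incomplete> would be ill-formed

// Fine: the false first trait short-circuits, so the second is only named,
// never instantiated.
static_assert(!nlohmann::detail::conjunction<
              std::false_type,
              std::is_default_constructible<incomplete>>::value,
              "short-circuit keeps the incomplete type uninstantiated");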
-
-// Reimplementation of is_constructible and is_default_constructible, due to them being broken for
-// std::pair and std::tuple until LWG 2367 fix (see https://cplusplus.github.io/LWG/lwg-defects.html#2367).
-// This causes compile errors in e.g. clang 3.5 or gcc 4.9.
-template <typename T>
-struct is_default_constructible : std::is_default_constructible<T> {};
-
-template <typename T1, typename T2>
-struct is_default_constructible<std::pair<T1, T2>>
-            : conjunction<is_default_constructible<T1>, is_default_constructible<T2>> {};
-
-template <typename T1, typename T2>
-struct is_default_constructible<const std::pair<T1, T2>>
-            : conjunction<is_default_constructible<T1>, is_default_constructible<T2>> {};
-
-template <typename... Ts>
-struct is_default_constructible<std::tuple<Ts...>>
-            : conjunction<is_default_constructible<Ts>...> {};
-
-template <typename... Ts>
-struct is_default_constructible<const std::tuple<Ts...>>
-            : conjunction<is_default_constructible<Ts>...> {};
-
-
-template <typename T, typename... Args>
-struct is_constructible : std::is_constructible<T, Args...> {};
-
-template <typename T1, typename T2>
-struct is_constructible<std::pair<T1, T2>> : is_default_constructible<std::pair<T1, T2>> {};
-
-template <typename T1, typename T2>
-struct is_constructible<const std::pair<T1, T2>> : is_default_constructible<const std::pair<T1, T2>> {};
-
-template <typename... Ts>
-struct is_constructible<std::tuple<Ts...>> : is_default_constructible<std::tuple<Ts...>> {};
-
-template <typename... Ts>
-struct is_constructible<const std::tuple<Ts...>> : is_default_constructible<const std::tuple<Ts...>> {};
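// Why the reimplementation above exists: before the LWG 2367 fix, the
// standard traits could answer "default constructible" for a pair even when
// an element was not. A sketch of the guarantee the local trait provides;
// `no_default` is invented for this example:
#include <utility>
#include <nlohmann/json.hpp>

struct no_default { no_default() = delete; };

static_assert(!nlohmann::detail::is_default_constructible<
              std::pair<no_default, int>>::value,
              "a pair is default constructible only if both elements are");
static_assert(nlohmann::detail::is_default_constructible<
              std::pair<int, int>>::value,
              "both elements are fine here");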
-
-
-template<typename T, typename = void>
-struct is_iterator_traits : std::false_type {};
-
-template<typename T>
-struct is_iterator_traits<iterator_traits<T>>
-{
-  private:
-    using traits = iterator_traits<T>;
-
-  public:
-    static constexpr auto value =
-        is_detected<value_type_t, traits>::value &&
-        is_detected<difference_type_t, traits>::value &&
-        is_detected<pointer_t, traits>::value &&
-        is_detected<iterator_category_t, traits>::value &&
-        is_detected<reference_t, traits>::value;
-};
-
-template<typename T>
-struct is_range
-{
-  private:
-    using t_ref = typename std::add_lvalue_reference<T>::type;
-
-    using iterator = detected_t<result_of_begin, t_ref>;
-    using sentinel = detected_t<result_of_end, t_ref>;
-
-    // to be 100% correct, it should use https://en.cppreference.com/w/cpp/iterator/input_or_output_iterator
-    // and https://en.cppreference.com/w/cpp/iterator/sentinel_for
-    // but reimplementing these would be too much work, as a lot of other concepts are used underneath
-    static constexpr auto is_iterator_begin =
-        is_iterator_traits<iterator_traits<iterator>>::value;
-
-  public:
-    static constexpr bool value = !std::is_same<iterator, nonesuch>::value && !std::is_same<sentinel, nonesuch>::value && is_iterator_begin;
-};
-
-template<typename R>
-using iterator_t = enable_if_t<is_range<R>::value, result_of_begin<decltype(std::declval<R&>())>>;
-
-template<typename T>
-using range_value_t = value_type_t<iterator_traits<iterator_t<T>>>;
-
-// The following implementation of is_complete_type is taken from
-// https://blogs.msdn.microsoft.com/vcblog/2015/12/02/partial-support-for-expression-sfinae-in-vs-2015-update-1/
-// and is written by Xiang Fan who agreed to using it in this library.
-
-template <typename T, typename = void>
-struct is_complete_type : std::false_type {};
-
-template <typename T>
-struct is_complete_type<T, decltype(void(sizeof(T)))> : std::true_type {};
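// is_complete_type in action: sizeof(T) is only valid once T is defined, so
// the specialization above matches exactly the complete types. A sketch;
// `declared_only` and `defined` are invented names:
#include <nlohmann/json.hpp>

struct declared_only;
struct defined {};

static_assert(!nlohmann::detail::is_complete_type<declared_only>::value,
              "a forward declaration alone is not complete");
static_assert(nlohmann::detail::is_complete_type<defined>::value,
              "a defined type is complete");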
-
-template<typename BasicJsonType, typename CompatibleObjectType,
-         typename = void>
-struct is_compatible_object_type_impl : std::false_type {};
-
-template<typename BasicJsonType, typename CompatibleObjectType>
-struct is_compatible_object_type_impl <
-    BasicJsonType, CompatibleObjectType,
-    enable_if_t < is_detected<mapped_type_t, CompatibleObjectType>::value&&
-    is_detected<key_type_t, CompatibleObjectType>::value >>
-{
-    using object_t = typename BasicJsonType::object_t;
-
-    // macOS's is_constructible does not play well with nonesuch...
-    static constexpr bool value =
-        is_constructible<typename object_t::key_type,
-        typename CompatibleObjectType::key_type>::value &&
-        is_constructible<typename object_t::mapped_type,
-        typename CompatibleObjectType::mapped_type>::value;
-};
-
-template<typename BasicJsonType, typename CompatibleObjectType>
-struct is_compatible_object_type
-    : is_compatible_object_type_impl<BasicJsonType, CompatibleObjectType> {};
-
-template<typename BasicJsonType, typename ConstructibleObjectType,
-         typename = void>
-struct is_constructible_object_type_impl : std::false_type {};
-
-template<typename BasicJsonType, typename ConstructibleObjectType>
-struct is_constructible_object_type_impl <
-    BasicJsonType, ConstructibleObjectType,
-    enable_if_t < is_detected<mapped_type_t, ConstructibleObjectType>::value&&
-    is_detected<key_type_t, ConstructibleObjectType>::value >>
-{
-    using object_t = typename BasicJsonType::object_t;
-
-    static constexpr bool value =
-        (is_default_constructible<ConstructibleObjectType>::value &&
-         (std::is_move_assignable<ConstructibleObjectType>::value ||
-          std::is_copy_assignable<ConstructibleObjectType>::value) &&
-         (is_constructible<typename ConstructibleObjectType::key_type,
-          typename object_t::key_type>::value &&
-          std::is_same <
-          typename object_t::mapped_type,
-          typename ConstructibleObjectType::mapped_type >::value)) ||
-        (has_from_json<BasicJsonType,
-         typename ConstructibleObjectType::mapped_type>::value ||
-         has_non_default_from_json <
-         BasicJsonType,
-         typename ConstructibleObjectType::mapped_type >::value);
-};
-
-template<typename BasicJsonType, typename ConstructibleObjectType>
-struct is_constructible_object_type
-    : is_constructible_object_type_impl<BasicJsonType,
-      ConstructibleObjectType> {};
-
-template<typename BasicJsonType, typename CompatibleStringType>
-struct is_compatible_string_type
-{
-    static constexpr auto value =
-        is_constructible<typename BasicJsonType::string_t, CompatibleStringType>::value;
-};
-
-template<typename BasicJsonType, typename ConstructibleStringType>
-struct is_constructible_string_type
-{
-    // launder type through decltype() to fix compilation failure on ICPC
-#ifdef __INTEL_COMPILER
-    using laundered_type = decltype(std::declval<ConstructibleStringType>());
-#else
-    using laundered_type = ConstructibleStringType;
-#endif
-
-    static constexpr auto value =
-        conjunction <
-        is_constructible<laundered_type, typename BasicJsonType::string_t>,
-        is_detected_exact<typename BasicJsonType::string_t::value_type,
-        value_type_t, laundered_type >>::value;
-};
-
-template<typename BasicJsonType, typename CompatibleArrayType, typename = void>
-struct is_compatible_array_type_impl : std::false_type {};
-
-template<typename BasicJsonType, typename CompatibleArrayType>
-struct is_compatible_array_type_impl <
-    BasicJsonType, CompatibleArrayType,
-    enable_if_t <
-    is_detected<iterator_t, CompatibleArrayType>::value&&
-    is_iterator_traits<iterator_traits<detected_t<iterator_t, CompatibleArrayType>>>::value&&
-// special case for types like std::filesystem::path whose iterator's value_type are themselves
-// c.f. https://github.com/nlohmann/json/pull/3073
-    !std::is_same<CompatibleArrayType, detected_t<range_value_t, CompatibleArrayType>>::value >>
-{
-    static constexpr bool value =
-        is_constructible<BasicJsonType,
-        range_value_t<CompatibleArrayType>>::value;
-};
-
-template<typename BasicJsonType, typename CompatibleArrayType>
-struct is_compatible_array_type
-    : is_compatible_array_type_impl<BasicJsonType, CompatibleArrayType> {};
-
-template<typename BasicJsonType, typename ConstructibleArrayType, typename = void>
-struct is_constructible_array_type_impl : std::false_type {};
-
-template<typename BasicJsonType, typename ConstructibleArrayType>
-struct is_constructible_array_type_impl <
-    BasicJsonType, ConstructibleArrayType,
-    enable_if_t<std::is_same<ConstructibleArrayType,
-    typename BasicJsonType::value_type>::value >>
-            : std::true_type {};
-
-template<typename BasicJsonType, typename ConstructibleArrayType>
-struct is_constructible_array_type_impl <
-    BasicJsonType, ConstructibleArrayType,
-    enable_if_t < !std::is_same<ConstructibleArrayType,
-    typename BasicJsonType::value_type>::value&&
-    !is_compatible_string_type<BasicJsonType, ConstructibleArrayType>::value&&
-    is_default_constructible<ConstructibleArrayType>::value&&
-(std::is_move_assignable<ConstructibleArrayType>::value ||
- std::is_copy_assignable<ConstructibleArrayType>::value)&&
-is_detected<iterator_t, ConstructibleArrayType>::value&&
-is_iterator_traits<iterator_traits<detected_t<iterator_t, ConstructibleArrayType>>>::value&&
-is_detected<range_value_t, ConstructibleArrayType>::value&&
-// special case for types like std::filesystem::path whose iterator's value_type are themselves
-// c.f. https://github.com/nlohmann/json/pull/3073
-!std::is_same<ConstructibleArrayType, detected_t<range_value_t, ConstructibleArrayType>>::value&&
-        is_complete_type <
-        detected_t<range_value_t, ConstructibleArrayType >>::value >>
-{
-    using value_type = range_value_t<ConstructibleArrayType>;
-
-    static constexpr bool value =
-        std::is_same<value_type,
-        typename BasicJsonType::array_t::value_type>::value ||
-        has_from_json<BasicJsonType,
-        value_type>::value ||
-        has_non_default_from_json <
-        BasicJsonType,
-        value_type >::value;
-};
-
-template<typename BasicJsonType, typename ConstructibleArrayType>
-struct is_constructible_array_type
-    : is_constructible_array_type_impl<BasicJsonType, ConstructibleArrayType> {};
-
-template<typename RealIntegerType, typename CompatibleNumberIntegerType,
-         typename = void>
-struct is_compatible_integer_type_impl : std::false_type {};
-
-template<typename RealIntegerType, typename CompatibleNumberIntegerType>
-struct is_compatible_integer_type_impl <
-    RealIntegerType, CompatibleNumberIntegerType,
-    enable_if_t < std::is_integral<RealIntegerType>::value&&
-    std::is_integral<CompatibleNumberIntegerType>::value&&
-    !std::is_same<bool, CompatibleNumberIntegerType>::value >>
-{
-    // is there an assert somewhere on overflows?
-    using RealLimits = std::numeric_limits<RealIntegerType>;
-    using CompatibleLimits = std::numeric_limits<CompatibleNumberIntegerType>;
-
-    static constexpr auto value =
-        is_constructible<RealIntegerType,
-        CompatibleNumberIntegerType>::value &&
-        CompatibleLimits::is_integer &&
-        RealLimits::is_signed == CompatibleLimits::is_signed;
-};
-
-template<typename RealIntegerType, typename CompatibleNumberIntegerType>
-struct is_compatible_integer_type
-    : is_compatible_integer_type_impl<RealIntegerType,
-      CompatibleNumberIntegerType> {};
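// The signedness check above is what keeps, for example, an unsigned source
// type from silently feeding a signed JSON integer type. A sketch with
// invented static_asserts:
#include <cstdint>
#include <nlohmann/json.hpp>

static_assert(nlohmann::detail::is_compatible_integer_type<
              std::int64_t, std::int32_t>::value,
              "signed-to-signed is compatible");
static_assert(!nlohmann::detail::is_compatible_integer_type<
              std::int64_t, std::uint32_t>::value,
              "mixed signedness is rejected");
static_assert(!nlohmann::detail::is_compatible_integer_type<
              std::int64_t, bool>::value,
              "bool is excluded explicitly");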
-
-template<typename BasicJsonType, typename CompatibleType, typename = void>
-struct is_compatible_type_impl: std::false_type {};
-
-template<typename BasicJsonType, typename CompatibleType>
-struct is_compatible_type_impl <
-    BasicJsonType, CompatibleType,
-    enable_if_t<is_complete_type<CompatibleType>::value >>
-{
-    static constexpr bool value =
-        has_to_json<BasicJsonType, CompatibleType>::value;
-};
-
-template<typename BasicJsonType, typename CompatibleType>
-struct is_compatible_type
-    : is_compatible_type_impl<BasicJsonType, CompatibleType> {};
-
-template<typename T1, typename T2>
-struct is_constructible_tuple : std::false_type {};
-
-template<typename T1, typename... Args>
-struct is_constructible_tuple<T1, std::tuple<Args...>> : conjunction<is_constructible<T1, Args>...> {};
-
-template<typename BasicJsonType, typename T>
-struct is_json_iterator_of : std::false_type {};
-
-template<typename BasicJsonType>
-struct is_json_iterator_of<BasicJsonType, typename BasicJsonType::iterator> : std::true_type {};
-
-template<typename BasicJsonType>
-struct is_json_iterator_of<BasicJsonType, typename BasicJsonType::const_iterator> : std::true_type
-{};
-
-// checks if a given type T is a template specialization of Primary
-template<template <typename...> class Primary, typename T>