readme: add ChibiChat to community integrations (#8883)

build(rocm): add numa, elf (#8900)
readme: add Ollama Chat WebUI for Docker to community integrations (#8084)
2025-02-06 16:08:46 -08:00 · 2025-02-06 15:46:30 -08:00 · 2025-02-06 15:41:02 -08:00 · 2025-02-06 15:08:12 -08:00 · 2025-02-06 14:54:58 -08:00 · 2025-02-06 13:12:16 -08:00
26 changed files with 399 additions and 299 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -15,6 +15,10 @@ ml/backend/**/*.cu linguist-vendored
 ml/backend/**/*.cuh linguist-vendored
 ml/backend/**/*.m linguist-vendored
 ml/backend/**/*.metal linguist-vendored
+ml/backend/**/CMakeLists.txt linguist-vendored
+
+llama/build-info.cpp linguist-generated
+ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s linguist-generated

 * text=auto
 *.go text eol=lf
--- a/.github/ISSUE_TEMPLATE/10_bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/10_bug_report.yml
@@ -9,6 +9,14 @@ body:
      description: What happened? What did you expect to happen?
    validations:
      required: true
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: Please copy and paste any relevant log output. See [Troubleshooting Guide](https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md#how-to-troubleshoot-issues) for details.
+      render: shell
+    validations:
+      required: false
  - type: dropdown
    id: os
    attributes:
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -81,7 +81,7 @@ jobs:
          path: dist/darwin-arm64
      - run: |
          export VERSION=${GITHUB_REF_NAME#v}
-          ./scripts/build_darwin.sh macapp sign
+          ./scripts/build_darwin.sh sign macapp
        env:
          APPLE_IDENTITY: ${{ secrets.APPLE_IDENTITY }}
          APPLE_PASSWORD: ${{ secrets.APPLE_PASSWORD }}
@@ -197,33 +197,38 @@ jobs:
    env:
      GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
    steps:
-      - name: Install system dependencies
+      - name: Install AMD64 system dependencies
+        if: matrix.arch == 'amd64'
        run: |
          $ErrorActionPreference = "Stop"
-          if ("${{ matrix.arch }}" -eq 'amd64') {
-            Start-Process "C:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-            echo "C:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-            echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          } elseif ("${{ matrix.arch }}" -eq 'arm64') {
-            Set-ExecutionPolicy Bypass -Scope Process -Force
-            [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072
-            iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
-            echo "C:\ProgramData\chocolatey\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          Start-Process "C:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
+          echo "C:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+      - name: Install ARM64 system dependencies
+        if: matrix.arch == 'arm64'
+        run: |
+          $ErrorActionPreference = "Stop"
+          Set-ExecutionPolicy Bypass -Scope Process -Force
+          [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072
+          iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+          echo "C:\ProgramData\chocolatey\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append

-            choco install -y --no-progress git gzip
-            echo "C:\Program Files\Git\cmd" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          choco install -y --no-progress git gzip
+          echo "C:\Program Files\Git\cmd" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append

-            Invoke-WebRequest -Uri "https://github.com/mstorsjo/llvm-mingw/releases/download/20240619/llvm-mingw-20240619-ucrt-aarch64.zip" -OutFile "${{ runner.temp }}\llvm-mingw-ucrt-aarch64.zip"
-            Expand-Archive -Path ${{ runner.temp }}\llvm-mingw-ucrt-aarch64.zip -DestinationPath "C:\Program Files\"
-            $installPath=(Resolve-Path -Path "C:\Program Files\llvm-mingw-*-ucrt-aarch64").path
-            echo $installPath\bin | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          }
+          Invoke-WebRequest -Uri "https://github.com/mstorsjo/llvm-mingw/releases/download/20240619/llvm-mingw-20240619-ucrt-aarch64.zip" -OutFile "${{ runner.temp }}\llvm-mingw-ucrt-aarch64.zip"
+          Expand-Archive -Path ${{ runner.temp }}\llvm-mingw-ucrt-aarch64.zip -DestinationPath "C:\Program Files\"
+          $installPath=(Resolve-Path -Path "C:\Program Files\llvm-mingw-*-ucrt-aarch64").path
+          echo $installPath\bin | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
      - run: |
          go build -o dist/${{ matrix.os }}-${{ matrix.arch }}/ .
+      - if: matrix.arch == 'arm64'
+        run: |
+          Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vc_redist.arm64.exe" -OutFile "dist\windows-arm64\vc_redist.arm64.exe"
      - run: |
          $env:VERSION='${{ github.ref_name }}' -Replace "v(.*)", '$1'
          & .\scripts\build_windows.ps1 buildApp
@@ -258,16 +263,18 @@ jobs:
          echo "${{ vars.OLLAMA_CERT }}" >ollama_inc.crt
      - uses: actions/download-artifact@v4
        with:
-          name: build-windows-*
+          pattern: build-windows-*
          path: dist\
          merge-multiple: true
      - uses: actions/download-artifact@v4
        with:
-          name: depends-windows-amd64-*
+          pattern: depends-windows-amd64-*
          path: dist\windows-amd64\
          merge-multiple: true
      - run: |
          & .\scripts\build_windows.ps1 gatherDependencies sign buildInstaller distZip
+        env:
+          KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
      - uses: actions/upload-artifact@v4
        with:
          name: dist-windows
@@ -281,10 +288,13 @@ jobs:
        include:
          - os: linux
            arch: amd64
-            targets: 'archive rocm'
+            target: archive
+          - os: linux
+            arch: amd64
+            target: rocm
          - os: linux
            arch: arm64
-            targets: archive
+            target: archive
    runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
    environment: release
    needs: setup-environment
@@ -293,44 +303,104 @@ jobs:
    steps:
      - uses: actions/checkout@v4
      - uses: docker/setup-buildx-action@v3
+      - uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: ${{ matrix.os }}/${{ matrix.arch }}
+          target: ${{ matrix.target }}
+          build-args: |
+            GOFLAGS=${{ env.GOFLAGS }}
+            CGO_CFLAGS=${{ env.CGO_CFLAGS }}
+            CGO_CXXFLAGS=${{ env.CGO_CXXFLAGS }}
+          outputs: type=local,dest=dist/${{ matrix.os }}-${{ matrix.arch }}
+          cache-from: type=registry,ref=ollama/ollama:latest
+          cache-to: type=inline
      - run: |
-          apt-get update && apt-get install pigz
-          for TARGET in ${{ matrix.targets }}; do docker buildx build --platform $PLATFORM --target $TARGET --build-arg GOFLAGS --build-arg CGO_CFLAGS --build-args CGO_CXXFLAGS --output type=local,dest=dist/$PLATFORM .; done
-          tar c -C dist/$PLATFORM . | pigz -9cv >dist/ollama-${PLATFORM//\//-}.tgz
-        env:
-          PLATFORM: ${{ matrix.os }}/${{ matrix.arch }}
+          for COMPONENT in bin/* lib/ollama/*; do
+            case "$COMPONENT" in
+              bin/ollama)               echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
+              lib/ollama/*.so)          echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
+              lib/ollama/cuda_v11)      echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
+              lib/ollama/cuda_v12)      echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
+              lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
+              lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
+              lib/ollama/rocm)          echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;
+            esac
+          done
+        working-directory: dist/${{ matrix.os }}-${{ matrix.arch }}
+      - run: |
+          for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz); done
      - uses: actions/upload-artifact@v4
        with:
-          name: dist-${{ matrix.os }}-${{ matrix.arch }}
+          name: dist-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}
          path: |
-            dist/ollama-${{ matrix.os }}-${{ matrix.arch }}.tgz
+            *.tgz

-  docker-build:
+  # Build each Docker variant (OS, arch, and flavor) separately. Using QEMU is unreliable and slower.
+  docker-build-push:
    strategy:
      matrix:
        include:
-          - flavor: 'latest=false'
-            platforms: linux/amd64,linux/arm64
+          - os: linux
+            arch: arm64
            build-args: |
              CGO_CFLAGS
              CGO_CXXFLAGS
              GOFLAGS
-          - flavor: 'latest=false,suffix=rocm'
-            platforms: linux/amd64
+          - os: linux
+            arch: amd64
+            build-args: |
+              CGO_CFLAGS
+              CGO_CXXFLAGS
+              GOFLAGS
+          - os: linux
+            arch: amd64
+            suffix: '-rocm'
            build-args: |
              CGO_CFLAGS
              CGO_CXXFLAGS
              GOFLAGS
              FLAVOR=rocm
-    env:
-      GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
-    runs-on: linux
+    runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
    environment: release
    needs: setup-environment
+    env:
+      GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
    steps:
      - uses: actions/checkout@v4
-      - uses: docker/setup-qemu-action@v2
-      - uses: docker/setup-buildx-action@v2
+      - uses: docker/setup-buildx-action@v3
+      - uses: docker/login-action@v3
+        with:
+          username: ${{ vars.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
+      - id: build-push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: ${{ matrix.os }}/${{ matrix.arch }}
+          build-args: ${{ matrix.build-args }}
+          outputs: type=image,name=ollama/ollama,push-by-digest=true,name-canonical=true,push=true
+          cache-from: type=registry,ref=ollama/ollama:latest
+          cache-to: type=inline
+      - run: |
+          mkdir -p ${{ matrix.os }}-${{ matrix.arch }}
+          echo "${{ steps.build-push.outputs.digest }}" >${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.suffix }}.txt
+        working-directory: ${{ runner.temp }}
+      - uses: actions/upload-artifact@v4
+        with:
+          name: digest-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.suffix }}
+          path: |
+            ${{ runner.temp }}/${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.suffix }}.txt
+
+  # Merge Docker images for the same flavor into a single multi-arch manifest
+  docker-merge-push:
+    strategy:
+      matrix:
+        suffix: ['', '-rocm']
+    runs-on: linux
+    environment: release
+    needs: [docker-build-push]
+    steps:
      - uses: docker/login-action@v3
        with:
          username: ${{ vars.DOCKER_USER }}
@@ -338,22 +408,23 @@ jobs:
      - id: metadata
        uses: docker/metadata-action@v4
        with:
-          flavor: ${{ matrix.flavor }}
+          flavor: |
+            latest=false
+            suffix=${{ matrix.suffix }}
          images: |
            ollama/ollama
          tags: |
+            type=ref,enable=true,priority=600,prefix=pr-,event=pr
            type=semver,pattern={{version}}
-      - uses: docker/build-push-action@v6
+      - uses: actions/download-artifact@v4
        with:
-          context: .
-          push: true
-          platforms: ${{ matrix.platforms }}
-          build-args: ${{ matrix.build-args }}
-          tags: ${{ steps.metadata.outputs.tags }}
-          labels: ${{ steps.metadata.outputs.labels }}
-          cache-from: type=registry,ref=ollama/ollama:latest
-          cache-to: type=inline
-          provenance: false
+          pattern: digest-*
+          path: ${{ runner.temp }}
+          merge-multiple: true
+      - run: |
+          docker buildx imagetools create $(echo '${{ steps.metadata.outputs.json }}' | jq -cr '.tags | map("-t", .) | join(" ")') $(cat *-${{ matrix.suffix }}.txt | xargs printf 'ollama/ollama@%s ')
+          docker buildx imagetools inspect ollama/ollama:${{ steps.metadata.outputs.version }}
+        working-directory: ${{ runner.temp }}

  # Aggregate all the assets and ship a release
  release:
@@ -366,33 +437,24 @@ jobs:
      GH_TOKEN: ${{ github.token }}
    steps:
      - uses: actions/checkout@v4
-      - name: Set Version
-        shell: bash
-        run: |
      - uses: actions/download-artifact@v4
        with:
+          name: dist-darwin
          path: dist
-          pattern: dist-darwin
      - uses: actions/download-artifact@v4
        with:
+          name: dist-windows
          path: dist
-          pattern: dist-windows
      - uses: actions/download-artifact@v4
        with:
-          path: dist
          pattern: dist-linux-*
-      - uses: actions/download-artifact@v4
-        with:
          path: dist
-          pattern: dist-windows
-      - run: |
-          ls -lh dist/
-          (cd dist; find . -type f | xargs sha256sum > ../sha256sum.txt)
-          mv sha256sum.txt dist/
-          cat dist/sha256sum.txt
+          merge-multiple: true
+      - run: find . -type f -not -name 'sha256sum.txt' | xargs sha256sum | tee sha256sum.txt
+        working-directory: dist
      - name: Create or update Release
        run: |
-          RELEASE_VERSION=$(echo ${GITHUB_REF_NAME} | cut -f1 -d-)"
+          RELEASE_VERSION="$(echo ${GITHUB_REF_NAME} | cut -f1 -d-)"

          echo "Looking for existing release for ${RELEASE_VERSION}"
          OLD_TAG=$(gh release ls --json name,tagName | jq -r ".[] | select(.name == \"${RELEASE_VERSION}\") | .tagName")
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -163,5 +163,5 @@ jobs:
      - uses: actions/checkout@v4
      - name: Verify patches apply cleanly and do not change files
        run: |
-          make -f Makefile.sync clean checkout sync
+          make -f Makefile.sync clean sync
          git diff --compact-summary --exit-code
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,6 +29,11 @@ if((NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
    set(GGML_CPU_ALL_VARIANTS ON)
 endif()

+if (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
+    set(CMAKE_BUILD_RPATH "@loader_path")
+    set(CMAKE_INSTALL_RPATH "@loader_path")
+endif()
+
 set(OLLAMA_BUILD_DIR ${CMAKE_BINARY_DIR}/lib/ollama)
 set(OLLAMA_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/lib/ollama)

@@ -91,11 +96,12 @@ if(CMAKE_HIP_COMPILER)

    if(AMDGPU_TARGETS)
        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)
+
        set(OLLAMA_HIP_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/rocm)
        install(TARGETS ggml-hip
            RUNTIME_DEPENDENCIES
                DIRECTORIES ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR}
-                PRE_INCLUDE_REGEXES amdhip64 hipblas rocblas amd_comgr hsa_runtime64 rocprofiler-register drm_amdgpu drm numa
+                PRE_INCLUDE_REGEXES hipblas rocblas amdhip64 rocsolver amd_comgr hsa-runtime64 rocsparse tinfo rocprofiler-register drm drm_amdgpu numa elf
                PRE_EXCLUDE_REGEXES ".*"
                POST_EXCLUDE_REGEXES "system32"
            RUNTIME DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP
--- a/Makefile.sync
+++ b/Makefile.sync
@@ -15,7 +15,11 @@ help:
 	@echo "    make -f $(lastword $(MAKEFILE_LIST)) clean sync"

 .PHONY: sync
-sync: llama/llama.cpp ml/backend/ggml/ggml apply-patches
+sync: llama/build-info.cpp llama/llama.cpp ml/backend/ggml/ggml apply-patches
+
+.PHONY: llama/build-info.cpp
+llama/build-info.cpp: llama/build-info.cpp.in
+	sed -e 's|@FETCH_HEAD@|$(FETCH_HEAD)|' $< > $@

 .PHONY: llama/llama.cpp
 llama/llama.cpp: llama/vendor/ apply-patches
--- a/README.md
+++ b/README.md
@@ -369,7 +369,11 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Minima](https://github.com/dmayboroda/minima) (RAG with on-premises or fully local workflow)
 - [aidful-ollama-model-delete](https://github.com/AidfulAI/aidful-ollama-model-delete) (User interface for simplified model cleanup)
 - [Perplexica](https://github.com/ItzCrazyKns/Perplexica) (An AI-powered search engine & an open-source alternative to Perplexity AI)
+- [Ollama Chat WebUI for Docker](https://github.com/oslook/ollama-webui) (Support for local Docker deployment, lightweight Ollama web UI)
 - [AI Toolkit for Visual Studio Code](https://aka.ms/ai-tooklit/ollama-docs) (Microsoft-official VSCode extension to chat, test, evaluate models with Ollama support, and use them in your AI applications.)
+- [MinimalNextOllamaChat](https://github.com/anilkay/MinimalNextOllamaChat) (Minimal Web UI for Chat and Model Control)
+- [Chipper](https://github.com/TilmanGriesel/chipper) (AI interface for tinkerers: Ollama, Haystack RAG, Python)
+- [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)

 ### Cloud

@@ -533,6 +537,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [TextCraft](https://github.com/suncloudsmoon/TextCraft) (Copilot in Word alternative using Ollama)
 - [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow)
 - [TextLLaMA](https://github.com/adarshM84/TextLLaMA) A Chrome Extension that helps you write emails, correct grammar, and translate into any language
+- [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)

 ### Supported backends

@@ -543,3 +548,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
 - [HoneyHive](https://docs.honeyhive.ai/integrations/ollama) is an AI observability and evaluation platform for AI agents. Use HoneyHive to evaluate agent performance, interrogate failures, and monitor quality in production.
 - [Langfuse](https://langfuse.com/docs/integrations/ollama) is an open source LLM observability platform that enables teams to collaboratively monitor, evaluate and debug AI applications.
+- [MLflow Tracing](https://mlflow.org/docs/latest/llms/tracing/index.html#automatic-tracing) is an open source LLM observability tool with a convenient API to log and visualize traces, making it easy to debug and evaluate GenAI applications.
--- a/docs/api.md
+++ b/docs/api.md
@@ -306,7 +306,7 @@ curl http://localhost:11434/api/generate -d '{

 #### Response

-```
+```json
 {
  "model": "llava",
  "created_at": "2023-11-03T15:36:02.583064Z",
@@ -795,7 +795,7 @@ curl http://localhost:11434/api/chat -d '{

 ##### Request

-```
+```shell
 curl http://localhost:11434/api/chat -d '{
  "model": "llama3.2",
  "messages": [
@@ -870,7 +870,7 @@ If the messages array is empty, the model will be loaded into memory.

 ##### Request

-```
+```shell
 curl http://localhost:11434/api/chat -d '{
  "model": "llama3.2",
  "messages": []
@@ -897,7 +897,7 @@ If the messages array is empty and the `keep_alive` parameter is set to `0`, a m

 ##### Request

-```
+```shell
 curl http://localhost:11434/api/chat -d '{
  "model": "llama3.2",
  "messages": [],
--- a/docs/development.md
+++ b/docs/development.md
@@ -118,3 +118,14 @@ To run tests, use `go test`:
 ```
 go test ./...
 ```
+
+## Library detection
+
+Ollama looks for acceleration libraries in the following paths relative to the `ollama` executable:
+
+* `./lib/ollama` (Windows)
+* `../lib/ollama` (Linux)
+* `.` (macOS)
+* `build/lib/ollama` (for development)
+
+If the libraries are not found, Ollama will run without any acceleration libraries.
--- a/docs/linux.md
+++ b/docs/linux.md
@@ -152,7 +152,7 @@ Use `OLLAMA_VERSION` environment variable with the install script to install a s
 For example:

 ```shell
-curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.3.9 sh
+curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.5.7 sh
 ```

 ## Viewing logs
@@ -186,3 +186,9 @@ sudo rm -r /usr/share/ollama
 sudo userdel ollama
 sudo groupdel ollama
 ```
+
+Remove installed libraries:
+
+```shell
+sudo rm -rf /usr/local/lib/ollama
+```
--- a/format/bytes.go
+++ b/format/bytes.go
@@ -40,8 +40,6 @@ func HumanBytes(b int64) string {
 	}

 	switch {
-	case value >= 100:
-		return fmt.Sprintf("%d %s", int(value), unit)
 	case value >= 10:
 		return fmt.Sprintf("%d %s", int(value), unit)
 	case value != math.Trunc(value):
--- a/format/bytes_test.go
+++ b/format/bytes_test.go
@@ -0,0 +1,91 @@
+package format
+
+import (
+	"testing"
+)
+
+func TestHumanBytes(t *testing.T) {
+	type testCase struct {
+		input    int64
+		expected string
+	}
+
+	tests := []testCase{
+		// Test bytes (B)
+		{0, "0 B"},
+		{1, "1 B"},
+		{999, "999 B"},
+
+		// Test kilobytes (KB)
+		{1000, "1 KB"},
+		{1500, "1.5 KB"},
+		{999999, "999 KB"},
+
+		// Test megabytes (MB)
+		{1000000, "1 MB"},
+		{1500000, "1.5 MB"},
+		{999999999, "999 MB"},
+
+		// Test gigabytes (GB)
+		{1000000000, "1 GB"},
+		{1500000000, "1.5 GB"},
+		{999999999999, "999 GB"},
+
+		// Test terabytes (TB)
+		{1000000000000, "1 TB"},
+		{1500000000000, "1.5 TB"},
+		{1999999999999, "2.0 TB"},
+
+		// Test fractional values
+		{1234, "1.2 KB"},
+		{1234567, "1.2 MB"},
+		{1234567890, "1.2 GB"},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.expected, func(t *testing.T) {
+			result := HumanBytes(tc.input)
+			if result != tc.expected {
+				t.Errorf("Expected %s, got %s", tc.expected, result)
+			}
+		})
+	}
+}
+
+func TestHumanBytes2(t *testing.T) {
+	type testCase struct {
+		input    uint64
+		expected string
+	}
+
+	tests := []testCase{
+		// Test bytes (B)
+		{0, "0 B"},
+		{1, "1 B"},
+		{1023, "1023 B"},
+
+		// Test kibibytes (KiB)
+		{1024, "1.0 KiB"},
+		{1536, "1.5 KiB"},
+		{1048575, "1024.0 KiB"},
+
+		// Test mebibytes (MiB)
+		{1048576, "1.0 MiB"},
+		{1572864, "1.5 MiB"},
+		{1073741823, "1024.0 MiB"},
+
+		// Test gibibytes (GiB)
+		{1073741824, "1.0 GiB"},
+		{1610612736, "1.5 GiB"},
+		{2147483648, "2.0 GiB"},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.expected, func(t *testing.T) {
+			result := HumanBytes2(tc.input)
+			if result != tc.expected {
+				t.Errorf("Expected %s, got %s", tc.expected, result)
+			}
+		})
+	}
+}
--- a/llama/build-info.cpp
+++ b/llama/build-info.cpp
@@ -1,4 +1,4 @@
 int LLAMA_BUILD_NUMBER = 0;
-char const *LLAMA_COMMIT = "ba1cb19cdd0d92e012e0f6e009e0620f854b6afd";
+char const *LLAMA_COMMIT = "46e3556e01b824e52395fb050b29804b6cff2a7c";
 char const *LLAMA_COMPILER = "";
 char const *LLAMA_BUILD_TARGET = "";
--- a/llama/build-info.cpp.in
+++ b/llama/build-info.cpp.in
@@ -0,0 +1,4 @@
+int LLAMA_BUILD_NUMBER = 0;
+char const *LLAMA_COMMIT = "@FETCH_HEAD@";
+char const *LLAMA_COMPILER = "";
+char const *LLAMA_BUILD_TARGET = "";
--- a/llama/llama.cpp/examples/llava/clip.cpp
+++ b/llama/llama.cpp/examples/llava/clip.cpp
@@ -1235,35 +1235,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
        }
    }

-#ifdef GGML_USE_CUDA
-   new_clip->backend = ggml_backend_cuda_init(0);
-   LOG_INF("%s: CLIP using CUDA backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_METAL
-   new_clip->backend = ggml_backend_metal_init();
-   LOG_INF("%s: CLIP using Metal backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_CANN
-   new_clip->backend = ggml_backend_cann_init(0);
-   LOG_INF("%s: CLIP using CANN backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_VULKAN
-   new_clip->backend = ggml_backend_vk_init(0);
-   LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_SYCL
-   new_clip->backend = ggml_backend_sycl_init(0);
-   LOG_INF("%s: CLIP using SYCL backend\n", __func__);
-#endif
-
-    if (!new_clip->backend) {
-        new_clip->backend = ggml_backend_cpu_init();
-        LOG_INF("%s: CLIP using CPU backend\n", __func__);
+    ggml_backend_t backend = ggml_backend_init_best();
+    if (backend == nullptr) {
+        LOG_ERR("%s: failed to initialize backend\n", __func__);
+        clip_free(new_clip);
+        gguf_free(ctx);
+        return nullptr;
    }
+    LOG_INF("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+    new_clip->backend = backend;

    // model size and capabilities
    {
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -199,21 +199,25 @@ func (c *Context) KvCacheDefrag() {

 // Get the embeddings for a sequence id
 func (c *Context) GetEmbeddingsSeq(seqId int) []float32 {
-	embeddings := unsafe.Pointer(C.llama_get_embeddings_seq(c.c, C.int(seqId)))
-	if embeddings == nil {
+	e := unsafe.Pointer(C.llama_get_embeddings_seq(c.c, C.int(seqId)))
+	if e == nil {
 		return nil
 	}

-	return unsafe.Slice((*float32)(embeddings), c.Model().NEmbd())
+	embeddings := make([]float32, c.Model().NEmbd())
+	_ = copy(embeddings, unsafe.Slice((*float32)(e), c.Model().NEmbd()))
+	return embeddings
 }

 func (c *Context) GetEmbeddingsIth(i int) []float32 {
-	embeddings := unsafe.Pointer(C.llama_get_embeddings_ith(c.c, C.int32_t(i)))
-	if embeddings == nil {
+	e := unsafe.Pointer(C.llama_get_embeddings_ith(c.c, C.int32_t(i)))
+	if e == nil {
 		return nil
 	}

-	return unsafe.Slice((*float32)(embeddings), c.Model().NEmbd())
+	embeddings := make([]float32, c.Model().NEmbd())
+	_ = copy(embeddings, unsafe.Slice((*float32)(e), c.Model().NEmbd()))
+	return embeddings
 }

 type ModelParams struct {
--- a/llama/mllama.cpp
+++ b/llama/mllama.cpp
@@ -558,30 +558,15 @@ struct mllama_ctx *mllama_model_load(const char *fname, const int verbosity = 1)

    mllama_ctx *new_mllama = new mllama_ctx{};

-#ifdef GGML_USE_CUDA
-    new_mllama->backend = ggml_backend_cuda_init(0);
-    LOG("vision using CUDA backend");
-#endif
-
-#ifdef GGML_USE_METAL
-    new_mllama->backend = ggml_backend_metal_init();
-    LOG("vision using Metal backend");
-#endif
-
-#ifdef GGML_USE_CANN
-    new_mllama->backend = ggml_backend_cann_init(0);
-    LOG("vision using CANN backend");
-#endif
-
-#ifdef GGML_USE_VULKAN
-    new_mllama->backend = ggml_backend_vk_init(0);
-    LOG("vision using Vulkan backend");
-#endif
-
-    if (!new_mllama->backend) {
-        new_mllama->backend = ggml_backend_cpu_init();
-        LOG("vision using CPU backend");
+    ggml_backend_t backend = ggml_backend_init_best();
+    if (backend == nullptr) {
+        LOG("%s: failed to initialize backend\n", __func__);
+        mllama_free(new_mllama);
+        gguf_free(ctx);
+        return nullptr;
    }
+    LOG("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+    new_mllama->backend = backend;

    // load tensors
    {
--- a/llama/patches/0013-use-dynamic-backend-loading-for-clip.patch
+++ b/llama/patches/0013-use-dynamic-backend-loading-for-clip.patch
@@ -1,14 +1,14 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: jmorganca <jmorganca@gmail.com>
 Date: Sat, 4 Jan 2025 22:52:48 -0800
-Subject: [PATCH] re-enable gpu for clip
+Subject: [PATCH] use dynamic backend loading for clip

 ---
- examples/llava/clip.cpp | 86 ++++++++++++++++++++---------------------
- 1 file changed, 43 insertions(+), 43 deletions(-)
+ examples/llava/clip.cpp | 74 +++++++++++++++--------------------------
+ 1 file changed, 27 insertions(+), 47 deletions(-)

 diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index b3c1829f..718052e1 100644
+index b3c1829f..86b91d5c 100644
 --- a/examples/llava/clip.cpp
 +++ b/examples/llava/clip.cpp
@@ -8,25 +8,25 @@
@@ -56,7 +56,7 @@ index b3c1829f..718052e1 100644
 
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
-@@ -1235,30 +1235,30 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
+@@ -1235,35 +1235,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         }
     }
 
@@ -84,30 +84,19 @@ index b3c1829f..718052e1 100644
 -//    new_clip->backend = ggml_backend_sycl_init(0);
 -//    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
 -//#endif
-+#ifdef GGML_USE_CUDA
-+   new_clip->backend = ggml_backend_cuda_init(0);
-+   LOG_INF("%s: CLIP using CUDA backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_METAL
-+   new_clip->backend = ggml_backend_metal_init();
-+   LOG_INF("%s: CLIP using Metal backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_CANN
-+   new_clip->backend = ggml_backend_cann_init(0);
-+   LOG_INF("%s: CLIP using CANN backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_VULKAN
-+   new_clip->backend = ggml_backend_vk_init(0);
-+   LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
-+#endif
-+
-+#ifdef GGML_USE_SYCL
-+   new_clip->backend = ggml_backend_sycl_init(0);
-+   LOG_INF("%s: CLIP using SYCL backend\n", __func__);
-+#endif
+-
+-    if (!new_clip->backend) {
+-        new_clip->backend = ggml_backend_cpu_init();
+-        LOG_INF("%s: CLIP using CPU backend\n", __func__);
+    ggml_backend_t backend = ggml_backend_init_best();
+    if (backend == nullptr) {
+        LOG_ERR("%s: failed to initialize backend\n", __func__);
+        clip_free(new_clip);
+        gguf_free(ctx);
+        return nullptr;
+     }
+    LOG_INF("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+    new_clip->backend = backend;
 
-     if (!new_clip->backend) {
-         new_clip->backend = ggml_backend_cpu_init();
+     // model size and capabilities
+     {
--- a/llm/server.go
+++ b/llm/server.go
@@ -89,7 +89,6 @@ func LoadModel(model string, maxArraySize int) (*GGML, error) {
 // NewLlamaServer will run a server for the given GPUs
 // The gpu list must be a single family.
 func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapters, projectors []string, opts api.Options, numParallel int) (LlamaServer, error) {
-	var err error
 	var systemTotalMemory uint64
 	var systemFreeMemory uint64
 	var systemSwapFreeMemory uint64
@@ -233,19 +232,9 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
 		params = append(params, "--multiuser-cache")
 	}

-	// get available libraries
-	if err != nil {
-		return nil, fmt.Errorf("could not get libollama dir: %w", err)
-	}
-
-	entries, err := os.ReadDir(discover.LibOllamaPath)
-	if err != nil {
-		return nil, fmt.Errorf("could not read libollama dir: %w", err)
-	}
-
 	libs := make(map[string]string)
-	for _, entry := range entries {
-		if entry.IsDir() {
+	if entries, err := os.ReadDir(discover.LibOllamaPath); err == nil {
+		for _, entry := range entries {
 			libs[entry.Name()] = filepath.Join(discover.LibOllamaPath, entry.Name())
 		}
 	}
@@ -285,16 +274,21 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
 			}
 		}
 		if port == 0 {
-			slog.Debug("ResolveTCPAddr failed ", "error", err)
+			slog.Debug("ResolveTCPAddr failed, using random port")
 			port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
 		}
 		finalParams := []string{"runner"}
 		finalParams = append(finalParams, params...)
 		finalParams = append(finalParams, "--port", strconv.Itoa(port))

-		pathEnv := "LD_LIBRARY_PATH"
-		if runtime.GOOS == "windows" {
+		var pathEnv string
+		switch runtime.GOOS {
+		case "windows":
 			pathEnv = "PATH"
+		case "darwin":
+			pathEnv = "DYLD_LIBRARY_PATH"
+		default:
+			pathEnv = "LD_LIBRARY_PATH"
 		}

 		var libraryPaths []string
@@ -396,7 +390,8 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
 					strings.HasPrefix(ev, "HSA_") ||
 					strings.HasPrefix(ev, "GGML_") ||
 					strings.HasPrefix(ev, "PATH=") ||
-					strings.HasPrefix(ev, "LD_LIBRARY_PATH=") {
+					strings.HasPrefix(ev, "LD_LIBRARY_PATH=") ||
+					strings.HasPrefix(ev, "DYLD_LIBRARY_PATH=") {
 					filteredEnv = append(filteredEnv, ev)
 				}
 			}
--- a/macapp/forge.config.ts
+++ b/macapp/forge.config.ts
@@ -19,7 +19,7 @@ const config: ForgeConfig = {
    icon: './assets/icon.icns',
    extraResource: [
      path.join(__dirname, '../dist/darwin/ollama'),
-      ...fs.readdirSync(path.join(__dirname, '../dist/darwin/amd64')).map(f => path.join(__dirname, '../dist/darwin/amd64', f)),
+      ...fs.readdirSync(path.join(__dirname, '../dist/darwin-amd64/lib/ollama')).map(f => path.join(__dirname, '../dist/darwin-amd64/lib/ollama', f)),
      path.join(__dirname, './assets/iconTemplate.png'),
      path.join(__dirname, './assets/iconTemplate@2x.png'),
      path.join(__dirname, './assets/iconUpdateTemplate.png'),
--- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/generate_cu_files.py
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/generate_cu_files.py
@@ -1,77 +0,0 @@
-#!/usr/bin/env python3
-
-from glob import glob
-import os
-
-TYPES_KV = ["GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0", "GGML_TYPE_F16"]
-
-SOURCE_FATTN_VEC = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f{vkq_size}.cuh"
-
-DECL_FATTN_VEC_F{vkq_size}_CASE({head_size}, {type_k}, {type_v});
-"""
-
-SOURCE_FATTN_WMMA_START = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-wmma-f16.cuh"
-
-"""
-
-SOURCE_FATTN_WMMA_CASE = "DECL_FATTN_WMMA_F16_CASE({head_size}, {cols_per_block}, {kq_acc_t});\n"
-
-TYPES_MMQ = [
-    "GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0",
-    "GGML_TYPE_Q2_K", "GGML_TYPE_Q3_K", "GGML_TYPE_Q4_K", "GGML_TYPE_Q5_K", "GGML_TYPE_Q6_K",
-    "GGML_TYPE_IQ2_XXS", "GGML_TYPE_IQ2_XS", "GGML_TYPE_IQ2_S", "GGML_TYPE_IQ3_XXS", "GGML_TYPE_IQ3_S",
-    "GGML_TYPE_IQ1_S", "GGML_TYPE_IQ4_NL", "GGML_TYPE_IQ4_XS"
-]
-
-SOURCE_MMQ = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../mmq.cuh"
-
-DECL_MMQ_CASE({type});
-"""
-
-
-def get_short_name(long_quant_name):
-    return long_quant_name.replace("GGML_TYPE_", "").lower()
-
-
-def get_head_sizes(type_k, type_v):
-    if type_k == "GGML_TYPE_F16" and type_v == "GGML_TYPE_F16":
-        return [64, 128, 256]
-    if type_k == "GGML_TYPE_F16":
-        return [64, 128]
-    return [128]
-
-
-for filename in glob("*.cu"):
-    os.remove(filename)
-
-for vkq_size in [16, 32]:
-    for type_k in TYPES_KV:
-        for type_v in TYPES_KV:
-            for head_size in get_head_sizes(type_k, type_v):
-                with open(f"fattn-vec-f{vkq_size}-instance-hs{head_size}-{get_short_name(type_k)}-{get_short_name(type_v)}.cu", "w") as f:
-                    f.write(SOURCE_FATTN_VEC.format(vkq_size=vkq_size, head_size=head_size, type_k=type_k, type_v=type_v))
-
-for kq_acc_t in ["half", "float"]:
-    for cols_per_block in [8, 16, 32]:
-        if kq_acc_t == "float" and cols_per_block == 8:
-            continue
-
-        with open(f"fattn-wmma-f16-instance-kq{kq_acc_t}-cpb{cols_per_block}.cu", "w") as f:
-            f.write(SOURCE_FATTN_WMMA_START)
-
-            for head_size in [64, 80, 96, 112, 128, 256]:
-                if cols_per_block == 8 and head_size % 32 != 0: # wmma fragment is 8x32
-                    continue
-                if kq_acc_t == "float" and cols_per_block == 32 and head_size == 256: # register spilling, bad performance
-                    continue
-                f.write(SOURCE_FATTN_WMMA_CASE.format(kq_acc_t=kq_acc_t, cols_per_block=cols_per_block, head_size=head_size))
-
-for type in TYPES_MMQ:
-    with open(f"mmq-instance-{get_short_name(type)}.cu", "w") as f:
-        f.write(SOURCE_MMQ.format(type=type))
--- a/ml/backend/ggml/ggml/src/ggml.go
+++ b/ml/backend/ggml/ggml/src/ggml.go
@@ -41,36 +41,48 @@ func sink(level C.int, text *C.char, _ unsafe.Pointer) {
 }

 var OnceLoad = sync.OnceFunc(func() {
-	var lib struct{ name, defaultValue string }
+	exe, err := os.Executable()
+	if err != nil {
+		slog.Warn("failed to get executable path", "error", err)
+		exe = "."
+	}
+
+	// PATH, LD_LIBRARY_PATH, and DYLD_LIBRARY_PATH are often
+	// set by the parent process, however, use a default value
+	// if the environment variable is not set.
+	var name, value string
 	switch runtime.GOOS {
-	case "darwin", "linux":
-		lib.name = "LD_LIBRARY_PATH"
-		lib.defaultValue = "/usr/local/lib:/usr/lib"
+	case "darwin":
+		// On macOS, DYLD_LIBRARY_PATH is often not set, so
+		// we use the directory of the executable as the default.
+		name = "DYLD_LIBRARY_PATH"
+		value = filepath.Dir(exe)
 	case "windows":
-		lib.name = "PATH"
-		lib.defaultValue = "."
+		name = "PATH"
+		value = filepath.Join(filepath.Dir(exe), "lib", "ollama")
 	default:
-		return
+		name = "LD_LIBRARY_PATH"
+		value = filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
 	}

-	paths, ok := os.LookupEnv(lib.name)
+	paths, ok := os.LookupEnv(name)
 	if !ok {
-		paths = lib.defaultValue
-	}
-
-	if runtime.GOOS == "darwin" {
-		if _, ok := os.LookupEnv("DYLD_LIBRARY_PATH"); !ok {
-			os.Setenv("DYLD_LIBRARY_PATH", paths)
-		}
+		paths = value
 	}

 	split := filepath.SplitList(paths)
 	visited := make(map[string]struct{}, len(split))
 	for _, path := range split {
-		abspath, _ := filepath.Abs(path)
+		abspath, err := filepath.Abs(path)
+		if err != nil {
+			slog.Error("failed to get absolute path", "error", err)
+			continue
+		}
+
 		if _, ok := visited[abspath]; !ok {
 			func() {
-				cpath := C.CString(path)
+				slog.Debug("ggml backend load all from path", "path", abspath)
+				cpath := C.CString(abspath)
 				defer C.free(unsafe.Pointer(cpath))
 				C.ggml_backend_load_all_from_path(cpath)
 			}()
--- a/scripts/build_darwin.sh
+++ b/scripts/build_darwin.sh
@@ -32,35 +32,35 @@ _build_darwin() {
            status "Building darwin $ARCH dynamic backends"
            cmake -B build/darwin-$ARCH \
                -DCMAKE_OSX_ARCHITECTURES=x86_64 \
-                -DCMAKE_OSX_DEPLOYMENT_TARGET=11.3
+                -DCMAKE_OSX_DEPLOYMENT_TARGET=11.3 \
+                -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX
            cmake --build build/darwin-$ARCH --target ggml-cpu -j
-            install build/darwin-$ARCH/lib/ollama/*.{dylib,so} $INSTALL_PREFIX
+            cmake --install build/darwin-$ARCH --component CPU
        fi
    done
 }

 _sign_darwin() {
    status "Creating universal binary..."
-    lipo -create -output dist/darwin/ollama dist/darwin/*/ollama
+    mkdir -p dist/darwin
+    lipo -create -output dist/darwin/ollama dist/darwin-*/ollama
+    chmod +x dist/darwin/ollama

-    if [ -z "$APPLE_IDENTITY" ]; then
-        status "No APPLE_IDENTITY set, skipping code signing"
-        return
+    if [ -n "$APPLE_IDENTITY" ]; then
+        for F in dist/darwin/ollama dist/darwin-amd64/lib/ollama/*; do
+            codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime $F
+        done
+
+        # create a temporary zip for notarization
+        TEMP=$(mktemp -u).zip
+        ditto -c -k --keepParent dist/darwin/ollama "$TEMP"
+        xcrun notarytool submit "$TEMP" --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
+        rm -f "$TEMP"
    fi

-    for F in dist/darwin/ollama dist/darwin/amd64/lib*; do
-        codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime $F
-    done
-
-    # create a temporary zip for notarization
-    TEMP=$(mktemp -u).zip
-    ditto -c -k --keepParent dist/darwin/ollama "$TEMP"
-    xcrun notarytool submit dist/darwin/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
-    rm -f "$TEMP"
-
-    # create a universal tarball
+    status "Creating universal tarball..."
    tar -cf dist/ollama-darwin.tar --strip-components 2 dist/darwin/ollama
-    tar -rf dist/ollama-darwin.tar --strip-components 3 dist/darwin/amd64/lib*
+    tar -rf dist/ollama-darwin.tar --strip-components 4 dist/darwin-amd64/lib/
    gzip -9vc <dist/ollama-darwin.tar >dist/ollama-darwin.tgz
 }

--- a/scripts/build_windows.ps1
+++ b/scripts/build_windows.ps1
@@ -208,8 +208,15 @@ function buildInstaller() {
 }

 function distZip() {
-    write-host "Generating stand-alone distribution zip file ${script:SRC_DIR}\dist\ollama-windows-${script:TARGET_ARCH}.zip"
-    Compress-Archive -Path "${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}\*" -DestinationPath "${script:SRC_DIR}\dist\ollama-windows-${script:TARGET_ARCH}.zip" -Force
+    if (Test-Path -Path "${script:SRC_DIR}\dist\windows-amd64") {
+        write-host "Generating stand-alone distribution zip file ${script:SRC_DIR}\dist\ollama-windows-amd64.zip"
+        Compress-Archive -Path "${script:SRC_DIR}\dist\windows-amd64\*" -DestinationPath "${script:SRC_DIR}\dist\ollama-windows-amd64.zip" -Force
+    }
+
+    if (Test-Path -Path "${script:SRC_DIR}\dist\windows-arm64") {
+        write-host "Generating stand-alone distribution zip file ${script:SRC_DIR}\dist\ollama-windows-arm64.zip"
+        Compress-Archive -Path "${script:SRC_DIR}\dist\windows-arm64\*" -DestinationPath "${script:SRC_DIR}\dist\ollama-windows-arm64.zip" -Force
+    }
 }

 checkEnv
--- a/server/download.go
+++ b/server/download.go
@@ -172,7 +172,10 @@ func (b *blobDownload) Prepare(ctx context.Context, requestURL *url.URL, opts *r
 		}
 	}

-	slog.Info(fmt.Sprintf("downloading %s in %d %s part(s)", b.Digest[7:19], len(b.Parts), format.HumanBytes(b.Parts[0].Size)))
+	if len(b.Parts) > 0 {
+		slog.Info(fmt.Sprintf("downloading %s in %d %s part(s)", b.Digest[7:19], len(b.Parts), format.HumanBytes(b.Parts[0].Size)))
+	}
+
 	return nil
 }

@@ -365,7 +368,7 @@ func (b *blobDownload) downloadChunk(ctx context.Context, requestURL *url.URL, w
 				lastUpdated := part.lastUpdated
 				part.lastUpdatedMu.Unlock()

-				if !lastUpdated.IsZero() && time.Since(lastUpdated) > 5*time.Second {
+				if !lastUpdated.IsZero() && time.Since(lastUpdated) > 30*time.Second {
 					const msg = "%s part %d stalled; retrying. If this persists, press ctrl-c to exit, then 'ollama pull' to find a faster connection."
 					slog.Info(fmt.Sprintf(msg, b.Digest[7:19], part.N))
 					// reset last updated
--- a/server/upload.go
+++ b/server/upload.go
@@ -108,7 +108,9 @@ func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *reg
 		offset += size
 	}

-	slog.Info(fmt.Sprintf("uploading %s in %d %s part(s)", b.Digest[7:19], len(b.Parts), format.HumanBytes(b.Parts[0].Size)))
+	if len(b.Parts) > 0 {
+		slog.Info(fmt.Sprintf("uploading %s in %d %s part(s)", b.Digest[7:19], len(b.Parts), format.HumanBytes(b.Parts[0].Size)))
+	}

 	requestURL, err = url.Parse(location)
 	if err != nil {
Author	SHA1	Message	Date
CosmicEventHorizon	e8d4eb3e68	readme: add ChibiChat to community integrations (#8883 )	2025-02-06 16:08:46 -08:00
Michael Yang	ae7e368f75	build(rocm): add numa, elf (#8900 )	2025-02-06 15:46:30 -08:00
oslook	31acd1ebf9	readme: add Ollama Chat WebUI for Docker to community integrations (#8084 )	2025-02-06 15:41:02 -08:00
Michael Yang	9a4757ae66	build(rocm): add tinfo (#8899 )	2025-02-06 15:08:12 -08:00
Abhinav Pant	7814019708	docs: add step for removing libraries in linux.md (#8897 )	2025-02-06 14:54:58 -08:00
Michael Yang	b698f9a0d8	build: add missing dependencies (#8896 )	2025-02-06 13:12:16 -08:00
Azis Alvriyanto	32285a6d19	format: rename test file from byte_test.go to bytes_test.go (#8865 )	2025-02-06 13:06:15 -08:00
Michael Yang	1c198977ec	ci: fix linux archive (#8862 ) — the find returns intermediate directories which pulls the parent directories. it also omits files under lib/ollama. switch back to globbing	2025-02-05 19:45:58 -08:00
zyphixor	330b6c50b0	readme: add simple-discord-ai to community integrations (#8659 )	2025-02-05 18:35:04 -08:00
Diego Pereira	928911bc68	runner: avoid buffer overwrite when generating multiple embeddings (#8714 ) — Shield the code processing the embedding result from subsequent calls that may overwrite the same buffer to process a second input when retrieving model embeddings.	2025-02-05 16:53:33 -08:00
Michael Yang	5b446cc815	chore: update gitattributes (#8860 ) — chore: update gitattributes; chore: add build info source	2025-02-05 16:37:18 -08:00
Daniel Lok	451c1596af	readme: add MLflow Tracing as an observability integration (#8811 )	2025-02-05 16:04:24 -08:00
Michael Yang	932bded12f	chore: add optional field for server logs	2025-02-05 15:55:32 -08:00
Michael Yang	070ad913ac	ci: fix linux archive	2025-02-05 15:08:02 -08:00
Azis Alvriyanto	8d8b9f83ae	format: byte formatting test coverage (#8692 ) — Removed redundant checks and streamlined the switch-case structure. Added test cases for both HumanBytes and HumanBytes2 to cover a wide range of scenarios.	2025-02-05 12:23:07 -08:00
Jeffrey Morgan	f00d359a67	docs: add section in development.md on library detection (#8855 )	2025-02-05 11:16:27 -08:00
Yashwanth A	291def6adb	server: increase timeout in stall detection from 5s to 30s (#8831 ) — In some cases, downloads slow due to disk i/o or other factors, causing the download to restart a part. This causes the download to "reverse" in percent completion. By increasing the timeout to 30s, this should happen less frequently.	2025-02-05 10:00:26 -08:00
Jeffrey Morgan	cd3fbf1c49	llama: use dynamic backend loading for mllama and clip (#8835 )	2025-02-05 09:46:56 -08:00
Jeffrey Morgan	c852b8e021	server: always print upload/download part info (#8832 )	2025-02-04 19:30:49 -08:00
William	d8932c55e7	server: fix out of bounds exception on model download (#8746 )	2025-02-04 18:52:47 -08:00
Michael Yang	63f0269f7f	ci: split docker build by platform — this improves build reliability and concurrency	2025-02-04 17:04:27 -08:00
Jeffrey Morgan	4759ecae19	ml/backend/ggml: fix library loading on macOS amd64 (#8827 )	2025-02-04 15:05:39 -08:00
Michael Yang	65b7ecac7b	fix extra quote	2025-02-04 08:35:30 -08:00
Michael Yang	f9d2d89135	fix linux archive	2025-02-03 16:12:33 -08:00
Michael Yang	669dc31cf3	fix build	2025-02-03 15:10:51 -08:00
Tilman Griesel	d4d338c224	readme: add Chipper to community integrations (#8803 )	2025-02-03 14:18:19 -08:00
Melroy van den Berg	bfdeffc375	docs: use OLLAMA_VERSION=0.5.7 for install version override (#8802 )	2025-02-03 13:54:08 -08:00
Michael Yang	e806184023	fix release workflow	2025-02-03 13:19:57 -08:00
Jeffrey Morgan	50566113ac	llm: do not error if LibOllamaPath does not exist (#8801 )	2025-02-03 12:27:48 -08:00
Davide Bertoni	ad22ace439	docs: add missing json and shell code blocks in api.md (#8766 )	2025-02-02 13:12:55 -08:00
Anıl Kaynar	f4321a421c	readme: add MinimalNextOllamaChat to community integrations (#8767 )	2025-02-02 12:56:10 -08:00