rename /api/tags to /api/list

consistent cpu instructions on macos and linux
Merge pull request #1245 from jmorganca/mxyng/gguf-int
2023-11-23 14:48:35 -05:00 · 2023-11-22 16:26:46 -05:00 · 2023-11-22 11:42:56 -08:00 · 2023-11-22 11:40:30 -08:00 · 2023-11-22 14:32:30 -05:00 · 2023-11-22 14:32:17 -05:00
9 changed files with 73 additions and 82 deletions
--- a/README.md
+++ b/README.md
@@ -276,3 +276,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
 - [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
 - [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
+- [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)
--- a/api/client.go
+++ b/api/client.go
@@ -262,7 +262,7 @@ func (c *Client) Create(ctx context.Context, req *CreateRequest, fn CreateProgre

 func (c *Client) List(ctx context.Context) (*ListResponse, error) {
 	var lr ListResponse
-	if err := c.do(ctx, http.MethodGet, "/api/tags", nil, &lr); err != nil {
+	if err := c.do(ctx, http.MethodGet, "/api/list", nil, &lr); err != nil {
 		return nil, err
 	}
 	return &lr, nil
--- a/api/client.py
+++ b/api/client.py
@@ -157,7 +157,7 @@ def push(model_name, insecure=False, callback=None):
 # List models that are available locally.
 def list():
    try:
-        response = requests.get(f"{BASE_URL}/api/tags")
+        response = requests.get(f"{BASE_URL}/api/list")
        response.raise_for_status()
        data = response.json()
        models = data.get('models', [])
--- a/docs/api.md
+++ b/docs/api.md
@@ -372,10 +372,10 @@ curl -T model.bin -X POST http://localhost:11434/api/blobs/sha256:29fdb92e57cf08

 Return 201 Created if the blob was successfully created.

-## List Local Models
+## List Models

 ```shell
-GET /api/tags
+GET /api/list
 ```

 List models that are available locally.
@@ -385,7 +385,7 @@ List models that are available locally.
 #### Request

 ```shell
-curl http://localhost:11434/api/tags
+curl http://localhost:11434/api/list
 ```

 #### Response
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -335,7 +335,7 @@ func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
 		case ggufTypeUint8:
 			arr = append(arr, llm.readU8(r))
 		case ggufTypeInt8:
-			arr = append(arr, llm.readU8(r))
+			arr = append(arr, llm.readI8(r))
 		case ggufTypeUint16:
 			arr = append(arr, llm.readU16(r))
 		case ggufTypeInt16:
@@ -376,7 +376,7 @@ func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
 		case ggufTypeUint8:
 			arr = append(arr, llm.readU8(r))
 		case ggufTypeInt8:
-			arr = append(arr, llm.readU8(r))
+			arr = append(arr, llm.readI8(r))
 		case ggufTypeUint16:
 			arr = append(arr, llm.readU16(r))
 		case ggufTypeInt16:
--- a/llm/llama.cpp/generate_darwin_amd64.go
+++ b/llm/llama.cpp/generate_darwin_amd64.go
@@ -13,6 +13,6 @@ package llm

 //go:generate git submodule update --force gguf
 //go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
-//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on
+//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
 //go:generate cmake --build gguf/build/cpu --target server --config Release
 //go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner
--- a/scripts/build_docker.sh
+++ b/scripts/build_docker.sh
@@ -13,5 +13,5 @@ docker buildx build \
    --cache-from type=local,src=.cache \
    --cache-to type=local,dest=.cache \
    -f Dockerfile \
-    -t ollama \
+    -t ollama/ollama:$VERSION \
    .
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -8,16 +8,20 @@ status() { echo ">>> $*" >&2; }
 error() { echo "ERROR $*"; exit 1; }
 warning() { echo "WARNING: $*"; }

-runnable() { command -v "$1" >/dev/null; }
+TEMP_DIR=$(mktemp -d)
+cleanup() { rm -rf $TEMP_DIR; }
+trap cleanup EXIT
+
+available() { command -v $1 >/dev/null; }
 require() {
-    MISSING=''
-    for TOOL in "$@"; do
-        if ! runnable "$TOOL"; then
+    local MISSING=''
+    for TOOL in $*; do
+        if ! available $TOOL; then
            MISSING="$MISSING $TOOL"
        fi
    done

-    echo "$MISSING"
+    echo $MISSING
 }

 [ "$(uname -s)" = "Linux" ] || error 'This script is intended to run on Linux only.'
@@ -32,7 +36,7 @@ esac
 SUDO=
 if [ "$(id -u)" -ne 0 ]; then
    # Running as root, no need for sudo
-    if ! runnable sudo; then
+    if ! available sudo; then
        error "This script requires superuser permissions. Please re-run as root."
    fi

@@ -48,43 +52,22 @@ if [ -n "$NEEDS" ]; then
    exit 1
 fi

-TEMP_DIR=$(mktemp -d)
-cleanup() {
-    EXIT_CODE=$?
-    rm -rf "$TEMP_DIR"
-
-    if runnable nvidia-smi && lsmod | grep -qv nvidia; then
-        status 'Reboot to complete NVIDIA CUDA driver install.'
-    fi
-
-    if runnable systemctl >/dev/null; then
-        $SUDO systemctl restart ollama
-
-        timeout 10 sh -c 'while :; do [ "$(curl -s http://127.0.0.1:11434)" = "Ollama is running" ] && break; sleep 0.2; done' \
-            && status 'Ollama service is available at 127.0.0.1:11434' \
-            || true
-    fi
-
-    if runnable ollama; then
-        status 'Install completed. Run "ollama --help" to get started.'
-    fi
-
-    exit $EXIT_CODE
-}
-trap cleanup EXIT
-
 status "Downloading ollama..."
-curl --fail --show-error --location --progress-bar -o "$TEMP_DIR/ollama" "https://ollama.ai/download/ollama-linux-$ARCH"
+curl --fail --show-error --location --progress-bar -o $TEMP_DIR/ollama "https://ollama.ai/download/ollama-linux-$ARCH"

-for BIN_DIR in /usr/local/bin /usr/bin /bin; do
-    if echo "$PATH" | grep -q $BIN_DIR; then
-        break
-    fi
+for BINDIR in /usr/local/bin /usr/bin /bin; do
+    echo $PATH | grep -q $BINDIR && break || continue
 done

-status "Installing ollama to $BIN_DIR..."
-$SUDO install -o0 -g0 -m755 -d "$BIN_DIR"
-$SUDO install -o0 -g0 -m755 "$TEMP_DIR/ollama" "$BIN_DIR/ollama"
+status "Installing ollama to $BINDIR..."
+$SUDO install -o0 -g0 -m755 -d $BINDIR
+$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama
+
+install_success() { 
+    status 'The Ollama API is now available at 0.0.0.0:11434.'
+    status 'Install complete. Run "ollama" from the command line.'
+}
+trap install_success EXIT

 # Everything from this point onwards is optional.

@@ -94,6 +77,9 @@ configure_systemd() {
        $SUDO useradd -r -s /bin/false -m -d /usr/share/ollama ollama
    fi

+    status "Adding current user to ollama group..."
+    $SUDO usermod -a -G ollama $(whoami)
+
    status "Creating ollama systemd service..."
    cat <<EOF | $SUDO tee /etc/systemd/system/ollama.service >/dev/null
 [Unit]
@@ -101,7 +87,7 @@ Description=Ollama Service
 After=network-online.target

 [Service]
-ExecStart=$BIN_DIR/ollama serve
+ExecStart=$BINDIR/ollama serve
 User=ollama
 Group=ollama
 Restart=always
@@ -117,36 +103,39 @@ EOF
            status "Enabling and starting ollama service..."
            $SUDO systemctl daemon-reload
            $SUDO systemctl enable ollama
+
+            start_service() { $SUDO systemctl restart ollama; }
+            trap start_service EXIT
            ;;
    esac
 }

-if runnable systemctl; then
+if available systemctl; then
    configure_systemd
 fi

+if ! available lspci && ! available lshw; then
+    warning "Unable to detect NVIDIA GPU. Install lspci or lshw to automatically detect and install NVIDIA CUDA drivers."
+    exit 0
+fi
+
 check_gpu() {
    case $1 in
-        lspci) runnable lspci && lspci -d '10de:' | grep -q 'NVIDIA' || return 1 ;;
-        lshw) runnable lshw && $SUDO lshw -c display -numeric | grep -q 'vendor: .* \[10DE\]' || return 1 ;;
-        nvidia-smi) runnable nvidia-smi || return 1 ;;
+        lspci) available lspci && lspci -d '10de:' | grep -q 'NVIDIA' || return 1 ;;
+        lshw) available lshw && $SUDO lshw -c display -numeric | grep -q 'vendor: .* \[10DE\]' || return 1 ;;
+        nvidia-smi) available nvidia-smi || return 1 ;;
    esac
 }

 if check_gpu nvidia-smi; then
    status "NVIDIA GPU installed."
-    exit
-fi
-
-if ! runnable lspci && ! runnable lshw; then
-    warning "Unable to detect NVIDIA GPU. Install lspci or lshw to automatically detect and install NVIDIA CUDA drivers."
-    exit
+    exit 0
 fi

 if ! check_gpu lspci && ! check_gpu lshw; then
    install_success
-    warning "No NVIDIA GPU detected. Ollama will run with CPU."
-    exit
+    warning "No NVIDIA GPU detected. Ollama will run in CPU-only mode."
+    exit 0
 fi

 # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-7-centos-7
@@ -158,10 +147,10 @@ install_cuda_driver_yum() {
    case $PACKAGE_MANAGER in
        yum)
            $SUDO $PACKAGE_MANAGER -y install yum-utils
-            $SUDO $PACKAGE_MANAGER-config-manager --add-repo "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo"
+            $SUDO $PACKAGE_MANAGER-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
            ;;
        dnf)
-            $SUDO $PACKAGE_MANAGER config-manager --add-repo "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo"
+            $SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
            ;;
    esac

@@ -169,7 +158,7 @@ install_cuda_driver_yum() {
        rhel)
            status 'Installing EPEL repository...'
            # EPEL is required for third-party dependencies such as dkms and libvdpau
-            $SUDO $PACKAGE_MANAGER -y install "https://dl.fedoraproject.org/pub/epel/epel-release-latest-$2.noarch.rpm" || true
+            $SUDO $PACKAGE_MANAGER -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-$2.noarch.rpm || true
            ;;
    esac

@@ -186,20 +175,20 @@ install_cuda_driver_yum() {
 # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#debian
 install_cuda_driver_apt() {
    status 'Installing NVIDIA repository...'
-    curl -fsSL -o "$TEMP_DIR/cuda-keyring.deb" "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb"
+    curl -fsSL -o $TEMP_DIR/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb

    case $1 in
        debian)
            status 'Enabling contrib sources...'
-            [ -f "/etc/apt/sources.list.d/debian.sources" ] \
-                && SOURCES_LIST="/etc/apt/sources.list.d/debian.sources" \
-                || SOURCES_LIST="/etc/apt/sources.list"
-            sed 's/main/contrib/' <"$SOURCES_LIST" | $SUDO tee /etc/apt/sources.list.d/contrib.sources >/dev/null
+            $SUDO sed 's/main/contrib/' < /etc/apt/sources.list | $SUDO tee /etc/apt/sources.list.d/contrib.list > /dev/null
+            if [ -f "/etc/apt/sources.list.d/debian.sources" ]; then
+                $SUDO sed 's/main/contrib/' < /etc/apt/sources.list.d/debian.sources | $SUDO tee /etc/apt/sources.list.d/contrib.sources > /dev/null
+            fi
            ;;
    esac

    status 'Installing CUDA driver...'
-    $SUDO dpkg -i "$TEMP_DIR/cuda-keyring.deb"
+    $SUDO dpkg -i $TEMP_DIR/cuda-keyring.deb
    $SUDO apt-get update

    [ -n "$SUDO" ] && SUDO_E="$SUDO -E" || SUDO_E=
@@ -217,7 +206,7 @@ OS_VERSION=$VERSION_ID

 PACKAGE_MANAGER=
 for PACKAGE_MANAGER in dnf yum apt-get; do
-    if runnable $PACKAGE_MANAGER; then
+    if available $PACKAGE_MANAGER; then
        break
    fi
 done
@@ -226,14 +215,14 @@ if [ -z "$PACKAGE_MANAGER" ]; then
    error "Unknown package manager. Skipping CUDA installation."
 fi

-if ! check_gpu nvidia-smi || nvidia-smi | grep -qo "CUDA Version: [0-9]*\.[0-9]*"; then
+if ! check_gpu nvidia-smi || [ -z "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")" ]; then
    case $OS_NAME in
-        centos|rhel) install_cuda_driver_yum 'rhel' "$OS_VERSION" ;;
-        rocky) install_cuda_driver_yum 'rhel' "$(echo "$OS_VERSION" | cut -c1)" ;;
-        fedora) install_cuda_driver_yum "$OS_NAME" "$OS_VERSION" ;;
+        centos|rhel) install_cuda_driver_yum 'rhel' $OS_VERSION ;;
+        rocky) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -c1) ;;
+        fedora) install_cuda_driver_yum $OS_NAME $OS_VERSION ;;
        amzn) install_cuda_driver_yum 'fedora' '35' ;;
-        debian) install_cuda_driver_apt "$OS_NAME" "$OS_VERSION" ;;
-        ubuntu) install_cuda_driver_apt "$OS_NAME" "$(echo "$OS_VERSION" | sed 's/\.//')" ;;
+        debian) install_cuda_driver_apt $OS_NAME $OS_VERSION ;;
+        ubuntu) install_cuda_driver_apt $OS_NAME $(echo $OS_VERSION | sed 's/\.//') ;;
        *) exit ;;
    esac
 fi
@@ -241,20 +230,20 @@ fi
 if ! lsmod | grep -q nvidia; then
    KERNEL_RELEASE="$(uname -r)"
    case $OS_NAME in
-        centos|rhel|rocky|amzn) $SUDO $PACKAGE_MANAGER -y install "kernel-devel-$KERNEL_RELEASE" "kernel-headers-$KERNEL_RELEASE" ;;
-        fedora) $SUDO $PACKAGE_MANAGER -y install "kernel-devel-$KERNEL_RELEASE" ;;
-        debian|ubuntu) $SUDO apt-get -y install "linux-headers-$KERNEL_RELEASE" ;;
+        centos|rhel|rocky|amzn) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE kernel-headers-$KERNEL_RELEASE ;;
+        fedora) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE ;;
+        debian|ubuntu) $SUDO apt-get -y install linux-headers-$KERNEL_RELEASE ;;
        *) exit ;;
    esac

    NVIDIA_CUDA_VERSION=$($SUDO dkms status | awk -F: '/added/ { print $1 }')
    if [ -n "$NVIDIA_CUDA_VERSION" ]; then
-        $SUDO dkms install "$NVIDIA_CUDA_VERSION"
+        $SUDO dkms install $NVIDIA_CUDA_VERSION
    fi

    if lsmod | grep -q nouveau; then
        status 'Reboot to complete NVIDIA CUDA driver install.'
-        exit
+        exit 0
    fi

    $SUDO modprobe nvidia
--- a/server/routes.go
+++ b/server/routes.go
@@ -771,6 +771,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {
 			c.String(http.StatusOK, "Ollama is running")
 		})

+		r.Handle(method, "/api/list", ListModelsHandler)
 		r.Handle(method, "/api/tags", ListModelsHandler)
 	}
Author	SHA1	Message	Date
Jeffrey Morgan	7396030d60	rename `/api/tags` to `/api/list`	2023-11-23 14:48:35 -05:00
Jeffrey Morgan	d77dde126b	consistent cpu instructions on macos and linux	2023-11-22 16:26:46 -05:00
Michael Yang	c7e70cd3bb	Merge pull request #1245 from jmorganca/mxyng/gguf-int — fix: gguf int type	2023-11-22 11:42:56 -08:00
Michael Yang	199941cd15	fix: gguf int type	2023-11-22 11:40:30 -08:00
Long Huynh	c9474f7f61	Update README.md - Community Integrations - Obsidian BMO Chatbot plugin (#1239)	2023-11-22 14:32:30 -05:00
Jeffrey Morgan	927e3ba4a4	tag image with correct version when building with `build_docker` script	2023-11-22 14:32:17 -05:00