Compare commits

..

6 Commits

Author SHA1 Message Date
Jeffrey Morgan
7396030d60 rename /api/tags to /api/list 2023-11-23 14:48:35 -05:00
Jeffrey Morgan
d77dde126b consistent cpu instructions on macos and linux 2023-11-22 16:26:46 -05:00
Michael Yang
c7e70cd3bb Merge pull request #1245 from jmorganca/mxyng/gguf-int
fix: gguf int type
2023-11-22 11:42:56 -08:00
Michael Yang
199941cd15 fix: gguf int type 2023-11-22 11:40:30 -08:00
Long Huynh
c9474f7f61 Update README.md - Community Integrations - Obsidian BMO Chatbot plugin (#1239) 2023-11-22 14:32:30 -05:00
Jeffrey Morgan
927e3ba4a4 tag image with correct version when building with build_docker script 2023-11-22 14:32:17 -05:00
9 changed files with 73 additions and 82 deletions

View File

@@ -276,3 +276,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
- [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)

View File

@@ -262,7 +262,7 @@ func (c *Client) Create(ctx context.Context, req *CreateRequest, fn CreateProgre
func (c *Client) List(ctx context.Context) (*ListResponse, error) {
var lr ListResponse
if err := c.do(ctx, http.MethodGet, "/api/tags", nil, &lr); err != nil {
if err := c.do(ctx, http.MethodGet, "/api/list", nil, &lr); err != nil {
return nil, err
}
return &lr, nil

View File

@@ -157,7 +157,7 @@ def push(model_name, insecure=False, callback=None):
# List models that are available locally.
def list():
try:
response = requests.get(f"{BASE_URL}/api/tags")
response = requests.get(f"{BASE_URL}/api/list")
response.raise_for_status()
data = response.json()
models = data.get('models', [])

View File

@@ -372,10 +372,10 @@ curl -T model.bin -X POST http://localhost:11434/api/blobs/sha256:29fdb92e57cf08
Return 201 Created if the blob was successfully created.
## List Local Models
## List Models
```shell
GET /api/tags
GET /api/list
```
List models that are available locally.
@@ -385,7 +385,7 @@ List models that are available locally.
#### Request
```shell
curl http://localhost:11434/api/tags
curl http://localhost:11434/api/list
```
#### Response

View File

@@ -335,7 +335,7 @@ func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
case ggufTypeUint8:
arr = append(arr, llm.readU8(r))
case ggufTypeInt8:
arr = append(arr, llm.readU8(r))
arr = append(arr, llm.readI8(r))
case ggufTypeUint16:
arr = append(arr, llm.readU16(r))
case ggufTypeInt16:
@@ -376,7 +376,7 @@ func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
case ggufTypeUint8:
arr = append(arr, llm.readU8(r))
case ggufTypeInt8:
arr = append(arr, llm.readU8(r))
arr = append(arr, llm.readI8(r))
case ggufTypeUint16:
arr = append(arr, llm.readU16(r))
case ggufTypeInt16:

View File

@@ -13,6 +13,6 @@ package llm
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner

View File

@@ -13,5 +13,5 @@ docker buildx build \
--cache-from type=local,src=.cache \
--cache-to type=local,dest=.cache \
-f Dockerfile \
-t ollama \
-t ollama/ollama:$VERSION \
.

View File

@@ -8,16 +8,20 @@ status() { echo ">>> $*" >&2; }
# error prints its arguments prefixed with "ERROR " and ends the script
# with exit status 1.
error() {
    echo "ERROR $*"
    exit 1
}
# warning prints its arguments prefixed with "WARNING: " and lets the
# script carry on.
warning() {
    echo "WARNING: $*"
}
# runnable succeeds when the command named by its first argument can be
# resolved by the shell. Nothing is printed: the resolved path is discarded.
runnable() {
    command -v "$1" >/dev/null
}
TEMP_DIR=$(mktemp -d)
# cleanup removes the temporary download directory named by TEMP_DIR.
# The expansion is quoted so a path containing whitespace or glob
# characters is removed as one path instead of being split into several.
cleanup() { rm -rf "$TEMP_DIR"; }
trap cleanup EXIT
# available succeeds when the command named by its first argument can be
# resolved by the shell; the resolved path is discarded. The argument is
# quoted so it is looked up as a single name and never word-split or
# glob-expanded.
available() { command -v "$1" >/dev/null; }
require() {
MISSING=''
for TOOL in "$@"; do
if ! runnable "$TOOL"; then
local MISSING=''
for TOOL in $*; do
if ! available $TOOL; then
MISSING="$MISSING $TOOL"
fi
done
echo "$MISSING"
echo $MISSING
}
[ "$(uname -s)" = "Linux" ] || error 'This script is intended to run on Linux only.'
@@ -32,7 +36,7 @@ esac
SUDO=
if [ "$(id -u)" -ne 0 ]; then
# Not running as root, so sudo is required
if ! runnable sudo; then
if ! available sudo; then
error "This script requires superuser permissions. Please re-run as root."
fi
@@ -48,43 +52,22 @@ if [ -n "$NEEDS" ]; then
exit 1
fi
TEMP_DIR=$(mktemp -d)
# cleanup is the EXIT trap handler. It removes the download directory,
# prints post-install hints, restarts the ollama service when systemctl is
# present, and finally exits with the status the script was already
# exiting with.
cleanup() {
    # Capture the exit status first; every command below overwrites $?.
    EXIT_CODE=$?
    rm -rf "$TEMP_DIR"

    # Hint at a reboot only when nvidia-smi is installed but no nvidia kernel
    # module is loaded. The previous `grep -qv nvidia` succeeded as soon as
    # any lsmod line lacked "nvidia", so the hint was shown even with the
    # module loaded.
    if runnable nvidia-smi && ! lsmod | grep -q nvidia; then
        status 'Reboot to complete NVIDIA CUDA driver install.'
    fi

    if runnable systemctl >/dev/null; then
        $SUDO systemctl restart ollama
        # Poll the local endpoint for up to 10 seconds. Not reaching it is
        # not treated as an error, hence the trailing `|| true`.
        timeout 10 sh -c 'while :; do [ "$(curl -s http://127.0.0.1:11434)" = "Ollama is running" ] && break; sleep 0.2; done' \
            && status 'Ollama service is available at 127.0.0.1:11434' \
            || true
    fi

    if runnable ollama; then
        status 'Install completed. Run "ollama --help" to get started.'
    fi

    # Re-raise the original exit status so the hints above never mask a failure.
    exit "$EXIT_CODE"
}
trap cleanup EXIT
status "Downloading ollama..."
curl --fail --show-error --location --progress-bar -o "$TEMP_DIR/ollama" "https://ollama.ai/download/ollama-linux-$ARCH"
curl --fail --show-error --location --progress-bar -o $TEMP_DIR/ollama "https://ollama.ai/download/ollama-linux-$ARCH"
for BIN_DIR in /usr/local/bin /usr/bin /bin; do
if echo "$PATH" | grep -q $BIN_DIR; then
break
fi
for BINDIR in /usr/local/bin /usr/bin /bin; do
echo $PATH | grep -q $BINDIR && break || continue
done
status "Installing ollama to $BIN_DIR..."
$SUDO install -o0 -g0 -m755 -d "$BIN_DIR"
$SUDO install -o0 -g0 -m755 "$TEMP_DIR/ollama" "$BIN_DIR/ollama"
status "Installing ollama to $BINDIR..."
$SUDO install -o0 -g0 -m755 -d $BINDIR
$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama
# install_success prints the two closing messages through status.
# NOTE(review): confirm 0.0.0.0 is the address the service really listens on
# before advertising it here.
install_success() {
status 'The Ollama API is now available at 0.0.0.0:11434.'
status 'Install complete. Run "ollama" from the command line.'
}
trap install_success EXIT
# Everything from this point onwards is optional.
@@ -94,6 +77,9 @@ configure_systemd() {
$SUDO useradd -r -s /bin/false -m -d /usr/share/ollama ollama
fi
status "Adding current user to ollama group..."
$SUDO usermod -a -G ollama $(whoami)
status "Creating ollama systemd service..."
cat <<EOF | $SUDO tee /etc/systemd/system/ollama.service >/dev/null
[Unit]
@@ -101,7 +87,7 @@ Description=Ollama Service
After=network-online.target
[Service]
ExecStart=$BIN_DIR/ollama serve
ExecStart=$BINDIR/ollama serve
User=ollama
Group=ollama
Restart=always
@@ -117,36 +103,39 @@ EOF
status "Enabling and starting ollama service..."
$SUDO systemctl daemon-reload
$SUDO systemctl enable ollama
start_service() { $SUDO systemctl restart ollama; }
trap start_service EXIT
;;
esac
}
if runnable systemctl; then
if available systemctl; then
configure_systemd
fi
if ! available lspci && ! available lshw; then
warning "Unable to detect NVIDIA GPU. Install lspci or lshw to automatically detect and install NVIDIA CUDA drivers."
exit 0
fi
check_gpu() {
case $1 in
lspci) runnable lspci && lspci -d '10de:' | grep -q 'NVIDIA' || return 1 ;;
lshw) runnable lshw && $SUDO lshw -c display -numeric | grep -q 'vendor: .* \[10DE\]' || return 1 ;;
nvidia-smi) runnable nvidia-smi || return 1 ;;
lspci) available lspci && lspci -d '10de:' | grep -q 'NVIDIA' || return 1 ;;
lshw) available lshw && $SUDO lshw -c display -numeric | grep -q 'vendor: .* \[10DE\]' || return 1 ;;
nvidia-smi) available nvidia-smi || return 1 ;;
esac
}
if check_gpu nvidia-smi; then
status "NVIDIA GPU installed."
exit
fi
if ! runnable lspci && ! runnable lshw; then
warning "Unable to detect NVIDIA GPU. Install lspci or lshw to automatically detect and install NVIDIA CUDA drivers."
exit
exit 0
fi
if ! check_gpu lspci && ! check_gpu lshw; then
install_success
warning "No NVIDIA GPU detected. Ollama will run with CPU."
exit
warning "No NVIDIA GPU detected. Ollama will run in CPU-only mode."
exit 0
fi
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-7-centos-7
@@ -158,10 +147,10 @@ install_cuda_driver_yum() {
case $PACKAGE_MANAGER in
yum)
$SUDO $PACKAGE_MANAGER -y install yum-utils
$SUDO $PACKAGE_MANAGER-config-manager --add-repo "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo"
$SUDO $PACKAGE_MANAGER-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
;;
dnf)
$SUDO $PACKAGE_MANAGER config-manager --add-repo "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo"
$SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
;;
esac
@@ -169,7 +158,7 @@ install_cuda_driver_yum() {
rhel)
status 'Installing EPEL repository...'
# EPEL is required for third-party dependencies such as dkms and libvdpau
$SUDO $PACKAGE_MANAGER -y install "https://dl.fedoraproject.org/pub/epel/epel-release-latest-$2.noarch.rpm" || true
$SUDO $PACKAGE_MANAGER -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-$2.noarch.rpm || true
;;
esac
@@ -186,20 +175,20 @@ install_cuda_driver_yum() {
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#debian
install_cuda_driver_apt() {
status 'Installing NVIDIA repository...'
curl -fsSL -o "$TEMP_DIR/cuda-keyring.deb" "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb"
curl -fsSL -o $TEMP_DIR/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb
case $1 in
debian)
status 'Enabling contrib sources...'
[ -f "/etc/apt/sources.list.d/debian.sources" ] \
&& SOURCES_LIST="/etc/apt/sources.list.d/debian.sources" \
|| SOURCES_LIST="/etc/apt/sources.list"
sed 's/main/contrib/' <"$SOURCES_LIST" | $SUDO tee /etc/apt/sources.list.d/contrib.sources >/dev/null
$SUDO sed 's/main/contrib/' < /etc/apt/sources.list | $SUDO tee /etc/apt/sources.list.d/contrib.list > /dev/null
if [ -f "/etc/apt/sources.list.d/debian.sources" ]; then
$SUDO sed 's/main/contrib/' < /etc/apt/sources.list.d/debian.sources | $SUDO tee /etc/apt/sources.list.d/contrib.sources > /dev/null
fi
;;
esac
status 'Installing CUDA driver...'
$SUDO dpkg -i "$TEMP_DIR/cuda-keyring.deb"
$SUDO dpkg -i $TEMP_DIR/cuda-keyring.deb
$SUDO apt-get update
[ -n "$SUDO" ] && SUDO_E="$SUDO -E" || SUDO_E=
@@ -217,7 +206,7 @@ OS_VERSION=$VERSION_ID
PACKAGE_MANAGER=
for PACKAGE_MANAGER in dnf yum apt-get; do
if runnable $PACKAGE_MANAGER; then
if available $PACKAGE_MANAGER; then
break
fi
done
@@ -226,14 +215,14 @@ if [ -z "$PACKAGE_MANAGER" ]; then
error "Unknown package manager. Skipping CUDA installation."
fi
if ! check_gpu nvidia-smi || nvidia-smi | grep -qo "CUDA Version: [0-9]*\.[0-9]*"; then
if ! check_gpu nvidia-smi || [ -z "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")" ]; then
case $OS_NAME in
centos|rhel) install_cuda_driver_yum 'rhel' "$OS_VERSION" ;;
rocky) install_cuda_driver_yum 'rhel' "$(echo "$OS_VERSION" | cut -c1)" ;;
fedora) install_cuda_driver_yum "$OS_NAME" "$OS_VERSION" ;;
centos|rhel) install_cuda_driver_yum 'rhel' $OS_VERSION ;;
rocky) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -c1) ;;
fedora) install_cuda_driver_yum $OS_NAME $OS_VERSION ;;
amzn) install_cuda_driver_yum 'fedora' '35' ;;
debian) install_cuda_driver_apt "$OS_NAME" "$OS_VERSION" ;;
ubuntu) install_cuda_driver_apt "$OS_NAME" "$(echo "$OS_VERSION" | sed 's/\.//')" ;;
debian) install_cuda_driver_apt $OS_NAME $OS_VERSION ;;
ubuntu) install_cuda_driver_apt $OS_NAME $(echo $OS_VERSION | sed 's/\.//') ;;
*) exit ;;
esac
fi
@@ -241,20 +230,20 @@ fi
if ! lsmod | grep -q nvidia; then
KERNEL_RELEASE="$(uname -r)"
case $OS_NAME in
centos|rhel|rocky|amzn) $SUDO $PACKAGE_MANAGER -y install "kernel-devel-$KERNEL_RELEASE" "kernel-headers-$KERNEL_RELEASE" ;;
fedora) $SUDO $PACKAGE_MANAGER -y install "kernel-devel-$KERNEL_RELEASE" ;;
debian|ubuntu) $SUDO apt-get -y install "linux-headers-$KERNEL_RELEASE" ;;
centos|rhel|rocky|amzn) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE kernel-headers-$KERNEL_RELEASE ;;
fedora) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE ;;
debian|ubuntu) $SUDO apt-get -y install linux-headers-$KERNEL_RELEASE ;;
*) exit ;;
esac
NVIDIA_CUDA_VERSION=$($SUDO dkms status | awk -F: '/added/ { print $1 }')
if [ -n "$NVIDIA_CUDA_VERSION" ]; then
$SUDO dkms install "$NVIDIA_CUDA_VERSION"
$SUDO dkms install $NVIDIA_CUDA_VERSION
fi
if lsmod | grep -q nouveau; then
status 'Reboot to complete NVIDIA CUDA driver install.'
exit
exit 0
fi
$SUDO modprobe nvidia

View File

@@ -771,6 +771,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {
c.String(http.StatusOK, "Ollama is running")
})
r.Handle(method, "/api/list", ListModelsHandler)
r.Handle(method, "/api/tags", ListModelsHandler)
}