Compare commits

..

1 Commits

Author SHA1 Message Date
Matt Williams
a314b6c2a9 add faq on models downloaded from hf
Signed-off-by: Matt Williams <m@technovangelist.com>
2024-01-04 16:55:56 -08:00
12 changed files with 91 additions and 211 deletions

View File

@@ -1,101 +1,74 @@
ARG GOLANG_VERSION=1.21.3
# Ubuntu 20.04 amd64 dependencies
FROM --platform=linux/amd64 ubuntu:20.04 AS base-amd64
ARG CUDA_VERSION=11.3.1-1
ARG CMAKE_VERSION=3.22.1
ARG CUDA_VERSION=11.3.1
ARG ROCM_VERSION=5.7.1
# ROCm only supports amd64
ARG ROCM_VERSION=6.0
ARG CLBLAST_VER=1.6.1
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
# Note: https://rocm.docs.amd.com/en/latest/release/user_kernel_space_compat_matrix.html
RUN apt-get update && \
apt-get install -y wget gnupg && \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \
mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
echo "deb [by-hash=no] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" > /etc/apt/sources.list.d/cuda.list && \
wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.sh" -O /tmp/cmake-installer.sh && \
chmod +x /tmp/cmake-installer.sh && /tmp/cmake-installer.sh --skip-license --prefix=/usr && \
mkdir --parents --mode=0755 /etc/apt/keyrings && \
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor > /etc/apt/keyrings/rocm.gpg && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} focal main" > /etc/apt/sources.list.d/rocm.list && \
echo "Package: *" > /etc/apt/preferences.d/rocm-pin-600 && \
echo "Pin: release o=repo.radeon.com" >> /etc/apt/preferences.d/rocm-pin-600 && \
echo "Pin-Priority: 600" >> /etc/apt/preferences.d/rocm-pin-600 && \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get -y install cuda=${CUDA_VERSION} rocm-hip-libraries rocm-device-libs rocm-libs rocm-ocl-icd rocm-hip-sdk rocm-hip-libraries rocm-cmake rocm-clang-ocl rocm-dev
ARG CMAKE_VERSION
# CLBlast
RUN wget -qO- https://github.com/CNugteren/CLBlast/archive/refs/tags/${CLBLAST_VER}.tar.gz | tar zxv -C /tmp/ && \
cd /tmp/CLBlast-${CLBLAST_VER} && mkdir build && cd build && cmake .. && make && make install
RUN yum install -y https://repo.ius.io/ius-release-el7.rpm centos-release-scl \
&& yum update -y \
&& yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++ git236
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ENV ROCM_PATH=/opt/rocm
ADD https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION-linux-x86_64.tar.gz /tmp/cmake-$CMAKE_VERSION.tar.gz
RUN tar -zx -C /usr --strip-components 1 </tmp/cmake-$CMAKE_VERSION.tar.gz
# Ubuntu 20.04 arm64 dependencies
FROM --platform=linux/arm64 ubuntu:20.04 AS base-arm64
ARG CUDA_VERSION=11.3.1-1
ARG CMAKE_VERSION=3.27.6
RUN apt-get update && \
apt-get install -y wget gnupg && \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa/cuda-ubuntu2004.pin && \
mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa//3bf863cc.pub && \
echo "deb [by-hash=no] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa/ /" > /etc/apt/sources.list.d/cuda.list && \
wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.sh" -O /tmp/cmake-installer.sh && \
chmod +x /tmp/cmake-installer.sh && /tmp/cmake-installer.sh --skip-license --prefix=/usr && \
apt-get update && \
apt-cache madison cuda && \
DEBIAN_FRONTEND=noninteractive apt-get -y install cuda=${CUDA_VERSION}
FROM base-${TARGETARCH}
ARG TARGETARCH
ARG GOFLAGS="'-ldflags -w -s'"
ARG CGO_CFLAGS
ARG GOLANG_VERSION=1.21.3
# Common toolchain
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y gcc-10 g++-10 cpp-10 git ocl-icd-opencl-dev && \
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10
# install go
ADD https://dl.google.com/go/go${GOLANG_VERSION}.linux-$TARGETARCH.tar.gz /tmp/go${GOLANG_VERSION}.tar.gz
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go${GOLANG_VERSION}.tar.gz
# build the final binary
WORKDIR /go/src/github.com/jmorganca/ollama
COPY . .
WORKDIR llm/generate
RUN sh gen_linux.sh
ENV GOOS=linux
ENV GOARCH=$TARGETARCH
ENV GOFLAGS=$GOFLAGS
ENV CGO_CFLAGS=${CGO_CFLAGS}
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
ARG CMAKE_VERSION
RUN dnf install -y git cmake
WORKDIR /go/src/github.com/jmorganca/ollama
COPY . .
WORKDIR llm/generate
RUN sh gen_linux.sh
FROM --platform=linux/amd64 rocm/dev-centos-7:$ROCM_VERSION-complete AS rocm-build-amd64
ARG CMAKE_VERSION
RUN yum install -y https://repo.ius.io/ius-release-el7.rpm centos-release-scl \
&& yum update -y \
&& yum remove -y git \
&& yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++ git236
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ENV LIBRARY_PATH /opt/amdgpu/lib64
ADD https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION-linux-x86_64.tar.gz /tmp/cmake-$CMAKE_VERSION.tar.gz
RUN tar -zx -C /usr --strip-components 1 </tmp/cmake-$CMAKE_VERSION.tar.gz
WORKDIR /go/src/github.com/jmorganca/ollama
COPY . .
WORKDIR llm/generate
RUN sh gen_linux.sh
FROM --platform=linux/amd64 centos:7 AS build-amd64
ENV CGO_ENABLED 1
ARG GOLANG_VERSION
ARG GOFLAGS
ARG CGO_FLAGS
RUN yum install -y centos-release-scl \
&& yum update -y \
&& yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ADD https://dl.google.com/go/go$GOLANG_VERSION.linux-amd64.tar.gz /tmp/go-$GOLANG_VERSION.tar.gz
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go-$GOLANG_VERSION.tar.gz
ENV PATH /usr/local/go/bin:$PATH
WORKDIR /go/src/github.com/jmorganca/ollama
COPY . .
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/cpu/lib llm/llama.cpp/build/linux/cpu/lib
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/cuda/lib llm/llama.cpp/build/linux/cuda/lib
COPY --from=rocm-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/rocm/lib llm/llama.cpp/build/linux/rocm/lib
RUN go build .
FROM --platform=linux/arm64 centos:7 AS build-arm64
ENV CGO_ENABLED 1
ARG GOLANG_VERSION
ARG GOFLAGS
ARG CGO_FLAGS
RUN yum install -y centos-release-scl \
&& yum update -y \
&& yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ADD https://dl.google.com/go/go$GOLANG_VERSION.linux-arm64.tar.gz /tmp/go-$GOLANG_VERSION.tar.gz
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go-$GOLANG_VERSION.tar.gz
ENV PATH /usr/local/go/bin:$PATH
WORKDIR /go/src/github.com/jmorganca/ollama
COPY . .
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/cpu/lib llm/llama.cpp/build/linux/cpu/lib
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/cuda/lib llm/llama.cpp/build/linux/cuda/lib
RUN go build .
FROM build-$TARGETARCH
RUN /usr/local/go/bin/go generate ./... && \
/usr/local/go/bin/go build .

View File

@@ -12,7 +12,7 @@ Import models using source model weights found on Hugging Face and similar sites
Installing on Linux in most cases is easy using the script on Ollama.ai. To get more detail about the install, including CUDA drivers, see the **[Linux Documentation](./linux.md)**.
Many of our users like the flexibility of using our official Docker Image. Learn more about using Docker with Ollama using the **[Docker Documentation](https://hub.docker.com/r/ollama/ollama)**.
Many of our users like the flexibility of using our official Docker Image. Learn more about using Docker with Ollama using the **[Docker Documentation](./docker.md)**.
It is easy to install on Linux and Mac, but many users will choose to build Ollama on their own. To do this, refer to the **[Development Documentation](./development.md)**.

View File

@@ -66,6 +66,16 @@ Refer to the section above for how to use environment variables on your platform
If a different directory needs to be used, set the environment variable `OLLAMA_MODELS` to the chosen directory. Refer to the section above for how to use environment variables on your platform.
## Can I use models I downloaded from Hugging Face in Ollama?
There are a lot of models available on Hugging Face. Many of them will work with Ollama, but not all of them yet. To check, look for models that use the **PyTorch** library, then open the `config.json` file in the model's repo; it should list an architecture. For now, we support models that use the following architectures: Llama, Mistral, Falcon, RW, and BigCode.
## Can I use models I downloaded in Ollama in other applications?
Yes, as long as those applications work with GGUF models. You can find the models in the directories listed above. Under `models`, there is a manifests directory. Follow that path down to find the model you want to use. There will be a file for the model and tag you intend to use. In that file, you will see a layer called: `application/vnd.ollama.image.model`.
The next line will show a sha256 hash. That hash is also the filename of the model weights file, which you can find in `.ollama/models/blobs`. You can use that file in any application that supports GGUF. However, it is important not to move the file from this location; otherwise, Ollama won't be able to use it.
## Does Ollama send my prompts and answers back to Ollama.ai to use in any way?
No, Ollama runs entirely locally, and conversation data will never leave your machine.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

View File

@@ -1,18 +0,0 @@
{
"name": "typescript-airenamer",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"start": "tsx renamer.ts",
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"tsx": "^4.7.0"
},
"devDependencies": {
"@types/node": "^20.10.6"
}
}

View File

@@ -1,29 +0,0 @@
# Renaming Files with AI
![airenamer 2024-01-05 09_09_08](https://github.com/jmorganca/ollama/assets/633681/b98df1c8-61a7-4dff-aeb7-b04e034dced0)
This example applies the benefits of the llava models to managing images. It will find any images in your current directory, generate keywords for the image, and then copy the file to a new name based on the keywords.
## Running the example
1. Clone this repo and navigate to the `examples/typescript-airenamer` directory.
2. Install the dependencies with `npm install`.
3. Run `npm run start`.
## Review the Code
The main part of the code is in the `getkeywords` function. It calls the `/api/generate` endpoint passing in the body:
```typescript
{
"model": "llava:13b-v1.5-q5_K_M",
"prompt": `Describe the image as a collection of keywords. Output in JSON format. Use the following schema: { filename: string, keywords: string[] }`,
"format": "json",
"images": [image],
"stream": false
}
```
This demonstrates how to send images to the model and how to use `format: json` so that the output can be parsed and handed to another function. The `images` key takes an array of base64-encoded images, and `format: json` tells the model to output JSON instead of regular text. When using `format: json`, it's important to also say in the prompt that you expect the output to be JSON. Adding the expected schema to the prompt also helps the model understand what you're looking for.
The `main` function calls `getkeywords`, passing it the base64-encoded image. Then it parses the JSON output, formats the keywords into a string, and copies the file to the new name.

View File

@@ -1,42 +0,0 @@
import fs from 'fs';
/**
 * Asks the local Ollama server to describe an image as a list of keywords.
 *
 * Sends a non-streaming POST to `/api/generate` with `format: "json"`, then
 * parses the JSON text found in the `response` field of the reply.
 *
 * @param image - Base64-encoded image contents.
 * @returns The string entries of the `keywords` array in the model output, or
 *          an empty array when the output has no `keywords` array.
 * @throws Error when the server replies with a non-2xx status, or when the
 *         `response` field is missing or is not valid JSON.
 */
export async function getkeywords(image: string): Promise<string[]> {
  const body = {
    "model": "llava:13b-v1.5-q5_K_M",
    "prompt": `Describe the image as a collection of keywords. Output in JSON format. Use the following schema: { filename: string, keywords: string[] }`,
    "format": "json",
    "images": [image],
    "stream": false
  };

  const response = await fetch("http://localhost:11434/api/generate", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(body),
  });

  // Fail with the HTTP status instead of a confusing JSON.parse error later on.
  if (!response.ok) {
    throw new Error(`Ollama request failed: ${response.status} ${response.statusText}`);
  }

  const json = await response.json();

  // The model's answer arrives as a JSON document encoded in a string field.
  let parsed: unknown;
  try {
    parsed = JSON.parse(json.response);
  } catch {
    throw new Error(`Model output is not valid JSON: ${json.response}`);
  }

  // Only hand back real strings so callers can rely on the declared string[].
  const keywords = (parsed as { keywords?: unknown } | null)?.keywords;
  if (!Array.isArray(keywords)) {
    return [];
  }
  return keywords.filter((k): k is string => typeof k === "string");
}
/**
 * Copies every `.jpg`/`.png` file in the current working directory to a new
 * file whose name is built from the keywords returned by `getkeywords`.
 *
 * Files are listed, read, and written relative to the same directory (the
 * working directory), so the script behaves the same wherever it is run from.
 * A file for which no keywords come back is skipped instead of being copied
 * to a name that consists of the extension only.
 */
async function main() {
  for (const file of fs.readdirSync(".")) {
    if (!(file.endsWith(".jpg") || file.endsWith(".png"))) {
      continue;
    }

    // Relative path: resolved against the same directory readdirSync(".") listed.
    const b64 = fs.readFileSync(file, { encoding: 'base64' });
    const keywords = await getkeywords(b64);

    // Spaces become underscores; path separators are replaced as well so a
    // keyword can never turn the new name into a path to another directory.
    const fileparts = keywords
      .map(k => k.replace(/ /g, "_").replace(/[\/\\]/g, "_"))
      .filter(k => k.length > 0);

    if (fileparts.length === 0) {
      console.log(`No keywords returned for ${file}, skipping`);
      continue;
    }

    const fileext = file.split(".").pop();
    const newfilename = fileparts.join("-") + "." + fileext;
    fs.copyFileSync(file, newfilename);
    console.log(`Copied ${file} to ${newfilename}`);
  }
}

// Report failures explicitly rather than leaving an unhandled promise rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -20,8 +20,6 @@ const char *cuda_lib_paths[] = {
};
#endif
#define CUDA_LOOKUP_SIZE 5
void cuda_init(cuda_init_resp_t *resp) {
nvmlReturn_t ret;
resp->err = NULL;
@@ -32,12 +30,11 @@ void cuda_init(cuda_init_resp_t *resp) {
struct lookup {
char *s;
void **p;
} l[CUDA_LOOKUP_SIZE] = {
} l[4] = {
{"nvmlInit_v2", (void *)&resp->ch.initFn},
{"nvmlShutdown", (void *)&resp->ch.shutdownFn},
{"nvmlDeviceGetHandleByIndex", (void *)&resp->ch.getHandle},
{"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.getMemInfo},
{"nvmlDeviceGetCount_v2", (void *)&resp->ch.getCount},
};
for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {
@@ -55,7 +52,7 @@ void cuda_init(cuda_init_resp_t *resp) {
return;
}
for (i = 0; i < CUDA_LOOKUP_SIZE; i++) { // TODO - fix this to use a null terminated list
for (i = 0; i < 4; i++) { // TODO - fix this to use a null terminated list
*l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
if (!l[i].p) {
UNLOAD_LIBRARY(resp->ch.handle);
@@ -92,34 +89,22 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
return;
}
unsigned int devices;
ret = (*h.getCount)(&devices);
// TODO - handle multiple GPUs
ret = (*h.getHandle)(0, &device);
if (ret != NVML_SUCCESS) {
snprintf(buf, buflen, "unable to get device count: %d", ret);
snprintf(buf, buflen, "unable to get device handle: %d", ret);
resp->err = strdup(buf);
return;
}
resp->total = 0;
resp->free = 0;
for (i = 0; i < devices; i++) {
ret = (*h.getHandle)(i, &device);
if (ret != NVML_SUCCESS) {
snprintf(buf, buflen, "unable to get device handle %d: %d", i, ret);
resp->err = strdup(buf);
return;
}
ret = (*h.getMemInfo)(device, &memInfo);
if (ret != NVML_SUCCESS) {
snprintf(buf, buflen, "device memory info lookup failure %d: %d", i, ret);
resp->err = strdup(buf);
return;
}
resp->total += memInfo.total;
resp->free += memInfo.free;
ret = (*h.getMemInfo)(device, &memInfo);
if (ret != NVML_SUCCESS) {
snprintf(buf, buflen, "device memory info lookup failure: %d", ret);
resp->err = strdup(buf);
return;
}
resp->total = memInfo.total;
resp->free = memInfo.free;
return;
}
#endif // __APPLE__

View File

@@ -21,7 +21,6 @@ typedef struct cuda_handle {
nvmlReturn_t (*shutdownFn)(void);
nvmlReturn_t (*getHandle)(unsigned int, nvmlDevice_t *);
nvmlReturn_t (*getMemInfo)(nvmlDevice_t, nvmlMemory_t *);
nvmlReturn_t (*getCount)(unsigned int *);
} cuda_handle_t;
typedef struct cuda_init_resp {

View File

@@ -2,6 +2,7 @@
init_vars() {
LLAMACPP_DIR=../llama.cpp
PATCHES="0001-Expose-callable-API-for-server.patch"
CMAKE_DEFS=""
CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then

View File

@@ -4,6 +4,7 @@ $ErrorActionPreference = "Stop"
function init_vars {
$script:llamacppDir = "../llama.cpp"
$script:patches = @("0001-Expose-callable-API-for-server.patch")
$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-A","x64")
$script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static")
if ($env:CGO_CFLAGS -contains "-g") {

View File

@@ -8,7 +8,7 @@ export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version
mkdir -p dist
for TARGETARCH in amd64 arm64; do
docker build --platform=linux/$TARGETARCH --build-arg=GOFLAGS --build-arg=CGO_CFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
docker build --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS --build-arg=CGO_CFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
docker rm builder-$TARGETARCH