Compare commits

..

6 Commits

Author SHA1 Message Date
Patrick Devine
b8af12ceaf feed the linter 2024-09-17 18:19:31 -07:00
Patrick Devine
6f041ddfa4 allow ctl-j to add a new line + fix multiline bracketed paste 2024-09-17 18:12:55 -07:00
Michael Yang
72962c6e08 Merge pull request #6833 from ollama/mxyng/git-am
make patches git am-able
2024-09-17 16:33:23 -07:00
Michael Yang
7bd7b02712 make patches git am-able
raw diffs can be applied using `git apply` but not with `git am`. git
patches, e.g. through `git format-patch` are both apply-able and am-able
2024-09-17 15:26:40 -07:00
Daniel Hiltgen
8f9ab5e14d CI: dist directories no longer present (#6834)
The new buildx based build no longer leaves the dist/linux-* directories
around, so we don't have to clean them up before uploading.
2024-09-16 17:31:37 -07:00
Daniel Hiltgen
7717bb6a84 CI: clean up naming, fix tagging latest (#6832)
The rocm CI step for RCs was incorrectly tagging them as the latest rocm build.
The multiarch manifest was incorrectly tagged twice (with and without the
prefix "v").  Static windows artifacts weren't being carried between build
jobs.  This also fixes the latest tagging script.
2024-09-16 16:18:41 -07:00
15 changed files with 162 additions and 94 deletions

View File

@@ -104,6 +104,7 @@ jobs:
path: |
build/**/*
build/**/*.a
llm/build/**/*.a
dist/windows-amd64/**
# ROCm generation step
@@ -421,7 +422,7 @@ jobs:
!dist/*-cov
# Container image build
build-linux:
build-container-image:
environment: release
strategy:
matrix:
@@ -459,7 +460,6 @@ jobs:
flavor: |
latest=false
tags: |
type=ref,event=tag
type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
type=semver,pattern={{version}}
- name: Set Version
@@ -503,7 +503,7 @@ jobs:
environment: release
runs-on: linux
needs:
- build-linux
- build-container-image
env:
FINAL_IMAGE_REPO: ollama/ollama
steps:
@@ -526,7 +526,6 @@ jobs:
flavor: |
latest=false
tags: |
type=ref,event=tag
type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
type=semver,pattern={{version}}
- name: Set Version
@@ -551,7 +550,7 @@ jobs:
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.FINAL_IMAGE_REPO }}:${{ steps.meta.outputs.version }}
build-linux-rocm:
build-container-image-rocm:
environment: release
runs-on: linux
env:
@@ -570,7 +569,6 @@ jobs:
flavor: |
latest=false
tags: |
type=ref,event=tag
type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
type=semver,pattern={{version}}
- name: Set Version
@@ -592,7 +590,7 @@ jobs:
target: runtime-rocm
build-args: |
GOFLAGS
tags: ${{ env.FINAL_IMAGE_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION}}-rocm,${{ env.FINAL_IMAGE_REPO }}:rocm
tags: ${{ env.FINAL_IMAGE_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION}}-rocm
push: true
# Aggregate all the assets and ship a release
@@ -625,8 +623,6 @@ jobs:
ls -lh dist/
(cd dist; find . -type f | xargs sha256sum > ../sha256sum.txt)
mv sha256sum.txt dist/
mv dist/linux-???64 .
mv dist/linux-amd64-rocm .
cat dist/sha256sum.txt
- name: Create or update Release
run: |

View File

@@ -30,6 +30,11 @@ const (
MultilineSystem
)
// Prompt strings shown by the interactive readline scanner.
const (
// scannerPrompt is the primary prompt passed to readline.New; it is also
// restored on scanner.Prompt.Prompt after the accumulated input is reset.
scannerPrompt = ">>> "
// scannerAltPrompt is the alternate (continuation) prompt passed to
// readline.New; it is also assigned to scanner.Prompt.Prompt when
// readline.ErrNewLineDetected is returned, so the next line reads as a
// continuation of the current input.
scannerAltPrompt = "... "
)
func generateInteractive(cmd *cobra.Command, opts runOptions) error {
usage := func() {
fmt.Fprintln(os.Stderr, "Available Commands:")
@@ -111,8 +116,8 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
}
scanner, err := readline.New(readline.Prompt{
Prompt: ">>> ",
AltPrompt: "... ",
Prompt: scannerPrompt,
AltPrompt: scannerAltPrompt,
Placeholder: "Send a message (/? for help)",
AltPlaceholder: `Use """ to end multi-line input`,
})
@@ -144,6 +149,11 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
scanner.Prompt.UseAlt = false
sb.Reset()
continue
case errors.Is(err, readline.ErrNewLineDetected):
sb.WriteString(line)
fmt.Fprintln(&sb)
scanner.Prompt.Prompt = scannerAltPrompt
continue
case err != nil:
return err
@@ -169,7 +179,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
multiline = MultilineNone
scanner.Prompt.UseAlt = false
case strings.HasPrefix(line, `"""`):
case strings.HasPrefix(line, `"""`) && !scanner.Pasting:
line := strings.TrimPrefix(line, `"""`)
line, ok := strings.CutSuffix(line, `"""`)
sb.WriteString(line)
@@ -433,7 +443,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
sb.WriteString(line)
}
if sb.Len() > 0 && multiline == MultilineNone {
if sb.Len() > 0 && strings.TrimSpace(sb.String()) != "" && multiline == MultilineNone {
newMessage := api.Message{Role: "user", Content: sb.String()}
if opts.MultiModal {
@@ -464,6 +474,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
}
sb.Reset()
scanner.Prompt.Prompt = scannerPrompt
}
}
}

View File

@@ -69,22 +69,10 @@ git_module_setup() {
}
apply_patches() {
# Wire up our CMakefile
if ! grep ollama ${LLAMACPP_DIR}/CMakeLists.txt; then
echo 'add_subdirectory(../ext_server ext_server) # ollama' >>${LLAMACPP_DIR}/CMakeLists.txt
fi
if [ -n "$(ls -A ../patches/*.diff)" ]; then
# apply temporary patches until fix is upstream
for patch in ../patches/*.diff; do
for file in $(grep "^+++ " ${patch} | cut -f2 -d' ' | cut -f2- -d/); do
(cd ${LLAMACPP_DIR}; git checkout ${file})
done
done
for patch in ../patches/*.diff; do
(cd ${LLAMACPP_DIR} && git apply ${patch})
done
fi
# apply temporary patches until fix is upstream
for patch in ../patches/*.patch; do
git -c 'user.name=nobody' -c 'user.email=<>' -C ${LLAMACPP_DIR} am ${patch}
done
}
build() {

View File

@@ -83,29 +83,9 @@ function git_module_setup {
}
function apply_patches {
# Wire up our CMakefile
if (!(Select-String -Path "${script:llamacppDir}/CMakeLists.txt" -Pattern 'ollama')) {
Add-Content -Path "${script:llamacppDir}/CMakeLists.txt" -Value 'add_subdirectory(../ext_server ext_server) # ollama'
}
# Apply temporary patches until fix is upstream
$patches = Get-ChildItem "../patches/*.diff"
foreach ($patch in $patches) {
# Extract file paths from the patch file
$filePaths = Get-Content $patch.FullName | Where-Object { $_ -match '^\+\+\+ ' } | ForEach-Object {
$parts = $_ -split ' '
($parts[1] -split '/', 2)[1]
}
# Checkout each file
foreach ($file in $filePaths) {
git -C "${script:llamacppDir}" checkout $file
}
}
# Apply each patch
foreach ($patch in $patches) {
git -C "${script:llamacppDir}" apply $patch.FullName
foreach ($patch in $(Get-ChildItem "../patches/*.patch")) {
git -c 'user.name=nobody' -c 'user.email=<>' -C "${script:llamacppDir}" am $patch.FullName
}
}

View File

@@ -0,0 +1,22 @@
From 8b8d83ffca775840acc5dc700f3b3703e9f5cfe4 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Fri, 23 Aug 2024 11:27:48 -0700
Subject: [PATCH] patch cmakelist
---
CMakeLists.txt | 2 ++
1 file changed, 2 insertions(+)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a3132063..6a2a9912 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -199,3 +199,5 @@ if (LLAMA_BUILD_EXAMPLES)
add_subdirectory(examples)
add_subdirectory(pocs)
endif()
+
+add_subdirectory(../ext_server ext_server) # ollama
--
2.45.2

View File

@@ -1,8 +1,18 @@
From 2cfaa0a04faa9c87ba8f1ac8527eb953e69c6cde Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Mon, 16 Sep 2024 15:53:10 -0700
Subject: [PATCH] 01-load-progress.diff
---
common/common.cpp | 2 ++
common/common.h | 7 +++++++
2 files changed, 9 insertions(+)
diff --git a/common/common.cpp b/common/common.cpp
index 2c05a4d4..927f0e3d 100644
index 9fa18472..48ff41e9 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2093,6 +2093,8 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
@@ -2573,6 +2573,8 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
mparams.use_mmap = params.use_mmap;
mparams.use_mlock = params.use_mlock;
mparams.check_tensors = params.check_tensors;
@@ -12,10 +22,10 @@ index 2c05a4d4..927f0e3d 100644
mparams.kv_overrides = NULL;
} else {
diff --git a/common/common.h b/common/common.h
index 65c0ef81..ebca2c77 100644
index cb5e7f6d..d8f043f7 100644
--- a/common/common.h
+++ b/common/common.h
@@ -184,6 +184,13 @@ struct gpt_params {
@@ -204,6 +204,13 @@ struct gpt_params {
std::string mmproj = ""; // path to multimodal projector
std::vector<std::string> image; // path to image file(s)
@@ -29,3 +39,6 @@ index 65c0ef81..ebca2c77 100644
// embedding
bool embedding = false; // get only sentence embedding
int32_t embd_normalize = 2; // normalisation for embendings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
--
2.46.0

View File

@@ -1,5 +1,14 @@
From ba4bba80a744f76ac67b8234451c259a3c5da83b Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Mon, 16 Sep 2024 15:53:11 -0700
Subject: [PATCH] 02-clip-log.diff
---
examples/llava/clip.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index e431c7f7..f077e688 100644
index 9b890571..cb51793d 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -3,6 +3,7 @@
@@ -10,3 +19,6 @@ index e431c7f7..f077e688 100644
#include "log.h"
#include "ggml.h"
#include "ggml-alloc.h"
--
2.46.0

View File

@@ -1,8 +1,17 @@
From e43bfd3f607a6dfcaba2d490d35f412a52e55e30 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Mon, 16 Sep 2024 15:53:12 -0700
Subject: [PATCH] 03-load_exception.diff
---
src/llama.cpp | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/src/llama.cpp b/src/llama.cpp
index 73f52435..58a00fb1 100644
index 88355971..926bb71a 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -7241,7 +7241,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
@@ -8635,7 +8635,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
}
} catch (const std::exception & err) {
LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
@@ -11,7 +20,7 @@ index 73f52435..58a00fb1 100644
}
return 0;
@@ -17564,16 +17564,23 @@ struct llama_model * llama_load_model_from_file(
@@ -18022,16 +18022,23 @@ struct llama_model * llama_load_model_from_file(
}
model->rpc_servers.push_back(servers);
}
@@ -43,3 +52,6 @@ index 73f52435..58a00fb1 100644
}
return model;
--
2.46.0

View File

@@ -1,8 +1,17 @@
From 29411d9a9d2b6a0af6425ffe88498f17f71f7d5d Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Mon, 16 Sep 2024 15:53:12 -0700
Subject: [PATCH] 04-metal.diff
---
ggml/src/ggml-metal.m | 30 +++++++++++++-----------------
1 file changed, 13 insertions(+), 17 deletions(-)
diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m
index 0207b787..b5e9884b 100644
index 91b5e61b..9cfa72ac 100644
--- a/ggml/src/ggml-metal.m
+++ b/ggml/src/ggml-metal.m
@@ -1396,27 +1396,23 @@ static enum ggml_status ggml_metal_graph_compute(
@@ -1734,27 +1734,23 @@ static enum ggml_status ggml_metal_graph_compute(
// to the matrix-vector kernel
int ne11_mm_min = 1;
@@ -43,3 +52,6 @@ index 0207b787..b5e9884b 100644
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
--
2.46.0

View File

@@ -1,5 +1,14 @@
From b298ac8614d1e38da28f760eb1d2ae8af0fbbe62 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Mon, 16 Sep 2024 15:53:13 -0700
Subject: [PATCH] 05-default-pretokenizer.diff
---
src/llama.cpp | 14 +++-----------
1 file changed, 3 insertions(+), 11 deletions(-)
diff --git a/src/llama.cpp b/src/llama.cpp
index 88355971..dd7d41ed 100644
index 926bb71a..d1e959fc 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -6083,16 +6083,7 @@ static void llm_load_vocab(
@@ -30,3 +39,6 @@ index 88355971..dd7d41ed 100644
}
} else if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
--
2.46.0

View File

@@ -1,8 +1,17 @@
From c9a6ca9fc039233dee746a4da9705762cd9e515d Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Mon, 16 Sep 2024 15:53:14 -0700
Subject: [PATCH] 06-embeddings.diff
---
src/llama.cpp | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/src/llama.cpp b/src/llama.cpp
index 88355971..d7db689b 100644
index d1e959fc..f79bd782 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -15906,7 +15906,7 @@ static size_t llama_output_reserve(llama_context & lctx, size_t n_outputs) {
@@ -15898,7 +15898,7 @@ static size_t llama_output_reserve(llama_context & lctx, size_t n_outputs) {
const auto n_embd = hparams.n_embd;
// TODO: use a per-batch flag for logits presence instead
@@ -11,7 +20,7 @@ index 88355971..d7db689b 100644
const bool has_embd = cparams.embeddings && (cparams.pooling_type == LLAMA_POOLING_TYPE_NONE);
const size_t logits_size = has_logits ? n_vocab*n_outputs_max : 0;
@@ -16175,20 +16175,23 @@ static int llama_decode_internal(
@@ -16167,20 +16167,23 @@ static int llama_decode_internal(
// no output
res = nullptr;
embd = nullptr;
@@ -41,3 +50,6 @@ index 88355971..d7db689b 100644
// LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);
ggml_backend_sched_alloc_graph(lctx.sched, gf);
--
2.46.0

View File

@@ -1,8 +1,17 @@
From ae2b188a679c83ce105aa1e823499441dfab3c57 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Mon, 16 Sep 2024 15:53:15 -0700
Subject: [PATCH] 07-clip-unicode.diff
---
examples/llava/clip.cpp | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 95fbe3d0..5a02a6ec 100644
index cb51793d..8716472b 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -32,6 +33,14 @@
@@ -41,6 +41,14 @@
#include <cinttypes>
#include <limits>
@@ -17,7 +26,7 @@ index 95fbe3d0..5a02a6ec 100644
//#define CLIP_DEBUG_FUNCTIONS
// RGB uint8 image
@@ -1055,7 +1064,22 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
@@ -1223,7 +1231,22 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
return nullptr;
}
@@ -40,3 +49,6 @@ index 95fbe3d0..5a02a6ec 100644
if (!fin) {
LOG_TEE("cannot open model file for loading tensors\n");
clip_free(new_clip);
--
2.46.0

View File

@@ -4,7 +4,10 @@ import (
"errors"
)
var ErrInterrupt = errors.New("Interrupt")
// Sentinel errors; compare against them with errors.Is.
var (
// ErrInterrupt is a sentinel error with the message "Interrupt".
// NOTE(review): presumably reported when the user interrupts input; its
// use is not visible in this view — confirm against Readline.
ErrInterrupt = errors.New("Interrupt")
// ErrNewLineDetected is returned by Instance.Readline, alongside the
// output it read, when the character that ended the line is CharCtrlJ.
// It signals that a new line was requested rather than reporting a failure.
ErrNewLineDetected = errors.New("new line detected")
)
type InterruptError struct {
Line []rune

View File

@@ -225,6 +225,9 @@ func (i *Instance) Readline() (string, error) {
buf.MoveToEnd()
fmt.Println()
if r == CharCtrlJ {
return output, ErrNewLineDetected
}
return output, nil
default:
if metaDel {

View File

@@ -2,32 +2,12 @@
set -eu
# We use 2 different image repositories to handle combining architecture images into multiarch manifest
# (The ROCm image is x86 only and is not a multiarch manifest)
# For developers, you can override the DOCKER_ORG to generate multiarch manifests
# DOCKER_ORG=jdoe VERSION=0.1.30 PUSH=1 ./scripts/tag_latest.sh
# DOCKER_ORG=jdoe VERSION=0.1.30 ./scripts/tag_latest.sh
DOCKER_ORG=${DOCKER_ORG:-"ollama"}
RELEASE_IMAGE_REPO=${RELEASE_IMAGE_REPO:-"${DOCKER_ORG}/release"}
FINAL_IMAGE_REPO=${FINAL_IMAGE_REPO:-"${DOCKER_ORG}/ollama"}
# Set PUSH to a non-empty string to trigger push instead of load
PUSH=${PUSH:-""}
echo "Assembling manifest and tagging latest"
docker manifest rm ${FINAL_IMAGE_REPO}:latest || true
docker manifest create ${FINAL_IMAGE_REPO}:latest \
${RELEASE_IMAGE_REPO}:$VERSION-amd64 \
${RELEASE_IMAGE_REPO}:$VERSION-arm64
docker pull ${RELEASE_IMAGE_REPO}:$VERSION-rocm
docker tag ${RELEASE_IMAGE_REPO}:$VERSION-rocm ${FINAL_IMAGE_REPO}:rocm
if [ -n "${PUSH}" ]; then
echo "Pushing latest tags up..."
docker manifest push ${FINAL_IMAGE_REPO}:latest
docker push ${FINAL_IMAGE_REPO}:rocm
else
echo "Not pushing ${FINAL_IMAGE_REPO}:latest and ${FINAL_IMAGE_REPO}:rocm"
fi
echo "Updating ${FINAL_IMAGE_REPO}:latest -> ${FINAL_IMAGE_REPO}:${VERSION}"
docker buildx imagetools create -t ${FINAL_IMAGE_REPO}:latest ${FINAL_IMAGE_REPO}:${VERSION}
echo "Updating ${FINAL_IMAGE_REPO}:rocm -> ${FINAL_IMAGE_REPO}:${VERSION}-rocm"
docker buildx imagetools create -t ${FINAL_IMAGE_REPO}:rocm ${FINAL_IMAGE_REPO}:${VERSION}-rocm