Compare commits
8 Commits
v0.12.4-rc
...
v0.12.4-rc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ae5e0f0889 | ||
|
|
19e6796eac | ||
|
|
33801c1597 | ||
|
|
e4340667e3 | ||
|
|
2fa1e92a99 | ||
|
|
07e36761c3 | ||
|
|
c29fb007c0 | ||
|
|
730ed6e9e1 |
38
.github/workflows/release.yaml
vendored
38
.github/workflows/release.yaml
vendored
@@ -94,7 +94,7 @@ jobs:
|
||||
install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
|
||||
rocm-version: '6.2'
|
||||
flags: '-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" -DCMAKE_CXX_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma"'
|
||||
runner_dir: ''
|
||||
runner_dir: 'rocm'
|
||||
runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
|
||||
environment: release
|
||||
env:
|
||||
@@ -163,7 +163,7 @@ jobs:
|
||||
cmake --preset "${{ matrix.preset }}" ${{ matrix.flags }} -DOLLAMA_RUNNER_DIR="${{ matrix.runner_dir }}"
|
||||
cmake --build --parallel --preset "${{ matrix.preset }}"
|
||||
cmake --install build --component "${{ startsWith(matrix.preset, 'CUDA ') && 'CUDA' || startsWith(matrix.preset, 'ROCm ') && 'HIP' || 'CPU' }}" --strip --parallel 8
|
||||
rm -force dist\lib\ollama\rocm\rocblas\library\*gfx906*
|
||||
Remove-Item -Path dist\lib\ollama\rocm\rocblas\library\*gfx906* -ErrorAction SilentlyContinue
|
||||
env:
|
||||
CMAKE_GENERATOR: Ninja
|
||||
- uses: actions/upload-artifact@v4
|
||||
@@ -176,19 +176,19 @@ jobs:
|
||||
matrix:
|
||||
os: [windows]
|
||||
arch: [amd64, arm64]
|
||||
include:
|
||||
- os: windows
|
||||
arch: amd64
|
||||
llvmarch: x86_64
|
||||
- os: windows
|
||||
arch: arm64
|
||||
llvmarch: aarch64
|
||||
runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
|
||||
environment: release
|
||||
needs: [setup-environment]
|
||||
env:
|
||||
GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
|
||||
steps:
|
||||
- name: Install AMD64 system dependencies
|
||||
if: matrix.arch == 'amd64'
|
||||
run: |
|
||||
$ErrorActionPreference = "Stop"
|
||||
Start-Process "C:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
|
||||
echo "C:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
- name: Install ARM64 system dependencies
|
||||
if: matrix.arch == 'arm64'
|
||||
run: |
|
||||
@@ -200,15 +200,25 @@ jobs:
|
||||
|
||||
choco install -y --no-progress git gzip
|
||||
echo "C:\Program Files\Git\cmd" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
|
||||
Invoke-WebRequest -Uri "https://github.com/mstorsjo/llvm-mingw/releases/download/20240619/llvm-mingw-20240619-ucrt-aarch64.zip" -OutFile "${{ runner.temp }}\llvm-mingw-ucrt-aarch64.zip"
|
||||
Expand-Archive -Path ${{ runner.temp }}\llvm-mingw-ucrt-aarch64.zip -DestinationPath "C:\Program Files\"
|
||||
$installPath=(Resolve-Path -Path "C:\Program Files\llvm-mingw-*-ucrt-aarch64").path
|
||||
echo $installPath\bin | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
- name: Install clang and gcc-compat
|
||||
run: |
|
||||
$ErrorActionPreference = "Stop"
|
||||
Set-ExecutionPolicy Bypass -Scope Process -Force
|
||||
Invoke-WebRequest -Uri "https://github.com/mstorsjo/llvm-mingw/releases/download/20240619/llvm-mingw-20240619-ucrt-${{ matrix.llvmarch }}.zip" -OutFile "${{ runner.temp }}\llvm-mingw-ucrt.zip"
|
||||
Expand-Archive -Path ${{ runner.temp }}\llvm-mingw-ucrt.zip -DestinationPath "C:\Program Files\"
|
||||
$installPath=(Resolve-Path -Path "C:\Program Files\llvm-mingw-*-ucrt*").path
|
||||
echo "$installPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
- name: Verify gcc is actually clang
|
||||
run: |
|
||||
gcc -v
|
||||
if (((& gcc -v 2>&1) -join "`n") -notmatch 'clang') {
|
||||
echo "ERROR: GCC must be clang for proper utf16 handling"
|
||||
exit 1
|
||||
}
|
||||
- run: |
|
||||
go build -o dist/${{ matrix.os }}-${{ matrix.arch }}/ .
|
||||
- uses: actions/upload-artifact@v4
|
||||
|
||||
@@ -330,6 +330,9 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
|
||||
}
|
||||
}
|
||||
|
||||
// Apply any iGPU workarounds
|
||||
iGPUWorkarounds(devices)
|
||||
|
||||
return devices
|
||||
}
|
||||
|
||||
@@ -540,3 +543,32 @@ func GetDevicesFromRunner(ctx context.Context, runner BaseRunner) ([]ml.DeviceIn
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func iGPUWorkarounds(devices []ml.DeviceInfo) {
|
||||
// short circuit if we have no iGPUs
|
||||
anyiGPU := false
|
||||
for i := range devices {
|
||||
if devices[i].Integrated {
|
||||
anyiGPU = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !anyiGPU {
|
||||
return
|
||||
}
|
||||
|
||||
memInfo, err := GetCPUMem()
|
||||
if err != nil {
|
||||
slog.Debug("failed to fetch system memory information for iGPU", "error", err)
|
||||
return
|
||||
}
|
||||
for i := range devices {
|
||||
if !devices[i].Integrated {
|
||||
continue
|
||||
}
|
||||
// NVIDIA iGPUs return useless free VRAM data which ignores system buff/cache
|
||||
if devices[i].Library == "CUDA" {
|
||||
devices[i].FreeMemory = memInfo.FreeMemory
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -870,11 +870,6 @@ func (f GGML) SupportsKVCacheType(cacheType string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
if arch := f.KV().Architecture(); slices.Contains([]string{"gptoss", "gpt-oss"}, arch) {
|
||||
// gpt-oss uses attention with sinks which does not support quantized cache types
|
||||
slog.Warn("model only supports non-quantized cache types", "model", arch)
|
||||
return false
|
||||
}
|
||||
return slices.Contains([]string{"q8_0", "q4_0"}, cacheType)
|
||||
}
|
||||
|
||||
|
||||
@@ -150,7 +150,9 @@ func (moe *sparse) Moe(ctx ml.Context, hiddenStates, topKIndices, topKWeights ml
|
||||
}
|
||||
|
||||
func (moe *sparse) topKIndices(ctx ml.Context, scores ml.Tensor, opts *Options) ml.Tensor {
|
||||
scores = scores.Add(ctx, moe.ExpProbsBias)
|
||||
if moe.ExpProbsBias != nil {
|
||||
scores = scores.Add(ctx, moe.ExpProbsBias)
|
||||
}
|
||||
topKIndices := scores.TopK(ctx, opts.numExpertsUsed)
|
||||
return topKIndices
|
||||
}
|
||||
|
||||
@@ -154,24 +154,55 @@ func TestTemplate(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestParse(t *testing.T) {
|
||||
cases := []struct {
|
||||
validCases := []struct {
|
||||
name string
|
||||
template string
|
||||
vars []string
|
||||
}{
|
||||
{"{{ .Prompt }}", []string{"prompt", "response"}},
|
||||
{"{{ .System }} {{ .Prompt }}", []string{"prompt", "response", "system"}},
|
||||
{"{{ .System }} {{ .Prompt }} {{ .Response }}", []string{"prompt", "response", "system"}},
|
||||
{"{{ with .Tools }}{{ . }}{{ end }} {{ .System }} {{ .Prompt }}", []string{"prompt", "response", "system", "tools"}},
|
||||
{"{{ range .Messages }}{{ .Role }} {{ .Content }}{{ end }}", []string{"content", "messages", "role"}},
|
||||
{"{{ range .Messages }}{{ if eq .Role \"tool\" }}Tool Result: {{ .ToolName }} {{ .Content }}{{ end }}{{ end }}", []string{"content", "messages", "role", "toolname"}},
|
||||
{`{{- range .Messages }}
|
||||
{
|
||||
name: "PromptOnly",
|
||||
template: "{{ .Prompt }}",
|
||||
vars: []string{"prompt", "response"},
|
||||
},
|
||||
{
|
||||
name: "SystemAndPrompt",
|
||||
template: "{{ .System }} {{ .Prompt }}",
|
||||
vars: []string{"prompt", "response", "system"},
|
||||
},
|
||||
{
|
||||
name: "PromptResponseSystem",
|
||||
template: "{{ .System }} {{ .Prompt }} {{ .Response }}",
|
||||
vars: []string{"prompt", "response", "system"},
|
||||
},
|
||||
{
|
||||
name: "ToolsBlock",
|
||||
template: "{{ with .Tools }}{{ . }}{{ end }} {{ .System }} {{ .Prompt }}",
|
||||
vars: []string{"prompt", "response", "system", "tools"},
|
||||
},
|
||||
{
|
||||
name: "MessagesRange",
|
||||
template: "{{ range .Messages }}{{ .Role }} {{ .Content }}{{ end }}",
|
||||
vars: []string{"content", "messages", "role"},
|
||||
},
|
||||
{
|
||||
name: "ToolResultConditional",
|
||||
template: "{{ range .Messages }}{{ if eq .Role \"tool\" }}Tool Result: {{ .ToolName }} {{ .Content }}{{ end }}{{ end }}",
|
||||
vars: []string{"content", "messages", "role", "toolname"},
|
||||
},
|
||||
{
|
||||
name: "MultilineSystemUserAssistant",
|
||||
template: `{{- range .Messages }}
|
||||
{{- if eq .Role "system" }}SYSTEM:
|
||||
{{- else if eq .Role "user" }}USER:
|
||||
{{- else if eq .Role "assistant" }}ASSISTANT:
|
||||
{{- else if eq .Role "tool" }}TOOL:
|
||||
{{- else if eq .Role "tool" }}TOOL:
|
||||
{{- end }} {{ .Content }}
|
||||
{{- end }}`, []string{"content", "messages", "role"}},
|
||||
{`{{- if .Messages }}
|
||||
{{- end }}`,
|
||||
vars: []string{"content", "messages", "role"},
|
||||
},
|
||||
{
|
||||
name: "ChatMLLike",
|
||||
template: `{{- if .Messages }}
|
||||
{{- range .Messages }}<|im_start|>{{ .Role }}
|
||||
{{ .Content }}<|im_end|>
|
||||
{{ end }}<|im_start|>assistant
|
||||
@@ -182,22 +213,60 @@ func TestParse(t *testing.T) {
|
||||
{{ .Prompt }}<|im_end|>
|
||||
{{ end }}<|im_start|>assistant
|
||||
{{ .Response }}<|im_end|>
|
||||
{{- end -}}`, []string{"content", "messages", "prompt", "response", "role", "system"}},
|
||||
{{- end -}}`,
|
||||
vars: []string{"content", "messages", "prompt", "response", "role", "system"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run("", func(t *testing.T) {
|
||||
for _, tt := range validCases {
|
||||
tt := tt
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
tmpl, err := Parse(tt.template)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
t.Fatalf("Parse returned unexpected error: %v", err)
|
||||
}
|
||||
|
||||
v, err := tmpl.Vars()
|
||||
gotVars, err := tmpl.Vars()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
t.Fatalf("Vars returned unexpected error: %v", err)
|
||||
}
|
||||
if diff := cmp.Diff(v, tt.vars); diff != "" {
|
||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||
|
||||
if diff := cmp.Diff(gotVars, tt.vars); diff != "" {
|
||||
t.Errorf("Vars mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseError(t *testing.T) {
|
||||
invalidCases := []struct {
|
||||
name string
|
||||
template string
|
||||
errorStr string
|
||||
}{
|
||||
{
|
||||
"TemplateNotClosed",
|
||||
"{{ .Prompt ",
|
||||
"unclosed action",
|
||||
},
|
||||
{
|
||||
"Template",
|
||||
`{{define "x"}}{{template "x"}}{{end}}{{template "x"}}`,
|
||||
"undefined template specified",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range invalidCases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
_, err := Parse(tt.template)
|
||||
if err == nil {
|
||||
t.Fatalf("expected Parse to return an error for an invalid template, got nil")
|
||||
}
|
||||
|
||||
if !strings.Contains(strings.ToLower(err.Error()), strings.ToLower(tt.errorStr)) {
|
||||
t.Errorf("unexpected error message.\n got: %q\n want substring (case‑insensitive): %q", err.Error(), tt.errorStr)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user