Compare commits
99 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a62817d677 | ||
|
|
30dd74930d | ||
|
|
ce78e400c2 | ||
|
|
edeea1d6f0 | ||
|
|
450400107b | ||
|
|
d29cd4c2ed | ||
|
|
a84c05cf91 | ||
|
|
e3d7f32af7 | ||
|
|
3a75e74e34 | ||
|
|
237dccba1e | ||
|
|
b3f75fc812 | ||
|
|
8200c371ae | ||
|
|
0a8d6ea86d | ||
|
|
8e1050f366 | ||
|
|
eda8a32a09 | ||
|
|
a0a40aa20c | ||
|
|
2697d7f5aa | ||
|
|
1f32276178 | ||
|
|
4c4fe3f87f | ||
|
|
feedf49c71 | ||
|
|
8b00a415ab | ||
|
|
01b80e9ffc | ||
|
|
bd5e432630 | ||
|
|
aec77d6a05 | ||
|
|
6ffb5cb017 | ||
|
|
f7e3b9190f | ||
|
|
980dd15f81 | ||
|
|
01d544d373 | ||
|
|
1dc3ef3aa9 | ||
|
|
8aac22438e | ||
|
|
15c2d8fe14 | ||
|
|
25906d72d1 | ||
|
|
023451ce47 | ||
|
|
9b53e39d8e | ||
|
|
97fae2df95 | ||
|
|
160d9d4900 | ||
|
|
d4e6407464 | ||
|
|
b7f7d8cd15 | ||
|
|
2fa1db4345 | ||
|
|
71b0945fc6 | ||
|
|
5bca2e60a7 | ||
|
|
67472e0e89 | ||
|
|
e9aa5117c4 | ||
|
|
2473bdba5e | ||
|
|
7d1c0047fa | ||
|
|
7b61eba471 | ||
|
|
7edaf6e7e8 | ||
|
|
97ec8cfd4e | ||
|
|
5b3a21b578 | ||
|
|
ad0c19dde4 | ||
|
|
69eb06c40e | ||
|
|
1829fb61bd | ||
|
|
ce67706037 | ||
|
|
685a53534b | ||
|
|
de4fc29773 | ||
|
|
e04c7012c2 | ||
|
|
d4a7216c82 | ||
|
|
a4fdd03c3b | ||
|
|
fc85f50a2b | ||
|
|
86b907f82a | ||
|
|
10d49bce70 | ||
|
|
7ed367419e | ||
|
|
50ee8b5f56 | ||
|
|
03bdac0595 | ||
|
|
f457d63400 | ||
|
|
04210aa6dd | ||
|
|
43f9d92008 | ||
|
|
ed6c8bfe57 | ||
|
|
39f2bc6bfc | ||
|
|
b73b0940ef | ||
|
|
6a07344786 | ||
|
|
8b920f35a4 | ||
|
|
4221e39867 | ||
|
|
a091fadfda | ||
|
|
77ccbf04dc | ||
|
|
4addf6b587 | ||
|
|
85c7f11170 | ||
|
|
df3802a65f | ||
|
|
b732beba6a | ||
|
|
ce1fb4447e | ||
|
|
558a54b098 | ||
|
|
ed52833bb1 | ||
|
|
6f133a0bdd | ||
|
|
f561eecfb8 | ||
|
|
ff7c9060ec | ||
|
|
0ff42e84b0 | ||
|
|
8a9f946ca7 | ||
|
|
3b5210548e | ||
|
|
b0c216584c | ||
|
|
49a5483139 | ||
|
|
6bc5c13758 | ||
|
|
3e614260af | ||
|
|
d87b4a488e | ||
|
|
d8e2664c33 | ||
|
|
eafc607abb | ||
|
|
781fc2d576 | ||
|
|
df993fa37b | ||
|
|
5e9db9fb0b | ||
|
|
6b252918fb |
2
.gitattributes
vendored
2
.gitattributes
vendored
@@ -1 +1,3 @@
|
||||
llm/ext_server/* linguist-vendored
|
||||
* text=auto
|
||||
*.go text eol=lf
|
||||
|
||||
10
.github/workflows/release.yaml
vendored
10
.github/workflows/release.yaml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
security set-keychain-settings -lut 3600 build.keychain
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: "stable"
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- name: Build Darwin
|
||||
env:
|
||||
@@ -87,7 +87,7 @@ jobs:
|
||||
write-host "plugin installed"
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: "stable"
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- run: go get ./...
|
||||
- run: |
|
||||
@@ -141,7 +141,7 @@ jobs:
|
||||
write-host "plugin installed"
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: "stable"
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- name: 'Install ROCm'
|
||||
run: |
|
||||
@@ -218,7 +218,7 @@ jobs:
|
||||
write-host "plugin installed"
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: "stable"
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- name: 'Install CUDA'
|
||||
run: |
|
||||
@@ -306,7 +306,7 @@ jobs:
|
||||
write-host "plugin installed"
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: "stable"
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- run: go get
|
||||
- uses: actions/download-artifact@v4
|
||||
|
||||
12
.github/workflows/test.yaml
vendored
12
.github/workflows/test.yaml
vendored
@@ -63,7 +63,7 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: "stable"
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- run: go get ./...
|
||||
- run: |
|
||||
@@ -163,7 +163,7 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: "stable"
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- name: 'Install ROCm'
|
||||
run: |
|
||||
@@ -200,7 +200,7 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: "stable"
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- name: 'Install CUDA'
|
||||
run: |
|
||||
@@ -255,7 +255,7 @@ jobs:
|
||||
submodules: recursive
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: "stable"
|
||||
go-version-file: go.mod
|
||||
cache: false
|
||||
- run: |
|
||||
case ${{ matrix.arch }} in
|
||||
@@ -273,7 +273,7 @@ jobs:
|
||||
if: ${{ startsWith(matrix.os, 'macos-') }}
|
||||
- uses: golangci/golangci-lint-action@v6
|
||||
with:
|
||||
args: --timeout 8m0s -v ${{ startsWith(matrix.os, 'windows-') && '' || '--disable gofmt --disable goimports' }}
|
||||
args: --timeout 8m0s -v
|
||||
test:
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -297,7 +297,7 @@ jobs:
|
||||
submodules: recursive
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: "stable"
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- run: |
|
||||
case ${{ matrix.arch }} in
|
||||
|
||||
@@ -7,22 +7,31 @@ linters:
|
||||
- bodyclose
|
||||
- containedctx
|
||||
- contextcheck
|
||||
- errcheck
|
||||
- exportloopref
|
||||
- gci
|
||||
- gocheckcompilerdirectives
|
||||
# conditionally enable this on linux/macos
|
||||
# - gofmt
|
||||
# - goimports
|
||||
- gofmt
|
||||
- gofumpt
|
||||
- gosimple
|
||||
- govet
|
||||
- ineffassign
|
||||
- intrange
|
||||
- makezero
|
||||
- misspell
|
||||
- nilerr
|
||||
- nolintlint
|
||||
- nosprintfhostport
|
||||
- testifylint
|
||||
- staticcheck
|
||||
- tenv
|
||||
- unconvert
|
||||
- unused
|
||||
- usestdlibvars
|
||||
- wastedassign
|
||||
- whitespace
|
||||
- usestdlibvars
|
||||
linters-settings:
|
||||
gci:
|
||||
sections: [standard, default, localmodule]
|
||||
severity:
|
||||
default-severity: error
|
||||
rules:
|
||||
|
||||
37
CONTRIBUTING.md
Normal file
37
CONTRIBUTING.md
Normal file
@@ -0,0 +1,37 @@
|
||||
# Contributing to Ollama
|
||||
|
||||
Thank you for your interest in contributing to Ollama! Here are a few guidelines to help get you started.
|
||||
|
||||
## Set up
|
||||
|
||||
See the [development documentation](./docs/development.md) for instructions on how to build and run Ollama locally.
|
||||
|
||||
## Pull requests
|
||||
|
||||
### Ideal issues
|
||||
|
||||
* [Bugs](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Abug): issues where Ollama stops working or where it results in an unexpected error.
|
||||
* [Performance](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Aperformance): issues to make Ollama faster at model inference, downloading or uploading.
|
||||
* [Security](https://github.com/ollama/ollama/blob/main/SECURITY.md): issues that could lead to a security vulnerability. As mentioned in [SECURITY.md](https://github.com/ollama/ollama/blob/main/SECURITY.md), please do not disclose security vulnerabilities publicly.
|
||||
|
||||
### Issues that are harder to review
|
||||
|
||||
* New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run as they cannot be removed without potentially breaking users in the future.
|
||||
* Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
|
||||
* Documentation: small updates to fill in or correct missing documentation are helpful; however, large documentation additions can be hard to maintain over time.
|
||||
|
||||
### Issues that may not be accepted
|
||||
|
||||
* Changes that break backwards compatibility in Ollama's API (including the OpenAI-compatible API)
|
||||
* Changes that add significant friction to the user experience
|
||||
* Changes that create a large future maintenance burden for maintainers and contributors
|
||||
|
||||
### Best practices
|
||||
|
||||
* Commit messages: please leave both a title and a description in your commit messages. The title should be a short summary of the changes, with a leading word that explains the section of the code being changed (e.g. `api: fix parsing of prompt field`). In the description, leave 2-3 short sentences that explain more about the change and its impact.
|
||||
* Tests: please add test coverage to changes where possible.
|
||||
* Minimize dependencies: avoid adding new dependencies unless absolutely necessary.
|
||||
|
||||
## Need help?
|
||||
|
||||
If you need help with anything, feel free to reach out to us on our [Discord server](https://discord.gg/ollama).
|
||||
@@ -54,6 +54,7 @@ Here are some example models that can be downloaded:
|
||||
| Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` |
|
||||
| Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` |
|
||||
| Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` |
|
||||
| Gemma 2 | 2B | 1.6GB | `ollama run gemma2:2b` |
|
||||
| Gemma 2 | 9B | 5.5GB | `ollama run gemma2` |
|
||||
| Gemma 2 | 27B | 16GB | `ollama run gemma2:27b` |
|
||||
| Mistral | 7B | 4.1GB | `ollama run mistral` |
|
||||
@@ -300,6 +301,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
|
||||
- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
|
||||
- [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
|
||||
- [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
|
||||
|
||||
### Terminal
|
||||
|
||||
@@ -323,6 +325,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [tlm](https://github.com/yusufcanb/tlm)
|
||||
- [podman-ollama](https://github.com/ericcurtin/podman-ollama)
|
||||
- [gollama](https://github.com/sammcj/gollama)
|
||||
- [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/)
|
||||
|
||||
### Database
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -172,7 +173,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
|
||||
}
|
||||
|
||||
if errorResponse.Error != "" {
|
||||
return fmt.Errorf(errorResponse.Error)
|
||||
return errors.New(errorResponse.Error)
|
||||
}
|
||||
|
||||
if response.StatusCode >= http.StatusBadRequest {
|
||||
@@ -297,7 +298,7 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
|
||||
return &lr, nil
|
||||
}
|
||||
|
||||
// List running models.
|
||||
// ListRunning lists running models.
|
||||
func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
|
||||
var lr ProcessResponse
|
||||
if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
|
||||
@@ -332,7 +333,7 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, err
|
||||
return &resp, nil
|
||||
}
|
||||
|
||||
// Hearbeat checks if the server has started and is responsive; if yes, it
|
||||
// Heartbeat checks if the server has started and is responsive; if yes, it
|
||||
// returns nil, otherwise an error.
|
||||
func (c *Client) Heartbeat(ctx context.Context) error {
|
||||
if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
|
||||
|
||||
@@ -231,7 +231,6 @@ type Options struct {
|
||||
|
||||
// Runner options which must be set when the model is loaded into memory
|
||||
type Runner struct {
|
||||
UseNUMA bool `json:"numa,omitempty"`
|
||||
NumCtx int `json:"num_ctx,omitempty"`
|
||||
NumBatch int `json:"num_batch,omitempty"`
|
||||
NumGPU int `json:"num_gpu,omitempty"`
|
||||
@@ -505,7 +504,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
|
||||
for key, val := range m {
|
||||
opt, ok := jsonOpts[key]
|
||||
if !ok {
|
||||
slog.Warn("invalid option provided", "option", opt.Name)
|
||||
slog.Warn("invalid option provided", "option", key)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -615,7 +614,6 @@ func DefaultOptions() Options {
|
||||
F16KV: true,
|
||||
UseMLock: false,
|
||||
UseMMap: nil,
|
||||
UseNUMA: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ package api
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"errors"
|
||||
"math"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -192,7 +192,7 @@ func TestUseMmapFormatParams(t *testing.T) {
|
||||
"use_mmap": {"foo"},
|
||||
},
|
||||
exp: nil,
|
||||
err: fmt.Errorf("invalid bool value [foo]"),
|
||||
err: errors.New("invalid bool value [foo]"),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
|
||||
package lifecycle
|
||||
|
||||
import "fmt"
|
||||
import "errors"
|
||||
|
||||
func GetStarted() error {
|
||||
return fmt.Errorf("GetStarted not implemented")
|
||||
return errors.New("not implemented")
|
||||
}
|
||||
|
||||
@@ -34,7 +34,6 @@ func GetStarted() error {
|
||||
Sys: &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
|
||||
}
|
||||
proc, err := os.StartProcess(args[0], args, attrs)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to start getting started shell %w", err)
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ func InitLogging() {
|
||||
// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
|
||||
} else {
|
||||
rotateLogs(AppLogFile)
|
||||
logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
|
||||
logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
|
||||
if err != nil {
|
||||
slog.Error(fmt.Sprintf("failed to create server log %v", err))
|
||||
return
|
||||
|
||||
@@ -5,5 +5,5 @@ package lifecycle
|
||||
import "log/slog"
|
||||
|
||||
func ShowLogs() {
|
||||
slog.Warn("ShowLogs not yet implemented")
|
||||
slog.Warn("not implemented")
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ func TestRotateLogs(t *testing.T) {
|
||||
// No log exists
|
||||
rotateLogs(logFile)
|
||||
|
||||
require.NoError(t, os.WriteFile(logFile, []byte("1"), 0644))
|
||||
require.NoError(t, os.WriteFile(logFile, []byte("1"), 0o644))
|
||||
assert.FileExists(t, logFile)
|
||||
// First rotation
|
||||
rotateLogs(logFile)
|
||||
@@ -32,7 +32,7 @@ func TestRotateLogs(t *testing.T) {
|
||||
assert.NoFileExists(t, logFile)
|
||||
|
||||
for i := 2; i <= LogRotationCount+1; i++ {
|
||||
require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0644))
|
||||
require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0o644))
|
||||
assert.FileExists(t, logFile)
|
||||
rotateLogs(logFile)
|
||||
assert.NoFileExists(t, logFile)
|
||||
|
||||
@@ -55,7 +55,7 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
|
||||
}
|
||||
|
||||
rotateLogs(ServerLogFile)
|
||||
logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
|
||||
logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create server log: %w", err)
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -46,7 +47,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
|
||||
query.Add("os", runtime.GOOS)
|
||||
query.Add("arch", runtime.GOARCH)
|
||||
query.Add("version", version.Version)
|
||||
query.Add("ts", fmt.Sprintf("%d", time.Now().Unix()))
|
||||
query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10))
|
||||
|
||||
nonce, err := auth.NewNonce(rand.Reader, 16)
|
||||
if err != nil {
|
||||
|
||||
@@ -4,9 +4,9 @@ package lifecycle
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"errors"
|
||||
)
|
||||
|
||||
func DoUpgrade(cancel context.CancelFunc, done chan int) error {
|
||||
return fmt.Errorf("DoUpgrade not yet implemented")
|
||||
return errors.New("not implemented")
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package lifecycle
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
@@ -15,7 +16,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
|
||||
return fmt.Errorf("failed to lookup downloads: %s", err)
|
||||
}
|
||||
if len(files) == 0 {
|
||||
return fmt.Errorf("no update downloads found")
|
||||
return errors.New("no update downloads found")
|
||||
} else if len(files) > 1 {
|
||||
// Shouldn't happen
|
||||
slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
|
||||
@@ -64,7 +65,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
|
||||
}
|
||||
} else {
|
||||
// TODO - some details about why it didn't start, or is this a pedantic error case?
|
||||
return fmt.Errorf("installer process did not start")
|
||||
return errors.New("installer process did not start")
|
||||
}
|
||||
|
||||
// TODO should we linger for a moment and check to make sure it's actually running by checking the pid?
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
package tray
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"errors"
|
||||
|
||||
"github.com/ollama/ollama/app/tray/commontray"
|
||||
)
|
||||
|
||||
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
|
||||
return nil, fmt.Errorf("NOT IMPLEMENTED YET")
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
@@ -11,9 +11,7 @@ import (
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
var (
|
||||
quitOnce sync.Once
|
||||
)
|
||||
var quitOnce sync.Once
|
||||
|
||||
func (t *winTray) Run() {
|
||||
nativeLoop()
|
||||
|
||||
@@ -11,12 +11,12 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
updatAvailableMenuID = 1
|
||||
updateMenuID = updatAvailableMenuID + 1
|
||||
separatorMenuID = updateMenuID + 1
|
||||
diagLogsMenuID = separatorMenuID + 1
|
||||
diagSeparatorMenuID = diagLogsMenuID + 1
|
||||
quitMenuID = diagSeparatorMenuID + 1
|
||||
updateAvailableMenuID = 1
|
||||
updateMenuID = updateAvailableMenuID + 1
|
||||
separatorMenuID = updateMenuID + 1
|
||||
diagLogsMenuID = separatorMenuID + 1
|
||||
diagSeparatorMenuID = diagLogsMenuID + 1
|
||||
quitMenuID = diagSeparatorMenuID + 1
|
||||
)
|
||||
|
||||
func (t *winTray) initMenus() error {
|
||||
@@ -35,7 +35,7 @@ func (t *winTray) initMenus() error {
|
||||
func (t *winTray) UpdateAvailable(ver string) error {
|
||||
if !t.updateNotified {
|
||||
slog.Debug("updating menu and sending notification for new update")
|
||||
if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
|
||||
if err := t.addOrUpdateMenuItem(updateAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
|
||||
return fmt.Errorf("unable to create menu entries %w", err)
|
||||
}
|
||||
if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {
|
||||
|
||||
@@ -11,10 +11,12 @@ import (
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"sync"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ollama/ollama/app/tray/commontray"
|
||||
"golang.org/x/sys/windows"
|
||||
|
||||
"github.com/ollama/ollama/app/tray/commontray"
|
||||
)
|
||||
|
||||
// Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
|
||||
@@ -414,7 +416,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
|
||||
iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)
|
||||
|
||||
if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
|
||||
if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil {
|
||||
if err := os.WriteFile(iconFilePath, iconBytes, 0o644); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
@@ -432,7 +434,12 @@ func (t *winTray) setIcon(src string) error {
|
||||
t.muNID.Lock()
|
||||
defer t.muNID.Unlock()
|
||||
t.nid.Icon = h
|
||||
t.nid.Flags |= NIF_ICON
|
||||
t.nid.Flags |= NIF_ICON | NIF_TIP
|
||||
if toolTipUTF16, err := syscall.UTF16FromString(commontray.ToolTip); err == nil {
|
||||
copy(t.nid.Tip[:], toolTipUTF16)
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
t.nid.Size = uint32(unsafe.Sizeof(*t.nid))
|
||||
|
||||
return t.nid.modify()
|
||||
|
||||
@@ -61,6 +61,7 @@ const (
|
||||
MIIM_SUBMENU = 0x00000004
|
||||
MIM_APPLYTOSUBMENUS = 0x80000000
|
||||
NIF_ICON = 0x00000002
|
||||
NIF_TIP = 0x00000004
|
||||
NIF_INFO = 0x00000010
|
||||
NIF_MESSAGE = 0x00000001
|
||||
SW_HIDE = 0
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
@@ -78,7 +79,7 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
|
||||
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
|
||||
parts := bytes.Split(publicKey, []byte(" "))
|
||||
if len(parts) < 2 {
|
||||
return "", fmt.Errorf("malformed public key")
|
||||
return "", errors.New("malformed public key")
|
||||
}
|
||||
|
||||
signedData, err := privateKey.Sign(rand.Reader, bts)
|
||||
|
||||
49
cmd/cmd.go
49
cmd/cmd.go
@@ -22,6 +22,7 @@ import (
|
||||
"runtime"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
@@ -78,6 +79,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
status := "transferring model data"
|
||||
spinner := progress.NewSpinner(status)
|
||||
p.Add(status, spinner)
|
||||
defer p.Stop()
|
||||
|
||||
for i := range modelfile.Commands {
|
||||
switch modelfile.Commands[i].Name {
|
||||
@@ -112,7 +114,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
path = tempfile
|
||||
}
|
||||
|
||||
digest, err := createBlob(cmd, client, path)
|
||||
digest, err := createBlob(cmd, client, path, spinner)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -263,13 +265,20 @@ func tempZipFiles(path string) (string, error) {
|
||||
return tempfile.Name(), nil
|
||||
}
|
||||
|
||||
func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
|
||||
func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *progress.Spinner) (string, error) {
|
||||
bin, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer bin.Close()
|
||||
|
||||
// Get file info to retrieve the size
|
||||
fileInfo, err := bin.Stat()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
fileSize := fileInfo.Size()
|
||||
|
||||
hash := sha256.New()
|
||||
if _, err := io.Copy(hash, bin); err != nil {
|
||||
return "", err
|
||||
@@ -279,13 +288,43 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er
|
||||
return "", err
|
||||
}
|
||||
|
||||
var pw progressWriter
|
||||
status := "transferring model data 0%"
|
||||
spinner.SetMessage(status)
|
||||
|
||||
done := make(chan struct{})
|
||||
defer close(done)
|
||||
|
||||
go func() {
|
||||
ticker := time.NewTicker(60 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
spinner.SetMessage(fmt.Sprintf("transferring model data %d%%", int(100*pw.n.Load()/fileSize)))
|
||||
case <-done:
|
||||
spinner.SetMessage("transferring model data 100%")
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
|
||||
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
|
||||
if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return digest, nil
|
||||
}
|
||||
|
||||
type progressWriter struct {
|
||||
n atomic.Int64
|
||||
}
|
||||
|
||||
func (w *progressWriter) Write(p []byte) (n int, err error) {
|
||||
w.n.Add(int64(len(p)))
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
func RunHandler(cmd *cobra.Command, args []string) error {
|
||||
interactive := true
|
||||
|
||||
@@ -1086,7 +1125,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func RunServer(cmd *cobra.Command, _ []string) error {
|
||||
func RunServer(_ *cobra.Command, _ []string) error {
|
||||
if err := initializeKeypair(); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1160,7 +1199,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
|
||||
return err
|
||||
}
|
||||
if err := startApp(cmd.Context(), client); err != nil {
|
||||
return fmt.Errorf("could not connect to ollama app, is it running?")
|
||||
return errors.New("could not connect to ollama app, is it running?")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -604,7 +604,7 @@ func getImageData(filePath string) ([]byte, error) {
|
||||
// Check if the file size exceeds 100MB
|
||||
var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
|
||||
if info.Size() > maxSize {
|
||||
return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
|
||||
return nil, errors.New("file size exceeds maximum limit (100MB)")
|
||||
}
|
||||
|
||||
buf = make([]byte, info.Size())
|
||||
|
||||
@@ -2,7 +2,7 @@ package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"errors"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
@@ -20,7 +20,7 @@ func startApp(ctx context.Context, client *api.Client) error {
|
||||
return err
|
||||
}
|
||||
if !strings.Contains(link, "Ollama.app") {
|
||||
return fmt.Errorf("could not find ollama app")
|
||||
return errors.New("could not find ollama app")
|
||||
}
|
||||
path := strings.Split(link, "Ollama.app")
|
||||
if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
|
||||
|
||||
@@ -4,11 +4,11 @@ package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"errors"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func startApp(ctx context.Context, client *api.Client) error {
|
||||
return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it")
|
||||
return errors.New("could not connect to ollama server, run 'ollama serve' to start it")
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ func startApp(ctx context.Context, client *api.Client) error {
|
||||
// Finally look in the path
|
||||
appExe, err = exec.LookPath(AppName)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not locate ollama app")
|
||||
return errors.New("could not locate ollama app")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,200 +1,128 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"google.golang.org/protobuf/proto"
|
||||
|
||||
"github.com/ollama/ollama/convert/sentencepiece"
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
const (
|
||||
_ int32 = iota
|
||||
tokenTypeNormal
|
||||
tokenTypeUnknown
|
||||
tokenTypeControl
|
||||
tokenTypeUserDefined
|
||||
tokenTypeUnused
|
||||
tokenTypeByte
|
||||
)
|
||||
|
||||
type Params struct {
|
||||
Architectures []string `json:"architectures"`
|
||||
VocabSize int `json:"vocab_size"`
|
||||
HiddenSize int `json:"hidden_size"` // n_embd
|
||||
HiddenLayers int `json:"num_hidden_layers"` // n_layer
|
||||
ContextSize int `json:"max_position_embeddings"`
|
||||
IntermediateSize int `json:"intermediate_size"`
|
||||
AttentionHeads int `json:"num_attention_heads"` // n_head
|
||||
KeyValHeads int `json:"num_key_value_heads"`
|
||||
NormEPS float64 `json:"rms_norm_eps"`
|
||||
BoSTokenID int `json:"bos_token_id"`
|
||||
EoSTokenID int `json:"eos_token_id"`
|
||||
HeadDimension int `json:"head_dim"`
|
||||
PaddingTokenID int `json:"pad_token_id"`
|
||||
RopeFrequencyBase float64 `json:"rope_theta"`
|
||||
|
||||
Experts int `json:"num_local_experts"`
|
||||
ExpertsUsed int `json:"num_experts_per_tok"`
|
||||
|
||||
PreTokenizer string
|
||||
|
||||
ByteOrder
|
||||
type Parameters struct {
|
||||
Architectures []string `json:"architectures"`
|
||||
VocabSize uint32 `json:"vocab_size"`
|
||||
}
|
||||
|
||||
type ByteOrder interface {
|
||||
binary.ByteOrder
|
||||
binary.AppendByteOrder
|
||||
func (Parameters) KV(t *Tokenizer) llm.KV {
|
||||
kv := llm.KV{
|
||||
"general.file_type": uint32(1),
|
||||
"general.quantization_version": uint32(2),
|
||||
"tokenizer.ggml.pre": t.Pre,
|
||||
"tokenizer.ggml.model": t.Vocabulary.Model,
|
||||
"tokenizer.ggml.tokens": t.Vocabulary.Tokens,
|
||||
"tokenizer.ggml.scores": t.Vocabulary.Scores,
|
||||
"tokenizer.ggml.token_type": t.Vocabulary.Types,
|
||||
}
|
||||
|
||||
if len(t.Merges) > 0 {
|
||||
kv["tokenizer.ggml.merges"] = t.Merges
|
||||
}
|
||||
|
||||
if t.Template != "" {
|
||||
kv["tokenizer.chat_template"] = t.Template
|
||||
}
|
||||
|
||||
for _, sv := range t.SpecialVocabulary {
|
||||
kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
|
||||
kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
type ModelArch interface {
|
||||
GetTensors() error
|
||||
LoadVocab() error
|
||||
WriteGGUF(io.WriteSeeker) error
|
||||
func (Parameters) specialTokenTypes() []string {
|
||||
return []string{
|
||||
"bos", "eos", "unk", "sep", "pad", "cls", "mask",
|
||||
}
|
||||
}
|
||||
|
||||
type ModelFormat interface {
|
||||
GetLayerName(string) (string, error)
|
||||
GetTensors(string, *Params) ([]llm.Tensor, error)
|
||||
GetParams(string) (*Params, error)
|
||||
GetModelArch(string, string, *Params) (ModelArch, error)
|
||||
func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
|
||||
return llm.WriteGGUF(ws, kv, ts)
|
||||
}
|
||||
|
||||
type ModelData struct {
|
||||
Path string
|
||||
Name string
|
||||
Params *Params
|
||||
Vocab *Vocab
|
||||
Tensors []llm.Tensor
|
||||
Format ModelFormat
|
||||
type Converter interface {
|
||||
// KV maps parameters to LLM key-values
|
||||
KV(*Tokenizer) llm.KV
|
||||
// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
|
||||
Tensors([]Tensor) []llm.Tensor
|
||||
|
||||
// tensorName returns the LLM tensor name for a specific input name
|
||||
tensorName(string) string
|
||||
// specialTokenTypes returns any special token types the model uses
|
||||
specialTokenTypes() []string
|
||||
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
|
||||
}
|
||||
|
||||
func GetModelFormat(dirname string) (ModelFormat, error) {
|
||||
files, err := filepath.Glob(filepath.Join(dirname, "*"))
|
||||
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
|
||||
// and files it finds in the input path.
|
||||
// Supported input model formats include safetensors.
|
||||
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
|
||||
func Convert(fsys fs.FS, ws io.WriteSeeker) error {
|
||||
bts, err := fs.ReadFile(fsys, "config.json")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return err
|
||||
}
|
||||
|
||||
for _, fn := range files {
|
||||
if strings.HasSuffix(fn, ".safetensors") {
|
||||
return &SafetensorFormat{}, nil
|
||||
} else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") {
|
||||
slog.Debug("model is torch")
|
||||
return &TorchFormat{}, nil
|
||||
}
|
||||
var p Parameters
|
||||
if err := json.Unmarshal(bts, &p); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("couldn't determine model format")
|
||||
}
|
||||
if len(p.Architectures) < 1 {
|
||||
return errors.New("unknown architecture")
|
||||
}
|
||||
|
||||
// Details on gguf's tokenizer can be found at:
|
||||
// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer
|
||||
type Vocab struct {
|
||||
Tokens []string
|
||||
Scores []float32
|
||||
Types []int32
|
||||
Merges []string
|
||||
}
|
||||
var conv Converter
|
||||
switch p.Architectures[0] {
|
||||
case "LlamaForCausalLM", "MistralForCausalLM":
|
||||
conv = &llama{}
|
||||
case "MixtralForCausalLM":
|
||||
conv = &mixtral{}
|
||||
case "GemmaForCausalLM":
|
||||
conv = &gemma{}
|
||||
case "Phi3ForCausalLM":
|
||||
conv = &phi3{}
|
||||
default:
|
||||
return errors.New("unsupported architecture")
|
||||
}
|
||||
|
||||
func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
|
||||
slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model")))
|
||||
in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model"))
|
||||
if err := json.Unmarshal(bts, conv); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
t, err := parseTokenizer(fsys, conv.specialTokenTypes())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return err
|
||||
}
|
||||
|
||||
// To regenerate sentencepiece from the protobufs use:
|
||||
// protoc -I=./ --go_out=./ sentencepiece_model.proto
|
||||
modelProto := &sentencepiece.ModelProto{}
|
||||
if err := proto.Unmarshal(in, modelProto); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
v := &Vocab{
|
||||
Tokens: make([]string, 0),
|
||||
Scores: make([]float32, 0),
|
||||
Types: make([]int32, 0),
|
||||
}
|
||||
|
||||
pieces := modelProto.GetPieces()
|
||||
for _, p := range pieces {
|
||||
v.Tokens = append(v.Tokens, p.GetPiece())
|
||||
v.Scores = append(v.Scores, p.GetScore())
|
||||
t := p.GetType()
|
||||
switch t {
|
||||
case sentencepiece.ModelProto_SentencePiece_UNKNOWN:
|
||||
case sentencepiece.ModelProto_SentencePiece_CONTROL:
|
||||
case sentencepiece.ModelProto_SentencePiece_UNUSED:
|
||||
case sentencepiece.ModelProto_SentencePiece_BYTE:
|
||||
default:
|
||||
t = sentencepiece.ModelProto_SentencePiece_NORMAL
|
||||
if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) {
|
||||
slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens))
|
||||
for i := range vocabSize - len(t.Vocabulary.Tokens) {
|
||||
t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
|
||||
t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
|
||||
t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
|
||||
}
|
||||
v.Types = append(v.Types, int32(t))
|
||||
} else {
|
||||
slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
|
||||
}
|
||||
|
||||
slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens)))
|
||||
|
||||
// add any additional tokens
|
||||
addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json"))
|
||||
if os.IsNotExist(err) {
|
||||
return v, nil
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
ts, err := parseTensors(fsys)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
slog.Info("reading user defined tokens")
|
||||
|
||||
var extraTokenData map[string]int
|
||||
if err := json.Unmarshal(addIn, &extraTokenData); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
type token struct {
|
||||
key string
|
||||
pos int
|
||||
}
|
||||
|
||||
extraTokens := make([]token, 0)
|
||||
for k, id := range extraTokenData {
|
||||
extraTokens = append(extraTokens, token{k, id})
|
||||
}
|
||||
|
||||
slices.SortFunc(extraTokens, func(a, b token) int {
|
||||
return cmp.Compare(a.pos, b.pos)
|
||||
})
|
||||
|
||||
numToks := len(v.Tokens)
|
||||
|
||||
for cnt, t := range extraTokens {
|
||||
// the token id should match the specific index for the total number of tokens
|
||||
if t.pos != cnt+numToks {
|
||||
return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key)
|
||||
}
|
||||
v.Tokens = append(v.Tokens, t.key)
|
||||
v.Scores = append(v.Scores, -1000.0)
|
||||
v.Types = append(v.Types, tokenTypeUserDefined)
|
||||
}
|
||||
slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
|
||||
|
||||
if params.VocabSize > len(v.Tokens) {
|
||||
missingTokens := params.VocabSize - len(v.Tokens)
|
||||
slog.Warn(fmt.Sprintf("vocab is missing %d tokens", missingTokens))
|
||||
for cnt := range missingTokens {
|
||||
v.Tokens = append(v.Tokens, fmt.Sprintf("<dummy%05d>", cnt+1))
|
||||
v.Scores = append(v.Scores, -1)
|
||||
v.Types = append(v.Types, tokenTypeUserDefined)
|
||||
}
|
||||
}
|
||||
|
||||
return v, nil
|
||||
return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
|
||||
}
|
||||
|
||||
103
convert/convert_gemma.go
Normal file
103
convert/convert_gemma.go
Normal file
@@ -0,0 +1,103 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type gemma struct {
|
||||
Parameters
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
HiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
}
|
||||
|
||||
var _ Converter = (*gemma)(nil)
|
||||
|
||||
func (p *gemma) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.Parameters.KV(t)
|
||||
kv["general.architecture"] = "gemma"
|
||||
kv["general.name"] = "gemma"
|
||||
kv["gemma.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["gemma.embedding_length"] = p.HiddenSize
|
||||
kv["gemma.block_count"] = p.HiddenLayers
|
||||
kv["gemma.feed_forward_length"] = p.IntermediateSize
|
||||
kv["gemma.attention.head_count"] = p.NumAttentionHeads
|
||||
kv["gemma.attention.head_count_kv"] = p.NumKeyValueHeads
|
||||
kv["gemma.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||
kv["gemma.attention.key_length"] = p.HeadDim
|
||||
kv["gemma.attention.value_length"] = p.HeadDim
|
||||
kv["tokenizer.ggml.eot_token_id"] = uint32(107)
|
||||
kv["tokenizer.ggml.middle_token_id"] = uint32(68)
|
||||
kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
|
||||
kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var out []llm.Tensor
|
||||
for _, t := range ts {
|
||||
name := p.tensorName(t.Name())
|
||||
if strings.HasSuffix(name, "_norm.weight") {
|
||||
t.SetRepacker(p.addOne)
|
||||
}
|
||||
|
||||
out = append(out, llm.Tensor{
|
||||
Name: name,
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *gemma) tensorName(n string) string {
|
||||
return strings.NewReplacer(
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.norm", "output_norm",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
"block_sparse_moe.gate", "ffn_inp",
|
||||
).Replace(n)
|
||||
}
|
||||
|
||||
func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
|
||||
ones := tensor.Ones(tensor.Float32, int(shape[0]))
|
||||
|
||||
n, err := n.Add(ones)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
179
convert/convert_llama.go
Normal file
179
convert/convert_llama.go
Normal file
@@ -0,0 +1,179 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type llama struct {
|
||||
Parameters
|
||||
NLayers uint32 `json:"n_layers"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NLayer uint32 `json:"n_layer"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
NCtx uint32 `json:"n_ctx"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NEmbd uint32 `json:"n_embd"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NInner uint32 `json:"n_inner"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NHead uint32 `json:"n_head"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RopeScaling struct {
|
||||
Type string `json:"type"`
|
||||
Factor float32 `json:"factor"`
|
||||
} `json:"rope_scaling"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
LayerNormEPS float32 `json:"layer_norm_eps"`
|
||||
LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
|
||||
NormEpsilon float32 `json:"norm_epsilon"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
}
|
||||
|
||||
var _ Converter = (*llama)(nil)
|
||||
|
||||
func (p *llama) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.Parameters.KV(t)
|
||||
kv["general.architecture"] = "llama"
|
||||
kv["general.name"] = "llama"
|
||||
kv["llama.vocab_size"] = p.VocabSize
|
||||
|
||||
kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
|
||||
|
||||
if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
|
||||
kv["llama.context_length"] = contextLength
|
||||
}
|
||||
|
||||
if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 {
|
||||
kv["llama.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
|
||||
}
|
||||
|
||||
if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
|
||||
kv["llama.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
|
||||
}
|
||||
|
||||
if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
|
||||
kv["llama.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
|
||||
kv["llama.rope.dimension_count"] = p.HiddenSize / headCount
|
||||
}
|
||||
|
||||
if p.RopeTheta > 0 {
|
||||
kv["llama.rope.freq_base"] = p.RopeTheta
|
||||
}
|
||||
|
||||
if p.RopeScaling.Type == "linear" {
|
||||
kv["llama.rope.scaling.type"] = p.RopeScaling.Type
|
||||
kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
|
||||
}
|
||||
|
||||
if p.NumKeyValueHeads > 0 {
|
||||
kv["llama.attention.head_count_kv"] = p.NumKeyValueHeads
|
||||
}
|
||||
|
||||
if p.RMSNormEPS > 0 {
|
||||
kv["llama.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||
}
|
||||
|
||||
if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
|
||||
kv["llama.attention.layer_norm_epsilon"] = layerNormEpsilon
|
||||
}
|
||||
|
||||
if p.HeadDim > 0 {
|
||||
kv["llama.attention.key_length"] = p.HeadDim
|
||||
kv["llama.attention.value_length"] = p.HeadDim
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var out []llm.Tensor
|
||||
for _, t := range ts {
|
||||
name := p.tensorName(t.Name())
|
||||
if strings.HasSuffix(name, "attn_q.weight") ||
|
||||
strings.HasSuffix(name, "attn_k.weight") {
|
||||
t.SetRepacker(p.repack)
|
||||
}
|
||||
|
||||
out = append(out, llm.Tensor{
|
||||
Name: name,
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *llama) tensorName(n string) string {
|
||||
return strings.NewReplacer(
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.norm", "output_norm",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
// mixtral
|
||||
"block_sparse_moe.gate", "ffn_gate_inp",
|
||||
).Replace(n)
|
||||
}
|
||||
|
||||
func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
var dims []int
|
||||
for _, dim := range shape {
|
||||
dims = append(dims, int(dim))
|
||||
}
|
||||
|
||||
var heads uint32
|
||||
if strings.HasSuffix(name, "q_proj.weight") {
|
||||
heads = p.NumAttentionHeads
|
||||
} else if strings.HasSuffix(name, "k_proj.weight") {
|
||||
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||
} else {
|
||||
return nil, fmt.Errorf("unknown tensor for repack: %s", name)
|
||||
}
|
||||
|
||||
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.T(0, 2, 1, 3); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
89
convert/convert_mixtral.go
Normal file
89
convert/convert_mixtral.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type mixtral struct {
|
||||
llama
|
||||
NumLocalExperts uint32 `json:"num_local_experts"`
|
||||
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
|
||||
}
|
||||
|
||||
var _ Converter = (*mixtral)(nil)
|
||||
|
||||
func (p *mixtral) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.llama.KV(t)
|
||||
|
||||
if p.NumLocalExperts > 0 {
|
||||
kv["llama.expert_count"] = p.NumLocalExperts
|
||||
}
|
||||
|
||||
if p.NumExpertsPerToken > 0 {
|
||||
kv["llama.expert_used_count"] = p.NumExpertsPerToken
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
|
||||
oldnew := []string{
|
||||
"model.layers", "blk",
|
||||
"w1", "ffn_gate_exps",
|
||||
"w2", "ffn_down_exps",
|
||||
"w3", "ffn_up_exps",
|
||||
}
|
||||
|
||||
for i := range p.NumLocalExperts {
|
||||
oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".")
|
||||
}
|
||||
|
||||
// group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor
|
||||
namer := strings.NewReplacer(oldnew...)
|
||||
experts := make(map[string]experts)
|
||||
|
||||
// merge experts into a single tensor while removing them from ts
|
||||
ts = slices.DeleteFunc(ts, func(t Tensor) bool {
|
||||
if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") {
|
||||
return false
|
||||
}
|
||||
|
||||
name := namer.Replace(t.Name())
|
||||
experts[name] = append(experts[name], t)
|
||||
return true
|
||||
})
|
||||
|
||||
var out []llm.Tensor
|
||||
for n, e := range experts {
|
||||
// TODO(mxyng): sanity check experts
|
||||
out = append(out, llm.Tensor{
|
||||
Name: n,
|
||||
Kind: e[0].Kind(),
|
||||
Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...),
|
||||
WriterTo: e,
|
||||
})
|
||||
}
|
||||
|
||||
return append(out, p.llama.Tensors(ts)...)
|
||||
}
|
||||
|
||||
type experts []Tensor
|
||||
|
||||
func (e experts) WriteTo(w io.Writer) (int64, error) {
|
||||
// TODO(mxyng): experts _should_ be numerically sorted by expert but this should check
|
||||
for _, t := range e {
|
||||
// the canonical merged experts tensor stacks all experts along a new, 0 axis,
|
||||
// e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers
|
||||
// this accomplishes the same thing by writing each expert tensor in sequence
|
||||
if _, err := t.WriteTo(w); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
return 0, nil
|
||||
}
|
||||
125
convert/convert_phi3.go
Normal file
125
convert/convert_phi3.go
Normal file
@@ -0,0 +1,125 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"math"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type phi3 struct {
|
||||
Parameters
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NLayers uint32 `json:"n_layers"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
NEmbd uint32 `json:"n_embd"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NHead uint32 `json:"n_head"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
NHeadKV uint32 `json:"n_head_kv"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RopeScaling struct {
|
||||
Type string `json:"type"`
|
||||
LongFactor ropeFactor `json:"long_factor"`
|
||||
ShortFactor ropeFactor `json:"short_factor"`
|
||||
} `json:"rope_scaling"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
NPositions uint32 `json:"n_positions"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
||||
SlidingWindow uint32 `json:"sliding_window"`
|
||||
}
|
||||
|
||||
var _ Converter = (*phi3)(nil)
|
||||
|
||||
func (p *phi3) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.Parameters.KV(t)
|
||||
kv["general.architecture"] = "phi3"
|
||||
kv["general.name"] = "phi3"
|
||||
kv["phi3.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
|
||||
kv["phi3.feed_forward_length"] = p.IntermediateSize
|
||||
kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
|
||||
kv["phi3.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
|
||||
kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
|
||||
kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||
kv["phi3.rope.dimension_count"] = p.HiddenSize / cmp.Or(p.NumAttentionHeads, p.NHead)
|
||||
kv["phi3.rope.freq_base"] = p.RopeTheta
|
||||
kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
|
||||
kv["phi3.attention.sliding_window"] = p.SlidingWindow
|
||||
|
||||
scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)
|
||||
|
||||
switch p.RopeScaling.Type {
|
||||
case "":
|
||||
// no scaling
|
||||
case "su", "longrope":
|
||||
kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
|
||||
case "yarn":
|
||||
kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
|
||||
default:
|
||||
panic("unknown rope scaling type")
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var addRopeFactors sync.Once
|
||||
|
||||
out := make([]llm.Tensor, 0, len(ts)+2)
|
||||
for _, t := range ts {
|
||||
name := p.tensorName(t.Name())
|
||||
if strings.HasPrefix(name, "blk.0.") {
|
||||
addRopeFactors.Do(func() {
|
||||
out = append(out, llm.Tensor{
|
||||
Name: "rope_factors_long.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{uint64(len(p.RopeScaling.LongFactor))},
|
||||
WriterTo: p.RopeScaling.LongFactor,
|
||||
}, llm.Tensor{
|
||||
Name: "rope_factors_short.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{uint64(len(p.RopeScaling.ShortFactor))},
|
||||
WriterTo: p.RopeScaling.ShortFactor,
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
out = append(out, llm.Tensor{
|
||||
Name: name,
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *phi3) tensorName(n string) string {
|
||||
return strings.NewReplacer(
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.norm", "output_norm",
|
||||
"model.layers", "blk",
|
||||
"input_layernorm", "attn_norm",
|
||||
"self_attn.qkv_proj", "attn_qkv",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.gate_up_proj", "ffn_up",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
).Replace(n)
|
||||
}
|
||||
|
||||
// ropeFactor is a list of rope scaling factors that can be written out as the
// data of a tensor.
type ropeFactor []float32

// WriteTo writes the factors to w as little-endian 32-bit floats and returns
// the number of bytes written, which is 4 bytes per factor on success. When
// the write fails it returns 0 together with the error.
func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
	if err := binary.Write(w, binary.LittleEndian, []float32(r)); err != nil {
		return 0, err
	}

	return int64(len(r)) * 4, nil
}
|
||||
@@ -1,48 +1,35 @@
|
||||
//go:build slow
|
||||
|
||||
package convert
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"log/slog"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/exp/maps"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) {
|
||||
func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
|
||||
t.Helper()
|
||||
|
||||
mf, err := GetModelFormat(p)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
params, err := mf.GetParams(p)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
arch, err := mf.GetModelArch("", p, params)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := arch.LoadVocab(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := arch.GetTensors(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
f, err := os.CreateTemp(t.TempDir(), "f16")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if err := arch.WriteGGUF(f); err != nil {
|
||||
if err := Convert(fsys, f); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -50,53 +37,93 @@ func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) {
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer r.Close()
|
||||
t.Cleanup(func() { r.Close() })
|
||||
|
||||
m, _, err := llm.DecodeGGML(r)
|
||||
m, _, err := llm.DecodeGGML(r, math.MaxInt)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
return m.KV(), m.Tensors()
|
||||
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
return r, m.KV(), m.Tensors()
|
||||
}
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
var level slog.Level
|
||||
flag.TextVar(&level, "level", slog.LevelInfo, "log level")
|
||||
flag.Parse()
|
||||
slog.SetLogLoggerLevel(level)
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
func TestConvertFull(t *testing.T) {
|
||||
cases := []struct {
|
||||
path string
|
||||
arch string
|
||||
tensors int
|
||||
layers int
|
||||
}{
|
||||
{"Meta-Llama-3-8B-Instruct", "llama", 291, 35},
|
||||
{"Mistral-7B-Instruct-v0.2", "llama", 291, 35},
|
||||
{"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35},
|
||||
{"gemma-2b-it", "gemma", 164, 20},
|
||||
cases := []string{
|
||||
"Meta-Llama-3-8B-Instruct",
|
||||
"Mistral-7B-Instruct-v0.2",
|
||||
"Mixtral-8x7B-Instruct-v0.1",
|
||||
"gemma-2b-it",
|
||||
// microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8
|
||||
"Phi-3-mini-128k-instruct",
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.path, func(t *testing.T) {
|
||||
p := filepath.Join("testdata", tt.path)
|
||||
if _, err := os.Stat(p); err != nil {
|
||||
for i := range cases {
|
||||
tt := cases[i]
|
||||
t.Run(tt, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
p := filepath.Join("testdata", tt)
|
||||
if testing.Short() {
|
||||
t.Skip("skipping in short mode")
|
||||
} else if _, err := os.Stat(p); err != nil {
|
||||
t.Skipf("%s not found", p)
|
||||
}
|
||||
|
||||
kv, tensors := convertFull(t, p)
|
||||
f, kv, tensors := convertFull(t, os.DirFS(p))
|
||||
actual := make(map[string]string)
|
||||
for k, v := range kv {
|
||||
if s, ok := v.(json.Marshaler); !ok {
|
||||
actual[k] = fmt.Sprintf("%v", v)
|
||||
} else {
|
||||
bts, err := json.Marshal(s)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if kv.Architecture() != tt.arch {
|
||||
t.Fatalf("expected llama, got %s", kv.Architecture())
|
||||
actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
|
||||
}
|
||||
}
|
||||
|
||||
if kv.FileType().String() != "F16" {
|
||||
t.Fatalf("expected F16, got %s", kv.FileType())
|
||||
for _, tensor := range tensors.Items {
|
||||
sha256sum := sha256.New()
|
||||
sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
|
||||
if _, err := io.Copy(sha256sum, sr); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
|
||||
}
|
||||
|
||||
if len(tensors) != tt.tensors {
|
||||
t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors))
|
||||
expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
layers := tensors.Layers()
|
||||
if len(layers) != tt.layers {
|
||||
t.Fatalf("expected %d layers, got %d", tt.layers, len(layers))
|
||||
var expect map[string]string
|
||||
if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
keys := maps.Keys(expect)
|
||||
slices.Sort(keys)
|
||||
for _, k := range keys {
|
||||
if v, ok := actual[k]; !ok {
|
||||
t.Errorf("missing %s", k)
|
||||
} else if v != expect[k] {
|
||||
t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
58
convert/fs.go
Normal file
58
convert/fs.go
Normal file
@@ -0,0 +1,58 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"errors"
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
type ZipReader struct {
|
||||
r *zip.Reader
|
||||
p string
|
||||
|
||||
// limit is the maximum size of a file that can be read directly
|
||||
// from the zip archive. Files larger than this size will be extracted
|
||||
limit int64
|
||||
}
|
||||
|
||||
func NewZipReader(r *zip.Reader, p string, limit int64) fs.FS {
|
||||
return &ZipReader{r, p, limit}
|
||||
}
|
||||
|
||||
func (z *ZipReader) Open(name string) (fs.File, error) {
|
||||
r, err := z.r.Open(name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
if fi, err := r.Stat(); err != nil {
|
||||
return nil, err
|
||||
} else if fi.Size() < z.limit {
|
||||
return r, nil
|
||||
}
|
||||
|
||||
if !filepath.IsLocal(name) {
|
||||
return nil, zip.ErrInsecurePath
|
||||
}
|
||||
|
||||
n := filepath.Join(z.p, name)
|
||||
if _, err := os.Stat(n); errors.Is(err, os.ErrNotExist) {
|
||||
w, err := os.Create(n)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer w.Close()
|
||||
|
||||
if _, err := io.Copy(w, r); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return os.Open(n)
|
||||
}
|
||||
102
convert/gemma.go
102
convert/gemma.go
@@ -1,102 +0,0 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type GemmaModel struct {
|
||||
ModelData
|
||||
}
|
||||
|
||||
func addOnes(data []float32, vectorSize int) ([]float32, error) {
|
||||
n := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data))
|
||||
ones := tensor.Ones(tensor.Float32, vectorSize)
|
||||
|
||||
n, err := n.Add(ones)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
|
||||
func (m *GemmaModel) GetTensors() error {
|
||||
t, err := m.Format.GetTensors(m.Path, m.Params)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
slog.Debug(fmt.Sprintf("Total tensors: %d", len(t)))
|
||||
for _, l := range t {
|
||||
if strings.HasSuffix(l.Name, "norm.weight") {
|
||||
wt := l.WriterTo.(safetensorWriterTo)
|
||||
wt.repacker = m.Repack
|
||||
l.WriterTo = wt
|
||||
}
|
||||
m.Tensors = append(m.Tensors, l)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *GemmaModel) LoadVocab() error {
|
||||
v, err := LoadSentencePieceTokens(m.Path, m.Params)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Vocab = v
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *GemmaModel) Repack(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
return addOnes(data, int(shape[0]))
|
||||
}
|
||||
|
||||
func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
|
||||
kv := llm.KV{
|
||||
"general.architecture": "gemma",
|
||||
"general.name": m.Name,
|
||||
"gemma.context_length": uint32(m.Params.ContextSize),
|
||||
"gemma.embedding_length": uint32(m.Params.HiddenSize),
|
||||
"gemma.block_count": uint32(m.Params.HiddenLayers),
|
||||
"gemma.feed_forward_length": uint32(m.Params.IntermediateSize),
|
||||
"gemma.attention.head_count": uint32(m.Params.AttentionHeads),
|
||||
"gemma.attention.head_count_kv": uint32(m.Params.KeyValHeads),
|
||||
"gemma.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
|
||||
"gemma.attention.key_length": uint32(m.Params.HeadDimension),
|
||||
"gemma.attention.value_length": uint32(m.Params.HeadDimension),
|
||||
"general.file_type": uint32(1),
|
||||
"tokenizer.ggml.model": "llama",
|
||||
|
||||
"tokenizer.ggml.tokens": m.Vocab.Tokens,
|
||||
"tokenizer.ggml.scores": m.Vocab.Scores,
|
||||
"tokenizer.ggml.token_type": m.Vocab.Types,
|
||||
|
||||
"tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID),
|
||||
"tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID),
|
||||
"tokenizer.ggml.padding_token_id": uint32(m.Params.PaddingTokenID),
|
||||
"tokenizer.ggml.unknown_token_id": uint32(3),
|
||||
"tokenizer.ggml.add_bos_token": true,
|
||||
"tokenizer.ggml.add_eos_token": false,
|
||||
}
|
||||
|
||||
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
||||
}
|
||||
159
convert/llama.go
159
convert/llama.go
@@ -1,159 +0,0 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type LlamaModel struct {
|
||||
ModelData
|
||||
}
|
||||
|
||||
func (m *LlamaModel) GetTensors() error {
|
||||
t, err := m.Format.GetTensors(m.Path, m.Params)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, l := range t {
|
||||
matches := re.FindAllStringSubmatch(l.Name, -1)
|
||||
if len(matches) > 0 {
|
||||
switch m.Format.(type) {
|
||||
case *TorchFormat:
|
||||
wt := l.WriterTo.(torchWriterTo)
|
||||
wt.repacker = m.Repack
|
||||
l.WriterTo = wt
|
||||
case *SafetensorFormat:
|
||||
wt := l.WriterTo.(safetensorWriterTo)
|
||||
wt.repacker = m.Repack
|
||||
l.WriterTo = wt
|
||||
}
|
||||
}
|
||||
m.Tensors = append(m.Tensors, l)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LlamaModel) LoadVocab() (err error) {
|
||||
pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json"))
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return nil
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.Vocab = &Vocab{}
|
||||
for _, t := range ts {
|
||||
m.Vocab.Tokens = append(m.Vocab.Tokens, t.Content)
|
||||
m.Vocab.Types = append(m.Vocab.Types, t.Type())
|
||||
}
|
||||
|
||||
m.Vocab.Merges = merges
|
||||
m.Params.PreTokenizer = pre
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
|
||||
kv := llm.KV{
|
||||
"general.architecture": "llama",
|
||||
"general.name": m.Name,
|
||||
"llama.vocab_size": uint32(len(m.Vocab.Tokens)),
|
||||
"llama.context_length": uint32(m.Params.ContextSize),
|
||||
"llama.embedding_length": uint32(m.Params.HiddenSize),
|
||||
"llama.block_count": uint32(m.Params.HiddenLayers),
|
||||
"llama.feed_forward_length": uint32(m.Params.IntermediateSize),
|
||||
"llama.rope.freq_base": float32(m.Params.RopeFrequencyBase),
|
||||
"llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
|
||||
"llama.attention.head_count": uint32(m.Params.AttentionHeads),
|
||||
"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),
|
||||
"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
|
||||
"general.file_type": uint32(1),
|
||||
"tokenizer.ggml.model": "gpt2",
|
||||
|
||||
"tokenizer.ggml.pre": m.Params.PreTokenizer,
|
||||
"tokenizer.ggml.tokens": m.Vocab.Tokens,
|
||||
"tokenizer.ggml.token_type": m.Vocab.Types,
|
||||
|
||||
"tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID),
|
||||
"tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID),
|
||||
"tokenizer.ggml.unknown_token_id": uint32(0),
|
||||
}
|
||||
|
||||
if len(m.Vocab.Merges) > 0 {
|
||||
kv["tokenizer.ggml.merges"] = m.Vocab.Merges
|
||||
} else {
|
||||
kv["tokenizer.ggml.scores"] = m.Vocab.Scores
|
||||
}
|
||||
|
||||
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
||||
}
|
||||
|
||||
func (m *LlamaModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
return llamaRepack(name, m.Params, data, shape)
|
||||
}
|
||||
|
||||
func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([]float32, error) {
|
||||
var dims []int
|
||||
for _, dim := range shape {
|
||||
if dim != 0 {
|
||||
dims = append(dims, int(dim))
|
||||
}
|
||||
}
|
||||
|
||||
var heads int
|
||||
switch {
|
||||
case strings.HasSuffix(name, "attn_q.weight"):
|
||||
heads = params.AttentionHeads
|
||||
case strings.HasSuffix(name, "attn_k.weight"):
|
||||
heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown tensor name: %s", name)
|
||||
}
|
||||
|
||||
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
if err := n.Reshape(append([]int{heads, 2, dims[0] / heads / 2}, dims[1:]...)...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.T(0, 2, 1, 3); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
@@ -1,84 +0,0 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"io"
|
||||
"regexp"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type MistralModel struct {
|
||||
ModelData
|
||||
}
|
||||
|
||||
func (m *MistralModel) GetTensors() error {
|
||||
t, err := m.Format.GetTensors(m.Path, m.Params)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, l := range t {
|
||||
matches := re.FindAllStringSubmatch(l.Name, -1)
|
||||
if len(matches) > 0 {
|
||||
wt := l.WriterTo.(safetensorWriterTo)
|
||||
wt.repacker = m.Repack
|
||||
l.WriterTo = wt
|
||||
}
|
||||
m.Tensors = append(m.Tensors, l)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MistralModel) LoadVocab() error {
|
||||
v, err := LoadSentencePieceTokens(m.Path, m.Params)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Vocab = v
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
|
||||
kv := llm.KV{
|
||||
"general.architecture": "llama",
|
||||
"general.name": m.Name,
|
||||
"llama.context_length": uint32(m.Params.ContextSize),
|
||||
"llama.embedding_length": uint32(m.Params.HiddenSize),
|
||||
"llama.block_count": uint32(m.Params.HiddenLayers),
|
||||
"llama.feed_forward_length": uint32(m.Params.IntermediateSize),
|
||||
"llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
|
||||
"llama.attention.head_count": uint32(m.Params.AttentionHeads),
|
||||
"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),
|
||||
"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
|
||||
"general.file_type": uint32(1),
|
||||
"tokenizer.ggml.model": "llama",
|
||||
|
||||
"tokenizer.ggml.tokens": m.Vocab.Tokens,
|
||||
"tokenizer.ggml.scores": m.Vocab.Scores,
|
||||
"tokenizer.ggml.token_type": m.Vocab.Types,
|
||||
|
||||
"tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID),
|
||||
"tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID),
|
||||
"tokenizer.ggml.add_bos_token": true,
|
||||
"tokenizer.ggml.add_eos_token": false,
|
||||
"tokenizer.ggml.unknown_token_id": uint32(0),
|
||||
}
|
||||
|
||||
if m.Params.HeadDimension > 0 {
|
||||
kv["llama.attention.key_length"] = uint32(m.Params.HeadDimension)
|
||||
kv["llama.attention.value_length"] = uint32(m.Params.HeadDimension)
|
||||
}
|
||||
|
||||
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
||||
}
|
||||
|
||||
func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
return llamaRepack(name, m.Params, data, shape)
|
||||
}
|
||||
@@ -1,87 +0,0 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"io"
|
||||
"regexp"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type MixtralModel struct {
|
||||
ModelData
|
||||
}
|
||||
|
||||
func (m *MixtralModel) GetTensors() error {
|
||||
t, err := m.Format.GetTensors(m.Path, m.Params)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, l := range t {
|
||||
matches := re.FindAllStringSubmatch(l.Name, -1)
|
||||
if len(matches) > 0 {
|
||||
wt := l.WriterTo.(safetensorWriterTo)
|
||||
wt.repacker = m.Repack
|
||||
l.WriterTo = wt
|
||||
}
|
||||
m.Tensors = append(m.Tensors, l)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MixtralModel) LoadVocab() error {
|
||||
v, err := LoadSentencePieceTokens(m.Path, m.Params)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Vocab = v
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
|
||||
kv := llm.KV{
|
||||
"general.architecture": "llama",
|
||||
"general.name": m.Name,
|
||||
"llama.block_count": uint32(m.Params.HiddenLayers),
|
||||
"llama.context_length": uint32(m.Params.ContextSize),
|
||||
"llama.embedding_length": uint32(m.Params.HiddenSize),
|
||||
"llama.feed_forward_length": uint32(m.Params.IntermediateSize),
|
||||
"llama.attention.head_count": uint32(m.Params.AttentionHeads),
|
||||
"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),
|
||||
|
||||
"llama.rope.freq_base": float32(m.Params.RopeFrequencyBase),
|
||||
"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
|
||||
|
||||
"llama.expert_count": uint32(m.Params.Experts),
|
||||
"llama.expert_used_count": uint32(m.Params.ExpertsUsed),
|
||||
|
||||
"llama.vocab_size": uint32(len(m.Vocab.Tokens)),
|
||||
"llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
|
||||
|
||||
"general.file_type": uint32(1),
|
||||
"tokenizer.ggml.model": "llama",
|
||||
|
||||
"tokenizer.ggml.tokens": m.Vocab.Tokens,
|
||||
"tokenizer.ggml.scores": m.Vocab.Scores,
|
||||
"tokenizer.ggml.token_type": m.Vocab.Types,
|
||||
|
||||
"tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID),
|
||||
"tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID),
|
||||
"tokenizer.ggml.unknown_token_id": uint32(0),
|
||||
"tokenizer.ggml.add_bos_token": true,
|
||||
"tokenizer.ggml.add_eos_token": false,
|
||||
}
|
||||
|
||||
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
||||
}
|
||||
|
||||
func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
return llamaRepack(name, m.Params, data, shape)
|
||||
}
|
||||
82
convert/reader.go
Normal file
82
convert/reader.go
Normal file
@@ -0,0 +1,82 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"io/fs"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type Tensor interface {
|
||||
Name() string
|
||||
Shape() []uint64
|
||||
Kind() uint32
|
||||
SetRepacker(repacker)
|
||||
WriteTo(io.Writer) (int64, error)
|
||||
}
|
||||
|
||||
type tensorBase struct {
|
||||
name string
|
||||
shape []uint64
|
||||
repacker
|
||||
}
|
||||
|
||||
func (t tensorBase) Name() string {
|
||||
return t.name
|
||||
}
|
||||
|
||||
func (t tensorBase) Shape() []uint64 {
|
||||
return t.shape
|
||||
}
|
||||
|
||||
// Storage kinds returned by Tensor.Kind.
const (
	// tensorKindF32 (0) selects 32-bit float output.
	tensorKindF32 uint32 = iota
	// tensorKindF16 (1) selects 16-bit float output.
	tensorKindF16
)
|
||||
|
||||
func (t tensorBase) Kind() uint32 {
|
||||
if strings.HasSuffix(t.name, ".block_sparse_moe.gate.weight") {
|
||||
return 0
|
||||
}
|
||||
|
||||
switch len(t.shape) {
|
||||
case 0:
|
||||
panic("invalid tensor shape")
|
||||
case 1:
|
||||
return tensorKindF32
|
||||
default:
|
||||
return tensorKindF16
|
||||
}
|
||||
}
|
||||
|
||||
func (t *tensorBase) SetRepacker(fn repacker) {
|
||||
t.repacker = fn
|
||||
}
|
||||
|
||||
// repacker transforms a tensor's float32 data before it is written. It is
// called with the tensor's name, data and shape, and returns the new data.
type repacker func(name string, data []float32, shape []uint64) ([]float32, error)
|
||||
|
||||
func parseTensors(fsys fs.FS) ([]Tensor, error) {
|
||||
patterns := []struct {
|
||||
Pattern string
|
||||
Func func(fs.FS, ...string) ([]Tensor, error)
|
||||
}{
|
||||
{"model-*-of-*.safetensors", parseSafetensors},
|
||||
{"model.safetensors", parseSafetensors},
|
||||
{"pytorch_model-*-of-*.bin", parseTorch},
|
||||
{"pytorch_model.bin", parseTorch},
|
||||
{"consolidated.*.pth", parseTorch},
|
||||
}
|
||||
|
||||
for _, pattern := range patterns {
|
||||
matches, err := fs.Glob(fsys, pattern.Pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(matches) > 0 {
|
||||
return pattern.Func(fsys, matches...)
|
||||
}
|
||||
}
|
||||
|
||||
return nil, errors.New("unknown tensor format")
|
||||
}
|
||||
150
convert/reader_safetensors.go
Normal file
150
convert/reader_safetensors.go
Normal file
@@ -0,0 +1,150 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"slices"
|
||||
|
||||
"github.com/d4l3k/go-bfloat16"
|
||||
"github.com/x448/float16"
|
||||
"golang.org/x/exp/maps"
|
||||
)
|
||||
|
||||
// safetensorMetadata is one entry of the JSON header of a safetensors file.
type safetensorMetadata struct {
	// Type is the "dtype" string, e.g. "F32", "F16" or "BF16".
	Type string `json:"dtype"`
	// Shape is the tensor's dimensions.
	Shape []uint64 `json:"shape"`
	// Offsets is the "data_offsets" pair; indexes 0 and 1 are used as the
	// start and end of the tensor's data.
	Offsets []int64 `json:"data_offsets"`
}
|
||||
|
||||
func parseSafetensors(fsys fs.FS, ps ...string) ([]Tensor, error) {
|
||||
var ts []Tensor
|
||||
for _, p := range ps {
|
||||
f, err := fsys.Open(p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var n int64
|
||||
if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
b := bytes.NewBuffer(make([]byte, 0, n))
|
||||
if _, err = io.CopyN(b, f, n); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var headers map[string]safetensorMetadata
|
||||
if err := json.NewDecoder(b).Decode(&headers); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
keys := maps.Keys(headers)
|
||||
slices.Sort(keys)
|
||||
|
||||
for _, key := range keys {
|
||||
if value := headers[key]; value.Type != "" {
|
||||
ts = append(ts, safetensor{
|
||||
fs: fsys,
|
||||
path: p,
|
||||
dtype: value.Type,
|
||||
offset: safetensorsPad(n, value.Offsets[0]),
|
||||
size: safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]),
|
||||
tensorBase: &tensorBase{
|
||||
name: key,
|
||||
shape: value.Shape,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ts, nil
|
||||
}
|
||||
|
||||
// safetensorsPad converts offset, a position relative to the end of the
// header, into an absolute position in the file. n is the header length and
// 8 is the size of the int64 length prefix that precedes the header.
func safetensorsPad(n, offset int64) int64 {
	return 8 + n + offset
}
|
||||
|
||||
type safetensor struct {
|
||||
fs fs.FS
|
||||
path string
|
||||
dtype string
|
||||
offset int64
|
||||
size int64
|
||||
*tensorBase
|
||||
}
|
||||
|
||||
func (st safetensor) WriteTo(w io.Writer) (int64, error) {
|
||||
f, err := st.fs.Open(st.path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if seeker, ok := f.(io.Seeker); ok {
|
||||
if _, err := seeker.Seek(st.offset, io.SeekStart); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
} else {
|
||||
if _, err := io.CopyN(io.Discard, f, st.offset); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
switch st.dtype {
|
||||
case "F32":
|
||||
f32s = make([]float32, st.size/4)
|
||||
if err = binary.Read(f, binary.LittleEndian, f32s); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
case "F16":
|
||||
u16s := make([]uint16, st.size/2)
|
||||
if err = binary.Read(f, binary.LittleEndian, u16s); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
f32s = make([]float32, len(u16s))
|
||||
for i := range u16s {
|
||||
f32s[i] = float16.Frombits(u16s[i]).Float32()
|
||||
}
|
||||
|
||||
case "BF16":
|
||||
u8s := make([]uint8, st.size)
|
||||
if err = binary.Read(f, binary.LittleEndian, u8s); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
f32s = bfloat16.DecodeFloat32(u8s)
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown data type: %s", st.dtype)
|
||||
}
|
||||
|
||||
if st.repacker != nil {
|
||||
f32s, err = st.repacker(st.Name(), f32s, st.Shape())
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
switch st.Kind() {
|
||||
case tensorKindF32:
|
||||
return 0, binary.Write(w, binary.LittleEndian, f32s)
|
||||
case tensorKindF16:
|
||||
f16s := make([]uint16, len(f32s))
|
||||
for i := range f32s {
|
||||
f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
|
||||
}
|
||||
|
||||
return 0, binary.Write(w, binary.LittleEndian, f16s)
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown storage type: %d", st.Kind())
|
||||
}
|
||||
}
|
||||
47
convert/reader_torch.go
Normal file
47
convert/reader_torch.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"io"
|
||||
"io/fs"
|
||||
|
||||
"github.com/nlpodyssey/gopickle/pytorch"
|
||||
"github.com/nlpodyssey/gopickle/types"
|
||||
)
|
||||
|
||||
func parseTorch(fsys fs.FS, ps ...string) ([]Tensor, error) {
|
||||
var ts []Tensor
|
||||
for _, p := range ps {
|
||||
pt, err := pytorch.Load(p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, k := range pt.(*types.Dict).Keys() {
|
||||
t := pt.(*types.Dict).MustGet(k)
|
||||
|
||||
var shape []uint64
|
||||
for dim := range t.(*pytorch.Tensor).Size {
|
||||
shape = append(shape, uint64(dim))
|
||||
}
|
||||
|
||||
ts = append(ts, torch{
|
||||
storage: t.(*pytorch.Tensor).Source,
|
||||
tensorBase: &tensorBase{
|
||||
name: k.(string),
|
||||
shape: shape,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return ts, nil
|
||||
}
|
||||
|
||||
type torch struct {
|
||||
storage pytorch.StorageInterface
|
||||
*tensorBase
|
||||
}
|
||||
|
||||
func (pt torch) WriteTo(w io.Writer) (int64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
@@ -1,309 +0,0 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/d4l3k/go-bfloat16"
|
||||
"github.com/x448/float16"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type safetensorWriterTo struct {
|
||||
t *llm.Tensor
|
||||
|
||||
params *Params
|
||||
bo ByteOrder
|
||||
|
||||
filename string
|
||||
dtype string
|
||||
|
||||
offset, size int64
|
||||
repacker func(string, []float32, []uint64) ([]float32, error)
|
||||
}
|
||||
|
||||
// safetensorMetadata is one entry of the JSON header of a safetensors file.
type safetensorMetadata struct {
	// Type is the "dtype" string, e.g. "F32", "F16" or "BF16".
	Type string `json:"dtype"`
	// Shape is the tensor's dimensions.
	Shape []uint64 `json:"shape"`
	// Offsets is the "data_offsets" pair; indexes 0 and 1 are used as the
	// start and end of the tensor's data.
	Offsets []int64 `json:"data_offsets"`
}
|
||||
|
||||
// SafetensorFormat reads models stored as safetensors files. It has no state.
type SafetensorFormat struct{}
|
||||
|
||||
func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
|
||||
var tensors []llm.Tensor
|
||||
matches, err := filepath.Glob(filepath.Join(dirpath, "*.safetensors"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var offset uint64
|
||||
for _, f := range matches {
|
||||
var t []llm.Tensor
|
||||
var err error
|
||||
t, offset, err = m.readTensors(f, offset, params)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tensors = append(tensors, t...)
|
||||
}
|
||||
return tensors, nil
|
||||
}
|
||||
|
||||
func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) {
|
||||
f, err := os.Open(fn)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var n int64
|
||||
if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
b := bytes.NewBuffer(make([]byte, 0, n))
|
||||
if _, err = io.CopyN(b, f, n); err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
var headers map[string]safetensorMetadata
|
||||
if err := json.NewDecoder(b).Decode(&headers); err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
var keys []string
|
||||
for key := range headers {
|
||||
if !strings.HasSuffix(key, "self_attn.rotary_embd.inv_freq") {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
}
|
||||
|
||||
slices.Sort(keys)
|
||||
|
||||
var tensors []llm.Tensor
|
||||
for _, key := range keys {
|
||||
value := headers[key]
|
||||
|
||||
var kind uint32
|
||||
switch len(value.Shape) {
|
||||
case 0:
|
||||
// valuedata
|
||||
continue
|
||||
case 2:
|
||||
kind = 1
|
||||
}
|
||||
|
||||
name, err := m.GetLayerName(key)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
shape := make([]uint64, len(value.Shape))
|
||||
copy(shape, value.Shape)
|
||||
|
||||
pad := func(s int64) int64 {
|
||||
return 8 + n + s
|
||||
}
|
||||
|
||||
t := llm.Tensor{
|
||||
Name: name,
|
||||
Kind: kind,
|
||||
Offset: offset,
|
||||
Shape: shape,
|
||||
}
|
||||
|
||||
t.WriterTo = safetensorWriterTo{
|
||||
t: &t,
|
||||
params: params,
|
||||
bo: params.ByteOrder,
|
||||
filename: fn,
|
||||
dtype: value.Type,
|
||||
offset: pad(value.Offsets[0]),
|
||||
size: pad(value.Offsets[1]) - pad(value.Offsets[0]),
|
||||
}
|
||||
|
||||
offset += t.Size()
|
||||
tensors = append(tensors, t)
|
||||
}
|
||||
|
||||
return tensors, offset, nil
|
||||
}
|
||||
|
||||
func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) {
|
||||
f, err := os.Open(filepath.Join(dirpath, "config.json"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var params Params
|
||||
|
||||
if err := json.NewDecoder(f).Decode(¶ms); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
params.ByteOrder = binary.LittleEndian
|
||||
return ¶ms, nil
|
||||
}
|
||||
|
||||
func (m *SafetensorFormat) GetLayerName(n string) (string, error) {
|
||||
directMap := map[string]string{
|
||||
"model.embed_tokens.weight": "token_embd.weight",
|
||||
"lm_head.weight": "output.weight",
|
||||
"model.norm.weight": "output_norm.weight",
|
||||
}
|
||||
|
||||
tMap := map[string]string{
|
||||
"model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight",
|
||||
"model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight",
|
||||
"model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight",
|
||||
"model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight",
|
||||
"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
|
||||
"model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight",
|
||||
"model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight",
|
||||
"model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight",
|
||||
"model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight",
|
||||
"model.layers.(\\d+).block_sparse_moe.gate.weight": "blk.$1.ffn_gate_inp.weight",
|
||||
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight": "blk.$1.ffn_gate.$2.weight",
|
||||
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight": "blk.$1.ffn_down.$2.weight",
|
||||
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight": "blk.$1.ffn_up.$2.weight",
|
||||
}
|
||||
|
||||
v, ok := directMap[n]
|
||||
if ok {
|
||||
return v, nil
|
||||
}
|
||||
|
||||
// quick hack to rename the layers to gguf format
|
||||
for k, v := range tMap {
|
||||
re := regexp.MustCompile(k)
|
||||
newName := re.ReplaceAllString(n, v)
|
||||
if newName != n {
|
||||
return newName, nil
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
|
||||
}
|
||||
|
||||
func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
|
||||
f, err := os.Open(r.filename)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if _, err = f.Seek(r.offset, io.SeekStart); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
switch r.dtype {
|
||||
case "F32":
|
||||
f32s = make([]float32, r.size/4)
|
||||
if err = binary.Read(f, r.bo, f32s); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
case "F16":
|
||||
u16s := make([]uint16, r.size/2)
|
||||
if err = binary.Read(f, r.bo, u16s); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
for _, b := range u16s {
|
||||
f32s = append(f32s, float16.Frombits(b).Float32())
|
||||
}
|
||||
|
||||
case "BF16":
|
||||
u8s := make([]uint8, r.size)
|
||||
if err = binary.Read(f, r.bo, u8s); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
f32s = bfloat16.DecodeFloat32(u8s)
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown data type: %s", r.dtype)
|
||||
}
|
||||
|
||||
if r.repacker != nil {
|
||||
f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
switch r.t.Kind {
|
||||
case 0:
|
||||
return 0, binary.Write(w, r.bo, f32s)
|
||||
case 1:
|
||||
f16s := make([]uint16, len(f32s))
|
||||
for i := range f32s {
|
||||
f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
|
||||
}
|
||||
|
||||
return 0, binary.Write(w, r.bo, f16s)
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
|
||||
switch len(params.Architectures) {
|
||||
case 0:
|
||||
return nil, fmt.Errorf("No architecture specified to convert")
|
||||
case 1:
|
||||
switch params.Architectures[0] {
|
||||
case "LlamaForCausalLM":
|
||||
return &LlamaModel{
|
||||
ModelData{
|
||||
Name: name,
|
||||
Path: dirPath,
|
||||
Params: params,
|
||||
Format: m,
|
||||
},
|
||||
}, nil
|
||||
case "MistralForCausalLM":
|
||||
return &MistralModel{
|
||||
ModelData{
|
||||
Name: name,
|
||||
Path: dirPath,
|
||||
Params: params,
|
||||
Format: m,
|
||||
},
|
||||
}, nil
|
||||
case "MixtralForCausalLM":
|
||||
return &MixtralModel{
|
||||
ModelData{
|
||||
Name: name,
|
||||
Path: dirPath,
|
||||
Params: params,
|
||||
Format: m,
|
||||
},
|
||||
}, nil
|
||||
case "GemmaForCausalLM":
|
||||
return &GemmaModel{
|
||||
ModelData{
|
||||
Name: name,
|
||||
Path: dirPath,
|
||||
Params: params,
|
||||
Format: m,
|
||||
},
|
||||
}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0])
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("Unknown error")
|
||||
}
|
||||
313
convert/testdata/Meta-Llama-3-8B-Instruct.json
vendored
Normal file
313
convert/testdata/Meta-Llama-3-8B-Instruct.json
vendored
Normal file
@@ -0,0 +1,313 @@
|
||||
{
|
||||
"general.architecture": "llama",
|
||||
"general.file_type": "1",
|
||||
"general.quantization_version": "2",
|
||||
"llama.block_count": "32",
|
||||
"llama.context_length": "8192",
|
||||
"llama.embedding_length": "4096",
|
||||
"llama.feed_forward_length": "14336",
|
||||
"llama.rope.dimension_count": "128",
|
||||
"llama.rope.freq_base": "500000",
|
||||
"llama.vocab_size": "128256",
|
||||
"llama.attention.head_count": "32",
|
||||
"llama.attention.head_count_kv": "8",
|
||||
"llama.attention.layer_norm_rms_epsilon": "1e-05",
|
||||
"tokenizer.ggml.model": "gpt2",
|
||||
"tokenizer.ggml.pre": "llama-bpe",
|
||||
"tokenizer.ggml.bos_token_id": "128000",
|
||||
"tokenizer.ggml.eos_token_id": "128009",
|
||||
"tokenizer.ggml.merges": "d0cbac1fcc9dcf03724b8db5c9bfb593ae1cf68fb9bc72eb1d15274dcbbf618b",
|
||||
"tokenizer.ggml.token_type": "d70a88809fd7da6f1f028622685cd64268a7a922c5d343c96f25b66327358978",
|
||||
"tokenizer.ggml.tokens": "765b529dbcbc42dd202ce657341c63807b51f3b07e09898f6aa6196326865d5a",
|
||||
"token_embd.weight": "b53102a11d9064bbd404833e3464b1b13e08ce73300b442312cccde2f19b2698",
|
||||
"blk.0.attn_norm.weight": "7318df3cca9e8d153ff0a503026a1265e63d20b2a8c1dd7a2769585082b5d1ee",
|
||||
"blk.0.ffn_down.weight": "b950806a1fc722c9fad7fd0b20c3c0a7fb50f14395e1e7663a590bfd62e20900",
|
||||
"blk.0.ffn_gate.weight": "e73e580af6d4f08e060a74a3c25efdf5d3bed99e183d95a5a85ae859014839fd",
|
||||
"blk.0.ffn_up.weight": "c8158af679ef99746da1befb67eebb19489e0bbe6ce7d97e13e348508244e516",
|
||||
"blk.0.ffn_norm.weight": "7ec69c3c31e95e49a3359003b0033f6b9e85561a3e3fd83e7476661ecdd756bb",
|
||||
"blk.0.attn_k.weight": "2732303257bac969b4964e0e32ec08b5a7f5c031bb02bf6ac4467b3ea0ebcf1e",
|
||||
"blk.0.attn_output.weight": "ecda1d43b4ccc91cd5b366d7e7a275353990ac78561a07c83d9c77031aba12dc",
|
||||
"blk.0.attn_q.weight": "569b1f5faf92b6f00910cf7effb2d5862f91038ce5c3b0019fc10e5d79fbd5e1",
|
||||
"blk.0.attn_v.weight": "aa8416c5ef7e32fb54a1f20d6ac651656845d4af240564b397c39bd83e06e3b8",
|
||||
"blk.1.attn_norm.weight": "03327e02862908c2a44b2f52decdb924bf4201f400b46f8037a9cb2e1d7a61ff",
|
||||
"blk.1.ffn_down.weight": "5a83a87603f38c99f8e1e370a2d5f967bb45ac51d881a609304a7811027321e0",
|
||||
"blk.1.ffn_gate.weight": "31da0572c79e655186c721c231376f85e56cdcc6257c28d08c8c5b40d5c22b40",
|
||||
"blk.1.ffn_up.weight": "e0c811d64ca155c8de10a868e72015d43888834804614ee1aa2953129ffbc90f",
|
||||
"blk.1.ffn_norm.weight": "5861f313d6137d6f0f904d423df47fffc6069e224ff746e1b637ac9c7f0af862",
|
||||
"blk.1.attn_k.weight": "5fbbec0acca6457b9416ebdcd90e526885d0224537b7628f6be376a7f275313d",
|
||||
"blk.1.attn_output.weight": "b237c9763fa3f75166a6f70b70f1566e77d0d89dfa164ed1b3137393e90575c3",
|
||||
"blk.1.attn_q.weight": "c0a9cf4a98b4882b16f3eb2b49d933793dcc5357abb246fd3fe3134ed2b12e1c",
|
||||
"blk.1.attn_v.weight": "96867111727200cac1af7865189dd41fd62b47584e5e5f33a91f1d34509cbd40",
|
||||
"blk.2.attn_norm.weight": "f392f8a88ee3a95b1cc19c40dd4ef66317037b0faaa1800f610779e129ee0539",
|
||||
"blk.2.ffn_down.weight": "73823eef46632aedcc8c1cb08a736b6aa97ca97842cd1fdfc5567d8dec459662",
|
||||
"blk.2.ffn_gate.weight": "f4909ae19fc3848b00bb8b9050122e74f8e903b89e22937036f4cc9fea20a718",
|
||||
"blk.2.ffn_up.weight": "16f4904a3d814ea68f00519724fc4943e48444a84c786bda39aa5efc298a7d84",
|
||||
"blk.2.ffn_norm.weight": "e3ccdf56e75cb969f6f69c39caf6daf7c4e70e89e25df0f4d2e4bc60e159aafe",
|
||||
"blk.2.attn_k.weight": "c3beb1e0a11bcf007ef0f0d8f6bdd3082d8b29090cd29597846b5d51e308a8e5",
|
||||
"blk.2.attn_output.weight": "bb9f66c32cff51154fea92933c2cd62549236f8cb1a767f9ef28d3f99809b343",
|
||||
"blk.2.attn_q.weight": "8eba394132eef2a05c5a92d62d2376000f7948448d7a2dc74e6b608203add20d",
|
||||
"blk.2.attn_v.weight": "88f61f77c53567c617db3eef8f30621109a750e679f6784f7911739bd42c2f02",
|
||||
"blk.3.attn_norm.weight": "7b996675b7ca75fa24107b3ebe0788653ede0f49ac83b8659d71ff54d591f81a",
|
||||
"blk.3.ffn_down.weight": "2cb332bc05e4821962fdc9dcbcc7cc12630f32117711b687d18fb53c0bc4fbf4",
|
||||
"blk.3.ffn_gate.weight": "340b387c7f208c8f0a6db904ef8d87c1e84b7d6ad57177abd32d86c8d18b760f",
|
||||
"blk.3.ffn_up.weight": "07484433f8a7ee061c55aa0de2ecc009f769b0617c9c0ec096e9bb2946df9f0e",
|
||||
"blk.3.ffn_norm.weight": "4f1a4ade36b393af341240bc894a2aab09cff7e4d56dc4658445deb107f9371b",
|
||||
"blk.3.attn_k.weight": "483dcd96acb4528df84b9842970994630dbd82b8715ace394aa8b39fcf8d6291",
|
||||
"blk.3.attn_output.weight": "beaff0810687923585642ee11d929cbf3b43dc6f87f30ddb552c222ab57bdbb3",
|
||||
"blk.3.attn_q.weight": "0739355002f6fce520863add697e0ff25fc88215322dc3f993be7bb68dcce7e8",
|
||||
"blk.3.attn_v.weight": "c216d17b6d90ee3e07f82598b8161fae34de2f392dbb0f745b682b578c324767",
|
||||
"blk.4.attn_norm.weight": "91ab405bc4ba15bf63af233f266aa43aaab43789a9e6596e14a357c2ac7df217",
|
||||
"blk.4.ffn_down.weight": "620f34ee75cdc73aecb8949af5fbb0d2437fd81422b6d8eb7acfc52addb9fc68",
|
||||
"blk.4.ffn_gate.weight": "f6feec7bc9acadf35ec22532f8998d8e50f31afedabb19263590dcf8b9a92eee",
|
||||
"blk.4.ffn_up.weight": "4a72af7cd28fd07b038f6cc4406678d120517280236ea85d9e76eff40ab2cc22",
|
||||
"blk.4.ffn_norm.weight": "1805b37b44d5d682bdbd2fadeafb763ee001617d7870848cc487079ee34b21f9",
|
||||
"blk.4.attn_k.weight": "a1e4f9d97cdf4c1b0d177cf00c4e32d1be30c1984a239b3c9bd73f8848888853",
|
||||
"blk.4.attn_output.weight": "a1547e2497c423b0aff0eee71d9300d6fdf4e4986679418b6e637b69a9a6720b",
|
||||
"blk.4.attn_q.weight": "0677483a9264ea6803d03d304d87a54632242cb516e8b76b6e3e8284c2f4de04",
|
||||
"blk.4.attn_v.weight": "02691ba3af344fcc1969428ab0df811ac94aaa2fd91b0dc4ec1ac0a58806980d",
|
||||
"blk.5.attn_norm.weight": "ba9c028335e5c895b87a5bd1448ca429248f9746ed97bdcb8679923206117156",
|
||||
"blk.5.ffn_down.weight": "ccfdc9006acad1940a6bc05042a3947f1066acd671e0bb53b7684e9eea9ef5c9",
|
||||
"blk.5.ffn_gate.weight": "623157679f1e742ccc3807c0b0153ddc8450104de75ec62f1370ec3807c09cf4",
|
||||
"blk.5.ffn_up.weight": "05748804c65091f963729b58b085f58351891cac8a2861f5eae26b06aa60b2a0",
|
||||
"blk.5.ffn_norm.weight": "84bae55af2efc8b8429f09056c8c04990c466dae31cb3f9356038b8957f1b406",
|
||||
"blk.5.attn_k.weight": "8c766180c726b037d587fc52371de6e3307140c52409011609d1225624b6a3eb",
|
||||
"blk.5.attn_output.weight": "490b582b3b1dc151ae55aee8b6743dad6c01fb49e43afefb6e68394b74be3d73",
|
||||
"blk.5.attn_q.weight": "6f7b8ca4d9025ec836a44bbcca46be30c66b471a9fb62943ddff8288b3731409",
|
||||
"blk.5.attn_v.weight": "9f70df3ba00c9e723214b3da83ff435a2163fff5915f75515c9664c05c866c27",
|
||||
"blk.6.attn_norm.weight": "1a4a66613a682df6f061fc7c4d986f9f7e9175b62f0c42fc1ef31db536bd5942",
|
||||
"blk.6.ffn_down.weight": "c56f25e4e49b443dbc82d88311ee63bc1f5002cc67e52f4787fd5f003aedeac1",
|
||||
"blk.6.ffn_gate.weight": "31a5cf1aa9b831a81588d508550f51fc425f9517c43254d4ef7096d38029cf04",
|
||||
"blk.6.ffn_up.weight": "ce135f3a1163e0c9297a615bdbe68a67ead21edce8debbfa9f6e15e6af8d4c94",
|
||||
"blk.6.ffn_norm.weight": "4e328ce0648c94e732bc40501858ef6262ad1161e2e407b0cdcf4813fa9d45d8",
|
||||
"blk.6.attn_k.weight": "1eb1c4c9f9c4c7ff7f5429075e0dc6a7782bed55109fa88df209a817dd8ef960",
|
||||
"blk.6.attn_output.weight": "3d32986b56873b88655ee1edabdd413fdd9ab18b82108c9ce90bdbc2d3a6f3a3",
|
||||
"blk.6.attn_q.weight": "8432f583b3a2809c99c393f9beb077cb0534dd5d247c17108f2986cadc6651f6",
|
||||
"blk.6.attn_v.weight": "5045381513815bb91839dbac8335ffe49bbc7b0008369de7ea97eb676c5e2b36",
|
||||
"blk.7.attn_norm.weight": "3dabd003638ec2499bfc8a48c49eef34276caab4fe76894eb963207848c2fdaf",
|
||||
"blk.7.ffn_down.weight": "194fae858608bdcffd235be59ab119d0b91c8549f864ea06dae69249e099935f",
|
||||
"blk.7.ffn_gate.weight": "00b24c29c30246892bce0791be804a89701d4c1332777e0bcdad5d9d5666604f",
|
||||
"blk.7.ffn_up.weight": "44d7082a5280080c90cef9e19d410391de34f212ca0736377769b8ddd0c82d5e",
|
||||
"blk.7.ffn_norm.weight": "21fe8a7fd6911c64e0d15a788b3b4cb6d71dd6ec51de65f760ee89afbb6ae53e",
|
||||
"blk.7.attn_k.weight": "57a149eec5f6744a9526cd3925ac073f9d12db0fbcb5afe042ef4dc846458c44",
|
||||
"blk.7.attn_output.weight": "0e9c28a3e81a2880251ce5eed77bcb8be8aaa1a51c9cb6de820b47ed83849fc2",
|
||||
"blk.7.attn_q.weight": "15ee75263ee4e2a43eb322bc159ae004bb7d77e3a7e63ee4ddab700430693fff",
|
||||
"blk.7.attn_v.weight": "440aa970bba4bff429fd7b7b1de21f2ad14fb2952b776cfa4acee68d7c6e9b8f",
|
||||
"blk.8.attn_norm.weight": "af5b44825633c42c1ae964c82bb2be6a242d3a751f0a91f1bae4f593e8f5b6ec",
|
||||
"blk.8.ffn_down.weight": "b11c14c76adca94fa200496dd2c10743becb23aab6642443ef1ae6d8710edbc1",
|
||||
"blk.8.ffn_gate.weight": "7bb03d3325bf8637ae2fa1296b0651356515578d46a7c5ca65c7a923d7de27bc",
|
||||
"blk.8.ffn_up.weight": "b956ef0a0669b5a9c9bf3a8da2d1c24f52d331cfb7354f6d7c51bd65be355e30",
|
||||
"blk.8.ffn_norm.weight": "c78c3d748302edfef76f71ea5cb2055c94352122eee8b9b1173779a1814d224e",
|
||||
"blk.8.attn_k.weight": "c0fba6a596ed9c1c32a7055c31a935a8b31e42b77282ee47c1f03ee3bde736b5",
|
||||
"blk.8.attn_output.weight": "83cf9947080c5d8d571f04a842bc3dcfe7bbb0195fb25b346e22635e8649f2d4",
|
||||
"blk.8.attn_q.weight": "47409350a576b333d97b7c877d69f47f46df504f3765102dfc0be9e521c7ecd6",
|
||||
"blk.8.attn_v.weight": "1999dff91404fdcf1ecb34d9eaaaa9244ec7658a74dec8feb7cfd1fddba0347e",
|
||||
"blk.9.attn_norm.weight": "1e6e29d5c3889ab4e1b0a5b9998cba60179b0f1fca133515df49cbc19d092593",
|
||||
"blk.9.ffn_down.weight": "acb898a6490adff592e10b4c62d70edc5941661ee6da44658500e9205357c8e9",
|
||||
"blk.9.ffn_gate.weight": "4cff63013593aadc3ffbaaa6ed70ffdba1224cd43c3644bf6f4162b5ac1ab542",
|
||||
"blk.9.ffn_up.weight": "f985b5a2d6cf4fe32c7256301c3c89b8ad22b59e516342c52da42d8110766a4e",
|
||||
"blk.9.ffn_norm.weight": "0d659c538bc6b21ed0018f107ab674a7424a00a42946c80e07208b479b21918f",
|
||||
"blk.9.attn_k.weight": "f67611d888780d1b38c1c146b361c65310c8183bdf64fd73e2259985c6e8517f",
|
||||
"blk.9.attn_output.weight": "f12ca1fa62a02ddc3f77f798bfb5707e0c50bf18ee0eaa67025521a98355f26b",
|
||||
"blk.9.attn_q.weight": "3865185f4361a645b086ad47b72904c095313fb1c624e511647bf1a7dfc1c476",
|
||||
"blk.9.attn_v.weight": "92125bbfed63544ab56052bd1e4aa453bbf34c795249ee54cde54907c8c6d1d3",
|
||||
"blk.10.attn_norm.weight": "5d6bfbe545bcc2fcb2fc75c68f64b1f4c918badaf53e0156fe2d88aa977b2f94",
|
||||
"blk.10.ffn_down.weight": "1dd9da8b0d2696ab5531fbca8a29c7d67567620a9d3e5fc2a19ec5d7e4c6cc8a",
|
||||
"blk.10.ffn_gate.weight": "6e55e7f014edaebda0ac6819a426221d3b025c27312a2e18cc5806f31e3db226",
|
||||
"blk.10.ffn_up.weight": "d80dde54af5db51241345ee8d64c1972608644f4deeac1e8195dc423bf27474a",
|
||||
"blk.10.ffn_norm.weight": "f6ca65951d58ae3379eee8247bec34ebd0db05674cc9295593573841b8a55df3",
|
||||
"blk.10.attn_k.weight": "b58e350bd6b49aba0fba4e4dd6865de3a2a0651ab865dbf2419b627b53ffc187",
|
||||
"blk.10.attn_output.weight": "6b26a986e12fe66ec286a21d7d5af5eaa1bfe6f2bf502165d270e4497235a54a",
|
||||
"blk.10.attn_q.weight": "3440e0e5b7e0d1e426424ae5a33f4e057be623249e9035ea12e57dbe5d3893c4",
|
||||
"blk.10.attn_v.weight": "ebfadcfe14bcd6dee933053df0a67e12e7a196d5cc45728c1ffb2a2daedd5ca2",
|
||||
"blk.11.attn_norm.weight": "3ed057b9576cd2de84507ef64c7646dc478c651efca4c2024cbe91a4f3fbf0bc",
|
||||
"blk.11.ffn_down.weight": "8ff1c2487d22f5c499761e4eb721418f141f960160d0bab779595a34e4d68898",
|
||||
"blk.11.ffn_gate.weight": "9c74e4507c7e45bf39b7cc7402198cd1dd77e3fff8c625b0413acaeb16efeb9f",
|
||||
"blk.11.ffn_up.weight": "4367158007161d29939e00a322bb6776016e43f648a94f9b08a96a477aae75be",
|
||||
"blk.11.ffn_norm.weight": "1cc0288c1491072121f4c9a0af20be0e13af49895696a3320e4fcac608768de3",
|
||||
"blk.11.attn_k.weight": "066f5b3c144fce1366835e1ebf376f768b333b8ae29f5b478c42d1d0c809c855",
|
||||
"blk.11.attn_output.weight": "e0d9f3d3f2c54aed59c02713ea4fb562799ddbacbe67ca3998dfc887bc44e47b",
|
||||
"blk.11.attn_q.weight": "28d3ecc8a88cb3815e89a7f7a7d043da7a71f702b337a126e4d3a2ac1cd6370f",
|
||||
"blk.11.attn_v.weight": "7c5cdef10ee73bca0a3b9f6ece5f0a0155664e0ce3d8de90ccdccfab5545e5e7",
|
||||
"blk.12.attn_norm.weight": "973b133301a1af760cd7b3a7955371ea0a750808b442deb6adaf7b98482bd0c6",
|
||||
"blk.12.ffn_down.weight": "d6c87b4b4ca03f75546ddd6a9e7fca720585a309188723c1ace8122438d4b200",
|
||||
"blk.12.ffn_gate.weight": "2189a6e0cab1540bd05d6089b922aa8fd694be51255654933c165f302a0c955f",
|
||||
"blk.12.ffn_up.weight": "5affbec19b58d092b9305721e3552481fe2eff51269ea3ed91cda3b9ef84d4df",
|
||||
"blk.12.ffn_norm.weight": "f650fd42a34e950f758b4a130e7b8b1a712b1dcbede0291bb8edde47aaed0ef6",
|
||||
"blk.12.attn_k.weight": "59b1e86f10450a7cc188beefc0856d2dcf44e8d7fdd9cd8859c30ec1ebaf24b6",
|
||||
"blk.12.attn_output.weight": "446b0d36b2f66bd72a2323f4f4e9d85a0f621e9a58872e89a27248d6b1123238",
|
||||
"blk.12.attn_q.weight": "3ed6bfd39f040301ed99fad882d3e569769d594259f9948445bef0e44ec881fb",
|
||||
"blk.12.attn_v.weight": "e73652cd5d0029b1931be3ba9d82508f6696dce5a29d085476a54fb7a2ddbabc",
|
||||
"blk.13.attn_norm.weight": "491b85278c0bd67bd31b9b8a9720902c244bd067e53a4a03641b7c0994782e82",
|
||||
"blk.13.ffn_down.weight": "ad71cc248a85e9ced49307a24a9bfae01d387e979a7689c82ff59998e09741f3",
|
||||
"blk.13.ffn_gate.weight": "0a55984d53971fab97575ee0ef5882013be7fdecfa76e3fbebb5dc85a07a14d4",
|
||||
"blk.13.ffn_up.weight": "378b697b35e2e53c0de98e8e29b73d42ae3ec112ec16129aa5997a9e2f3b5943",
|
||||
"blk.13.ffn_norm.weight": "f8aff2f69ab286210fad45a62b03f8d10b38f96a420d7baadf6b95d7b0b0bcd2",
|
||||
"blk.13.attn_k.weight": "25ceb841afb1034831bea7f4d6a6c578def2ce4d4c412c780ef147dc9a598360",
|
||||
"blk.13.attn_output.weight": "a242b322889c6bdaa14b67a7bab593db39df8eea3721638ef639abbb74d482e3",
|
||||
"blk.13.attn_q.weight": "d80be9945a369439e835c55cfb0e97828b8a66bb7ced534d9059c92487bf20a9",
|
||||
"blk.13.attn_v.weight": "ac33274cf9b67979d9ecdc967a55175afe0c9c4aeeff6391433cd9840c818706",
|
||||
"blk.14.attn_norm.weight": "12a1e1091de5b2da12c9e7c0b1c8e6f09ce2a749733cf7d5240445b8e21cd093",
|
||||
"blk.14.ffn_down.weight": "cfd41965c88266e32bc2dcdadda512499c35519e8686fefb9a7f249ab2291eb5",
|
||||
"blk.14.ffn_gate.weight": "8dcfe774f07a095c7c6cf0a901c9df70d938bad7b5ba347fbc8f694e7603c0d1",
|
||||
"blk.14.ffn_up.weight": "c7995577fe4a72ea0fb17c4a7b6b87b959072bbfdd5edacc6c367d43465809ae",
|
||||
"blk.14.ffn_norm.weight": "81c41ebde41739e7016ffec31d2256217b825dc3cae049a935f5f61a60d22003",
|
||||
"blk.14.attn_k.weight": "fb708bdebe4384f5c4b479c110028554f4d122f166b8091eda7d8d65e6780eb8",
|
||||
"blk.14.attn_output.weight": "f5295caf2dfdc60553dcabe17537a80577e8b153c902247daac058df23542514",
|
||||
"blk.14.attn_q.weight": "c12b7a3601c68c63ab5dc9d2599ebf3f3a10abc2c59d3a2126fffd5818f2763b",
|
||||
"blk.14.attn_v.weight": "1ce968d9149bf0d5e237d52cc6d6433565b4bbf03252a736262bb00a2b34a687",
|
||||
"blk.15.attn_norm.weight": "266fd2c36d7dcefc6b6bb7f1c9374c41f2bab5d6c84a063b6f91c4f682dad3c4",
|
||||
"blk.15.ffn_down.weight": "6154886e9ef0a6cc08ab0d264a35f497e6f0987efdac992ed04e87088bea7801",
|
||||
"blk.15.ffn_gate.weight": "183d9fd3c1b5657840099053d2fd3f72ad953b1de523296159b7761f20491a76",
|
||||
"blk.15.ffn_up.weight": "51546d4498842ae2340ee226a0888d5f61e7d2ca4d052dfa06a77b0451242d3d",
|
||||
"blk.15.ffn_norm.weight": "ef7378091a41a25a5f58bf1bf9d3bc64ea562e7f421e1c232b1f177c30fd3500",
|
||||
"blk.15.attn_k.weight": "8d556ab8d9639324141774999b6eed0e91d7ee645bf3e7a3dcd200b2e7a00751",
|
||||
"blk.15.attn_output.weight": "54aa6ba87def7cbe18b0c6ab3aff5c351cb3b6ca4a0d7b2cd5f75a1312991429",
|
||||
"blk.15.attn_q.weight": "10731b0dc031ea8e0ef37bd7f010e0a78518a10a6df05a8bae48e3148b73ef3e",
|
||||
"blk.15.attn_v.weight": "cbbe50c2ed7224866d3cf9b489c599f3ec41a4ea1aa3181e9f4e87e1fa0cefec",
|
||||
"blk.16.attn_norm.weight": "387058eb39d4b28c04cf1368247417f1faeae8ae79d894c9f293457e0eaa00b0",
|
||||
"blk.16.ffn_down.weight": "2cb26ccee585e933401ad5c82ed36ddacb3289efa0b28f8cf91b020ffbd9c333",
|
||||
"blk.16.ffn_gate.weight": "d745985efb5bab42304e5d509024631efe35f92f2b2ec4931ead6db97ca9727e",
|
||||
"blk.16.ffn_up.weight": "7a67bd195e0642828ca36eb7818149bb70c2c25f82de07e2b5807c520daf540e",
|
||||
"blk.16.ffn_norm.weight": "7cefd061c8182482a89272f8a4e88a954b12609a62716923ca1cb3593b1c1651",
|
||||
"blk.16.attn_k.weight": "d7968a2de67e755b4533e061aaad1cb62f8882af92dcad67f99d6d5112513439",
|
||||
"blk.16.attn_output.weight": "9e9ab5788272ca3394ea89eadbce8c86ecc3fd75b7899184d6191c134ad9aae0",
|
||||
"blk.16.attn_q.weight": "ef81c261b536c1a3a093b33f44cf2d42b86e5aa2d821674f07a0c80e992ed925",
|
||||
"blk.16.attn_v.weight": "aef38e7958301b4a437cbdd2fbae6197f677b09269ec1eaf63188cd5da428d25",
|
||||
"blk.17.attn_norm.weight": "28f6b289f1bc3131041e9f791b7a2a3a48baee0dfea27bf7051ebbb7ed364d80",
|
||||
"blk.17.ffn_down.weight": "1a502829aafc6a9bd6bc81f12573bf8632d5c8c659f0dfb13c8b2411f3b1ec05",
|
||||
"blk.17.ffn_gate.weight": "ddfd8aa0eb98846ebc9afe31366249159f46ae9815199dd70161527ed241ac4d",
|
||||
"blk.17.ffn_up.weight": "4211a3cc247071bd361b30de2131d02382f552855062bf3b3e004c17992e5d09",
|
||||
"blk.17.ffn_norm.weight": "647e5fa99a5b0d232af36d15816539f4d27e60a50a341b00aa88bb6e4474f8b9",
|
||||
"blk.17.attn_k.weight": "d9125ff33a19c502c0f8846433ffc24395048582fc2f463d34a0301a82156f02",
|
||||
"blk.17.attn_output.weight": "3d64fbb1cfef04444827f37c35fd9ad3413eb2165094d339ef89f00503f09de4",
|
||||
"blk.17.attn_q.weight": "e5b29424028f578beca385fd82e29f37adedf3037cd51e5889d5a1ffb0428ca7",
|
||||
"blk.17.attn_v.weight": "1809c5aaf2ac04c5d65539097564ad62796e87d24bb8b9ce5b095561a61d908a",
|
||||
"blk.18.attn_norm.weight": "99daca58d001c627523d3adfbca1d95f04e590382a326866544d57989d5f4835",
|
||||
"blk.18.ffn_down.weight": "84f30231ce6ca0f10227541dfc602d6418c1a210386b0c4926ef1656e7d4635c",
|
||||
"blk.18.ffn_gate.weight": "ca5bbe4468b541740e54f69b9e08fcc8e478c344b70551dab21b1206acfbaadb",
|
||||
"blk.18.ffn_up.weight": "0b3067b9dded31686dcfdc1e247eae3974a28a61ac59e9862758dbfaad64e8f7",
|
||||
"blk.18.ffn_norm.weight": "8154a102232dbc0f90ce77ae5c1ff8f26f8b6e4dcf326e9ec1645749669e7960",
|
||||
"blk.18.attn_k.weight": "25abb26021ccc481471a30e0d4cbeb7e1db29828417ec5136edeb93fecf09ac4",
|
||||
"blk.18.attn_output.weight": "d87d481d9b046b68efa06ccdd4ed8cbf61e692d61114b75b7fad5ed75f5d87b2",
|
||||
"blk.18.attn_q.weight": "cc6400379e15766992ff1293be79dc67682c28e9e15155a78109f4b64653b164",
|
||||
"blk.18.attn_v.weight": "45c75cb1dd496aea3173aafe2575b841dd1d02cbe010b3198099731eb98f531c",
|
||||
"blk.19.attn_norm.weight": "65389efc75297684773284ef8e5f8789a4504b636c9f33b8a32e0ee42499fa72",
|
||||
"blk.19.ffn_down.weight": "4eefab7e939f64a17e4a214ca3c77a6fa110d94f677e2d6401086f70fc538b04",
|
||||
"blk.19.ffn_gate.weight": "f1c0a59cafda66f466ab585b0b8b4861b58abe87a67cea1f6a488492242edfdf",
|
||||
"blk.19.ffn_up.weight": "c42d045eef588db4a0e56960a57e110e1ff92eb8041107d19899165fd3b90f17",
|
||||
"blk.19.ffn_norm.weight": "a8f33eda6d5d62ff5f333ad9771783caff556641f4e7df713451385676f441fa",
|
||||
"blk.19.attn_k.weight": "0bab5d9e9083492bfb05a5a3bb23b79c0e7b99ef6a6644817b4d57d5c453b8a5",
|
||||
"blk.19.attn_output.weight": "c99c551d70eafad0f7aea98fb6f9251635897168eb3895f76abf0d4ea3b3aa6f",
|
||||
"blk.19.attn_q.weight": "c98bde95627c3b54c9443813ca50b4e14f518319681db6bbf7b2332ba26e9a60",
|
||||
"blk.19.attn_v.weight": "ff3a490518cf64904db89ce0dc7d6eb89e870f1440e41883c6b55a221f82de84",
|
||||
"blk.20.ffn_gate.weight": "761f0e317229cafe9d3754048ab038a0a84e9a287b196ab65f633139f2d29aba",
|
||||
"blk.20.attn_k.weight": "45d13439b41066d282e8490a726785abf513605f46c79bd0c840f6419d27e790",
|
||||
"blk.20.attn_output.weight": "a3b958d84b4a097844179b7d55c18fd0e4f319cb15e918c6fde33b68de1bcac6",
|
||||
"blk.20.attn_q.weight": "127ab8e7d8c3f882874904196a02712bab42e6744fde45871b67350609d19f5e",
|
||||
"blk.20.attn_v.weight": "5f0ad2d14a8ae42dd3bbeccfb33295687a14055fa92c54bc946249373c1c9f17",
|
||||
"blk.20.attn_norm.weight": "77300b1755edc8c70089e0f45efa646056b9add7d8568b2324d2f3e62b64971a",
|
||||
"blk.20.ffn_down.weight": "ab93d0e075b42e9017b701a070d561e698050d90aac4b4b9919256fbe50c3204",
|
||||
"blk.20.ffn_up.weight": "4fd6628a07acc57a48d1ef83f81b7d7aa0bce569c1160a99d307284f8821322c",
|
||||
"blk.20.ffn_norm.weight": "2a9e46b9e48e8e55215de56592e1f189530037c1c94a1428e3d6f106c7f26fb2",
|
||||
"blk.21.attn_norm.weight": "4b3b5912c7bc61eb9da8e47d4651f896e85d9e59c4ecaa65df7acf3c21737298",
|
||||
"blk.21.ffn_down.weight": "7146f931663d93b8771cd84405cd4802ea6560d0729b0d6d44588203c095bc53",
|
||||
"blk.21.ffn_gate.weight": "b44ec5d64388fa40b90b3e9976d97a8b6800fa3b97584f32e64b03daffb8601f",
|
||||
"blk.21.ffn_up.weight": "0cf3643fd23c685e17062cd11e116e17ce57a405e5e78953bab94cd62fe48789",
|
||||
"blk.21.ffn_norm.weight": "4ef2cdb53da166df70b39f3e6b17af51848cfa5ea3c27ad6a1ae2a1bb1da1ce9",
|
||||
"blk.21.attn_k.weight": "5d40f32a706f670c19972b14176bf660d5b045e3637b110dbf8d7de4ff32101a",
|
||||
"blk.21.attn_output.weight": "18afaa916752ce16c9653ec0ec7e2fe60be55faa2aa5025d147be184adb75cac",
|
||||
"blk.21.attn_q.weight": "2621daa5f858931514a4b2f0fe8d81cf9b96f541e6af99bfa7539e9bde8e34ee",
|
||||
"blk.21.attn_v.weight": "63226dafc54c899bbce4aa49efceeedd8908e94faa613450fdda91f332b62864",
|
||||
"blk.22.attn_norm.weight": "cf3058daab4d2c04387e7d169d1553bb8e7358eea66285ec067703f6ce62043a",
|
||||
"blk.22.ffn_down.weight": "6a58d5fd220abdbac6cee7ba048abab794731af318f04982c2506df59413d0b3",
|
||||
"blk.22.ffn_gate.weight": "d5614535324b03c7b91727a903b2a72f8d07ad17f7aa8b61ea173cf9b895069e",
|
||||
"blk.22.ffn_up.weight": "ec20da3949566e93f66cabb67f8cd7eab399047ec6ebf5d43edfaf3669b82296",
|
||||
"blk.22.ffn_norm.weight": "84c82f38f53a649972a44466fc476bf764e064ce18de870291edc302f3700e28",
|
||||
"blk.22.attn_k.weight": "a3d2ecc37fde7c201176bb8abadf27f0d8ede9679a6034913e03d9db924fda12",
|
||||
"blk.22.attn_output.weight": "5a3b8bb433f43a387df43dd371bdf80ddfac986dfeaf38e9bac1d7a0ec6628de",
|
||||
"blk.22.attn_q.weight": "3a875cec661b4859f30a8fd2c866811184b25b68c9e36fe2663d299caf8b59c6",
|
||||
"blk.22.attn_v.weight": "8717a83b79035058dcfd3ef6f8e5b36e71d77379e5a239e1899eef8766fb7703",
|
||||
"blk.23.attn_norm.weight": "2b4a68a0a2f023dd646e4755c9bef17c2f631901154afd839edac7ac006ec99c",
|
||||
"blk.23.ffn_down.weight": "29499b1586c6fc4883c9b7a9c8cf388035146b5aecf90c5c4c8c8e082c71e7d7",
|
||||
"blk.23.ffn_gate.weight": "7d6554036d21c587b9b556428054f9c15cbef96d24b257f906fcef4ae38bd9c8",
|
||||
"blk.23.ffn_up.weight": "19761ecb288d6ebd44b681c4535661583b1e19dc29e96d0c007333cd8f00aacf",
|
||||
"blk.23.ffn_norm.weight": "37dc35500790a4ca33807b39cf7af65065e535dc25b9e94f3ed2759f61887ac9",
|
||||
"blk.23.attn_k.weight": "717547d00323817b0cb40a72ec5f8cf42ecd1f9e3e42715c2cc5e38f07fffffe",
|
||||
"blk.23.attn_output.weight": "a24786feb6a905fdf166d7500133757cbe494779d4ebcba9eb03046b319557df",
|
||||
"blk.23.attn_q.weight": "6a2c4a98f138b928d22136efa163562691d3b4ed526d52d46a2fa2694a8f3965",
|
||||
"blk.23.attn_v.weight": "c6e6081eb9c38a7fda023085957b460e9ea321e1fff408b38c2b58595c39979c",
|
||||
"blk.24.attn_norm.weight": "5e6283f891e538670425f3e244b08dc6f96f33dfa4aefa913f8eb17212421850",
|
||||
"blk.24.ffn_down.weight": "e09eb170f389deea0a4a1cbfdb52c12490768a2c60491b7bef8a4c445e2a08f5",
|
||||
"blk.24.ffn_gate.weight": "af29d815cf49a38fc2ebd0bf9b2dd9933d023a29f2d766981acb9a1b53f09117",
|
||||
"blk.24.ffn_up.weight": "36ccd9333426666de9d3088bd4dcdf5b624b09dca9e3a83a22fc0383f2d950fa",
|
||||
"blk.24.ffn_norm.weight": "a88e1692318826db6ac42582d182e51a3c698c655d0e21e04fa086318832d07b",
|
||||
"blk.24.attn_k.weight": "f7d61d6d1225289bcc502e3bbb0168b4584add0253218c1b77ac92ccef9a1c2e",
|
||||
"blk.24.attn_output.weight": "85a1363b3ccc87312094c2195022687c16b0dad7fafb9e80bb4ec474d53c29ac",
|
||||
"blk.24.attn_q.weight": "53482a2c008f42f4fad779ca323addc3712040149dfc12f782417756388a72bb",
|
||||
"blk.24.attn_v.weight": "67498272369af7dd10097c73b07f731b565cfc9a559e711cc0d526389e7b44e2",
|
||||
"blk.25.attn_norm.weight": "98dd617def5cb7825ee4833132ca2da2121245921585e1d9e36b93344adc321b",
|
||||
"blk.25.ffn_down.weight": "7fd477d6c50aed5f424a878dd284343379cffbee8a34c0b6e55100c8305fa13f",
|
||||
"blk.25.ffn_gate.weight": "f892c9806c8ec22e8aa746734ac9213428c534921cf161239e1d249fdb5d1ec0",
|
||||
"blk.25.ffn_up.weight": "528bed14c9bf9762f790525ee40412545221f4321d2a2323fa8e73c58b7643c5",
|
||||
"blk.25.ffn_norm.weight": "ca5831966672e7be6a578feeb631ec3570d3b5afe12860819ccb96e896ffc346",
|
||||
"blk.25.attn_k.weight": "610d3068cc9b20401f0c3a0efea39a279dd9f564fde19baf3403b2ec2319e4c4",
|
||||
"blk.25.attn_output.weight": "798aaf702e53b657265ac3b5e6caf3a0ab515bdadfeb1a3a156b4f3bfba76666",
|
||||
"blk.25.attn_q.weight": "8a7fa25248de83029fb97b51d036a01baebe31fcb4be121ab00dd8b7de209b10",
|
||||
"blk.25.attn_v.weight": "2a53d5e9f8a1218c66958c6388d3b37400a9af7956c785024ca44bfbc3c7d371",
|
||||
"blk.26.attn_norm.weight": "5f44fc043481eb0771f3e6d2420bcbcf73140afb9a9feb8eddb6575452acebee",
|
||||
"blk.26.ffn_down.weight": "944a60a409d0d5b6a851e33c69aca152454b691711a8b96f5bcc488772ab2833",
|
||||
"blk.26.ffn_gate.weight": "2a0ca4abb3de5593e6693d8be69b63d6d1a639855ac8332a75f520353f030c62",
|
||||
"blk.26.ffn_up.weight": "0b1df496163f9ac07bf89375d3eb441b51a81d41b47d769a04a61efc18dbe35b",
|
||||
"blk.26.ffn_norm.weight": "56b8dd046e9be6ea71f7efd80dbd14e7fb1aa020d3cd38e063275f3873fd12f8",
|
||||
"blk.26.attn_k.weight": "b1dabfabb970e6971c7ea6e53c63cf7ef56341e6a2edd9cf177785cad9af2f9a",
|
||||
"blk.26.attn_output.weight": "39532c7e836baad164a655fb97ec5114ea4da37ffba9fdea2684f6e4450e6f84",
|
||||
"blk.26.attn_q.weight": "8f48bf6aaa1252bc149e98af2be1777a5c0d2c3274c6d314171ea9344a41b604",
|
||||
"blk.26.attn_v.weight": "02fb145f7fd905133750e90571effacadddfd3f4966552dc59982ac3900ab8c4",
|
||||
"blk.27.attn_norm.weight": "654d168fc3cab716d91261f5719f180b7d697218401633b4878a759f1b5283f2",
|
||||
"blk.27.ffn_down.weight": "2823272bec3a1c12f02cc4cb24aa4031abd7e9dbe0b02676e2305b21671818f0",
|
||||
"blk.27.ffn_gate.weight": "b1a1d40cd02f97182cac17a79971d1934ee0daf3aa0bf11303568c636e208a64",
|
||||
"blk.27.ffn_up.weight": "ed62ec72a020d070e64eb7b50237b32213944727b5b2427f45d989f50df5fb2a",
|
||||
"blk.27.ffn_norm.weight": "c69649ac65d694b306a905dee8b03b89eec1ed188b1eaaf38f8e29d4b12e38a0",
|
||||
"blk.27.attn_k.weight": "cc57bbf413f1fd227128dc66efc8590c73634cbd6f96d01ec4878b5e7ca6a925",
|
||||
"blk.27.attn_output.weight": "cac407ad02361d53207b3c7e25ceab84dcb4347b8087055162e2efe14d11d84a",
|
||||
"blk.27.attn_q.weight": "0af18e07cee12015761c07c94407024f4f4d77d97bdb24163db0e16669e2cef3",
|
||||
"blk.27.attn_v.weight": "a1d08fbdfa40af773c5adcf93bd68b78a44ed144e3fc6bbeb8af02e937527eb6",
|
||||
"blk.28.attn_norm.weight": "f39a51f814512b040a1082143150e4a49ff730f85cef49d7f77fc79d83e91f40",
|
||||
"blk.28.ffn_down.weight": "74f29ed51055d1c1adb8f0660bbe538a27e016c65650f2d67efc6f1c84fa1b45",
|
||||
"blk.28.ffn_gate.weight": "ae48bb16487ded6781c60aafc0bf738fb4ae15729952906f247d216592ce249a",
|
||||
"blk.28.ffn_up.weight": "543009727718ac22f11ee4b17815f68ea6f15ba1f3e7ed5ecdb755cf6417565b",
|
||||
"blk.28.ffn_norm.weight": "b8f9e54c322079ff20a82b88948cdc2916c22c7db40b9a9ed6d3cbe89efb727e",
|
||||
"blk.28.attn_k.weight": "55d055ba653b728d6e784f9e013786fed07115c9fdf23367e3941386d5e77db8",
|
||||
"blk.28.attn_output.weight": "155101c03ddbf18f4fd0694bfc982f33c7bae25c9b087d6f5273c2bfbffcf2c9",
|
||||
"blk.28.attn_q.weight": "1ed19bfdd22e9c14eca014739982492e9516d411515a8585f65cf754d849e53f",
|
||||
"blk.28.attn_v.weight": "11ba854dd575c025d37256eee9041f6d1bd2b549a083d6409a09bfc1542913f3",
|
||||
"blk.29.attn_norm.weight": "02b0bf5e2fcefd11a153cc988c81ba672682e4844fcf6442423e21a0e10d566d",
|
||||
"blk.29.ffn_down.weight": "594bb692ec2779938721ff4748666ca8370e0e4fe85229503f616438b8884f5f",
|
||||
"blk.29.ffn_gate.weight": "8bedcf47e91dcb2cf4093de56b048ee411faab6ff472f89ab2c9c113a08e6967",
|
||||
"blk.29.ffn_up.weight": "e241a547b5fd6dfca8200b8141e21c1c487a96cbc4e5855f181a7ed1be91b642",
|
||||
"blk.29.ffn_norm.weight": "e63eba5e4c6b288bfd9f15e46e236086456c8b7f1f9c732c0b5de84962a2e7cc",
|
||||
"blk.29.attn_k.weight": "afe5979d5bcf211aebb526620f5974bcb0a2c39c8be71e815575c55d6385e3aa",
|
||||
"blk.29.attn_output.weight": "9c944ed44b124b014906fc240afd3b90aed56bbd9567f2eddfd5b7a685b3cb48",
|
||||
"blk.29.attn_q.weight": "e234e08e5c1bd9245a2edc8d63e9933b6b879f97c01392209cad4f55f05f3ada",
|
||||
"blk.29.attn_v.weight": "5cb8e3e5f954e775c5a5e4de7a9a62b17e9c6931bb0ff0e2f82c4126fd3e1a1c",
|
||||
"blk.30.attn_norm.weight": "a65483ee51a0b214144ec8a14f28ea5437586e9e12ebe342a57d1f8627ee12af",
|
||||
"blk.30.ffn_down.weight": "417959da77ceb33ead4271cbb9428b195196173a893c44e52880a7ec61b4856b",
|
||||
"blk.30.ffn_gate.weight": "a0d503ffcbe45dc927600bb98c9f6082487e65cb577ab545add400d666a87638",
|
||||
"blk.30.ffn_up.weight": "f8ab957b82ffcd10b21303cb5e866209b6fe95f827b1b94e9a949207952d12c0",
|
||||
"blk.30.ffn_norm.weight": "210c7ceb0514a9ef27b5d4d1b3aff6dde43f1af0345a050d71097940e0e73e03",
|
||||
"blk.30.attn_k.weight": "16861b9abcf5a3fe73c93d977ca45a1e6daa65be0fd85c2cff53486ce2033afa",
|
||||
"blk.30.attn_output.weight": "ca541fb2e57e2257118c35784845b0c731278af8db3036ac53d71aa1681fdbdc",
|
||||
"blk.30.attn_q.weight": "f7834917748e26bb456b945e230bc926c228e93696bc01fbc2b134bdeeac71a1",
|
||||
"blk.30.attn_v.weight": "9292783171dbe5eb689d17c9bda11e537f0e9b328fced6986c938d61ed590e81",
|
||||
"blk.31.ffn_gate.weight": "e4766a04bcd8f937ba883c6a144101e546747804ca66c35c97281d6ccb47b566",
|
||||
"blk.31.ffn_up.weight": "cc1e666116f7e6b06736db4aa4b81003c583f54f4d9200bfa48842249940e16a",
|
||||
"blk.31.attn_k.weight": "fc80b57557687504efae7d24265cb7dc39b8f826bb3d897a11783012dbedc44f",
|
||||
"blk.31.attn_output.weight": "215617f50a1f5d9b2250b82f3652b35a9e9aa0ad9ef2b485d73965a14b2b872a",
|
||||
"blk.31.attn_q.weight": "274b4f1dfb0bdec28632705677049fb3e327ce6d9e1f3baaad1560439039982f",
|
||||
"blk.31.attn_v.weight": "e641b8b926f9dfcbbf6b6da1c02555525ac4b1c306d96f20cfbba7d6662c4e56",
|
||||
"blk.31.attn_norm.weight": "b3243c361d4041ddb892ce6862dd5091f57d87357e3c67e177451b85d8baf34d",
|
||||
"blk.31.ffn_down.weight": "0a00cd3ecd5e91624a27f9e239b1de425d5ba3cfff82c256a11a4ad434abf3c2",
|
||||
"blk.31.ffn_norm.weight": "2a0d67ea2bb1303975712243f07273c92fce83baa11b1cd6d8e42e74ea3c810b",
|
||||
"output.weight": "768615f077fb797967844571c58b94d7c399d884d115be3ab4b0154504cae892",
|
||||
"output_norm.weight": "7cc5b7ce10e5082000fa00bfa68af8c7c5da218e59e2c41cf2f1499d40ca229e"
|
||||
}
|
||||
313
convert/testdata/Mistral-7B-Instruct-v0.2.json
vendored
Normal file
313
convert/testdata/Mistral-7B-Instruct-v0.2.json
vendored
Normal file
@@ -0,0 +1,313 @@
|
||||
{
|
||||
"general.architecture": "llama",
|
||||
"general.file_type": "1",
|
||||
"general.quantization_version": "2",
|
||||
"llama.block_count": "32",
|
||||
"llama.context_length": "32768",
|
||||
"llama.embedding_length": "4096",
|
||||
"llama.feed_forward_length": "14336",
|
||||
"llama.attention.head_count": "32",
|
||||
"llama.attention.head_count_kv": "8",
|
||||
"llama.attention.layer_norm_rms_epsilon": "1e-05",
|
||||
"llama.rope.dimension_count": "128",
|
||||
"tokenizer.ggml.model": "llama",
|
||||
"tokenizer.ggml.add_bos_token": "true",
|
||||
"tokenizer.ggml.add_eos_token": "false",
|
||||
"tokenizer.ggml.bos_token_id": "1",
|
||||
"tokenizer.ggml.eos_token_id": "2",
|
||||
"tokenizer.ggml.unknown_token_id": "0",
|
||||
"tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
|
||||
"tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
|
||||
"tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
|
||||
"token_embd.weight": "cde834ccac5e94324b25cb81b02d27312cac0c551b55a7e1d555d90bf6cb6e81",
|
||||
"blk.0.attn_k.weight": "458bfdd9715c66e017c2447b1ed3c582963a3111479314e664faad8c914f42be",
|
||||
"blk.0.attn_norm.weight": "e1fd60b95f713bae7b7e3ca933c64ae6c9cd1e8d808000204bbfdc19f0ba635b",
|
||||
"blk.0.attn_output.weight": "df13b6a157d9d4f96c53b012b3b9bcd207d0c94144cbd22ae3ec13bb07d6c373",
|
||||
"blk.0.attn_q.weight": "13b4126b4245bf06c915a93317c42b8174e05053535ec99dc576541e4cec7c25",
|
||||
"blk.0.attn_v.weight": "5b1781d3a341214511b27eb4e268674ea3ea829dbdf8ae5a6bb89b3c0b33fafd",
|
||||
"blk.0.ffn_down.weight": "49186f5d8148d316b07458841d13a2e66587f4af69b776188a809591ed9c070d",
|
||||
"blk.0.ffn_gate.weight": "4397e30ece09136f00f4ff84ff49e5241b765a374deb8c5a12e897e2bf73473e",
|
||||
"blk.0.ffn_norm.weight": "43260589aac3850a779bca3f9649f793bbfbe5db538361cb743b3830217f8287",
|
||||
"blk.0.ffn_up.weight": "fd7ac918240a07566f6967527ffca58fcf433a30b78fdd6d84b2136d4ebd9987",
|
||||
"blk.1.attn_k.weight": "209839566c7d235bdc20565a4766378b6ee8553133a5a3315abe8a85baa80712",
|
||||
"blk.1.attn_norm.weight": "58c52986f7c69784ba327cb7f350923420782bee17fa39b1fbd13839d4005357",
|
||||
"blk.1.attn_output.weight": "5067cc628449682665dfcf59b16e58fe2a9d2a81cb099f0fcd42f4f8670c6740",
|
||||
"blk.1.attn_q.weight": "f410f9f0dd5edc09401af597d02e2a4c727f1502ec3ec3898321617b36c6df6b",
|
||||
"blk.1.attn_v.weight": "d40fa49e07c102c0644e130e7909eaa93ed0d54e2edddc0759e721d58a4e4f5e",
|
||||
"blk.1.ffn_down.weight": "594b1eff6ed4defbdd819fabbe2d48764984f08878a860bdb808511d5a25b8db",
|
||||
"blk.1.ffn_gate.weight": "4cda97541e388a5bb607ce4cc8b3db1da7045830a630e7ba4d17807befcff346",
|
||||
"blk.1.ffn_norm.weight": "66c13d7481be65b97aa474735ddc9674f33d512ddda76fa6fb45c7464b09f1ed",
|
||||
"blk.1.ffn_up.weight": "1adc6de288ba4cc1237833ca8b4eb81107149842e38bc452e18e5cfe284338a2",
|
||||
"blk.2.attn_k.weight": "5420423559f236ab22d85a00849f31e0cc6e9c7dd879de724393d8cd2b379153",
|
||||
"blk.2.attn_norm.weight": "495fe1ab40cc52aa054ddd4f0c2d2790f4326c8d103296b1b38f3b1060db2a24",
|
||||
"blk.2.attn_output.weight": "ccb83e7085381f558bfd65588c525ad2671feddcbc3887afb4038ad9c7aac348",
|
||||
"blk.2.attn_q.weight": "2e8f77478392bc93c2a391f2e0f4a173a952bbab88a7aca099c6ee909726409a",
|
||||
"blk.2.attn_v.weight": "d64512590f3b7ebbb9e77c2eb97fbda90b00d45c944f2b174f03a2cb11007567",
|
||||
"blk.2.ffn_down.weight": "1de5084a05dcaa6b1bd926e83517dbe9ebe7fde79235fe56018b3028b1aa6397",
|
||||
"blk.2.ffn_gate.weight": "cbea526b557f49aad8c976973cf367fcd12175b900f551984f498b9e07e4b7fd",
|
||||
"blk.2.ffn_norm.weight": "530aa49b10c7eae08899d143409240deb95dae4e1d5bf78cea3b26393cff3ba1",
|
||||
"blk.2.ffn_up.weight": "13a5fc19b96b4dcc1e9bd01998c8272ebe52034c1933ed123a506b711fae9a5c",
|
||||
"blk.3.attn_k.weight": "1913b63a73305941d8cdc472e7f101c633d3357a78602eac0a4b49a744261075",
|
||||
"blk.3.attn_norm.weight": "9c11bed5ab41f4adbfdae4ead65b525c8f19443e656a8c61ba412a4e1ad1193b",
|
||||
"blk.3.attn_output.weight": "bb0b42c1d34779c5943272ed71f1dbb31ad8edd75f8bcd5c868f88505ac3a610",
|
||||
"blk.3.attn_q.weight": "3461a1fe4e49f5319ea047cae98ccdb46528a3ec23831183fe87610b48c94948",
|
||||
"blk.3.attn_v.weight": "82aa30be6a61526a41fb79bb28a2617416f5909f0477aa9e95e16be9370fcb38",
|
||||
"blk.3.ffn_down.weight": "68521011ae03f5e3b0966127111afa8ee9f2eaeeef8d3a0b86b633e0332e9fbf",
|
||||
"blk.3.ffn_gate.weight": "1e89e26338fd364bb679695968c65106382f15ad55c95cbb5ec9bdfeb766f432",
|
||||
"blk.3.ffn_norm.weight": "c81932529a5a8c417c27b888dbe95fff8b447c2ea5f6f560444ec5d50b93832c",
|
||||
"blk.3.ffn_up.weight": "305021735afd8669afefd713f56137248d5e817e60471a112ad06b7fa07ffe88",
|
||||
"blk.4.attn_k.weight": "cc26ba5c5c28082a79e6abfe61186029e80b145252ca6a7924c437f0bcf2d51b",
|
||||
"blk.4.attn_norm.weight": "302d251fdcc91f7468cf33f80b49484251d8917d7018ad264ab3a85c8ecf9ddd",
|
||||
"blk.4.attn_output.weight": "a012f5bee3520cd4ce51f0076c132ebc3653309f304032ad051aa308f55f36de",
|
||||
"blk.4.attn_q.weight": "3c8d607e447f5ef21e73af71e3c0d32fae16f91f31faae34ff06912cf9cb68fa",
|
||||
"blk.4.attn_v.weight": "49f6c81a634ce46d71c2350206ecbd231b1732af96e4e4e67693c41a07e007d8",
|
||||
"blk.4.ffn_down.weight": "e89504f311a4a34dc819a67b761022f14d71c43df3ead4f892c87aaa8e9f0adf",
|
||||
"blk.4.ffn_gate.weight": "18b22f079a2fbaefe3572eec61fdcd996fd747724e2f0ff4f08cfcb43eb7bfb6",
|
||||
"blk.4.ffn_norm.weight": "22415a492c168a0878912b05c854a631228b01c3ea8842e1d75989ec46c18a65",
|
||||
"blk.4.ffn_up.weight": "f57379eae2874d8853f14ddf0f0fcc4ff1338574d5ed5d7e88331d5fb84f5642",
|
||||
"blk.5.attn_k.weight": "d627af853c40bddf9762ce3988008c1ff17f2686fa8f73a0b5da38010147c316",
|
||||
"blk.5.attn_norm.weight": "9ce01092c7f7f1c3ef72d6b794da12d77aa1f6a24fb96ba1b9bd5a0bcc3e2443",
|
||||
"blk.5.attn_output.weight": "0388da8064c4b6b795ce2d8079e8a36535e82b2c9cf794e38ce8ae460aae726d",
|
||||
"blk.5.attn_q.weight": "039b7ce1c909761fdf475c06cf14cabe5a90199282c89e4dcf460e95a4b6275d",
|
||||
"blk.5.attn_v.weight": "c47bfd8d2496bdb6e00e03b903e15fd0ee806a515094ec257e43cc433147ab7e",
|
||||
"blk.5.ffn_down.weight": "1d62e6708974bae318cbf00a8bf621d9ba0537e549ce4710a536520a8d14168e",
|
||||
"blk.5.ffn_gate.weight": "8b42b1b11c92db19985094cbb50434e3a7c9cfea71ee6f21ea79eae7c49284a5",
|
||||
"blk.5.ffn_norm.weight": "e0bc520f1505e687ec391d632a381d38d8ebcdec19f614a11a2000ab573e8b7b",
|
||||
"blk.5.ffn_up.weight": "8cdcd17d2ea89bb9ab902dbc6bf3f827fa4ee029c6bf19eecbdefd146d8b6f2f",
|
||||
"blk.6.attn_k.weight": "5dc6bcff89794d1756bf57ec665b58622d9352130d31082a6c66e1a079f99932",
|
||||
"blk.6.attn_norm.weight": "13b26008abe0f119b5104b9d78ebd5e797d3cdd68122b93d73a3b4831a54d085",
|
||||
"blk.6.attn_output.weight": "f5a49917ea70c3fb311ccfffbfafa63ab18416a5d55e5429b70ce8bfba57c075",
|
||||
"blk.6.attn_q.weight": "d9c2f652c87dbd09ec3822e12876648fa32e86553ac25afab723b1cd9f8cef90",
|
||||
"blk.6.attn_v.weight": "5ecc5fe67609a35151011cb526f45c56fc0a999079ae0ff37c755ca03c68c555",
|
||||
"blk.6.ffn_down.weight": "0ec125ae0ecb2d9277fdb1b04f17efee94e37d0ae37311057c212ca2db3fe6d1",
|
||||
"blk.6.ffn_gate.weight": "fa4d6d38355ee8aa3b80b476d65ae7e343c9b7770d7b097fc848ee8a6e091d1f",
|
||||
"blk.6.ffn_norm.weight": "30e8f7defc627532e1739dc76d31223d45767391a431f925b63dabe334b0f392",
|
||||
"blk.6.ffn_up.weight": "6b97cc32b290fa9087806b5d65aa6dc1760737730c8c71394cc4f30c2157f9ab",
|
||||
"blk.7.attn_k.weight": "0231cb127cb7c3714cd72b8f39343891d7715a9bab2237ade9e7bc5f4ed2e68a",
|
||||
"blk.7.attn_norm.weight": "7c3187f07eead7d219d98ab2daf87905e88d5f1ace109b6f5fa55dce3914981f",
|
||||
"blk.7.attn_output.weight": "2f30ad972c284ae7c8eb0482053433495ebe8fe9c5ee2c28b4bc4ed1f33050fe",
|
||||
"blk.7.attn_q.weight": "3a2b4b8d61cc9956d304fa9f82a9e65b4bb9fda2196670b16df7e0d8c43eff2c",
|
||||
"blk.7.attn_v.weight": "d2aab97d0dcf0f61dd2f32848f7a8a99c423a4948a660a660a03a546972b8db8",
|
||||
"blk.7.ffn_down.weight": "2270d520468c5549cd30023ff9c452a277058310104c4239a616373fc5a94387",
|
||||
"blk.7.ffn_gate.weight": "4134a3ef71b3eac8f76b6f1a2e58625b3bae48081f175994bc3ed7d8b0d4f2d0",
|
||||
"blk.7.ffn_norm.weight": "42df4abd4b8769b16f3930068f96960af1b061f1aeb7505384f272233b2badff",
|
||||
"blk.7.ffn_up.weight": "c920549054ec16ff8c73a72f5d837cf4e11885e44db57c1c1c584c18fbd7a9a5",
|
||||
"blk.8.attn_k.weight": "01c609bd3bf31ce65688f1f640ee413740e821330134d4ed1877a3065d1527d5",
|
||||
"blk.8.attn_norm.weight": "48857411f769b00290f4e4f2e593e092781fdc2503f80c1e3eeda1b85a20f74d",
|
||||
"blk.8.attn_output.weight": "90fb273f8df83744554bd59236515c16c5a5a698ca3fbedc17cc89ddcee354ff",
|
||||
"blk.8.attn_q.weight": "ade617ac4653c7f00593dbb51837a468afef20a14eaab3780fb96ac3d6714369",
|
||||
"blk.8.attn_v.weight": "c2c37496494864fee5c527d1fe1f88529d31c73f9cbd02ef9b2e9b23611ea50f",
|
||||
"blk.8.ffn_down.weight": "2da58572e9ad79087c03cbb0c23c9ef69f93ec221fd5fe4ed92fb93871d23ffa",
|
||||
"blk.8.ffn_gate.weight": "4483294e628edaa4901708e73e92c917bdd93b780fa01aa74aed57166f2bbf0a",
|
||||
"blk.8.ffn_norm.weight": "c0cbb7a4f8123b62f0c4652a687f3b394802bc32870dc446eefb709e42043a7f",
|
||||
"blk.8.ffn_up.weight": "9eaf8a2060cb9224cd585997cd671866c4051ad885c2c6d9fdc7056c2a5c0d89",
|
||||
"blk.9.attn_k.weight": "5dd36c45fbc9c50fd35c36cd75576288506971eac5c5311d4f5c16ef60099645",
|
||||
"blk.9.attn_norm.weight": "3c8ca64f2f75ed7c8fc1da010c23be787648139a96ca0ef3ad10be7b14942b8d",
|
||||
"blk.9.attn_output.weight": "6277e1f833024f53c409be919ec76d34464a78b278c8f9dbf79e777746e3b995",
|
||||
"blk.9.attn_q.weight": "87352b70d9e328c2d51d59090cf5ea5a046529864a890d0bc8986447a0a5c006",
|
||||
"blk.9.attn_v.weight": "2efdf01161d7a82a9117cc2d87d37dba5ffefcf730781cb94fcc95130e48ff9e",
|
||||
"blk.9.ffn_down.weight": "e7658a2ca984961c7ace16acb679387bedb1fef656b5330bbbf588db19673a75",
|
||||
"blk.9.ffn_gate.weight": "773cd330d4ff5d64be8af00adf2e2722fae4e33fc26bb9d03549f6f4b3b0fe57",
|
||||
"blk.9.ffn_norm.weight": "c8b86cd5c43b332f72060b807091c33a258e5dac01358ff4733b916cd34c9c97",
|
||||
"blk.9.ffn_up.weight": "d8cc3bcff18bd46124ba2aa7caacc71220b44eeef6fccb993b4c6cb53e8f2c3a",
|
||||
"blk.10.attn_k.weight": "964bdf3b4e77b915a216f750ff7b0f2eb1dd6bfa071358aef21010b90111044d",
|
||||
"blk.10.attn_norm.weight": "59ed411d91d14775764eb514acb0895a75a10cbbfbc1c15d453bc50f8046cb7f",
|
||||
"blk.10.attn_output.weight": "4d35a2a44cfe4ac0a83fd3ab0dcf1f5a0bf54cdb3b7be9fc353ed32c8a3eb81c",
|
||||
"blk.10.attn_q.weight": "defff5339450dd881ac352f5c459293f39e07b9619ebd10ed632d79a3f310278",
|
||||
"blk.10.attn_v.weight": "b9803e8d6a54acea58f662d4c0a5c8ebdf986676de7dfe12d4b288937881ce93",
|
||||
"blk.10.ffn_down.weight": "eba856be64e4be20b92fb4639a783454dd92427250759df92a337e39f1971c08",
|
||||
"blk.10.ffn_gate.weight": "2d5c509b066584db4de3632b01234e86edcde35409c5ebce18957dc80fe465e3",
|
||||
"blk.10.ffn_norm.weight": "ecb9a8679945ff0273856624ce435dd250ffe5a440ea0861a5c84f0e4c44d2c6",
|
||||
"blk.10.ffn_up.weight": "e76ec7e993f399af02958778c643aa78368e3067846714165eb5aba9d5f547f5",
|
||||
"blk.11.attn_k.weight": "29c6d1f34bd3ba2f0904e57b32a5bf8dcb2834d439159a33edf234ce0b775677",
|
||||
"blk.11.attn_norm.weight": "b5817b275149cd2abe18a6a10e19854605fc58fd364666744362ceee8cfe49f4",
|
||||
"blk.11.attn_output.weight": "1e05653220e237cbe0cc770033e183c9a0eed5680510997409b16186c6691950",
|
||||
"blk.11.attn_q.weight": "03db725ae669151e4d536e50285b3b047ad097f52475df208ed3e790e31a44be",
|
||||
"blk.11.attn_v.weight": "27cdf1d4e971326c451a4615a0b79a8c7fe9508f9b76c0d52fa01971fc7eb403",
|
||||
"blk.11.ffn_down.weight": "176938cd7c2966094f614cace8ba568b10532e45a0d438f80eccd19b6c2a7f87",
|
||||
"blk.11.ffn_gate.weight": "9782339915dd6fa70013628a01524ee1d01ad8beab04068da7ac6a5ee7603a60",
|
||||
"blk.11.ffn_norm.weight": "8245f6391e3be97811c0ff27f0d8f484ecc82a468a837c893f059745bfcd95eb",
|
||||
"blk.11.ffn_up.weight": "15616ddde096d0d25e906375c548b6de4bd5576d1f6b68eefdc29f14e183af42",
|
||||
"blk.12.attn_k.weight": "66dd21604993edd1b1fe547bcaa06f5bb7e31c9204902d147a227e4badf7feec",
|
||||
"blk.12.attn_norm.weight": "23a69f85dd8a0904b9839cc5d0afcda299b74e82ae2642106224a1c820f2b761",
|
||||
"blk.12.attn_output.weight": "4a98d132e376beb274a39d4ea9b6a1b870ad5c66625439d7ff6f45c229c3ca04",
|
||||
"blk.12.attn_q.weight": "1c6c309d63afcfde32fe37257e300a78e25d01117e33490801107c0e75d1ea66",
|
||||
"blk.12.attn_v.weight": "723d9e4ebe4e2b1974afa01d8f512b52933698fa36717dd47b37b07760c50a10",
|
||||
"blk.12.ffn_down.weight": "00e0fb09e1f1fbbf3803f1dee373eaae7a93756b6e13063ab77f9927bc6f996a",
|
||||
"blk.12.ffn_gate.weight": "89159f7f97aefb1e100107e3ac2d694e1008ad873f79bb953d60c2c1bb22724d",
|
||||
"blk.12.ffn_norm.weight": "5f70aebd0e43a39d6373d8658cc670c13aadd7818831d3d84f761d5f688442f0",
|
||||
"blk.12.ffn_up.weight": "faec21b446f061eb4dca561a3180712724347b77a71eb312e7afe9be9e89fa04",
|
||||
"blk.13.attn_k.weight": "3d440825d19eac3b1753b34d94fee2b3a3cb6636c10b2703ffcf688d3c1eded3",
|
||||
"blk.13.attn_norm.weight": "47b575e57e410738ad13fd3c74bb49c06b3d31030910834ece509cd1a5c6d9be",
|
||||
"blk.13.attn_output.weight": "05436d8e613f4475741c1798a7c371b53d61b229507fa04fe23c504ba1f0e12a",
|
||||
"blk.13.attn_q.weight": "002b5024ce520da41256e3ded5cdc60e5ae07ad9b202cb19d76ab511efd02b1b",
|
||||
"blk.13.attn_v.weight": "c1f2d6763587c50312cee0d7140fa2c7ee326f5b172bc99b2d8946e08329cabd",
|
||||
"blk.13.ffn_down.weight": "b5c4e0d8a3ff96cd76a135e415b89f02d28c28f7f3c16a36af31ef0ab8773da5",
|
||||
"blk.13.ffn_gate.weight": "ae06e9e3d2e1f64c7ad23a4009dc904c2eccd7241f9f91c4974ab2504f116be0",
|
||||
"blk.13.ffn_norm.weight": "e44a22321bcbcb4a3c345b504e939e8071370f54a8cd702fabdb40b97e0d7683",
|
||||
"blk.13.ffn_up.weight": "7e6f366d538e21ad431264b12c011892d0be9dfe4c4da9f730af677f920641ba",
|
||||
"blk.14.attn_k.weight": "95492d6417952ec24b2cab87bceb750fc7e95ac6b1944fc328a3852d980164be",
|
||||
"blk.14.attn_norm.weight": "6b7b09e1c51addcdbb160ea59edf032531421c520ec5645fe1ff9ca4180cef54",
|
||||
"blk.14.attn_output.weight": "75887474e4d72c218e6ab0f69f1bf3ec3dc414d51b36fc59df00cdb23421bb6a",
|
||||
"blk.14.attn_q.weight": "940e33f76e48c21215d19e8a21234c8246d4d084381a7d9806aecb24b071d5bd",
|
||||
"blk.14.attn_v.weight": "c58601cf5a9833f80f7f9a5b2656e8eab5eb133211446ebd48f8be15fed4ebb9",
|
||||
"blk.14.ffn_down.weight": "f9f886e7f9b2a54d717b08947a25a0a93e8c2a5b8bcd5a907c06817c8ee3ac11",
|
||||
"blk.14.ffn_gate.weight": "727ed0ee68594a3f59d704ed3240b6929f083b9c36650fb848d182315737245c",
|
||||
"blk.14.ffn_norm.weight": "bd2471008ff1b2bae9aa26bea019393fb2bbc5b9493b8cec3ebd2c280fca24ca",
|
||||
"blk.14.ffn_up.weight": "b006446769f51e4f93b503c4727deae897bc1fc7f4fad49f85024b63c4548d38",
|
||||
"blk.15.attn_k.weight": "23bb70f9035356624039547a603e46be7d1e4403616eafc2451cc09c5373d522",
|
||||
"blk.15.attn_norm.weight": "718cb371ca052eeb3bfac6ac506abb887df125271821fd171797a7f2d8dd6313",
|
||||
"blk.15.attn_output.weight": "c76a2695a204b43a8e5acfa5720590b5d449a9ad9e082cbe3e80fab5903ea16a",
|
||||
"blk.15.attn_q.weight": "2b3e4037b9e91bdd26d6e8d904cf39f948192dcf09bb6445cb55ca058d4f4626",
|
||||
"blk.15.attn_v.weight": "7c15e89b6acafc8619e86aa9d412f5893ab17843ff2cfaf40eea9637b24910c6",
|
||||
"blk.15.ffn_down.weight": "e16fd4bdc6d1c1209c6b633454df4992870c8cefb2cb0e8c92a7e489e9fb5d19",
|
||||
"blk.15.ffn_gate.weight": "95a46bea366c260337c537fde06b4cbeaeec52484a69c3390bb1d178eb0525c9",
|
||||
"blk.15.ffn_norm.weight": "37730293f704da265dc6d1896b3be00c39c0a41dab07f573af39dc30a481d623",
|
||||
"blk.15.ffn_up.weight": "ba74a199da2d0875d7410824238c4ffafbda3993568812284a72b8800df91f15",
|
||||
"blk.16.attn_k.weight": "f58f79a2a91c9a763adefce0c53a71eb5ce6bd8442f4af554b04b58083bff27e",
|
||||
"blk.16.attn_norm.weight": "0c16e41b95e81978e0e0e3b338e2afe2d297426578cacee94de15df74e94eaad",
|
||||
"blk.16.attn_output.weight": "ead22fc337514e4add49aee19720008558e52090466866e849671953a1fccba4",
|
||||
"blk.16.attn_q.weight": "ef59c4e8fe8918c1add43d7e9c6fb3ef799dd3e1bdd731ec7b6a4a6f97c86048",
|
||||
"blk.16.attn_v.weight": "902e6b84c2b64241470b13e6f412f859f66b4b223bcfb9c15d5cb1106b07ef3b",
|
||||
"blk.16.ffn_down.weight": "2ad6e9eb4d8372c32a554395d460d17cfb02d6dbcb757cc962b6bfa36db4f5ee",
|
||||
"blk.16.ffn_gate.weight": "825b2d50fcce3dbe6a5d8d8a50a95466f83ca4a10343efe67894c20b4628fb15",
|
||||
"blk.16.ffn_norm.weight": "3bf6ac90befb0e17e077c8ea9454a8485a30f89f2d761ec7751b60c90aed1af9",
|
||||
"blk.16.ffn_up.weight": "9fbdd08739b32411f5ab0252174d386bab19eb0b17884862f760429b7d41d78c",
|
||||
"blk.17.attn_k.weight": "4033398718bf3674830ed1b73071ed8482b6dd4ef27f31a6c5fbb998321b6c07",
|
||||
"blk.17.attn_norm.weight": "714f2e8ac9592966a0f1c02ee979eee8f84586405b992e8ee9543e840199ffa1",
|
||||
"blk.17.attn_output.weight": "b6bbb618597d767b8f535117be68f92911e4a71d4eb4d8b5d943444151445ece",
|
||||
"blk.17.attn_q.weight": "b84a0dc00ceb515faa2628125dcec502eed923077b21cfe900a4ff16c2e5f9ed",
|
||||
"blk.17.attn_v.weight": "4387c7d6a17da9cc7a6bca8f4a75618b20407d570792056283a8e93b6ec65f18",
|
||||
"blk.17.ffn_down.weight": "47db95c6f1e12b399c3eaf9ddba261782dd71173dd163b52af96541cf87b5196",
|
||||
"blk.17.ffn_gate.weight": "59abaded0aedfd12f01df81f7a811e84db6a227f51b60abe9a247ca726e87392",
|
||||
"blk.17.ffn_norm.weight": "b7e86445be5c7b722e01ddb98d5c7527ca86cb827ce0354f2c269e0f2558751e",
|
||||
"blk.17.ffn_up.weight": "8e31c293bac649d2f60da4b3fc4a3acdce1111ec6058d8805eeeb242443011de",
|
||||
"blk.18.attn_k.weight": "5ce762ab7b032511c131df81093b587871718c7097f79d8e07d707571f18a47b",
|
||||
"blk.18.attn_norm.weight": "1f52cdc7af1f4dc1f0ef6ad1ad02e18cda32133654e57cfa9c72ada9c0b1d995",
|
||||
"blk.18.attn_output.weight": "6486957f30bf8a88516e25772c6650f98b13923f490a2865a8752e36439d1cfa",
|
||||
"blk.18.attn_q.weight": "93621c8abf69d2ca29c5207180eb628fb2b544d89de6c4a7fb0699be95534899",
|
||||
"blk.18.attn_v.weight": "11604083b5a74828ac1d226af015ad5dc0215a1fdca44fa7131c2163c02d8156",
|
||||
"blk.18.ffn_down.weight": "8f9997feb94385f106915df810239c9753b31efda2bf14bdf18a9fbbeec8233d",
|
||||
"blk.18.ffn_gate.weight": "427c213b3a4e94af703429daf2f65766f70424d8230c123e7e712a18bceb5ecb",
|
||||
"blk.18.ffn_norm.weight": "c45d305c4ea6a54013ba112f12dafaade064a32cf01317373464a3618d8ba44a",
|
||||
"blk.18.ffn_up.weight": "a2811f2e73ac9eb9cce91a21a454e84e230a155244e2cd73f2c12aad3c9b8cfd",
|
||||
"blk.19.attn_k.weight": "b2daed159925eac58c291e2f1e2000beed21002b03c9e1bc7e7a52e22240666c",
|
||||
"blk.19.attn_norm.weight": "6307306ede2ab5bffa1bcac3f8b139354678c0376b1d9f5530c1fcb4268cfeb4",
|
||||
"blk.19.attn_output.weight": "ebb98218b2a9c84d3fb6baeb02c5df264b7ab80d994d1098ba1cd47aa398effe",
|
||||
"blk.19.attn_q.weight": "4f10df2ad09177e7528e9456039b670d07db22940a49417101b725d239c16724",
|
||||
"blk.19.attn_v.weight": "30f1efc5114badaeaafa91fa466dc7fa14b1616db433c6f563ab851f7333a5dd",
|
||||
"blk.19.ffn_down.weight": "be5ec7fe6b48855cd0015b0e430d1b70c620de87a7ff188c7c1afef546d7b6bd",
|
||||
"blk.19.ffn_gate.weight": "10dffea4213881f8a9b583ee0fd370e033756d32255ed15053f794375b9400e9",
|
||||
"blk.19.ffn_norm.weight": "e75cd24ade45dca78fdb0cbcaaa2d4a17d83a5a73dcc94ce0ec2d68fbdb2a881",
|
||||
"blk.19.ffn_up.weight": "63e81bdb951410ffa81bcfba1b94a679ec9ebae59cd1623ce2651ed5d4c78bfd",
|
||||
"blk.20.attn_k.weight": "c2fc5ad39e9bdd45e73c6e54aecc474388d944c4be1ee1921b7fcd035bad02e0",
|
||||
"blk.20.attn_norm.weight": "aaa9169171937bdce20c1f057e94e9252f221cabacf1ced12e11b9586f23d308",
|
||||
"blk.20.attn_output.weight": "a9f4fb496e4bc053e3f6cf2e72e22d4cd2b545ef6c32f7e782c2ef6ebcc21d4b",
|
||||
"blk.20.attn_q.weight": "5a07ac619ed251494170b213921ef3fcc4c2712839da262516d9d5b8ea1ff185",
|
||||
"blk.20.attn_v.weight": "d6689473105d241eacb17f09f06000ee237336916cf5ec4f48271c5b41bcb8e7",
|
||||
"blk.20.ffn_down.weight": "74be38db51df736f26ede7c6b52ea787e385f181cb66231e2cced4556a25c9b8",
|
||||
"blk.20.ffn_gate.weight": "ea91e06dc3d051c0ba0243b5a8bb40edbf254eadfb54fda7247e05cfdd88cbe2",
|
||||
"blk.20.ffn_norm.weight": "5fbd357b3d6f44a7a91e8a4fc246b24303891b7957e0f3c32818ae5dc16ddd8d",
|
||||
"blk.20.ffn_up.weight": "fe3290333e056af4ed12942ac72aeba97a6b562e2db05e79cd35dd07eab5b101",
|
||||
"blk.21.attn_k.weight": "201ec6ee95f06ea5eb80fe86fd07bd016d3ae9ab6abd25d631834414e14a010e",
|
||||
"blk.21.attn_norm.weight": "ea8154f93e06485828475a00b98cc397ac84768dd70e06ecc0c075b5712d7276",
|
||||
"blk.21.attn_output.weight": "9f8af74d531478fd304723fd8e4e01578db598441b80dc7c960cb801dbbc501e",
|
||||
"blk.21.attn_q.weight": "277de9953a8d3cff894ffd06c15ad0ee1407e319df0c1a693d4f45fa9c74ac7f",
|
||||
"blk.21.attn_v.weight": "6bfdc16cfb898909b7788ddd39dd04b928f31d6732772195d53c558004638dca",
|
||||
"blk.21.ffn_down.weight": "173877146cb94801157796ee9e5eecf3f46acb3b5e797f90b83a3fc22395eb30",
|
||||
"blk.21.ffn_gate.weight": "53146713e2ca1be80496024077a028f6b6d749b02e71003c349e113b436f48f4",
|
||||
"blk.21.ffn_norm.weight": "b28b97e18ab20a5c553ba422f7d7f6014f5902f1d62a69abd20d9fe19a5f9462",
|
||||
"blk.21.ffn_up.weight": "5c39d0ac4d602b8ec8909dade93b2efcd6b6d9d84a19b252d76bb66dcfaab87c",
|
||||
"blk.22.attn_k.weight": "01f26272c82917a87a3ccf922fa1d521a952b05de878241b7efe3525b617ac87",
|
||||
"blk.22.attn_norm.weight": "5ffc96249d8873b506e9eb7158bdfd07fa1429e53c1951430ca7505d25f11c76",
|
||||
"blk.22.attn_output.weight": "9c2201569358f720244b9c9497e4da02585a167b1414c8a506b85ad75ba990d0",
|
||||
"blk.22.attn_q.weight": "906036eb4ddf027f6d920f9356a6a2a5e529b96f4e1231a0496d46b4434a5842",
|
||||
"blk.22.attn_v.weight": "30ede8b0d166003a4b8a81fc99437f557719fc36e5c4dd510c9f161f36a47e73",
|
||||
"blk.22.ffn_down.weight": "d04c164beabab30e1837b843e18852260efccfbb9d96a34ddd816e6fb3ba23c5",
|
||||
"blk.22.ffn_gate.weight": "19c889db6b19179f0a62d5981a1506592c65de83760d67afbe00d202202750a8",
|
||||
"blk.22.ffn_norm.weight": "4885eff2d851b32dbd306bd632c725857e6d164f0fa8b3d5857e572e6ef98ee9",
|
||||
"blk.22.ffn_up.weight": "365594d8db8e95cf87cc33ac23947942dc326110175cc8ec5a07b5c7059089a7",
|
||||
"blk.23.attn_k.weight": "badfea1569da0fc6ab817c5727ca3a69b07d9cfd622fb8be5e66678d5b3f7ae2",
|
||||
"blk.23.attn_norm.weight": "8968f78a379ac3ca5458b4ed4251e8d9112aca6d6dd1ef6440b4bb0b380375a4",
|
||||
"blk.23.attn_output.weight": "93e43393c03956287b1fe31e9735ff1cfe84f4ae56b83dbaebe96275e4e11831",
|
||||
"blk.23.attn_q.weight": "aaff73c725a8700ae66bf26ac8869dfe96738eff23a8ff340de2ab53400a5795",
|
||||
"blk.23.attn_v.weight": "3a86a8dcf14a746ed1411f5a7e634064bc4dfd6511c24cfeccfb2c9ebb6b4101",
|
||||
"blk.23.ffn_down.weight": "d4da6f37bd7ef69bb203f7b0dd59f50bce37432c70627e6cf274ab81548af5cf",
|
||||
"blk.23.ffn_gate.weight": "5b6072936c4a693923bb4e3d1473fd45545cb02fc07799aca458ef0449a04061",
|
||||
"blk.23.ffn_norm.weight": "cd76e37025f84773180298ddb15e0d4ba9cfc7d832e19c791049daa47c6d9c10",
|
||||
"blk.23.ffn_up.weight": "cde43b99b83124a13b2e4753d12674b3a61dfb34c04703007ced3e8e2aee1801",
|
||||
"blk.24.attn_k.weight": "457379edc4cce4cbbe107385079019bc922264fdfc7bd1d1ae84343a81460c66",
|
||||
"blk.24.attn_norm.weight": "0ce0dfab2edeede5da419fa7833db78e36222cf25c358d08f3ec664310f031fb",
|
||||
"blk.24.attn_output.weight": "0cf91c2fd40c204d2fd4b9c85b69281e5ad4ea8442972fcd44b5fc8e835ffdf8",
|
||||
"blk.24.attn_q.weight": "87ede30c09eafec6a4e6285674c1bc4637140b168b2da4ed34f36fdb6e176cc9",
|
||||
"blk.24.attn_v.weight": "4c0b078b2798ca35d6d2c2258fe499820d2bc88700654ba4016e4b028f563590",
|
||||
"blk.24.ffn_down.weight": "cdb8540c32b1ab988f984484928d39f6841f2131c1cebe90ad9456737fccbcaf",
|
||||
"blk.24.ffn_gate.weight": "da2e0e913648b5526bd2bbb344038dd067639343aed3b413662b064b0db7556e",
|
||||
"blk.24.ffn_norm.weight": "8940bd781c610d75eb2be63cfc8d869a3af05e53c963dc7fd4c6f653df5a80ab",
|
||||
"blk.24.ffn_up.weight": "90cbac2a58801abe11ed6c24560aa4acb949f79429f2aa8ff129ac05868bb87d",
|
||||
"blk.25.attn_k.weight": "90607131e36998e990ce718ad05cbecd1bcaed010931401ce6baa3b0d93ebce6",
|
||||
"blk.25.attn_norm.weight": "fbf679c85656c04a6cf8fedd5412c1ace22960e6c2d47f2d43997827811fbb97",
|
||||
"blk.25.attn_output.weight": "08412724ee7a2086514406e6f68fb9f622e10bac25b0c373b294709f4b09bd2b",
|
||||
"blk.25.attn_q.weight": "9c1238e98a2747654a0d4371d3e7ea8b979867f609dc42482544f25591e85c7f",
|
||||
"blk.25.attn_v.weight": "a57796a535c6cb09581cbafd6a91dc14adc8cca2a2465a7ffd0aec546cd84074",
|
||||
"blk.25.ffn_down.weight": "f7e34e8a6391b480da08b52640613ccadce268373934b409759743a1735b74d6",
|
||||
"blk.25.ffn_gate.weight": "b8d0b2f4612678b5ce42bd4a683f8024514b75fb5ebf6b22c600811e95582ee4",
|
||||
"blk.25.ffn_norm.weight": "cde1fdba2369d315f3c6940a997c471ec891924e642505db580d732763bd7b75",
|
||||
"blk.25.ffn_up.weight": "72e700c32ac8b9c47559c2222e45888a480b527ea512075423c5dc01678e2bb3",
|
||||
"blk.26.attn_k.weight": "6ac83b3414ae75bf3a9055c32e49d2c40fe611ab21f8444f03d2f465d18122c9",
|
||||
"blk.26.attn_norm.weight": "55f9d6dc9d75973dc75136ecb9d991b4398097ac133070873fb96ec76a6f60bc",
|
||||
"blk.26.attn_output.weight": "ebc4fcbd15b33263e50ed2ad45740867cce15bc90e1216623babcb1820734509",
|
||||
"blk.26.attn_q.weight": "080f057521073e412936fe3fee64fd574c8128fa4a148b879d3e598fe4954581",
|
||||
"blk.26.attn_v.weight": "0fa2830d6746487ac91b243716e4302361f891e4e008eddd14abec47c7809d5e",
|
||||
"blk.26.ffn_down.weight": "cb2ab8af1653adc57111ada49d2825c6995e338c8208455b92de10e580f60f31",
|
||||
"blk.26.ffn_gate.weight": "231ce30966086bce2dc0e0afd34a22a1958cfda7a57c41b3b8e9444c5dfde8a6",
|
||||
"blk.26.ffn_norm.weight": "35d959d25d17b00617590f5d5831bf705c385c51e46297a14375a700effca6af",
|
||||
"blk.26.ffn_up.weight": "367680c8d332538b467d1ef87cfeb36cc5c6af564c5023c5fb50e728e3438287",
|
||||
"blk.27.attn_k.weight": "0bfcb351c6d17aeac5b55a915074fbdf00f11c4bda98babb196ac8804805746b",
|
||||
"blk.27.attn_norm.weight": "5d598a88c2e75ba59dd7ba4fee940bdec92d72038f1286536d2dfb71d008a09c",
|
||||
"blk.27.attn_output.weight": "23a9da7347336479f6a10ded14cb3f46e06b5bd56dc4b0fbc526c688552ec840",
|
||||
"blk.27.attn_q.weight": "b83319dba9055f069208e9c9d66da08bc6874f23e575288fcd81697d1777aa54",
|
||||
"blk.27.attn_v.weight": "36ed34ccb2f36fdf16b2c2dd225a98ea6b7b0e376e7791191136ccd7bd7a4add",
|
||||
"blk.27.ffn_down.weight": "5488e1d3a58c71b5e9ddda430540b4776b268cfe1457cbc1c2622dedd9e4526e",
|
||||
"blk.27.ffn_gate.weight": "4ff48011ee0bac39af704849d9132a2410392c87a509c684f2062f6b76b498fb",
|
||||
"blk.27.ffn_norm.weight": "32afe99675983da3de2961d1b5ca41c98970a356823597fe29e91f6e86abf0e8",
|
||||
"blk.27.ffn_up.weight": "1eae3088a75629571fdbf6a20f141bc2bb2ed3f5ba2b9fd1d949f80695e442a1",
|
||||
"blk.28.attn_k.weight": "c4e80af714962d6f9040d2c09f316f4a1cbc3a2e994e19902d7c653cf3c73dba",
|
||||
"blk.28.attn_norm.weight": "c1ecf85dedc1c83d5d402bb7c94fb8b9c11f1a3e5f64e7680f80912d4a560794",
|
||||
"blk.28.attn_output.weight": "72ba47c061b21f5ebc5213a455eaf6fc49c8f8e04ff9ce37e6ed4921b629161d",
|
||||
"blk.28.attn_q.weight": "c4abc47234307f44b8ca789aa6668e298158fa4b459b2c1e84bd581806591cc1",
|
||||
"blk.28.attn_v.weight": "aeba950799d4950e491ad0fcbe30334e39b8975177990a2cb339031c45ac153c",
|
||||
"blk.28.ffn_down.weight": "4e84ce382a37b994fb8608df451a60040559e3f4f3241c3b3cb8989a3ed50d83",
|
||||
"blk.28.ffn_gate.weight": "04df157acdc8e8534ad60acc2d2a4dd3a7a6610f6382535ec728994fa6f83f83",
|
||||
"blk.28.ffn_norm.weight": "4d0386dae2bd1c1a9d0f9730718333e3a486c3bc6a5c5d482193c75d39832c80",
|
||||
"blk.28.ffn_up.weight": "fec60bb0a3daf182a14bd8311fe6dd1e3fd020c5fc273e2549cdb1a2d6b79b05",
|
||||
"blk.29.attn_k.weight": "b0532a263aa5a4e2a7a80adc83fc5dec974493bd18da7f953e7ebfc3f3a19aae",
|
||||
"blk.29.attn_norm.weight": "593fc3b4000c35b7a59dace09ca1756c08be0105b2edd354a0e1c16c82898859",
|
||||
"blk.29.attn_output.weight": "315b896f9f0cbacd0ca8937384c3a3a227efa908cb8c3a9125ec00c480e32b9b",
|
||||
"blk.29.attn_q.weight": "d482d45386d4ad3394f08e9dff233ee3a70d0427d65c0b8fa05905da7e25ca53",
|
||||
"blk.29.attn_v.weight": "cd3b5a6e2852da796902930a6a84bc87fc6a7c7bf51f8fc23758d12a39013b36",
|
||||
"blk.29.ffn_down.weight": "5b3dba6f9753bd1b1ebcba65ef5373dd62c38e755c44b7231b95d93d45761f89",
|
||||
"blk.29.ffn_gate.weight": "8610d9d2db15c256243ffcca3ffd31786d0ada0af0e7c7aa3fd20524370ab036",
|
||||
"blk.29.ffn_norm.weight": "1a2ef2d38b7ac3e51190b9ccb8b6552ba83ab290e523356a7f851ddb35dedca2",
|
||||
"blk.29.ffn_up.weight": "a5fdd15811bde16dc27677cf1a4c97daab4c28cb12a9530f1a0e573134fdb69c",
|
||||
"blk.30.attn_k.weight": "1efeb0b5f4b45a85cdf47300f892ac77ac1f38000ec3653565d1303d1fb8c743",
|
||||
"blk.30.attn_norm.weight": "c73934c182c7fe80838ec1d0b92f50a583f75f7a3d78d822f009b58ad2c80e65",
|
||||
"blk.30.attn_output.weight": "3a0fd89de2d274614750345d827a9c886a4f97b343a13cdf680390505df596a3",
|
||||
"blk.30.attn_q.weight": "711e113362bdb067db843c66236704eb1cd3fc5f40e3767143e96d510686ef4e",
|
||||
"blk.30.attn_v.weight": "82b12a9a74fd3d91b73cc2e841e2b3f0a5197ccd2998afa17020995f880d2267",
|
||||
"blk.30.ffn_down.weight": "af9f4b1287c0d824ae22d6e335d19e04a70135b835be7caa2435f1d85e931993",
|
||||
"blk.30.ffn_gate.weight": "e2ab3e6f15f5c50fca66c084cb6a57a2b6b82406d65150e82ea0437b93dd9a46",
|
||||
"blk.30.ffn_norm.weight": "c1b9c325c83f00e177386a4d7e769945f2995e60950c4a576c0a2c4ab9703d04",
|
||||
"blk.30.ffn_up.weight": "9b94a21efd419715d82071b490d3b635cf1e8da080620dcc39e5bde976d7e9a6",
|
||||
"blk.31.attn_k.weight": "0db0d82e3ddcc2c06209f5f013e1d72a84a996c40bf00186be485b909cc268e8",
|
||||
"blk.31.attn_norm.weight": "2b8b7239471f57140c5cdfe06bd224a4f6326282f99736e44fba4c7b120ac101",
|
||||
"blk.31.attn_output.weight": "a310b048840cc3ff2be4b84796340e8e2cdf05ec89d14bd3655c109b2bfa9fcd",
|
||||
"blk.31.attn_q.weight": "f45e0cd95645175ea82813455356d171838539bc3f7676d877c698f2af0a0eda",
|
||||
"blk.31.attn_v.weight": "8bde008e809112aa7e7c23e9c3099087bcc557313b01306c87efa0a4a30805ba",
|
||||
"blk.31.ffn_down.weight": "8266fec7e203fbfad7033120861e44984581ff8b6851d01dfb7b81c5d8fa90ec",
|
||||
"blk.31.ffn_gate.weight": "b73bc0aa5baf006d9ef6403104891b8133671b0992398fe038380b67e0d7e2cf",
|
||||
"blk.31.ffn_norm.weight": "9c62cc27a7b6017c1df8ad49bff249a8245e8895c6754f402cd44623fda83268",
|
||||
"blk.31.ffn_up.weight": "5b970a4694ea3171a0167f6e1636d9f00268bc1c9640430ffc35218494884adb",
|
||||
"output.weight": "74fa0ef08c57a30e633e7117b1e9c805f833e2e5e21434bc79ddf9c92c6d7330",
|
||||
"output_norm.weight": "59b8a59fd3fbf39353506116e43e5e76edd0cbf2a2873d869da4cf27a04997c3"
|
||||
}
|
||||
348
convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
vendored
Normal file
348
convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
vendored
Normal file
@@ -0,0 +1,348 @@
|
||||
{
|
||||
"general.architecture": "llama",
|
||||
"general.file_type": "1",
|
||||
"general.quantization_version": "2",
|
||||
"llama.block_count": "32",
|
||||
"llama.context_length": "32768",
|
||||
"llama.embedding_length": "4096",
|
||||
"llama.feed_forward_length": "14336",
|
||||
"llama.rope.dimension_count": "128",
|
||||
"llama.rope.freq_base": "1e+06",
|
||||
"llama.attention.head_count": "32",
|
||||
"llama.attention.head_count_kv": "8",
|
||||
"llama.attention.layer_norm_rms_epsilon": "1e-05",
|
||||
"llama.expert_count": "8",
|
||||
"llama.expert_used_count": "2",
|
||||
"tokenizer.ggml.model": "llama",
|
||||
"tokenizer.ggml.add_bos_token": "true",
|
||||
"tokenizer.ggml.add_eos_token": "false",
|
||||
"tokenizer.ggml.bos_token_id": "1",
|
||||
"tokenizer.ggml.eos_token_id": "2",
|
||||
"tokenizer.ggml.unknown_token_id": "0",
|
||||
"tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
|
||||
"tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
|
||||
"tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
|
||||
"token_embd.weight": "1d1d1d39a867d5a4bfb32792a47247d2638c10c95a6259391d02843583505cc4",
|
||||
"blk.0.ffn_gate_exps.weight": "2e5cd43ac3f26c44f071926ff6c3f239ecc52a34bc9a5b5906d3d4c1bf2fbbfa",
|
||||
"blk.0.ffn_down_exps.weight": "a4dfc7e7c96e7402eb70279601675b956bb7331da8101e63fe5c0a611b6972e5",
|
||||
"blk.0.ffn_up_exps.weight": "2d5d87b378b2319c344ed2c642598b6f7cb6beeb582a8ea51abc9ae690d473c3",
|
||||
"blk.0.ffn_gate_inp.weight": "a46aaf5aba7401ce6e41f158242b4879d34901661f3ede85496cbd0ce79d6314",
|
||||
"blk.0.attn_norm.weight": "3fe37d913bdd2b65076bcdd6efe64a37b0b03cacbb1b80b9f7089068aa35f38c",
|
||||
"blk.0.ffn_norm.weight": "5e14308a3c894734eb204c8f558bdc817e94bbd5b4e9cb4094e91ba388c8f7f2",
|
||||
"blk.0.attn_k.weight": "73d943dcac0911e87bd771f4aa1c901e1bfe1aed293af06e1a67812159859f67",
|
||||
"blk.0.attn_output.weight": "4c5f754c855e262e8d4c94c6fbbb57af06399dc0e170d7d99a1a17fc9aab9227",
|
||||
"blk.0.attn_q.weight": "d6fd7403c873d49c05f6f03208f30d99ad34cb3b71c9990c47334d502a8e4c7b",
|
||||
"blk.0.attn_v.weight": "cf17cf64b2d683bd9de6cebaf60e5c264df6fdc38fe719dde9d54c80334f6366",
|
||||
"blk.1.ffn_gate_inp.weight": "0d524de81cd915816b4e714bf595ad6946a9130b3de731cd89428b2781230809",
|
||||
"blk.1.attn_k.weight": "2ea47f412992b374c70674730fe84700e0c8cce177086ce9b6635e42408964bd",
|
||||
"blk.1.attn_output.weight": "b4b2520794d54113e86c8ff678eacfc62e35be4395a594a6c8c22b4383ebcc0c",
|
||||
"blk.1.attn_q.weight": "5db930c98c4f91f6eab57eb974c72210b158e366d23d6d2890b2759c053bee33",
|
||||
"blk.1.attn_v.weight": "079bdde09668394bf7af9f8bc175017b4f48f0ab64e6dd855a4d7561d1693c0f",
|
||||
"blk.1.ffn_gate_exps.weight": "146a62de19f9ab093deb101f9640534ffc3dc40d69f508be12fc0475d01b0c7a",
|
||||
"blk.1.ffn_down_exps.weight": "949da94a3c0f375160672a979e85f7def284264b10d48d038238aad5f5ece793",
|
||||
"blk.1.ffn_up_exps.weight": "7016a3f467d9e3f2f4b4019579ed86b757469cd367f2b225483305376b4bb3c1",
|
||||
"blk.1.attn_norm.weight": "1614d1e6ed537737275eb888666c7bac533f4eefbe73dec92b591045ca9e1afd",
|
||||
"blk.1.ffn_norm.weight": "405a455fa7d1ec36894652ceb554bbcb09a07fd6405f42741e66dc4a4665c19c",
|
||||
"blk.2.ffn_gate_exps.weight": "90d5003fc7421f44220c0842d43128955e91488f6f785fe570b62d81b719e964",
|
||||
"blk.2.ffn_down_exps.weight": "ecdc2b5a8b504ef0a7833acff47d69b0c1fa9c22126de1bb120ff5e48c3d6e2c",
|
||||
"blk.2.ffn_up_exps.weight": "2cbd9485a32460d315eb50a2f3b00863fd77245bfe885b7565efac1cdb1f191e",
|
||||
"blk.2.ffn_gate_inp.weight": "0d0a17a1a2c7a61f2cca49ecbb479154dc93a870873257bc4f225e7607f2e2c2",
|
||||
"blk.2.attn_norm.weight": "b2e4c5a977f87a6f880896bd73596234c9b83622fa0d7add5892501e3155913c",
|
||||
"blk.2.ffn_norm.weight": "0ab875b4280afa922376cfc7b9aa3f7071c9432ea1254091ce7de3749df0e8e6",
|
||||
"blk.2.attn_k.weight": "bb884af51fb51550acfef54ccf1b58ce8284e587806e6a2f88c8265e1ad05a5e",
|
||||
"blk.2.attn_output.weight": "0f03099ba1ef342ea61af9cd71d028123bbd8b1dd7d7fd9b509aef77815427d9",
|
||||
"blk.2.attn_q.weight": "8fad0d29eb4c9d24e564774ee3316b9eb7a4c4985e4567111d2c836c830f6cf3",
|
||||
"blk.2.attn_v.weight": "fe04c847ff677632401a94e7b6b6fdca60391ab21cb23bd791533115de6303a1",
|
||||
"blk.3.ffn_gate_inp.weight": "29e3aaa724590c070e614af8288939603d2641b0ef11e8c0f476bebb2776673c",
|
||||
"blk.3.attn_k.weight": "231cc5631def10f7f292d8862d6125ff555164cd70480ac76362149fad204497",
|
||||
"blk.3.attn_output.weight": "86467a605c62852e05fda1a7ef43150df2cf715fe59785dbcba09f1c27cfa086",
|
||||
"blk.3.attn_q.weight": "901822402453922225c2d6ac79616691d48217635d5ff7338daa971d5ddee210",
|
||||
"blk.3.attn_v.weight": "27030784f44375720df2f090933645a31a022d3fb3b14573e5ca0b78f44070c1",
|
||||
"blk.3.ffn_gate_exps.weight": "231ba59cc0b988d125d77bf627aa3f04636684870af88f081f3944b48a160d86",
|
||||
"blk.3.ffn_down_exps.weight": "530c3ab44ae4d66e8afa4d10c153ba5dfcdfb7321989a988e62e9d12e7234625",
|
||||
"blk.3.ffn_up_exps.weight": "b85c2d4d9d11332e702b3c0a6610d4f525f9a93e5d12f5c7c55c592c40755e75",
|
||||
"blk.3.attn_norm.weight": "05dbb6d88cfa6b199f9d705ccbda97c0ef13f9ec875c595398a1a42d009a4555",
|
||||
"blk.3.ffn_norm.weight": "6880b1c27d46969ce36fac049c05dc8b89e4bb47dc89df357e32df7e18fc512e",
|
||||
"blk.4.ffn_gate_exps.weight": "a883b4f225b760c5a2f6605dc5e2167ab85bb398c70bf64ceb539fcbd6128dcd",
|
||||
"blk.4.ffn_down_exps.weight": "d291bb656aae77947d4b525e2819bf4112afece53ff31de9dab999af1f65f9c4",
|
||||
"blk.4.ffn_up_exps.weight": "38592afb8ba3dcfb26970f906174f7d3fa62da44fa4be4fc6912a19030ea9164",
|
||||
"blk.4.ffn_gate_inp.weight": "1596cb74e8fd6c3080b937b06468bb397b0dbb661e6d180a6bcbdc43e8bfd0c6",
|
||||
"blk.4.attn_norm.weight": "f90c83c5ff4366281d283384efc941620542b9cfdea160d678dc54a75e33f758",
|
||||
"blk.4.ffn_norm.weight": "d28d8c49d1746b7cc085562d1074905fd14023844de823dc4fb22202bb280790",
|
||||
"blk.4.attn_k.weight": "792bbf412cc357140fdaba543e547a9b2f7582919e307bbd9a80c7d6d8f5f1f9",
|
||||
"blk.4.attn_output.weight": "d98e4a062d2631d9c315f1990d5f6ca9a88e7e0e46387f611ccb0353f876aa12",
|
||||
"blk.4.attn_q.weight": "1a11a55a91d9f748a72176ff6b1c174844df406e00d1b66b9aa64dc6ee4bcd1d",
|
||||
"blk.4.attn_v.weight": "04cb3c02b12a6313c7ac7044513441083d534fb4c5a3f63bbaa58f7edbd2fadb",
|
||||
"blk.5.ffn_gate_inp.weight": "cbd5cdf015d33a2da6703eb74c22fcb97581fb9175435173b6dc4f9e8364320d",
|
||||
"blk.5.attn_k.weight": "4fdf3405e4d657403f5647b51233521310ee984b4b81bbcd901cb3e6ab76b7ff",
|
||||
"blk.5.attn_output.weight": "4a25662c46979a29600ed77e1907cf81fb16ef30e724c155444e54ccb76af481",
|
||||
"blk.5.attn_q.weight": "e2acb30e30b97300039bb20ad0878f05159d5657fa811748a51d5b6fb35d631e",
|
||||
"blk.5.attn_v.weight": "306504b6a26aa123c63dbbed3f4ced0ed2ee8fb6a30bf0093539b817539f5ece",
|
||||
"blk.5.ffn_gate_exps.weight": "7e34df9b9944dbeea5e8565786d3aa6937314a4b87acd4d0874687877c5a39fd",
|
||||
"blk.5.ffn_down_exps.weight": "c4b7a57a42b5ac0a8ae27dcd5cb2646d7a7cc7123126d44a56ab128e85f60b13",
|
||||
"blk.5.ffn_up_exps.weight": "09d47593b6dd6c664a9155bff02fc2eb7ac4a70219a88162d05c802a01d3c6ba",
|
||||
"blk.5.attn_norm.weight": "58804a036d6ac4c1fe357b8b6a97a5c37cae1c2f06ee0086c041d449c1c6ef6a",
|
||||
"blk.5.ffn_norm.weight": "d872dee6789f0826211aa46ca9d0869e3e96bcace9e77d6559a7b6f3e524f3ca",
|
||||
"blk.6.ffn_gate_inp.weight": "fb1eae732e974d6c1d020a5b4ef98c5f33016f984701bcea656f999a99daad66",
|
||||
"blk.6.attn_k.weight": "55e9c59c5051ab5519b3a7962e1b5fa96a3c0251cb6200dc2f177885ad2de470",
|
||||
"blk.6.attn_output.weight": "f3c834a8d0027370350e2b6294d95434d31432e57be6313b013c15a56303d61c",
|
||||
"blk.6.attn_q.weight": "efaefe5f11c2140dc7cb532b0832c2a0b363a165cbda21f00fadae77efca377b",
|
||||
"blk.6.attn_v.weight": "900bd734d75616d846a90a121c97e081c956a3d1ab012f66dd0bc62c43e1ec3c",
|
||||
"blk.6.ffn_gate_exps.weight": "312a99661b1468fcaed2474621116f1681432755e973f3ee79d01912974fd424",
|
||||
"blk.6.ffn_down_exps.weight": "ac9cd7db67a2ef0d2b5def86873673d05e48d49d147dd944469dbb8e2d4c46f6",
|
||||
"blk.6.ffn_up_exps.weight": "57613e7e09579400a1a09fee4445acfbfe83f2f327fdf317877787d96ada6b84",
|
||||
"blk.6.attn_norm.weight": "0e8801e09885c633bc01a9a5b85d4e878d30158a4eb41a937dc5b760ebd044cb",
|
||||
"blk.6.ffn_norm.weight": "b8c58062ac93072f878446b0e7f958c737aa47fb769fc3a8f593133d12db2dd1",
|
||||
"blk.7.ffn_gate_exps.weight": "1ef611732ff13edfa8d30981ed9dac00c15ceba9fc012ed0b199e9280a849948",
|
||||
"blk.7.ffn_down_exps.weight": "856c6811945c7b0fa461ca17811cfa43436b4cdf5326bad23cbc30883486d7cc",
|
||||
"blk.7.ffn_up_exps.weight": "6725e3e33994302ee13fa5ec163631ce2dcaa08aadde8fc166c2265d4561c5c5",
|
||||
"blk.7.ffn_gate_inp.weight": "36b49d7f80c1003dc392b2c1b9960cd49889dd69e77b26b9e4b13d01f3d0a32a",
|
||||
"blk.7.attn_norm.weight": "7a0ec49acc5e20ee71c6f80ca02f4f1e564c485e0ae0621309e7c2eb0c616cf0",
|
||||
"blk.7.ffn_norm.weight": "eeae035c39ab6e64bc06a4baa1bf6e50d4c8b8797cb0ad8abd48be86974802c0",
|
||||
"blk.7.attn_k.weight": "e8f78c1def01a7a38d2d9bf7becb17755e28fefe4927856f7890fbee52840187",
|
||||
"blk.7.attn_output.weight": "5367f05ac3bb49ef8745ba5902e1bdd4442415a3ebff2c7e1a3918d7be6fe948",
|
||||
"blk.7.attn_q.weight": "37c95fc5acc55a4f6e5f02cab9be60e4fe54c08b65f98f4455741b4aa542ff4e",
|
||||
"blk.7.attn_v.weight": "c89f1343486ba55814233511e94090f7365662a8a4214aa4c278cdadc79196c2",
|
||||
"blk.8.ffn_gate_inp.weight": "4e239afe8c7afb8de3a005757c887cf14b1622ca2d224227591cb0e5301f4c17",
|
||||
"blk.8.attn_k.weight": "2ad0229f30fdcc1e85ce64e00d8f75902238294844a81d5af43e14ba75c02983",
|
||||
"blk.8.attn_output.weight": "2e44a4722acb3b521b81d0b910f8ca2f6c286d874a92ddd02150566454061699",
|
||||
"blk.8.attn_q.weight": "1cd2b09cb2f43e08de776b5f7eac197a5a6d4ffdfd52b21baa36319450147bd0",
|
||||
"blk.8.attn_v.weight": "5a22c57ebfd33ac500cbcfd321d5b5b1783f8728801db6f3f8bed51c7183e4db",
|
||||
"blk.8.ffn_gate_exps.weight": "91063fe56cb4f3ff3b41052bb5046fcf8ef61516a603ee90aab893a9d68c15a7",
|
||||
"blk.8.ffn_down_exps.weight": "d4c3abc8f1d1b462f67f70bd8f404b3fcf45dceeaa8527fa120527254c383c90",
|
||||
"blk.8.ffn_up_exps.weight": "76a1a1f08ec577716a2e7027b45293e9205751126424f1bebe1de89c78f087d5",
|
||||
"blk.8.attn_norm.weight": "f980d774da39eb76c52358afac3e38cb4c81cb323deaabbe5c41822e3f17a98e",
|
||||
"blk.8.ffn_norm.weight": "1c937658cf90f1a85db9a5f26e077730fdd4b694607dbeeb825c5fb2bc407e0b",
|
||||
"blk.9.ffn_gate_exps.weight": "a2532471ecb7896d5c78e5a34e10cfaf4125265e1595166c8d0d0dfbe2a3187f",
|
||||
"blk.9.ffn_down_exps.weight": "b47921a28412d48fee450b8b9d97cee42344a2e69f06d407fd9523d7adf13333",
|
||||
"blk.9.ffn_up_exps.weight": "7c461bd1b2a73b439cff6a10d94afa01e8b06f7e6f09d9a6f28e3876aef48bce",
|
||||
"blk.9.ffn_gate_inp.weight": "1648dfb08b5c06d7953a5a97ecb764995fae9487fb729a1c867023b2538149d0",
|
||||
"blk.9.attn_norm.weight": "8635db0f299882a63b7cfcd1d4259c9e53fab22c31d3d054de36b1001380b31b",
|
||||
"blk.9.ffn_norm.weight": "f9309aa323062d174c463613afef9b0a33501b510bfaa58a8e0e866d12ffef3c",
|
||||
"blk.9.attn_k.weight": "dfe62030441e947a588512d18d9c6e4ed72c2f71c227d622c095e4263b23dadf",
|
||||
"blk.9.attn_output.weight": "1977beb75c6349c50ba7dd3865d7c0a9c5c5ddc854413147b0eec98ac4fda351",
|
||||
"blk.9.attn_q.weight": "eb132596719605cd6bd1782487f121994629e115190edd69240b12af66e734f5",
|
||||
"blk.9.attn_v.weight": "9e708f15d332d7c5187b0693b1a977eb30a2fa10bf7df48ed9d7537c0aa6ed99",
|
||||
"blk.10.ffn_gate_inp.weight": "97503a5d166c1925f9b65c0eed980753d411714d66896f3d0fad5286c7aba702",
|
||||
"blk.10.attn_k.weight": "1ebdd222336bd25b48df1b138cdbe09021c4a5562ea7cb78cadd1255d2be3a39",
|
||||
"blk.10.attn_output.weight": "5e98faa38e9d514b9057e1c8342c509cbe1083defd518e506f6bad89117d1f5a",
|
||||
"blk.10.attn_q.weight": "3323a26c87d936d1dd87c577d0b763459fced726679612c874b3de5fc6d969c5",
|
||||
"blk.10.attn_v.weight": "d5fa73cb56aca388e205f44455e4b4f676fdc12ed7fac4542fbb3b41ecea59ad",
|
||||
"blk.10.ffn_gate_exps.weight": "225021b53782800906cd13b70be3a4161e8b300b97f984a959ccad6a6e8adcbd",
|
||||
"blk.10.ffn_down_exps.weight": "f08eb91526bd22f5fd0402fe925d6141cdbb308a1ced0330858d0c85c71f5ef3",
|
||||
"blk.10.ffn_up_exps.weight": "a9f688350c3b53eaada5103b5848bd9a3d7d6b327a70fa16c24bf28ece933eac",
|
||||
"blk.10.attn_norm.weight": "5ba426c9dfc79805015ccd76cd1068b0ad3bb7a8453e14bb1d35486f122d8f95",
|
||||
"blk.10.ffn_norm.weight": "98891d6acbc3986b2581b7a3af9f5946a392d9188972c6a8b15d4e745a4f2482",
|
||||
"blk.11.ffn_gate_inp.weight": "b2365a60566e7dace892e1cb0e62eb73ce387352601723e847052b34874feaa6",
|
||||
"blk.11.attn_k.weight": "0efbc1d1430505543ff71532a4fcda821aeac616ef6c1dca40e00d4f2ff70bea",
|
||||
"blk.11.attn_output.weight": "3d5bd4d9a41236f30d4293edb9ae27beaa113ffb31b4fbfadff3a4c370dfd3e6",
|
||||
"blk.11.attn_q.weight": "aa11e9db14dd9c77951511443077c2a1a78070753d7bd3d9811038473f69e325",
|
||||
"blk.11.attn_v.weight": "5adc567f377aa11d1763d35f50e53fb2896a8b03b623ac36acc45efa2486d512",
|
||||
"blk.11.ffn_gate_exps.weight": "71d07d982aabfab9eed3c733d49c20f023bf475368fc71db5084d91beadc4b47",
|
||||
"blk.11.ffn_down_exps.weight": "9a06e61461e48b3925a9f7d9cca634d048c8b62163d7bc5c43e35899f959319e",
|
||||
"blk.11.ffn_up_exps.weight": "bc05494d0dcec61021b3ac0c5bc1bf502736cadf48224e213bc139d562699a89",
|
||||
"blk.11.attn_norm.weight": "a5758a10bdd0404ae1470e8e9db903985d4d07f60553c5001a5e7b660d4f7ada",
|
||||
"blk.11.ffn_norm.weight": "814ae037563aad3771787316bec4806c95bf6f5991dd6474b4b1e5cc13dc18ee",
|
||||
"blk.12.ffn_gate_exps.weight": "3a68b831ba1606fb9ef6dffed4732032447ecef23ea563ff4e79317586c7eb49",
|
||||
"blk.12.ffn_down_exps.weight": "268b25e13f4b7beab08686e83705a41b21d15251809ee4784526f78a580da829",
|
||||
"blk.12.ffn_up_exps.weight": "9105751a5b5b42ca2614d0456f24f779d2e2ac8cdff0f96842aa7ae2b70f341e",
|
||||
"blk.12.ffn_gate_inp.weight": "d0de1558cc1d458c5c504f63ddc59785c323df7330474bb0644c346104b40a3a",
|
||||
"blk.12.attn_norm.weight": "859a4c8113678e2e202d10299850e0cfb52eb11ea50bcbf4fe3ff39bdd394154",
|
||||
"blk.12.ffn_norm.weight": "7fbf4c459c1760218877e9ee3f5ad49e960956a4369bcfe96c143f04ff9ddf97",
|
||||
"blk.12.attn_k.weight": "0a7e254fdf3730a57372b6ff421a613eabaea68cdefd64800857941411318374",
|
||||
"blk.12.attn_output.weight": "ceb763fc15d88af149d8fb78e82db2b7dab3aeae584af8cf7611a12356a397e5",
|
||||
"blk.12.attn_q.weight": "a43402d23c46cb2d3cb3c2a98c81b19d10026b7e6742370fed6b2880b6e049b5",
|
||||
"blk.12.attn_v.weight": "3bc24f2c0480ce91ef72993ee8f1cf962f7359e12183424583ffa1246bf3db52",
|
||||
"blk.13.ffn_gate_inp.weight": "a6d68c82bfe66d8bab68f980f5f18268a9e2c0cd6b8832ed39010e0de198ae05",
|
||||
"blk.13.attn_k.weight": "0166c39546b37dc2e01b2b396ba43e183f797dd04eaa51a6d103d8b58ee4bace",
|
||||
"blk.13.attn_output.weight": "2ce5eb198deab9557475a58b69b11e9874b547e05c23f223c6e42fa35ddca069",
|
||||
"blk.13.attn_q.weight": "745c1bbdf434284a7fae98f45e821c076dd9c2a2467dba6a9d8cf0041e419dbc",
|
||||
"blk.13.attn_v.weight": "9ece68d5ac64d1421ea7aa32e1cff9cc1fecf5175f4c4da858dd31d8633e3337",
|
||||
"blk.13.ffn_gate_exps.weight": "ccfdcb4670b131689de12d396a010b5ea737795cf5c15a14a304d720b3c7c899",
|
||||
"blk.13.ffn_down_exps.weight": "8b8fb328664764f1aaa5cbdec336d5654e981e965a02ef622bde5f07ea1c164d",
|
||||
"blk.13.ffn_up_exps.weight": "d2ace0236c2fb3365fdc85499d676a7f65813c48e5085348b1df1799922766ec",
|
||||
"blk.13.attn_norm.weight": "1ed29d7d89ce52d7cb4d57e895ff7115430466e917136c049c385c030ed44e9c",
|
||||
"blk.13.ffn_norm.weight": "a194fc542597a4dcfdfaec5e3cba2a2b2b21b21edfc87c39c0d7f7651355bc4d",
|
||||
"blk.14.ffn_gate_exps.weight": "a625e3574e5e740e7f8e2f9c40390f2f382c720aab5b10534e298002dd8d1fb9",
|
||||
"blk.14.ffn_down_exps.weight": "bc366f015b83c865946afd74c8a884943e0ea2c671314a0b7bb72f21a44d2f78",
|
||||
"blk.14.ffn_up_exps.weight": "ee3199bf2086de77b49f57f487676be8ee70e102a2fb5a5ef8ddbbc28a9eff41",
|
||||
"blk.14.ffn_gate_inp.weight": "2b437870c850fa2e2044d032bb02908af634356e37466fdae260b933e48ee8b4",
|
||||
"blk.14.attn_norm.weight": "cd8344d193a1cbd42bd898e17f4bcb1ca0b2918420fbdafa9249a6f2b7f4ae06",
|
||||
"blk.14.ffn_norm.weight": "70eec40374e558fed5b07257283cf36342b6b0129285a00007deb59c32c9f7c8",
|
||||
"blk.14.attn_k.weight": "4053bdb507e0543d724b632570bac86b31707696d90a0db44c49b2a082e0d599",
|
||||
"blk.14.attn_output.weight": "0182632cb0e06a07241b8293d25d109fbc1862e1e337d435f908e8681e2eb1ab",
|
||||
"blk.14.attn_q.weight": "ffc7794a4c1b6f793c842dba969435330a7a80b9212e457b4b2ac33e68b41241",
|
||||
"blk.14.attn_v.weight": "6411805292d528e61bbaad8f9aab9dd073529a17946c057fb06864fad9cf3211",
|
||||
"blk.15.ffn_gate_inp.weight": "77d0744567c76e6abb67f81ba9c715b2b544841186d5b948309571eff213bafb",
|
||||
"blk.15.attn_k.weight": "1f7957954ea4c6521c257b35a360e868ffa02bdb3de91f146d5e06bb4a545c98",
|
||||
"blk.15.attn_output.weight": "d7809d36bd8d3342240c46fd87bcc7f9821a222f48d9a95e45ae50460265d3cf",
|
||||
"blk.15.attn_q.weight": "25f509313ae4d8401b871904059f472a26f5714e7c791c725de77a1a522c976e",
|
||||
"blk.15.attn_v.weight": "96fedf5a591fc0f020e6de10fd72ff12b3ef9cf70cd21dabaa0d3e7b06f54e73",
|
||||
"blk.15.ffn_gate_exps.weight": "8f950d976b2fd9a3d213b84123cf114c1377efde9352767fb2ddee89e177c8ef",
|
||||
"blk.15.ffn_down_exps.weight": "6fd09d1557bb94b06efbd4f6a1ca4be532a202ba290e9315bc8da3d12a5c4c4a",
|
||||
"blk.15.ffn_up_exps.weight": "cbeb59ae7b0266a928dc7e3a6e70a9330b92f9ee1b17ee1ed91022108204a33c",
|
||||
"blk.15.attn_norm.weight": "2005330911ac2edc7b6d27aca021c67d30d16eb632e49b1a13f30fdb2717aed0",
|
||||
"blk.15.ffn_norm.weight": "0e9198f3b548eb78acc8961f2b3350d238d26cec110933ba753a8cf0035c501c",
|
||||
"blk.16.ffn_gate_inp.weight": "a41d1f99d739c8b150c3945b6949763988d0c6a4c5a2b5855592ca1a48ed23d5",
|
||||
"blk.16.attn_k.weight": "b624e2ec88c2d3047f60530fb87e72cb4a5e655a9663f6f3e9b09e5ad32cddaa",
|
||||
"blk.16.attn_output.weight": "687759ea75e45108526ffc1573d6fdf084728079bfc2dc89b9979e76280f43c4",
|
||||
"blk.16.attn_q.weight": "beff3a45c7e9ec82ffc6d3c701126be28654d10aabd747d03441210491fd31b6",
|
||||
"blk.16.attn_v.weight": "43a349b13f0b9d040cacecd942bcb168c030fef8c75c987d59a4fce6c14e855b",
|
||||
"blk.16.ffn_gate_exps.weight": "793406d6c13d727c82bb7b692ca98d65ca975baee69fc57be5378d77c5a19b62",
|
||||
"blk.16.ffn_down_exps.weight": "9bad3dd150d0230404b7f886ac7ff8803225757e813f195cdb26bad245243b4d",
|
||||
"blk.16.ffn_up_exps.weight": "7449d663023fea3496475bf0a9c1de7272ad0ce9adcb3265e8e424badaa674dc",
|
||||
"blk.16.attn_norm.weight": "a424ce34c195a401df1ce37ac4f2794e8a6720b1ee8acb21428e2b68c65e0125",
|
||||
"blk.16.ffn_norm.weight": "405a68bb8e16e1064df2de55ca3cd9ceddda1d9fc0af007a9bd7cad4b2676248",
|
||||
"blk.17.ffn_gate_exps.weight": "97c6e5321491ca5dc039ee88da0eb0e78f347372785411809af84b3298cb19dd",
|
||||
"blk.17.ffn_down_exps.weight": "1617ac19788a1be19bac69277408761e6bdf5719d63a8c7fea14d41cc27641b5",
|
||||
"blk.17.ffn_up_exps.weight": "4ead1c365f112581c10610ea3f63d2a1474311d2503d2060fed4b458ef337f5d",
|
||||
"blk.17.ffn_gate_inp.weight": "ed4b3393f2523f2b5e0fc7680a1caa2842e605728a529b5af68a7fa8d7abf940",
|
||||
"blk.17.attn_norm.weight": "beac17ef86a7fb2b5840cc72f7a95a5e3d6bd24e7fa698e0b0ebb9bdac45c561",
|
||||
"blk.17.ffn_norm.weight": "81cb58ec6d6dc02a0b4ede10adc336dc865fa76f982d4eab0e4a37b40f5b0fac",
|
||||
"blk.17.attn_k.weight": "eab569e5ea8c8b05e5a6a209fba031129453c2e28181eee3e736b3b04b36bbec",
|
||||
"blk.17.attn_output.weight": "f85b70f01438ce8fe5d10599b113f30bf18dee2bbae0657d3eba295870001db3",
|
||||
"blk.17.attn_q.weight": "887ceebfbf6a2b94b43d2df4439ac3a5bbc29311d4b28addc04d525546032047",
|
||||
"blk.17.attn_v.weight": "2df9414d65014c06a93da22ba3a668be7b83e2e8008e98d7771f7dfebed98298",
|
||||
"blk.18.ffn_gate_inp.weight": "9b07741a0950fc667e5fd25937e33bc22e1f764f80eb4ff3119f005327ae0f6e",
|
||||
"blk.18.attn_k.weight": "8649598dbb63938744c39bcda5ce8c31773e29c573be8d4d2c114f5030f8d3e8",
|
||||
"blk.18.attn_output.weight": "f8e391adb92622298ca834d5d1eda48b69c3b1c51c5a584ef6c54a725c298d75",
|
||||
"blk.18.attn_q.weight": "84bf8708a2eed618f48f69c178ed7dd11fa4c468102376e72e910ebd037d131f",
|
||||
"blk.18.attn_v.weight": "31db3cd773f09548c2c1b1eac2718e46364a7810970fe9c433fad9d8de5397eb",
|
||||
"blk.18.ffn_gate_exps.weight": "be2a2ba378002f1b61f86c273a69eede9b93786d5ce96b4fee1861f730dca4c4",
|
||||
"blk.18.ffn_down_exps.weight": "d35196159e37705db50a5343e3989f7335477f1a4add67ef42ad64a638cd07ae",
|
||||
"blk.18.ffn_up_exps.weight": "c6ceedd86e97913a6dcadc838e7abb762d629fb8dd55f15cf02fd9bd66d2ba78",
|
||||
"blk.18.attn_norm.weight": "41f0b1ad83d6e3cb9fbe0d27878c2e7ad4a351b9f554a6bc9117c01745cdf6e5",
|
||||
"blk.18.ffn_norm.weight": "96646204bd0d82f25dc77faba4dbd86b1332e449313e6684e00122da8be99057",
|
||||
"blk.19.ffn_gate_exps.weight": "c6eb7f61e7938bda0492dbc05e51e8f631c99224fe18e99861fc4fc53ba9e9ff",
|
||||
"blk.19.ffn_down_exps.weight": "4384803da3a3a3d44120d7dd192fe2c9bbd9a1a0cb492dbec1fdd7565230f1e8",
|
||||
"blk.19.ffn_up_exps.weight": "22d73de2fbb8bb0f1bd2caf17fad8a355c47d914143f7f6e6d0128f66f074a60",
|
||||
"blk.19.ffn_gate_inp.weight": "9a0cc4a2301a5634022fbce41189021bf0d1a961792d2d9330fd35556d18e5bd",
|
||||
"blk.19.attn_norm.weight": "c5cc56ec5df9a1f7d5ad71fbda49f1433132e58895d45cb44c73420bd61ebd6b",
|
||||
"blk.19.ffn_norm.weight": "77e17de741742ef2482fc7872fd423c8e3c1454dc4d2be89ee939084b6d78bc0",
|
||||
"blk.19.attn_k.weight": "a92ea36ce2e3569656306aeefb835ccd5d1b03b33a86e0d3d030644cc923b813",
|
||||
"blk.19.attn_output.weight": "5e2a912b37855f84ea964907a1a86d609cbdd79efa0c93c3e8e2fc07caf7c226",
|
||||
"blk.19.attn_q.weight": "4ef3a5913292ac3c1a6fd3e9e53d011021f2b41d0276cf849706d1ca925cf7a7",
|
||||
"blk.19.attn_v.weight": "42981b75b68ae852cee638b5433605c147da4392aaa6d7a06e756115b0171f39",
|
||||
"blk.20.ffn_gate_inp.weight": "71381b9879a7c80b9f7b475abc0aa31b8cd71ccc00856ebe89764a2acb9df2dc",
|
||||
"blk.20.attn_k.weight": "1928b7ebc054eb3967929ed6fb446314d5352f4aaf8b475ce55c6345019f2ea4",
|
||||
"blk.20.attn_output.weight": "6071ecd9ca91af0d2ba93fef4a1a56f3b243dd70f862a21a2d164d56f386043b",
|
||||
"blk.20.attn_q.weight": "002e95042a40f36ceed5829e3d0c8072e5f5e4ee86a089e2902b2348fed24dd5",
|
||||
"blk.20.attn_v.weight": "42f509cdb1c0e298f89f896e349be86952c5168e49b3f83bb17badbcb7596d57",
|
||||
"blk.20.ffn_gate_exps.weight": "a684a3ffe4b0a57c819a5fa9cb3521de223f392732927271e97ce925b6e33765",
|
||||
"blk.20.ffn_down_exps.weight": "e3081a7bc7ba750d8a4886bc8ca4f231b55db4ca082b54b4106c7531964725cb",
|
||||
"blk.20.ffn_up_exps.weight": "fad0fd5eca36ab154788da28be8ec25bb5d6db06c9d133db89e96df358a2f6a2",
|
||||
"blk.20.attn_norm.weight": "c3e3f2429715ae95e884ef1246b0b461b23c5cc0ed08beecf70a14cddd184820",
|
||||
"blk.20.ffn_norm.weight": "ff31f609dda65ca496b0584fabea6550e42edd05ebf229812aa6b7bb5ede15e6",
|
||||
"blk.21.ffn_gate_exps.weight": "366f09ef0ecfb86808eb3296cc9abdb957951d27f6533c03f1422b54061da660",
|
||||
"blk.21.ffn_down_exps.weight": "3fc495947d27fcca7fc0893c8a96e5d48ba27b2c8c58f8fcfb8dcfcd5539741c",
|
||||
"blk.21.ffn_up_exps.weight": "6713ed51410bcc8283cbb001c4ad784098f25701e8021f4fa4f411e186859c4a",
|
||||
"blk.21.ffn_gate_inp.weight": "6d4c92c01ec801647134d907bf1108878156df266a6107abc10526332b328b93",
|
||||
"blk.21.attn_norm.weight": "27605719ae2df24f4f2e85a730927cab20367631612cb501631f6bbf38eb1209",
|
||||
"blk.21.ffn_norm.weight": "ca80ee8177db185b15a4a378c1cb6f7143c76546a7f1726bda23f329323d4ffa",
|
||||
"blk.21.attn_k.weight": "9e49f743d4a5bda9b4bd9c40c2ca37cdae5aec7e54cb193897ac8b4945ada14d",
|
||||
"blk.21.attn_output.weight": "ab923540879753feaed152f5950f69cdd83d8f2413ca873f5f038b63ab0aea12",
|
||||
"blk.21.attn_q.weight": "62617fc3f1c9d2aa672a4d91a121c7a91b92d145b65e75f0b06b4bb7c825dc36",
|
||||
"blk.21.attn_v.weight": "15f8b2e72f8e8e992f2f6b3e93238a9d7be7bd6136f91c9d04b4b4cd0cd60369",
|
||||
"blk.22.ffn_gate_inp.weight": "3ddb1773d9257b68add7a2a4e94dad25ed926803e02707863dd742ab9b2dc179",
|
||||
"blk.22.attn_k.weight": "680e45a9e8d5feddee5266e119dc053bf80718fa9af1cf6803e6f493b265f1eb",
|
||||
"blk.22.attn_output.weight": "0d5fae3402fb2c5aa3a860010e3973fc8e3168d1015f7a76b7b2964681693206",
|
||||
"blk.22.attn_q.weight": "eee7e3d426ab533bd18d62c9aa142eedbde394bed07db58313e0fccc82a23237",
|
||||
"blk.22.attn_v.weight": "26b5be1fe3c2b6824c5a648a3e4bdf17691904526fca158fbc3ebb627b67e2f4",
|
||||
"blk.22.ffn_gate_exps.weight": "32ab7a7735313d60f6a75229b1aeee940b6aee176c9648536bf5921b0dc2929a",
|
||||
"blk.22.ffn_down_exps.weight": "67590808f6a67777d3eb7976c31fe616d388b98fecbb12253b72d1241d70753f",
|
||||
"blk.22.ffn_up_exps.weight": "fc245c0183e6d90829ff5e71a4ec93e4860b3d4c1a17b9dda2fb64f5f5c9ed32",
|
||||
"blk.22.attn_norm.weight": "128e99d206d4d6724758ec97468af767fa0aea592149c324b731659c1e74a1a8",
|
||||
"blk.22.ffn_norm.weight": "e45f498033f0cffa15da0eff2c47b4472e43fcf8921729fc4eeb2e3a6b3c78e2",
|
||||
"blk.23.ffn_gate_inp.weight": "d63e686f5325fbc89fa242c2c52a3b8ff54f867dca914c9ae6eea13e9d6f46e5",
|
||||
"blk.23.attn_k.weight": "f71f5a577f46ea12b1818f3a5ff4b85ddc45f9a2afb0fa2e041d71a3e31c6779",
|
||||
"blk.23.attn_output.weight": "92b13563c1e0eac0d748fb67b235dfd7a64c8f16e2dafb316885744582e23b4b",
|
||||
"blk.23.attn_q.weight": "2f9b9c35dc4f912f3f51c06e2d68f417b51a0de0a84aac530a64f9d3d7b0a2dd",
|
||||
"blk.23.attn_v.weight": "268e40813806e74a5c364b19556d087bf8374e76e7b6fcf55c381eb7da13ccd1",
|
||||
"blk.23.ffn_gate_exps.weight": "12f857e7a7ce228afac34d99b602c8d6fe96984f2a21118f459a58cb767ee65e",
|
||||
"blk.23.ffn_down_exps.weight": "cdb082c16599c3bb36a28066dcc122d9529b54fa91b6cf0153437ec960a5e16d",
|
||||
"blk.23.ffn_up_exps.weight": "f4b99f6f44d7b8b5a305894e88633bf5938fc1f6303a2b2092399da9c8b64d7c",
|
||||
"blk.23.attn_norm.weight": "a691392210383915916b4d3886d5e4d56e7855e27e37e414fbd73bf66b3712e6",
|
||||
"blk.23.ffn_norm.weight": "0c3dc72f667e5ae19b69bfa9f2bd2a01a57681f89ef9527bad4eb0d8c7b70da8",
|
||||
"blk.24.ffn_gate_exps.weight": "86baca2a3157994df7fd8ced5e08436d5c1810dc29c0715637c36de723e0e7d1",
|
||||
"blk.24.ffn_down_exps.weight": "ac5d559562b35c34993e34b071f66d15c65be5907797078c2d2a49aba54e3192",
|
||||
"blk.24.ffn_up_exps.weight": "fce0a099cf09777f44fbab3606ceb75f7fae6f0b80725f9e871654b8cdf9262a",
|
||||
"blk.24.ffn_gate_inp.weight": "e7c6800c0cfc56b565b2d35ad6f1dbfdb70dd0b05b338bc8da2286ffc3678d79",
|
||||
"blk.24.attn_norm.weight": "dc6cc18ec52d102d015153c4a1132f9d7a504e29cbdec81c5edbf3b9e65815e1",
|
||||
"blk.24.ffn_norm.weight": "480d5a1397af5e0e657f1e67d20ec0cdef5724e71246a326843321b87ffabd33",
|
||||
"blk.24.attn_k.weight": "338c0597954a9b95a782545b2fe36469553e73f86ae2d2b5697767b28e1c7daa",
|
||||
"blk.24.attn_output.weight": "a77d23b79933c67e52f1eef7f83a3dff4f767ce0bbcc39572f8cec4acd457643",
|
||||
"blk.24.attn_q.weight": "45c9478593002be1998e96e70668aafa2dd3972380fbc1df12fb05c24ba959e0",
|
||||
"blk.24.attn_v.weight": "515729420885408a6a9614bc27cda393ed907521318d14d21335d39a3eff0b61",
|
||||
"blk.25.ffn_gate_inp.weight": "aae4ac40e9ab3925241f9d784b54b38851d9bc999a6c3bc03fc3f17c9b28a67c",
|
||||
"blk.25.attn_k.weight": "4ab4808d02396c35b00b426f536015673b71c17ae6cd55bbc2e6bfe7a4c59d0c",
|
||||
"blk.25.attn_output.weight": "1990bb982b77e0c947cd1a8ef0b36227ee1259e6dbbc2829e5c136edf88675eb",
|
||||
"blk.25.attn_q.weight": "a1490f3048e8c0ec8784f8550c43adf5cc8d0f2f90131c934713fe4b1b015bd7",
|
||||
"blk.25.attn_v.weight": "f15e53c6d45b3b6f58808fa968425d65e0b26b7f9b268127a77abb1227c67431",
|
||||
"blk.25.ffn_gate_exps.weight": "656662447ff54f56ee80f78a1b9483f7efdc40f7375d0cd8a9c72ccf21f77e7b",
|
||||
"blk.25.ffn_down_exps.weight": "db06f101bccbaef19cced0f6c185166e18202465f4a42cddfd535fbe5cbabb4a",
|
||||
"blk.25.ffn_up_exps.weight": "584a7b02456f27fe1d8d3c7ccd21d426b6ea887795a3ed77f704596a1e3841d7",
|
||||
"blk.25.attn_norm.weight": "8f0f3597982930fd237e9d609776c64f2b909a455b21678f83a7ebd4bbb83e64",
|
||||
"blk.25.ffn_norm.weight": "3e7079c32582afba0c55e032f254adc18d2997705eec860185e9a6dd3d82f07e",
|
||||
"blk.26.ffn_gate_exps.weight": "e70341691b583b86489812b29b77aa41eb658b1865733d6118da54c66e3bfcc6",
|
||||
"blk.26.ffn_down_exps.weight": "5c1b812d11dfb064af816ced5ab6463bf9722eefdfc341b8a93705d5038fd781",
|
||||
"blk.26.ffn_up_exps.weight": "e18118362ae54ef7432781c83884f9fb230a9d934e342aabeda8822ea5f71fb6",
|
||||
"blk.26.ffn_gate_inp.weight": "cd1c5f6710166b9567c6b74c97b2348b191c60aa860958c6bc264ab095261dff",
|
||||
"blk.26.attn_norm.weight": "71d087531af2520bda2e676c489e8529cef5db8aeea1eec0a937a8b4f2fa2e54",
|
||||
"blk.26.ffn_norm.weight": "7f704e936fda28eb5c2cc339f0f6a5f78170b5aa43c01265b21668870d819c82",
|
||||
"blk.26.attn_k.weight": "1cc62a0ce0ae251275d898c52c4a9fba5995fca10955d2011d10dd1a59e1afb8",
|
||||
"blk.26.attn_output.weight": "636e881b1505f9cef656a4be98bec6a4765321d51f9bf1dac8933397cf44b765",
|
||||
"blk.26.attn_q.weight": "89a3c4d202d7d6adebb9e0c1bcfd8b775f6456386f1be25e86e43acc949c1e16",
|
||||
"blk.26.attn_v.weight": "ff2cc963b597cdf1a21703f3e7022af3bb4c65a34a19e19d9309a7c5e198b5bd",
|
||||
"blk.27.ffn_gate_inp.weight": "6150139498fefe380bb99d11e72028da47a15ecb73dfc5b2774f726f4bed8f9e",
|
||||
"blk.27.attn_k.weight": "f286eb9e5c56c7b801a497aedc40158c2a27877d7f9fb59b3fc67834798902d2",
|
||||
"blk.27.attn_output.weight": "5dc3d3a05f9f7729509147fd09c16fb53f85f520cdab5cb69abf4bae3fd460c7",
|
||||
"blk.27.attn_q.weight": "8462e40f86b24251960d6f35a9ea99b8793a01937faf1aec2859f2e5395dbb61",
|
||||
"blk.27.attn_v.weight": "bac1a99e38e25953f8315f7212eb9777dc216cadb09b959977885ae62724ceca",
|
||||
"blk.27.ffn_gate_exps.weight": "6a15eca7f0f6ecfd93db2e55c63875348ec4a78c4ff643ec46df9e958c0101e4",
|
||||
"blk.27.ffn_down_exps.weight": "2e1c91247c4359e2073a8e5f26fd7f6426da7be3ed5bc65dcfff701f0a5022b2",
|
||||
"blk.27.ffn_up_exps.weight": "65d6f5c553c9332085eae4aeadf25090b5d7768212ea7b08ed698102c21b29a1",
|
||||
"blk.27.attn_norm.weight": "7fab8ae63ec8e91ce625cd130ab96d8427dad3a7413bb21b25ec5f408c5b9f5a",
|
||||
"blk.27.ffn_norm.weight": "532720546b0fdcd423a02ca6e3e9d8aacb84b1b3e8269968f88a47fe2a69bab4",
|
||||
"blk.28.ffn_gate_inp.weight": "a305ea58d98962d9dcf0c53ad2389b7acc8936fb35a0e3fc9410e7767cd49dea",
|
||||
"blk.28.attn_k.weight": "8315e8a2e4f78dfdf36d4fc18fffc74bc95fe42c3ae4f9af2b6c874612c0f71b",
|
||||
"blk.28.attn_output.weight": "9b5fdedd32d39ef46a22cca7cd5355d7b93bd07ea305f466a8aad6ca5a4f3778",
|
||||
"blk.28.attn_q.weight": "4e8fb96997c30e231c437130f410d7c91d541a816f6c568b5f3bfdb4b8dece74",
|
||||
"blk.28.attn_v.weight": "1fec739cf3bd7b4913f72ca358d4cf31391c304de44ac0ae31ecb825beaa7cfd",
|
||||
"blk.28.ffn_gate_exps.weight": "9f259789d535e09268266b9a8020f32d6a6779966c909d91d3a10574f06238a2",
|
||||
"blk.28.ffn_down_exps.weight": "516d3f8abaedb01b9916a4b67d4672159769138ef2850158bc1b32c41e31f0e8",
|
||||
"blk.28.ffn_up_exps.weight": "f2f1d88d2c31ed588806fb5ad981d68f5134d7284c4fc022fd018de2eef437fc",
|
||||
"blk.28.attn_norm.weight": "960fd005598deadaebd969996f4367a9dbfad90539a863674fe95730935acc64",
|
||||
"blk.28.ffn_norm.weight": "e1993b37ced93d4049e9af2c47b0d9207d8f7e6f2cc3a52f57bef30bc806d805",
|
||||
"blk.29.ffn_gate_exps.weight": "58927146338f443513337476b3cd30e6341742f096c2beb5890d400f10121298",
|
||||
"blk.29.ffn_down_exps.weight": "03a3386e4f0b75a28c5608e23b2de8f0de25f21954e4aa7fc343431bde9db07e",
|
||||
"blk.29.ffn_up_exps.weight": "6916b7490a7ae7b04a5d81cc1e7ac9b20c483434f3b186b12d87fe176bf1567b",
|
||||
"blk.29.ffn_gate_inp.weight": "98e710e467a3d567abe4ce29d78b8e8dc033148762290c0c5e1ae4d78efd8c78",
|
||||
"blk.29.attn_norm.weight": "4e64cb307d37be20d55f38c94faf7e451d11df5e60df347906cbaf9c5441be71",
|
||||
"blk.29.ffn_norm.weight": "696c23a52f742679bd44440d687a4c44b4302d57f1e9dc5610d23374336187e7",
|
||||
"blk.29.attn_k.weight": "e85253652fd6120c623634ba66b725bf7cd491318b54ccdad2c7df8851d64c0a",
|
||||
"blk.29.attn_output.weight": "4f650a71efb150d1f24cd4d114d4187bf570ac424da3b92ea6455abdf1aea705",
|
||||
"blk.29.attn_q.weight": "69fa7da901026ebcbbbc848455b425458b7e3295007d7fc093acf4b38e2166ea",
|
||||
"blk.29.attn_v.weight": "17e2e7590b317b21f106de546aafd955579703d1e95d6aea044ee72ec3a514c9",
|
||||
"blk.30.ffn_gate_inp.weight": "3a03284b4aa60d59d4a2ec86253469b61fc656372afca427cb77a5332fbcc62c",
|
||||
"blk.30.attn_k.weight": "d518cfd0db9708e769eb1399e87ee49357dc54d5afdbac3d4c0ca46c64e789eb",
|
||||
"blk.30.attn_output.weight": "9b44378714d784c5ef9ab604359091baca4e0ec222afa139b7f840eaefb371fd",
|
||||
"blk.30.attn_q.weight": "cbb95365bbfbcad0c9cd99b4eebb5a5d32de68ce08e4063b5ec3e792b7548044",
|
||||
"blk.30.attn_v.weight": "e7985c04fe1740e35a9598f43b67b0922b4fc2d00b68a92a9f917b82c3248de1",
|
||||
"blk.30.ffn_gate_exps.weight": "8ac4bbd07935d98f895ba94dc174e5ad5046c3c222b53729d60f987c05e7eb70",
|
||||
"blk.30.ffn_down_exps.weight": "dd672cc71e82abf05064a18121b8e55fe1a4f19bc1d7cb9a142f4add54bc336e",
|
||||
"blk.30.ffn_up_exps.weight": "12282f664a2a12aa25e2deac58946108715ebb978bafed5274cef24569107646",
|
||||
"blk.30.attn_norm.weight": "1a33458fee054c6c9c896a4bb0a4e1fbfa0293b2408c7dd2b81d692e966e7273",
|
||||
"blk.30.ffn_norm.weight": "311e33b68051f507f1478ed8f2693fddb846170ddb7285a91be43f795c2ce31e",
|
||||
"blk.31.ffn_gate_exps.weight": "8af43d9867a51cd8392fb48b981b0ceee0ae979c491c07d711b3b56b5162c786",
|
||||
"blk.31.ffn_down_exps.weight": "5579cb7758c1600b19d1f540deffe081b575962e37437b3b2efb2fb0a2924e40",
|
||||
"blk.31.ffn_up_exps.weight": "f2e7c005276b3a001fb40753f027fa10b4d5a346f43cf4b4bbdeec6e74e1cf6a",
|
||||
"blk.31.ffn_gate_inp.weight": "89885dc0e30b6b16a90c0331d7fa3174671e941364e8102d934f02132237e61b",
|
||||
"blk.31.attn_norm.weight": "99e4e9bf86a9edf8c404153a7e8a82324ba79da462622196e2faba161bd95172",
|
||||
"blk.31.ffn_norm.weight": "55335997cf6de781bf332b943de96ff4646966b05d9fee86b76ea897e27b6ca7",
|
||||
"blk.31.attn_k.weight": "cee570762b78da6316b637892cc4b080e40f57af5551ffb1866b9a8e80e96628",
|
||||
"blk.31.attn_output.weight": "fa321ff55ec7819ead7b819fd45215262f39744569765ba2113c989c03588802",
|
||||
"blk.31.attn_q.weight": "9e2c409b878f8a2a1436874abf428fceb1c534b21f9ad4dd6f532b8a469007f0",
|
||||
"blk.31.attn_v.weight": "a845d0be68ba537b4a775bfba4d897faf7c82a811a2612b0b7420cc4f3574cb8",
|
||||
"output.weight": "16101cbb74b54cda9ebc07ca3c762e3263a56efb3cc011156184b95807d7cf13",
|
||||
"output_norm.weight": "d7aa61585baedd60157aafe157930785742c55989c288573566a971b02423564"
|
||||
}
|
||||
225
convert/testdata/Phi-3-mini-128k-instruct.json
vendored
Normal file
225
convert/testdata/Phi-3-mini-128k-instruct.json
vendored
Normal file
@@ -0,0 +1,225 @@
|
||||
{
|
||||
"general.architecture": "phi3",
|
||||
"general.file_type": "1",
|
||||
"general.quantization_version": "2",
|
||||
"phi3.block_count": "32",
|
||||
"phi3.context_length": "131072",
|
||||
"phi3.embedding_length": "3072",
|
||||
"phi3.feed_forward_length": "8192",
|
||||
"phi3.rope.scaling.original_context_length": "4096",
|
||||
"phi3.rope.dimension_count": "96",
|
||||
"phi3.rope.freq_base": "10000",
|
||||
"phi3.rope.scaling.attn_factor": "1.1902381",
|
||||
"phi3.attention.head_count": "32",
|
||||
"phi3.attention.head_count_kv": "32",
|
||||
"phi3.attention.layer_norm_rms_epsilon": "1e-05",
|
||||
"phi3.attention.sliding_window": "262144",
|
||||
"tokenizer.ggml.model": "llama",
|
||||
"tokenizer.ggml.pre": "default",
|
||||
"tokenizer.ggml.add_bos_token": "false",
|
||||
"tokenizer.ggml.add_eos_token": "false",
|
||||
"tokenizer.ggml.bos_token_id": "1",
|
||||
"tokenizer.ggml.eos_token_id": "32000",
|
||||
"tokenizer.ggml.unknown_token_id": "0",
|
||||
"tokenizer.ggml.padding_token_id": "32000",
|
||||
"tokenizer.ggml.scores": "6e37bcde2adc7e350e87c496eddd7a2124329c1dc66c5bf3ad3997253e4f7a62",
|
||||
"tokenizer.ggml.token_type": "b6ecf55ec64ee67d87750bdb8d757a2c58bf78377e9f4219f5689a6c4dea57ce",
|
||||
"tokenizer.ggml.tokens": "d168da3ddd3eee820916945fcb9baf24dd3cde42f606cffa2d19e7c8a8743918",
|
||||
"blk.0.attn_norm.weight": "216aeb2c9e0c271f899e1ef2a63cceeb8f41e97642e84fada54b1d3c1c11cf25",
|
||||
"blk.0.attn_output.weight": "b597d56f7188ffc1fafc273fadc59d41738cffd677ae98c61a62c3285b3a3099",
|
||||
"blk.0.attn_qkv.weight": "d28a6b44e13f59be5483e4be2bedb544e346168d720aca27f47d1a5a722be91e",
|
||||
"blk.0.ffn_down.weight": "4a691370e5a61fcbbf540fbcbf4c0f1d15dec0364528c0e916d0744f6262b63b",
|
||||
"blk.0.ffn_norm.weight": "0c00af2b4a3128bec64a0cbb1084b042fdbe13d9ad0d03bd577f9449dfead338",
|
||||
"blk.0.ffn_up.weight": "b32b52f790c1c083bfb8a3126dc1111cfeeb28dc8c584a930a1e5334cb176bf4",
|
||||
"blk.1.attn_norm.weight": "68748011503c6c029e8e69a84a8e5a89338f378769627b6dbf7f93d715c292e1",
|
||||
"blk.1.attn_output.weight": "2267344add13b048ca59e4377c86dc512be8046a57156901fa32a20fa74e4ee0",
|
||||
"blk.1.attn_qkv.weight": "9109d2e3d7a2eacfda5226587b8be124a3bf44b972da7ebb17aa15795897eacc",
|
||||
"blk.1.ffn_down.weight": "d675df4df4dd039c0c339ad6445d39eddd2004db6bf35bed6314c7497245a633",
|
||||
"blk.1.ffn_norm.weight": "3b5767ae977bc8baaa06b06efdbea193b6b3ba605ce76d77a76ce317e935500c",
|
||||
"blk.1.ffn_up.weight": "80dfd6d9d234b00334c89b8e0a02f81899c2efd377321c34ba5ba51a5f61b5ff",
|
||||
"blk.2.attn_norm.weight": "6a6743b057e5088f145bc179e92c9bfb41163e7295d7b81c62e23dd89d2b59c4",
|
||||
"blk.2.attn_output.weight": "bc5491ea54e0db81462d7d9b7d25cbdda380c2db8de041bd1c4ab7b76a1d19c3",
|
||||
"blk.2.attn_qkv.weight": "a61287a9852e2f5aca9c100b471d98398b2913a3497c743de3c70ec9ddd7087f",
|
||||
"blk.2.ffn_down.weight": "4fddcc382c8dceeab027fe43d8d44e67edb5e8ce4b9a1b7f773c87770380ade1",
|
||||
"blk.2.ffn_norm.weight": "07e05f82b3f63f711db3b684ca79aed25c0657917e66f88af47348a82065c227",
|
||||
"blk.2.ffn_up.weight": "4835a682ef1826c12df01ae7663fc45f9c82bc8e64b665f13fb7da8e201ec0fb",
|
||||
"blk.3.attn_norm.weight": "f22aba7c03999ba7136f39cda747a39715e498699dc1716cd97fc5dfc58d1b1c",
|
||||
"blk.3.attn_output.weight": "53b579855366fd786c5126b2b30aac4d583ca7bda56833c4865f5cadb5c18c6d",
|
||||
"blk.3.attn_qkv.weight": "bb56aba78158123140fcea59c69ac562ca208f6d3086819417cdad8c50f333ad",
|
||||
"blk.3.ffn_down.weight": "97280897a7cd86db2830c004bccc5bc094f50e293baded0189159a2019145a6e",
|
||||
"blk.3.ffn_norm.weight": "10a8c99f8b57a960e8e0a1133c4a26f9148403d1b9bff2eff114917de996f3b5",
|
||||
"blk.3.ffn_up.weight": "7324046c915e75d621b2043597a245a428d8eea31869135e6257a861491d8dcc",
|
||||
"blk.4.attn_norm.weight": "507d8e164de94646edbfe33def8e8fbf7c9a6ee3fbaedb5000f72d9f51ec5e36",
|
||||
"blk.4.attn_output.weight": "bbb3429e6efa98c150e0fdbf48c16180cbf0d0cbc1b3c253c6c319d78f4593a2",
|
||||
"blk.4.attn_qkv.weight": "b95ee5be0786d3901273d806c339fe6c20e6bfffd2a20672a9f56af80921e8ab",
|
||||
"blk.4.ffn_down.weight": "806bbf91df92a5a22bd5aa1ffb7fc2869f7293ffc7704771c290ecc583b27975",
|
||||
"blk.4.ffn_norm.weight": "cfc2930a81df7aee3a5e7f726a15c1182233e868bf0d9d37f6b6ae6d8c15c234",
|
||||
"blk.4.ffn_up.weight": "c3390c69533de2c8424e8069323ccc5d0c4543111535da04cf2c7d26745576aa",
|
||||
"blk.5.attn_norm.weight": "0d71c4fbcefabbd021569442853d2fe90668b19409ae2805a718a829ca60beab",
|
||||
"blk.5.attn_output.weight": "10ebd93629112bf2df5c30dd0953a4a5e9020306768283181ed426934d47e14f",
|
||||
"blk.5.attn_qkv.weight": "5cb05633369f12d4b00e0ff787736bd846856682115720ebc6cce05270c334f6",
|
||||
"blk.5.ffn_down.weight": "e28bcc5094212eafc7476dbc5b7a520d25b79578cbf4229d698e2655956a80ad",
|
||||
"blk.5.ffn_norm.weight": "b6f2c4cf9f34bb4d59989f96165c14a67dc1e266ad0a6d0fcc49f1add929e6ff",
|
||||
"blk.5.ffn_up.weight": "0f9ef99423cc07ebedc0e9cfa95809f2d7108d910bb4ef97ebc0b0309c440750",
|
||||
"blk.6.attn_norm.weight": "b3edcc47a42218234f7564d7470611b49401a41ae8cd42123f86557c69f5d7f2",
|
||||
"blk.6.attn_output.weight": "eb9b7d257b388bb5b8fe0515e5c6873317239cb94cda236e4b6ada2a6c57c65c",
|
||||
"blk.6.attn_qkv.weight": "eb968081f478c52f07bd9c2761741e982dba33cc4eeadeea3557d391b9ac2106",
|
||||
"blk.6.ffn_down.weight": "1b8588bb7463206290322695577dcfced300895d6e6f4b26966c53a9ae2f0f84",
|
||||
"blk.6.ffn_norm.weight": "1219c04b7770983c77814200eefe743f46d15328ea2b12711e44f8103eab08d3",
|
||||
"blk.6.ffn_up.weight": "197ef287239fec47c55677f0fbb66eaf0644f775bc382de843971730721394f6",
|
||||
"blk.7.attn_norm.weight": "b630ad08c80d564ed1c024384818e9fd3f22a36cd7a14aa96e7e2759a8285099",
|
||||
"blk.7.attn_output.weight": "970255aa750828a47d6b9d399f9612b5bf25aefe7dadbcba41fc416d0d4067c1",
|
||||
"blk.7.attn_qkv.weight": "ebb157c880293e6de8d629f263ba8853ed1dbdc02c311d43432bb8cfbb310739",
|
||||
"blk.7.ffn_down.weight": "24bcd4db4cba844c89f878b81843c373dbbc0675e889d32c5b12e63384a7b670",
|
||||
"blk.7.ffn_norm.weight": "b9c6f71001808ee873ce7db8056e4b53fb4cccec8b7f0f312899b575fae39d39",
|
||||
"blk.7.ffn_up.weight": "979f1828d227455c26015a2a11afe9dd05f2bb97a8ba6b38c8dab3f50e627401",
|
||||
"blk.8.attn_norm.weight": "4e8e347e3775010b7112ee630f2f4f2383be7ff64e6ca6154b9b22566552eaa6",
|
||||
"blk.8.attn_output.weight": "65a44babf44a435a1829945211b3168f9ec78ac3cb7a049a733e93d11f0d6659",
|
||||
"blk.8.attn_qkv.weight": "343ed07671da400b040812a4058482fa38284b5d9af9becfed07417fe26ce747",
|
||||
"blk.8.ffn_down.weight": "7fb7e073e3c2c503c4e9d60efa0988fed7398d900cc003695fe3fffd3e188b82",
|
||||
"blk.8.ffn_norm.weight": "b07c1f655d8593e3892a2cf73f8a0c19ce8e5cb613fafbe7cbd430da8ce4c57d",
|
||||
"blk.8.ffn_up.weight": "8b26e14de54b3fdc2e2d3ea41720f9d9c236a93688c3b7fd7bf43f5fbb327c9b",
|
||||
"blk.9.attn_norm.weight": "46394d408a8e316916177e6aa261de32e137a82d729c0b1800b072f0c38c39b6",
|
||||
"blk.9.attn_output.weight": "d57f3d46107947a7073373a0b35d6ecf7759b5df15406f4a3590a60666af6b16",
|
||||
"blk.9.attn_qkv.weight": "14bb8ace8c5453148f4b536e9f4279c813f31136716947256f5cca333448639c",
|
||||
"blk.9.ffn_down.weight": "2b8d98e2b5ed68338f6e4de43bf7de0c4858cc69103cd5177725f7444eec7694",
|
||||
"blk.9.ffn_norm.weight": "41a499dfd418cc4c6b8c12313f673f7e2cd4a3f9c4065eb6c4feb5eed02fb542",
|
||||
"blk.9.ffn_up.weight": "143aab7533a64b17fbe201490a6f674bc7f0bd370c094500b2e100419073d1c2",
|
||||
"blk.10.attn_norm.weight": "ebb670aafd36816a794347287269d8f1a5b19c1e3c0a1e38023bc19fdba9b073",
|
||||
"blk.10.attn_output.weight": "b5d65bbc0ed5e49fdd9d754bc18163cd042a285024d0cf6f954c503bc8c877cb",
|
||||
"blk.10.attn_qkv.weight": "f06b15bac88da798fa34a62b03eaac0dbe8b846020516603c387541f2d8dd672",
|
||||
"blk.10.ffn_down.weight": "fb091fcd1b4de25d1bea94d1755e255cb02914a030d23e3a234e57b8d46bde6e",
|
||||
"blk.10.ffn_norm.weight": "eb347bdf9c40414af87e13a8e72e40b31f004b50f7cb366f1a219ced60a61355",
|
||||
"blk.10.ffn_up.weight": "ed2d52fc881a173f404fe8a1067862c9856d6c3e0d2e90a330a7aa394e3f84d1",
|
||||
"blk.11.attn_norm.weight": "64e252603cf010a0e502ca39fdf8d0a196a79aec67c0d2bb9213fc0cb80c47d4",
|
||||
"blk.11.attn_output.weight": "228e33e21c69f52efc74fdfc831bc9af271e44b2a29a3dced1d64e667ce36eb5",
|
||||
"blk.11.attn_qkv.weight": "ab9ce6d4ef9e42ee0da3f20a7708a3bbc5e79e967b05fa86ba946a05e2eb63eb",
|
||||
"blk.11.ffn_down.weight": "0ca133b7835c98dc77c25d64e4eb7873778bdb5e4d22d8b80f920f46865b43bd",
|
||||
"blk.11.ffn_norm.weight": "02455741a0dfd161c79aa1ecc381901721f229fdcda5615622a629631fb61cfd",
|
||||
"blk.11.ffn_up.weight": "9fecdcc099fbb8e23c6b1ea9294702a027f4a58d265543ec5e7be79b8f63b354",
|
||||
"blk.12.attn_norm.weight": "783bb459911b1b3609a9b2bdfe272f1670add73b5471da738e07ac47e2e07dfd",
|
||||
"blk.12.attn_output.weight": "1e1a914c9e48b857206ac5a1f7cead994bc1ea91d5d4fff8c834d73f2e38ef5d",
|
||||
"blk.12.attn_qkv.weight": "5953e7185ccb87fb4dae8f9426ec86315d4c7794326e8ab59b3a95d4af2189f0",
|
||||
"blk.12.ffn_down.weight": "a3eecf0f394f86e2cfb48a5940a5c50ca86d71883b2f79fcc642a935fabce0d4",
|
||||
"blk.12.ffn_norm.weight": "0a4272e41373c23bd72f10d2d82930aa3a1480aac75832bfbf01cebf0b86b6a4",
|
||||
"blk.12.ffn_up.weight": "06f42776de3a7ceac3025f26a7a8bd20e062233cce2bdaa2183470dc4b30b87d",
|
||||
"blk.13.attn_norm.weight": "5915da60fb03e201fa649faba780e5fdf1c761c262b206e5415cf83181f65780",
|
||||
"blk.13.attn_output.weight": "4dbf6eab074fa3835fd32bd631a8208e511037d5056d2fd3015735cca7674ef7",
|
||||
"blk.13.attn_qkv.weight": "d3d8339a1c4782d9e73d77fdebe154d3c5b83ac40c9175b3e91a4977d08f876b",
|
||||
"blk.13.ffn_down.weight": "de6772b46a55e1fd42b007637dfbf68b6598e5d5b61622da0935002e1e192d3a",
|
||||
"blk.13.ffn_norm.weight": "5a640ea3b8c7be49c95a58a2327e10d8e8d9d142504bde5c8091613e5b961d7a",
|
||||
"blk.13.ffn_up.weight": "f35e3545e4bd3531b2e843b5efd31dee0c13c807ee6386e65473ba67bbec30d0",
|
||||
"blk.14.attn_norm.weight": "9b34986450b7c98b4927e81e61a816f9e84b1addc7c14926402100037aad6678",
|
||||
"blk.14.attn_output.weight": "155d52efb23d366016d861a251d4d1f4a0c13699188c50d50dba016a0d8bfcd9",
|
||||
"blk.14.attn_qkv.weight": "8e1415084e1f33c73a777f19e752489f4dd312cca047733e5ea643cd4a955e04",
|
||||
"blk.14.ffn_down.weight": "a2a142226b94baa01ccb65bdea2b7418e49085c1d9c3c63e544e3112c58a25da",
|
||||
"blk.14.ffn_norm.weight": "8aecfd9b0ae6affaea31a80c5c9a4a14b31deaa0db7bd8f6da2a64d23447921c",
|
||||
"blk.14.ffn_up.weight": "0c1407237b8c1bd02f193346b5681926fe698a5055eac6a7450451b0f991707c",
|
||||
"blk.15.attn_norm.weight": "e037bd19880bfa83d983200fb0c7866f8ad16c3ff5cc4b4f3a37ca7373870ff6",
|
||||
"blk.15.attn_output.weight": "045fe4fc95cc129a1b92771b179c11b12845c4c088786c607f17bd98857e68e1",
|
||||
"blk.15.attn_qkv.weight": "7621b7559705cab1d4dea1c69f76dbf9dc1c8837a203b656f484703b9c1b70ce",
|
||||
"blk.15.ffn_down.weight": "7e5ac20e290bc60761e1cd972354fde225b7fa861048d44d9a0dd9b046d55f58",
|
||||
"blk.15.ffn_norm.weight": "b6d830d88f1db1825687973c8c2b1a24c6fa84f07af8d0e3ef9c86009baca0b2",
|
||||
"blk.15.ffn_up.weight": "dcda0957cd04fc45476774dba2bbf9aa89d6b05d5ca7b10ae6f73ad2c49b1cd3",
|
||||
"blk.16.attn_norm.weight": "4ee9b70ba15cb2a08240f93990e90f5068c48fceb481f8e2186bec8b7214eb3f",
|
||||
"blk.16.attn_output.weight": "315cfe5536658d2498192b2980eade15b2c9a4ff220e4011911457b1727fa103",
|
||||
"blk.16.attn_qkv.weight": "3c8122e3ad637583b9dcde8ff3a323267d3014bb1f0f9771e5322260ca9ecc8d",
|
||||
"blk.16.ffn_down.weight": "3b5fbebd5ee2b86cad96fb8a9b45a8770d08f82c1c8b74d7061e866f7020a18d",
|
||||
"blk.16.ffn_norm.weight": "ffab69f20bda372de6e5878f0539163e2fc6ba113621ded95705fc3b1465c9f0",
|
||||
"blk.16.ffn_up.weight": "0935ea3d258da42d6258406365f39f58ddaabfe97ea5977580db3635188f24a1",
|
||||
"blk.17.attn_norm.weight": "f030441733f3d147b4a06a1eb4aeb8465c7c24d9c53bf4c48fe7e134d3629803",
|
||||
"blk.17.attn_output.weight": "07a955ef09e8dc766ac0df647d0b2c69f23c4c69a7137654b4aad80303ed0eda",
|
||||
"blk.17.attn_qkv.weight": "1c10688061e21e2fe12ad0cb54bf03895c1f83c3b0df743a42f548b52cbca1b2",
|
||||
"blk.17.ffn_down.weight": "ebb9cc9836f41d88fdae2aa9a4355514e4edaec8d1577ffeb947a35204e77f52",
|
||||
"blk.17.ffn_norm.weight": "50aff44f6528b13db5389f2ddcdb7676244947610bd7ffbff3f881c968c2a0d4",
|
||||
"blk.17.ffn_up.weight": "d716537949582be33bde6b02e38f5a70081c9642a9fb05a61312126718b8d148",
|
||||
"blk.18.attn_norm.weight": "0ea695c4e53d637902f46663a6ee42adc493c36794476acc7dbddaa05b13840d",
|
||||
"blk.18.attn_output.weight": "5fd35b500221a612eb4f4bddf0e9b6b7db4d7733032a75f8802fb2d884647c2e",
|
||||
"blk.18.attn_qkv.weight": "b0da37fd030fe69581f990bf23bfd35467a1bbe558af6de7c0924f6b72e92317",
|
||||
"blk.18.ffn_down.weight": "b355c33f44b328f4bb977567de8f7544db4b005d7a8fbded658518ecf3c5a153",
|
||||
"blk.18.ffn_norm.weight": "58b3fe9094079989a86e0387143259e1cc35952d24dc3df290c4ba6df44f5c51",
|
||||
"blk.18.ffn_up.weight": "2ce530954c342c30ed2ead5353f931960bfae1d278868504c0efb973560fabbe",
|
||||
"blk.19.attn_norm.weight": "533e9aed66feea8f0392aa81f9e293240e1f009a5334253915fb60c2749b615d",
|
||||
"blk.19.attn_output.weight": "84f2d00f98a4113a779d3b5d1c3e7c914eb47784d3ab13b290367c124c2994aa",
|
||||
"blk.19.attn_qkv.weight": "fbe6b9f53b07fa7537d3b3d452d20a9bc666f9fd41ec2091dd28bc2f70fc668f",
|
||||
"blk.19.ffn_down.weight": "b30199e098c8bb3f890183d8b18471e80b62b604729b277ad62488dd71e1206b",
|
||||
"blk.19.ffn_norm.weight": "c81373e41cd340b7badb19f9517c77c4250b4eb9a02dc758b8b49b652487d7ff",
|
||||
"blk.19.ffn_up.weight": "5a5cb083ca7725720e3a890f7fa46354760e8007a8188849a092e305694a75e3",
|
||||
"blk.20.attn_norm.weight": "4953091b4477e354357a8e743ba0a1900633e52f1599ee082a0c9b0b2b5cd978",
|
||||
"blk.20.attn_output.weight": "62d54f7749cd6856097b2632066a322b0296df915fe66f382c5b5981be0d4f23",
|
||||
"blk.20.attn_qkv.weight": "406de9e35b0729ebe902d7a47905cc7fb29a921431ed35dbef0c03e5690a1329",
|
||||
"blk.20.ffn_down.weight": "62fb678b0d1261e19a4903a2b347d67afcc8acff01feb33a687a35a2d1e6f9a5",
|
||||
"blk.20.ffn_norm.weight": "cd9d36b7e71e55c8925b97bb09c28219f182626bcff094878ae39c3db887a14b",
|
||||
"blk.20.ffn_up.weight": "b9276771d79d3e932e73ccc520c3f8476342b9ef312ed2ee1e0da822e6e3ad18",
|
||||
"blk.21.attn_norm.weight": "66d8c8a35e13ce9c2a0e75b670150e2c31484a55c2316df46075312196178ed3",
|
||||
"blk.21.attn_output.weight": "12ab46c9382648f9b3350fdd92a6be6352743d62d6b520d7e2024e0c838588f5",
|
||||
"blk.21.attn_qkv.weight": "a7909676ee1675ca23cd29a5fdd226df8dd9d68f94c6c9bbb51dd9fd38504008",
|
||||
"blk.21.ffn_down.weight": "6fb317279c6542e82f97d5a12a60fac1bd0fa0405154f9fbe265e2fe39bd49cc",
|
||||
"blk.21.ffn_norm.weight": "c0f703eb3ff161b5ba4490d87d8684b8a6c47a8f433e12f418333b9db439010a",
|
||||
"blk.21.ffn_up.weight": "6dbdb80ef0c35e364bbce12d40d5e74c7963c7b55d58d9579567a07ffce7b863",
|
||||
"blk.22.attn_norm.weight": "f94237433bf03d675cb2f655b81ca91a1ce2447bc6b00b13d6b0ccfe2d411eff",
|
||||
"blk.22.attn_output.weight": "e821f95995ce497c01e63ca64f737713b1b65f11df1903e51d444aa516f33f71",
|
||||
"blk.22.attn_qkv.weight": "1b0f717c73afb5eb4c82a1708c4e85c969e8a2a8770d9ddb78b1870a2d8a781e",
|
||||
"blk.22.ffn_down.weight": "0f33f7a3cdc685484be99aa0c03642b0b20850a27d1fddbe054b13a9382f3ccb",
|
||||
"blk.22.ffn_norm.weight": "9df285cf211ddd7df2b36a50489af574755c7d4d98b29a05cd04566ae613c8dc",
|
||||
"blk.22.ffn_up.weight": "63ac300e1efb34041dd0136cf43ea622fac6f0caccce1cd9262f5e08d2cf179c",
|
||||
"blk.23.attn_norm.weight": "5f72d9e88689b4027b28f5f8f26cd3abb03635ceea7ec98a4c91a9fc691f6707",
|
||||
"blk.23.attn_output.weight": "6ecf04ff61125c5fc768f8656497152149373daf321ee9c957e8f7245a1184d1",
|
||||
"blk.23.attn_qkv.weight": "a9d9978806724c2959f2cf386c233831f08e1e933dbf2b32665e788d9d512ea4",
|
||||
"blk.23.ffn_down.weight": "72c7d17886a3da17fa0daa456aa5e877b2ef5b8b403182b870d9ca5ca9c70347",
|
||||
"blk.23.ffn_norm.weight": "971e4b712e3025a13419b5b57d674b5e4ab7f18f74b57b9afc4671623da90c4b",
|
||||
"blk.23.ffn_up.weight": "df2b5c7dbd5834545b815073af0c7355b065124e6d6f0fee78d8fa5b2076dc3e",
|
||||
"blk.24.attn_norm.weight": "c41957c4a79ad3b16f6e11daec1c7f530b9f3f4b618e1e4367c3b67787ac4ab6",
|
||||
"blk.24.attn_output.weight": "ef7d61f5fc88ac6f31bf60cb5f4d2d6b8df42d38825807112361a7224b0dee3b",
|
||||
"blk.24.attn_qkv.weight": "3e6a58fe7d49c90bb6971efbad3371c32256881173ea5aee4b0c296cb206490f",
|
||||
"blk.24.ffn_down.weight": "f43619144047de42fed81dfa495f1815d3cb771330e574043e2b67620819292c",
|
||||
"blk.24.ffn_norm.weight": "5501d4a2a98c8ca6b42e77b53b221dbc08f530f6a067256d787534ec6fe028bd",
|
||||
"blk.24.ffn_up.weight": "d64c8b0e509e2b1118f6000176f8956cacecdbb200c7e95ed93fb78b6e26c84a",
|
||||
"blk.25.attn_norm.weight": "502fa3c302d371f61c5791f4615b73018ffb1daa09b6499b227116581244c5d4",
|
||||
"blk.25.attn_output.weight": "ad8391d4e9c980856f2547aa945b2b6a407a6382158dc1ddd4f08d94ecc24be6",
|
||||
"blk.25.attn_qkv.weight": "42e8983780d4a01a02c54ad23d4df21eea437f119a10af5a9c12a76a42d308c1",
|
||||
"blk.25.ffn_down.weight": "302dd010d4e0ab4eeaee89090409ea0dddeeeed3236415eb8f97c942497eea91",
|
||||
"blk.25.ffn_norm.weight": "fb34c1ee5bca96986c08834df0a0c047ba041c1123ac1f563e9d64312bf82d6a",
|
||||
"blk.25.ffn_up.weight": "10739a8de156816d93c92b935386540bfa976bdbef204f0312960f6fc657582f",
|
||||
"blk.26.attn_norm.weight": "7036c711609128c4e55968ff3681d3043338879a5737efd6c2ac9e1a2a61f1a0",
|
||||
"blk.26.attn_output.weight": "db5db45dead5cb911fa01da59832f121b7c18b2d167bf53741c40819f24d346c",
|
||||
"blk.26.attn_qkv.weight": "cae34c6b7f82ed14348d5ed30a79919c383737c1694a9cb9c0de609d3b0c1d0a",
|
||||
"blk.26.ffn_down.weight": "491ec3a4da9b4f49f8ebc6be658ce397a9b801ae9fb35e82177e47808c65e5d0",
|
||||
"blk.26.ffn_norm.weight": "fd7059d75d7f0e5288511ddeeb0f772eb3cae3ccfe4226b877015834edc3c386",
|
||||
"blk.26.ffn_up.weight": "ea1ee1274c56458ce056d2205e5bb6e5422ce4cb0ad58006b8141749b97a0c39",
|
||||
"blk.27.attn_norm.weight": "cc362c9a937609265052cd38544af17a1a7448cea086d4c801139e1fc865832d",
|
||||
"blk.27.attn_output.weight": "ba757a81dabde9cb1b069d1bb616fe79649a1724f756567ec61caed1304fe6cf",
|
||||
"blk.27.attn_qkv.weight": "1ab8d7d02d87756c12c2275636823aa5ede3d683178225c4cac4bd892c319bd4",
|
||||
"blk.27.ffn_down.weight": "deb1c711c8a66acf4dcd2d088e1548f8e08f296f755e4067d6557fa55afde88c",
|
||||
"blk.27.ffn_norm.weight": "fc6242d8cb8a4a37a8ddb7e41e7e60a63d4a89edf36acb35df052f10b9c91ece",
|
||||
"blk.27.ffn_up.weight": "8df39b09c4801f343aca78f2918a1f6db78c8c55e591eda4c69eadb74c26e180",
|
||||
"blk.28.attn_norm.weight": "75b539308f77e3cefdc6d98484d8b5cbf0538f0c2869a77b7373a145a18bc850",
|
||||
"blk.28.attn_output.weight": "ae128940eb60a6d2e121762ef4b3e9dcf9eb3e105b249507fa7f12de0e19822c",
|
||||
"blk.28.attn_qkv.weight": "bdda781c288e9326c240e33905f8e621b6a2ad902e620739d34f93fcd6f933de",
|
||||
"blk.28.ffn_down.weight": "f1d6e6d1c286b1138bfd7e53fe477f399ae93bc2c04e35416f84218ed7247965",
|
||||
"blk.28.ffn_norm.weight": "3f837ce82c8b9bde0d61d08b6f5fe5574886ea5328dbdc53f2929f18da8b4087",
|
||||
"blk.28.ffn_up.weight": "2af027002e31d1b6cfedbdb30a2b9d7213f3aa691167c353913adfd48fda31e4",
|
||||
"blk.29.attn_norm.weight": "61e8003b5329462ffe0fe172f2b160260de006aed858332d49d75504b6b6aa7a",
|
||||
"blk.29.attn_output.weight": "ca44542a72a37476dc73dbdcc01f5b7497cb3ebc4ea230a55c9634ccd8e56ad4",
|
||||
"blk.29.attn_qkv.weight": "abb3d9d6abe57872ae3daa51935d43264093ded5ce63b49d1e280ee5758be0e4",
|
||||
"blk.29.ffn_down.weight": "6764b895fce881df097489c263446f0106de36217997660c15984b3ee22a5a06",
|
||||
"blk.29.ffn_norm.weight": "89e03e9a33fc0e6e31ba9f0c2bd7c5734a118c5602bb90148793e08a80e8d0ae",
|
||||
"blk.29.ffn_up.weight": "fa7ad57a84954f4121653152efed1a871d8adb20a1ea9086e3e849ce359d7d2e",
|
||||
"blk.30.attn_norm.weight": "91a697aca1e42af54f806a20211031c3369e8d0bd58df1b0147fe24954e1f5a4",
|
||||
"blk.30.attn_output.weight": "36063fcf766c89ac75be56f688cc63cefe5f2c733fbf4378ea9956ad386fa148",
|
||||
"blk.30.attn_qkv.weight": "2cacd1161f1121a2c0b979930134f4666f73fb8d7237b3b0659ae091b15955a6",
|
||||
"blk.30.ffn_down.weight": "9f3fcb6217100595850c05dc98f9ab2a263afdb6ab28df2fcb08aeff512057d7",
|
||||
"blk.30.ffn_norm.weight": "6c600bc1fc7de39d4f8917b81fc7d1d5ed2a9b56492234c13a4bd6028c30d880",
|
||||
"blk.30.ffn_up.weight": "73cabd1bb011956b2689ea3338bb76642ef3a57c197377d666d2ab5f56317668",
|
||||
"blk.31.attn_norm.weight": "72d3e1cc771380645fa75a899858c95f39857a4f3f1ed60fe1578df383b8bc53",
|
||||
"blk.31.attn_output.weight": "40089cdd29994dc19a1d89fa15902a89cfeca3540f12dc9bf4d00ef82506e456",
|
||||
"blk.31.attn_qkv.weight": "1d0bb40e9258071ae14290a53c619a8e331dda07354d2a02ef45766c029ae5e4",
|
||||
"blk.31.ffn_down.weight": "8defa0e06335b793fa8be03883f0a322d6c5b33f52c69c943c35c60d16e42c0a",
|
||||
"blk.31.ffn_norm.weight": "33c55d9d0c496ccfb130361fe131649346e098abaaac39c0519507e5d846721d",
|
||||
"blk.31.ffn_up.weight": "599f6503f61c692c1f82001973d35119f9688db5e6be9d9c298411491c93f09b",
|
||||
"output.weight": "14b8dc662bfa3308ebb2e102c562d8e52c15670e538f20f3216a9c310ca9dd41",
|
||||
"output_norm.weight": "7f2294ba94ce65681df6c7ddd8698799199b9d77dc83c10bdad5c3999f0fdb82",
|
||||
"rope_factors_long.weight": "e34d378664e354652c38f47d10dafb0498ccc2fb042d39ff7fef768146fff22b",
|
||||
"rope_factors_short.weight": "9379146a4988f373d362fe47b06c75e7fe7c54aa4dc9558758df79b7a87471fd",
|
||||
"token_embd.weight": "19a03c1fb5ac0baee93b0a7d8b0f26e9a9b011e229b694afc50ebfc13d84f8bf"
|
||||
}
|
||||
188
convert/testdata/gemma-2b-it.json
vendored
Normal file
188
convert/testdata/gemma-2b-it.json
vendored
Normal file
@@ -0,0 +1,188 @@
|
||||
{
|
||||
"general.architecture": "gemma",
|
||||
"general.file_type": "1",
|
||||
"general.quantization_version": "2",
|
||||
"gemma.block_count": "18",
|
||||
"gemma.context_length": "8192",
|
||||
"gemma.embedding_length": "2048",
|
||||
"gemma.feed_forward_length": "16384",
|
||||
"gemma.attention.head_count": "8",
|
||||
"gemma.attention.head_count_kv": "1",
|
||||
"gemma.attention.key_length": "256",
|
||||
"gemma.attention.value_length": "256",
|
||||
"gemma.attention.layer_norm_rms_epsilon": "1e-06",
|
||||
"tokenizer.ggml.model": "llama",
|
||||
"tokenizer.ggml.add_bos_token": "true",
|
||||
"tokenizer.ggml.add_eos_token": "false",
|
||||
"tokenizer.ggml.bos_token_id": "2",
|
||||
"tokenizer.ggml.eos_token_id": "1",
|
||||
"tokenizer.ggml.padding_token_id": "0",
|
||||
"tokenizer.ggml.unknown_token_id": "3",
|
||||
"tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8",
|
||||
"tokenizer.ggml.token_type": "485e40bf3d715a4764818fc097d6a2a41db872d82ee714bc500872a3437ff48d",
|
||||
"tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda",
|
||||
"token_embd.weight": "17b87ab2c01c80657855a5413d0457b4a041afaeda0cc785080e44e2f04acf07",
|
||||
"blk.0.attn_k.weight": "28ac0da05754ad2714ae95da28a5ad191192140b30b8fd22d108d4700c9d989f",
|
||||
"blk.0.attn_norm.weight": "3f9d5675d1ab0eb8a816719dac9fab81f2e95c52be02c34263339acbc087febb",
|
||||
"blk.0.attn_output.weight": "703295c2c63990ff896778685c678f145298886f680f3ed5dc2a7ad54c293265",
|
||||
"blk.0.attn_q.weight": "69c2d0e4870e9d722a190d356203c9605575a16863466c3d1747966ef1cf5791",
|
||||
"blk.0.attn_v.weight": "95219c9c07b5ffe9a9a01e456d845eef2b11f4fc12c93dbbba479db395444c13",
|
||||
"blk.0.ffn_down.weight": "a2feb5eb3d572c57c5bafbf0ab506862df1160fe40965dcfe4b9fd855c08bed7",
|
||||
"blk.0.ffn_gate.weight": "fcca072c445c31f4dc4d5dfaa785b1bdf7271342442099b74fd17268b5829fbf",
|
||||
"blk.0.ffn_norm.weight": "7621f95dbd245cade6fffd6b08797d69d8e3954e960f0b5551b90d967ab95448",
|
||||
"blk.0.ffn_up.weight": "14a9bcdd451403c67136391e1b6e53b3b1830f00199bd911dbcc56d8749c14f4",
|
||||
"blk.1.attn_k.weight": "c70f73c5df20579cb44d971164b48b5f0d8d5abdb38b381e7a8b880ba12aa406",
|
||||
"blk.1.attn_norm.weight": "88b6b91f93a1ef83425a7c7dc2a2fbd3b22704a04c64a80061df376ac8c33626",
|
||||
"blk.1.attn_output.weight": "f031a537490c452be3b3bb51e6b7949a636405756e160976a1c070a792ea00ee",
|
||||
"blk.1.attn_q.weight": "bdb23214b1cf9cfd30f863a0a5868e52c6809d93b7e8f44df096a94204d9896a",
|
||||
"blk.1.attn_v.weight": "e9bbc0b05f2c872fb1403f8f938cd1612b502229ee401f12593b1164c61acc00",
|
||||
"blk.1.ffn_down.weight": "5ff53811038b661a7b8f2bfdf213bebfb185ec1a6060b662f063714f33584d79",
|
||||
"blk.1.ffn_gate.weight": "205085c8c951a5c7543b1495183cd96028fb49f67464b3e9862a2693a6077a33",
|
||||
"blk.1.ffn_norm.weight": "798f354fc85afce9625f5d10093a585a966831698a0560e6c9b97ce659eb4b22",
|
||||
"blk.1.ffn_up.weight": "db92dc5684cb6e90940e13f4d1da555ed20ba4f8cab1e990ddfd7553e2e91315",
|
||||
"blk.2.attn_k.weight": "ef5ce360c4eed6d00d03ca4761e0f8e4b0af4509978468314be14f3d46621044",
|
||||
"blk.2.attn_norm.weight": "6dadbc05dbd0d3fabb4216affa60a3de1378a82d2859dc90b338cbe70f50d455",
|
||||
"blk.2.attn_output.weight": "6bbf87a966f691bbfd7c8d25629aa4e6710107bd431a667434861febb391edc5",
|
||||
"blk.2.attn_q.weight": "4e575c09ae2de417ce9057ce8b073680e860a24aae13a472b68f101b760752e5",
|
||||
"blk.2.attn_v.weight": "cd33f7f01141e9439afdaf2ea1aaced9feaa335e32a58daa136ebd555d4d96f4",
|
||||
"blk.2.ffn_down.weight": "b970ff1b0b6494165defe2fbfa1d31425766ed71e64de9ec4e66ac3955c8bc5f",
|
||||
"blk.2.ffn_gate.weight": "dbb3e1360402e0e369b101995bb686b73f95d4a7673f061be85d64d15dfb0061",
|
||||
"blk.2.ffn_norm.weight": "bfb7980105d8ac9647710454f57a5cdac50598a0f6f4884e16f1d94b00844687",
|
||||
"blk.2.ffn_up.weight": "50ef89339b275a438b664686f6227dd9b6e43853ed6856ec9e33ef4bbd90bda1",
|
||||
"blk.3.attn_k.weight": "be942ea98151434eebcd2c1da4b00e0146152fe524a530689b1fd491cb833d21",
|
||||
"blk.3.attn_norm.weight": "0df2f218daf609c289fb7c60c5f375fa99c0d4e04381ad5a494a19144edd8e20",
|
||||
"blk.3.attn_output.weight": "c2184aaf86aa2cb8f47be49f60b165834e97205f39c6ee1dfd19fd4411a156ce",
|
||||
"blk.3.attn_q.weight": "4f86e2a0a4221c1c84ff9c409ac89893cb95d7208cf65bf1e98e24e01125f991",
|
||||
"blk.3.attn_v.weight": "abfdb8a60c349dadde641d1afc9542025e24fbf41a3238bfa9675e0b1f1e4b68",
|
||||
"blk.3.ffn_down.weight": "58821a8d87008d47d122427911c6fad5272aca70c448bbae223256a74bacd07e",
|
||||
"blk.3.ffn_gate.weight": "776e051f1a0ddd5c4934e69186683a75ca9a3c8c0f61911bba321fed1dd287d2",
|
||||
"blk.3.ffn_norm.weight": "7f380f29335e28be90bfcfae6f6d69fdf5751211b36d2dd62aa5541ed113e4f2",
|
||||
"blk.3.ffn_up.weight": "fc5ae8d488894cbd4951059675468d227da27871d26e925c9941863841c097ee",
|
||||
"blk.4.attn_k.weight": "14833b078cc4c5137bdd5fdc0538047974ca147a99b0282e1b144440c78bc1db",
|
||||
"blk.4.attn_norm.weight": "0a69957d4a15599fb80ad4753558020804925221457d9a5052926754d3768065",
|
||||
"blk.4.attn_output.weight": "887a49b6130fb6297cf10767207c3dd97191b2cf63723449af9c27bca8dbeda0",
|
||||
"blk.4.attn_q.weight": "51fd577b76764824dd6f0d4891c137ebe4736f591b5ca2793c5fff2be49abbde",
|
||||
"blk.4.attn_v.weight": "1a623c43cf9c509d1b7ea0d1a5c04d0af4809665f9f9e93b7d6dba8c5df178fa",
|
||||
"blk.4.ffn_down.weight": "5d61e8856d8941d2b1fd138116d015f63840d0fa1e31e20e20a5ceca1536ceec",
|
||||
"blk.4.ffn_gate.weight": "06640f7273764f8ca5df7e386547417916b6cd7d565a8343153113239a94b0a1",
|
||||
"blk.4.ffn_norm.weight": "91a6c6c41b894228e361435ecbc5058dca34d4911a23da5b56de219299c964d3",
|
||||
"blk.4.ffn_up.weight": "d016dac1055e36d6a10b6317e57f98a904709ea892ef3194342f4d2f6326561e",
|
||||
"blk.5.attn_k.weight": "987146afe124131500808cc0da33c06d207433656d41df6e6d8c99118a83bac5",
|
||||
"blk.5.attn_norm.weight": "6b354938966f2608a2fb8d0f5b363ed0d8b0967c2ec8d0abd5c625b413042ded",
|
||||
"blk.5.attn_output.weight": "cdcbfe02c6ff79d5326882b017a02099f5af71beedf6b1b3eb4de01e3a844536",
|
||||
"blk.5.attn_q.weight": "b910d0cff781d3efb42eab0a302f46f286b2de717079175680d5b42bf8c309c8",
|
||||
"blk.5.attn_v.weight": "66d3a279f747412f9f4b0e8abad44540c122ab2e811a7ee74c1f33bc36caade9",
|
||||
"blk.5.ffn_down.weight": "c9b0efd2212981f16d956d8571f054b68780ad01f4917033647e359b557a4653",
|
||||
"blk.5.ffn_gate.weight": "fe96b94109ca141c01f6a04788e20783019ca6ec334aa1f3134810bdb499e557",
|
||||
"blk.5.ffn_norm.weight": "aa7b016e832e7055a36c6e20de58ea1936f995f390401fff1c5fc65906064e49",
|
||||
"blk.5.ffn_up.weight": "555ce27c4873d3375394f38ad3b45e3d8848f9d5642dc1602383d0f0a33c2a14",
|
||||
"blk.6.attn_k.weight": "88280d461db324c4f36475ce396793063e61a27283ec64511b0480890fb5b3b4",
|
||||
"blk.6.attn_norm.weight": "af8f460c411f660d33196286d208f1845fd5a2b45f7b56549a4df31e7515447a",
|
||||
"blk.6.attn_output.weight": "dd9996fb0a256e8375ad3917705258a33fce006bcea0f536caae420a77974d8b",
|
||||
"blk.6.attn_q.weight": "7a4841541191e037cfb9b07930c4d8cab451809658b182f0ada6ccde9615c003",
|
||||
"blk.6.attn_v.weight": "ae81e6a592b64d701a9d40233e986039a56cba8d8d24f61aea93c6393cf3078a",
|
||||
"blk.6.ffn_down.weight": "622dd1ce1706355cbc659a8ab2c4509678ffe0f3ad34258e5e25ed2a5d951bcd",
|
||||
"blk.6.ffn_gate.weight": "8389a735c0bd5591010f8ced9805a2a12c749f6df0d3c18ad4d05c2a302e7168",
|
||||
"blk.6.ffn_norm.weight": "621f5346400382474d61358397bd58fb1459b07c53e376e4bca15e08b3f9b3fb",
|
||||
"blk.6.ffn_up.weight": "8d834e4c42f13c251dfee36cf89e12f1bd400680d00d5c2e6cac0459e9ce2f7f",
|
||||
"blk.7.attn_k.weight": "8bd0412de65a3e64901ef8fe6a28c95e116bf39dc9aa22f0126b9d36688e5ea7",
|
||||
"blk.7.attn_norm.weight": "056d8e56be4e87d6dc6f900762f0dc6fde07bfdc50dd85bfc510415e2bba3f3d",
|
||||
"blk.7.attn_output.weight": "27972eda51da53d416ff95aed78149a2c5a287b47d2cd46f2f544ca692ecb3bb",
|
||||
"blk.7.attn_q.weight": "41eca977b9371f7932800c11a9c45b931310196919e2a0651b847703b180fc7f",
|
||||
"blk.7.attn_v.weight": "13c74fd7e07f08883a09fb070a1fe5bbdd2341b4cb8d1cac07c4b637049b5774",
|
||||
"blk.7.ffn_down.weight": "9e75db42468800849a9a7da603d0072c5e86c8ed2b4d8b20a312a51fb86a7a10",
|
||||
"blk.7.ffn_gate.weight": "db6bdc3117f910088aaf7db51f2da63ea5bd933de36af5599c215bfb26f7db2b",
|
||||
"blk.7.ffn_norm.weight": "48bb82b49bfc8679a1e77f282ee182d952db7a3c11be7ef9a102ee2ddd8011e2",
|
||||
"blk.7.ffn_up.weight": "feebea87175817a0f3585ec0af09dc873d94c203581ae97a712eb356d3b49efe",
|
||||
"blk.8.attn_k.weight": "d5640ad71b6af68d88e17bf8e7fc26c907d2262605457a84247dd9afc2884d69",
|
||||
"blk.8.attn_norm.weight": "75b850c481a69083ae09d0207ba7317b37c735a39fcf5fef5400e6c84fb1257f",
|
||||
"blk.8.attn_output.weight": "cbd669dbdea2bdd90f9f0cc97566b3dffff3c56cecb4f47290ceef30da83b2d6",
|
||||
"blk.8.attn_q.weight": "9edcb63087a431bac361822497e6ecdaa06d9ea4a1a754e36da7ba9f8db81c7c",
|
||||
"blk.8.attn_v.weight": "3fb72c2c4f95a83626aa3e30062f9450b09ab37c7871e229f18bbc5cf744633c",
|
||||
"blk.8.ffn_down.weight": "bd69d2c9172974fff154441b237b4787fb53b2d185325442d5048130ef5bc4ef",
|
||||
"blk.8.ffn_gate.weight": "d04689c80553edd011d1cbaa5d570fffa7fa91e88b66cf1352d89ab60b72f908",
|
||||
"blk.8.ffn_norm.weight": "e49984183b735b7f2c4e4730c289eed9394056d2e283a00fd83ea0915df31a73",
|
||||
"blk.8.ffn_up.weight": "8fe62a1ce8e847e567add6c6f6bf2922bc467495b5eb4c116b3cb85b85b3b211",
|
||||
"blk.9.attn_k.weight": "d90904959e5004cf0d6e729c6bff18cc33c094798b802473c1ec55ab8d276183",
|
||||
"blk.9.attn_norm.weight": "79277f290cc07411115d8fa138045edf4a17b3416ab2145409cbe8ab829fd4ee",
|
||||
"blk.9.attn_output.weight": "5a21bf2e1f09a81405025f96d4153ffb630158e17269cff8ffff935c38ceb1a7",
|
||||
"blk.9.attn_q.weight": "51b1d0febc3b350945be4504f55afa4347517bde0f710e1a4b88e6b17e71e7c7",
|
||||
"blk.9.attn_v.weight": "aab7e1db0a8b50a03036356791ffce736ab010d15674c96eaef8049d80076054",
|
||||
"blk.9.ffn_down.weight": "cbf43ec84becb40c9359a181ab0e641fd7faae7d34b549501f7cfb7afdc3d764",
|
||||
"blk.9.ffn_gate.weight": "dce0e8661c778327bed7f03b6790d26710764188aed9dc746e6e05863891fa57",
|
||||
"blk.9.ffn_norm.weight": "6d41642104f995c77bf31122b13237caebda3e7fcccb1367ce91db36b015e923",
|
||||
"blk.9.ffn_up.weight": "82fe4c67bf24e7b2d6f6e05f7b1234c2bf90c3932951091a9066211b8e15ecbb",
|
||||
"blk.10.attn_k.weight": "f6a9ed8fd8d3229b5d03175c413ffc56a07f2ce7236271986361dd3d8993f9aa",
|
||||
"blk.10.attn_norm.weight": "cebbef89f0326ca8e02df3867a571e4d61c20c2a12f295f98ae590d62bc86010",
|
||||
"blk.10.attn_output.weight": "34f5efb86accb4f06347d83a32558ea8eab3039d128969161a741ebacbb656ff",
|
||||
"blk.10.attn_q.weight": "1e0efe27df2d5d50f7157253ba2cfd436d6781c3dc78ca176d0c16a210b5b763",
|
||||
"blk.10.attn_v.weight": "8f085bf50a2b0f83cd6cdda3c8ef5a9e204a36348ed95871aac725d1f68640cf",
|
||||
"blk.10.ffn_down.weight": "bf3b3cb4cace435809ac7b4cc933f20853af12f1f272d3dcefe7f19c0f203b8b",
|
||||
"blk.10.ffn_gate.weight": "d3df7a1413b1c5adf1a1dcda9e5225a15c89874bae53bb6137ad1ea42fca2d34",
|
||||
"blk.10.ffn_norm.weight": "a1da603b0480471b5ed8e862148cecd5fed918f8304d6933ab0bdb25b8d2fb8f",
|
||||
"blk.10.ffn_up.weight": "bffbba605922e972dc47dda88a0b4659aa52236c76e5fe861a949e6d9a367492",
|
||||
"blk.11.attn_k.weight": "9f31c63d66cd32c29b1eb8bb829d0c8525ce2ae936e0eefdaab6335a2d12a3df",
|
||||
"blk.11.attn_norm.weight": "0bde1a266d8b2e8f202bb7e2e88b19147ca83021901f6d3cae77a4df5548c754",
|
||||
"blk.11.attn_output.weight": "e10725c7cf746ed4a7e472cf7aea6cb564e5db6a1d5197adc980d650a387ccea",
|
||||
"blk.11.attn_q.weight": "05ee758a7d065802630f8c65dca424364c1c8825e389aa33f9405c45e8a50cce",
|
||||
"blk.11.attn_v.weight": "0c3ae7090f11775d24c51120db6e305db6aff706493e7ee123dcab74485ba789",
|
||||
"blk.11.ffn_down.weight": "7ba40b8e12c09c5fb2006b77a771cb01ce894e88a3b3e1877f927a5b89c91709",
|
||||
"blk.11.ffn_gate.weight": "db76388a023b98097972d354ba1c6a5e26efdeb1c596b9c28bf2cd8f6596975e",
|
||||
"blk.11.ffn_norm.weight": "a38c3ae1b89a68ddc7b72c99c5b28be7fe3787c4fad9904d0c43d64eaf00c474",
|
||||
"blk.11.ffn_up.weight": "13c8142f9cf1eddc658babf978daf3515c4ccc45f849f3e7e3930aa18a8480a0",
|
||||
"blk.12.attn_k.weight": "f03241c36ac87cb57429a2ef22186b8d7d0b590a8b173beb01fa13d93772f3b1",
|
||||
"blk.12.attn_norm.weight": "4568f654e6d65104d586e7c16ba960c83428698ce103022b7e0be15e2884e13b",
|
||||
"blk.12.attn_output.weight": "04867603f82f91e41306e09b33ecda0104b3ee4834061f2c0bbdc8da33c72509",
|
||||
"blk.12.attn_q.weight": "70fe04b9a8e08b6100cc8d6b58bf4cbbad15ca1de82d63baca5d352ba6c4cbae",
|
||||
"blk.12.attn_v.weight": "15cb28db61a86c98687991d7e611bc92a1fcc6007f3432149cfb5fe518a4f65e",
|
||||
"blk.12.ffn_down.weight": "6d10c790a4e3dc44c2dc36d96251ae97cdf30a4fa04d4c43e31bfbd038e6a7b7",
|
||||
"blk.12.ffn_gate.weight": "3462a2d8f6b4743b25e24da51b90018ac2858d05ac7e582bcb69063cfdac1104",
|
||||
"blk.12.ffn_norm.weight": "1f96392c1faa34e34ae5dea55a6a86c5aa4c79758952075d53d28de89dd88456",
|
||||
"blk.12.ffn_up.weight": "d22eacc612a7411953d948483c5fb201e11722955ee0754da866e7bec578ac6d",
|
||||
"blk.13.attn_k.weight": "5864977e6b733ea942647d6feed5c76156c48c200649c22e4e11b9e5860e57f3",
|
||||
"blk.13.attn_norm.weight": "87e053535144723db4145aa5402acc54331b7696752d852bb9fc542ff33f0fb5",
|
||||
"blk.13.attn_output.weight": "078145f5ad83f8b14f97a869346f7fd1583b24d1e3edadaa95d3da4242973f8f",
|
||||
"blk.13.attn_q.weight": "3b8caf35504cbc4d1a7dd6e011a95760703b7f71e2218b030b1254f811362dd7",
|
||||
"blk.13.attn_v.weight": "4fdf8365a603e043e5b40c4a21c84ac167f9be62794178f9d8a608dfe5653bf9",
|
||||
"blk.13.ffn_down.weight": "a07d3abbfcacf48ba028df2cab895be32cc15022d23389a745286e79c1b1d1fd",
|
||||
"blk.13.ffn_gate.weight": "1d2ab39666aa2909acc96787432a3ed13b19d25170f74665fadff9b17bbaffb1",
|
||||
"blk.13.ffn_norm.weight": "4f2e809fda5f3eadf52578ee50e0ba36e53be91e55dce418c12dfe595f5f18e7",
|
||||
"blk.13.ffn_up.weight": "8783d2720c2c37ca176a5801e0b3ef1f9cc9cf3ef1cd37af423aaf6b2a27e2bd",
|
||||
"blk.14.attn_k.weight": "ce9428e2b55d43ae0c6690dbd56182f99adc427694ba8236b405cc8ea5035e86",
|
||||
"blk.14.attn_norm.weight": "6abb35f9db8251d6ae954bda147c6ada2371b0574d11702e828f3c6ac99b7cc0",
|
||||
"blk.14.attn_output.weight": "fe3880916d0ceb5bff672c88bbefb7060a545be609bf049beb2024b38221836d",
|
||||
"blk.14.attn_q.weight": "7c8ad81be6f4a350931fd108b5f7c9e366e8c26ef62d1d85ffef5dca8fd893f8",
|
||||
"blk.14.attn_v.weight": "e4bdedffacbebe38567a0734dfd67db90e911d9a9669fcde9a7c4ad8a0066c52",
|
||||
"blk.14.ffn_down.weight": "ef6694dff1e05820aac0cd2b22f39ac7788b4967afc9250775575554c66aab2c",
|
||||
"blk.14.ffn_gate.weight": "db63c4179e2db704bc505e2b4696e055b593e295a1b7c4c586fc793bdd5aab19",
|
||||
"blk.14.ffn_norm.weight": "2796a62d832a9710148f95d533320492a33e712b2e5218659c548705bd11684d",
|
||||
"blk.14.ffn_up.weight": "3f78c78d8c2d54df45f799d4ff902316628af296834afe4ceed63d4a324ff03e",
|
||||
"blk.15.attn_k.weight": "6e810ee3859e07695645ee0c9a5efc7962668984a5f0a9325f47e462743b447c",
|
||||
"blk.15.attn_norm.weight": "0956b576ae96db0b28cb09f761f801cfd9281432284664f0fe181c8d9c55d1ec",
|
||||
"blk.15.attn_output.weight": "03a17f7e94208177aace5cc41b7f54670ba57873b7274ff6e23caf58cce110ca",
|
||||
"blk.15.attn_q.weight": "b8edafe7d2216a6f8b4ae4905a906475490e6ea418f6e1d3cec563dbdc6fab91",
|
||||
"blk.15.attn_v.weight": "f8ae8cae0f4cfa34a459824eba57350c3c248104ba5607e7d9dc7d7c39aaf4a6",
|
||||
"blk.15.ffn_down.weight": "8d02eb439da852246d2ca67e9b7b6de0b090b80744355e64728a23e41926505b",
|
||||
"blk.15.ffn_gate.weight": "ed5bf361c67db8731f186b775826f21c33bdb521111fd2d922539719a770239f",
|
||||
"blk.15.ffn_norm.weight": "5942ca3c73209ac9a0c8bfd9b4aab7f7be7aee9aa12d9c35833493b44af76767",
|
||||
"blk.15.ffn_up.weight": "f4bebf4ad99ec5f911327dec347be6c595814885309c7bc5647ce28c7f4d1cf5",
|
||||
"blk.16.attn_k.weight": "756a534c19364448e0958b8948fe33891c6ccda0fbb4dfa2024e1f532a87804b",
|
||||
"blk.16.attn_norm.weight": "386b7b9e4e6509f6af9c022d942b6c6c6cc136aeed8751ecb037c74d7c4bfb93",
|
||||
"blk.16.attn_output.weight": "3ba1a766a25830b84d7c22178203635f9c5624caad290bc5e5d73da5d5e7a2ec",
|
||||
"blk.16.attn_q.weight": "d39b0c91e1fda7685d50a0f7cc8d18c44b5bdc90a142c7fda0bc329cca1afa74",
|
||||
"blk.16.attn_v.weight": "98b33fcb0ee3483cff1b06ecb44d7b7ffb4d34c268248e4d73dfdf82b2065b2f",
|
||||
"blk.16.ffn_down.weight": "14006f5e4acb2f9416271ae562e299359cd2585739c7fc77ccbca54495563948",
|
||||
"blk.16.ffn_gate.weight": "12f8abae2d301d8f88bedb6af98b1daecc7b0b8d05148594f931f30958d77aca",
|
||||
"blk.16.ffn_norm.weight": "129a15a046ee96d06de288bd43c80f77a6b0fb3a159c7367154c6e4aaf362672",
|
||||
"blk.16.ffn_up.weight": "b4a5911a45f3871ef1d4efb7dc7108645a564b70f818eccf45beebef2e844ee9",
|
||||
"blk.17.attn_k.weight": "5e1bfcff0146ebdde3817b656952892eb671e14e75afc92fa53f84f8eecbec4c",
|
||||
"blk.17.attn_norm.weight": "60bc988fab7c4b29ee9de599df41a8de00caa94fcd74677da011fac82f60f465",
|
||||
"blk.17.attn_output.weight": "ba49b40d6a0b5685f749c24b0edbed3adc44dbe13b5d5e5fa1e56169fc746555",
|
||||
"blk.17.attn_q.weight": "82bb415d24efcd14d03ace03f907bb70db6a204c76a0bdd1892e0fba165db87d",
|
||||
"blk.17.attn_v.weight": "73dbe54beb91a899884e275ea81ffc5187a20cb7d5b68d5c299b783096999d94",
|
||||
"blk.17.ffn_down.weight": "7c086166241e0664f8963fd1ca4ed74c737abfb2525ec20f8435821ff50158f3",
|
||||
"blk.17.ffn_gate.weight": "51a32f78244d42a539f619c5ce661db9e6cf41636280a826d439b5444edcd28c",
|
||||
"blk.17.ffn_norm.weight": "c4bb247fccd1ecc84875028af63dd20aaf5cbd17eb94a9bc36679c09285dccab",
|
||||
"blk.17.ffn_up.weight": "b5886182790bc6fbadd63de9bc4ffee416f3b69a66280d197ab8c18edf769abf",
|
||||
"output_norm.weight": "481f3097d0a20412e35b3a739b1b958487bcd41ff67744baa3c9acbddd2ee4d4"
|
||||
}
|
||||
@@ -3,19 +3,150 @@ package convert
|
||||
import (
|
||||
"cmp"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"log/slog"
|
||||
"os"
|
||||
"slices"
|
||||
)
|
||||
|
||||
"golang.org/x/exp/maps"
|
||||
const (
|
||||
_ int32 = iota
|
||||
tokenTypeNormal
|
||||
tokenTypeUnknown
|
||||
tokenTypeControl
|
||||
tokenTypeUserDefined
|
||||
tokenTypeUnused
|
||||
tokenTypeByte
|
||||
)
|
||||
|
||||
type Tokenizer struct {
|
||||
Version string `json:"version"`
|
||||
AddedTokens []Token `json:"added_tokens"`
|
||||
Model TokenizerModel `json:"model"`
|
||||
*Vocabulary
|
||||
SpecialVocabulary []*SpecialVocabulary
|
||||
Merges []string
|
||||
|
||||
Pre string
|
||||
Template string
|
||||
}
|
||||
|
||||
func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error) {
|
||||
v, err := parseVocabulary(fsys)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
t := &Tokenizer{
|
||||
Vocabulary: v,
|
||||
Pre: "default",
|
||||
}
|
||||
|
||||
addedTokens := make(map[string]token)
|
||||
if f, err := fsys.Open("tokenizer.json"); errors.Is(err, os.ErrNotExist) {
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
} else {
|
||||
defer f.Close()
|
||||
|
||||
var tt tokenizer
|
||||
if err := json.NewDecoder(f).Decode(&tt); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, t := range tt.AddedTokens {
|
||||
addedTokens[t.Content] = t
|
||||
}
|
||||
|
||||
t.Merges = tt.Model.Merges
|
||||
|
||||
sha256sum := sha256.New()
|
||||
for _, pt := range tt.PreTokenizer.PreTokenizers {
|
||||
switch pt.Type {
|
||||
case "Split":
|
||||
if pt.Pattern.Regex != "" {
|
||||
// create a checksum of all Split pretokenizers which should be sufficient
|
||||
// to identify the pretokenizer
|
||||
sha256sum.Write([]byte(pt.Pattern.Regex))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch digest := hex.EncodeToString(sha256sum.Sum(nil)); digest {
|
||||
case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
|
||||
t.Pre = "llama-bpe"
|
||||
case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
|
||||
t.Pre = "deepseek-llm"
|
||||
case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
|
||||
t.Pre = "deepseek-coder"
|
||||
case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855":
|
||||
// noop, empty pretokenizer
|
||||
default:
|
||||
slog.Warn("unknown pretokenizer, using default", "digest", digest)
|
||||
}
|
||||
}
|
||||
|
||||
if f, err := fsys.Open("tokenizer_config.json"); errors.Is(err, os.ErrNotExist) {
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
} else {
|
||||
defer f.Close()
|
||||
|
||||
var p map[string]json.RawMessage
|
||||
if err := json.NewDecoder(f).Decode(&p); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if template, ok := p["chat_template"]; ok {
|
||||
if err := json.Unmarshal(template, &t.Template); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
for _, st := range specialTokenTypes {
|
||||
sv := SpecialVocabulary{Type: st}
|
||||
if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok {
|
||||
if err := json.Unmarshal(bts, &sv.AddToken); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if bts, ok := p[fmt.Sprintf("%s_token", st)]; ok {
|
||||
var content string
|
||||
if err := json.Unmarshal(bts, &content); err != nil {
|
||||
var mm map[string]any
|
||||
if err := json.Unmarshal(bts, &mm); err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
content, ok = mm["content"].(string)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
sv.Content = content
|
||||
}
|
||||
|
||||
if id, ok := addedTokens[sv.Content]; ok {
|
||||
sv.ID = id.ID
|
||||
t.SpecialVocabulary = append(t.SpecialVocabulary, &sv)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return t, nil
|
||||
}
|
||||
|
||||
type tokenizer struct {
|
||||
Version string `json:"version"`
|
||||
AddedTokens []token `json:"added_tokens"`
|
||||
Model struct {
|
||||
Type string `json:"type"`
|
||||
Vocab map[string]int `json:"vocab"`
|
||||
Merges []string `json:"merges"`
|
||||
} `json:"model"`
|
||||
|
||||
PreTokenizer struct {
|
||||
PreTokenizers []struct {
|
||||
@@ -27,80 +158,108 @@ type Tokenizer struct {
|
||||
} `json:"pre_tokenizer"`
|
||||
}
|
||||
|
||||
type TokenizerModel struct {
|
||||
Type string `json:"type"`
|
||||
Vocab map[string]int `json:"vocab"`
|
||||
Merges []string `json:"merges"`
|
||||
Tokens []Token
|
||||
}
|
||||
|
||||
type Token struct {
|
||||
type token struct {
|
||||
ID int `json:"id"`
|
||||
Content string `json:"content"`
|
||||
Special bool `json:"special"`
|
||||
UserDefined bool
|
||||
}
|
||||
|
||||
func (t *Token) Type() int32 {
|
||||
switch {
|
||||
case t.Special:
|
||||
return tokenTypeControl
|
||||
case t.UserDefined:
|
||||
return tokenTypeUserDefined
|
||||
default:
|
||||
return tokenTypeNormal
|
||||
}
|
||||
type Vocabulary struct {
|
||||
Model string
|
||||
Tokens []string
|
||||
Scores []float32
|
||||
Types []int32
|
||||
}
|
||||
|
||||
func (t *Tokenizer) maxID() int {
|
||||
return max(
|
||||
slices.Max(maps.Values(t.Model.Vocab)),
|
||||
slices.MaxFunc(t.AddedTokens, func(a, b Token) int {
|
||||
return cmp.Compare(a.ID, b.ID)
|
||||
}).ID,
|
||||
)
|
||||
}
|
||||
|
||||
func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) {
|
||||
f, err := os.Open(dirpath)
|
||||
func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) {
|
||||
f, err := fsys.Open("tokenizer.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var t Tokenizer
|
||||
var t tokenizer
|
||||
if err := json.NewDecoder(f).Decode(&t); err != nil {
|
||||
return "", nil, nil, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tokens = make([]Token, t.maxID()+1)
|
||||
var tokens []token
|
||||
for k, v := range t.Model.Vocab {
|
||||
tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false}
|
||||
tokens = append(tokens, token{
|
||||
ID: v,
|
||||
Content: k,
|
||||
})
|
||||
}
|
||||
|
||||
for _, v := range t.AddedTokens {
|
||||
v.UserDefined = true
|
||||
tokens[v.ID] = v
|
||||
for _, t := range t.AddedTokens {
|
||||
t.UserDefined = true
|
||||
tokens = append(tokens, t)
|
||||
}
|
||||
|
||||
sha256sum := sha256.New()
|
||||
for _, pt := range t.PreTokenizer.PreTokenizers {
|
||||
if pt.Type == "Split" && pt.Pattern.Regex != "" {
|
||||
sha256sum.Write([]byte(pt.Pattern.Regex))
|
||||
slices.SortFunc(tokens, func(i, j token) int {
|
||||
return cmp.Compare(i.ID, j.ID)
|
||||
})
|
||||
|
||||
v := Vocabulary{Model: "gpt2"}
|
||||
for _, t := range tokens {
|
||||
v.Tokens = append(v.Tokens, t.Content)
|
||||
v.Scores = append(v.Scores, float32(t.ID))
|
||||
|
||||
switch {
|
||||
case t.Special:
|
||||
v.Types = append(v.Types, tokenTypeControl)
|
||||
case t.UserDefined:
|
||||
v.Types = append(v.Types, tokenTypeUserDefined)
|
||||
default:
|
||||
v.Types = append(v.Types, tokenTypeNormal)
|
||||
}
|
||||
}
|
||||
|
||||
switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest {
|
||||
case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
|
||||
pre = "llama-bpe"
|
||||
case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
|
||||
pre = "deepseek-llm"
|
||||
case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
|
||||
pre = "deepseek-coder"
|
||||
default:
|
||||
slog.Warn("unknown pretokenizer, using default", "digest", digest)
|
||||
pre = "default"
|
||||
return &v, nil
|
||||
}
|
||||
|
||||
func parseVocabulary(fsys fs.FS) (*Vocabulary, error) {
|
||||
patterns := []struct {
|
||||
Pattern string
|
||||
Func func(fs.FS) (*Vocabulary, error)
|
||||
}{
|
||||
{"tokenizer.model", parseSentencePiece},
|
||||
{"tokenizer.json", parseVocabularyFromTokenizer},
|
||||
}
|
||||
|
||||
return pre, tokens, t.Model.Merges, nil
|
||||
for _, pattern := range patterns {
|
||||
if _, err := fs.Stat(fsys, pattern.Pattern); errors.Is(err, os.ErrNotExist) {
|
||||
continue
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return pattern.Func(fsys)
|
||||
}
|
||||
|
||||
return nil, errors.New("unknown tensor format")
|
||||
}
|
||||
|
||||
type SpecialVocabulary struct {
|
||||
Type string
|
||||
ID int
|
||||
Content string
|
||||
AddToken bool
|
||||
}
|
||||
|
||||
func (sv SpecialVocabulary) Key() string {
|
||||
switch t := sv.Type; t {
|
||||
case "bos", "eos", "cls", "mask":
|
||||
return t
|
||||
case "unk":
|
||||
return "unknown"
|
||||
case "sep":
|
||||
//nolint:misspell // this is an upstream typo
|
||||
return "seperator"
|
||||
case "pad":
|
||||
return "padding"
|
||||
}
|
||||
|
||||
panic("unknown special vocabulary type")
|
||||
}
|
||||
|
||||
83
convert/tokenizer_spm.go
Normal file
83
convert/tokenizer_spm.go
Normal file
@@ -0,0 +1,83 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"slices"
|
||||
|
||||
"google.golang.org/protobuf/proto"
|
||||
|
||||
"github.com/ollama/ollama/convert/sentencepiece"
|
||||
)
|
||||
|
||||
func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
|
||||
bts, err := fs.ReadFile(fsys, "tokenizer.model")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var spm sentencepiece.ModelProto
|
||||
if err := proto.Unmarshal(bts, &spm); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
v := Vocabulary{Model: "llama"}
|
||||
for _, piece := range spm.GetPieces() {
|
||||
v.Tokens = append(v.Tokens, piece.GetPiece())
|
||||
v.Scores = append(v.Scores, piece.GetScore())
|
||||
|
||||
switch t := piece.GetType(); t {
|
||||
case sentencepiece.ModelProto_SentencePiece_UNKNOWN,
|
||||
sentencepiece.ModelProto_SentencePiece_CONTROL,
|
||||
sentencepiece.ModelProto_SentencePiece_UNUSED,
|
||||
sentencepiece.ModelProto_SentencePiece_BYTE:
|
||||
v.Types = append(v.Types, int32(t))
|
||||
default:
|
||||
v.Types = append(v.Types, int32(sentencepiece.ModelProto_SentencePiece_NORMAL))
|
||||
}
|
||||
}
|
||||
|
||||
f, err := fsys.Open("added_tokens.json")
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return &v, nil
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var atm map[string]int
|
||||
if err := json.NewDecoder(f).Decode(&atm); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
type t struct {
|
||||
id int
|
||||
content string
|
||||
}
|
||||
|
||||
var ts []t
|
||||
for content, id := range atm {
|
||||
ts = append(ts, t{id, content})
|
||||
}
|
||||
|
||||
slices.SortFunc(ts, func(i, j t) int {
|
||||
return cmp.Compare(i.id, j.id)
|
||||
})
|
||||
|
||||
n := len(v.Tokens)
|
||||
for i, t := range ts {
|
||||
if t.id != i+n {
|
||||
return nil, fmt.Errorf("invalid token id: %d", t.id)
|
||||
}
|
||||
|
||||
v.Tokens = append(v.Tokens, t.content)
|
||||
v.Scores = append(v.Scores, -1000.0)
|
||||
v.Types = append(v.Types, tokenTypeUserDefined)
|
||||
}
|
||||
|
||||
return &v, nil
|
||||
}
|
||||
287
convert/torch.go
287
convert/torch.go
@@ -1,287 +0,0 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/nlpodyssey/gopickle/pytorch"
|
||||
"github.com/nlpodyssey/gopickle/types"
|
||||
"github.com/x448/float16"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type torchWriterTo struct {
|
||||
t *llm.Tensor
|
||||
|
||||
params *Params
|
||||
bo ByteOrder
|
||||
|
||||
storage pytorch.StorageInterface
|
||||
repacker func(string, []float32, []uint64) ([]float32, error)
|
||||
}
|
||||
|
||||
type TorchFormat struct{}
|
||||
|
||||
func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
|
||||
slog.Debug("getting torch tensors")
|
||||
|
||||
var files []string
|
||||
if pt, _ := filepath.Glob(filepath.Join(dirpath, "consolidated*.pth")); len(pt) > 0 {
|
||||
files = append(files, pt...)
|
||||
} else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 {
|
||||
files = append(files, pt...)
|
||||
}
|
||||
|
||||
var offset uint64
|
||||
var tensors []llm.Tensor
|
||||
for _, fn := range files {
|
||||
m, err := pytorch.Load(fn)
|
||||
if err != nil {
|
||||
slog.Error(fmt.Sprintf("error unpickling: %q", err))
|
||||
return []llm.Tensor{}, err
|
||||
}
|
||||
|
||||
for _, k := range m.(*types.Dict).Keys() {
|
||||
if strings.HasSuffix(k.(string), "self_attn.rotary_emb.inv_freq") {
|
||||
continue
|
||||
}
|
||||
|
||||
t, _ := m.(*types.Dict).Get(k)
|
||||
tshape := t.(*pytorch.Tensor).Size
|
||||
|
||||
var size uint64
|
||||
var kind uint32
|
||||
switch len(tshape) {
|
||||
case 0:
|
||||
continue
|
||||
case 1:
|
||||
// convert to float32
|
||||
kind = 0
|
||||
size = uint64(tshape[0] * 4)
|
||||
case 2:
|
||||
// convert to float16
|
||||
kind = 1
|
||||
size = uint64(tshape[0] * tshape[1] * 2)
|
||||
}
|
||||
|
||||
ggufName, err := tf.GetLayerName(k.(string))
|
||||
if err != nil {
|
||||
slog.Error(err.Error())
|
||||
return nil, err
|
||||
}
|
||||
slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", k.(string), ggufName, size, tshape))
|
||||
|
||||
shape := []uint64{0, 0, 0, 0}
|
||||
for i := range tshape {
|
||||
shape[i] = uint64(tshape[i])
|
||||
}
|
||||
|
||||
tensor := llm.Tensor{
|
||||
Name: ggufName,
|
||||
Kind: kind,
|
||||
Offset: offset, // calculate the offset
|
||||
Shape: shape,
|
||||
}
|
||||
|
||||
tensor.WriterTo = torchWriterTo{
|
||||
t: &tensor,
|
||||
params: params,
|
||||
bo: params.ByteOrder,
|
||||
storage: t.(*pytorch.Tensor).Source,
|
||||
}
|
||||
|
||||
tensors = append(tensors, tensor)
|
||||
offset += size
|
||||
}
|
||||
}
|
||||
|
||||
return tensors, nil
|
||||
}
|
||||
|
||||
func getAltParams(dirpath string) (*Params, error) {
|
||||
f, err := os.Open(filepath.Join(dirpath, "params.json"))
|
||||
if err != nil {
|
||||
slog.Error("no params.json")
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
type TorchParams struct {
|
||||
HiddenSize int `json:"dim"`
|
||||
AttentionHeads int `json:"n_heads"`
|
||||
KeyValHeads int `json:"n_kv_heads"`
|
||||
HiddenLayers int `json:"n_layers"`
|
||||
RopeTheta float64 `json:"rope_theta"`
|
||||
NormEPS float64 `json:"norm_eps"`
|
||||
}
|
||||
|
||||
var tparams TorchParams
|
||||
|
||||
d := json.NewDecoder(f)
|
||||
err = d.Decode(&tparams)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
params := &Params{
|
||||
Architectures: []string{"LlamaForCausalLM"},
|
||||
HiddenSize: tparams.HiddenSize,
|
||||
AttentionHeads: tparams.AttentionHeads,
|
||||
KeyValHeads: tparams.KeyValHeads,
|
||||
HiddenLayers: tparams.HiddenLayers,
|
||||
NormEPS: tparams.NormEPS,
|
||||
}
|
||||
|
||||
switch {
|
||||
case tparams.RopeTheta == 1000000:
|
||||
// Codellama
|
||||
params.ContextSize = 16384
|
||||
case tparams.NormEPS == 1e-06:
|
||||
// llama2
|
||||
slog.Debug("Found llama2 - setting context size to 4096")
|
||||
params.ContextSize = 4096
|
||||
default:
|
||||
params.ContextSize = 2048
|
||||
}
|
||||
|
||||
params.ByteOrder = binary.LittleEndian
|
||||
return params, nil
|
||||
}
|
||||
|
||||
func (m *TorchFormat) GetParams(dirpath string) (*Params, error) {
|
||||
f, err := os.Open(filepath.Join(dirpath, "config.json"))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
// try params.json instead
|
||||
return getAltParams(dirpath)
|
||||
} else {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var params Params
|
||||
d := json.NewDecoder(f)
|
||||
err = d.Decode(¶ms)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
params.ByteOrder = binary.LittleEndian
|
||||
return ¶ms, nil
|
||||
}
|
||||
|
||||
func (m *TorchFormat) GetLayerName(n string) (string, error) {
|
||||
directMap := map[string]string{
|
||||
"tok_embeddings.weight": "token_embd.weight",
|
||||
"output.weight": "output.weight",
|
||||
"norm.weight": "output_norm.weight",
|
||||
"rope.freqs": "rope_freqs.weight",
|
||||
"model.embed_tokens.weight": "token_embd.weight",
|
||||
"lm_head.weight": "output.weight",
|
||||
"model.norm.weight": "output_norm.weight",
|
||||
}
|
||||
|
||||
lMap := map[string]string{
|
||||
"layers.(\\d+).attention_norm.weight": "blk.$1.attn_norm.weight",
|
||||
"layers.(\\d+).attention_output_norm.weight": "blk.$1.attn_norm.weight",
|
||||
"layers.(\\d+).feed_forward.w2.weight": "blk.$1.ffn_down.weight",
|
||||
"layers.(\\d+).feed_forward.w1.weight": "blk.$1.ffn_gate.weight",
|
||||
"layers.(\\d+).feed_forward.w3.weight": "blk.$1.ffn_up.weight",
|
||||
"layers.(\\d+).ffn_norm.weight": "blk.$1.ffn_norm.weight",
|
||||
"layers.(\\d+).attention.wk.weight": "blk.$1.attn_k.weight",
|
||||
"layers.(\\d+).attention.wo.weight": "blk.$1.attn_output.weight",
|
||||
"layers.(\\d+).attention.wq.weight": "blk.$1.attn_q.weight",
|
||||
"layers.(\\d+).attention.wv.weight": "blk.$1.attn_v.weight",
|
||||
"model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight",
|
||||
"model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight",
|
||||
"model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight",
|
||||
"model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight",
|
||||
"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
|
||||
"model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight",
|
||||
"model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight",
|
||||
"model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight",
|
||||
"model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight",
|
||||
}
|
||||
|
||||
v, ok := directMap[n]
|
||||
if ok {
|
||||
return v, nil
|
||||
}
|
||||
|
||||
// quick hack to rename the layers to gguf format
|
||||
for k, v := range lMap {
|
||||
re := regexp.MustCompile(k)
|
||||
newName := re.ReplaceAllString(n, v)
|
||||
if newName != n {
|
||||
return newName, nil
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
|
||||
}
|
||||
|
||||
func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
|
||||
var f32s []float32
|
||||
switch s := r.storage.(type) {
|
||||
case *pytorch.FloatStorage:
|
||||
f32s = s.Data
|
||||
case *pytorch.HalfStorage:
|
||||
f32s = s.Data
|
||||
case *pytorch.BFloat16Storage:
|
||||
f32s = s.Data
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown data type: %T", s)
|
||||
}
|
||||
|
||||
if r.repacker != nil {
|
||||
f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
switch r.t.Kind {
|
||||
case 0:
|
||||
return 0, binary.Write(w, r.bo, f32s)
|
||||
case 1:
|
||||
f16s := make([]uint16, len(f32s))
|
||||
for i := range f32s {
|
||||
f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
|
||||
}
|
||||
|
||||
return 0, binary.Write(w, r.bo, f16s)
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
|
||||
switch len(params.Architectures) {
|
||||
case 0:
|
||||
return nil, fmt.Errorf("No architecture specified to convert")
|
||||
case 1:
|
||||
switch params.Architectures[0] {
|
||||
case "LlamaForCausalLM":
|
||||
return &LlamaModel{
|
||||
ModelData{
|
||||
Name: name,
|
||||
Path: dirPath,
|
||||
Params: params,
|
||||
Format: m,
|
||||
},
|
||||
}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0])
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("Unknown error")
|
||||
}
|
||||
@@ -669,7 +669,7 @@ curl http://localhost:11434/api/chat -d '{
|
||||
|
||||
```
|
||||
curl http://localhost:11434/api/chat -d '{
|
||||
"model": "mistral",
|
||||
"model": "llama3.1",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
@@ -708,7 +708,7 @@ curl http://localhost:11434/api/chat -d '{
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "mistral:7b-instruct-v0.3-q4_K_M",
|
||||
"model": "llama3.1",
|
||||
"created_at": "2024-07-22T20:33:28.123648Z",
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
@@ -1175,7 +1175,10 @@ curl http://localhost:11434/api/embed -d '{
|
||||
"embeddings": [[
|
||||
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
|
||||
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
|
||||
]]
|
||||
]],
|
||||
"total_duration": 14143917,
|
||||
"load_duration": 1019500,
|
||||
"prompt_eval_count": 8
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
142
docs/docker.md
142
docs/docker.md
@@ -1,71 +1,71 @@
|
||||
# Ollama Docker image
|
||||
|
||||
### CPU only
|
||||
|
||||
```bash
|
||||
docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
||||
```
|
||||
|
||||
### Nvidia GPU
|
||||
Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
|
||||
|
||||
#### Install with Apt
|
||||
1. Configure the repository
|
||||
```bash
|
||||
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
|
||||
| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
|
||||
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
|
||||
| sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||
sudo apt-get update
|
||||
```
|
||||
2. Install the NVIDIA Container Toolkit packages
|
||||
```bash
|
||||
sudo apt-get install -y nvidia-container-toolkit
|
||||
```
|
||||
|
||||
#### Install with Yum or Dnf
|
||||
1. Configure the repository
|
||||
|
||||
```bash
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
|
||||
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
```
|
||||
|
||||
2. Install the NVIDIA Container Toolkit packages
|
||||
|
||||
```bash
|
||||
sudo yum install -y nvidia-container-toolkit
|
||||
```
|
||||
|
||||
#### Configure Docker to use Nvidia driver
|
||||
```
|
||||
sudo nvidia-ctk runtime configure --runtime=docker
|
||||
sudo systemctl restart docker
|
||||
```
|
||||
|
||||
#### Start the container
|
||||
|
||||
```bash
|
||||
docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
||||
```
|
||||
|
||||
### AMD GPU
|
||||
|
||||
To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
|
||||
|
||||
```
|
||||
docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
|
||||
```
|
||||
|
||||
### Run model locally
|
||||
|
||||
Now you can run a model:
|
||||
|
||||
```
|
||||
docker exec -it ollama ollama run llama3.1
|
||||
```
|
||||
|
||||
### Try different models
|
||||
|
||||
More models can be found on the [Ollama library](https://ollama.com/library).
|
||||
# Ollama Docker image
|
||||
|
||||
### CPU only
|
||||
|
||||
```bash
|
||||
docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
||||
```
|
||||
|
||||
### Nvidia GPU
|
||||
Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
|
||||
|
||||
#### Install with Apt
|
||||
1. Configure the repository
|
||||
```bash
|
||||
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
|
||||
| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
|
||||
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
|
||||
| sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||
sudo apt-get update
|
||||
```
|
||||
2. Install the NVIDIA Container Toolkit packages
|
||||
```bash
|
||||
sudo apt-get install -y nvidia-container-toolkit
|
||||
```
|
||||
|
||||
#### Install with Yum or Dnf
|
||||
1. Configure the repository
|
||||
|
||||
```bash
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
|
||||
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
```
|
||||
|
||||
2. Install the NVIDIA Container Toolkit packages
|
||||
|
||||
```bash
|
||||
sudo yum install -y nvidia-container-toolkit
|
||||
```
|
||||
|
||||
#### Configure Docker to use Nvidia driver
|
||||
```
|
||||
sudo nvidia-ctk runtime configure --runtime=docker
|
||||
sudo systemctl restart docker
|
||||
```
|
||||
|
||||
#### Start the container
|
||||
|
||||
```bash
|
||||
docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
||||
```
|
||||
|
||||
### AMD GPU
|
||||
|
||||
To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
|
||||
|
||||
```
|
||||
docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
|
||||
```
|
||||
|
||||
### Run model locally
|
||||
|
||||
Now you can run a model:
|
||||
|
||||
```
|
||||
docker exec -it ollama ollama run llama3.1
|
||||
```
|
||||
|
||||
### Try different models
|
||||
|
||||
More models can be found on the [Ollama library](https://ollama.com/library).
|
||||
|
||||
@@ -16,7 +16,9 @@ If the model being imported is one of these architectures, it can be imported di
|
||||
|
||||
- LlamaForCausalLM
|
||||
- MistralForCausalLM
|
||||
- MixtralForCausalLM
|
||||
- GemmaForCausalLM
|
||||
- Phi3ForCausalLM
|
||||
|
||||
```dockerfile
|
||||
FROM /path/to/safetensors/directory
|
||||
|
||||
176
docs/openai.md
176
docs/openai.md
@@ -27,6 +27,37 @@ chat_completion = client.chat.completions.create(
|
||||
],
|
||||
model='llama3',
|
||||
)
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model="llava",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What's in this image?"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDS
sxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPq
smVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
max_tokens=300,
|
||||
)
|
||||
|
||||
completion = client.completions.create(
|
||||
model="llama3",
|
||||
prompt="Say this is a test",
|
||||
)
|
||||
|
||||
list_completion = client.models.list()
|
||||
|
||||
model = client.models.retrieve("llama3")
|
||||
|
||||
embeddings = client.embeddings.create(
|
||||
model="all-minilm",
|
||||
input=["why is the sky blue?", "why is the grass green?"],
|
||||
)
|
||||
```
|
||||
|
||||
### OpenAI JavaScript library
|
||||
@@ -42,14 +73,44 @@ const openai = new OpenAI({
|
||||
})
|
||||
|
||||
const chatCompletion = await openai.chat.completions.create({
|
||||
messages: [{ role: 'user', content: 'Say this is a test' }],
|
||||
model: 'llama3',
|
||||
messages: [{ role: 'user', content: 'Say this is a test' }],
|
||||
model: 'llama3',
|
||||
})
|
||||
|
||||
const response = await openai.chat.completions.create({
|
||||
model: "llava",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: "What's in this image?" },
|
||||
{
|
||||
type: "image_url",
|
||||
image_url: "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsx
NHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsm
VesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
})
|
||||
|
||||
const completion = await openai.completions.create({
|
||||
model: "llama3",
|
||||
prompt: "Say this is a test.",
|
||||
})
|
||||
|
||||
const listCompletion = await openai.models.list()
|
||||
|
||||
const model = await openai.models.retrieve("llama3")
|
||||
|
||||
const embedding = await openai.embeddings.create({
|
||||
model: "all-minilm",
|
||||
input: ["why is the sky blue?", "why is the grass green?"],
|
||||
})
|
||||
```
|
||||
|
||||
### `curl`
|
||||
|
||||
```
|
||||
``` shell
|
||||
curl http://localhost:11434/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
@@ -66,6 +127,47 @@ curl http://localhost:11434/v1/chat/completions \
|
||||
]
|
||||
}'
|
||||
|
||||
curl http://localhost:11434/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "llava",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What'\''s in this image?"
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHog
zFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesK
a49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"max_tokens": 300
|
||||
}'
|
||||
|
||||
curl http://localhost:11434/v1/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "llama3",
|
||||
"prompt": "Say this is a test"
|
||||
}'
|
||||
|
||||
curl http://localhost:11434/v1/models
|
||||
|
||||
curl http://localhost:11434/v1/models/llama3
|
||||
|
||||
curl http://localhost:11434/v1/embeddings \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "all-minilm",
|
||||
"input": ["why is the sky blue?", "why is the grass green?"]
|
||||
}'
|
||||
```
|
||||
|
||||
## Endpoints
|
||||
@@ -78,8 +180,8 @@ curl http://localhost:11434/v1/chat/completions \
|
||||
- [x] Streaming
|
||||
- [x] JSON mode
|
||||
- [x] Reproducible outputs
|
||||
- [x] Vision
|
||||
- [x] Tools (streaming support coming soon)
|
||||
- [ ] Vision
|
||||
- [ ] Logprobs
|
||||
|
||||
#### Supported request fields
|
||||
@@ -87,7 +189,10 @@ curl http://localhost:11434/v1/chat/completions \
|
||||
- [x] `model`
|
||||
- [x] `messages`
|
||||
- [x] Text `content`
|
||||
- [ ] Array of `content` parts
|
||||
- [x] Image `content`
|
||||
- [x] Base64 encoded image
|
||||
- [ ] Image URL
|
||||
- [x] Array of `content` parts
|
||||
- [x] `frequency_penalty`
|
||||
- [x] `presence_penalty`
|
||||
- [x] `response_format`
|
||||
@@ -103,6 +208,67 @@ curl http://localhost:11434/v1/chat/completions \
|
||||
- [ ] `user`
|
||||
- [ ] `n`
|
||||
|
||||
### `/v1/completions`
|
||||
|
||||
#### Supported features
|
||||
|
||||
- [x] Completions
|
||||
- [x] Streaming
|
||||
- [x] JSON mode
|
||||
- [x] Reproducible outputs
|
||||
- [ ] Logprobs
|
||||
|
||||
#### Supported request fields
|
||||
|
||||
- [x] `model`
|
||||
- [x] `prompt`
|
||||
- [x] `frequency_penalty`
|
||||
- [x] `presence_penalty`
|
||||
- [x] `seed`
|
||||
- [x] `stop`
|
||||
- [x] `stream`
|
||||
- [x] `temperature`
|
||||
- [x] `top_p`
|
||||
- [x] `max_tokens`
|
||||
- [x] `suffix`
|
||||
- [ ] `best_of`
|
||||
- [ ] `echo`
|
||||
- [ ] `logit_bias`
|
||||
- [ ] `user`
|
||||
- [ ] `n`
|
||||
|
||||
#### Notes
|
||||
|
||||
- `prompt` currently only accepts a string
|
||||
|
||||
### `/v1/models`
|
||||
|
||||
#### Notes
|
||||
|
||||
- `created` corresponds to when the model was last modified
|
||||
- `owned_by` corresponds to the ollama username, defaulting to `"library"`
|
||||
|
||||
### `/v1/models/{model}`
|
||||
|
||||
#### Notes
|
||||
|
||||
- `created` corresponds to when the model was last modified
|
||||
- `owned_by` corresponds to the ollama username, defaulting to `"library"`
|
||||
|
||||
### `/v1/embeddings`
|
||||
|
||||
#### Supported request fields
|
||||
|
||||
- [x] `model`
|
||||
- [x] `input`
|
||||
- [x] string
|
||||
- [x] array of strings
|
||||
- [ ] array of tokens
|
||||
- [ ] array of token arrays
|
||||
- [ ] `encoding_format`
|
||||
- [ ] `dimensions`
|
||||
- [ ] `user`
|
||||
|
||||
## Models
|
||||
|
||||
Before using a model, pull it locally with `ollama pull`:
|
||||
|
||||
@@ -112,15 +112,9 @@ Keep the following tips and best practices in mind when working with Go template
|
||||
ChatML is a popular template format. It can be used for models such as Databricks' DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
|
||||
|
||||
```gotmpl
|
||||
{{- if .System }}<|im_start|>system
|
||||
{{ .System }}<|im_end|>
|
||||
{{ end }}
|
||||
{{- range .Messages }}<|im_start|>{{ .Role }}
|
||||
{{ .Content }}<|im_end|>
|
||||
{{ end }}<|im_start|>assistant
|
||||
{{ else }}
|
||||
{{ if .System }}<|im_start|>system
|
||||
{{ .System }}<|im_end|>
|
||||
```
|
||||
|
||||
### Example Tools
|
||||
|
||||
@@ -9,7 +9,7 @@ cat ~/.ollama/logs/server.log
|
||||
On **Linux** systems with systemd, the logs can be found with this command:
|
||||
|
||||
```shell
|
||||
journalctl -u ollama
|
||||
journalctl -u ollama --no-pager
|
||||
```
|
||||
|
||||
When you run Ollama in a **container**, the logs go to stdout/stderr in the container:
|
||||
|
||||
@@ -3,6 +3,7 @@ package format
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -28,6 +29,6 @@ func HumanNumber(b uint64) string {
|
||||
case b >= Thousand:
|
||||
return fmt.Sprintf("%.0fK", float64(b)/Thousand)
|
||||
default:
|
||||
return fmt.Sprintf("%d", b)
|
||||
return strconv.FormatUint(b, 10)
|
||||
}
|
||||
}
|
||||
|
||||
2
go.mod
2
go.mod
@@ -1,6 +1,6 @@
|
||||
module github.com/ollama/ollama
|
||||
|
||||
go 1.22.0
|
||||
go 1.22.5
|
||||
|
||||
require (
|
||||
github.com/containerd/console v1.0.3
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
package gpu
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -95,5 +95,5 @@ func commonAMDValidateLibDir() (string, error) {
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
|
||||
return "", errors.New("no suitable rocm found, falling back to CPU")
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package gpu
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"syscall"
|
||||
@@ -76,7 +77,7 @@ func (hl *HipLib) Release() {
|
||||
|
||||
func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
|
||||
if hl.dll == 0 {
|
||||
return 0, 0, fmt.Errorf("dll has been unloaded")
|
||||
return 0, 0, errors.New("dll has been unloaded")
|
||||
}
|
||||
var version int
|
||||
status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
|
||||
@@ -110,7 +111,7 @@ func (hl *HipLib) HipGetDeviceCount() int {
|
||||
|
||||
func (hl *HipLib) HipSetDevice(device int) error {
|
||||
if hl.dll == 0 {
|
||||
return fmt.Errorf("dll has been unloaded")
|
||||
return errors.New("dll has been unloaded")
|
||||
}
|
||||
status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
|
||||
if status != hipSuccess {
|
||||
@@ -121,7 +122,7 @@ func (hl *HipLib) HipSetDevice(device int) error {
|
||||
|
||||
func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
|
||||
if hl.dll == 0 {
|
||||
return nil, fmt.Errorf("dll has been unloaded")
|
||||
return nil, errors.New("dll has been unloaded")
|
||||
}
|
||||
var props hipDevicePropMinimal
|
||||
status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
|
||||
@@ -134,7 +135,7 @@ func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, err
|
||||
// free, total, err
|
||||
func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
|
||||
if hl.dll == 0 {
|
||||
return 0, 0, fmt.Errorf("dll has been unloaded")
|
||||
return 0, 0, errors.New("dll has been unloaded")
|
||||
}
|
||||
var totalMemory uint64
|
||||
var freeMemory uint64
|
||||
|
||||
@@ -393,7 +393,7 @@ func AMDValidateLibDir() (string, error) {
|
||||
|
||||
// If we still haven't found a usable rocm, the user will have to install it on their own
|
||||
slog.Warn("amdgpu detected, but no compatible rocm library found. Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install")
|
||||
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
|
||||
return "", errors.New("no suitable rocm found, falling back to CPU")
|
||||
}
|
||||
|
||||
func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
|
||||
|
||||
@@ -2,7 +2,7 @@ package gpu
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -85,7 +85,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
|
||||
n = bytes.IndexByte(props.GcnArchName[:], 0)
|
||||
gfx := string(props.GcnArchName[:n])
|
||||
slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
|
||||
//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
|
||||
// slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
|
||||
// TODO Why isn't props.iGPU accurate!?
|
||||
if strings.EqualFold(name, iGPUName) {
|
||||
slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
|
||||
@@ -161,7 +161,7 @@ func AMDValidateLibDir() (string, error) {
|
||||
|
||||
// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
|
||||
slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm")
|
||||
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
|
||||
return "", errors.New("no suitable rocm found, falling back to CPU")
|
||||
}
|
||||
|
||||
func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
|
||||
|
||||
@@ -42,20 +42,16 @@ func PayloadsDir() (string, error) {
|
||||
return "", fmt.Errorf("failed to generate tmp dir: %w", err)
|
||||
}
|
||||
} else {
|
||||
err = os.MkdirAll(tmpDir, 0755)
|
||||
err = os.MkdirAll(tmpDir, 0o755)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Track our pid so we can clean up orphaned tmpdirs
|
||||
pidFilePath := filepath.Join(tmpDir, "ollama.pid")
|
||||
pidFile, err := os.OpenFile(pidFilePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if _, err := pidFile.Write([]byte(fmt.Sprint(os.Getpid()))); err != nil {
|
||||
return "", err
|
||||
n := filepath.Join(tmpDir, "ollama.pid")
|
||||
if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
|
||||
return "", fmt.Errorf("failed to write pid file %s: %w", n, err)
|
||||
}
|
||||
|
||||
// We create a distinct subdirectory for payloads within the tmpdir
|
||||
@@ -67,37 +63,44 @@ func PayloadsDir() (string, error) {
|
||||
|
||||
// Best effort to clean up prior tmpdirs
|
||||
func cleanupTmpDirs() {
|
||||
dirs, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*"))
|
||||
matches, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*", "ollama.pid"))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for _, d := range dirs {
|
||||
info, err := os.Stat(d)
|
||||
if err != nil || !info.IsDir() {
|
||||
|
||||
for _, match := range matches {
|
||||
raw, err := os.ReadFile(match)
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
slog.Debug("not a ollama runtime directory, skipping", "path", match)
|
||||
continue
|
||||
}
|
||||
raw, err := os.ReadFile(filepath.Join(d, "ollama.pid"))
|
||||
if err != nil {
|
||||
slog.Warn("failed to read ollama.pid", "path", d, "error", err)
|
||||
// No pid, ignore this tmpdir
|
||||
} else if err != nil {
|
||||
slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
pid, err := strconv.Atoi(string(raw))
|
||||
if err != nil {
|
||||
slog.Warn("failed to parse pid", "path", d, "error", err)
|
||||
slog.Warn("invalid pid, skipping", "path", match, "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
proc, err := os.FindProcess(pid)
|
||||
if err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
|
||||
slog.Warn("found running ollama", "pid", pid, "path", d)
|
||||
// Another running ollama, ignore this tmpdir
|
||||
p, err := os.FindProcess(pid)
|
||||
if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
|
||||
slog.Warn("process still running, skipping", "pid", pid, "path", match)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := os.Remove(d); err != nil {
|
||||
slog.Warn("unable to cleanup stale tmpdir", "path", d, "error", err)
|
||||
if err := os.Remove(match); err != nil {
|
||||
slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
|
||||
}
|
||||
|
||||
runners := filepath.Join(filepath.Dir(match), "runners")
|
||||
if err := os.RemoveAll(runners); err != nil {
|
||||
slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
|
||||
}
|
||||
|
||||
if err := os.Remove(filepath.Dir(match)); err != nil {
|
||||
slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
package gpu
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/cpu"
|
||||
)
|
||||
|
||||
@@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
|
||||
// else LCD
|
||||
return CPUCapabilityNone
|
||||
}
|
||||
|
||||
// IsNUMA reports whether the host exposes more than one distinct physical
// CPU package ID under /sys/devices/system/cpu. It always returns false on
// non-Linux systems. A glob error is ignored and topology files that cannot
// be read are skipped, so either case simply contributes no IDs.
func IsNUMA() bool {
|
||||
if runtime.GOOS != "linux" {
|
||||
// numa support in llama.cpp is linux only
|
||||
return false
|
||||
}
|
||||
// ids is used as a set: only the keys (trimmed file contents) matter.
ids := map[string]interface{}{}
|
||||
packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
|
||||
for _, packageId := range packageIds {
|
||||
id, err := os.ReadFile(packageId)
|
||||
if err == nil {
|
||||
ids[strings.TrimSpace(string(id))] = struct{}{}
|
||||
}
|
||||
}
|
||||
// More than one distinct package ID means more than one physical package.
return len(ids) > 1
|
||||
}
|
||||
|
||||
80
gpu/gpu.go
80
gpu/gpu.go
@@ -7,9 +7,9 @@ package gpu
|
||||
#cgo windows LDFLAGS: -lpthread
|
||||
|
||||
#include "gpu_info.h"
|
||||
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
@@ -70,7 +70,6 @@ var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
||||
|
||||
// Note: gpuMutex must already be held
|
||||
func initCudaHandles() *cudaHandles {
|
||||
|
||||
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
||||
|
||||
cHandles := &cudaHandles{}
|
||||
@@ -211,14 +210,16 @@ func GetGPUInfo() GpuInfoList {
|
||||
if err != nil {
|
||||
slog.Warn("error looking up system memory", "error", err)
|
||||
}
|
||||
cpus = []CPUInfo{CPUInfo{
|
||||
GpuInfo: GpuInfo{
|
||||
memInfo: mem,
|
||||
Library: "cpu",
|
||||
Variant: cpuCapability,
|
||||
ID: "0",
|
||||
cpus = []CPUInfo{
|
||||
{
|
||||
GpuInfo: GpuInfo{
|
||||
memInfo: mem,
|
||||
Library: "cpu",
|
||||
Variant: cpuCapability,
|
||||
ID: "0",
|
||||
},
|
||||
},
|
||||
}}
|
||||
}
|
||||
|
||||
// Fallback to CPU mode if we're lacking required vector extensions on x86
|
||||
if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
|
||||
@@ -304,38 +305,41 @@ func GetGPUInfo() GpuInfoList {
|
||||
// Intel
|
||||
if envconfig.IntelGPU() {
|
||||
oHandles = initOneAPIHandles()
|
||||
// On windows we bundle the oneapi library one level above the runner dir
|
||||
depPath = ""
|
||||
if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
|
||||
depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi")
|
||||
}
|
||||
if oHandles != nil && oHandles.oneapi != nil {
|
||||
|
||||
for d := range oHandles.oneapi.num_drivers {
|
||||
if oHandles.oneapi == nil {
|
||||
// shouldn't happen
|
||||
slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
|
||||
continue
|
||||
// On windows we bundle the oneapi library one level above the runner dir
|
||||
depPath = ""
|
||||
if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
|
||||
depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi")
|
||||
}
|
||||
devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
|
||||
for i := range devCount {
|
||||
gpuInfo := OneapiGPUInfo{
|
||||
GpuInfo: GpuInfo{
|
||||
Library: "oneapi",
|
||||
},
|
||||
driverIndex: int(d),
|
||||
gpuIndex: int(i),
|
||||
|
||||
for d := range oHandles.oneapi.num_drivers {
|
||||
if oHandles.oneapi == nil {
|
||||
// shouldn't happen
|
||||
slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
|
||||
continue
|
||||
}
|
||||
devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
|
||||
for i := range devCount {
|
||||
gpuInfo := OneapiGPUInfo{
|
||||
GpuInfo: GpuInfo{
|
||||
Library: "oneapi",
|
||||
},
|
||||
driverIndex: int(d),
|
||||
gpuIndex: int(i),
|
||||
}
|
||||
// TODO - split bootstrapping from updating free memory
|
||||
C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
|
||||
// TODO - convert this to MinimumMemory based on testing...
|
||||
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
|
||||
memInfo.free = C.uint64_t(totalFreeMem)
|
||||
gpuInfo.TotalMemory = uint64(memInfo.total)
|
||||
gpuInfo.FreeMemory = uint64(memInfo.free)
|
||||
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
||||
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
||||
gpuInfo.DependencyPath = depPath
|
||||
oneapiGPUs = append(oneapiGPUs, gpuInfo)
|
||||
}
|
||||
// TODO - split bootstrapping from updating free memory
|
||||
C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
|
||||
// TODO - convert this to MinimumMemory based on testing...
|
||||
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
|
||||
memInfo.free = C.uint64_t(totalFreeMem)
|
||||
gpuInfo.TotalMemory = uint64(memInfo.total)
|
||||
gpuInfo.FreeMemory = uint64(memInfo.free)
|
||||
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
||||
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
||||
gpuInfo.DependencyPath = depPath
|
||||
oneapiGPUs = append(oneapiGPUs, gpuInfo)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ package gpu
|
||||
#include "gpu_info_darwin.h"
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
|
||||
|
||||
@@ -67,4 +67,4 @@ void cpu_check_ram(mem_info_t *resp);
|
||||
#include "gpu_info_oneapi.h"
|
||||
|
||||
#endif // __GPU_INFO_H__
|
||||
#endif // __APPLE__
|
||||
#endif // __APPLE__
|
||||
|
||||
@@ -43,10 +43,12 @@ var OneapiGlobs = []string{
|
||||
"/usr/lib*/libze_intel_gpu.so*",
|
||||
}
|
||||
|
||||
var CudartMgmtName = "libcudart.so*"
|
||||
var NvcudaMgmtName = "libcuda.so*"
|
||||
var NvmlMgmtName = "" // not currently wired on linux
|
||||
var OneapiMgmtName = "libze_intel_gpu.so"
|
||||
var (
|
||||
CudartMgmtName = "libcudart.so*"
|
||||
NvcudaMgmtName = "libcuda.so*"
|
||||
NvmlMgmtName = "" // not currently wired on linux
|
||||
OneapiMgmtName = "libze_intel_gpu.so"
|
||||
)
|
||||
|
||||
func GetCPUMem() (memInfo, error) {
|
||||
var mem memInfo
|
||||
|
||||
@@ -40,10 +40,12 @@ var OneapiGlobs = []string{
|
||||
"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
|
||||
}
|
||||
|
||||
var CudartMgmtName = "cudart64_*.dll"
|
||||
var NvcudaMgmtName = "nvcuda.dll"
|
||||
var NvmlMgmtName = "nvml.dll"
|
||||
var OneapiMgmtName = "ze_intel_gpu64.dll"
|
||||
var (
|
||||
CudartMgmtName = "cudart64_*.dll"
|
||||
NvcudaMgmtName = "nvcuda.dll"
|
||||
NvmlMgmtName = "nvml.dll"
|
||||
OneapiMgmtName = "ze_intel_gpu64.dll"
|
||||
)
|
||||
|
||||
func GetCPUMem() (memInfo, error) {
|
||||
memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
|
||||
|
||||
@@ -5,6 +5,7 @@ package integration
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strconv"
|
||||
"sync"
|
||||
"testing"
|
||||
@@ -13,7 +14,6 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
"github.com/ollama/ollama/format"
|
||||
)
|
||||
|
||||
@@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
|
||||
},
|
||||
}
|
||||
resp = [2][]string{
|
||||
[]string{"sunlight"},
|
||||
[]string{"england", "english", "massachusetts", "pilgrims", "british"},
|
||||
{"sunlight"},
|
||||
{"england", "english", "massachusetts", "pilgrims", "british"},
|
||||
}
|
||||
)
|
||||
var wg sync.WaitGroup
|
||||
@@ -71,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
|
||||
reqLimit := len(req)
|
||||
iterLimit := 5
|
||||
|
||||
vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
|
||||
if vram != "" {
|
||||
max, err := strconv.ParseUint(vram, 10, 64)
|
||||
if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
|
||||
maxVram, err := strconv.ParseUint(s, 10, 64)
|
||||
require.NoError(t, err)
|
||||
// Don't hammer on small VRAM cards...
|
||||
if max < 4*1024*1024*1024 {
|
||||
if maxVram < 4*format.GibiByte {
|
||||
reqLimit = min(reqLimit, 2)
|
||||
iterLimit = 2
|
||||
}
|
||||
@@ -233,12 +232,12 @@ func TestMultiModelStress(t *testing.T) {
|
||||
consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
|
||||
for i := 0; i < len(req); i++ {
|
||||
// Always get at least 2 models, but don't overshoot VRAM too much or we'll take too long
|
||||
if i > 1 && consumed > vram {
|
||||
slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed))
|
||||
if i > 1 && consumed > maxVram {
|
||||
slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
|
||||
break
|
||||
}
|
||||
consumed += chosenModels[i].size
|
||||
slog.Info("target vram", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed))
|
||||
slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
|
||||
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
|
||||
@@ -35,8 +35,8 @@ var (
|
||||
},
|
||||
}
|
||||
resp = [2][]string{
|
||||
[]string{"sunlight"},
|
||||
[]string{"england", "english", "massachusetts", "pilgrims"},
|
||||
{"sunlight"},
|
||||
{"england", "english", "massachusetts", "pilgrims"},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) {
|
||||
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
|
||||
threadCount := 32
|
||||
if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
|
||||
threadCount = maxQueue
|
||||
threadCount = int(maxQueue)
|
||||
} else {
|
||||
t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
|
||||
}
|
||||
|
||||
@@ -162,7 +162,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
|
||||
fn := func(resp api.ProgressResponse) error {
|
||||
// fmt.Print(".")
|
||||
if !stallTimer.Reset(stallDuration) {
|
||||
return fmt.Errorf("stall was detected, aborting status reporting")
|
||||
return errors.New("stall was detected, aborting status reporting")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -180,7 +180,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
|
||||
|
||||
select {
|
||||
case <-stallTimer.C:
|
||||
return fmt.Errorf("download stalled")
|
||||
return errors.New("download stalled")
|
||||
case <-done:
|
||||
return pullError
|
||||
}
|
||||
@@ -243,7 +243,7 @@ func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq ap
|
||||
// fmt.Print(".")
|
||||
buf.Write([]byte(response.Response))
|
||||
if !stallTimer.Reset(streamTimeout) {
|
||||
return fmt.Errorf("stall was detected while streaming response, aborting")
|
||||
return errors.New("stall was detected while streaming response, aborting")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
|
||||
},
|
||||
},
|
||||
[][]string{
|
||||
[]string{"sunlight"},
|
||||
[]string{"soil", "organic", "earth", "black", "tan"},
|
||||
[]string{"england", "english", "massachusetts", "pilgrims", "british"},
|
||||
[]string{"fourth", "july", "declaration", "independence"},
|
||||
[]string{"nitrogen", "oxygen", "carbon", "dioxide"},
|
||||
{"sunlight"},
|
||||
{"soil", "organic", "earth", "black", "tan"},
|
||||
{"england", "english", "massachusetts", "pilgrims", "british"},
|
||||
{"fourth", "july", "declaration", "independence"},
|
||||
{"nitrogen", "oxygen", "carbon", "dioxide"},
|
||||
}
|
||||
}
|
||||
|
||||
24
llm/ext_server/CMakeLists.txt
vendored
24
llm/ext_server/CMakeLists.txt
vendored
@@ -1,13 +1,13 @@
|
||||
set(TARGET ollama_llama_server)
|
||||
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_compile_definitions(${TARGET} PRIVATE
|
||||
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
||||
)
|
||||
target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT})
|
||||
if (WIN32)
|
||||
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
|
||||
endif()
|
||||
set(TARGET ollama_llama_server)
|
||||
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_compile_definitions(${TARGET} PRIVATE
|
||||
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
||||
)
|
||||
target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT})
|
||||
if (WIN32)
|
||||
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
|
||||
endif()
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
307
llm/ext_server/server.cpp
vendored
307
llm/ext_server/server.cpp
vendored
@@ -44,6 +44,7 @@
|
||||
#include <errhandlingapi.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
@@ -402,7 +403,9 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
|
||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
||||
auto init_result = llama_init_from_gpt_params(params);
|
||||
model = init_result.model;
|
||||
ctx = init_result.context;
|
||||
if (model == nullptr)
|
||||
{
|
||||
LOG_ERROR("unable to load model", {{"model", params.model}});
|
||||
@@ -1037,6 +1040,7 @@ struct llama_server_context
|
||||
img.request_encode_image = false;
|
||||
}
|
||||
|
||||
LOG_TEE("slot has images: %d\n", slot.images.size());
|
||||
return slot.images.size() > 0;
|
||||
}
|
||||
|
||||
@@ -1221,7 +1225,6 @@ struct llama_server_context
|
||||
res.result_json = json
|
||||
{
|
||||
{"embedding", std::vector<float>(embd, embd + n_embd)},
|
||||
{"timings", slot.get_formated_timings()},
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -1269,6 +1272,150 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
|
||||
/* bool process_images_paligemma(server_slot &slot, int n_batch)
|
||||
{
|
||||
// set_off_embeds(ctx);
|
||||
int n_past = 0;
|
||||
int image_idx = 0;
|
||||
slot_image &img = slot.images[image_idx];
|
||||
|
||||
// rescale image embeddings
|
||||
float *data = img.image_embedding;
|
||||
for (int i = 0; i < 2048 * 256; i++)
|
||||
{
|
||||
data[i] = data[i] / sqrt(2048);
|
||||
}
|
||||
|
||||
if (ctx)
|
||||
{
|
||||
// set_image_embeds(ctx, data);
|
||||
// print_embeds(ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("ctx is null");
|
||||
}
|
||||
|
||||
// generate user_prompt -> this should contain image tokens prepended and a new line appended:
|
||||
// batch.n_tokens += (int)slot.images.size() * llama_n_embd(model);
|
||||
std::vector<llama_token> tokens;
|
||||
std::string prompt = "caption es";
|
||||
std::vector<llama_token> text = ::llama_tokenize(ctx, prompt, false, true);
|
||||
|
||||
for (int i = 0; i < (int)slot.images.size() * 256; i++)
|
||||
{
|
||||
tokens.push_back(257152);
|
||||
}
|
||||
|
||||
tokens.push_back(2);
|
||||
|
||||
for (int i = 0; i < text.size(); i++)
|
||||
{
|
||||
// printf("token [%d]: %d\n", text[i]);
|
||||
tokens.push_back(text[i]);
|
||||
}
|
||||
|
||||
tokens.push_back(108);
|
||||
|
||||
batch.n_tokens = (int)slot.images.size() * 256 + 2 + text.size();
|
||||
printf("\nbatch.n_tokens %d\n", batch.n_tokens);
|
||||
|
||||
for (int i = 0; i < batch.n_tokens; i++)
|
||||
{
|
||||
printf("token %d: %d\n", i, tokens[i]);
|
||||
}
|
||||
|
||||
for (int i = 0; i < batch.n_tokens; i += n_batch)
|
||||
{
|
||||
printf("calling decode\n");
|
||||
int n_eval = (int)batch.n_tokens - i;
|
||||
if (n_eval > n_batch)
|
||||
{
|
||||
n_eval = n_batch;
|
||||
}
|
||||
printf("n_eval: %d, n_past: %d, slot.n_past: %d\n", n_eval, n_past, slot.n_past);
|
||||
llama_set_causal_attn(ctx, false);
|
||||
|
||||
printf("DEBUGGING DECODE BATCH:\n");
|
||||
for (int j = 0; j < n_eval; j++)
|
||||
{
|
||||
printf("token[%d]: %d\n", j, tokens[j]);
|
||||
}
|
||||
|
||||
llama_batch my_batch = llama_batch_get_one(&tokens[i], n_eval, 0, 0);
|
||||
printf("%s: viewing batch: n_tokens = %d, batch.token %d, batch.pos = %d, batch.logits = %d\n", __func__, n_eval, batch.token + i, batch.pos + i, batch.logits + i);
|
||||
for (int j = 0; j < n_eval; j++)
|
||||
{
|
||||
// printf("new batch view token [%d]: %d\n", j, (batch.token[i + j]));
|
||||
}
|
||||
|
||||
printf("%s: viewing batch: n_tokens = %d, batch.token %d, batch.pos = %d, batch.logits = %d\n", __func__, n_eval, my_batch.token + i, my_batch.pos + i, my_batch.logits + i);
|
||||
for (int j = 0; j < n_eval; j++)
|
||||
{
|
||||
// printf("new batch view token [%d]: %d\n", j, (my_batch.token[i + j]));
|
||||
}
|
||||
|
||||
printf("n_eval: %d, llama_pos: %d, llama_seq_id: %d\n", n_eval, 0, 0);
|
||||
if (llama_decode(ctx, llama_batch_get_one(&tokens[i], n_eval, 0, 0)))
|
||||
{
|
||||
printf("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, batch.n_tokens, n_batch, n_past);
|
||||
return false;
|
||||
}
|
||||
llama_set_causal_attn(ctx, true);
|
||||
slot.n_past += n_eval;
|
||||
}
|
||||
printf("done processing images paligemma\n");
|
||||
// llama_batch_clear(batch);
|
||||
return true;
|
||||
} */
|
||||
|
||||
// Queue a PaliGemma-style multimodal prompt for `slot` into the shared `batch`.
//
// Steps, in order:
//   1. Rescale the first image's embedding in place by 1/sqrt(2048) and hand
//      it to the context via set_image_embeds().
//   2. Build the token sequence: 256 placeholder tokens per image, token 2,
//      the tokenized text prompt, then token 108.
//   3. Append every token to `batch` at consecutive positions for this slot,
//      advancing slot.n_past by one per token.
//
// n_batch is unused; it keeps the signature parallel to ingest_images().
//
// Returns false when the slot has no image or the first image has no
// embedding to feed; returns true once all tokens are queued.
//
// NOTE(review): the text prompt below is hard-coded rather than taken from the
// request, and only the first image's embedding is rescaled although
// placeholder tokens are emitted for every image -- confirm this is intended.
bool prepare_pali(server_slot &slot, int n_batch)
{
    (void)n_batch;

    // presumably embedding width and patches-per-image of the vision tower -- TODO confirm
    const int    embd_dim           = 2048;
    const int    tokens_per_image   = 256;
    // presumably the image placeholder, BOS and newline token ids -- TODO confirm against the vocab
    const llama_token image_token   = 257152;
    const llama_token leading_token = 2;
    const llama_token closing_token = 108;

    // Guard the unconditional [0] access and pointer dereference below.
    if (slot.images.empty())
    {
        return false;
    }
    slot_image &img = slot.images[0];
    float *data = img.image_embedding;
    if (data == nullptr)
    {
        return false;
    }

    // rescale image embeddings (divisor hoisted; same double-precision division as before)
    const double scale = sqrt(embd_dim);
    for (int i = 0; i < embd_dim * tokens_per_image; i++)
    {
        data[i] = data[i] / scale;
    }
    set_image_embeds(ctx, data);

    // generate user_prompt -> this should contain image tokens prepended and a new line appended:
    std::string prompt = "How much ketchup is in this image?";
    std::vector<llama_token> text = ::llama_tokenize(ctx, prompt, false, true);

    const int n_image_tokens = (int)slot.images.size() * tokens_per_image;

    std::vector<llama_token> tokens;
    tokens.reserve((size_t)n_image_tokens + text.size() + 2);
    for (int i = 0; i < n_image_tokens; i++)
    {
        tokens.push_back(image_token);
    }
    tokens.push_back(leading_token);
    tokens.insert(tokens.end(), text.begin(), text.end());
    tokens.push_back(closing_token);

    // %zu: system_tokens.size() is a size_t, so %d would be a format/argument mismatch
    printf("currently, system_tokens.size %zu\n", system_tokens.size());
    for (const llama_token tok : tokens)
    {
        llama_batch_add(batch, tok, system_tokens.size() + slot.n_past, {slot.id}, true);
        slot.n_past += 1;
    }
    // llama_set_causal_attn(ctx, false);
    printf("slot.n_past == %d\n", slot.n_past);
    return true;
}
|
||||
|
||||
// for multiple images processing
|
||||
bool ingest_images(server_slot &slot, int n_batch)
|
||||
{
|
||||
@@ -1549,6 +1696,15 @@ struct llama_server_context
|
||||
}
|
||||
|
||||
bool update_slots() {
|
||||
/* gpt_params params;
|
||||
params.model = "/Users/joshyan/Projects/PaliGemma/paligemma-3b-pt-224-text-model-f16.gguf";
|
||||
llama_model_params model_params = llama_model_params_from_gpt_params(params);
|
||||
|
||||
llama_model *model = llama_load_model_from_file(params.model.c_str(), model_params);
|
||||
llama_context_params ctx_params = llama_context_params_from_gpt_params(params);
|
||||
llama_context *ctx_llama = llama_new_context_with_model(model, ctx_params);
|
||||
ctx = ctx_llama; */
|
||||
|
||||
if (system_need_update)
|
||||
{
|
||||
LOG_DEBUG("updating system prompt", {});
|
||||
@@ -1809,9 +1965,15 @@ struct llama_server_context
|
||||
const bool has_images = process_images(slot);
|
||||
|
||||
// process the prefix of first image
|
||||
std::vector<llama_token> prefix_tokens = has_images ? tokenize(slot.images[0].prefix_prompt, add_bos_token) : prompt_tokens;
|
||||
std::vector<llama_token> prefix_tokens = has_images ? tokenize(slot.images[0].prefix_prompt, false) : prompt_tokens;
|
||||
printf("\nprinting prefix tokens\n");
|
||||
for (int i = 0; i < prefix_tokens.size(); i++)
|
||||
{
|
||||
printf("prefix token[%d]: %d\n", i, prefix_tokens[i]);
|
||||
}
|
||||
|
||||
int32_t slot_npast = slot.n_past_se > 0 ? slot.n_past_se : slot.n_past;
|
||||
printf("slot_npast = %d\n", slot_npast);
|
||||
|
||||
int32_t ga_i = slot.ga_i;
|
||||
int32_t ga_n = slot.ga_n;
|
||||
@@ -1831,18 +1993,25 @@ struct llama_server_context
|
||||
slot_npast++;
|
||||
}
|
||||
|
||||
if (has_images && !ingest_images(slot, n_batch))
|
||||
LOG_ERROR("checking has images", {
|
||||
{"has images", has_images},
|
||||
{"task_id", slot.task_id},
|
||||
});
|
||||
// if (has_images && !ingest_images(slot, n_batch))
|
||||
if (has_images && !prepare_pali(slot, n_batch))
|
||||
{
|
||||
LOG_ERROR("failed processing images", {
|
||||
{"slot_id", slot.id},
|
||||
{"task_id", slot.task_id},
|
||||
});
|
||||
{"slot_id", slot.id},
|
||||
{"task_id", slot.task_id},
|
||||
});
|
||||
// FIXME @phymbert: to be properly tested
|
||||
// early returning without changing the slot state will block the slot for ever
|
||||
// no one at the moment is checking the return value
|
||||
return false;
|
||||
}
|
||||
print_causal(ctx);
|
||||
|
||||
printf("batch.n_tokens here for setting logits: %d\n", batch.n_tokens);
|
||||
// extract the logits only for the last token
|
||||
if (batch.n_tokens > 0)
|
||||
{
|
||||
@@ -1857,18 +2026,58 @@ struct llama_server_context
|
||||
|
||||
if (batch.n_tokens == 0)
|
||||
{
|
||||
/* completion_token_output result;
|
||||
const llama_token id = llama_sampling_sample(slots[0].ctx_sampling, ctx, NULL, slots[0].i_batch);
|
||||
|
||||
llama_sampling_accept(slots[0].ctx_sampling, ctx, id, true);
|
||||
|
||||
slots[0].n_decoded += 1;
|
||||
if (slots[0].n_decoded == 1)
|
||||
{
|
||||
slots[0].t_start_genereration = ggml_time_us();
|
||||
slots[0].t_prompt_processing = (slots[0].t_start_genereration - slots[0].t_start_process_prompt) / 1e3;
|
||||
metrics.on_prompt_eval(slots[0]);
|
||||
}
|
||||
|
||||
llama_token_data_array cur_p = {slots[0].ctx_sampling->cur.data(), slots[0].ctx_sampling->cur.size(), false};
|
||||
result.tok = id;
|
||||
|
||||
const int32_t n_probs = slots[0].sparams.n_probs;
|
||||
if (slots[0].sparams.temp <= 0 && n_probs > 0)
|
||||
{
|
||||
// for llama_sample_token_greedy we need to sort candidates
|
||||
llama_sample_softmax(ctx, &cur_p);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < std::min(cur_p.size, (size_t)n_probs); ++i)
|
||||
{
|
||||
result.probs.push_back({cur_p.data[i].id, cur_p.data[i].p});
|
||||
}
|
||||
|
||||
if (!process_token(result, slots[0]))
|
||||
{
|
||||
slots[0].release();
|
||||
slots[0].print_timings();
|
||||
send_final_response(slots[0]);
|
||||
metrics.on_prediction(slots[0]);
|
||||
}
|
||||
|
||||
slots[0].i_batch = -1; */
|
||||
all_slots_are_idle = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
printf("batch.n_tokens = %d\n", batch.n_tokens);
|
||||
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch)
|
||||
{
|
||||
printf("i = %d\n", i);
|
||||
const int32_t n_tokens = std::min(n_batch, batch.n_tokens - i);
|
||||
|
||||
for (auto & slot : slots)
|
||||
{
|
||||
if (slot.ga_n != 1)
|
||||
{
|
||||
printf("slot.ga_n = %d\n", slot.ga_n);
|
||||
// context extension via Self-Extend
|
||||
while (slot.n_past_se >= slot.ga_i + slot.ga_w)
|
||||
{
|
||||
@@ -1895,20 +2104,30 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
|
||||
printf("batching\n");
|
||||
|
||||
llama_batch batch_view =
|
||||
{
|
||||
n_tokens,
|
||||
batch.token + i,
|
||||
nullptr,
|
||||
batch.pos + i,
|
||||
batch.n_seq_id + i,
|
||||
batch.seq_id + i,
|
||||
batch.logits + i,
|
||||
0, 0, 0, // unused
|
||||
};
|
||||
// llama_batch batch_view = prepare_pali(slots[0], n_batch);
|
||||
printf("%s: viewing batch: n_tokens = %d, batch.token %d, batch.pos = %d, batch.logits = %d\n", __func__, n_tokens, batch.token + i, batch.pos + i, batch.logits + i);
|
||||
for (int j = 0; j < n_tokens; j++)
|
||||
{
|
||||
n_tokens,
|
||||
batch.token + i,
|
||||
nullptr,
|
||||
batch.pos + i,
|
||||
batch.n_seq_id + i,
|
||||
batch.seq_id + i,
|
||||
batch.logits + i,
|
||||
0, 0, 0, // unused
|
||||
};
|
||||
|
||||
printf("new batch view token [%d]: %d\n", j, (batch.token[i + j]));
|
||||
}
|
||||
printf("current state of causal attn: ");
|
||||
print_causal(ctx);
|
||||
const int ret = llama_decode(ctx, batch_view);
|
||||
|
||||
llama_set_causal_attn(ctx, true);
|
||||
print_causal(ctx);
|
||||
if (ret != 0)
|
||||
{
|
||||
if (n_batch == 1 || ret < 0)
|
||||
@@ -1928,6 +2147,7 @@ struct llama_server_context
|
||||
|
||||
for (auto & slot : slots)
|
||||
{
|
||||
printf("there are currently n slots\n");
|
||||
if (slot.i_batch < (int) i || slot.i_batch >= (int) (i + n_tokens))
|
||||
{
|
||||
continue;
|
||||
@@ -1936,6 +2156,7 @@ struct llama_server_context
|
||||
// prompt evaluated for embedding
|
||||
if (slot.embedding)
|
||||
{
|
||||
printf("slot.embedding is true\n");
|
||||
send_embedding(slot, batch_view);
|
||||
slot.release();
|
||||
slot.i_batch = -1;
|
||||
@@ -1943,8 +2164,10 @@ struct llama_server_context
|
||||
}
|
||||
|
||||
completion_token_output result;
|
||||
printf("sampling for the ith token: %d\n", slot.i_batch - i);
|
||||
// batch.logits[263] = true;
|
||||
const llama_token id = llama_sampling_sample(slot.ctx_sampling, ctx, NULL, slot.i_batch - i);
|
||||
|
||||
printf("got back this token: %d\n", id);
|
||||
llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
|
||||
|
||||
slot.n_decoded += 1;
|
||||
@@ -2420,7 +2643,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.lora_adapter.emplace_back(argv[i], 1.0f);
|
||||
params.lora_adapters.push_back({
|
||||
std::string(argv[i]),
|
||||
1.0,
|
||||
});
|
||||
params.use_mmap = false;
|
||||
}
|
||||
else if (arg == "--lora-scaled")
|
||||
@@ -2436,7 +2662,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
|
||||
params.lora_adapters.push_back({
|
||||
lora_adapter,
|
||||
std::stof(argv[i])
|
||||
});
|
||||
params.use_mmap = false;
|
||||
}
|
||||
else if (arg == "-v" || arg == "--verbose")
|
||||
@@ -3184,37 +3413,17 @@ int main(int argc, char **argv) {
|
||||
prompt = "";
|
||||
}
|
||||
|
||||
if (prompt.size() == 1) {
|
||||
prompt = prompt[0];
|
||||
}
|
||||
|
||||
// create and queue the task
|
||||
json responses;
|
||||
{
|
||||
const int id_task = llama.queue_tasks.get_new_id();
|
||||
llama.queue_results.add_waiting_task_id(id_task);
|
||||
llama.request_completion(id_task, {{"prompt", prompt}}, true, -1);
|
||||
const int task_id = llama.queue_tasks.get_new_id();
|
||||
llama.queue_results.add_waiting_task_id(task_id);
|
||||
llama.request_completion(task_id, {{"prompt", prompt}}, true, -1);
|
||||
|
||||
// get the result
|
||||
task_result result = llama.queue_results.recv(id_task);
|
||||
llama.queue_results.remove_waiting_task_id(id_task);
|
||||
if (result.error) {
|
||||
return res.set_content(result.result_json.dump(), "application/json; charset=utf-8");
|
||||
}
|
||||
// get the result
|
||||
task_result result = llama.queue_results.recv(task_id);
|
||||
llama.queue_results.remove_waiting_task_id(task_id);
|
||||
|
||||
responses = result.result_json.value("results", std::vector<json>{result.result_json});
|
||||
json embeddings = json::array();
|
||||
|
||||
int prompt_n = 0;
|
||||
for (auto & elem : responses) {
|
||||
embeddings.push_back(elem.at("embedding"));
|
||||
prompt_n += elem.at("timings").at("prompt_n").get<int>();
|
||||
}
|
||||
|
||||
// send the result
|
||||
json embedding_res = json{{"embedding", embeddings}, {"prompt_n", prompt_n}};
|
||||
return res.set_content(embedding_res.dump(), "application/json; charset=utf-8");
|
||||
}
|
||||
// send the result
|
||||
return res.set_content(result.result_json.dump(), "application/json; charset=utf-8");
|
||||
});
|
||||
|
||||
// GG: if I put the main loop inside a thread, it crashes on the first request when build in Debug!?
|
||||
|
||||
@@ -9,8 +9,8 @@ set -o pipefail
|
||||
echo "Starting darwin generate script"
|
||||
source $(dirname $0)/gen_common.sh
|
||||
init_vars
|
||||
git_module_setup
|
||||
apply_patches
|
||||
#git_module_setup
|
||||
#apply_patches
|
||||
|
||||
sign() {
|
||||
if [ -n "$APPLE_IDENTITY" ]; then
|
||||
@@ -97,5 +97,5 @@ case "${GOARCH}" in
|
||||
;;
|
||||
esac
|
||||
|
||||
cleanup
|
||||
#cleanup
|
||||
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
|
||||
|
||||
14
llm/ggla.go
14
llm/ggla.go
@@ -36,6 +36,8 @@ type ggla struct {
|
||||
|
||||
kv KV
|
||||
tensors []*Tensor
|
||||
|
||||
tensorOffset uint64
|
||||
}
|
||||
|
||||
func newGGLA(container *containerGGLA) *ggla {
|
||||
@@ -50,7 +52,10 @@ func (llm *ggla) KV() KV {
|
||||
}
|
||||
|
||||
func (llm *ggla) Tensors() Tensors {
|
||||
return llm.tensors
|
||||
return Tensors{
|
||||
Items: llm.tensors,
|
||||
Offset: llm.tensorOffset,
|
||||
}
|
||||
}
|
||||
|
||||
func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
|
||||
@@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
|
||||
}
|
||||
llm.kv["alpha"] = alpha
|
||||
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
llm.tensorOffset = uint64(offset)
|
||||
|
||||
for {
|
||||
var dims uint32
|
||||
if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
|
||||
|
||||
15
llm/ggml.go
15
llm/ggml.go
@@ -112,11 +112,14 @@ func (kv KV) ChatTemplate() string {
|
||||
return s
|
||||
}
|
||||
|
||||
type Tensors []*Tensor
|
||||
type Tensors struct {
|
||||
Items []*Tensor
|
||||
Offset uint64
|
||||
}
|
||||
|
||||
func (ts Tensors) Layers() map[string]Layer {
|
||||
layers := make(map[string]Layer)
|
||||
for _, t := range ts {
|
||||
for _, t := range ts.Items {
|
||||
parts := strings.Split(t.Name, ".")
|
||||
if parts[0] == "blk" {
|
||||
// join first and second part, e.g. blk.%d
|
||||
@@ -154,6 +157,14 @@ type Tensor struct {
|
||||
io.WriterTo `json:"-"`
|
||||
}
|
||||
|
||||
func (t Tensor) block() (n int) {
|
||||
if _, err := fmt.Sscanf(t.Name, "blk.%d.", &n); err != nil {
|
||||
return -1
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (t Tensor) blockSize() uint64 {
|
||||
switch t.Kind {
|
||||
case 0, 1, 24, 25, 26, 27, 28, 30: // F32, F16, I8, I16, I32, I64, F64, BF16
|
||||
|
||||
344
llm/gguf.go
344
llm/gguf.go
@@ -2,11 +2,16 @@ package llm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/exp/maps"
|
||||
)
|
||||
|
||||
type containerGGUF struct {
|
||||
@@ -88,7 +93,8 @@ type gguf struct {
|
||||
kv KV
|
||||
tensors []*Tensor
|
||||
|
||||
parameters uint64
|
||||
parameters uint64
|
||||
tensorOffset uint64
|
||||
|
||||
scratch [16 << 10]byte
|
||||
}
|
||||
@@ -100,16 +106,15 @@ func newGGUF(container *containerGGUF) *gguf {
|
||||
}
|
||||
}
|
||||
|
||||
func NewGGUFV3(bo binary.ByteOrder) *gguf {
|
||||
return newGGUF(&containerGGUF{ByteOrder: bo, Version: 3})
|
||||
}
|
||||
|
||||
func (llm *gguf) KV() KV {
|
||||
return llm.kv
|
||||
}
|
||||
|
||||
func (llm *gguf) Tensors() Tensors {
|
||||
return llm.tensors
|
||||
return Tensors{
|
||||
Items: llm.tensors,
|
||||
Offset: llm.tensorOffset,
|
||||
}
|
||||
}
|
||||
|
||||
func (llm *gguf) numTensor() uint64 {
|
||||
@@ -199,7 +204,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
return fmt.Errorf("failed to read tensor dimensions: %w", err)
|
||||
}
|
||||
|
||||
shape := [4]uint64{1, 1, 1, 1}
|
||||
shape := make([]uint64, dims)
|
||||
for i := 0; uint32(i) < dims; i++ {
|
||||
shape[i], err = readGGUF[uint64](llm, rs)
|
||||
if err != nil {
|
||||
@@ -236,13 +241,21 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
alignment = 32
|
||||
}
|
||||
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
padding := ggufPadding(offset, int64(alignment))
|
||||
llm.tensorOffset = uint64(offset + padding)
|
||||
|
||||
for _, tensor := range llm.tensors {
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get current offset: %w", err)
|
||||
}
|
||||
|
||||
padding := llm.padding(offset, int64(alignment))
|
||||
padding := ggufPadding(offset, int64(alignment))
|
||||
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
|
||||
return fmt.Errorf("failed to seek to init padding: %w", err)
|
||||
}
|
||||
@@ -261,12 +274,12 @@ func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
|
||||
return t, err
|
||||
}
|
||||
|
||||
func writeGGUF[V any](llm *gguf, w io.Writer, t uint32, v V) error {
|
||||
if err := binary.Write(w, llm.ByteOrder, t); err != nil {
|
||||
func writeGGUF[V any](w io.Writer, t uint32, v V) error {
|
||||
if err := binary.Write(w, binary.LittleEndian, t); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return binary.Write(w, llm.ByteOrder, v)
|
||||
return binary.Write(w, binary.LittleEndian, v)
|
||||
}
|
||||
|
||||
func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
|
||||
@@ -330,12 +343,12 @@ func readGGUFString(llm *gguf, r io.Reader) (string, error) {
|
||||
return string(buf), nil
|
||||
}
|
||||
|
||||
func writeGGUFString(llm *gguf, w io.Writer, s string) error {
|
||||
if err := binary.Write(w, llm.ByteOrder, ggufTypeString); err != nil {
|
||||
func writeGGUFString(w io.Writer, s string) error {
|
||||
if err := binary.Write(w, binary.LittleEndian, ggufTypeString); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
|
||||
if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -476,216 +489,71 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
|
||||
return a, nil
|
||||
}
|
||||
|
||||
func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
|
||||
if err := binary.Write(w, llm.ByteOrder, ggufTypeArray); err != nil {
|
||||
// writeGGUFArray writes a slice s of type E to the write with a gguf type of t
|
||||
func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
|
||||
if err := binary.Write(w, binary.LittleEndian, ggufTypeArray); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(w, llm.ByteOrder, t); err != nil {
|
||||
if err := binary.Write(w, binary.LittleEndian, t); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
|
||||
if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, e := range s {
|
||||
if err := binary.Write(w, llm.ByteOrder, e); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
return binary.Write(w, binary.LittleEndian, s)
|
||||
}
|
||||
|
||||
var ggufKVOrder = map[string][]string{
|
||||
"llama": {
|
||||
"general.architecture",
|
||||
"general.name",
|
||||
"llama.vocab_size",
|
||||
"llama.context_length",
|
||||
"llama.embedding_length",
|
||||
"llama.block_count",
|
||||
"llama.feed_forward_length",
|
||||
"llama.attention.head_count",
|
||||
"llama.attention.head_count_kv",
|
||||
"llama.attention.layer_norm_rms_epsilon",
|
||||
"llama.rope.freq_base",
|
||||
"llama.rope.dimension_count",
|
||||
"llama.expert_count",
|
||||
"llama.expert_used_count",
|
||||
"gemma.context_length",
|
||||
"gemma.embedding_length",
|
||||
"gemma.block_count",
|
||||
"gemma.feed_forward_length",
|
||||
"gemma.attention.head_count",
|
||||
"gemma.attention.head_count_kv",
|
||||
"gemma.attention.layer_norm_rms_epsilon",
|
||||
"gemma.attention.key_length",
|
||||
"gemma.attention.value_length",
|
||||
"general.file_type",
|
||||
"tokenizer.ggml.pre",
|
||||
"tokenizer.ggml.model",
|
||||
"tokenizer.ggml.tokens",
|
||||
"tokenizer.ggml.scores",
|
||||
"tokenizer.ggml.merges",
|
||||
"tokenizer.ggml.token_type",
|
||||
"tokenizer.ggml.bos_token_id",
|
||||
"tokenizer.ggml.eos_token_id",
|
||||
"tokenizer.ggml.unknown_token_id",
|
||||
"tokenizer.ggml.padding_token_id",
|
||||
"tokenizer.ggml.add_bos_token",
|
||||
"tokenizer.ggml.add_eos_token",
|
||||
"tokenizer.chat_template",
|
||||
"bert.pooling_type",
|
||||
},
|
||||
}
|
||||
|
||||
func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
|
||||
switch llm.Version {
|
||||
case 3:
|
||||
llm.V3.NumTensor = uint64(len(tensors))
|
||||
llm.V3.NumKV = uint64(len(kv))
|
||||
default:
|
||||
return fmt.Errorf("not implemented: ggufv%d", llm.Version)
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, llm.ByteOrder, []byte("GGUF")); err != nil {
|
||||
func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
|
||||
if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, llm.ByteOrder, llm.Version); err != nil {
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint32(3)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, llm.ByteOrder, llm.numTensor()); err != nil {
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(ts))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, llm.ByteOrder, llm.numKV()); err != nil {
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(kv))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
kvCheck := make(map[string]bool)
|
||||
for k := range kv {
|
||||
kvCheck[k] = false
|
||||
}
|
||||
keys := maps.Keys(kv)
|
||||
slices.Sort(keys)
|
||||
|
||||
for _, k := range ggufKVOrder["llama"] {
|
||||
v, ok := kv[k]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
kvCheck[k] = true
|
||||
|
||||
if err := binary.Write(ws, llm.ByteOrder, uint64(len(k))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, llm.ByteOrder, []byte(k)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var err error
|
||||
switch v := v.(type) {
|
||||
case uint32:
|
||||
err = writeGGUF(llm, ws, ggufTypeUint32, v)
|
||||
case float32:
|
||||
err = writeGGUF(llm, ws, ggufTypeFloat32, v)
|
||||
case bool:
|
||||
err = writeGGUF(llm, ws, ggufTypeBool, v)
|
||||
case string:
|
||||
err = writeGGUFString(llm, ws, v)
|
||||
case []int32:
|
||||
err = writeGGUFArray(llm, ws, ggufTypeInt32, v)
|
||||
case []uint32:
|
||||
err = writeGGUFArray(llm, ws, ggufTypeUint32, v)
|
||||
case []float32:
|
||||
err = writeGGUFArray(llm, ws, ggufTypeFloat32, v)
|
||||
case []string:
|
||||
if err := binary.Write(ws, llm.ByteOrder, ggufTypeArray); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, llm.ByteOrder, ggufTypeString); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, llm.ByteOrder, uint64(len(v))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, e := range v {
|
||||
if err := binary.Write(ws, llm.ByteOrder, uint64(len(e))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, llm.ByteOrder, []byte(e)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("improper type for '%s'", k)
|
||||
}
|
||||
if err != nil {
|
||||
for _, key := range keys {
|
||||
if err := ggufWriteKV(ws, key, kv[key]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for k, v := range kvCheck {
|
||||
if !v {
|
||||
return fmt.Errorf("Didn't know how to write kv %s", k)
|
||||
slices.SortStableFunc(ts, func(a, b Tensor) int {
|
||||
if i, j := a.block(), b.block(); i < 0 && j > 0 {
|
||||
return 1
|
||||
} else if i > 0 && j < 0 {
|
||||
return -1
|
||||
} else {
|
||||
return cmp.Compare(i, j)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
for _, tensor := range tensors {
|
||||
if err := binary.Write(ws, llm.ByteOrder, uint64(len(tensor.Name))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, llm.ByteOrder, []byte(tensor.Name)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var dims int
|
||||
for cnt := range len(tensor.Shape) {
|
||||
if tensor.Shape[cnt] > 0 {
|
||||
dims++
|
||||
}
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, llm.ByteOrder, uint32(dims)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for i := range dims {
|
||||
if err := binary.Write(ws, llm.ByteOrder, tensor.Shape[dims-1-i]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, llm.ByteOrder, tensor.Kind); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, llm.ByteOrder, tensor.Offset); err != nil {
|
||||
var s uint64
|
||||
for _, t := range ts {
|
||||
t.Offset = s
|
||||
if err := ggufWriteTensorInfo(ws, t); err != nil {
|
||||
return err
|
||||
}
|
||||
s += t.Size()
|
||||
}
|
||||
|
||||
var alignment int64 = 32
|
||||
for _, tensor := range tensors {
|
||||
offset, err := ws.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
padding := llm.padding(offset, alignment)
|
||||
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := tensor.WriteTo(ws); err != nil {
|
||||
for _, t := range ts {
|
||||
if err := ggufWriteTensor(ws, t, alignment); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -693,6 +561,102 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (gguf) padding(offset, align int64) int64 {
|
||||
func ggufWriteKV(ws io.WriteSeeker, k string, v any) error {
|
||||
slog.Debug(k, "type", fmt.Sprintf("%T", v))
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(k))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, []byte(k)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var err error
|
||||
switch v := v.(type) {
|
||||
case uint32:
|
||||
err = writeGGUF(ws, ggufTypeUint32, v)
|
||||
case float32:
|
||||
err = writeGGUF(ws, ggufTypeFloat32, v)
|
||||
case bool:
|
||||
err = writeGGUF(ws, ggufTypeBool, v)
|
||||
case string:
|
||||
err = writeGGUFString(ws, v)
|
||||
case []int32:
|
||||
err = writeGGUFArray(ws, ggufTypeInt32, v)
|
||||
case []uint32:
|
||||
err = writeGGUFArray(ws, ggufTypeUint32, v)
|
||||
case []float32:
|
||||
err = writeGGUFArray(ws, ggufTypeFloat32, v)
|
||||
case []string:
|
||||
if err := binary.Write(ws, binary.LittleEndian, ggufTypeArray); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, ggufTypeString); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(v))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, e := range v {
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(e))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, []byte(e)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("improper type for '%s'", k)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func ggufWriteTensorInfo(ws io.WriteSeeker, t Tensor) error {
|
||||
slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset)
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(t.Name))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, []byte(t.Name)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, uint32(len(t.Shape))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for i := range len(t.Shape) {
|
||||
if err := binary.Write(ws, binary.LittleEndian, t.Shape[len(t.Shape)-i-1]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, t.Kind); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return binary.Write(ws, binary.LittleEndian, t.Offset)
|
||||
}
|
||||
|
||||
func ggufWriteTensor(ws io.WriteSeeker, t Tensor, alignment int64) error {
|
||||
offset, err := ws.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(offset, alignment)))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = t.WriteTo(ws)
|
||||
return err
|
||||
}
|
||||
|
||||
func ggufPadding(offset, align int64) int64 {
|
||||
return (align - offset%align) % align
|
||||
}
|
||||
|
||||
Submodule llm/llama.cpp updated: 6eeaeba126...1e6f6554aa
@@ -11,8 +11,9 @@ package llm
|
||||
// #include <stdlib.h>
|
||||
// #include "llama.h"
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"errors"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
@@ -33,7 +34,7 @@ func Quantize(infile, outfile string, ftype fileType) error {
|
||||
params.ftype = ftype.Value()
|
||||
|
||||
if rc := C.llama_model_quantize(cinfile, coutfile, ¶ms); rc != 0 {
|
||||
return fmt.Errorf("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
|
||||
return errors.New("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@@ -2,15 +2,15 @@ package llm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/gpu"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/gpu"
|
||||
)
|
||||
|
||||
func TestEstimateGPULayers(t *testing.T) {
|
||||
@@ -20,7 +20,6 @@ func TestEstimateGPULayers(t *testing.T) {
|
||||
f, err := os.CreateTemp(t.TempDir(), modelName)
|
||||
require.NoError(t, err)
|
||||
defer f.Close()
|
||||
gguf := NewGGUFV3(binary.LittleEndian)
|
||||
inputLayerCount := 5
|
||||
|
||||
tensors := []Tensor{
|
||||
@@ -32,7 +31,7 @@ func TestEstimateGPULayers(t *testing.T) {
|
||||
{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
|
||||
}
|
||||
assert.Len(t, tensors, inputLayerCount+1)
|
||||
err = gguf.Encode(f, KV{
|
||||
err = WriteGGUF(f, KV{
|
||||
"general.architecture": "llama",
|
||||
"general.name": "name",
|
||||
"llama.context_length": uint32(32),
|
||||
|
||||
@@ -1,40 +1,32 @@
|
||||
diff --git a/common/common.cpp b/common/common.cpp
|
||||
index dbb724fb..c26fe6ee 100644
|
||||
index 2e8374d5..70d0afde 100644
|
||||
--- a/common/common.cpp
|
||||
+++ b/common/common.cpp
|
||||
@@ -2087,14 +2087,27 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
|
||||
for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
|
||||
const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
|
||||
float lora_scale = std::get<1>(params.lora_adapter[i]);
|
||||
+
|
||||
+ // try to load as gguf
|
||||
auto adapter = llama_lora_adapter_init(model, lora_adapter.c_str());
|
||||
if (adapter == nullptr) {
|
||||
- fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
|
||||
@@ -2110,9 +2110,21 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
|
||||
loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
|
||||
if (loaded_la.adapter == nullptr) {
|
||||
fprintf(stderr, "%s: error: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
|
||||
- llama_free(lctx);
|
||||
- llama_free_model(model);
|
||||
- return std::make_tuple(nullptr, nullptr);
|
||||
+ fprintf(stderr, "%s: error: failed to apply lora adapter, trying ggla\n", __func__);
|
||||
- return iparams;
|
||||
+
|
||||
+ // if that fails, try loading as ggla for compatibility
|
||||
+ int err = llama_model_apply_lora_from_file(model,
|
||||
+ lora_adapter.c_str(),
|
||||
+ lora_scale,
|
||||
+ la.path.c_str(),
|
||||
+ la.scale,
|
||||
+ nullptr,
|
||||
+ params.n_threads);
|
||||
+ if (err != 0) {
|
||||
+ fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
|
||||
+ llama_free(lctx);
|
||||
+ llama_free_model(model);
|
||||
+ return std::make_tuple(nullptr, nullptr);
|
||||
+ return iparams;
|
||||
+ } else {
|
||||
+ break;
|
||||
+ }
|
||||
+ } else {
|
||||
+ llama_lora_adapter_set(lctx, adapter, lora_scale);
|
||||
}
|
||||
- llama_lora_adapter_set(lctx, adapter, lora_scale);
|
||||
iparams.lora_adapters.push_back(loaded_la); // copy to list of loaded adapters
|
||||
}
|
||||
|
||||
if (params.ignore_eos) {
|
||||
diff --git a/include/llama.h b/include/llama.h
|
||||
index 93fd77ca..b0fb37a6 100644
|
||||
--- a/include/llama.h
|
||||
@@ -355,4 +347,4 @@ index 80a0dd0f..9d7b0e17 100644
|
||||
+ return 1;
|
||||
+ }
|
||||
+}
|
||||
\ No newline at end of file
|
||||
\ No newline at end of file
|
||||
@@ -1,20 +0,0 @@
|
||||
diff --git a/src/llama.cpp b/src/llama.cpp
|
||||
index a207451f..fba6b175 100644
|
||||
--- a/src/llama.cpp
|
||||
+++ b/src/llama.cpp
|
||||
@@ -4969,6 +4969,7 @@ static void llm_load_hparams(
|
||||
hparams.attn_soft_cap = true;
|
||||
|
||||
switch (hparams.n_layer) {
|
||||
+ case 26: model.type = e_model::MODEL_2B; break;
|
||||
case 42: model.type = e_model::MODEL_9B; break;
|
||||
case 46: model.type = e_model::MODEL_27B; break;
|
||||
default: model.type = e_model::MODEL_UNKNOWN;
|
||||
@@ -11736,6 +11737,7 @@ struct llm_build_context {
|
||||
|
||||
// ref: https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
|
||||
switch (model.type) {
|
||||
+ case e_model::MODEL_2B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
|
||||
case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
|
||||
case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
|
||||
default: GGML_ABORT("fatal error");
|
||||
311
llm/patches/12-paligemma.diff
Normal file
311
llm/patches/12-paligemma.diff
Normal file
@@ -0,0 +1,311 @@
|
||||
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
|
||||
index 54aa822c..45d03982 100644
|
||||
--- a/examples/llava/clip.cpp
|
||||
+++ b/examples/llava/clip.cpp
|
||||
@@ -765,9 +765,12 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||
embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
|
||||
embeddings = ggml_add(ctx0, embeddings, model.mm_0_b);
|
||||
|
||||
- embeddings = ggml_gelu(ctx0, embeddings);
|
||||
- embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
|
||||
- embeddings = ggml_add(ctx0, embeddings, model.mm_2_b);
|
||||
+ // paligemma missing second linear layer
|
||||
+ if (model.mm_2_w) {
|
||||
+ embeddings = ggml_gelu(ctx0, embeddings);
|
||||
+ embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
|
||||
+ embeddings = ggml_add(ctx0, embeddings, model.mm_2_b);
|
||||
+ }
|
||||
|
||||
} else if (ctx->proj_type == PROJECTOR_TYPE_MLP_NORM) {
|
||||
embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
|
||||
@@ -2542,7 +2545,10 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
|
||||
return ctx->vision_model.mm_model_peg_0_b->ne[0];
|
||||
}
|
||||
if (ctx->proj_type == PROJECTOR_TYPE_MLP) {
|
||||
- return ctx->vision_model.mm_2_b->ne[0];
|
||||
+ // paligemma missing second linear layer
|
||||
+ if (ctx->vision_model.mm_2_b == nullptr) {
|
||||
+ return ctx->vision_model.mm_0_b->ne[0];
|
||||
+ }
|
||||
}
|
||||
if (ctx->proj_type == PROJECTOR_TYPE_MLP_NORM) {
|
||||
return ctx->vision_model.mm_3_b->ne[0];
|
||||
diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp
|
||||
index 8c7dd2ae..38eeb305 100644
|
||||
--- a/examples/llava/llava-cli.cpp
|
||||
+++ b/examples/llava/llava-cli.cpp
|
||||
@@ -18,7 +18,10 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
|
||||
if (n_eval > n_batch) {
|
||||
n_eval = n_batch;
|
||||
}
|
||||
- if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval, *n_past, 0))) {
|
||||
+
|
||||
+ llama_batch my_batch = llama_batch_get_one(&tokens[i], n_eval, *n_past, 0);
|
||||
+ if (llama_decode(ctx_llama, my_batch))
|
||||
+ {
|
||||
LOG_TEE("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
|
||||
return false;
|
||||
}
|
||||
@@ -36,6 +39,11 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
|
||||
static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
|
||||
std::string str2 = str;
|
||||
std::vector<llama_token> embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos, true);
|
||||
+ embd_inp.push_back(108);
|
||||
+ for (int i = 0; i < embd_inp.size(); i++)
|
||||
+ {
|
||||
+ printf("token[%d]: %d\n", i, embd_inp[i]);
|
||||
+ }
|
||||
eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
|
||||
return true;
|
||||
}
|
||||
@@ -183,9 +191,17 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
|
||||
}
|
||||
}
|
||||
|
||||
- eval_string(ctx_llava->ctx_llama, system_prompt.c_str(), params->n_batch, &n_past, true);
|
||||
- llava_eval_image_embed(ctx_llava->ctx_llama, image_embed, params->n_batch, &n_past);
|
||||
- eval_string(ctx_llava->ctx_llama, user_prompt.c_str(), params->n_batch, &n_past, false);
|
||||
+ // build user prompt with 256 image tokens
|
||||
+ user_prompt = "What is in this image?";
|
||||
+ std::string image_token_prefix = "";
|
||||
+ for (int i = 0; i < 256; i++) {
|
||||
+ image_token_prefix += "<image>";
|
||||
+ }
|
||||
+ std::string user_prompt_with_images = image_token_prefix + "<bos>" + user_prompt;
|
||||
+
|
||||
+ llama_set_causal_attn(ctx_llava->ctx_llama, true);
|
||||
+ eval_string(ctx_llava->ctx_llama, user_prompt_with_images.c_str(), params->n_batch, &n_past, false);
|
||||
+ // llama_set_causal_attn(ctx_llava->ctx_llama, true);
|
||||
|
||||
// generate the response
|
||||
|
||||
@@ -324,6 +340,19 @@ int main(int argc, char ** argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
+ if (!image_embed || !image_embed->embed) {
|
||||
+ std::cerr << "Error: image_embed or image_embed->embed is null." << std::endl;
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ // image feature scaling
|
||||
+ float *data = image_embed->embed;
|
||||
+ for (int i = 0; i < 2048 * 256; i++) {
|
||||
+ data[i] = data[i] / sqrt(2048);
|
||||
+ }
|
||||
+
|
||||
+ set_image_embeds(ctx_llava->ctx_llama, image_embed->embed);
|
||||
+
|
||||
// process the prompt
|
||||
process_prompt(ctx_llava, image_embed, ¶ms, params.prompt);
|
||||
|
||||
diff --git a/include/llama.h b/include/llama.h
|
||||
index ce07f4fa..c3465d68 100644
|
||||
--- a/include/llama.h
|
||||
+++ b/include/llama.h
|
||||
@@ -444,6 +444,13 @@ extern "C" {
|
||||
// Frees all allocated memory
|
||||
LLAMA_API void llama_free(struct llama_context * ctx);
|
||||
|
||||
+ // save image embeddings
|
||||
+ LLAMA_API void set_image_embeds(struct llama_context *ctx, float *data);
|
||||
+
|
||||
+ LLAMA_API void print_embeds(struct llama_context *ctx);
|
||||
+
|
||||
+ LLAMA_API void print_causal(struct llama_context *ctx);
|
||||
+
|
||||
LLAMA_API int64_t llama_time_us(void);
|
||||
|
||||
LLAMA_API size_t llama_max_devices(void);
|
||||
diff --git a/src/llama.cpp b/src/llama.cpp
|
||||
index 7f2f0003..d5926202 100644
|
||||
--- a/src/llama.cpp
|
||||
+++ b/src/llama.cpp
|
||||
@@ -2677,6 +2677,7 @@ struct llama_context {
|
||||
|
||||
const struct llama_model & model;
|
||||
|
||||
+ float *image_embeds;
|
||||
struct llama_cparams cparams;
|
||||
struct llama_sampling sampling;
|
||||
struct llama_kv_cache kv_self;
|
||||
@@ -2760,6 +2761,33 @@ struct llama_context {
|
||||
struct ggml_tensor * inp_KQ_mask_cross; // F32 [n_outputs_enc, n_batch]
|
||||
};
|
||||
|
||||
+void set_image_embeds(llama_context *ctx, float *data) {
|
||||
+ ctx->image_embeds = data;
|
||||
+}
|
||||
+
|
||||
+void print_embeds(struct llama_context *ctx)
|
||||
+{
|
||||
+ if (ctx->image_embeds)
|
||||
+ {
|
||||
+ for (int i = 0; i < 256; i++)
|
||||
+ {
|
||||
+ LLAMA_LOG_INFO("%f ", ctx->image_embeds[i]);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+void print_causal(llama_context *ctx)
|
||||
+{
|
||||
+ if (ctx->cparams.causal_attn)
|
||||
+ {
|
||||
+ LLAMA_LOG_INFO("causal attn is true\n");
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ LLAMA_LOG_INFO("causal attn is false\n");
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
struct llama_lora_weight {
|
||||
struct ggml_tensor * a = nullptr;
|
||||
struct ggml_tensor * b = nullptr;
|
||||
@@ -3021,6 +3049,96 @@ static bool llama_kv_cache_init(
|
||||
return true;
|
||||
}
|
||||
|
||||
+void llama_log_tensor(ggml_tensor *tensor, char *filename)
|
||||
+{
|
||||
+ if (tensor == NULL)
|
||||
+ {
|
||||
+ fprintf(stderr, "Tensor is NULL\n");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ FILE *fp = fopen(filename, "wb");
|
||||
+ if (fp == NULL)
|
||||
+ {
|
||||
+ fprintf(stderr, "Failed to open file '%s'\n", filename);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ LLAMA_LOG_INFO("Tensor name: %s\n", tensor->name);
|
||||
+ LLAMA_LOG_INFO("Tensor type: ");
|
||||
+ switch (tensor->type)
|
||||
+ {
|
||||
+ case GGML_TYPE_F32:
|
||||
+ LLAMA_LOG_INFO("GGML_TYPE_F32\n");
|
||||
+ break;
|
||||
+ case GGML_TYPE_F16:
|
||||
+ printf("GGML_TYPE_F16\n");
|
||||
+ break;
|
||||
+ case GGML_TYPE_Q4_0:
|
||||
+ printf("GGML_TYPE_Q4_0\n");
|
||||
+ break;
|
||||
+ case GGML_TYPE_Q4_1:
|
||||
+ printf("GGML_TYPE_Q4_1\n");
|
||||
+ break;
|
||||
+ default:
|
||||
+ printf("Unknown\n");
|
||||
+ }
|
||||
+
|
||||
+ LLAMA_LOG_INFO("Tensor dimensions: ");
|
||||
+ for (int i = 0; i < GGML_MAX_DIMS; i++)
|
||||
+ {
|
||||
+ if (tensor->ne[i] == 1)
|
||||
+ break;
|
||||
+ printf("%ld ", tensor->ne[i]);
|
||||
+ }
|
||||
+ printf("\n");
|
||||
+
|
||||
+ size_t num_elements = ggml_nelements(tensor);
|
||||
+ LLAMA_LOG_INFO("num elements: %zu\n", num_elements);
|
||||
+
|
||||
+ LLAMA_LOG_INFO("Tensor data:\n");
|
||||
+ switch (tensor->type)
|
||||
+ {
|
||||
+ case GGML_TYPE_F32:
|
||||
+ {
|
||||
+ float *data = (float *)tensor->data;
|
||||
+ for (size_t i = 0; i < num_elements; i++)
|
||||
+ {
|
||||
+ fprintf(fp, "%f ", data[i]);
|
||||
+ if (i % 2048 == 0 && i != 0)
|
||||
+ {
|
||||
+ fprintf(fp, "\n");
|
||||
+ }
|
||||
+ }
|
||||
+ /* for (size_t i = 0; i < 25; i++)
|
||||
+ {
|
||||
+ LLAMA_LOG_INFO("%f ", data[i]);
|
||||
+ if (i % 2048 == 0 && i != 0)
|
||||
+ {
|
||||
+ LLAMA_LOG_INFO("\n");
|
||||
+ }
|
||||
+ } */
|
||||
+ }
|
||||
+ break;
|
||||
+ case GGML_TYPE_F16:
|
||||
+ {
|
||||
+ // Implement custom printing for fp16 data
|
||||
+ fprintf(fp, "F16 data (not shown)\n");
|
||||
+ }
|
||||
+ break;
|
||||
+ // For quantized types, you might need to implement custom printing logic
|
||||
+ case GGML_TYPE_Q4_0:
|
||||
+ case GGML_TYPE_Q4_1:
|
||||
+ fprintf(fp, "Quantized data (not shown)\n");
|
||||
+ break;
|
||||
+ default:
|
||||
+ fprintf(fp, "Unknown data type\n");
|
||||
+ }
|
||||
+ fprintf(fp, "\n");
|
||||
+
|
||||
+ fclose(fp);
|
||||
+}
|
||||
+
|
||||
// find an empty slot of size "n_tokens" in the cache
|
||||
// updates the cache head
|
||||
// Note: On success, it's important that cache.head points
|
||||
@@ -11660,6 +11778,18 @@ struct llm_build_context {
|
||||
|
||||
inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
|
||||
|
||||
+ // set the image embeddings in the input tensor
|
||||
+ if (lctx.image_embeds) {
|
||||
+ struct ggml_tensor *image_embeds = ggml_dup_tensor(ctx0, inpL);
|
||||
+ image_embeds->data = lctx.image_embeds;
|
||||
+ image_embeds->ne[1] = 256;
|
||||
+ print_embeds(&lctx);
|
||||
+ // llama_log_tensor(image_embeds, "/Users/joshyan/ollama/tensordata");
|
||||
+
|
||||
+ inpL = ggml_set_2d_inplace(ctx0, inpL, image_embeds, inpL->nb[1], 0);
|
||||
+ lctx.image_embeds = NULL;
|
||||
+ }
|
||||
+
|
||||
inpL = ggml_scale(ctx0, inpL, sqrtf(n_embd));
|
||||
cb(inpL, "inp_scaled", -1);
|
||||
|
||||
@@ -14678,7 +14808,7 @@ static int llama_decode_internal(
|
||||
}
|
||||
|
||||
// non-causal masks do not use the KV cache
|
||||
- if (hparams.causal_attn) {
|
||||
+ if (hparams.causal_attn || lctx.image_embeds) {
|
||||
llama_kv_cache_update(&lctx);
|
||||
|
||||
// if we have enough unused cells before the current head ->
|
||||
@@ -18565,6 +18695,12 @@ float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
|
||||
if (ctx->logits == nullptr) {
|
||||
throw std::runtime_error("no logits");
|
||||
}
|
||||
+ // LLAMA_LOG_INFO("CURRENTLY, I IS %d\n", i);
|
||||
+ // printf("currently, i is: %d", i);
|
||||
+ /* for (int i = 0; i < 263; i++)
|
||||
+ {
|
||||
+ printf("output_ids[%d]: %d\n", i, ctx->output_ids[i]);
|
||||
+ } */
|
||||
|
||||
if (i < 0) {
|
||||
j = ctx->n_outputs + i;
|
||||
@@ -18577,6 +18713,7 @@ float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
|
||||
j = ctx->output_ids[i];
|
||||
}
|
||||
|
||||
+ j = 0;
|
||||
if (j < 0) {
|
||||
throw std::runtime_error(format("batch.logits[%d] != true", i));
|
||||
}
|
||||
@@ -33,7 +33,7 @@ type LlamaServer interface {
|
||||
Ping(ctx context.Context) error
|
||||
WaitUntilRunning(ctx context.Context) error
|
||||
Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error
|
||||
Embed(ctx context.Context, input []string) (*EmbedResponse, error)
|
||||
Embedding(ctx context.Context, input string) ([]float32, error)
|
||||
Tokenize(ctx context.Context, content string) ([]int, error)
|
||||
Detokenize(ctx context.Context, tokens []int) (string, error)
|
||||
Close() error
|
||||
@@ -44,11 +44,12 @@ type LlamaServer interface {
|
||||
|
||||
// llmServer is an instance of the llama.cpp server
|
||||
type llmServer struct {
|
||||
port int
|
||||
cmd *exec.Cmd
|
||||
done chan error // Channel to signal when the process exits
|
||||
status *StatusWriter
|
||||
options api.Options
|
||||
port int
|
||||
cmd *exec.Cmd
|
||||
done chan error // Channel to signal when the process exits
|
||||
status *StatusWriter
|
||||
options api.Options
|
||||
numParallel int
|
||||
|
||||
estimate MemoryEstimate
|
||||
totalLayers uint64
|
||||
@@ -124,8 +125,9 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
}
|
||||
}
|
||||
|
||||
// On linux, over-allocating CPU memory will almost always result in an error
|
||||
if runtime.GOOS == "linux" {
|
||||
// On linux and windows, over-allocating CPU memory will almost always result in an error
|
||||
// Darwin has fully dynamic swap so has no direct concept of free swap space
|
||||
if runtime.GOOS != "darwin" {
|
||||
systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
|
||||
available := systemFreeMemory + systemSwapFreeMemory
|
||||
if systemMemoryRequired > available {
|
||||
@@ -177,22 +179,22 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
opts.NumGPU = 0
|
||||
if len(servers) == 0 {
|
||||
return nil, fmt.Errorf("no servers found for %v", gpus)
|
||||
}
|
||||
|
||||
params := []string{
|
||||
"--model", model,
|
||||
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
|
||||
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
|
||||
"--ctx-size", strconv.Itoa(opts.NumCtx),
|
||||
"--batch-size", strconv.Itoa(opts.NumBatch),
|
||||
"--embedding",
|
||||
}
|
||||
|
||||
params = append(params, "--log-disable")
|
||||
|
||||
if opts.NumGPU >= 0 {
|
||||
params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU))
|
||||
params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU))
|
||||
}
|
||||
|
||||
if envconfig.Debug() {
|
||||
@@ -200,7 +202,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
}
|
||||
|
||||
if opts.MainGPU > 0 {
|
||||
params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
|
||||
params = append(params, "--main-gpu", strconv.Itoa(opts.MainGPU))
|
||||
}
|
||||
|
||||
if len(adapters) > 0 {
|
||||
@@ -214,7 +216,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
}
|
||||
|
||||
if opts.NumThread > 0 {
|
||||
params = append(params, "--threads", fmt.Sprintf("%d", opts.NumThread))
|
||||
params = append(params, "--threads", strconv.Itoa(opts.NumThread))
|
||||
}
|
||||
|
||||
if !opts.F16KV {
|
||||
@@ -256,11 +258,17 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
params = append(params, "--mlock")
|
||||
}
|
||||
|
||||
if opts.UseNUMA {
|
||||
params = append(params, "--numa")
|
||||
if gpu.IsNUMA() {
|
||||
numaMode := "distribute"
|
||||
if runtime.GOOS == "linux" {
|
||||
if _, err := exec.LookPath("numactl"); err == nil {
|
||||
numaMode = "numactl"
|
||||
}
|
||||
}
|
||||
params = append(params, "--numa", numaMode)
|
||||
}
|
||||
|
||||
params = append(params, "--parallel", fmt.Sprintf("%d", numParallel))
|
||||
params = append(params, "--parallel", strconv.Itoa(numParallel))
|
||||
|
||||
if estimate.TensorSplit != "" {
|
||||
params = append(params, "--tensor-split", estimate.TensorSplit)
|
||||
@@ -337,6 +345,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
status: NewStatusWriter(os.Stderr),
|
||||
options: opts,
|
||||
estimate: estimate,
|
||||
numParallel: numParallel,
|
||||
sem: semaphore.NewWeighted(int64(numParallel)),
|
||||
totalLayers: ggml.KV().BlockCount() + 1,
|
||||
gpus: gpus,
|
||||
@@ -425,7 +434,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
if strings.Contains(s.status.LastErrMsg, "unknown model") {
|
||||
s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
|
||||
}
|
||||
s.done <- fmt.Errorf(s.status.LastErrMsg)
|
||||
s.done <- errors.New(s.status.LastErrMsg)
|
||||
} else {
|
||||
s.done <- err
|
||||
}
|
||||
@@ -724,7 +733,7 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
|
||||
"n_predict": req.Options.NumPredict,
|
||||
"n_keep": req.Options.NumKeep,
|
||||
"main_gpu": req.Options.MainGPU,
|
||||
"temperature": req.Options.Temperature,
|
||||
"temperature": 0,
|
||||
"top_k": req.Options.TopK,
|
||||
"top_p": req.Options.TopP,
|
||||
"min_p": req.Options.MinP,
|
||||
@@ -874,16 +883,15 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
|
||||
return nil
|
||||
}
|
||||
|
||||
type EmbedRequest struct {
|
||||
Content []string `json:"content"`
|
||||
type EmbeddingRequest struct {
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
type EmbedResponse struct {
|
||||
Embedding [][]float32 `json:"embedding"`
|
||||
PromptEvalCount int `json:"prompt_n"`
|
||||
type EmbeddingResponse struct {
|
||||
Embedding []float32 `json:"embedding"`
|
||||
}
|
||||
|
||||
func (s *llmServer) Embed(ctx context.Context, input []string) (*EmbedResponse, error) {
|
||||
func (s *llmServer) Embedding(ctx context.Context, input string) ([]float32, error) {
|
||||
if err := s.sem.Acquire(ctx, 1); err != nil {
|
||||
slog.Error("Failed to acquire semaphore", "error", err)
|
||||
return nil, err
|
||||
@@ -898,18 +906,18 @@ func (s *llmServer) Embed(ctx context.Context, input []string) (*EmbedResponse,
|
||||
return nil, fmt.Errorf("unexpected server status: %s", status.ToString())
|
||||
}
|
||||
|
||||
data, err := json.Marshal(EmbedRequest{Content: input})
|
||||
data, err := json.Marshal(EmbeddingRequest{Content: input})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error marshaling embed data: %w", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/embedding", s.port), bytes.NewBuffer(data))
|
||||
r, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/embedding", s.port), bytes.NewBuffer(data))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error creating embed request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
r.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
resp, err := http.DefaultClient.Do(r)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("do embedding request: %w", err)
|
||||
}
|
||||
@@ -925,12 +933,12 @@ func (s *llmServer) Embed(ctx context.Context, input []string) (*EmbedResponse,
|
||||
return nil, fmt.Errorf("%s", body)
|
||||
}
|
||||
|
||||
var e EmbedResponse
|
||||
var e EmbeddingResponse
|
||||
if err := json.Unmarshal(body, &e); err != nil {
|
||||
return nil, fmt.Errorf("unmarshal tokenize response: %w", err)
|
||||
}
|
||||
|
||||
return &e, nil
|
||||
return e.Embedding, nil
|
||||
}
|
||||
|
||||
type TokenizeRequest struct {
|
||||
|
||||
@@ -26,6 +26,7 @@ var errorPrefixes = []string{
|
||||
"cudaMalloc failed",
|
||||
"\"ERR\"",
|
||||
"error loading model",
|
||||
"GGML_ASSERT",
|
||||
}
|
||||
|
||||
func (w *StatusWriter) Write(b []byte) (int, error) {
|
||||
|
||||
3
main.go
3
main.go
@@ -3,8 +3,9 @@ package main
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/ollama/ollama/cmd"
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/ollama/ollama/cmd"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
@@ -14,6 +15,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
)
|
||||
@@ -164,9 +166,15 @@ type ListCompletion struct {
|
||||
}
|
||||
|
||||
type EmbeddingList struct {
|
||||
Object string `json:"object"`
|
||||
Data []Embedding `json:"data"`
|
||||
Model string `json:"model"`
|
||||
Object string `json:"object"`
|
||||
Data []Embedding `json:"data"`
|
||||
Model string `json:"model"`
|
||||
Usage EmbeddingUsage `json:"usage,omitempty"`
|
||||
}
|
||||
|
||||
type EmbeddingUsage struct {
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
}
|
||||
|
||||
func NewError(code int, message string) ErrorResponse {
|
||||
@@ -332,6 +340,10 @@ func toEmbeddingList(model string, r api.EmbedResponse) EmbeddingList {
|
||||
Object: "list",
|
||||
Data: data,
|
||||
Model: model,
|
||||
Usage: EmbeddingUsage{
|
||||
PromptTokens: r.PromptEvalCount,
|
||||
TotalTokens: r.PromptEvalCount,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -357,24 +369,24 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
|
||||
for _, c := range content {
|
||||
data, ok := c.(map[string]any)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
return nil, errors.New("invalid message format")
|
||||
}
|
||||
switch data["type"] {
|
||||
case "text":
|
||||
text, ok := data["text"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
return nil, errors.New("invalid message format")
|
||||
}
|
||||
messages = append(messages, api.Message{Role: msg.Role, Content: text})
|
||||
case "image_url":
|
||||
var url string
|
||||
if urlMap, ok := data["image_url"].(map[string]any); ok {
|
||||
if url, ok = urlMap["url"].(string); !ok {
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
return nil, errors.New("invalid message format")
|
||||
}
|
||||
} else {
|
||||
if url, ok = data["image_url"].(string); !ok {
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
return nil, errors.New("invalid message format")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -390,17 +402,17 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
|
||||
}
|
||||
|
||||
if !valid {
|
||||
return nil, fmt.Errorf("invalid image input")
|
||||
return nil, errors.New("invalid image input")
|
||||
}
|
||||
|
||||
img, err := base64.StdEncoding.DecodeString(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
return nil, errors.New("invalid message format")
|
||||
}
|
||||
|
||||
messages = append(messages, api.Message{Role: msg.Role, Images: []api.ImageData{img}})
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
return nil, errors.New("invalid message format")
|
||||
}
|
||||
}
|
||||
default:
|
||||
@@ -413,7 +425,7 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
|
||||
toolCalls[i].Function.Name = tc.Function.Name
|
||||
err := json.Unmarshal([]byte(tc.Function.Arguments), &toolCalls[i].Function.Arguments)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid tool call arguments")
|
||||
return nil, errors.New("invalid tool call arguments")
|
||||
}
|
||||
}
|
||||
messages = append(messages, api.Message{Role: msg.Role, ToolCalls: toolCalls})
|
||||
@@ -727,14 +739,12 @@ func (w *RetrieveWriter) Write(data []byte) (int, error) {
|
||||
func (w *EmbedWriter) writeResponse(data []byte) (int, error) {
|
||||
var embedResponse api.EmbedResponse
|
||||
err := json.Unmarshal(data, &embedResponse)
|
||||
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
w.ResponseWriter.Header().Set("Content-Type", "application/json")
|
||||
err = json.NewEncoder(w.ResponseWriter).Encode(toEmbeddingList(w.model, embedResponse))
|
||||
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
@@ -7,24 +7,22 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
const prefix = `data:image/jpeg;base64,`
|
||||
const image = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
|
||||
const imageURL = prefix + image
|
||||
const (
|
||||
prefix = `data:image/jpeg;base64,`
|
||||
image = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
|
||||
)
|
||||
|
||||
func prepareRequest(req *http.Request, body any) {
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
req.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
}
|
||||
var False = false
|
||||
|
||||
func captureRequestMiddleware(capturedRequest any) gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
@@ -40,134 +38,136 @@ func captureRequestMiddleware(capturedRequest any) gin.HandlerFunc {
|
||||
|
||||
func TestChatMiddleware(t *testing.T) {
|
||||
type testCase struct {
|
||||
Name string
|
||||
Setup func(t *testing.T, req *http.Request)
|
||||
Expected func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder)
|
||||
name string
|
||||
body string
|
||||
req api.ChatRequest
|
||||
err ErrorResponse
|
||||
}
|
||||
|
||||
var capturedRequest *api.ChatRequest
|
||||
|
||||
testCases := []testCase{
|
||||
{
|
||||
Name: "chat handler",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
body := ChatCompletionRequest{
|
||||
Model: "test-model",
|
||||
Messages: []Message{{Role: "user", Content: "Hello"}},
|
||||
}
|
||||
prepareRequest(req, body)
|
||||
},
|
||||
Expected: func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder) {
|
||||
if resp.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", resp.Code)
|
||||
}
|
||||
|
||||
if req.Messages[0].Role != "user" {
|
||||
t.Fatalf("expected 'user', got %s", req.Messages[0].Role)
|
||||
}
|
||||
|
||||
if req.Messages[0].Content != "Hello" {
|
||||
t.Fatalf("expected 'Hello', got %s", req.Messages[0].Content)
|
||||
}
|
||||
name: "chat handler",
|
||||
body: `{
|
||||
"model": "test-model",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello"}
|
||||
]
|
||||
}`,
|
||||
req: api.ChatRequest{
|
||||
Model: "test-model",
|
||||
Messages: []api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "Hello",
|
||||
},
|
||||
},
|
||||
Options: map[string]any{
|
||||
"temperature": 1.0,
|
||||
"top_p": 1.0,
|
||||
},
|
||||
Stream: &False,
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "chat handler with image content",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
body := ChatCompletionRequest{
|
||||
Model: "test-model",
|
||||
Messages: []Message{
|
||||
{
|
||||
Role: "user", Content: []map[string]any{
|
||||
{"type": "text", "text": "Hello"},
|
||||
{"type": "image_url", "image_url": map[string]string{"url": imageURL}},
|
||||
name: "chat handler with image content",
|
||||
body: `{
|
||||
"model": "test-model",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Hello"
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "` + prefix + image + `"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`,
|
||||
req: api.ChatRequest{
|
||||
Model: "test-model",
|
||||
Messages: []api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "Hello",
|
||||
},
|
||||
{
|
||||
Role: "user",
|
||||
Images: []api.ImageData{
|
||||
func() []byte {
|
||||
img, _ := base64.StdEncoding.DecodeString(image)
|
||||
return img
|
||||
}(),
|
||||
},
|
||||
},
|
||||
},
|
||||
Options: map[string]any{
|
||||
"temperature": 1.0,
|
||||
"top_p": 1.0,
|
||||
},
|
||||
Stream: &False,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "chat handler with tools",
|
||||
body: `{
|
||||
"model": "test-model",
|
||||
"messages": [
|
||||
{"role": "user", "content": "What's the weather like in Paris Today?"},
|
||||
{"role": "assistant", "tool_calls": [{"id": "id", "type": "function", "function": {"name": "get_current_weather", "arguments": "{\"location\": \"Paris, France\", \"format\": \"celsius\"}"}}]}
|
||||
]
|
||||
}`,
|
||||
req: api.ChatRequest{
|
||||
Model: "test-model",
|
||||
Messages: []api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "What's the weather like in Paris Today?",
|
||||
},
|
||||
{
|
||||
Role: "assistant",
|
||||
ToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get_current_weather",
|
||||
Arguments: map[string]interface{}{
|
||||
"location": "Paris, France",
|
||||
"format": "celsius",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
prepareRequest(req, body)
|
||||
},
|
||||
Expected: func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder) {
|
||||
if resp.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", resp.Code)
|
||||
}
|
||||
|
||||
if req.Messages[0].Role != "user" {
|
||||
t.Fatalf("expected 'user', got %s", req.Messages[0].Role)
|
||||
}
|
||||
|
||||
if req.Messages[0].Content != "Hello" {
|
||||
t.Fatalf("expected 'Hello', got %s", req.Messages[0].Content)
|
||||
}
|
||||
|
||||
img, _ := base64.StdEncoding.DecodeString(imageURL[len(prefix):])
|
||||
|
||||
if req.Messages[1].Role != "user" {
|
||||
t.Fatalf("expected 'user', got %s", req.Messages[1].Role)
|
||||
}
|
||||
|
||||
if !bytes.Equal(req.Messages[1].Images[0], img) {
|
||||
t.Fatalf("expected image encoding, got %s", req.Messages[1].Images[0])
|
||||
}
|
||||
},
|
||||
Options: map[string]any{
|
||||
"temperature": 1.0,
|
||||
"top_p": 1.0,
|
||||
},
|
||||
Stream: &False,
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
Name: "chat handler with tools",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
body := ChatCompletionRequest{
|
||||
Model: "test-model",
|
||||
Messages: []Message{
|
||||
{Role: "user", Content: "What's the weather like in Paris Today?"},
|
||||
{Role: "assistant", ToolCalls: []ToolCall{{
|
||||
ID: "id",
|
||||
Type: "function",
|
||||
Function: struct {
|
||||
Name string `json:"name"`
|
||||
Arguments string `json:"arguments"`
|
||||
}{
|
||||
Name: "get_current_weather",
|
||||
Arguments: "{\"location\": \"Paris, France\", \"format\": \"celsius\"}",
|
||||
},
|
||||
}}},
|
||||
},
|
||||
}
|
||||
prepareRequest(req, body)
|
||||
},
|
||||
Expected: func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder) {
|
||||
if resp.Code != 200 {
|
||||
t.Fatalf("expected 200, got %d", resp.Code)
|
||||
}
|
||||
|
||||
if req.Messages[0].Content != "What's the weather like in Paris Today?" {
|
||||
t.Fatalf("expected What's the weather like in Paris Today?, got %s", req.Messages[0].Content)
|
||||
}
|
||||
|
||||
if req.Messages[1].ToolCalls[0].Function.Arguments["location"] != "Paris, France" {
|
||||
t.Fatalf("expected 'Paris, France', got %v", req.Messages[1].ToolCalls[0].Function.Arguments["location"])
|
||||
}
|
||||
|
||||
if req.Messages[1].ToolCalls[0].Function.Arguments["format"] != "celsius" {
|
||||
t.Fatalf("expected celsius, got %v", req.Messages[1].ToolCalls[0].Function.Arguments["format"])
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "chat handler error forwarding",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
body := ChatCompletionRequest{
|
||||
Model: "test-model",
|
||||
Messages: []Message{{Role: "user", Content: 2}},
|
||||
}
|
||||
prepareRequest(req, body)
|
||||
},
|
||||
Expected: func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder) {
|
||||
if resp.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected 400, got %d", resp.Code)
|
||||
}
|
||||
|
||||
if !strings.Contains(resp.Body.String(), "invalid message content type") {
|
||||
t.Fatalf("error was not forwarded")
|
||||
}
|
||||
name: "chat handler error forwarding",
|
||||
body: `{
|
||||
"model": "test-model",
|
||||
"messages": [
|
||||
{"role": "user", "content": 2}
|
||||
]
|
||||
}`,
|
||||
err: ErrorResponse{
|
||||
Error: Error{
|
||||
Message: "invalid message content type: float64",
|
||||
Type: "invalid_request_error",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -182,16 +182,26 @@ func TestChatMiddleware(t *testing.T) {
|
||||
router.Handle(http.MethodPost, "/api/chat", endpoint)
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
req, _ := http.NewRequest(http.MethodPost, "/api/chat", nil)
|
||||
|
||||
tc.Setup(t, req)
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
req, _ := http.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(tc.body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp := httptest.NewRecorder()
|
||||
router.ServeHTTP(resp, req)
|
||||
|
||||
tc.Expected(t, capturedRequest, resp)
|
||||
var errResp ErrorResponse
|
||||
if resp.Code != http.StatusOK {
|
||||
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
if capturedRequest != nil && !reflect.DeepEqual(tc.req, *capturedRequest) {
|
||||
t.Fatal("requests did not match")
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(tc.err, errResp) {
|
||||
t.Fatal("errors did not match")
|
||||
}
|
||||
capturedRequest = nil
|
||||
})
|
||||
}
|
||||
@@ -199,71 +209,52 @@ func TestChatMiddleware(t *testing.T) {
|
||||
|
||||
func TestCompletionsMiddleware(t *testing.T) {
|
||||
type testCase struct {
|
||||
Name string
|
||||
Setup func(t *testing.T, req *http.Request)
|
||||
Expected func(t *testing.T, req *api.GenerateRequest, resp *httptest.ResponseRecorder)
|
||||
name string
|
||||
body string
|
||||
req api.GenerateRequest
|
||||
err ErrorResponse
|
||||
}
|
||||
|
||||
var capturedRequest *api.GenerateRequest
|
||||
|
||||
testCases := []testCase{
|
||||
{
|
||||
Name: "completions handler",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
temp := float32(0.8)
|
||||
body := CompletionRequest{
|
||||
Model: "test-model",
|
||||
Prompt: "Hello",
|
||||
Temperature: &temp,
|
||||
Stop: []string{"\n", "stop"},
|
||||
Suffix: "suffix",
|
||||
}
|
||||
prepareRequest(req, body)
|
||||
},
|
||||
Expected: func(t *testing.T, req *api.GenerateRequest, resp *httptest.ResponseRecorder) {
|
||||
if req.Prompt != "Hello" {
|
||||
t.Fatalf("expected 'Hello', got %s", req.Prompt)
|
||||
}
|
||||
|
||||
if req.Options["temperature"] != 1.6 {
|
||||
t.Fatalf("expected 1.6, got %f", req.Options["temperature"])
|
||||
}
|
||||
|
||||
stopTokens, ok := req.Options["stop"].([]any)
|
||||
|
||||
if !ok {
|
||||
t.Fatalf("expected stop tokens to be a list")
|
||||
}
|
||||
|
||||
if stopTokens[0] != "\n" || stopTokens[1] != "stop" {
|
||||
t.Fatalf("expected ['\\n', 'stop'], got %v", stopTokens)
|
||||
}
|
||||
|
||||
if req.Suffix != "suffix" {
|
||||
t.Fatalf("expected 'suffix', got %s", req.Suffix)
|
||||
}
|
||||
name: "completions handler",
|
||||
body: `{
|
||||
"model": "test-model",
|
||||
"prompt": "Hello",
|
||||
"temperature": 0.8,
|
||||
"stop": ["\n", "stop"],
|
||||
"suffix": "suffix"
|
||||
}`,
|
||||
req: api.GenerateRequest{
|
||||
Model: "test-model",
|
||||
Prompt: "Hello",
|
||||
Options: map[string]any{
|
||||
"frequency_penalty": 0.0,
|
||||
"presence_penalty": 0.0,
|
||||
"temperature": 1.6,
|
||||
"top_p": 1.0,
|
||||
"stop": []any{"\n", "stop"},
|
||||
},
|
||||
Suffix: "suffix",
|
||||
Stream: &False,
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "completions handler error forwarding",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
body := CompletionRequest{
|
||||
Model: "test-model",
|
||||
Prompt: "Hello",
|
||||
Temperature: nil,
|
||||
Stop: []int{1, 2},
|
||||
Suffix: "suffix",
|
||||
}
|
||||
prepareRequest(req, body)
|
||||
},
|
||||
Expected: func(t *testing.T, req *api.GenerateRequest, resp *httptest.ResponseRecorder) {
|
||||
if resp.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected 400, got %d", resp.Code)
|
||||
}
|
||||
|
||||
if !strings.Contains(resp.Body.String(), "invalid type for 'stop' field") {
|
||||
t.Fatalf("error was not forwarded")
|
||||
}
|
||||
name: "completions handler error forwarding",
|
||||
body: `{
|
||||
"model": "test-model",
|
||||
"prompt": "Hello",
|
||||
"temperature": null,
|
||||
"stop": [1, 2],
|
||||
"suffix": "suffix"
|
||||
}`,
|
||||
err: ErrorResponse{
|
||||
Error: Error{
|
||||
Message: "invalid type for 'stop' field: float64",
|
||||
Type: "invalid_request_error",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -278,15 +269,27 @@ func TestCompletionsMiddleware(t *testing.T) {
|
||||
router.Handle(http.MethodPost, "/api/generate", endpoint)
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
req, _ := http.NewRequest(http.MethodPost, "/api/generate", nil)
|
||||
|
||||
tc.Setup(t, req)
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
req, _ := http.NewRequest(http.MethodPost, "/api/generate", strings.NewReader(tc.body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp := httptest.NewRecorder()
|
||||
router.ServeHTTP(resp, req)
|
||||
|
||||
tc.Expected(t, capturedRequest, resp)
|
||||
var errResp ErrorResponse
|
||||
if resp.Code != http.StatusOK {
|
||||
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
if capturedRequest != nil && !reflect.DeepEqual(tc.req, *capturedRequest) {
|
||||
t.Fatal("requests did not match")
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(tc.err, errResp) {
|
||||
t.Fatal("errors did not match")
|
||||
}
|
||||
|
||||
capturedRequest = nil
|
||||
})
|
||||
@@ -295,78 +298,47 @@ func TestCompletionsMiddleware(t *testing.T) {
|
||||
|
||||
func TestEmbeddingsMiddleware(t *testing.T) {
|
||||
type testCase struct {
|
||||
Name string
|
||||
Setup func(t *testing.T, req *http.Request)
|
||||
Expected func(t *testing.T, req *api.EmbedRequest, resp *httptest.ResponseRecorder)
|
||||
name string
|
||||
body string
|
||||
req api.EmbedRequest
|
||||
err ErrorResponse
|
||||
}
|
||||
|
||||
var capturedRequest *api.EmbedRequest
|
||||
|
||||
testCases := []testCase{
|
||||
{
|
||||
Name: "embed handler single input",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
body := EmbedRequest{
|
||||
Input: "Hello",
|
||||
Model: "test-model",
|
||||
}
|
||||
prepareRequest(req, body)
|
||||
},
|
||||
Expected: func(t *testing.T, req *api.EmbedRequest, resp *httptest.ResponseRecorder) {
|
||||
if req.Input != "Hello" {
|
||||
t.Fatalf("expected 'Hello', got %s", req.Input)
|
||||
}
|
||||
|
||||
if req.Model != "test-model" {
|
||||
t.Fatalf("expected 'test-model', got %s", req.Model)
|
||||
}
|
||||
name: "embed handler single input",
|
||||
body: `{
|
||||
"input": "Hello",
|
||||
"model": "test-model"
|
||||
}`,
|
||||
req: api.EmbedRequest{
|
||||
Input: "Hello",
|
||||
Model: "test-model",
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "embed handler batch input",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
body := EmbedRequest{
|
||||
Input: []string{"Hello", "World"},
|
||||
Model: "test-model",
|
||||
}
|
||||
prepareRequest(req, body)
|
||||
},
|
||||
Expected: func(t *testing.T, req *api.EmbedRequest, resp *httptest.ResponseRecorder) {
|
||||
input, ok := req.Input.([]any)
|
||||
|
||||
if !ok {
|
||||
t.Fatalf("expected input to be a list")
|
||||
}
|
||||
|
||||
if input[0].(string) != "Hello" {
|
||||
t.Fatalf("expected 'Hello', got %s", input[0])
|
||||
}
|
||||
|
||||
if input[1].(string) != "World" {
|
||||
t.Fatalf("expected 'World', got %s", input[1])
|
||||
}
|
||||
|
||||
if req.Model != "test-model" {
|
||||
t.Fatalf("expected 'test-model', got %s", req.Model)
|
||||
}
|
||||
name: "embed handler batch input",
|
||||
body: `{
|
||||
"input": ["Hello", "World"],
|
||||
"model": "test-model"
|
||||
}`,
|
||||
req: api.EmbedRequest{
|
||||
Input: []any{"Hello", "World"},
|
||||
Model: "test-model",
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "embed handler error forwarding",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
body := EmbedRequest{
|
||||
Model: "test-model",
|
||||
}
|
||||
prepareRequest(req, body)
|
||||
},
|
||||
Expected: func(t *testing.T, req *api.EmbedRequest, resp *httptest.ResponseRecorder) {
|
||||
if resp.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected 400, got %d", resp.Code)
|
||||
}
|
||||
|
||||
if !strings.Contains(resp.Body.String(), "invalid input") {
|
||||
t.Fatalf("error was not forwarded")
|
||||
}
|
||||
name: "embed handler error forwarding",
|
||||
body: `{
|
||||
"model": "test-model"
|
||||
}`,
|
||||
err: ErrorResponse{
|
||||
Error: Error{
|
||||
Message: "invalid input",
|
||||
Type: "invalid_request_error",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -381,116 +353,167 @@ func TestEmbeddingsMiddleware(t *testing.T) {
|
||||
router.Handle(http.MethodPost, "/api/embed", endpoint)
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
req, _ := http.NewRequest(http.MethodPost, "/api/embed", nil)
|
||||
|
||||
tc.Setup(t, req)
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
req, _ := http.NewRequest(http.MethodPost, "/api/embed", strings.NewReader(tc.body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp := httptest.NewRecorder()
|
||||
router.ServeHTTP(resp, req)
|
||||
|
||||
tc.Expected(t, capturedRequest, resp)
|
||||
var errResp ErrorResponse
|
||||
if resp.Code != http.StatusOK {
|
||||
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
if capturedRequest != nil && !reflect.DeepEqual(tc.req, *capturedRequest) {
|
||||
t.Fatal("requests did not match")
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(tc.err, errResp) {
|
||||
t.Fatal("errors did not match")
|
||||
}
|
||||
|
||||
capturedRequest = nil
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMiddlewareResponses(t *testing.T) {
|
||||
func TestListMiddleware(t *testing.T) {
|
||||
type testCase struct {
|
||||
Name string
|
||||
Method string
|
||||
Path string
|
||||
TestPath string
|
||||
Handler func() gin.HandlerFunc
|
||||
Endpoint func(c *gin.Context)
|
||||
Setup func(t *testing.T, req *http.Request)
|
||||
Expected func(t *testing.T, resp *httptest.ResponseRecorder)
|
||||
name string
|
||||
endpoint func(c *gin.Context)
|
||||
resp string
|
||||
}
|
||||
|
||||
testCases := []testCase{
|
||||
{
|
||||
Name: "list handler",
|
||||
Method: http.MethodGet,
|
||||
Path: "/api/tags",
|
||||
TestPath: "/api/tags",
|
||||
Handler: ListMiddleware,
|
||||
Endpoint: func(c *gin.Context) {
|
||||
name: "list handler",
|
||||
endpoint: func(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, api.ListResponse{
|
||||
Models: []api.ListModelResponse{
|
||||
{
|
||||
Name: "Test Model",
|
||||
Name: "test-model",
|
||||
ModifiedAt: time.Unix(int64(1686935002), 0).UTC(),
|
||||
},
|
||||
},
|
||||
})
|
||||
},
|
||||
Expected: func(t *testing.T, resp *httptest.ResponseRecorder) {
|
||||
var listResp ListCompletion
|
||||
if err := json.NewDecoder(resp.Body).Decode(&listResp); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if listResp.Object != "list" {
|
||||
t.Fatalf("expected list, got %s", listResp.Object)
|
||||
}
|
||||
|
||||
if len(listResp.Data) != 1 {
|
||||
t.Fatalf("expected 1, got %d", len(listResp.Data))
|
||||
}
|
||||
|
||||
if listResp.Data[0].Id != "Test Model" {
|
||||
t.Fatalf("expected Test Model, got %s", listResp.Data[0].Id)
|
||||
}
|
||||
},
|
||||
resp: `{
|
||||
"object": "list",
|
||||
"data": [
|
||||
{
|
||||
"id": "test-model",
|
||||
"object": "model",
|
||||
"created": 1686935002,
|
||||
"owned_by": "library"
|
||||
}
|
||||
]
|
||||
}`,
|
||||
},
|
||||
{
|
||||
Name: "retrieve model",
|
||||
Method: http.MethodGet,
|
||||
Path: "/api/show/:model",
|
||||
TestPath: "/api/show/test-model",
|
||||
Handler: RetrieveMiddleware,
|
||||
Endpoint: func(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, api.ShowResponse{
|
||||
ModifiedAt: time.Date(2024, 6, 17, 13, 45, 0, 0, time.UTC),
|
||||
})
|
||||
},
|
||||
Expected: func(t *testing.T, resp *httptest.ResponseRecorder) {
|
||||
var retrieveResp Model
|
||||
if err := json.NewDecoder(resp.Body).Decode(&retrieveResp); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if retrieveResp.Object != "model" {
|
||||
t.Fatalf("Expected object to be model, got %s", retrieveResp.Object)
|
||||
}
|
||||
|
||||
if retrieveResp.Id != "test-model" {
|
||||
t.Fatalf("Expected id to be test-model, got %s", retrieveResp.Id)
|
||||
}
|
||||
name: "list handler empty output",
|
||||
endpoint: func(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, api.ListResponse{})
|
||||
},
|
||||
resp: `{
|
||||
"object": "list",
|
||||
"data": null
|
||||
}`,
|
||||
},
|
||||
}
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
router := gin.New()
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
router = gin.New()
|
||||
router.Use(tc.Handler())
|
||||
router.Handle(tc.Method, tc.Path, tc.Endpoint)
|
||||
req, _ := http.NewRequest(tc.Method, tc.TestPath, nil)
|
||||
router := gin.New()
|
||||
router.Use(ListMiddleware())
|
||||
router.Handle(http.MethodGet, "/api/tags", tc.endpoint)
|
||||
req, _ := http.NewRequest(http.MethodGet, "/api/tags", nil)
|
||||
|
||||
if tc.Setup != nil {
|
||||
tc.Setup(t, req)
|
||||
}
|
||||
resp := httptest.NewRecorder()
|
||||
router.ServeHTTP(resp, req)
|
||||
|
||||
resp := httptest.NewRecorder()
|
||||
router.ServeHTTP(resp, req)
|
||||
var expected, actual map[string]any
|
||||
err := json.Unmarshal([]byte(tc.resp), &expected)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to unmarshal expected response: %v", err)
|
||||
}
|
||||
|
||||
assert.Equal(t, http.StatusOK, resp.Code)
|
||||
err = json.Unmarshal(resp.Body.Bytes(), &actual)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to unmarshal actual response: %v", err)
|
||||
}
|
||||
|
||||
tc.Expected(t, resp)
|
||||
})
|
||||
if !reflect.DeepEqual(expected, actual) {
|
||||
t.Errorf("responses did not match\nExpected: %+v\nActual: %+v", expected, actual)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRetrieveMiddleware(t *testing.T) {
|
||||
type testCase struct {
|
||||
name string
|
||||
endpoint func(c *gin.Context)
|
||||
resp string
|
||||
}
|
||||
|
||||
testCases := []testCase{
|
||||
{
|
||||
name: "retrieve handler",
|
||||
endpoint: func(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, api.ShowResponse{
|
||||
ModifiedAt: time.Unix(int64(1686935002), 0).UTC(),
|
||||
})
|
||||
},
|
||||
resp: `{
|
||||
"id":"test-model",
|
||||
"object":"model",
|
||||
"created":1686935002,
|
||||
"owned_by":"library"}
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "retrieve handler error forwarding",
|
||||
endpoint: func(c *gin.Context) {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "model not found"})
|
||||
},
|
||||
resp: `{
|
||||
"error": {
|
||||
"code": null,
|
||||
"message": "model not found",
|
||||
"param": null,
|
||||
"type": "api_error"
|
||||
}
|
||||
}`,
|
||||
},
|
||||
}
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
for _, tc := range testCases {
|
||||
router := gin.New()
|
||||
router.Use(RetrieveMiddleware())
|
||||
router.Handle(http.MethodGet, "/api/show/:model", tc.endpoint)
|
||||
req, _ := http.NewRequest(http.MethodGet, "/api/show/test-model", nil)
|
||||
|
||||
resp := httptest.NewRecorder()
|
||||
router.ServeHTTP(resp, req)
|
||||
|
||||
var expected, actual map[string]any
|
||||
err := json.Unmarshal([]byte(tc.resp), &expected)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to unmarshal expected response: %v", err)
|
||||
}
|
||||
|
||||
err = json.Unmarshal(resp.Body.Bytes(), &actual)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to unmarshal actual response: %v", err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(expected, actual) {
|
||||
t.Errorf("responses did not match\nExpected: %+v\nActual: %+v", expected, actual)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -82,7 +82,7 @@ TEMPLATE """ {{ if .System }}<|start_header_id|>system<|end_header_id|>
|
||||
}
|
||||
|
||||
func TestParseFileFrom(t *testing.T) {
|
||||
var cases = []struct {
|
||||
cases := []struct {
|
||||
input string
|
||||
expected []Command
|
||||
err error
|
||||
@@ -185,7 +185,7 @@ BADCOMMAND param1 value1
|
||||
}
|
||||
|
||||
func TestParseFileMessages(t *testing.T) {
|
||||
var cases = []struct {
|
||||
cases := []struct {
|
||||
input string
|
||||
expected []Command
|
||||
err error
|
||||
@@ -276,7 +276,7 @@ MESSAGE system`,
|
||||
}
|
||||
|
||||
func TestParseFileQuoted(t *testing.T) {
|
||||
var cases = []struct {
|
||||
cases := []struct {
|
||||
multiline string
|
||||
expected []Command
|
||||
err error
|
||||
@@ -430,7 +430,7 @@ TEMPLATE """
|
||||
}
|
||||
|
||||
func TestParseFileParameters(t *testing.T) {
|
||||
var cases = map[string]struct {
|
||||
cases := map[string]struct {
|
||||
name, value string
|
||||
}{
|
||||
"numa true": {"numa", "true"},
|
||||
@@ -491,7 +491,7 @@ func TestParseFileParameters(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestParseFileComments(t *testing.T) {
|
||||
var cases = []struct {
|
||||
cases := []struct {
|
||||
input string
|
||||
expected []Command
|
||||
}{
|
||||
@@ -516,7 +516,7 @@ FROM foo
|
||||
}
|
||||
|
||||
func TestParseFileFormatParseFile(t *testing.T) {
|
||||
var cases = []string{
|
||||
cases := []string{
|
||||
`
|
||||
FROM foo
|
||||
ADAPTER adapter1
|
||||
|
||||
@@ -6,8 +6,9 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/format"
|
||||
"golang.org/x/term"
|
||||
|
||||
"github.com/ollama/ollama/format"
|
||||
)
|
||||
|
||||
type Bar struct {
|
||||
|
||||
@@ -3,11 +3,12 @@ package progress
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Spinner struct {
|
||||
message string
|
||||
message atomic.Value
|
||||
messageWidth int
|
||||
|
||||
parts []string
|
||||
@@ -21,20 +22,25 @@ type Spinner struct {
|
||||
|
||||
func NewSpinner(message string) *Spinner {
|
||||
s := &Spinner{
|
||||
message: message,
|
||||
parts: []string{
|
||||
"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏",
|
||||
},
|
||||
started: time.Now(),
|
||||
}
|
||||
s.SetMessage(message)
|
||||
go s.start()
|
||||
return s
|
||||
}
|
||||
|
||||
func (s *Spinner) SetMessage(message string) {
|
||||
s.message.Store(message)
|
||||
}
|
||||
|
||||
func (s *Spinner) String() string {
|
||||
var sb strings.Builder
|
||||
if len(s.message) > 0 {
|
||||
message := strings.TrimSpace(s.message)
|
||||
|
||||
if message, ok := s.message.Load().(string); ok && len(message) > 0 {
|
||||
message := strings.TrimSpace(message)
|
||||
if s.messageWidth > 0 && len(message) > s.messageWidth {
|
||||
message = message[:s.messageWidth]
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ type Buffer struct {
|
||||
DisplayPos int
|
||||
Pos int
|
||||
Buf *arraylist.List
|
||||
//LineHasSpace is an arraylist of bools to keep track of whether a line has a space at the end
|
||||
// LineHasSpace is an arraylist of bools to keep track of whether a line has a space at the end
|
||||
LineHasSpace *arraylist.List
|
||||
Prompt *Prompt
|
||||
LineWidth int
|
||||
@@ -56,13 +56,13 @@ func (b *Buffer) GetLineSpacing(line int) bool {
|
||||
|
||||
func (b *Buffer) MoveLeft() {
|
||||
if b.Pos > 0 {
|
||||
//asserts that we retrieve a rune
|
||||
// asserts that we retrieve a rune
|
||||
if e, ok := b.Buf.Get(b.Pos - 1); ok {
|
||||
if r, ok := e.(rune); ok {
|
||||
rLength := runewidth.RuneWidth(r)
|
||||
|
||||
if b.DisplayPos%b.LineWidth == 0 {
|
||||
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
|
||||
fmt.Print(CursorUp + CursorBOL + CursorRightN(b.Width))
|
||||
if rLength == 2 {
|
||||
fmt.Print(CursorLeft)
|
||||
}
|
||||
@@ -74,7 +74,7 @@ func (b *Buffer) MoveLeft() {
|
||||
fmt.Print(CursorLeft)
|
||||
}
|
||||
} else {
|
||||
fmt.Print(cursorLeftN(rLength))
|
||||
fmt.Print(CursorLeftN(rLength))
|
||||
}
|
||||
|
||||
b.Pos -= 1
|
||||
@@ -115,15 +115,15 @@ func (b *Buffer) MoveRight() {
|
||||
b.DisplayPos += rLength
|
||||
|
||||
if b.DisplayPos%b.LineWidth == 0 {
|
||||
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())))
|
||||
fmt.Print(CursorDown + CursorBOL + CursorRightN(len(b.Prompt.prompt())))
|
||||
} else if (b.DisplayPos-rLength)%b.LineWidth == b.LineWidth-1 && hasSpace {
|
||||
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())+rLength))
|
||||
fmt.Print(CursorDown + CursorBOL + CursorRightN(len(b.Prompt.prompt())+rLength))
|
||||
b.DisplayPos += 1
|
||||
} else if b.LineHasSpace.Size() > 0 && b.DisplayPos%b.LineWidth == b.LineWidth-1 && hasSpace {
|
||||
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())))
|
||||
fmt.Print(CursorDown + CursorBOL + CursorRightN(len(b.Prompt.prompt())))
|
||||
b.DisplayPos += 1
|
||||
} else {
|
||||
fmt.Print(cursorRightN(rLength))
|
||||
fmt.Print(CursorRightN(rLength))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -154,7 +154,7 @@ func (b *Buffer) MoveToStart() {
|
||||
fmt.Print(CursorUp)
|
||||
}
|
||||
}
|
||||
fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt())))
|
||||
fmt.Print(CursorBOL + CursorRightN(len(b.Prompt.prompt())))
|
||||
b.Pos = 0
|
||||
b.DisplayPos = 0
|
||||
}
|
||||
@@ -169,9 +169,9 @@ func (b *Buffer) MoveToEnd() {
|
||||
fmt.Print(CursorDown)
|
||||
}
|
||||
remainder := b.DisplaySize() % b.LineWidth
|
||||
fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt())+remainder))
|
||||
fmt.Print(CursorBOL + CursorRightN(len(b.Prompt.prompt())+remainder))
|
||||
} else {
|
||||
fmt.Print(cursorRightN(b.DisplaySize() - b.DisplayPos))
|
||||
fmt.Print(CursorRightN(b.DisplaySize() - b.DisplayPos))
|
||||
}
|
||||
|
||||
b.Pos = b.Buf.Size()
|
||||
@@ -286,8 +286,7 @@ func (b *Buffer) drawRemaining() {
|
||||
remLength := runewidth.StringWidth(remainingText)
|
||||
|
||||
if len(currLine) > 0 {
|
||||
fmt.Printf(ClearToEOL + currLine)
|
||||
fmt.Print(cursorLeftN(currLineSpace))
|
||||
fmt.Print(ClearToEOL + currLine + CursorLeftN(currLineSpace))
|
||||
} else {
|
||||
fmt.Print(ClearToEOL)
|
||||
}
|
||||
@@ -301,9 +300,9 @@ func (b *Buffer) drawRemaining() {
|
||||
}
|
||||
|
||||
if (b.DisplayPos+currLineSpace)%b.LineWidth == 0 && currLine == remainingText {
|
||||
fmt.Print(cursorRightN(currLineSpace))
|
||||
fmt.Print(CursorRightN(currLineSpace))
|
||||
fmt.Printf("\n%s", b.Prompt.AltPrompt)
|
||||
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width-currLineSpace))
|
||||
fmt.Print(CursorUp + CursorBOL + CursorRightN(b.Width-currLineSpace))
|
||||
}
|
||||
|
||||
// render the other lines
|
||||
@@ -333,9 +332,7 @@ func (b *Buffer) drawRemaining() {
|
||||
lineLength += runewidth.RuneWidth(c)
|
||||
fmt.Printf("%c", c)
|
||||
}
|
||||
fmt.Print(ClearToEOL)
|
||||
fmt.Print(cursorUpN(totalLines))
|
||||
fmt.Printf(CursorBOL + cursorRightN(b.Width-currLineSpace))
|
||||
fmt.Print(ClearToEOL + CursorUpN(totalLines) + CursorBOL + CursorRightN(b.Width-currLineSpace))
|
||||
|
||||
hasSpace := b.GetLineSpacing(b.DisplayPos / b.LineWidth)
|
||||
|
||||
@@ -357,8 +354,7 @@ func (b *Buffer) Remove() {
|
||||
if b.DisplayPos%b.LineWidth == 0 {
|
||||
// if the user backspaces over the word boundary, do this magic to clear the line
|
||||
// and move to the end of the previous line
|
||||
fmt.Printf(CursorBOL + ClearToEOL)
|
||||
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
|
||||
fmt.Print(CursorBOL + ClearToEOL + CursorUp + CursorBOL + CursorRightN(b.Width))
|
||||
|
||||
if b.DisplaySize()%b.LineWidth < (b.DisplaySize()-rLength)%b.LineWidth {
|
||||
b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1)
|
||||
@@ -370,24 +366,23 @@ func (b *Buffer) Remove() {
|
||||
}
|
||||
|
||||
if rLength == 2 {
|
||||
fmt.Print(CursorLeft + " " + cursorLeftN(2))
|
||||
fmt.Print(CursorLeft + " " + CursorLeftN(2))
|
||||
} else {
|
||||
fmt.Print(" " + CursorLeft)
|
||||
}
|
||||
} else if (b.DisplayPos-rLength)%b.LineWidth == 0 && hasSpace {
|
||||
fmt.Printf(CursorBOL + ClearToEOL)
|
||||
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
|
||||
fmt.Print(CursorBOL + ClearToEOL + CursorUp + CursorBOL + CursorRightN(b.Width))
|
||||
|
||||
if b.Pos == b.Buf.Size() {
|
||||
b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1)
|
||||
}
|
||||
b.DisplayPos -= 1
|
||||
} else {
|
||||
fmt.Print(cursorLeftN(rLength))
|
||||
fmt.Print(CursorLeftN(rLength))
|
||||
for range rLength {
|
||||
fmt.Print(" ")
|
||||
}
|
||||
fmt.Print(cursorLeftN(rLength))
|
||||
fmt.Print(CursorLeftN(rLength))
|
||||
}
|
||||
|
||||
var eraseExtraLine bool
|
||||
@@ -405,9 +400,9 @@ func (b *Buffer) Remove() {
|
||||
// are trailing characters which go over the line width boundary
|
||||
if eraseExtraLine {
|
||||
remainingLines := (b.DisplaySize() - b.DisplayPos) / b.LineWidth
|
||||
fmt.Printf(cursorDownN(remainingLines+1) + CursorBOL + ClearToEOL)
|
||||
fmt.Print(CursorDownN(remainingLines+1) + CursorBOL + ClearToEOL)
|
||||
place := b.DisplayPos % b.LineWidth
|
||||
fmt.Printf(cursorUpN(remainingLines+1) + cursorRightN(place+len(b.Prompt.prompt())))
|
||||
fmt.Print(CursorUpN(remainingLines+1) + CursorRightN(place+len(b.Prompt.prompt())))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -422,9 +417,9 @@ func (b *Buffer) Delete() {
|
||||
if b.DisplaySize()%b.LineWidth == 0 {
|
||||
if b.DisplayPos != b.DisplaySize() {
|
||||
remainingLines := (b.DisplaySize() - b.DisplayPos) / b.LineWidth
|
||||
fmt.Printf(cursorDownN(remainingLines) + CursorBOL + ClearToEOL)
|
||||
fmt.Print(CursorDownN(remainingLines) + CursorBOL + ClearToEOL)
|
||||
place := b.DisplayPos % b.LineWidth
|
||||
fmt.Printf(cursorUpN(remainingLines) + cursorRightN(place+len(b.Prompt.prompt())))
|
||||
fmt.Print(CursorUpN(remainingLines) + CursorRightN(place+len(b.Prompt.prompt())))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -471,17 +466,17 @@ func (b *Buffer) DeleteWord() {
|
||||
}
|
||||
|
||||
func (b *Buffer) ClearScreen() {
|
||||
fmt.Printf(ClearScreen + CursorReset + b.Prompt.prompt())
|
||||
fmt.Print(ClearScreen + CursorReset + b.Prompt.prompt())
|
||||
if b.IsEmpty() {
|
||||
ph := b.Prompt.placeholder()
|
||||
fmt.Printf(ColorGrey + ph + cursorLeftN(len(ph)) + ColorDefault)
|
||||
fmt.Print(ColorGrey + ph + CursorLeftN(len(ph)) + ColorDefault)
|
||||
} else {
|
||||
currPos := b.DisplayPos
|
||||
currIndex := b.Pos
|
||||
b.Pos = 0
|
||||
b.DisplayPos = 0
|
||||
b.drawRemaining()
|
||||
fmt.Printf(CursorReset + cursorRightN(len(b.Prompt.prompt())))
|
||||
fmt.Print(CursorReset + CursorRightN(len(b.Prompt.prompt())))
|
||||
if currPos > 0 {
|
||||
targetLine := currPos / b.LineWidth
|
||||
if targetLine > 0 {
|
||||
@@ -491,10 +486,10 @@ func (b *Buffer) ClearScreen() {
|
||||
}
|
||||
remainder := currPos % b.LineWidth
|
||||
if remainder > 0 {
|
||||
fmt.Print(cursorRightN(remainder))
|
||||
fmt.Print(CursorRightN(remainder))
|
||||
}
|
||||
if currPos%b.LineWidth == 0 {
|
||||
fmt.Printf(CursorBOL + b.Prompt.AltPrompt)
|
||||
fmt.Print(CursorBOL + b.Prompt.AltPrompt)
|
||||
}
|
||||
}
|
||||
b.Pos = currIndex
|
||||
@@ -513,13 +508,13 @@ func (b *Buffer) Replace(r []rune) {
|
||||
|
||||
b.Buf.Clear()
|
||||
|
||||
fmt.Printf(CursorBOL + ClearToEOL)
|
||||
fmt.Print(CursorBOL + ClearToEOL)
|
||||
|
||||
for range lineNums {
|
||||
fmt.Print(CursorUp + CursorBOL + ClearToEOL)
|
||||
}
|
||||
|
||||
fmt.Printf(CursorBOL + b.Prompt.prompt())
|
||||
fmt.Print(CursorBOL + b.Prompt.prompt())
|
||||
|
||||
for _, c := range r {
|
||||
b.Add(c)
|
||||
@@ -545,19 +540,3 @@ func (b *Buffer) StringNM(n, m int) string {
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func cursorLeftN(n int) string {
|
||||
return fmt.Sprintf(CursorLeftN, n)
|
||||
}
|
||||
|
||||
func cursorRightN(n int) string {
|
||||
return fmt.Sprintf(CursorRightN, n)
|
||||
}
|
||||
|
||||
func cursorUpN(n int) string {
|
||||
return fmt.Sprintf(CursorUpN, n)
|
||||
}
|
||||
|
||||
func cursorDownN(n int) string {
|
||||
return fmt.Sprintf(CursorDownN, n)
|
||||
}
|
||||
|
||||
@@ -4,9 +4,7 @@ import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrInterrupt = errors.New("Interrupt")
|
||||
)
|
||||
var ErrInterrupt = errors.New("Interrupt")
|
||||
|
||||
type InterruptError struct {
|
||||
Line []rune
|
||||
|
||||
@@ -98,7 +98,7 @@ func (i *Instance) Readline() (string, error) {
|
||||
showPlaceholder := !i.Pasting || i.Prompt.UseAlt
|
||||
if buf.IsEmpty() && showPlaceholder {
|
||||
ph := i.Prompt.placeholder()
|
||||
fmt.Printf(ColorGrey + ph + fmt.Sprintf(CursorLeftN, len(ph)) + ColorDefault)
|
||||
fmt.Print(ColorGrey + ph + CursorLeftN(len(ph)) + ColorDefault)
|
||||
}
|
||||
|
||||
r, err := i.Terminal.Read()
|
||||
|
||||
@@ -7,8 +7,10 @@ import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const tcgets = 0x5401
|
||||
const tcsets = 0x5402
|
||||
const (
|
||||
tcgets = 0x5401
|
||||
tcsets = 0x5402
|
||||
)
|
||||
|
||||
func getTermios(fd uintptr) (*Termios, error) {
|
||||
termios := new(Termios)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package readline
|
||||
|
||||
import "strconv"
|
||||
|
||||
const (
|
||||
CharNull = 0
|
||||
CharLineStart = 1
|
||||
@@ -41,34 +43,49 @@ const (
|
||||
)
|
||||
|
||||
const (
|
||||
CursorUp = "\033[1A"
|
||||
CursorDown = "\033[1B"
|
||||
CursorRight = "\033[1C"
|
||||
CursorLeft = "\033[1D"
|
||||
Esc = "\x1b"
|
||||
|
||||
CursorSave = "\033[s"
|
||||
CursorRestore = "\033[u"
|
||||
CursorSave = Esc + "[s"
|
||||
CursorRestore = Esc + "[u"
|
||||
|
||||
CursorUpN = "\033[%dA"
|
||||
CursorDownN = "\033[%dB"
|
||||
CursorRightN = "\033[%dC"
|
||||
CursorLeftN = "\033[%dD"
|
||||
CursorEOL = Esc + "[E"
|
||||
CursorBOL = Esc + "[1G"
|
||||
CursorHide = Esc + "[?25l"
|
||||
CursorShow = Esc + "[?25h"
|
||||
|
||||
CursorEOL = "\033[E"
|
||||
CursorBOL = "\033[1G"
|
||||
CursorHide = "\033[?25l"
|
||||
CursorShow = "\033[?25h"
|
||||
ClearToEOL = Esc + "[K"
|
||||
ClearLine = Esc + "[2K"
|
||||
ClearScreen = Esc + "[2J"
|
||||
CursorReset = Esc + "[0;0f"
|
||||
|
||||
ClearToEOL = "\033[K"
|
||||
ClearLine = "\033[2K"
|
||||
ClearScreen = "\033[2J"
|
||||
CursorReset = "\033[0;0f"
|
||||
ColorGrey = Esc + "[38;5;245m"
|
||||
ColorDefault = Esc + "[0m"
|
||||
|
||||
ColorGrey = "\033[38;5;245m"
|
||||
ColorDefault = "\033[0m"
|
||||
StartBracketedPaste = Esc + "[?2004h"
|
||||
EndBracketedPaste = Esc + "[?2004l"
|
||||
)
|
||||
|
||||
StartBracketedPaste = "\033[?2004h"
|
||||
EndBracketedPaste = "\033[?2004l"
|
||||
func CursorUpN(n int) string {
|
||||
return Esc + "[" + strconv.Itoa(n) + "A"
|
||||
}
|
||||
|
||||
func CursorDownN(n int) string {
|
||||
return Esc + "[" + strconv.Itoa(n) + "B"
|
||||
}
|
||||
|
||||
func CursorRightN(n int) string {
|
||||
return Esc + "[" + strconv.Itoa(n) + "C"
|
||||
}
|
||||
|
||||
func CursorLeftN(n int) string {
|
||||
return Esc + "[" + strconv.Itoa(n) + "D"
|
||||
}
|
||||
|
||||
var (
|
||||
CursorUp = CursorUpN(1)
|
||||
CursorDown = CursorDownN(1)
|
||||
CursorRight = CursorRightN(1)
|
||||
CursorLeft = CursorLeftN(1)
|
||||
)
|
||||
|
||||
const (
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user