Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
832b4db9d4 | ||
|
|
c43873f33b | ||
|
|
d790bf9916 | ||
|
|
35afac099a | ||
|
|
811c3d1900 | ||
|
|
3553d10769 | ||
|
|
6fe178134d |
18
api/types.go
18
api/types.go
@@ -3,7 +3,6 @@ package api
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
|
||||||
"math"
|
"math"
|
||||||
"os"
|
"os"
|
||||||
"reflect"
|
"reflect"
|
||||||
@@ -238,44 +237,39 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
|
|||||||
// when JSON unmarshals numbers, it uses float64, not int
|
// when JSON unmarshals numbers, it uses float64, not int
|
||||||
field.SetInt(int64(t))
|
field.SetInt(int64(t))
|
||||||
default:
|
default:
|
||||||
log.Printf("could not convert model parameter %v of type %T to int, skipped", key, val)
|
return fmt.Errorf("option %q must be of type integer", key)
|
||||||
}
|
}
|
||||||
case reflect.Bool:
|
case reflect.Bool:
|
||||||
val, ok := val.(bool)
|
val, ok := val.(bool)
|
||||||
if !ok {
|
if !ok {
|
||||||
log.Printf("could not convert model parameter %v of type %T to bool, skipped", key, val)
|
return fmt.Errorf("option %q must be of type boolean", key)
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
field.SetBool(val)
|
field.SetBool(val)
|
||||||
case reflect.Float32:
|
case reflect.Float32:
|
||||||
// JSON unmarshals to float64
|
// JSON unmarshals to float64
|
||||||
val, ok := val.(float64)
|
val, ok := val.(float64)
|
||||||
if !ok {
|
if !ok {
|
||||||
log.Printf("could not convert model parameter %v of type %T to float32, skipped", key, val)
|
return fmt.Errorf("option %q must be of type float32", key)
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
field.SetFloat(val)
|
field.SetFloat(val)
|
||||||
case reflect.String:
|
case reflect.String:
|
||||||
val, ok := val.(string)
|
val, ok := val.(string)
|
||||||
if !ok {
|
if !ok {
|
||||||
log.Printf("could not convert model parameter %v of type %T to string, skipped", key, val)
|
return fmt.Errorf("option %q must be of type string", key)
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
field.SetString(val)
|
field.SetString(val)
|
||||||
case reflect.Slice:
|
case reflect.Slice:
|
||||||
// JSON unmarshals to []interface{}, not []string
|
// JSON unmarshals to []interface{}, not []string
|
||||||
val, ok := val.([]interface{})
|
val, ok := val.([]interface{})
|
||||||
if !ok {
|
if !ok {
|
||||||
log.Printf("could not convert model parameter %v of type %T to slice, skipped", key, val)
|
return fmt.Errorf("option %q must be of type array", key)
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
// convert []interface{} to []string
|
// convert []interface{} to []string
|
||||||
slice := make([]string, len(val))
|
slice := make([]string, len(val))
|
||||||
for i, item := range val {
|
for i, item := range val {
|
||||||
str, ok := item.(string)
|
str, ok := item.(string)
|
||||||
if !ok {
|
if !ok {
|
||||||
log.Printf("could not convert model parameter %v of type %T to slice of strings, skipped", key, item)
|
return fmt.Errorf("option %q must be of an array of strings", key)
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
slice[i] = str
|
slice[i] = str
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -162,13 +162,56 @@ app.on('before-quit', () => {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
const updateURL = `https://ollama.ai/api/update?os=${process.platform}&arch=${
|
||||||
|
process.arch
|
||||||
|
}&version=${app.getVersion()}&id=${id()}`
|
||||||
|
|
||||||
|
let latest = ''
|
||||||
|
async function isNewReleaseAvailable() {
|
||||||
|
try {
|
||||||
|
const response = await fetch(updateURL)
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if (response.status === 204) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await response.json()
|
||||||
|
|
||||||
|
const url = data?.url
|
||||||
|
if (!url) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if (latest === url) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
latest = url
|
||||||
|
|
||||||
|
return true
|
||||||
|
} catch (error) {
|
||||||
|
logger.error(`update check failed - ${error}`)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function checkUpdate() {
|
||||||
|
const available = await isNewReleaseAvailable()
|
||||||
|
if (available) {
|
||||||
|
logger.info('checking for update')
|
||||||
|
autoUpdater.checkForUpdates()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
function init() {
|
function init() {
|
||||||
if (app.isPackaged) {
|
if (app.isPackaged) {
|
||||||
autoUpdater.checkForUpdates()
|
checkUpdate()
|
||||||
setInterval(() => {
|
setInterval(() => {
|
||||||
if (!updateAvailable) {
|
checkUpdate()
|
||||||
autoUpdater.checkForUpdates()
|
|
||||||
}
|
|
||||||
}, 60 * 60 * 1000)
|
}, 60 * 60 * 1000)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -246,11 +289,7 @@ function id(): string {
|
|||||||
return uuid
|
return uuid
|
||||||
}
|
}
|
||||||
|
|
||||||
autoUpdater.setFeedURL({
|
autoUpdater.setFeedURL({ url: updateURL })
|
||||||
url: `https://ollama.ai/api/update?os=${process.platform}&arch=${
|
|
||||||
process.arch
|
|
||||||
}&version=${app.getVersion()}&id=${id()}`,
|
|
||||||
})
|
|
||||||
|
|
||||||
autoUpdater.on('error', e => {
|
autoUpdater.on('error', e => {
|
||||||
logger.error(`update check failed - ${e.message}`)
|
logger.error(`update check failed - ${e.message}`)
|
||||||
|
|||||||
55
llm/llama.go
55
llm/llama.go
@@ -30,42 +30,43 @@ import (
|
|||||||
var llamaCppEmbed embed.FS
|
var llamaCppEmbed embed.FS
|
||||||
|
|
||||||
type ModelRunner struct {
|
type ModelRunner struct {
|
||||||
Path string // path to the model runner executable
|
Path string // path to the model runner executable
|
||||||
|
Accelerated bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func chooseRunners(workDir, runnerType string) []ModelRunner {
|
func chooseRunners(workDir, runnerType string) []ModelRunner {
|
||||||
buildPath := path.Join("llama.cpp", runnerType, "build")
|
buildPath := path.Join("llama.cpp", runnerType, "build")
|
||||||
var runners []string
|
var runners []ModelRunner
|
||||||
|
|
||||||
// set the runners based on the OS
|
// set the runners based on the OS
|
||||||
// IMPORTANT: the order of the runners in the array is the priority order
|
// IMPORTANT: the order of the runners in the array is the priority order
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
case "darwin":
|
case "darwin":
|
||||||
runners = []string{
|
runners = []ModelRunner{
|
||||||
path.Join(buildPath, "metal", "bin", "ollama-runner"),
|
{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")},
|
||||||
path.Join(buildPath, "cpu", "bin", "ollama-runner"),
|
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
|
||||||
}
|
}
|
||||||
case "linux":
|
case "linux":
|
||||||
runners = []string{
|
runners = []ModelRunner{
|
||||||
path.Join(buildPath, "cuda", "bin", "ollama-runner"),
|
{Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
|
||||||
path.Join(buildPath, "cpu", "bin", "ollama-runner"),
|
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
|
||||||
}
|
}
|
||||||
case "windows":
|
case "windows":
|
||||||
// TODO: select windows GPU runner here when available
|
// TODO: select windows GPU runner here when available
|
||||||
runners = []string{
|
runners = []ModelRunner{
|
||||||
path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe"),
|
{Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
|
log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
|
||||||
runners = []string{
|
runners = []ModelRunner{
|
||||||
path.Join(buildPath, "cpu", "bin", "ollama-runner"),
|
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
runnerAvailable := false // if no runner files are found in the embed, this flag will cause a fast fail
|
runnerAvailable := false // if no runner files are found in the embed, this flag will cause a fast fail
|
||||||
for _, r := range runners {
|
for _, r := range runners {
|
||||||
// find all the files in the runner's bin directory
|
// find all the files in the runner's bin directory
|
||||||
files, err := fs.Glob(llamaCppEmbed, path.Join(path.Dir(r), "*"))
|
files, err := fs.Glob(llamaCppEmbed, path.Join(path.Dir(r.Path), "*"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// this is expected, ollama may be compiled without all runners packed in
|
// this is expected, ollama may be compiled without all runners packed in
|
||||||
log.Printf("%s runner not found: %v", r, err)
|
log.Printf("%s runner not found: %v", r, err)
|
||||||
@@ -115,7 +116,10 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
|
|||||||
localRunnersByPriority := []ModelRunner{}
|
localRunnersByPriority := []ModelRunner{}
|
||||||
for _, r := range runners {
|
for _, r := range runners {
|
||||||
// clean the ModelRunner paths so that they match the OS we are running on
|
// clean the ModelRunner paths so that they match the OS we are running on
|
||||||
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: filepath.Clean(path.Join(workDir, r))})
|
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{
|
||||||
|
Path: filepath.Clean(path.Join(workDir, r.Path)),
|
||||||
|
Accelerated: r.Accelerated,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
return localRunnersByPriority
|
return localRunnersByPriority
|
||||||
@@ -215,6 +219,11 @@ func CheckVRAM() (int64, error) {
|
|||||||
free += vram
|
free += vram
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if free*1024*1024 < 2*1000*1000*1000 {
|
||||||
|
log.Printf("less than 2 GB VRAM available, falling back to CPU only")
|
||||||
|
free = 0
|
||||||
|
}
|
||||||
|
|
||||||
return free, nil
|
return free, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -238,8 +247,8 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
|
|||||||
// TODO: this is a rough heuristic, better would be to calculate this based on number of layers and context size
|
// TODO: this is a rough heuristic, better would be to calculate this based on number of layers and context size
|
||||||
bytesPerLayer := fileSizeBytes / numLayer
|
bytesPerLayer := fileSizeBytes / numLayer
|
||||||
|
|
||||||
// max number of layers we can fit in VRAM, subtract 5% to prevent consuming all available VRAM and running out of memory
|
// max number of layers we can fit in VRAM, subtract 8% to prevent consuming all available VRAM and running out of memory
|
||||||
layers := int(freeVramBytes/bytesPerLayer) * 95 / 100
|
layers := int(freeVramBytes/bytesPerLayer) * 92 / 100
|
||||||
log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, layers)
|
log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, layers)
|
||||||
|
|
||||||
return layers
|
return layers
|
||||||
@@ -261,8 +270,7 @@ func NewStatusWriter() *StatusWriter {
|
|||||||
|
|
||||||
func (w *StatusWriter) Write(b []byte) (int, error) {
|
func (w *StatusWriter) Write(b []byte) (int, error) {
|
||||||
if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
|
if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
|
||||||
err := fmt.Errorf("llama runner: %s", after)
|
w.ErrCh <- fmt.Errorf("llama runner: %s", bytes.TrimSpace(after))
|
||||||
w.ErrCh <- err
|
|
||||||
}
|
}
|
||||||
return os.Stderr.Write(b)
|
return os.Stderr.Write(b)
|
||||||
}
|
}
|
||||||
@@ -277,16 +285,20 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
|
|||||||
return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
|
return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
numGPU := NumGPU(numLayers, fileInfo.Size(), opts)
|
||||||
params := []string{
|
params := []string{
|
||||||
"--model", model,
|
"--model", model,
|
||||||
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
|
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
|
||||||
"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
|
"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
|
||||||
"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
|
"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
|
||||||
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
|
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
|
||||||
"--n-gpu-layers", fmt.Sprintf("%d", NumGPU(numLayers, fileInfo.Size(), opts)),
|
|
||||||
"--embedding",
|
"--embedding",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if numGPU > 0 {
|
||||||
|
params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
|
||||||
|
}
|
||||||
|
|
||||||
if opts.NumGQA > 0 {
|
if opts.NumGQA > 0 {
|
||||||
params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
|
params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
|
||||||
}
|
}
|
||||||
@@ -317,6 +329,11 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
|
|||||||
|
|
||||||
// start the llama.cpp server with a retry in case the port is already in use
|
// start the llama.cpp server with a retry in case the port is already in use
|
||||||
for _, runner := range runners {
|
for _, runner := range runners {
|
||||||
|
if runner.Accelerated && numGPU == 0 {
|
||||||
|
log.Printf("skipping accelerated runner because num_gpu=0")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
if _, err := os.Stat(runner.Path); err != nil {
|
if _, err := os.Stat(runner.Path); err != nil {
|
||||||
log.Printf("llama runner not found: %v", err)
|
log.Printf("llama runner not found: %v", err)
|
||||||
continue
|
continue
|
||||||
|
|||||||
Reference in New Issue
Block a user