Compare commits
7 Commits
v0.1.49-rc
...
v0.1.49-rc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0ee87615c7 | ||
|
|
f8241bfba3 | ||
|
|
4607c70641 | ||
|
|
c12f1c5b99 | ||
|
|
a08f20d910 | ||
|
|
6cea036027 | ||
|
|
5796bfc401 |
5
.github/workflows/release.yaml
vendored
5
.github/workflows/release.yaml
vendored
@@ -304,6 +304,11 @@ jobs:
|
||||
write-host "Installing plugin"
|
||||
& "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
|
||||
write-host "plugin installed"
|
||||
- name: remove unwanted mingw dll.a files
|
||||
run: |
|
||||
Get-ChildItem -Path "C:\mingw64" -Recurse -Filter "libpthread.dll.a" -File | Remove-Item -Force
|
||||
Get-ChildItem -Path "C:\mingw64" -Recurse -Filter "libwinpthread.dll.a" -File | Remove-Item -Force
|
||||
Get-ChildItem -Path "C:\mingw64" -Recurse -Filter "libstdc++.dll.a" -File | Remove-Item -Force
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
|
||||
@@ -56,7 +56,7 @@ func GetCPUInfo() GpuInfoList {
|
||||
func GetCPUMem() (memInfo, error) {
|
||||
return memInfo{
|
||||
TotalMemory: uint64(C.getPhysicalMemory()),
|
||||
FreeMemory: 0,
|
||||
FreeMemory: uint64(C.getFreeMemory()),
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -2,3 +2,4 @@
|
||||
#include <stdint.h>
|
||||
uint64_t getRecommendedMaxVRAM();
|
||||
uint64_t getPhysicalMemory();
|
||||
uint64_t getFreeMemory();
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// go:build darwin
|
||||
#import <Foundation/Foundation.h>
|
||||
#import <mach/mach.h>
|
||||
#include "gpu_info_darwin.h"
|
||||
|
||||
uint64_t getRecommendedMaxVRAM() {
|
||||
@@ -8,6 +9,27 @@ uint64_t getRecommendedMaxVRAM() {
|
||||
return result;
|
||||
}
|
||||
|
||||
// getPhysicalMemory returns the total physical memory in bytes
|
||||
uint64_t getPhysicalMemory() {
|
||||
return [[NSProcessInfo processInfo] physicalMemory];
|
||||
return [NSProcessInfo processInfo].physicalMemory;
|
||||
}
|
||||
|
||||
// getFreeMemory returns the total free memory in bytes, including inactive
|
||||
// memory that can be reclaimed by the system.
|
||||
uint64_t getFreeMemory() {
|
||||
mach_port_t host_port = mach_host_self();
|
||||
mach_msg_type_number_t host_size = sizeof(vm_statistics64_data_t) / sizeof(integer_t);
|
||||
vm_size_t pagesize;
|
||||
vm_statistics64_data_t vm_stat;
|
||||
|
||||
host_page_size(host_port, &pagesize);
|
||||
if (host_statistics64(host_port, HOST_VM_INFO64, (host_info64_t)&vm_stat, &host_size) != KERN_SUCCESS) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t free_memory = (uint64_t)vm_stat.free_count * pagesize;
|
||||
free_memory += (uint64_t)vm_stat.speculative_count * pagesize;
|
||||
free_memory += (uint64_t)vm_stat.inactive_count * pagesize;
|
||||
|
||||
return free_memory;
|
||||
}
|
||||
|
||||
@@ -77,7 +77,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
|
||||
if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
|
||||
init_vars
|
||||
echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
|
||||
CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
|
||||
CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
|
||||
BUILD_DIR="../build/linux/${ARCH}/cpu"
|
||||
echo "Building custom CPU"
|
||||
build
|
||||
@@ -93,7 +93,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
|
||||
# -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
|
||||
# -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
|
||||
|
||||
COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
|
||||
COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
|
||||
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
|
||||
#
|
||||
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
|
||||
|
||||
@@ -2,7 +2,6 @@ package llm
|
||||
|
||||
// #cgo CFLAGS: -Illama.cpp -Illama.cpp/include -Illama.cpp/ggml/include
|
||||
// #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread
|
||||
// #cgo windows LDFLAGS: -static-libstdc++ -static-libgcc -static
|
||||
// #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal
|
||||
// #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src
|
||||
// #cgo windows,amd64 LDFLAGS: -L${SRCDIR}/build/windows/amd64_static -L${SRCDIR}/build/windows/amd64_static/src -L${SRCDIR}/build/windows/amd64_static/ggml/src
|
||||
|
||||
@@ -197,25 +197,36 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
||||
break
|
||||
}
|
||||
|
||||
// Block attempting to load a model larger than system memory + GPU memory
|
||||
estimate := llm.EstimateGPULayers(gpus, ggml, pending.model.ProjectorPaths, pending.opts)
|
||||
maxSize := systemMem.FreeMemory
|
||||
for _, gpu := range gpus {
|
||||
if gpu.Library == "cpu" {
|
||||
continue
|
||||
}
|
||||
if loadedCount == 0 {
|
||||
// If no other models are loaded, set the limit based on what's available
|
||||
maxSize += gpu.FreeMemory
|
||||
} else {
|
||||
// Other models could be unloaded, favor total memory for limit
|
||||
maxSize += gpu.TotalMemory
|
||||
|
||||
// Add available GPU memory to the total pool
|
||||
// macOS hardware has unified memory so don't double count
|
||||
if runtime.GOOS != "darwin" {
|
||||
for _, gpu := range gpus {
|
||||
if gpu.Library == "cpu" {
|
||||
continue
|
||||
}
|
||||
if loadedCount == 0 {
|
||||
// If no other models are loaded, set the limit based on what's available
|
||||
maxSize += gpu.FreeMemory
|
||||
} else {
|
||||
// Other models could be unloaded, favor total memory for limit
|
||||
maxSize += gpu.TotalMemory
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Block attempting to load a model larger than system memory + GPU memory
|
||||
if estimate.TotalSize > maxSize {
|
||||
slog.Warn("model request too large for system", "requested", format.HumanBytes2(estimate.TotalSize), "system", format.HumanBytes2(maxSize))
|
||||
pending.errCh <- fmt.Errorf("requested model (%s) is too large for this system (%s)", format.HumanBytes2(estimate.TotalSize), format.HumanBytes2(maxSize))
|
||||
break
|
||||
|
||||
// Linux will crash if over-allocating memory - return an error to the user.
|
||||
// TODO (jmorganca): add reasonable upper limits for darwin and windows as well
|
||||
if runtime.GOOS == "linux" {
|
||||
pending.errCh <- fmt.Errorf("requested model (%s) is too large for this system (%s)", format.HumanBytes2(estimate.TotalSize), format.HumanBytes2(maxSize))
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Evaluate if the model will fit in the available system memory, or if we should unload a model first
|
||||
|
||||
Reference in New Issue
Block a user