Compare commits
1 Commit
v0.1.45
...
royh-param
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c494aea5c8 |
26
cmd/cmd.go
26
cmd/cmd.go
@@ -636,7 +636,7 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
|
|||||||
case "modelfile":
|
case "modelfile":
|
||||||
fmt.Println(resp.Modelfile)
|
fmt.Println(resp.Modelfile)
|
||||||
case "parameters":
|
case "parameters":
|
||||||
fmt.Println(resp.Parameters)
|
fmt.Println(formatParams(resp.Parameters, false))
|
||||||
case "system":
|
case "system":
|
||||||
fmt.Println(resp.System)
|
fmt.Println(resp.System)
|
||||||
case "template":
|
case "template":
|
||||||
@@ -664,7 +664,7 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
|
|||||||
|
|
||||||
mainTableData := [][]string{
|
mainTableData := [][]string{
|
||||||
{"Model"},
|
{"Model"},
|
||||||
{renderSubTable(modelData, false)},
|
{renderSubTable(modelData, false, true)},
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp.ProjectorInfo != nil {
|
if resp.ProjectorInfo != nil {
|
||||||
@@ -678,20 +678,20 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
|
|||||||
|
|
||||||
mainTableData = append(mainTableData,
|
mainTableData = append(mainTableData,
|
||||||
[]string{"Projector"},
|
[]string{"Projector"},
|
||||||
[]string{renderSubTable(projectorData, false)},
|
[]string{renderSubTable(projectorData, false, true)},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp.Parameters != "" {
|
if resp.Parameters != "" {
|
||||||
mainTableData = append(mainTableData, []string{"Parameters"}, []string{formatParams(resp.Parameters)})
|
mainTableData = append(mainTableData, []string{"Parameters"}, []string{formatParams(resp.Parameters, true)})
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp.System != "" {
|
if resp.System != "" {
|
||||||
mainTableData = append(mainTableData, []string{"System"}, []string{renderSubTable(twoLines(resp.System), true)})
|
mainTableData = append(mainTableData, []string{"System"}, []string{renderSubTable(twoLines(resp.System), true, true)})
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp.License != "" {
|
if resp.License != "" {
|
||||||
mainTableData = append(mainTableData, []string{"License"}, []string{renderSubTable(twoLines(resp.License), true)})
|
mainTableData = append(mainTableData, []string{"License"}, []string{renderSubTable(twoLines(resp.License), true, true)})
|
||||||
}
|
}
|
||||||
|
|
||||||
table := tablewriter.NewWriter(os.Stdout)
|
table := tablewriter.NewWriter(os.Stdout)
|
||||||
@@ -708,7 +708,7 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func renderSubTable(data [][]string, file bool) string {
|
func renderSubTable(data [][]string, file bool, tab bool) string {
|
||||||
var buf bytes.Buffer
|
var buf bytes.Buffer
|
||||||
table := tablewriter.NewWriter(&buf)
|
table := tablewriter.NewWriter(&buf)
|
||||||
table.SetAutoWrapText(!file)
|
table.SetAutoWrapText(!file)
|
||||||
@@ -723,6 +723,10 @@ func renderSubTable(data [][]string, file bool) string {
|
|||||||
|
|
||||||
table.Render()
|
table.Render()
|
||||||
|
|
||||||
|
if !tab {
|
||||||
|
return buf.String()
|
||||||
|
}
|
||||||
|
|
||||||
renderedTable := buf.String()
|
renderedTable := buf.String()
|
||||||
lines := strings.Split(renderedTable, "\n")
|
lines := strings.Split(renderedTable, "\n")
|
||||||
for i, line := range lines {
|
for i, line := range lines {
|
||||||
@@ -750,14 +754,16 @@ func twoLines(s string) [][]string {
|
|||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
func formatParams(s string) string {
|
func formatParams(s string, tab bool) string {
|
||||||
lines := strings.Split(s, "\n")
|
lines := strings.Split(s, "\n")
|
||||||
table := [][]string{}
|
table := [][]string{}
|
||||||
|
|
||||||
for _, line := range lines {
|
for _, line := range lines {
|
||||||
table = append(table, strings.Fields(line))
|
fields := strings.Fields(line)
|
||||||
|
fields[1] = strings.TrimPrefix(strings.TrimSuffix(fields[1], `"`), `"`)
|
||||||
|
table = append(table, fields)
|
||||||
}
|
}
|
||||||
return renderSubTable(table, false)
|
return renderSubTable(table, false, tab)
|
||||||
}
|
}
|
||||||
|
|
||||||
func CopyHandler(cmd *cobra.Command, args []string) error {
|
func CopyHandler(cmd *cobra.Command, args []string) error {
|
||||||
|
|||||||
@@ -77,27 +77,20 @@ func cleanupTmpDirs() {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
raw, err := os.ReadFile(filepath.Join(d, "ollama.pid"))
|
raw, err := os.ReadFile(filepath.Join(d, "ollama.pid"))
|
||||||
|
if err == nil {
|
||||||
|
pid, err := strconv.Atoi(string(raw))
|
||||||
|
if err == nil {
|
||||||
|
if proc, err := os.FindProcess(pid); err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
|
||||||
|
// Another running ollama, ignore this tmpdir
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
slog.Debug("failed to open ollama.pid", "path", d, "error", err)
|
||||||
|
}
|
||||||
|
err = os.RemoveAll(d)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("failed to read ollama.pid", "path", d, "error", err)
|
slog.Debug("unable to cleanup stale tmpdir", "path", d, "error", err)
|
||||||
// No pid, ignore this tmpdir
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
pid, err := strconv.Atoi(string(raw))
|
|
||||||
if err != nil {
|
|
||||||
slog.Warn("failed to parse pid", "path", d, "error", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
proc, err := os.FindProcess(pid)
|
|
||||||
if err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
|
|
||||||
slog.Warn("found running ollama", "pid", pid, "path", d)
|
|
||||||
// Another running ollama, ignore this tmpdir
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := os.Remove(d); err != nil {
|
|
||||||
slog.Warn("unable to cleanup stale tmpdir", "path", d, "error", err)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
40
llm/ggml.go
40
llm/ggml.go
@@ -69,30 +69,6 @@ func (kv KV) HeadCountKV() uint64 {
|
|||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
func (kv KV) EmbeddingHeadCount() uint64 {
|
|
||||||
if heads := kv.HeadCount(); heads > 0 {
|
|
||||||
return kv.EmbeddingLength() / kv.HeadCount()
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func (kv KV) EmbeddingHeadCountK() uint64 {
|
|
||||||
if k := kv.u64(fmt.Sprintf("%s.attention.key_length", kv.Architecture())); k > 0 {
|
|
||||||
return k
|
|
||||||
}
|
|
||||||
|
|
||||||
return kv.EmbeddingHeadCount()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (kv KV) EmbeddingHeadCountV() uint64 {
|
|
||||||
if v := kv.u64(fmt.Sprintf("%s.attention.value_length", kv.Architecture())); v > 0 {
|
|
||||||
return v
|
|
||||||
}
|
|
||||||
|
|
||||||
return kv.EmbeddingHeadCount()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (kv KV) GQA() uint64 {
|
func (kv KV) GQA() uint64 {
|
||||||
return kv.HeadCount() / kv.HeadCountKV()
|
return kv.HeadCount() / kv.HeadCountKV()
|
||||||
}
|
}
|
||||||
@@ -323,9 +299,6 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
|||||||
headsKV := llm.KV().HeadCountKV()
|
headsKV := llm.KV().HeadCountKV()
|
||||||
vocab := uint64(len(llm.KV()["tokenizer.ggml.tokens"].([]any)))
|
vocab := uint64(len(llm.KV()["tokenizer.ggml.tokens"].([]any)))
|
||||||
|
|
||||||
embeddingHeads := llm.KV().EmbeddingHeadCount()
|
|
||||||
embeddingHeadsK := llm.KV().EmbeddingHeadCountK()
|
|
||||||
|
|
||||||
layers := llm.Tensors().Layers()
|
layers := llm.Tensors().Layers()
|
||||||
|
|
||||||
switch llm.KV().Architecture() {
|
switch llm.KV().Architecture() {
|
||||||
@@ -335,7 +308,7 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
|||||||
partialOffload = 4 * batch * embedding
|
partialOffload = 4 * batch * embedding
|
||||||
partialOffload += max(
|
partialOffload += max(
|
||||||
// 4*batch*(4+6*embedding+context*(2*heads)+llm.KV().GQA()),
|
// 4*batch*(4+6*embedding+context*(2*heads)+llm.KV().GQA()),
|
||||||
4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embeddingHeads*headsKV),
|
4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embedding/heads*headsKV),
|
||||||
4*batch*(embedding+vocab)+embedding*vocab*105/128,
|
4*batch*(embedding+vocab)+embedding*vocab*105/128,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -343,15 +316,15 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
|||||||
// mixtral 8x22b
|
// mixtral 8x22b
|
||||||
ff := uint64(llm.KV()["llama.feed_forward_length"].(uint32))
|
ff := uint64(llm.KV()["llama.feed_forward_length"].(uint32))
|
||||||
partialOffload = max(
|
partialOffload = max(
|
||||||
3*ffnGateExpsWeight.Size()+4*batch*(2*ff+headsKV+embedding+context+embeddingHeads*headsKV),
|
3*ffnGateExpsWeight.Size()+4*batch*(2*ff+headsKV+embedding+context+embedding/heads*headsKV),
|
||||||
4*(context*batch*heads+context*embeddingHeads*headsKV+batch*1024+embeddingHeads*headsKV*batch),
|
4*(context*batch*heads+context*embedding/heads*headsKV+batch*1024+embedding/heads*headsKV*batch),
|
||||||
)
|
)
|
||||||
} else if ffnGateWeight, ok := layers["blk.0"]["ffn_gate.0.weight"]; ok {
|
} else if ffnGateWeight, ok := layers["blk.0"]["ffn_gate.0.weight"]; ok {
|
||||||
// mixtral 8x7b
|
// mixtral 8x7b
|
||||||
ffnGateWeight1 := ffnGateWeight.Shape[1]
|
ffnGateWeight1 := ffnGateWeight.Shape[1]
|
||||||
fullOffload = 4 * batch * (2 + 3*embedding + context*(1+heads) + 2*headsKV + ffnGateWeight1)
|
fullOffload = 4 * batch * (2 + 3*embedding + context*(1+heads) + 2*headsKV + ffnGateWeight1)
|
||||||
partialOffload = max(
|
partialOffload = max(
|
||||||
4*batch*(3+embeddingHeads*headsKV+embedding+context*(1+heads)+ffnGateWeight1)+(embedding*embedding+3*embedding*headsKV*ffnGateWeight1)*9/16,
|
4*batch*(3+embedding/heads*headsKV+embedding+context*(1+heads)+ffnGateWeight1)+(embedding*embedding+3*embedding*headsKV*ffnGateWeight1)*9/16,
|
||||||
4*batch*(1+2*embedding+context*(1+heads))+embedding*(6*context*headsKV/heads+embedding*9/16),
|
4*batch*(1+2*embedding+context*(1+heads))+embedding*(6*context*headsKV/heads+embedding*9/16),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@@ -395,14 +368,15 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
|||||||
fullOffload,
|
fullOffload,
|
||||||
)
|
)
|
||||||
case "deepseek2":
|
case "deepseek2":
|
||||||
|
keys := uint64(llm.KV()["deepseek2.attention.key_length"].(uint32))
|
||||||
fullOffload = max(
|
fullOffload = max(
|
||||||
4*batch*(3*embedding+vocab),
|
4*batch*(3*embedding+vocab),
|
||||||
4*batch*(3*embedding+2+context*(1+headsKV)+2*embeddingHeadsK*headsKV),
|
4*batch*(3*embedding+2+context*(1+headsKV)+2*keys*headsKV),
|
||||||
)
|
)
|
||||||
|
|
||||||
partialOffload = max(
|
partialOffload = max(
|
||||||
4*batch*(3*embedding+vocab)+embedding*vocab*105/128,
|
4*batch*(3*embedding+vocab)+embedding*vocab*105/128,
|
||||||
4*batch*(2*embedding+1+2*embeddingHeadsK*headsKV+context+context*headsKV)+4*embeddingHeadsK*context*headsKV+embedding*embeddingHeadsK*headsKV*9/16,
|
4*batch*(2*embedding+1+2*keys*headsKV+context+context*headsKV)+4*keys*context*headsKV+embedding*keys*headsKV*9/16,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -115,8 +115,8 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
|||||||
slog.Warn("model missing blk.0 layer size")
|
slog.Warn("model missing blk.0 layer size")
|
||||||
}
|
}
|
||||||
|
|
||||||
// fp16 k,v = sizeof(float16) * n_ctx * n_layer * (n_embd_head_k + n_embd_head_v) * n_head_kv
|
// fp16 k,v = (1 (k) + 1 (v)) * sizeof(float16) * n_ctx * n_layer * n_embd / n_head * n_head_kv
|
||||||
var kv uint64 = 2 * uint64(opts.NumCtx) * ggml.KV().BlockCount() * (ggml.KV().EmbeddingHeadCountK() + ggml.KV().EmbeddingHeadCountV()) * ggml.KV().HeadCountKV()
|
var kv uint64 = 2 * 2 * uint64(opts.NumCtx) * ggml.KV().BlockCount() * ggml.KV().EmbeddingLength() / ggml.KV().HeadCount() * ggml.KV().HeadCountKV()
|
||||||
|
|
||||||
// KV is proportional to the number of layers
|
// KV is proportional to the number of layers
|
||||||
layerSize += kv / ggml.KV().BlockCount()
|
layerSize += kv / ggml.KV().BlockCount()
|
||||||
|
|||||||
Reference in New Issue
Block a user