Compare commits

..

1 Commit

Author SHA1 Message Date
Roy Han
c494aea5c8 Strip stop strings 2024-06-20 09:06:08 -07:00
4 changed files with 38 additions and 65 deletions

View File

@@ -636,7 +636,7 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
case "modelfile": case "modelfile":
fmt.Println(resp.Modelfile) fmt.Println(resp.Modelfile)
case "parameters": case "parameters":
fmt.Println(resp.Parameters) fmt.Println(formatParams(resp.Parameters, false))
case "system": case "system":
fmt.Println(resp.System) fmt.Println(resp.System)
case "template": case "template":
@@ -664,7 +664,7 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
mainTableData := [][]string{ mainTableData := [][]string{
{"Model"}, {"Model"},
{renderSubTable(modelData, false)}, {renderSubTable(modelData, false, true)},
} }
if resp.ProjectorInfo != nil { if resp.ProjectorInfo != nil {
@@ -678,20 +678,20 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
mainTableData = append(mainTableData, mainTableData = append(mainTableData,
[]string{"Projector"}, []string{"Projector"},
[]string{renderSubTable(projectorData, false)}, []string{renderSubTable(projectorData, false, true)},
) )
} }
if resp.Parameters != "" { if resp.Parameters != "" {
mainTableData = append(mainTableData, []string{"Parameters"}, []string{formatParams(resp.Parameters)}) mainTableData = append(mainTableData, []string{"Parameters"}, []string{formatParams(resp.Parameters, true)})
} }
if resp.System != "" { if resp.System != "" {
mainTableData = append(mainTableData, []string{"System"}, []string{renderSubTable(twoLines(resp.System), true)}) mainTableData = append(mainTableData, []string{"System"}, []string{renderSubTable(twoLines(resp.System), true, true)})
} }
if resp.License != "" { if resp.License != "" {
mainTableData = append(mainTableData, []string{"License"}, []string{renderSubTable(twoLines(resp.License), true)}) mainTableData = append(mainTableData, []string{"License"}, []string{renderSubTable(twoLines(resp.License), true, true)})
} }
table := tablewriter.NewWriter(os.Stdout) table := tablewriter.NewWriter(os.Stdout)
@@ -708,7 +708,7 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
return nil return nil
} }
func renderSubTable(data [][]string, file bool) string { func renderSubTable(data [][]string, file bool, tab bool) string {
var buf bytes.Buffer var buf bytes.Buffer
table := tablewriter.NewWriter(&buf) table := tablewriter.NewWriter(&buf)
table.SetAutoWrapText(!file) table.SetAutoWrapText(!file)
@@ -723,6 +723,10 @@ func renderSubTable(data [][]string, file bool) string {
table.Render() table.Render()
if !tab {
return buf.String()
}
renderedTable := buf.String() renderedTable := buf.String()
lines := strings.Split(renderedTable, "\n") lines := strings.Split(renderedTable, "\n")
for i, line := range lines { for i, line := range lines {
@@ -750,14 +754,16 @@ func twoLines(s string) [][]string {
return res return res
} }
func formatParams(s string) string { func formatParams(s string, tab bool) string {
lines := strings.Split(s, "\n") lines := strings.Split(s, "\n")
table := [][]string{} table := [][]string{}
for _, line := range lines { for _, line := range lines {
table = append(table, strings.Fields(line)) fields := strings.Fields(line)
fields[1] = strings.TrimPrefix(strings.TrimSuffix(fields[1], `"`), `"`)
table = append(table, fields)
} }
return renderSubTable(table, false) return renderSubTable(table, false, tab)
} }
func CopyHandler(cmd *cobra.Command, args []string) error { func CopyHandler(cmd *cobra.Command, args []string) error {

View File

@@ -77,27 +77,20 @@ func cleanupTmpDirs() {
continue continue
} }
raw, err := os.ReadFile(filepath.Join(d, "ollama.pid")) raw, err := os.ReadFile(filepath.Join(d, "ollama.pid"))
if err == nil {
pid, err := strconv.Atoi(string(raw))
if err == nil {
if proc, err := os.FindProcess(pid); err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
// Another running ollama, ignore this tmpdir
continue
}
}
} else {
slog.Debug("failed to open ollama.pid", "path", d, "error", err)
}
err = os.RemoveAll(d)
if err != nil { if err != nil {
slog.Warn("failed to read ollama.pid", "path", d, "error", err) slog.Debug("unable to cleanup stale tmpdir", "path", d, "error", err)
// No pid, ignore this tmpdir
continue
}
pid, err := strconv.Atoi(string(raw))
if err != nil {
slog.Warn("failed to parse pid", "path", d, "error", err)
continue
}
proc, err := os.FindProcess(pid)
if err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
slog.Warn("found running ollama", "pid", pid, "path", d)
// Another running ollama, ignore this tmpdir
continue
}
if err := os.Remove(d); err != nil {
slog.Warn("unable to cleanup stale tmpdir", "path", d, "error", err)
} }
} }
} }

View File

@@ -69,30 +69,6 @@ func (kv KV) HeadCountKV() uint64 {
return 1 return 1
} }
func (kv KV) EmbeddingHeadCount() uint64 {
if heads := kv.HeadCount(); heads > 0 {
return kv.EmbeddingLength() / kv.HeadCount()
}
return 0
}
func (kv KV) EmbeddingHeadCountK() uint64 {
if k := kv.u64(fmt.Sprintf("%s.attention.key_length", kv.Architecture())); k > 0 {
return k
}
return kv.EmbeddingHeadCount()
}
func (kv KV) EmbeddingHeadCountV() uint64 {
if v := kv.u64(fmt.Sprintf("%s.attention.value_length", kv.Architecture())); v > 0 {
return v
}
return kv.EmbeddingHeadCount()
}
func (kv KV) GQA() uint64 { func (kv KV) GQA() uint64 {
return kv.HeadCount() / kv.HeadCountKV() return kv.HeadCount() / kv.HeadCountKV()
} }
@@ -323,9 +299,6 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
headsKV := llm.KV().HeadCountKV() headsKV := llm.KV().HeadCountKV()
vocab := uint64(len(llm.KV()["tokenizer.ggml.tokens"].([]any))) vocab := uint64(len(llm.KV()["tokenizer.ggml.tokens"].([]any)))
embeddingHeads := llm.KV().EmbeddingHeadCount()
embeddingHeadsK := llm.KV().EmbeddingHeadCountK()
layers := llm.Tensors().Layers() layers := llm.Tensors().Layers()
switch llm.KV().Architecture() { switch llm.KV().Architecture() {
@@ -335,7 +308,7 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
partialOffload = 4 * batch * embedding partialOffload = 4 * batch * embedding
partialOffload += max( partialOffload += max(
// 4*batch*(4+6*embedding+context*(2*heads)+llm.KV().GQA()), // 4*batch*(4+6*embedding+context*(2*heads)+llm.KV().GQA()),
4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embeddingHeads*headsKV), 4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embedding/heads*headsKV),
4*batch*(embedding+vocab)+embedding*vocab*105/128, 4*batch*(embedding+vocab)+embedding*vocab*105/128,
) )
@@ -343,15 +316,15 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
// mixtral 8x22b // mixtral 8x22b
ff := uint64(llm.KV()["llama.feed_forward_length"].(uint32)) ff := uint64(llm.KV()["llama.feed_forward_length"].(uint32))
partialOffload = max( partialOffload = max(
3*ffnGateExpsWeight.Size()+4*batch*(2*ff+headsKV+embedding+context+embeddingHeads*headsKV), 3*ffnGateExpsWeight.Size()+4*batch*(2*ff+headsKV+embedding+context+embedding/heads*headsKV),
4*(context*batch*heads+context*embeddingHeads*headsKV+batch*1024+embeddingHeads*headsKV*batch), 4*(context*batch*heads+context*embedding/heads*headsKV+batch*1024+embedding/heads*headsKV*batch),
) )
} else if ffnGateWeight, ok := layers["blk.0"]["ffn_gate.0.weight"]; ok { } else if ffnGateWeight, ok := layers["blk.0"]["ffn_gate.0.weight"]; ok {
// mixtral 8x7b // mixtral 8x7b
ffnGateWeight1 := ffnGateWeight.Shape[1] ffnGateWeight1 := ffnGateWeight.Shape[1]
fullOffload = 4 * batch * (2 + 3*embedding + context*(1+heads) + 2*headsKV + ffnGateWeight1) fullOffload = 4 * batch * (2 + 3*embedding + context*(1+heads) + 2*headsKV + ffnGateWeight1)
partialOffload = max( partialOffload = max(
4*batch*(3+embeddingHeads*headsKV+embedding+context*(1+heads)+ffnGateWeight1)+(embedding*embedding+3*embedding*headsKV*ffnGateWeight1)*9/16, 4*batch*(3+embedding/heads*headsKV+embedding+context*(1+heads)+ffnGateWeight1)+(embedding*embedding+3*embedding*headsKV*ffnGateWeight1)*9/16,
4*batch*(1+2*embedding+context*(1+heads))+embedding*(6*context*headsKV/heads+embedding*9/16), 4*batch*(1+2*embedding+context*(1+heads))+embedding*(6*context*headsKV/heads+embedding*9/16),
) )
} }
@@ -395,14 +368,15 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
fullOffload, fullOffload,
) )
case "deepseek2": case "deepseek2":
keys := uint64(llm.KV()["deepseek2.attention.key_length"].(uint32))
fullOffload = max( fullOffload = max(
4*batch*(3*embedding+vocab), 4*batch*(3*embedding+vocab),
4*batch*(3*embedding+2+context*(1+headsKV)+2*embeddingHeadsK*headsKV), 4*batch*(3*embedding+2+context*(1+headsKV)+2*keys*headsKV),
) )
partialOffload = max( partialOffload = max(
4*batch*(3*embedding+vocab)+embedding*vocab*105/128, 4*batch*(3*embedding+vocab)+embedding*vocab*105/128,
4*batch*(2*embedding+1+2*embeddingHeadsK*headsKV+context+context*headsKV)+4*embeddingHeadsK*context*headsKV+embedding*embeddingHeadsK*headsKV*9/16, 4*batch*(2*embedding+1+2*keys*headsKV+context+context*headsKV)+4*keys*context*headsKV+embedding*keys*headsKV*9/16,
) )
} }

View File

@@ -115,8 +115,8 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
slog.Warn("model missing blk.0 layer size") slog.Warn("model missing blk.0 layer size")
} }
// fp16 k,v = sizeof(float16) * n_ctx * n_layer * (n_embd_head_k + n_embd_head_v) * n_head_kv // fp16 k,v = (1 (k) + 1 (v)) * sizeof(float16) * n_ctx * n_layer * n_embd / n_head * n_head_kv
var kv uint64 = 2 * uint64(opts.NumCtx) * ggml.KV().BlockCount() * (ggml.KV().EmbeddingHeadCountK() + ggml.KV().EmbeddingHeadCountV()) * ggml.KV().HeadCountKV() var kv uint64 = 2 * 2 * uint64(opts.NumCtx) * ggml.KV().BlockCount() * ggml.KV().EmbeddingLength() / ggml.KV().HeadCount() * ggml.KV().HeadCountKV()
// KV is proportional to the number of layers // KV is proportional to the number of layers
layerSize += kv / ggml.KV().BlockCount() layerSize += kv / ggml.KV().BlockCount()