tests: move csv output to benstat format
This commit is contained in:
parent
b97eb2b858
commit
61349a8ec6
|
|
@ -161,11 +161,12 @@ func doModelPerfTest(t *testing.T, chatModels []string) {
|
||||||
}
|
}
|
||||||
|
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
|
name string
|
||||||
prompt string
|
prompt string
|
||||||
anyResp []string
|
anyResp []string
|
||||||
}{
|
}{
|
||||||
{blueSkyPrompt, blueSkyExpected},
|
{"blue_sky", blueSkyPrompt, blueSkyExpected},
|
||||||
{maxPrompt, []string{"shakespeare", "oppression", "sorrows", "gutenberg", "child", "license", "sonnet", "melancholy", "love", "sorrow", "beauty"}},
|
{"max", maxPrompt, []string{"shakespeare", "oppression", "sorrows", "gutenberg", "child", "license", "sonnet", "melancholy", "love", "sorrow", "beauty"}},
|
||||||
}
|
}
|
||||||
var gpuPercent int
|
var gpuPercent int
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
|
|
@ -259,25 +260,20 @@ func doModelPerfTest(t *testing.T, chatModels []string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Round the logged prompt count for comparisons across versions/configurations which can vary slightly
|
prefillTimePerToken := float64(resp.PromptEvalDuration.Nanoseconds()) / float64(resp.PromptEvalCount)
|
||||||
fmt.Fprintf(os.Stderr, "MODEL_PERF_HEADER:%s,%s,%s,%s,%s,%s,%s\n",
|
prefillTokensPerSec := float64(resp.PromptEvalCount) / (float64(resp.PromptEvalDuration.Nanoseconds()) + 1e-12) * 1e9
|
||||||
"MODEL",
|
fmt.Fprintf(os.Stderr, "BenchmarkModel/name=%s-%s/%d/step=%s %d %.2f ns/token %.2f token/sec\n",
|
||||||
"CONTEXT",
|
model, tc.name, numCtx, "prefill", resp.PromptEvalCount, prefillTimePerToken, prefillTokensPerSec)
|
||||||
"GPU PERCENT",
|
|
||||||
"APPROX PROMPT COUNT",
|
evalTimePerToken := float64(resp.EvalDuration.Nanoseconds()) / float64(resp.EvalCount)
|
||||||
"LOAD TIME",
|
evalTokensPerSec := float64(resp.EvalCount) / (float64(resp.EvalDuration.Nanoseconds()) + 1e-12) * 1e9
|
||||||
"PROMPT EVAL TPS",
|
fmt.Fprintf(os.Stderr, "BenchmarkModel/name=%s-%s/%d/step=%s %d %.2f ns/token %.2f token/sec\n",
|
||||||
"EVAL TPS",
|
model, tc.name, numCtx, "generate", resp.EvalCount, evalTimePerToken, evalTokensPerSec)
|
||||||
)
|
|
||||||
fmt.Fprintf(os.Stderr, "MODEL_PERF_DATA:%s,%d,%d,%d,%0.2f,%0.2f,%0.2f\n",
|
fmt.Fprintf(os.Stderr, "BenchmarkModel/name=%s-%s/%d 1 %d ns/request\n",
|
||||||
model,
|
model, tc.name, numCtx, resp.TotalDuration.Nanoseconds())
|
||||||
numCtx,
|
fmt.Fprintf(os.Stderr, "BenchmarkModel/name=%s-%s/%d/step=%s 1 %d ns/request\n",
|
||||||
gpuPercent,
|
model, tc.name, numCtx, "load", resp.LoadDuration.Nanoseconds())
|
||||||
(resp.PromptEvalCount/10)*10,
|
|
||||||
float64(resp.LoadDuration)/1000000000.0,
|
|
||||||
float64(resp.PromptEvalCount)/(float64(resp.PromptEvalDuration)/1000000000.0),
|
|
||||||
float64(resp.EvalCount)/(float64(resp.EvalDuration)/1000000000.0),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue