Return Correct Prompt Eval Count Regardless of Cache Prompt (#5371)
* openai compatibility
* Revert "openai compatibility"
This reverts commit d3f98a811e.
* remove erroneous subtraction of prompt cache
llm/ext_server/server.cpp | 2 +-
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -1732,7 +1732,7 @@ struct llama_server_context
                     slot.n_past -= 1;
                 }
 
-                slot.n_prompt_tokens_processed = slot.n_prompt_tokens - slot.n_past;
+                slot.n_prompt_tokens_processed = slot.n_prompt_tokens;
 
                 if (slot.ga_n != 1)
                 {
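To make the effect of the one-line change concrete, here is a minimal standalone sketch (not code from ollama or llama.cpp) assuming a hypothetical request whose prompt tokenizes to 100 tokens, 80 of which are already held in the prompt cache; the variable names mirror the slot fields touched by the diff, and the numbers are illustrative only.

// Standalone sketch: how the reported prompt eval count changes when the
// erroneous subtraction of cached tokens is removed.
#include <cstdio>

int main() {
    int n_prompt_tokens = 100; // total tokens in the request prompt (assumed)
    int n_past          = 80;  // tokens reused from the prompt cache (assumed)

    // Old behaviour: only the uncached tail was counted, so a warm cache
    // shrank the reported count.
    int old_count = n_prompt_tokens - n_past; // 20

    // New behaviour: the full prompt is reported regardless of cache_prompt.
    int new_count = n_prompt_tokens;          // 100

    std::printf("old prompt eval count: %d, new prompt eval count: %d\n",
                old_count, new_count);
    return 0;
}

In other words, with prompt caching enabled the old expression under-reported the prompt eval count as the cache warmed up, while the new expression keeps it equal to the full prompt length.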