llm: enable flash attention by default
This commit is contained in:
```diff
@@ -90,6 +90,7 @@ func init() {
 	NumParallel = 1
 	MaxRunners = 1
 	MaxQueuedRequests = 512
+	FlashAttention = true
 
 	LoadConfig()
 }
```
Reference in New Issue
Block a user