diff --git a/server/sched_test.go b/server/sched_test.go
index 2f3a9caf8..0acd59118 100644
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -676,47 +676,6 @@ func TestAlreadyCanceled(t *testing.T) {
 	require.Empty(t, scenario1a.req.successCh)
 }
 
-func TestHomogeneousGPUs(t *testing.T) {
-	ctx, done := context.WithTimeout(t.Context(), 100*time.Millisecond)
-	defer done()
-	s := InitScheduler(ctx)
-
-	s.getGpuFn = func() discover.GpuInfoList {
-		// Set memory values to require the model to be spread
-		gpus := []discover.GpuInfo{
-			{Library: "cuda"},
-			{Library: "rocm"},
-			{Library: "vulkan"},
-		}
-		gpus[0].TotalMemory = 1 * format.GibiByte
-		gpus[0].FreeMemory = 256 * format.MebiByte
-		gpus[1].TotalMemory = 1 * format.GibiByte
-		gpus[1].FreeMemory = 256 * format.MebiByte
-		gpus[2].TotalMemory = 1 * format.GibiByte
-		gpus[2].FreeMemory = 256 * format.MebiByte
-		return gpus
-	}
-	s.getCpuFn = getCpuFn
-	a := newScenarioRequest(t, ctx, "ollama-model-1", 10, &api.Duration{Duration: 5 * time.Millisecond})
-	s.newServerFn = func(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
-		require.Len(t, gpus, 1)
-		return a.newServer(gpus, model, f, adapters, projectors, opts, numParallel)
-	}
-	slog.Info("a")
-	s.pendingReqCh <- a.req
-	require.Len(t, s.pendingReqCh, 1)
-	s.Run(ctx)
-	select {
-	case resp := <-a.req.successCh:
-		require.Equal(t, resp.llama, a.srv)
-		require.Empty(t, s.pendingReqCh)
-		require.Empty(t, a.req.errCh)
-	case err := <-a.req.errCh:
-		t.Fatal(err.Error())
-	case <-ctx.Done():
-		t.Fatal("timeout")
-	}
-}
 type mockLlm struct {
 	modelPath string
 	pingResp  error