From 80873ca49e54b008616e9cd7929146cb6279b1ce Mon Sep 17 00:00:00 2001 From: Inforithmics Date: Sat, 6 Sep 2025 20:58:35 +0200 Subject: [PATCH] Reduce Changes remove TestHomogeneousGPUs (doesn't exist on master) --- server/sched_test.go | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/server/sched_test.go b/server/sched_test.go index 2f3a9caf8..0acd59118 100644 --- a/server/sched_test.go +++ b/server/sched_test.go @@ -676,47 +676,6 @@ func TestAlreadyCanceled(t *testing.T) { require.Empty(t, scenario1a.req.successCh) } -func TestHomogeneousGPUs(t *testing.T) { - ctx, done := context.WithTimeout(t.Context(), 100*time.Millisecond) - defer done() - s := InitScheduler(ctx) - - s.getGpuFn = func() discover.GpuInfoList { - // Set memory values to require the model to be spread - gpus := []discover.GpuInfo{ - {Library: "cuda"}, - {Library: "rocm"}, - {Library: "vulkan"}, - } - gpus[0].TotalMemory = 1 * format.GibiByte - gpus[0].FreeMemory = 256 * format.MebiByte - gpus[1].TotalMemory = 1 * format.GibiByte - gpus[1].FreeMemory = 256 * format.MebiByte - gpus[2].TotalMemory = 1 * format.GibiByte - gpus[2].FreeMemory = 256 * format.MebiByte - return gpus - } - s.getCpuFn = getCpuFn - a := newScenarioRequest(t, ctx, "ollama-model-1", 10, &api.Duration{Duration: 5 * time.Millisecond}) - s.newServerFn = func(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) { - require.Len(t, gpus, 1) - return a.newServer(gpus, model, f, adapters, projectors, opts, numParallel) - } - slog.Info("a") - s.pendingReqCh <- a.req - require.Len(t, s.pendingReqCh, 1) - s.Run(ctx) - select { - case resp := <-a.req.successCh: - require.Equal(t, resp.llama, a.srv) - require.Empty(t, s.pendingReqCh) - require.Empty(t, a.req.errCh) - case err := <-a.req.errCh: - t.Fatal(err.Error()) - case <-ctx.Done(): - t.Fatal("timeout") - } -} type mockLlm struct { modelPath string pingResp error