diff --git a/discover/runner.go b/discover/runner.go index 4c0bce75b..5e4e05f95 100644 --- a/discover/runner.go +++ b/discover/runner.go @@ -496,7 +496,7 @@ func bootstrapDevices(ctx context.Context, ollamaLibDirs []string, extraEnvs []s func GetDevicesFromRunner(ctx context.Context, runner BaseRunner) ([]ml.DeviceInfo, error) { var moreDevices []ml.DeviceInfo port := runner.GetPort() - tick := time.Tick(500 * time.Millisecond) + tick := time.Tick(10 * time.Millisecond) for { select { case <-ctx.Done(): @@ -530,7 +530,7 @@ func GetDevicesFromRunner(ctx context.Context, runner BaseRunner) ([]ml.DeviceIn } if resp.StatusCode != 200 { logutil.Trace("runner failed to discover free VRAM", "status", resp.StatusCode, "response", body) - continue + return nil, fmt.Errorf("runner error: %s", string(body)) } if err := json.Unmarshal(body, &moreDevices); err != nil { diff --git a/runner/ollamarunner/runner.go b/runner/ollamarunner/runner.go index c86d3c2b9..a97ef7c18 100644 --- a/runner/ollamarunner/runner.go +++ b/runner/ollamarunner/runner.go @@ -1247,6 +1247,8 @@ func (s *Server) info(w http.ResponseWriter, r *http.Request) { m := s.model if m == nil { + startLoad := time.Now() + // Dummy load to get the backend wired up f, err := os.CreateTemp("", "*.bin") if err != nil { @@ -1268,9 +1270,12 @@ func (s *Server) info(w http.ResponseWriter, r *http.Request) { http.Error(w, fmt.Sprintf("failed to initialize baackend: %v", err), http.StatusInternalServerError) return } + slog.Debug("dummy model load took", "duration", time.Since(startLoad)) } + startDevices := time.Now() infos := m.Backend().BackendDevices() + slog.Debug("gathering device infos took", "duration", time.Since(startDevices)) if err := json.NewEncoder(w).Encode(&infos); err != nil { http.Error(w, fmt.Sprintf("failed to encode response: %v", err), http.StatusInternalServerError) }