fix - trust the library paths from discovery when starting runner
parent c86af47ac0
commit c57cd59be7

llm/server.go | 316
@@ -238,35 +238,29 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
    }

    var gpuLibs []string
    newLibs := func(libDirs []string) (ret []string) {
        for _, d := range libDirs {
            found := false
            for _, t := range gpuLibs {
                if d == t {
                    found = true
                    break
                }
            }
            if !found {
                ret = append(ret, d)
            }
        }
        return
    }
    for _, gpu := range gpus {
        gpuLibs = append(gpuLibs, gpu.RunnerName())
        gpuLibs = append(gpuLibs, newLibs(gpu.DependencyPath)...)
    }

    requested := envconfig.LLMLibrary()
    if availableLibs[requested] != "" {
        slog.Info("using requested gpu library", "requested", requested)
        gpuLibs = []string{requested}
    }

    var compatible []string
    for _, gpuLib := range gpuLibs {
        var matchingLibs []string
        for k := range availableLibs {
            // exact match first
            if k == gpuLib {
                matchingLibs = append([]string{k}, matchingLibs...)
                continue
            }

            // then match the family (e.g. 'cuda')
            if strings.Split(k, "_")[0] == strings.Split(gpuLib, "_")[0] {
                matchingLibs = append(matchingLibs, k)
            }
        }

        if len(matchingLibs) > 0 {
            compatible = append(compatible, matchingLibs[0])
        }
        gpuLibs = []string{availableLibs[requested]}
    }

    exe, err := os.Executable()
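The newLibs helper in the hunk above collects the dependency directories reported by GPU discovery while skipping any directory already present in gpuLibs, so the list that later becomes the runner's library path contains each directory only once. A minimal standalone sketch of that dedup-and-join pattern (the name dedupDirs and the example directories are illustrative, not part of this change; a map replaces the nested scan purely for brevity):

package main

import (
    "fmt"
    "path/filepath"
    "strings"
)

// dedupDirs keeps only the first occurrence of each library directory.
func dedupDirs(dirs []string) []string {
    seen := map[string]bool{}
    var out []string
    for _, d := range dirs {
        if !seen[d] {
            seen[d] = true
            out = append(out, d)
        }
    }
    return out
}

func main() {
    // Two hypothetical GPUs reporting the same dependency directory.
    dirs := []string{"/usr/lib/ollama/cuda_v12", "/usr/lib/ollama", "/usr/lib/ollama/cuda_v12"}
    fmt.Println(strings.Join(dedupDirs(dirs), string(filepath.ListSeparator)))
    // Linux output: /usr/lib/ollama/cuda_v12:/usr/lib/ollama
}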
@@ -278,164 +272,128 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
        exe = eval
    }

    // iterate through compatible GPU libraries such as 'cuda_v12', 'rocm', etc.
    // adding each library's respective path to the LD_LIBRARY_PATH, until finally running
    // without any LD_LIBRARY_PATH flags
    for {
        port := 0
        if a, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
            var l *net.TCPListener
            if l, err = net.ListenTCP("tcp", a); err == nil {
                port = l.Addr().(*net.TCPAddr).Port
                l.Close()
            }
        }
        if port == 0 {
            slog.Debug("ResolveTCPAddr failed, using random port")
            port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
        }
        params := []string{"runner"}
        if textProcessor != nil {
            // New engine
            // TODO - if we have failure to load scenarios, add logic to retry with the old runner
            params = append(params, "--ollama-engine")
        }
        params = append(params, "--model", modelPath)
        params = append(params, "--port", strconv.Itoa(port))

        var pathEnv string
        switch runtime.GOOS {
        case "windows":
            pathEnv = "PATH"
        case "darwin":
            pathEnv = "DYLD_LIBRARY_PATH"
        default:
            pathEnv = "LD_LIBRARY_PATH"
        }

        // Note: we always put our dependency paths first
        // since these are the exact version we compiled/linked against
        libraryPaths := []string{discover.LibOllamaPath}
        if libraryPath, ok := os.LookupEnv(pathEnv); ok {
            libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
        }

        ggmlPaths := []string{discover.LibOllamaPath}
        for _, c := range compatible {
            if libpath, ok := availableLibs[c]; ok {
                slog.Debug("adding gpu library", "path", libpath)
                libraryPaths = append([]string{libpath}, libraryPaths...)
                ggmlPaths = append(ggmlPaths, libpath)
            }
        }

        for _, gpu := range gpus {
            if gpu.DependencyPath != nil {
                slog.Debug("adding gpu dependency paths", "paths", gpu.DependencyPath)
                libraryPaths = append(gpu.DependencyPath, libraryPaths...)
            }
        }

        // finally, add the root library path
        libraryPaths = append(libraryPaths, discover.LibOllamaPath)

        s := llmServer{
            port: port,
            cmd: exec.Command(exe, params...),
            status: NewStatusWriter(os.Stderr),
            options: opts,
            modelPath: modelPath,
            loadRequest: loadRequest,
            llamaModel: llamaModel,
            llamaModelLock: &sync.Mutex{},
            textProcessor: textProcessor,
            numParallel: numParallel,
            sem: semaphore.NewWeighted(int64(numParallel)),
            totalLayers: f.KV().BlockCount() + 1,
            loadStart: time.Now(),
            done: make(chan error, 1),
        }

        s.cmd.Env = os.Environ()
        s.cmd.Stdout = os.Stdout
        s.cmd.Stderr = s.status
        s.cmd.SysProcAttr = LlamaServerSysProcAttr

        s.cmd.Env = append(s.cmd.Env, "OLLAMA_LIBRARY_PATH="+strings.Join(ggmlPaths, string(filepath.ListSeparator)))

        // Always filter down the set of GPUs in case there are any unsupported devices that might crash
        envWorkarounds := gpus.GetVisibleDevicesEnv()
        pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))

        // Update or add the path variable with our adjusted version
        pathNeeded := true
        envWorkaroundDone := make([]bool, len(envWorkarounds))
        for i := range s.cmd.Env {
            cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
            if strings.EqualFold(cmp[0], pathEnv) {
                s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
                pathNeeded = false
            } else if len(envWorkarounds) != 0 {
                for j, kv := range envWorkarounds {
                    tmp := strings.SplitN(kv, "=", 2)
                    if strings.EqualFold(cmp[0], tmp[0]) {
                        s.cmd.Env[i] = kv
                        envWorkaroundDone[j] = true
                    }
                }
            }
        }
        if pathNeeded {
            s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
        }
        for i, done := range envWorkaroundDone {
            if !done {
                s.cmd.Env = append(s.cmd.Env, envWorkarounds[i])
            }
        }

        slog.Info("starting runner", "cmd", s.cmd)
        slog.Debug("subprocess", "", filteredEnv(s.cmd.Env))

        if err = s.cmd.Start(); err != nil {
            var msg string
            if s.status != nil && s.status.LastErrMsg != "" {
                msg = s.status.LastErrMsg
            }
            err := fmt.Errorf("error starting runner: %v %s", err, msg)
            if len(compatible) == 0 {
                if llamaModel != nil {
                    llama.FreeModel(llamaModel)
                }
                return nil, err
            }

            slog.Warn("unable to start runner with compatible gpu", "error", err, "compatible", compatible)
            compatible = compatible[1:]
            continue
        }

        // reap subprocess when it exits
        go func() {
            err := s.cmd.Wait()
            // Favor a more detailed message over the process exit status
            if err != nil && s.status != nil && s.status.LastErrMsg != "" {
                slog.Error("llama runner terminated", "error", err)
                if strings.Contains(s.status.LastErrMsg, "unknown model") {
                    s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
                }
                s.done <- errors.New(s.status.LastErrMsg)
            } else {
                s.done <- err
            }
        }()

        if textProcessor != nil {
            return &ollamaServer{llmServer: s}, nil
        } else {
            return &llamaServer{llmServer: s, ggml: f}, nil
    port := 0
    if a, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
        var l *net.TCPListener
        if l, err = net.ListenTCP("tcp", a); err == nil {
            port = l.Addr().(*net.TCPAddr).Port
            l.Close()
        }
    }
    if port == 0 {
        slog.Debug("ResolveTCPAddr failed, using random port")
        port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
    }
    params := []string{"runner"}
    if textProcessor != nil {
        // New engine
        // TODO - if we have failure to load scenarios, add logic to retry with the old runner
        params = append(params, "--ollama-engine")
    }
    params = append(params, "--model", modelPath)
    params = append(params, "--port", strconv.Itoa(port))

    var pathEnv string
    switch runtime.GOOS {
    case "windows":
        pathEnv = "PATH"
    case "darwin":
        pathEnv = "DYLD_LIBRARY_PATH"
    default:
        pathEnv = "LD_LIBRARY_PATH"
    }

    s := llmServer{
        port: port,
        cmd: exec.Command(exe, params...),
        status: NewStatusWriter(os.Stderr),
        options: opts,
        modelPath: modelPath,
        loadRequest: loadRequest,
        llamaModel: llamaModel,
        llamaModelLock: &sync.Mutex{},
        textProcessor: textProcessor,
        numParallel: numParallel,
        sem: semaphore.NewWeighted(int64(numParallel)),
        totalLayers: f.KV().BlockCount() + 1,
        loadStart: time.Now(),
        done: make(chan error, 1),
    }

    s.cmd.Env = os.Environ()
    s.cmd.Stdout = os.Stdout
    s.cmd.Stderr = s.status
    s.cmd.SysProcAttr = LlamaServerSysProcAttr

    s.cmd.Env = append(s.cmd.Env, "OLLAMA_LIBRARY_PATH="+strings.Join(gpuLibs, string(filepath.ListSeparator)))

    // Always filter down the set of GPUs in case there are any unsupported devices that might crash
    envWorkarounds := gpus.GetVisibleDevicesEnv()
    pathEnvVal := strings.Join(gpuLibs, string(filepath.ListSeparator))

    // Update or add the path variable with our adjusted version
    pathNeeded := true
    envWorkaroundDone := make([]bool, len(envWorkarounds))
    for i := range s.cmd.Env {
        cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
        if strings.EqualFold(cmp[0], pathEnv) {
            s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
            pathNeeded = false
        } else if len(envWorkarounds) != 0 {
            for j, kv := range envWorkarounds {
                tmp := strings.SplitN(kv, "=", 2)
                if strings.EqualFold(cmp[0], tmp[0]) {
                    s.cmd.Env[i] = kv
                    envWorkaroundDone[j] = true
                }
            }
        }
    }
    if pathNeeded {
        s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
    }
    for i, done := range envWorkaroundDone {
        if !done {
            s.cmd.Env = append(s.cmd.Env, envWorkarounds[i])
        }
    }

    slog.Info("starting runner", "cmd", s.cmd)
    slog.Debug("subprocess", "", filteredEnv(s.cmd.Env))

    if err = s.cmd.Start(); err != nil {
        var msg string
        if s.status != nil && s.status.LastErrMsg != "" {
            msg = s.status.LastErrMsg
        }
        err := fmt.Errorf("error starting runner: %v %s", err, msg)
        if llamaModel != nil {
            llama.FreeModel(llamaModel)
        }
        return nil, err
    }

    // reap subprocess when it exits
    go func() {
        err := s.cmd.Wait()
        // Favor a more detailed message over the process exit status
        if err != nil && s.status != nil && s.status.LastErrMsg != "" {
            slog.Error("llama runner terminated", "error", err)
            if strings.Contains(s.status.LastErrMsg, "unknown model") {
                s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
            }
            s.done <- errors.New(s.status.LastErrMsg)
        } else {
            s.done <- err
        }
    }()

    if textProcessor != nil {
        return &ollamaServer{llmServer: s}, nil
    } else {
        return &llamaServer{llmServer: s, ggml: f}, nil
    }

}

func (s *llmServer) ModelPath() string {
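Both versions of the startup code shown in the diff merge the adjusted library path into the child environment the same way: scan os.Environ() for an entry whose key matches pathEnv case-insensitively (PATH is not case-sensitive on Windows), replace its value if found, and append the variable otherwise. A small self-contained sketch of that update-or-append step (the helper name setEnvVar and the example values are illustrative only, not part of this commit):

package main

import (
    "fmt"
    "strings"
)

// setEnvVar overwrites key in an os.Environ()-style slice, comparing keys
// case-insensitively, or appends the variable when it is not present.
func setEnvVar(env []string, key, val string) []string {
    for i, kv := range env {
        parts := strings.SplitN(kv, "=", 2)
        if strings.EqualFold(parts[0], key) {
            env[i] = key + "=" + val
            return env
        }
    }
    return append(env, key+"="+val)
}

func main() {
    env := []string{"HOME=/home/user", "Path=C:\\Windows\\system32"}
    env = setEnvVar(env, "PATH", `C:\Program Files\Ollama\lib`)
    fmt.Println(env) // [HOME=/home/user PATH=C:\Program Files\Ollama\lib]
}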