diff --git a/discover/gpu_info_vulkan.c b/discover/gpu_info_vulkan.c index 9ebe79e82..65033ad8a 100644 --- a/discover/gpu_info_vulkan.c +++ b/discover/gpu_info_vulkan.c @@ -1,9 +1,10 @@ +#ifndef __APPLE__ #include "gpu_info_vulkan.h" #include int is_extension_supported(vk_handle_t* rh, VkPhysicalDevice device, char* extension) { - VkPhysicalDeviceProperties properties; + VkPhysicalDeviceProperties properties = {}; (*rh->vkGetPhysicalDeviceProperties)(device, &properties); uint32_t extensionCount; @@ -131,7 +132,7 @@ int vk_check_flash_attention(vk_handle_t rh, int i) { return 0; } - VkPhysicalDeviceProperties properties; + VkPhysicalDeviceProperties properties = {}; (*rh.vkGetPhysicalDeviceProperties)(devices[i], &properties); int supports_nv_coopmat2 = is_extension_supported(&rh, devices[i], VK_NV_COOPERATIVE_MATRIX_2_EXTENSION_NAME); @@ -161,7 +162,7 @@ void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp) { return; } - VkPhysicalDeviceProperties properties; + VkPhysicalDeviceProperties properties = {}; (*rh.vkGetPhysicalDeviceProperties)(devices[i], &properties); int supports_budget = is_extension_supported(&rh, devices[i], VK_EXT_MEMORY_BUDGET_EXTENSION_NAME); @@ -186,11 +187,11 @@ void vk_check_vram(vk_handle_t rh, int i, mem_info_t *resp) { device_props2.pNext = &id_props; (*rh.vkGetPhysicalDeviceProperties2)(devices[i], &device_props2); - VkPhysicalDeviceMemoryBudgetPropertiesEXT physical_device_memory_budget_properties; + VkPhysicalDeviceMemoryBudgetPropertiesEXT physical_device_memory_budget_properties = {}; physical_device_memory_budget_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; physical_device_memory_budget_properties.pNext = NULL; - VkPhysicalDeviceMemoryProperties2 device_memory_properties; + VkPhysicalDeviceMemoryProperties2 device_memory_properties = {}; device_memory_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2; device_memory_properties.pNext = &physical_device_memory_budget_properties; @@ -236,3 +237,5 @@ void vk_release(vk_handle_t rh) { UNLOAD_LIBRARY(rh.vk_handle); rh.vk_handle = NULL; } + +#endif // __APPLE__ diff --git a/discover/gpu_info_vulkan.h b/discover/gpu_info_vulkan.h index 665f07fb4..b85f18e6b 100644 --- a/discover/gpu_info_vulkan.h +++ b/discover/gpu_info_vulkan.h @@ -4,7 +4,335 @@ #include "gpu_info.h" -#include +#define VK_DEFINE_HANDLE(object) typedef struct object##_T* object; +VK_DEFINE_HANDLE(VkInstance) +VK_DEFINE_HANDLE(VkPhysicalDevice) + +typedef uint32_t VkFlags; +typedef uint32_t VkBool32; +typedef uint64_t VkDeviceSize; +typedef uint32_t VkSampleMask; +typedef VkFlags VkSampleCountFlags; +typedef VkFlags VkMemoryPropertyFlags; +typedef VkFlags VkMemoryHeapFlags; +typedef VkFlags VkInstanceCreateFlags; + +#define VK_MAX_EXTENSION_NAME_SIZE 256U +#define VK_MAX_DESCRIPTION_SIZE 256U +#define VK_LUID_SIZE 8U +#define VK_UUID_SIZE 16U +#define VK_MAX_MEMORY_TYPES 32U +#define VK_MAX_MEMORY_HEAPS 16U +#define VK_MAX_PHYSICAL_DEVICE_NAME_SIZE 256U + +#define VK_MAKE_VERSION(major, minor, patch) (((major) << 22) | ((minor) << 12) | (patch)) +#define VK_API_VERSION_1_0 VK_MAKE_VERSION(1, 0, 0) +#define VK_API_VERSION_1_1 VK_MAKE_VERSION(1, 1, 0) +#define VK_API_VERSION_1_2 VK_MAKE_VERSION(1, 2, 0) +#define VK_API_VERSION_1_3 VK_MAKE_VERSION(1, 3, 0) +#define VK_API_VERSION_MAJOR(version) ((uint32_t)(version) >> 22) +#define VK_API_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3FF) +#define VK_API_VERSION_PATCH(version) ((uint32_t)(version) & 0xFFF) + +#define VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME "VK_KHR_get_physical_device_properties2" +#define VK_NV_COOPERATIVE_MATRIX_2_EXTENSION_NAME "VK_NV_cooperative_matrix2" +#define VK_EXT_MEMORY_BUDGET_EXTENSION_NAME "VK_EXT_memory_budget" + + +typedef enum VkResult { + VK_SUCCESS = 0, + VK_NOT_READY = 1, + VK_TIMEOUT = 2, + VK_EVENT_SET = 3, + VK_EVENT_RESET = 4, + VK_INCOMPLETE = 5, + VK_ERROR_OUT_OF_HOST_MEMORY = -1, + VK_ERROR_OUT_OF_DEVICE_MEMORY = -2, + VK_ERROR_INITIALIZATION_FAILED = -3, + VK_ERROR_DEVICE_LOST = -4, + VK_ERROR_MEMORY_MAP_FAILED = -5, + VK_ERROR_LAYER_NOT_PRESENT = -6, + VK_ERROR_EXTENSION_NOT_PRESENT = -7, + VK_ERROR_FEATURE_NOT_PRESENT = -8, + VK_ERROR_INCOMPATIBLE_DRIVER = -9, + VK_ERROR_TOO_MANY_OBJECTS = -10, + VK_ERROR_FORMAT_NOT_SUPPORTED = -11, + VK_ERROR_FRAGMENTED_POOL = -12, + VK_ERROR_UNKNOWN = -13, + VK_ERROR_OUT_OF_POOL_MEMORY = -1000069000, + VK_ERROR_INVALID_EXTERNAL_HANDLE = -1000072003, + VK_ERROR_FRAGMENTATION = -1000168000, + VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS = -1000257000, + VK_PIPELINE_COMPILE_REQUIRED = 1000297000, + VK_ERROR_SURFACE_LOST_KHR = -1000000000, + VK_ERROR_NATIVE_WINDOW_IN_USE_KHR = -1000000001, + VK_SUBOPTIMAL_KHR = 1000001003, + VK_ERROR_OUT_OF_DATE_KHR = -1000001004, + VK_ERROR_INCOMPATIBLE_DISPLAY_KHR = -1000003001, + VK_ERROR_VALIDATION_FAILED_EXT = -1000011001, + VK_ERROR_INVALID_SHADER_NV = -1000012000, + VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR = -1000158000, + VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR = -1000158001, + VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR = -1000158002, + VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR = -1000158003, + VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR = -1000158004, + VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR = -1000158005, + VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT = -1000158006, + VK_ERROR_NOT_PERMITTED_KHR = -1000174001, + VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT = -1000255000, + VK_THREAD_IDLE_KHR = 1000268000, + VK_THREAD_DONE_KHR = 1000268001, + VK_OPERATION_DEFERRED_KHR = 1000268002, + VK_OPERATION_NOT_DEFERRED_KHR = 1000268003, + VK_ERROR_COMPRESSION_EXHAUSTED_EXT = -1000338000, + VK_RESULT_MAX_ENUM = 0x7FFFFFFF +} VkResult; + +typedef enum VkStructureType { + VK_STRUCTURE_TYPE_APPLICATION_INFO = 0, + VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 1, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 = 1000059001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2 = 1000059006, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES = 1000071004, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT = 1000237000, + VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkStructureType; + +typedef enum VkPhysicalDeviceType { + VK_PHYSICAL_DEVICE_TYPE_OTHER = 0, + VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU = 1, + VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU = 2, + VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU = 3, + VK_PHYSICAL_DEVICE_TYPE_CPU = 4, + VK_PHYSICAL_DEVICE_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkPhysicalDeviceType; + +typedef enum VkSystemAllocationScope { + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND = 0, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT = 1, + VK_SYSTEM_ALLOCATION_SCOPE_CACHE = 2, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE = 3, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE = 4, + VK_SYSTEM_ALLOCATION_SCOPE_MAX_ENUM = 0x7FFFFFFF +} VkSystemAllocationScope; + +typedef enum VkInternalAllocationType { + VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE = 0, + VK_INTERNAL_ALLOCATION_TYPE_NON_EXECUTABLE = 1, + VK_INTERNAL_ALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkInternalAllocationType; + +#define VK_MEMORY_HEAP_DEVICE_LOCAL_BIT 0x00000001 + +typedef struct VkExtensionProperties { + char extensionName[VK_MAX_EXTENSION_NAME_SIZE]; + uint32_t specVersion; +} VkExtensionProperties; + +typedef struct VkPhysicalDeviceLimits { + uint32_t maxImageDimension1D; + uint32_t maxImageDimension2D; + uint32_t maxImageDimension3D; + uint32_t maxImageDimensionCube; + uint32_t maxImageArrayLayers; + uint32_t maxTexelBufferElements; + uint32_t maxUniformBufferRange; + uint32_t maxStorageBufferRange; + uint32_t maxPushConstantsSize; + uint32_t maxMemoryAllocationCount; + uint32_t maxSamplerAllocationCount; + VkDeviceSize bufferImageGranularity; + VkDeviceSize sparseAddressSpaceSize; + uint32_t maxBoundDescriptorSets; + uint32_t maxPerStageDescriptorSamplers; + uint32_t maxPerStageDescriptorUniformBuffers; + uint32_t maxPerStageDescriptorStorageBuffers; + uint32_t maxPerStageDescriptorSampledImages; + uint32_t maxPerStageDescriptorStorageImages; + uint32_t maxPerStageDescriptorInputAttachments; + uint32_t maxPerStageResources; + uint32_t maxDescriptorSetSamplers; + uint32_t maxDescriptorSetUniformBuffers; + uint32_t maxDescriptorSetUniformBuffersDynamic; + uint32_t maxDescriptorSetStorageBuffers; + uint32_t maxDescriptorSetStorageBuffersDynamic; + uint32_t maxDescriptorSetSampledImages; + uint32_t maxDescriptorSetStorageImages; + uint32_t maxDescriptorSetInputAttachments; + uint32_t maxVertexInputAttributes; + uint32_t maxVertexInputBindings; + uint32_t maxVertexInputAttributeOffset; + uint32_t maxVertexInputBindingStride; + uint32_t maxVertexOutputComponents; + uint32_t maxTessellationGenerationLevel; + uint32_t maxTessellationPatchSize; + uint32_t maxTessellationControlPerVertexInputComponents; + uint32_t maxTessellationControlPerVertexOutputComponents; + uint32_t maxTessellationControlPerPatchOutputComponents; + uint32_t maxTessellationControlTotalOutputComponents; + uint32_t maxTessellationEvaluationInputComponents; + uint32_t maxTessellationEvaluationOutputComponents; + uint32_t maxGeometryShaderInvocations; + uint32_t maxGeometryInputComponents; + uint32_t maxGeometryOutputComponents; + uint32_t maxGeometryOutputVertices; + uint32_t maxGeometryTotalOutputComponents; + uint32_t maxFragmentInputComponents; + uint32_t maxFragmentOutputAttachments; + uint32_t maxFragmentDualSrcAttachments; + uint32_t maxFragmentCombinedOutputResources; + uint32_t maxComputeSharedMemorySize; + uint32_t maxComputeWorkGroupCount[3]; + uint32_t maxComputeWorkGroupInvocations; + uint32_t maxComputeWorkGroupSize[3]; + uint32_t subPixelPrecisionBits; + uint32_t subTexelPrecisionBits; + uint32_t mipmapPrecisionBits; + uint32_t maxDrawIndexedIndexValue; + uint32_t maxDrawIndirectCount; + float maxSamplerLodBias; + float maxSamplerAnisotropy; + uint32_t maxViewports; + uint32_t maxViewportDimensions[2]; + float viewportBoundsRange[2]; + uint32_t viewportSubPixelBits; + size_t minMemoryMapAlignment; + VkDeviceSize minTexelBufferOffsetAlignment; + VkDeviceSize minUniformBufferOffsetAlignment; + VkDeviceSize minStorageBufferOffsetAlignment; + int32_t minTexelOffset; + uint32_t maxTexelOffset; + int32_t minTexelGatherOffset; + uint32_t maxTexelGatherOffset; + float minInterpolationOffset; + float maxInterpolationOffset; + uint32_t subPixelInterpolationOffsetBits; + uint32_t maxFramebufferWidth; + uint32_t maxFramebufferHeight; + uint32_t maxFramebufferLayers; + VkSampleCountFlags framebufferColorSampleCounts; + VkSampleCountFlags framebufferDepthSampleCounts; + VkSampleCountFlags framebufferStencilSampleCounts; + VkSampleCountFlags framebufferNoAttachmentsSampleCounts; + uint32_t maxColorAttachments; + VkSampleCountFlags sampledImageColorSampleCounts; + VkSampleCountFlags sampledImageIntegerSampleCounts; + VkSampleCountFlags sampledImageDepthSampleCounts; + VkSampleCountFlags sampledImageStencilSampleCounts; + VkSampleCountFlags storageImageSampleCounts; + uint32_t maxSampleMaskWords; + VkBool32 timestampComputeAndGraphics; + float timestampPeriod; + uint32_t maxClipDistances; + uint32_t maxCullDistances; + uint32_t maxCombinedClipAndCullDistances; + uint32_t discreteQueuePriorities; + float pointSizeRange[2]; + float lineWidthRange[2]; + float pointSizeGranularity; + float lineWidthGranularity; + VkBool32 strictLines; + VkBool32 standardSampleLocations; + VkDeviceSize optimalBufferCopyOffsetAlignment; + VkDeviceSize optimalBufferCopyRowPitchAlignment; + VkDeviceSize nonCoherentAtomSize; +} VkPhysicalDeviceLimits; + +typedef struct VkPhysicalDeviceSparseProperties { + VkBool32 residencyStandard2DBlockShape; + VkBool32 residencyStandard2DMultisampleBlockShape; + VkBool32 residencyStandard3DBlockShape; + VkBool32 residencyAlignedMipSize; + VkBool32 residencyNonResidentStrict; +} VkPhysicalDeviceSparseProperties; + +typedef struct VkPhysicalDeviceProperties { + uint32_t apiVersion; + uint32_t driverVersion; + uint32_t vendorID; + uint32_t deviceID; + uint32_t deviceType; + char deviceName[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; + uint8_t pipelineCacheUUID[VK_UUID_SIZE]; + VkPhysicalDeviceLimits limits; + VkPhysicalDeviceSparseProperties sparseProperties; +} VkPhysicalDeviceProperties; + +typedef struct VkPhysicalDeviceProperties2 { + VkStructureType sType; + void* pNext; + VkPhysicalDeviceProperties properties; +} VkPhysicalDeviceProperties2; + +typedef struct VkPhysicalDeviceIDProperties { + VkStructureType sType; + void* pNext; + uint8_t deviceUUID[VK_UUID_SIZE]; + uint8_t driverUUID[VK_UUID_SIZE]; + uint8_t deviceLUID[VK_LUID_SIZE]; + uint32_t deviceNodeMask; + VkBool32 deviceLUIDValid; +} VkPhysicalDeviceIDProperties; + +typedef struct VkMemoryType { + VkMemoryPropertyFlags propertyFlags; + uint32_t heapIndex; +} VkMemoryType; + +typedef struct VkMemoryHeap { + VkDeviceSize size; + VkMemoryHeapFlags flags; +} VkMemoryHeap; + +typedef struct VkPhysicalDeviceMemoryProperties { + uint32_t memoryTypeCount; + VkMemoryType memoryTypes[VK_MAX_MEMORY_TYPES]; + uint32_t memoryHeapCount; + VkMemoryHeap memoryHeaps[VK_MAX_MEMORY_HEAPS]; +} VkPhysicalDeviceMemoryProperties; + +typedef struct VkPhysicalDeviceMemoryProperties2 { + VkStructureType sType; + void* pNext; + VkPhysicalDeviceMemoryProperties memoryProperties; +} VkPhysicalDeviceMemoryProperties2; + +typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT { + VkStructureType sType; + void* pNext; + VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; + VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; +} VkPhysicalDeviceMemoryBudgetPropertiesEXT; + +typedef struct VkApplicationInfo { + VkStructureType sType; + const void* pNext; + const char* pApplicationName; + uint32_t applicationVersion; + const char* pEngineName; + uint32_t engineVersion; + uint32_t apiVersion; +} VkApplicationInfo; + +typedef struct VkInstanceCreateInfo { + VkStructureType sType; + const void* pNext; + VkInstanceCreateFlags flags; + const VkApplicationInfo* pApplicationInfo; + uint32_t enabledLayerCount; + const char* const* ppEnabledLayerNames; + uint32_t enabledExtensionCount; + const char* const* ppEnabledExtensionNames; +} VkInstanceCreateInfo; + +typedef struct VkAllocationCallbacks { + void* pUserData; + void* (*pfnAllocation)(void* pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope); + void* (*pfnReallocation)(void* pUserData, void* pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope); + void (*pfnFree)(void* pUserData, void* pMemory); + void (*pfnInternalAllocation)(void* pUserData, size_t size, VkInternalAllocationType allocationType, VkSystemAllocationScope allocationScope); + void (*pfnInternalFree)(void* pUserData, size_t size, VkInternalAllocationType allocationType, VkSystemAllocationScope allocationScope); +} VkAllocationCallbacks; typedef struct { void* vk_handle; diff --git a/discover/vulkan_common.go b/discover/vulkan_common.go deleted file mode 100644 index 4dccbade9..000000000 --- a/discover/vulkan_common.go +++ /dev/null @@ -1,19 +0,0 @@ -package discover - -import ( - "log/slog" - "strings" -) - -func vkGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { - ids := []string{} - for _, info := range gpuInfo { - if info.Library != "vulkan" { - // TODO shouldn't happen if things are wired correctly... - slog.Debug("vkGetVisibleDevicesEnv skipping over non-vulkan device", "library", info.Library) - continue - } - ids = append(ids, info.ID) - } - return "GGML_VK_VISIBLE_DEVICES", strings.Join(ids, ",") -}