diff --git a/thirdparty/README.md b/thirdparty/README.md index 0aca97c52f3..4a76e1c22e4 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -926,7 +926,7 @@ Files extracted from upstream source: ## re-spirv - Upstream: https://github.com/renderbag/re-spirv -- Version: git (2f9be81bca5882ada1b6377d2ef8c0f7d8665171, 2025) +- Version: git (00ed4d2c25be0c5ea78523c795119d7570d6209e, 2025) - License: MIT Files extracted from upstream source: diff --git a/thirdparty/re-spirv/re-spirv.cpp b/thirdparty/re-spirv/re-spirv.cpp index fa613383b6a..b9884b67dc1 100644 --- a/thirdparty/re-spirv/re-spirv.cpp +++ b/thirdparty/re-spirv/re-spirv.cpp @@ -799,9 +799,9 @@ namespace respv { uint32_t resultType = UINT32_MAX; uint32_t resultId = UINT32_MAX; uint32_t parameterIndex = 0; - std::vector remapsPending; - std::vector returnParameters; - std::vector sameBlockOperations; + uint32_t remapsPendingCount = 0; + uint32_t returnParametersCount = 0; + uint32_t sameBlockOperationsCount = 0; bool startBlockIdAssigned = false; bool functionInlined = false; @@ -819,6 +819,7 @@ namespace respv { uint32_t functionWordCount = 0; uint32_t codeWordCount = 0; uint32_t variableWordCount = 0; + uint32_t decorationWordCount = 0; uint32_t inlineWordCount = 0; uint32_t returnValueCount = 0; uint32_t callIndex = 0; @@ -858,18 +859,75 @@ namespace respv { // Regular constructor. } }; + + struct FunctionResult { + uint32_t wordIndex = UINT32_MAX; + uint32_t decorationIndex = UINT32_MAX; + }; + + typedef std::vector::iterator FunctionDefinitionIterator; + + struct FunctionItem { + FunctionDefinitionIterator function = {}; + FunctionDefinitionIterator rootFunction = {}; + uint32_t callIndex = 0; + + FunctionItem(FunctionDefinitionIterator function, FunctionDefinitionIterator rootFunction, uint32_t callIndex) : function(function), rootFunction(rootFunction), callIndex(callIndex) { + // Regular constructor. 
+ } + }; + + struct ResultDecoration { + uint32_t wordIndex = 0; + uint32_t nextDecorationIndex = 0; + + ResultDecoration(uint32_t wordIndex, uint32_t nextDecorationIndex) : wordIndex(wordIndex), nextDecorationIndex(nextDecorationIndex) { + // Regular constructor. + } + }; + + thread_local std::vector functionResultMap; + thread_local std::vector resultDecorations; + thread_local std::vector loopMergeIdStack; + thread_local std::vector functionDefinitions; + thread_local std::vector functionParameters; + thread_local std::vector functionCalls; + thread_local std::vector functionStack; + thread_local std::vector callStack; + thread_local std::vector shaderResultMap; + thread_local std::vector storeMap; + thread_local std::vector storeMapChanges; + thread_local std::vector loadMap; + thread_local std::vector loadMapChanges; + thread_local std::vector phiMap; + thread_local std::vector opPhis; + thread_local std::vector remapsPending; + thread_local std::vector returnParameters; + thread_local std::vector sameBlockOperations; + functionResultMap.clear(); + resultDecorations.clear(); + loopMergeIdStack.clear(); + functionDefinitions.clear(); + functionParameters.clear(); + functionCalls.clear(); functionStack.clear(); /* functionStack is thread_local as well and its items hold iterators into functionDefinitions, so it must not carry entries over from a previous call. */ + callStack.clear(); + shaderResultMap.clear(); + storeMap.clear(); + storeMapChanges.clear(); + loadMap.clear(); + loadMapChanges.clear(); + phiMap.clear(); + opPhis.clear(); + remapsPending.clear(); + returnParameters.clear(); + sameBlockOperations.clear(); // Parse all instructions in the shader first. 
const uint32_t *dataWords = reinterpret_cast(pData); const size_t dataWordCount = pSize / sizeof(uint32_t); const uint32_t dataIdBound = dataWords[3]; - std::vector loopMergeIdStack; - std::vector localVariableMap; - localVariableMap.resize(dataIdBound, false); + functionResultMap.resize(dataIdBound); - std::vector functionDefinitions; - std::vector functionParameters; - std::vector functionCalls; FunctionDefinition currentFunction; uint32_t parseWordIndex = SpvStartWordIndex; uint32_t entryPointFunctionId = UINT32_MAX; @@ -927,7 +985,7 @@ namespace respv { fprintf(stderr, "Found function call without a function start.\n"); return false; } - + currentFunction.codeWordCount += wordCount; if (currentFunction.callCount == 0) { @@ -937,11 +995,28 @@ namespace respv { functionCalls.emplace_back(parseWordIndex, dataWords[parseWordIndex + 3], sameBlockWordCount); currentFunction.callCount++; break; + case SpvOpDecorate: { + uint32_t resultId = dataWords[parseWordIndex + 1]; + if (resultId >= dataIdBound) { /* functionResultMap holds exactly dataIdBound entries, so an id equal to the bound is already out of range. */ + fprintf(stderr, "Found decoration with invalid result %u.\n", resultId); + return false; + } + + uint32_t nextDecorationIndex = functionResultMap[resultId].decorationIndex; + functionResultMap[resultId].decorationIndex = uint32_t(resultDecorations.size()); + resultDecorations.emplace_back(parseWordIndex, nextDecorationIndex); + globalWordCount += wordCount; + break; + } case SpvOpVariable: if (currentFunction.resultId != UINT32_MAX) { // Identify the variable as a local function variable. 
uint32_t resultId = dataWords[parseWordIndex + 2]; - localVariableMap[resultId] = true; + if (resultId >= dataIdBound) { /* functionResultMap is sized to dataIdBound and is indexed with this id below, so an id equal to the bound must be rejected too. */ + fprintf(stderr, "Found variable with invalid result %u.\n", resultId); + return false; + } + currentFunction.variableWordCount += wordCount; } else { @@ -1048,6 +1123,28 @@ namespace respv { break; } + if (currentFunction.resultId != UINT32_MAX) { + bool hasResult, hasType; + SpvHasResultAndType(opCode, &hasResult, &hasType); + + if (hasResult) { + // Indicate the result is associated to a function. + uint32_t resultId = dataWords[parseWordIndex + (hasType ? 2 : 1)]; + functionResultMap[resultId].wordIndex = parseWordIndex; + + // Look for all decorations associated to this result. These will be skipped when rewriting + // the shader and written back when the result is parsed again. + uint32_t decorationIndex = functionResultMap[resultId].decorationIndex; + while (decorationIndex != UINT32_MAX) { + const ResultDecoration &decoration = resultDecorations[decorationIndex]; + uint32_t decorationWordCount = (dataWords[decoration.wordIndex] >> 16U) & 0xFFFFU; + currentFunction.decorationWordCount += decorationWordCount; + globalWordCount -= decorationWordCount; + decorationIndex = decoration.nextDecorationIndex; + } + } + } + parseWordIndex += wordCount; } @@ -1060,7 +1157,6 @@ namespace respv { std::sort(functionDefinitions.begin(), functionDefinitions.end()); // Find the entry point function and mark that it shouldn't be inlined. - typedef std::vector::iterator FunctionDefinitionIterator; FunctionDefinitionIterator entryFunctionIt = std::lower_bound(functionDefinitions.begin(), functionDefinitions.end(), entryPointFunctionId); if (entryFunctionIt == functionDefinitions.end()) { fprintf(stderr, "Unable to find entry point function %d.\n", entryPointFunctionId); @@ -1071,17 +1167,6 @@ namespace respv { // Do a first iteration pass with the functions that can't be inlined as the starting points of the stack. 
// This pass will figure out the total size required for the final inlined shader. - struct FunctionItem { - FunctionDefinitionIterator function = {}; - FunctionDefinitionIterator rootFunction = {}; - uint32_t callIndex = 0; - - FunctionItem(FunctionDefinitionIterator function, FunctionDefinitionIterator rootFunction, uint32_t callIndex) : function(function), rootFunction(rootFunction), callIndex(callIndex) { - // Regular constructor. - } - }; - - std::vector functionStack; FunctionDefinitionIterator startFunctionIt = functionDefinitions.begin(); while (startFunctionIt != functionDefinitions.end()) { if (!startFunctionIt->canInline) { @@ -1092,12 +1177,14 @@ namespace respv { } uint32_t codeWordCount = 0; + uint32_t functionDecorationWordCount = 0; while (!functionStack.empty()) { FunctionItem &functionItem = functionStack.back(); if (functionItem.callIndex == functionItem.function->callCount) { // Add this function's code and variables. codeWordCount += functionItem.function->codeWordCount; codeWordCount += functionItem.function->variableWordCount; + functionDecorationWordCount += functionItem.function->decorationWordCount; // This function will be inlined so its variables should be reserved on the parent function instead. if (functionItem.function->canInline) { @@ -1137,43 +1224,39 @@ namespace respv { } // Figure out the total size of the shader and copy the header. - size_t totalWordCount = SpvStartWordIndex + globalWordCount + codeWordCount; + size_t totalWordCount = SpvStartWordIndex + globalWordCount + codeWordCount + functionDecorationWordCount; inlinedSpirvWords.resize(totalWordCount); memcpy(inlinedSpirvWords.data(), pData, SpvStartWordIndex * sizeof(uint32_t)); // To avoid reallocation of these unless the shader really warrants it, we reserve some memory for these vectors. 
uint32_t &inlinedIdBound = inlinedSpirvWords[3]; uint32_t dstWordIndex = SpvStartWordIndex; - std::vector callStack; - std::vector shaderResultMap; - std::vector storeMap; - std::vector storeMapChanges; - std::vector loadMap; - std::vector loadMapChanges; - std::vector phiMap; - std::vector opPhis; - constexpr size_t ReservationForRecursionDepth = 8; - callStack.reserve(ReservationForRecursionDepth); shaderResultMap.resize(dataIdBound, UINT32_MAX); storeMap.resize(dataIdBound, UINT32_MAX); loadMap.resize(dataIdBound, UINT32_MAX); phiMap.resize(dataIdBound, UINT32_MAX); - auto copyInstruction = [&](uint32_t dataWordIndex, bool renameResult, uint32_t &copyWordIndex) { + auto copyInstruction = [&](uint32_t dataWordIndex, bool renameResult, uint32_t &copyWordIndex, uint32_t &copyDecorationIndex) { + copyDecorationIndex = UINT32_MAX; + SpvOp opCode = SpvOp(dataWords[dataWordIndex] & 0xFFFFU); uint32_t wordCount = (dataWords[dataWordIndex] >> 16U) & 0xFFFFU; for (uint32_t i = 0; i < wordCount; i++) { inlinedSpirvWords[copyWordIndex + i] = dataWords[dataWordIndex + i]; } - // Any inlined functions must remap all their results and operands. - if (renameResult) { - bool hasResult, hasType; - SpvHasResultAndType(opCode, &hasResult, &hasType); + bool hasResult, hasType; + SpvHasResultAndType(opCode, &hasResult, &hasType); - if (hasResult) { + if (hasResult) { + // Any inlined functions must remap all their results and operands. + uint32_t &resultId = inlinedSpirvWords[copyWordIndex + (hasType ? 2 : 1)]; + if ((resultId < dataIdBound) && (functionResultMap[resultId].wordIndex != UINT32_MAX)) { + copyDecorationIndex = functionResultMap[resultId].decorationIndex; + } + + if (renameResult) { // First labels in a function will be replaced by the assigned label if present. - uint32_t &resultId = inlinedSpirvWords[copyWordIndex + (hasType ? 
2 : 1)]; uint32_t newResultId; if ((opCode == SpvOpLabel) && (callStack.back().startBlockId != UINT32_MAX) && !callStack.back().startBlockIdAssigned) { newResultId = callStack.back().startBlockId; @@ -1234,14 +1317,26 @@ namespace respv { if (SpvHasLabels(opCode, labelWordStart, labelWordCount, labelWordStride, true)) { for (uint32_t j = 0; (j < labelWordCount) && ((labelWordStart + j * labelWordStride) < wordCount); j++) { uint32_t labelWordIndex = labelWordStart + j * labelWordStride; - callStack.back().remapsPending.emplace_back(copyWordIndex + labelWordIndex); + remapsPending.emplace_back(copyWordIndex + labelWordIndex); + callStack.back().remapsPendingCount++; } } copyWordIndex += wordCount; }; + auto copyDecorations = [&](uint32_t copyDecorationIndex, uint32_t &copyWordIndex) { + uint32_t placeholderWordIndex; + while (copyDecorationIndex != UINT32_MAX) { + copyInstruction(resultDecorations[copyDecorationIndex].wordIndex, false, copyWordIndex, placeholderWordIndex); + copyDecorationIndex = resultDecorations[copyDecorationIndex].nextDecorationIndex; + } + }; + // Perform the final pass for inlining all functions. + uint32_t copyDecorationIndex; + uint32_t dstInlinedDecorationWordIndex = UINT32_MAX; + uint32_t dstInlinedDecorationWordIndexMax = UINT32_MAX; uint32_t dstInlinedVariableWordIndex = UINT32_MAX; uint32_t dstInlinedVariableWordIndexMax = UINT32_MAX; callStack.emplace_back(SpvStartWordIndex); @@ -1272,8 +1367,9 @@ namespace respv { loadMapChanges.pop_back(); } - callStack.back().sameBlockOperations.clear(); + sameBlockOperations.resize(sameBlockOperations.size() - callStack.back().sameBlockOperationsCount); callStack.back().blockId = dataWords[callWordIndex + 1]; + callStack.back().sameBlockOperationsCount = 0; break; case SpvOpFunction: { uint32_t functionId = dataWords[callWordIndex + 2]; @@ -1311,8 +1407,8 @@ namespace respv { break; case SpvOpFunctionEnd: { // Apply any pending remappings from instructions with labels. 
- for (uint32_t remapPending : callStack.back().remapsPending) { - uint32_t &resultId = inlinedSpirvWords[remapPending]; + for (size_t i = remapsPending.size() - callStack.back().remapsPendingCount; i < remapsPending.size(); i++) { + uint32_t &resultId = inlinedSpirvWords[remapsPending[i]]; if (shaderResultMap[resultId] != UINT32_MAX) { resultId = shaderResultMap[resultId]; } @@ -1343,12 +1439,12 @@ namespace respv { inlinedSpirvWords[dstWordIndex++] = callStack.back().returnBlockId; // If the function only returns one possible value, the caller instead will just remap the result to this one. - if (callStack.back().returnParameters.size() == 2) { + if (callStack.back().returnParametersCount == 2) { uint32_t functionResultId = callStack.back().resultId; - shaderResultMap[functionResultId] = callStack.back().returnParameters[0]; + shaderResultMap[functionResultId] = returnParameters[returnParameters.size() - callStack.back().returnParametersCount]; } // Insert an OpPhi for selecting the result from a function call that called a function that returns multiple values. - else if (callStack.back().returnParameters.size() > 2) { + else if (callStack.back().returnParametersCount > 2) { // Remap the function result if necessary. const CallItem &previousCallStack = callStack[callStack.size() - 2]; uint32_t functionResultId = callStack.back().resultId; @@ -1359,13 +1455,13 @@ namespace respv { } opPhis.emplace_back(dstWordIndex); - inlinedSpirvWords[dstWordIndex++] = SpvOpPhi | ((3 + callStack.back().returnParameters.size()) << 16U); + inlinedSpirvWords[dstWordIndex++] = SpvOpPhi | ((3 + callStack.back().returnParametersCount) << 16U); inlinedSpirvWords[dstWordIndex++] = callStack.back().resultType; inlinedSpirvWords[dstWordIndex++] = functionResultId; // Copy the OpPhi arguments directly. 
- for (size_t i = 0; i < callStack.back().returnParameters.size(); i++) { - inlinedSpirvWords[dstWordIndex++] = callStack.back().returnParameters[i]; + for (size_t i = returnParameters.size() - callStack.back().returnParametersCount; i < returnParameters.size(); i++) { + inlinedSpirvWords[dstWordIndex++] = returnParameters[i]; } } @@ -1373,12 +1469,16 @@ namespace respv { } // Pop this stack level and return to iterating on the previous one. + remapsPending.resize(remapsPending.size() - callStack.back().remapsPendingCount); + returnParameters.resize(returnParameters.size() - callStack.back().returnParametersCount); + sameBlockOperations.resize(sameBlockOperations.size() - callStack.back().sameBlockOperationsCount); callStack.pop_back(); if (!callStack.empty()) { // Copy the same block operations and rename the results even if the function wasn't inlined. - for (uint32_t sameBlockWordIndex : callStack.back().sameBlockOperations) { - copyInstruction(sameBlockWordIndex, true, dstWordIndex); + for (size_t i = sameBlockOperations.size() - callStack.back().sameBlockOperationsCount; i < sameBlockOperations.size(); i++) { + copyInstruction(sameBlockOperations[i], true, dstWordIndex, copyDecorationIndex); + copyDecorations(copyDecorationIndex, dstInlinedDecorationWordIndex); } callStack.back().wordIndex -= wordCount; @@ -1454,6 +1554,20 @@ namespace respv { break; } + case SpvOpDecorate: { + if (dstInlinedDecorationWordIndex == UINT32_MAX) { + // Upon encountering the first decoration in the shader, reserve space to write out any decorations + // that are found to be linked to function results. + dstInlinedDecorationWordIndex = dstWordIndex; + dstWordIndex += functionDecorationWordCount; + dstInlinedDecorationWordIndexMax = dstWordIndex; + } + + // Only copy the decoration as-is if it doesn't belong to a result in a function. 
+ uint32_t resultId = dataWords[callWordIndex + 1]; + copyWords = (functionResultMap[resultId].wordIndex == UINT32_MAX); + break; + } case SpvOpVariable: if ((callStack.back().functionId < UINT32_MAX) && !callStack.back().functionInlined) { // As soon as we find a variable local to the function, reserve the space to insert all @@ -1502,8 +1616,9 @@ namespace respv { operandId = shaderResultMap[operandId]; } - callStack.back().returnParameters.emplace_back(operandId); - callStack.back().returnParameters.emplace_back(callStack.back().blockId); + returnParameters.emplace_back(operandId); + returnParameters.emplace_back(callStack.back().blockId); + callStack.back().returnParametersCount += 2; } else { // Copy as is. @@ -1518,7 +1633,7 @@ namespace respv { // Ignore load operations with memory operands. if (wordCount == 4) { uint32_t pointerId = dataWords[callStack.back().wordIndex + 3]; - if (localVariableMap[pointerId] && (storeMap[pointerId] < dataIdBound)) { + if ((functionResultMap[pointerId].wordIndex != UINT32_MAX) && (storeMap[pointerId] < dataIdBound)) { uint32_t resultId = dataWords[callStack.back().wordIndex + 2]; if (loadMap[resultId] != storeMap[pointerId]) { loadMap[resultId] = storeMap[pointerId]; @@ -1548,7 +1663,8 @@ namespace respv { break; case SpvOpImage: case SpvOpSampledImage: { - callStack.back().sameBlockOperations.emplace_back(callStack.back().wordIndex); + sameBlockOperations.emplace_back(callStack.back().wordIndex); + callStack.back().sameBlockOperationsCount++; break; } default: @@ -1557,15 +1673,17 @@ namespace respv { if (copyWords) { uint32_t &copyWordIndex = copyWordsToVariables ? dstInlinedVariableWordIndex : dstWordIndex; - copyInstruction(callWordIndex, callStack.back().functionInlined, copyWordIndex); - - // Make sure enough space was reserved for variables. 
- assert(!copyWordsToVariables || copyWordIndex <= dstInlinedVariableWordIndexMax); + copyInstruction(callWordIndex, callStack.back().functionInlined, copyWordIndex, copyDecorationIndex); + copyDecorations(copyDecorationIndex, dstInlinedDecorationWordIndex); } if (!callStack.empty()) { callStack.back().wordIndex += wordCount; } + + assert(dstWordIndex <= totalWordCount && "Not enough words were reserved for the shader."); + assert(dstInlinedVariableWordIndex <= dstInlinedVariableWordIndexMax && "Not enough words were reserved for inlined variables."); + assert(dstInlinedDecorationWordIndex <= dstInlinedDecorationWordIndexMax && "Not enough words were reserved for function decorations."); } if (dstWordIndex != totalWordCount) { @@ -1679,11 +1797,18 @@ namespace respv { // Greatly decreases the costs of adding nodes to the linked list. listNodes.reserve(instructions.size() * 2); + thread_local std::vector loopMergeBlockStack; + thread_local std::vector loopMergeInstructionStack; + thread_local std::vector preOrderVisitedBlocks; + thread_local std::vector postOrderVisitedBlocks; + loopMergeBlockStack.clear(); + loopMergeInstructionStack.clear(); + preOrderVisitedBlocks.clear(); + postOrderVisitedBlocks.clear(); + bool foundOpSwitch = false; const uint32_t *dataWords = reinterpret_cast(pData); const size_t dataWordCount = pSize / sizeof(uint32_t); - std::vector loopMergeBlockStack; - std::vector loopMergeInstructionStack; uint32_t currentBlockId = 0; uint32_t currentLoopHeaderIndex = 0; for (uint32_t i = 0; i < uint32_t(instructions.size()); i++) { @@ -1874,8 +1999,8 @@ namespace respv { // Do a pre-order and post-order traversal of the tree starting from each function. These indices are // later used to figure out whether instructions dominate other instructions when doing optimizations. 
- std::vector preOrderVisitedBlocks; - std::vector postOrderVisitedBlocks; + thread_local std::vector blockIndexStack; + thread_local std::vector blockAdjacentStack; uint32_t preOrderIndex = 0; uint32_t postOrderIndex = 0; blockPreOrderIndices.resize(blocks.size(), 0); @@ -1885,8 +2010,8 @@ namespace respv { for (uint32_t i = 0; i < uint32_t(functions.size()); i++) { const Function &function = functions[i]; const Instruction &functionLabelInstruction = instructions[function.labelInstructionIndex]; - std::vector blockIndexStack; - std::vector blockAdjacentStack; + blockIndexStack.clear(); + blockAdjacentStack.clear(); blockIndexStack.emplace_back(functionLabelInstruction.blockIndex); blockAdjacentStack.emplace_back(UINT32_MAX); while (!blockIndexStack.empty()) { @@ -1971,14 +2096,18 @@ namespace respv { } } + // Sort degrees doesn't need to be cleared as its contents will be copied over. + thread_local std::vector sortDegrees; + thread_local std::vector instructionStack; + thread_local std::vector instructionSortVector; + instructionStack.clear(); + instructionSortVector.clear(); + // Make a copy of the degrees as they'll be used to perform a topological sort. - std::vector sortDegrees; sortDegrees.resize(instructionInDegrees.size()); memcpy(sortDegrees.data(), instructionInDegrees.data(), sizeof(uint32_t) * sortDegrees.size()); // The first nodes to be processed should be the ones with no incoming connections. - std::vector instructionStack; - instructionStack.clear(); for (uint32_t i = 0; i < uint32_t(instructions.size()); i++) { if (sortDegrees[i] == 0) { instructionStack.emplace_back(i); @@ -2019,8 +2148,6 @@ namespace respv { return false; } - std::vector instructionSortVector; - instructionSortVector.clear(); instructionSortVector.resize(instructionOrder.size(), InstructionSort()); for (uint32_t instructionIndex : instructionOrder) { uint64_t nextLevel = instructionSortVector[instructionIndex].instructionLevel + 1;