package ssaimport ()// RunPasses implements Builder.RunPasses.//// The order here matters; some pass depends on the previous ones.//// Note that passes suffixed with "Opt" are the optimization passes, meaning that they edit the instructions and blocks// while the other passes are not, like passEstimateBranchProbabilities does not edit them, but only calculates the additional information.func ( *builder) () { .runPreBlockLayoutPasses() .runBlockLayoutPass() .runPostBlockLayoutPasses() .runFinalizingPasses()}func ( *builder) () {passSortSuccessors()passDeadBlockEliminationOpt()// The result of passCalculateImmediateDominators will be used by various passes below.passCalculateImmediateDominators()passRedundantPhiEliminationOpt()passNopInstElimination()// TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic. // WebAssembly program shouldn't result in irreducible CFG, but we should handle it properly in just in case. // See FixIrreducible pass in LLVM: https://llvm.org/doxygen/FixIrreducible_8cpp_source.html// TODO: implement more optimization passes like: // block coalescing. // Copy-propagation. // Constant folding. // Common subexpression elimination. // Arithmetic simplifications. // and more!// passDeadCodeEliminationOpt could be more accurate if we do this after other optimizations.passDeadCodeEliminationOpt() .donePreBlockLayoutPasses = true}func ( *builder) () {if !.donePreBlockLayoutPasses {panic("runBlockLayoutPass must be called after all pre passes are done") }passLayoutBlocks() .doneBlockLayout = true}// runPostBlockLayoutPasses runs the post block layout passes. After this point, CFG is somewhat stable,// but still can be modified before finalizing passes. At this point, critical edges are split by passLayoutBlocks.func ( *builder) () {if !.doneBlockLayout {panic("runPostBlockLayoutPasses must be called after block layout pass is done") }// TODO: Do more. e.g. tail duplication, loop unrolling, etc. .donePostBlockLayoutPasses = true}// runFinalizingPasses runs the finalizing passes. After this point, CFG should not be modified.func ( *builder) () {if !.donePostBlockLayoutPasses {panic("runFinalizingPasses must be called after post block layout passes are done") }// Critical edges are split, so we fix the loop nesting forest.passBuildLoopNestingForest()passBuildDominatorTree()// Now that we know the final placement of the blocks, we can explicitly mark the fallthrough jumps. .markFallthroughJumps()}// passDeadBlockEliminationOpt searches the unreachable blocks, and sets the basicBlock.invalid flag true if so.func passDeadBlockEliminationOpt( *builder) { := .entryBlk() .blkStack = append(.blkStack, )forlen(.blkStack) > 0 { := .blkStack[len(.blkStack)-1] .blkStack = .blkStack[:len(.blkStack)-1] .visited = 1if !.sealed && !.ReturnBlock() {panic(fmt.Sprintf("%s is not sealed", )) }ifwazevoapi.SSAValidationEnabled { .validate() }for , := range .success {if .visited == 1 {continue } .blkStack = append(.blkStack, ) } }for := .blockIteratorBegin(); != nil; = .blockIteratorNext() {if .visited != 1 { .invalid = true } .visited = 0 }}// passRedundantPhiEliminationOpt eliminates the redundant PHIs (in our terminology, parameters of a block).// This requires the reverse post-order traversal to be calculated before calling this function,// hence passCalculateImmediateDominators must be called before this.func passRedundantPhiEliminationOpt( *builder) { := .redundantParams[:0] // reuse the slice from previous iterations.// TODO: this might be costly for large programs, but at least, as far as I did the experiment, it's almost the // same as the single iteration version in terms of the overall compilation time. That *might be* mostly thanks to the fact // that removing many PHIs results in the reduction of the total instructions, not because of this indefinite iteration is // relatively small. For example, sqlite speedtest binary results in the large number of redundant PHIs, // the maximum number of iteration was 22, which seems to be acceptable but not that small either since the // complexity here is O(BlockNum * Iterations) at the worst case where BlockNum might be the order of thousands. // -- Note -- // Currently, each iteration can run in any order of blocks, but it empirically converges quickly in practice when // running on the reverse post-order. It might be possible to optimize this further by using the dominator tree.for { := false _ = .blockIteratorReversePostOrderBegin() // skip entry block!// Below, we intentionally use the named iteration variable name, as this comes with inevitable nested for loops!for := .blockIteratorReversePostOrderNext(); != nil; = .blockIteratorReversePostOrderNext() { := .params.View() := len()for := 0; < ; ++ { := [] := true := ValueInvalidfor := range .preds { := .preds[].branch// Resolve the alias in the arguments so that we could use the previous iteration's result. .resolveArgumentAlias() := .vs.View()[]if == {// This is self-referencing: PHI from the same PHI.continue }if !.Valid() { = continue }if != { = falsebreak } }if !.Valid() {// This shouldn't happen, and must be a bug in builder.go.panic("BUG: params added but only self-referencing") }if { = append(, redundantParam{index: , uniqueValue: , }) } }iflen() == 0 {continue } = true// Remove the redundant PHIs from the argument list of branching instructions.for := range .preds { , := 0, 0 := .preds[] := .branch := .vs.View()for , := range {iflen() == || [].index != { [] = ++ } else { ++ } } .vs.Cut() }// Still need to have the definition of the value of the PHI (previously as the parameter).for := range { := &[] := [.index]// Create an alias in this block from the only phi argument to the phi value. .alias(, .uniqueValue) }// Finally, Remove the param from the blk. , := 0, 0for := 0; < ; ++ { := []iflen() == || [].index != { [] = ++ } else { ++ } } .params.Cut()// Clears the map for the next iteration. = [:0] }if ! {break } }// Reuse the slice for the future passes. .redundantParams = }// passDeadCodeEliminationOpt traverses all the instructions, and calculates the reference count of each Value, and// eliminates all the unnecessary instructions whose ref count is zero.// The results are stored at builder.valueRefCounts. This also assigns a InstructionGroupID to each Instruction// during the process. This is the last SSA-level optimization pass and after this,// the SSA function is ready to be used by backends.//// TODO: the algorithm here might not be efficient. Get back to this later.func passDeadCodeEliminationOpt( *builder) { := int(.nextValueID)if >= len(.valuesInfo) { := - len(.valuesInfo) + 1 .valuesInfo = append(.valuesInfo, make([]ValueInfo, )...) := .valuesInfo[len(.valuesInfo)-:]for := range { [].alias = ValueInvalid } }// First, we gather all the instructions with side effects. := .instStack[:0]// During the process, we will assign InstructionGroupID to each instruction, which is not // relevant to dead code elimination, but we need in the backend.varInstructionGroupIDfor := .blockIteratorBegin(); != nil; = .blockIteratorNext() {for := .rootInstr; != nil; = .next { .gid = switch .sideEffect() {casesideEffectTraps:// The trappable should always be alive. = append(, )casesideEffectStrict: = append(, )// The strict side effect should create different instruction groups. ++ } } }// Find all the instructions referenced by live instructions transitively.forlen() > 0 { := len() - 1 := [] = [:]if .live {// If it's already marked alive, this is referenced multiple times, // so we can skip it.continue } .live = true// Before we walk, we need to resolve the alias first. .resolveArgumentAlias() , , , := .Args()if .Valid() { := .InstructionOfValue()if != nil { = append(, ) } }if .Valid() { := .InstructionOfValue()if != nil { = append(, ) } }if .Valid() { := .InstructionOfValue()if != nil { = append(, ) } }for , := range { := .InstructionOfValue()if != nil { = append(, ) } } }// Now that all the live instructions are flagged as live=true, we eliminate all dead instructions.for := .blockIteratorBegin(); != nil; = .blockIteratorNext() {for := .rootInstr; != nil; = .next {if !.live {// Remove the instruction from the list.if := .prev; != nil { .next = .next } else { .rootInstr = .next }if := .next; != nil { .prev = .prev }continue }// If the value alive, we can be sure that arguments are used definitely. // Hence, we can increment the value reference counts. , , , := .Args()if .Valid() { .incRefCount(.ID(), ) }if .Valid() { .incRefCount(.ID(), ) }if .Valid() { .incRefCount(.ID(), ) }for , := range { .incRefCount(.ID(), ) } } } .instStack = // we reuse the stack for the next iteration.}func ( *builder) ( ValueID, *Instruction) {ifwazevoapi.SSALoggingEnabled {fmt.Printf("v%d referenced from %v\n", , .Format()) } := &.valuesInfo[] .RefCount++}// passNopInstElimination eliminates the instructions which is essentially a no-op.func passNopInstElimination( *builder) {for := .blockIteratorBegin(); != nil; = .blockIteratorNext() {for := .rootInstr; != nil; = .next {switch .Opcode() {// TODO: add more logics here.caseOpcodeIshl, OpcodeSshr, OpcodeUshr: , := .Arg2() := .InstructionOfValue()if == nil {// If there's no defining instruction, that means the amount is coming from the parameter.continue }if .Constant() { := .ConstantVal()if .Type().Bits() == 64 { = % 64 } else { = % 32 }if == 0 { .alias(.Return(), ) } } } } }}// passSortSuccessors sorts the successors of each block in the natural program order.func passSortSuccessors( *builder) {for := 0; < .basicBlocksPool.Allocated(); ++ { := .basicBlocksPool.View()sortBlocks(.success) }}
The pages are generated with Goldsv0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.