llir / llvm

Library for interacting with LLVM IR in pure Go.
https://llir.github.io/document/
BSD Zero Clause License
1.19k stars 78 forks source link

interface BinaryInstruction #191

Closed dannypsnl closed 2 years ago

dannypsnl commented 3 years ago

In some optimization, like value numbering, can find out BinaryInstruction automatically will be a nice feature. For example:

switch inst.(type) {
case ir.BinaryInstruction:
    opCode := inst.OpCode()
    v1 := getValueOf(inst.Operand(1))
    v2 := getValueOf(inst.Operand(2))
    hashKey := makeHashKey(v1, opCode, v2)
    // ...
}
dannypsnl commented 3 years ago

From @mewmew

The Operands function returns the operands for each instruction. The GetOpCode function returns the opcode of each instruction. The opcode.IsBinaryInst method may be used to distinguish binary instructions.

For now, we can place Operands, GetOpCode and OpCode in irutil. It is quite likely that we will move Operands to become a method for each instruction, and include it in the ir.Instruction interface.

For now, let's keep it in irutil and we can finalize the decision whether to move Operands to their.Instruction interface later.

mewmew commented 3 years ago

Uploaded a preliminary example for issue 191.

package main

import (
    "fmt"
    "log"

    "github.com/llir/llvm/asm"
    "github.com/llir/llvm/ir"
    "github.com/llir/llvm/ir/value"
    "github.com/pkg/errors"
)

func main() {
    if err := foo("foo.ll"); err != nil {
        log.Fatalf("%+v", err)
    }
}

func foo(llPath string) error {
    m, err := asm.ParseFile(llPath)
    if err != nil {
        return errors.WithStack(err)
    }
    for _, f := range m.Funcs {
        for _, block := range f.Blocks {
            for _, inst := range block.Insts {
                opcode := GetOpCode(inst)
                if opcode.IsBinaryInst() {
                    operands := Operands(inst)
                    makeHashKey(operands[0], opcode, operands[1])
                }
            }
        }
    }
    return nil
}

func makeHashKey(x value.Value, opcode OpCode, y value.Value) uint64 {
    fmt.Println("x:", x)
    fmt.Println("opcode:", opcode)
    fmt.Println("y:", y)
    // TODO: compute hash key
    return 0
}

// Operands returns the operands of the given instruction.
func Operands(inst ir.Instruction) []value.Value {
    switch inst := inst.(type) {
    // Unary instructions
    case *ir.InstFNeg:
        return []value.Value{inst.X}
    // Binary instructions
    case *ir.InstAdd:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstFAdd:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstSub:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstFSub:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstMul:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstFMul:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstUDiv:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstSDiv:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstFDiv:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstURem:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstSRem:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstFRem:
        return []value.Value{inst.X, inst.Y}
    // Bitwise instructions
    case *ir.InstShl:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstLShr:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstAShr:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstAnd:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstOr:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstXor:
        return []value.Value{inst.X, inst.Y}
    // Vector instructions
    case *ir.InstExtractElement:
        return []value.Value{inst.X, inst.Index}
    case *ir.InstInsertElement:
        return []value.Value{inst.X, inst.Elem, inst.Index}
    case *ir.InstShuffleVector:
        return []value.Value{inst.X, inst.Y, inst.Mask}
    // Aggregate instructions
    case *ir.InstExtractValue:
        return []value.Value{inst.X}
    case *ir.InstInsertValue:
        return []value.Value{inst.X, inst.Elem}
    // Memory instructions
    case *ir.InstAlloca:
        return []value.Value{inst.NElems}
    case *ir.InstLoad:
        return []value.Value{inst.Src}
    case *ir.InstStore:
        return []value.Value{inst.Src, inst.Dst}
    case *ir.InstFence:
        return nil
    case *ir.InstCmpXchg:
        return []value.Value{inst.Ptr, inst.Cmp, inst.New}
    case *ir.InstAtomicRMW:
        return []value.Value{inst.Dst, inst.X}
    case *ir.InstGetElementPtr:
        return append([]value.Value{inst.Src}, inst.Indices...)
    // Conversion instructions
    case *ir.InstTrunc:
        return []value.Value{inst.From}
    case *ir.InstZExt:
        return []value.Value{inst.From}
    case *ir.InstSExt:
        return []value.Value{inst.From}
    case *ir.InstFPTrunc:
        return []value.Value{inst.From}
    case *ir.InstFPExt:
        return []value.Value{inst.From}
    case *ir.InstFPToUI:
        return []value.Value{inst.From}
    case *ir.InstFPToSI:
        return []value.Value{inst.From}
    case *ir.InstUIToFP:
        return []value.Value{inst.From}
    case *ir.InstSIToFP:
        return []value.Value{inst.From}
    case *ir.InstPtrToInt:
        return []value.Value{inst.From}
    case *ir.InstIntToPtr:
        return []value.Value{inst.From}
    case *ir.InstBitCast:
        return []value.Value{inst.From}
    case *ir.InstAddrSpaceCast:
        return []value.Value{inst.From}
    // Other instructions
    case *ir.InstICmp:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstFCmp:
        return []value.Value{inst.X, inst.Y}
    case *ir.InstPhi:
        operands := make([]value.Value, 0, 2*len(inst.Incs))
        for _, inc := range inst.Incs {
            operands = append(operands, inc.X, inc.Pred)
        }
        return operands
    case *ir.InstSelect:
        return []value.Value{inst.Cond, inst.ValueTrue, inst.ValueFalse}
    case *ir.InstFreeze:
        return []value.Value{inst.X}
    case *ir.InstCall:
        return append([]value.Value{inst.Callee}, inst.Args...)
    case *ir.InstVAArg:
        return []value.Value{inst.ArgList}
    case *ir.InstLandingPad:
        var operands []value.Value
        for _, clause := range inst.Clauses {
            operands = append(operands, clause.X)
        }
        return operands
    case *ir.InstCatchPad:
        return append([]value.Value{inst.CatchSwitch}, inst.Args...)
    case *ir.InstCleanupPad:
        return append([]value.Value{inst.ParentPad}, inst.Args...)
    }
    panic(fmt.Errorf("support for instruction %T not yet implemented", inst))
}

// GetOpCode returns the op code of the given instruction
func GetOpCode(inst ir.Instruction) OpCode {
    switch inst.(type) {
    // Unary instructions
    case *ir.InstFNeg:
        return OpCodeInstFNeg
    // Binary instructions
    case *ir.InstAdd:
        return OpCodeInstAdd
    case *ir.InstFAdd:
        return OpCodeInstFAdd
    case *ir.InstSub:
        return OpCodeInstSub
    case *ir.InstFSub:
        return OpCodeInstFSub
    case *ir.InstMul:
        return OpCodeInstMul
    case *ir.InstFMul:
        return OpCodeInstFMul
    case *ir.InstUDiv:
        return OpCodeInstUDiv
    case *ir.InstSDiv:
        return OpCodeInstSDiv
    case *ir.InstFDiv:
        return OpCodeInstFDiv
    case *ir.InstURem:
        return OpCodeInstURem
    case *ir.InstSRem:
        return OpCodeInstSRem
    case *ir.InstFRem:
        return OpCodeInstFRem
    // Bitwise instructions
    case *ir.InstShl:
        return OpCodeInstShl
    case *ir.InstLShr:
        return OpCodeInstLShr
    case *ir.InstAShr:
        return OpCodeInstAShr
    case *ir.InstAnd:
        return OpCodeInstAnd
    case *ir.InstOr:
        return OpCodeInstOr
    case *ir.InstXor:
        return OpCodeInstXor
    // Vector instructions
    case *ir.InstExtractElement:
        return OpCodeInstExtractElement
    case *ir.InstInsertElement:
        return OpCodeInstInsertElement
    case *ir.InstShuffleVector:
        return OpCodeInstShuffleVector
    // Aggregate instructions
    case *ir.InstExtractValue:
        return OpCodeInstExtractValue
    case *ir.InstInsertValue:
        return OpCodeInstInsertValue
    // Memory instructions
    case *ir.InstAlloca:
        return OpCodeInstAlloca
    case *ir.InstLoad:
        return OpCodeInstLoad
    case *ir.InstStore:
        return OpCodeInstStore
    case *ir.InstFence:
        return OpCodeInstFence
    case *ir.InstCmpXchg:
        return OpCodeInstCmpXchg
    case *ir.InstAtomicRMW:
        return OpCodeInstAtomicRMW
    case *ir.InstGetElementPtr:
        return OpCodeInstGetElementPtr
    // Conversion instructions
    case *ir.InstTrunc:
        return OpCodeInstTrunc
    case *ir.InstZExt:
        return OpCodeInstZExt
    case *ir.InstSExt:
        return OpCodeInstSExt
    case *ir.InstFPTrunc:
        return OpCodeInstFPTrunc
    case *ir.InstFPExt:
        return OpCodeInstFPExt
    case *ir.InstFPToUI:
        return OpCodeInstFPToUI
    case *ir.InstFPToSI:
        return OpCodeInstFPToSI
    case *ir.InstUIToFP:
        return OpCodeInstUIToFP
    case *ir.InstSIToFP:
        return OpCodeInstSIToFP
    case *ir.InstPtrToInt:
        return OpCodeInstPtrToInt
    case *ir.InstIntToPtr:
        return OpCodeInstIntToPtr
    case *ir.InstBitCast:
        return OpCodeInstBitCast
    case *ir.InstAddrSpaceCast:
        return OpCodeInstAddrSpaceCast
    // Other instructions
    case *ir.InstICmp:
        return OpCodeInstICmp
    case *ir.InstFCmp:
        return OpCodeInstFCmp
    case *ir.InstPhi:
        return OpCodeInstPhi
    case *ir.InstSelect:
        return OpCodeInstSelect
    case *ir.InstFreeze:
        return OpCodeInstFreeze
    case *ir.InstCall:
        return OpCodeInstCall
    case *ir.InstVAArg:
        return OpCodeInstVAArg
    case *ir.InstLandingPad:
        return OpCodeInstLandingPad
    case *ir.InstCatchPad:
        return OpCodeInstCatchPad
    case *ir.InstCleanupPad:
        return OpCodeInstCleanupPad
    }
    panic(fmt.Errorf("support for instruction %T not yet implemented", inst))
}

//go:generate stringer -type OpCode

// OpCode represents the op code of an instruction.
type OpCode int

// IsBinaryInst reports whether the given op code is a binary or bitwise
// instruction.
func (opcode OpCode) IsBinaryInst() bool {
    switch opcode {
    // Binary instructions
    case OpCodeInstAdd, OpCodeInstFAdd, OpCodeInstSub, OpCodeInstFSub, OpCodeInstMul, OpCodeInstFMul, OpCodeInstUDiv, OpCodeInstSDiv, OpCodeInstFDiv, OpCodeInstURem, OpCodeInstSRem, OpCodeInstFRem:
        return true
    // Bitwise instructions
    case OpCodeInstShl, OpCodeInstLShr, OpCodeInstAShr, OpCodeInstAnd, OpCodeInstOr, OpCodeInstXor:
        return true
    }
    return false
}

// Instruction op codes.
const (
    OpCodeNone OpCode = 0
    // Unary instructions
    OpCodeInstFNeg OpCode = iota + 1
    // Binary instructions
    OpCodeInstAdd
    OpCodeInstFAdd
    OpCodeInstSub
    OpCodeInstFSub
    OpCodeInstMul
    OpCodeInstFMul
    OpCodeInstUDiv
    OpCodeInstSDiv
    OpCodeInstFDiv
    OpCodeInstURem
    OpCodeInstSRem
    OpCodeInstFRem
    // Bitwise instructions
    OpCodeInstShl
    OpCodeInstLShr
    OpCodeInstAShr
    OpCodeInstAnd
    OpCodeInstOr
    OpCodeInstXor
    // Vector instructions
    OpCodeInstExtractElement
    OpCodeInstInsertElement
    OpCodeInstShuffleVector
    // Aggregate instructions
    OpCodeInstExtractValue
    OpCodeInstInsertValue
    // Memory instructions
    OpCodeInstAlloca
    OpCodeInstLoad
    OpCodeInstStore
    OpCodeInstFence
    OpCodeInstCmpXchg
    OpCodeInstAtomicRMW
    OpCodeInstGetElementPtr
    // Conversion instructions
    OpCodeInstTrunc
    OpCodeInstZExt
    OpCodeInstSExt
    OpCodeInstFPTrunc
    OpCodeInstFPExt
    OpCodeInstFPToUI
    OpCodeInstFPToSI
    OpCodeInstUIToFP
    OpCodeInstSIToFP
    OpCodeInstPtrToInt
    OpCodeInstIntToPtr
    OpCodeInstBitCast
    OpCodeInstAddrSpaceCast
    // Other instructions
    OpCodeInstICmp
    OpCodeInstFCmp
    OpCodeInstPhi
    OpCodeInstSelect
    OpCodeInstFreeze
    OpCodeInstCall
    OpCodeInstVAArg
    OpCodeInstLandingPad
    OpCodeInstCatchPad
    OpCodeInstCleanupPad
)
mewmew commented 3 years ago

Related discussions in #42.

There are two considerations we should take, both outlined by @pwaller:

  1. return pointer to value.Value from Operands to allow mutation.
  2. pass destination slice to Operands to avoid allocation

Regarding the first point, I think this will allow for a very powerful API. Similar to the IR traversal API (inspired by Go fix) that we have in irutil.Walk.

Regarding the second point. While preferable to keep memory allocations low, up until now, we've prioritized a simpler API for llir/llvm over potential to reduce memory allocations. In particular, for methods which allocate and return slices ((e.g. ir.Terminator.Succs). For consistency and a continued prioritization of simplified API, I think we should skip the second point and keep the API as simple as possible.

Suggested re-definition of ir.Instruction (and potentially ir.Terminator) interface:

package ir

// Instruction is an LLVM IR instruction. All instructions (except store and
// fence) implement the value.Named interface and may thus be used directly as
// values.
//
// An Instruction has one of the following underlying types.
//
// ...
type Instruction interface {
    LLStringer
    // isInstruction ensures that only instructions can be assigned to the
    // instruction.Instruction interface.
    isInstruction()
    // Operands returns a mutable list of operands of the instruction.
    Operands() []*value.Value
}

And correspondingly for ir.Terminator.

package ir

// Terminator is an LLVM IR terminator instruction (a control flow instruction).
//
// A Terminator has one of the following underlying types.
//
// ...
type Terminator interface {
    LLStringer
    // Succs returns the successor basic blocks of the terminator.
    Succs() []*Block
    // Operands returns a mutable list of operands of the terminator.
    Operands() []*value.Value
}

Cheers, Robin

mewmew commented 2 years ago

Related to this issue, the Operands method is implemented in PR #214.

dannypsnl commented 2 years ago

close by #214.