Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(174)

Unified Diff: x86map/map.go

Issue 91170044: code review 91170044: x86: x86 CSV table and decoder generator (Closed)
Patch Set: diff -r d5498ce3c3f7 https://code.google.com/p/rsc.x86/ Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « x86.csv ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: x86map/map.go
===================================================================
new file mode 100644
--- /dev/null
+++ b/x86map/map.go
@@ -0,0 +1,884 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// X86map constructs the x86 opcode map from the instruction set CSV file.
+//
+// Usage:
+// x86map [-fmt=format] x86.csv
+//
+// The known output formats are:
+//
+// text (default) - print decoding tree in text form
+// decoder - print decoding tables for the x86asm package
+package main
+
+import (
+ "bufio"
+ "bytes"
+ "encoding/csv"
+ "flag"
+ "fmt"
+ "log"
+ "os"
+ "sort"
+ "strconv"
+ "strings"
+)
+
+var format = flag.String("fmt", "text", "output format: text, decoder")
+
+var inputFile string
+
+func usage() {
+ fmt.Fprintf(os.Stderr, "usage: x86map [-fmt=format] x86.csv\n")
+ os.Exit(2)
+}
+
+func main() {
+ log.SetFlags(0)
+ log.SetPrefix("x86map: ")
+
+ flag.Usage = usage
+ flag.Parse()
+ if flag.NArg() != 1 {
+ usage()
+ }
+
+ inputFile = flag.Arg(0)
+
+ var print func(*Prog)
+ switch *format {
+ default:
+ log.Fatalf("unknown output format %q", *format)
+ case "text":
+ print = printText
+ case "decoder":
+ print = printDecoder
+ }
+
+ p, err := readCSV(flag.Arg(0))
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ //p = mergeTail(p)
+
+ print(p)
+}
+
+// readCSV reads the CSV file and returns the corresponding Map.
+// It may print details about problems to standard error using the log package.
+func readCSV(file string) (*Prog, error) {
+ // Read input.
+ // Skip leading blank and # comment lines.
+ f, err := os.Open(file)
+ if err != nil {
+ return nil, err
+ }
+ b := bufio.NewReader(f)
+ for {
+ c, err := b.ReadByte()
+ if err != nil {
+ break
+ }
+ if c == '\n' {
+ continue
+ }
+ if c == '#' {
+ b.ReadBytes('\n')
+ continue
+ }
+ b.UnreadByte()
+ break
+ }
+ table, err := csv.NewReader(b).ReadAll()
+ if err != nil {
+ return nil, fmt.Errorf("parsing %s: %v", file, err)
+ }
+ if len(table) == 0 {
+ return nil, fmt.Errorf("empty csv input")
+ }
+ if len(table[0]) < 6 {
+ return nil, fmt.Errorf("csv too narrow: need at least six columns")
+ }
+
+ p := &Prog{}
+ for _, row := range table {
+ add(p, row[0], row[1], row[2], row[3], row[4], row[5])
+ }
+
+ check(p)
+
+ return p, nil
+}
+
+// A Prog is a single node in the tree representing the instruction format.
+// Collectively the tree of nodes form a kind of program for decoding.
+// Each Prog has a single action, identifying the kind of node it is,
+// and then children to be executed according to the action.
+// For example, the Prog with Action="decode" has children named for each
+// possible next byte in the input, and those children are the decoding
+// tree to execute for the corresponding bytes.
+type Prog struct {
+ Path string
+ Action string
+ Child map[string]*Prog
+ PC int
+ TailID int
+}
+
+// keys returns the child keys in sorted order.
+func (p *Prog) keys() []string {
+ var keys []string
+ for key := range p.Child {
+ keys = append(keys, key)
+ }
+ sort.Strings(keys)
+ return keys
+}
+
+// findChildLeaf finds a leaf node in the subtree rooted at p
+// and returns that node's full path. The path is useful in error
+// messages as an example of where a particular subtree is headed.
+func (p *Prog) findChildLeaf() string {
+ for {
+ if len(p.Child) == 0 {
+ return p.Path
+ }
+ p = p.Child[p.keys()[0]]
+ }
+}
+
+// walk advances from p to apply the given action and key.
+// If p has no action yet, the action is recorded as p.Action.
+// Otherwise the action must match p's action: every node in the
+// tree can have at most one action, although possibly with many
+// alternative keys.
+// If p already has an alternative with the given key, walk returns
+// that preexisting subtree. Otherwise walk allocates a new Prog
+// representing that subtree and returns that node.
+func (p *Prog) walk(action, key, text, opcode string) *Prog {
+ if p.Action == "" {
+ p.Action = action
+ } else if p.Action != action {
+ log.Printf("%s; %s: conflicting paths %s and %s|%s %s\n", text, opcode, p.findChildLeaf(), p.Path, action, key)
+ return new(Prog)
+ }
+ q := p.Child[key]
+ if q == nil {
+ if p.Child == nil {
+ p.Child = make(map[string]*Prog)
+ }
+ q = new(Prog)
+ q.Path = fmt.Sprintf("%s|%s %s", p.Path, action, key)
+ p.Child[key] = q
+ }
+ return q
+}
+
+// add adds a single instructions to the tree rooted at root.
+// The string arguments match the CSV: instruction mnemonic,
+// opcode encoding, validity in 32- and 64-bit modes, CPUID
+// feature set (ignored), and additional tags.
+//
+// In effect, add adds a new path through the tree leading to
+// the given instruction, but it reuses as much of the existing
+// tree structure as possible. For example if there have already
+// been instructions added starting with 0F and this instruction
+// also starts with 0F, that 0F subtree node is reused instead of
+// allocating a parallel one. To maximize the reuse, the check action
+// sequence along the path being added is the same for every instruction:
+// encoding pieces needed to make a decision, 64-bit mode check,
+// rex check, prefix check, address size check, data size check,
+// register vs memory argument check. Once all those checks have
+// been applied, the assumption is that we have uniquely identified
+// an instruction, and at that point it is okay to diverge from the
+// uniform pattern to set the opcode and read the specific arguments
+// corresponding to the instruction at hand.
+//
+// The maximimal reuse of the existing tree means that the tree
+// resulting from all adds have been done amounts to a decision tree.
+// There is one detail that makes it non-deterministic: some checks
+// do not matter to some instructions and those are recorded as "any" keys.
+// If you are decoding and there is a key for the specific thing you are
+// seeing as well as the "any" key, both must be considered. To avoid
+// adding complexity to the decoder execution, the 'check' function
+// removes this case by merging "any" trees into specific keys when
+// present.
+func add(root *Prog, text, opcode, valid32, valid64, cpuid, tags string) {
+ // These are not real instructions: they are either
+ // prefixes for other instructions, composite instructions
+ // built from multiple individual instructions, or alternate
+ // mnemonics of other encodings.
+ // Discard for disassembly, because we want a unique decoding.
+ if strings.Contains(tags, "pseudo") {
+ return
+ }
+
+ // Ignore VEX instructions for now.
+ if strings.HasPrefix(opcode, "VEX") {
+ return
+ }
+
+ var rex, prefix string
+ encoding := strings.Fields(opcode)
+ if len(encoding) > 0 && strings.HasPrefix(encoding[0], "REX") {
+ rex = encoding[0]
+ encoding = encoding[1:]
+ if len(encoding) > 0 && encoding[0] == "+" {
+ encoding = encoding[1:]
+ }
+ }
+ if len(encoding) > 0 && isPrefix[encoding[0]] {
+ prefix = encoding[0]
+ encoding = encoding[1:]
+ }
+ if rex == "" && len(encoding) > 0 && strings.HasPrefix(encoding[0], "REX") {
+ rex = encoding[0]
+ encoding = encoding[1:]
+ if len(encoding) > 0 && encoding[0] == "+" {
+ encoding = encoding[1:]
+ }
+ }
+ if len(encoding) > 0 && isPrefix[encoding[0]] {
+ log.Printf("%s %s: too many prefixes", text, opcode)
+ return
+ }
+
+ p := root
+ walk := func(action, item string) {
+ p = p.walk(action, item, text, opcode)
+ }
+
+ var haveModRM, havePlus bool
+ var usedReg string
+ for len(encoding) > 0 && (isHex(encoding[0]) || isSlashNum(encoding[0])) {
+ key := encoding[0]
+ if isSlashNum(key) {
+ if usedReg != "" {
+ log.Printf("%s %s: multiple modrm checks", text, opcode)
+ }
+ haveModRM = true
+ usedReg = key
+ }
+ if i := strings.Index(key, "+"); i >= 0 {
+ key = key[:i+1]
+ havePlus = true
+ }
+ walk("decode", key)
+ encoding = encoding[1:]
+ }
+
+ if valid32 != "V" {
+ walk("is64", "1")
+ } else if valid64 != "V" {
+ walk("is64", "0")
+ } else {
+ walk("is64", "any")
+ }
+
+ if rex != "" {
+ walk("prefix", rex)
+ }
+ if prefix == "" {
+ prefix = "0"
+ }
+ walk("prefix", prefix)
+
+ if strings.Contains(tags, "address16") {
+ walk("addrsize", "16")
+ } else if strings.Contains(tags, "address32") {
+ walk("addrsize", "32")
+ } else if strings.Contains(tags, "address64") {
+ walk("addrsize", "64")
+ } else {
+ walk("addrsize", "any")
+ }
+
+ if strings.Contains(tags, "operand16") {
+ walk("datasize", "16")
+ } else if strings.Contains(tags, "operand32") {
+ walk("datasize", "32")
+ } else if strings.Contains(tags, "operand64") {
+ walk("datasize", "64")
+ } else {
+ walk("datasize", "any")
+ }
+
+ if len(encoding) > 0 && encoding[0] == "/r" {
+ haveModRM = true
+ }
+ if haveModRM {
+ if strings.Contains(tags, "modrm_regonly") {
+ walk("ismem", "0")
+ } else if strings.Contains(tags, "modrm_memonly") {
+ walk("ismem", "1")
+ } else {
+ walk("ismem", "any")
+ }
+ }
+
+ walk("op", strings.Fields(text)[0])
+
+ for _, field := range encoding {
+ walk("read", field)
+ }
+
+ var usedRM string
+ for _, arg := range strings.Fields(text)[1:] {
+ arg = strings.TrimRight(arg, ",")
+ if usesReg[arg] && !haveModRM && !havePlus {
+ log.Printf("%s %s: no modrm field to use for %s", text, opcode, arg)
+ continue
+ }
+ if usesRM[arg] && !haveModRM {
+ log.Printf("%s %s: no modrm field to use for %s", text, opcode, arg)
+ continue
+ }
+ if usesReg[arg] {
+ if usedReg != "" {
+ log.Printf("%s %s: modrm reg field used by both %s and %s", text, opcode, usedReg, arg)
+ continue
+ }
+ usedReg = arg
+ }
+ if usesRM[arg] {
+ if usedRM != "" {
+ log.Printf("%s %s: modrm r/m field used by both %s and %s", text, opcode, usedRM, arg)
+ continue
+ }
+ usedRM = arg
+ }
+ walk("arg", arg)
+ }
+
+ walk("match", "!")
+}
+
+// allKeys records the list of all possible child keys for actions that support "any".
+var allKeys = map[string][]string{
+ "is64": {"0", "1"},
+ "ismem": {"0", "1"},
+ "addrsize": {"16", "32", "64"},
+ "datasize": {"16", "32", "64"},
+}
+
+// check checks that the program tree is well-formed.
+// It also merges "any" keys into specific decoding keys in order to
+// create an invariant that a particular check node either has a
+// single "any" child - making it a no-op - or has no "any" children.
+// See the discussion of "any" in the comment for add above.
+func check(p *Prog) {
+ if p.Child["any"] != nil && len(p.Child) > 1 {
+ for _, key := range p.keys() {
+ if key != "any" {
+ mergeCopy(p.Child[key], p.Child["any"])
+ }
+ }
+ if allKeys[p.Action] == nil {
+ log.Printf("%s: unknown key space for %s=any", p.Path, p.Action)
+ }
+ for _, key := range allKeys[p.Action] {
+ if p.Child[key] == nil {
+ p.Child[key] = p.Child["any"]
+ }
+ }
+ delete(p.Child, "any")
+ }
+
+ for _, q := range p.Child {
+ check(q)
+ }
+
+ switch p.Action {
+ case "op", "read", "arg":
+ if len(p.Child) > 1 {
+ log.Printf("%s: multiple children for action=%s: %v", p.Path, p.Action, p.keys())
+ }
+ }
+}
+
+// mergeCopy merges a copy of the tree rooted at src into dst.
+// It is only used once no more paths will be added to the tree,
+// so it is safe to introduce cross-links that make the program
+// a dag rather than a tree.
+func mergeCopy(dst, src *Prog) {
+ //log.Printf("merge %s|%s and %s|%s\n", dst.Path, dst.Action, src.Path, src.Action)
+ if dst.Action != src.Action {
+ log.Printf("cannot merge %s|%s and %s|%s", dst.Path, dst.Action, src.Path, src.Action)
+ return
+ }
+
+ for _, key := range src.keys() {
+ if dst.Child[key] == nil {
+ // Create new subtree by creating cross-link.
+ dst.Child[key] = src.Child[key]
+ } else {
+ // Merge src subtree into existing dst subtree.
+ mergeCopy(dst.Child[key], src.Child[key])
+ }
+ }
+}
+
+// set returns a map mapping each of the words in all to true.
+func set(all string) map[string]bool {
+ m := map[string]bool{}
+ for _, f := range strings.Fields(all) {
+ m[f] = true
+ }
+ return m
+}
+
+// isPrefix records the x86 opcode prefix bytes.
+var isPrefix = set(`
+ 26
+ 2E
+ 36
+ 3E
+ 64
+ 65
+ 66
+ 67
+ F0
+ F2
+ F3
+`)
+
+// usesReg records the argument codes that use the modrm reg field.
+var usesReg = set(`
+ r8
+ r16
+ r32
+ r64
+`)
+
+// usesRM records the argument codes that use the modrm r/m field.
+var usesRM = set(`
+ r/m8
+ r/m16
+ r/m32
+ r/m64
+`)
+
+// isHex reports whether the argument is a two digit hex number
+// possibly followed by a +foo suffix.
+func isHex(s string) bool {
+ if i := strings.Index(s, "+"); i >= 0 {
+ s = s[:i]
+ }
+ if len(s) != 2 {
+ return false
+ }
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if '0' <= c && c <= '9' || 'A' <= c && c <= 'F' {
+ continue
+ }
+ return false
+ }
+ return true
+}
+
+// isSlashNum reports whether the argument is /n for some number n in [0,7].
+func isSlashNum(s string) bool {
+ return len(s) == 2 && s[0] == '/' && '0' <= s[1] && s[1] <= '7'
+}
+
+// mergeTail is supposed to merge common subtrees (program tails),
+// reducing the size of the final program code.
+// It identifies the subtrees using a bottom-up canonicalization.
+//
+// THIS CODE DOES NOT WORK. IT NEEDS TO BE DEBUGGED.
+func mergeTail(p *Prog, emitted map[string]*Prog) *Prog {
+ if emitted == nil {
+ emitted = make(map[string]*Prog)
+ }
+
+ if p.Action == "match" {
+ return p
+ }
+
+ for _, key := range p.keys() {
+ p.Child[key] = mergeTail(p.Child[key], emitted)
+ }
+
+ op := ""
+ for _, key := range p.keys() {
+ q := p.Child[key]
+ if q.Action != "op" || len(q.Child) > 1 {
+ op = ""
+ break
+ }
+ qop := q.keys()[0]
+ if op == "" {
+ op = qop
+ } else if op != qop {
+ op = ""
+ break
+ }
+ }
+
+ if op != "" {
+ // Pull 'op x' up above the discriminator.
+ p1 := new(Prog)
+ *p1 = *p
+ for _, key := range p.keys() {
+ p1.Child[key] = p.Child[key].Child[op]
+ }
+ p.Action = "op"
+ p.Child = map[string]*Prog{op: p1}
+ }
+
+ var buf bytes.Buffer
+ fmt.Fprintf(&buf, "%s\n", p.Action)
+ for _, key := range p.keys() {
+ fmt.Fprintf(&buf, "%s %d\n", key, p.Child[key].TailID)
+ }
+ key := buf.String()
+
+ if q := emitted[key]; q != nil {
+ return q
+ }
+ emitted[key] = p
+ p.TailID = len(emitted)
+ return p
+}
+
+// printText prints the tree in textual form.
+func printText(p *Prog) {
+ printTree(p, 0)
+}
+
+var tabs = strings.Repeat(" ", 100)
+
+func printTree(p *Prog, depth int) {
+ fmt.Printf("%.*s%s\n", 4*depth, tabs, p.Action)
+ for _, key := range p.keys() {
+ fmt.Printf("%.*s%s\n", 4*(depth+1), tabs, key)
+ printTree(p.Child[key], depth+2)
+ }
+}
+
+// printDecoder prints a Go array containing the decoder program.
+// It runs in two passes, both of which traverse and could generate
+// the entire program. The first pass records the PC for each Prog node,
+// and the second pass emits the actual program, using the PCs as jump
+// targets in the places where the program is a dag rather than a tree.
+func printDecoder(p *Prog) {
+ opMap := make(map[string]bool)
+ printDecoderPass(p, 1, false, opMap)
+ fmt.Printf("// DO NOT EDIT\n")
+ fmt.Printf("// generated by: x86map -fmt=decoder %s\n", inputFile)
+ fmt.Printf("\n")
+ fmt.Printf("package x86asm\n\n")
+ fmt.Printf("var decoder = [...]uint16{\n\tuint16(xFail),\n")
+ printDecoderPass(p, 1, true, opMap)
+ fmt.Printf("}\n\n")
+
+ var ops []string
+ for op := range opMap {
+ ops = append(ops, op)
+ }
+ sort.Strings(ops)
+
+ fmt.Printf("const (\n")
+ fmt.Printf("\t_ Op = iota\n\n")
+ last := ""
+ for _, op := range ops {
+ fmt.Printf("\t%s\n", op)
+ last = op
+ }
+ fmt.Printf(")\n\n")
+ fmt.Printf("const maxOp = %s\n\n", last)
+
+ fmt.Printf("var opNames = [...]string{\n")
+ for _, op := range ops {
+ fmt.Printf("\t%s: \"%s\",\n", op, op)
+ }
+ fmt.Printf("}\n")
+}
+
+// printDecoderPass prints the decoding table program for p,
+// assuming that we are emitting code at the given program counter.
+// It returns the new current program counter, that is, the program
+// counter after the printed instructions.
+// If printing==false, printDecoderPass does not print the actual
+// code words but still does the PC computation.
+func printDecoderPass(p *Prog, pc int, printing bool, ops map[string]bool) int {
+ // Record PC on first pass.
+ if p.PC == 0 {
+ p.PC = pc
+ }
+
+ // If PC doesn't match, we've already printed this code
+ // because it was reached some other way. Jump to that copy.
+ if p.PC != pc {
+ if printing {
+ fmt.Printf("/*%d*/\tuint16(xJump), %d,\n", pc, p.PC)
+ }
+ return pc + 2
+ }
+
+ // Otherwise, emit the code for the given action.
+
+ // At the bottom, if next is non-nil, emit code for next.
+ // Then emit the code for the children named by the keys.
+ keys := p.keys()
+ var next *Prog
+
+ switch p.Action {
+ default:
+ log.Printf("printDecoderPass: unknown action %q: %s", p.Action, p.Path)
+
+ case "decode":
+ // Decode hex bytes or /n modrm op checks.
+ // Hex bytes take priority, so do them first.
+ // Hex bytes of the form "40+" indicate an
+ // 8 entry-wide swath of codes: 40, 41, ..., 47.
+ hex := 0
+ slash := 0
+ for _, key := range keys {
+ if isHex(key) {
+ if strings.Contains(key, "+") {
+ hex += 8
+ } else {
+ hex++
+ }
+ }
+ if isSlashNum(key) {
+ slash++
+ }
+ }
+ if hex > 0 {
+ // TODO(rsc): Introduce an xCondByte256 that has 256 child entries
+ // and no explicit keys. That will cut the size in half for large
+ // tables.
+ if printing {
+ fmt.Printf("/*%d*/\tuint16(xCondByte), %d,\n", pc, hex)
+ for _, key := range keys {
+ if !isHex(key) {
+ continue
+ }
+ if i := strings.Index(key, "+"); i >= 0 {
+ nextPC := p.Child[key].PC
+ n, _ := strconv.ParseUint(key[:i], 16, 0)
+ for j := 0; j < 8; j++ {
+ fmt.Printf("\t%#02x, %d,\n", int(n)+j, nextPC)
+ }
+ continue
+ }
+ fmt.Printf("\t0x%s, %d,\n", key, p.Child[key].PC)
+ }
+ }
+ pc += 2 + 2*hex
+
+ // All other condition checks fail the decoding if nothing is found,
+ // but this one falls through so that we can then do /n checks.
+ // If there are no upcoming /n checks, insert an explicit failure.
+ if slash == 0 {
+ if printing {
+ fmt.Printf("\tuint16(xFail),\n")
+ }
+ pc++
+ }
+ }
+ if slash > 0 {
+ if printing {
+ fmt.Printf("/*%d*/\tuint16(xCondSlashR),\n", pc)
+ for i := 0; i < 8; i++ {
+ fmt.Printf("\t%d, // %d\n", p.childPC(fmt.Sprintf("/%d", i)), i)
+ }
+ }
+ pc += 1 + 8
+ }
+
+ case "is64":
+ // Decode based on processor mode: 64-bit or not.
+ if len(keys) == 1 && keys[0] == "any" {
+ next = p.Child["any"]
+ break
+ }
+ if p.Child["any"] != nil {
+ log.Printf("%s: mixed is64 keys: %v", p.Path, keys)
+ }
+
+ if printing {
+ fmt.Printf("/*%d*/\tuint16(xCondIs64), %d, %d,\n", pc, p.childPC("0"), p.childPC("1"))
+ }
+ pc += 3
+
+ case "prefix":
+ // Decode based on presence of prefix.
+ // The "0" prefix means "none of the above", so if there's
+ // nothing else, it's the same as "any".
+ if len(keys) == 1 && (keys[0] == "any" || keys[0] == "0") {
+ next = p.Child["any"]
+ break
+ }
+ if p.Child["any"] != nil {
+ log.Printf("%s: mixed prefix keys: %v", p.Path, keys)
+ }
+
+ // Emit the prefixes in reverse sorted order, so that REX.W
+ // is considered before the standard prefixes, F3 and F2 are
+ // considered before 66, and the fallback 0 is considered last.
+ if printing {
+ fmt.Printf("/*%d*/\tuint16(xCondPrefix), %d,\n", pc, len(keys))
+ for i := len(keys) - 1; i >= 0; i-- {
+ key := keys[i]
+ nextPC := p.Child[key].PC
+ if key == "REX.W" {
+ key = "0x48"
+ } else if key == "REX" {
+ key = "0x40"
+ } else {
+ key = "0x" + key
+ }
+ fmt.Printf("\t%s, %d,\n", key, nextPC)
+ }
+ }
+ pc += 2 + 2*len(keys)
+
+ case "addrsize":
+ // Decode based on address size attribute.
+ if len(keys) == 1 && keys[0] == "any" {
+ next = p.Child["any"]
+ break
+ }
+ if p.Child["any"] != nil {
+ log.Printf("%s: mixed addrsize keys: %v", p.Path, keys)
+ }
+
+ if printing {
+ fmt.Printf("/*%d*/\tuint16(xCondAddrSize), %d, %d, %d,\n", pc, p.childPC("16"), p.childPC("32"), p.childPC("64"))
+ }
+ pc += 4
+
+ case "datasize":
+ // Decode based on operand size attribute.
+ if len(keys) == 1 && keys[0] == "any" {
+ next = p.Child["any"]
+ break
+ }
+ if p.Child["any"] != nil {
+ log.Printf("%s: mixed datasize keys: %v", p.Path, keys)
+ }
+
+ if printing {
+ fmt.Printf("/*%d*/\tuint16(xCondDataSize), %d, %d, %d,\n", pc, p.childPC("16"), p.childPC("32"), p.childPC("64"))
+ }
+ pc += 4
+
+ case "ismem":
+ // Decode based on modrm form: memory or register reference.
+ if len(keys) == 1 && keys[0] == "any" {
+ next = p.Child["any"]
+ break
+ }
+ if p.Child["any"] != nil {
+ log.Printf("%s: mixed ismem keys: %v", p.Path, keys)
+ }
+
+ if printing {
+ fmt.Printf("/*%d*/\tuint16(xCondIsMem), %d, %d,\n", pc, p.childPC("0"), p.childPC("1"))
+ }
+ pc += 3
+
+ case "op":
+ // Set opcode.
+ ops[keys[0]] = true
+ if printing {
+ fmt.Printf("/*%d*/\tuint16(xSetOp), uint16(%s),\n", pc, keys[0])
+ }
+ next = p.Child[keys[0]]
+ pc += 2
+
+ case "read":
+ // Read argument bytes.
+ if printing {
+ fmt.Printf("/*%d*/\tuint16(xRead%s),\n", pc, xOp(keys[0]))
+ }
+ next = p.Child[keys[0]]
+ pc++
+
+ case "arg":
+ // Record instruction argument (interpret bytes loaded with read).
+ if printing {
+ fmt.Printf("/*%d*/\tuint16(xArg%s),\n", pc, xOp(keys[0]))
+ }
+ next = p.Child[keys[0]]
+ pc++
+
+ case "match":
+ // Finish match.
+ if printing {
+ fmt.Printf("/*%d*/\tuint16(xMatch),\n", pc)
+ }
+ pc++
+ return pc
+ }
+
+ if next != nil {
+ pc = printDecoderPass(next, pc, printing, ops)
+ }
+
+ for _, key := range keys {
+ q := p.Child[key]
+ if q.PC == 0 || q.PC == pc {
+ pc = printDecoderPass(q, pc, printing, ops)
+ }
+ }
+
+ return pc
+}
+
+// childPC returns the PC for the given child key.
+// If the key is not present, it returns PC 0,
+// which is known to be an xFail instruction.
+func (p *Prog) childPC(key string) int {
+ q := p.Child[key]
+ if q == nil {
+ return 0
+ }
+ return q.PC
+}
+
+// isLower reports whether c is an ASCII lower case letter.
+func isLower(c byte) bool {
+ return 'a' <= c && c <= 'z'
+}
+
+// isLetterDigit reports whether c is an ASCII letter or digit.
+func isLetterDigit(c byte) bool {
+ return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9'
+}
+
+// xOp converts arg, an Intel manual shorthand, into a decoder opcode suffix.
+// The standard form is LeadingUpperLetter with a few punctuation symbols
+// turned into purely lower case words: M16and32, M16colon32, CR0dashCR7.
+func xOp(arg string) string {
+ var buf []byte
+ for i := 0; i < len(arg); i++ {
+ c := arg[i]
+ if isLower(c) && (i == 0 || !isLetterDigit(arg[i-1])) {
+ c -= 'a' - 'A'
+ }
+ buf = append(buf, c)
+ }
+ return argFix.Replace(string(buf))
+}
+
+var argFix = strings.NewReplacer(
+ "/R", "SlashR",
+ "/", "",
+ "<", "",
+ ">", "",
+ "+", "plus",
+ "-", "dash",
+ ":", "colon",
+ "&", "and",
+ "ST(0)", "ST",
+ "ST(I)", "STi",
+ "ST(I)+Op", "STi",
+)
« no previous file with comments | « x86.csv ('k') | no next file » | no next file with comments »

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b