Nix flake for RPython interpreters
Révision | 4a430c6d0514bed189fdc75fed602fad9a3811a5 (tree) |
---|---|
l'heure | 2024-04-20 10:32:57 |
Auteur | Corbin <cds@corb...> |
Commiter | Corbin |
bf: Hash-cons all ops.
This is so many lines of code, but it pays off. In exchange for around
30 lines of code, we get about a 50% improvement on Mandelbrot time
spent in JIT, and maybe a 10-15% speedup.
What's happening here? RPython JIT greens are compared by pointer when
they are structs, not by contents. There's no way to override this,
because the comparison is on the JIT's hot path for interp. Instead, we
have to hash-cons the entire structure of ops so that two ops will
compare equal by pointer when they are equal by contents. This could be
generated, somewhat, but there's no good answer for the linear scan on
loops or other variadic components.
So, the hash-consing means fewer code objects, which improves the JIT's
performance because it doesn't recompile equivalent code multiple times.
@@ -8,6 +8,12 @@ import sys | ||
8 | 8 | from rpython.jit.codewriter.policy import JitPolicy |
9 | 9 | from rpython.rlib.jit import JitDriver, purefunction |
10 | 10 | |
11 | +def opEq(ops1, ops2): | |
12 | + if len(ops1) != len(ops2): return False | |
13 | + for i, op in enumerate(ops1): | |
14 | + if op is not ops2[i]: return False | |
15 | + return True | |
16 | + | |
11 | 17 | def printableProgram(program): return program.asStr() |
12 | 18 | |
13 | 19 | jitdriver = JitDriver(greens=['program'], reds=['position', 'tape'], |
@@ -35,15 +41,23 @@ class Add(Op): | ||
35 | 41 | def runOn(self, tape, position): |
36 | 42 | tape[position] += self.imm |
37 | 43 | return position |
38 | -Inc = Add(1) | |
39 | -Dec = Add(-1) | |
44 | +addCache = {} | |
45 | +def add(imm): | |
46 | + if imm not in addCache: addCache[imm] = Add(imm) | |
47 | + return addCache[imm] | |
48 | +Inc = add(1) | |
49 | +Dec = add(-1) | |
40 | 50 | class Shift(Op): |
41 | 51 | _immutable_fields_ = "width", |
42 | 52 | def __init__(self, width): self.width = width |
43 | 53 | def asStr(self): return "shift(%d)" % self.width |
44 | 54 | def runOn(self, tape, position): return position + self.width |
45 | -Left = Shift(-1) | |
46 | -Right = Shift(1) | |
55 | +shiftCache = {} | |
56 | +def shift(width): | |
57 | + if width not in shiftCache: shiftCache[width] = Shift(width) | |
58 | + return shiftCache[width] | |
59 | +Left = shift(-1) | |
60 | +Right = shift(1) | |
47 | 61 | class _Zero(Op): |
48 | 62 | def asStr(self): return "0" |
49 | 63 | def runOn(self, tape, position): |
@@ -60,6 +74,11 @@ class ZeroScaleAdd(Op): | ||
60 | 74 | tape[position + self.offset] += tape[position] * self.scale |
61 | 75 | tape[position] = 0 |
62 | 76 | return position |
77 | +scaleAddCache = {} | |
78 | +def scaleAdd(offset, scale): | |
79 | + if (offset, scale) not in scaleAddCache: | |
80 | + scaleAddCache[offset, scale] = ZeroScaleAdd(offset, scale) | |
81 | + return scaleAddCache[offset, scale] | |
63 | 82 | class Loop(Op): |
64 | 83 | _immutable_fields_ = "ops[*]", |
65 | 84 | def __init__(self, ops): self.ops = ops |
@@ -71,15 +90,22 @@ class Loop(Op): | ||
71 | 90 | position=position, tape=tape) |
72 | 91 | for op in self.ops: position = op.runOn(tape, position) |
73 | 92 | return position |
93 | +loopCache = [] | |
94 | +def loop(ops): | |
95 | + for l in loopCache: | |
96 | + if opEq(ops, l.ops): return l | |
97 | + rv = Loop(ops) | |
98 | + loopCache.append(rv) | |
99 | + return rv | |
74 | 100 | |
75 | 101 | def peep(ops): |
76 | 102 | rv = [] |
77 | 103 | temp = ops[0] |
78 | 104 | for op in ops[1:]: |
79 | 105 | if isinstance(temp, Shift) and isinstance(op, Shift): |
80 | - temp = Shift(temp.width + op.width) | |
106 | + temp = shift(temp.width + op.width) | |
81 | 107 | elif isinstance(temp, Add) and isinstance(op, Add): |
82 | - temp = Add(temp.imm + op.imm) | |
108 | + temp = add(temp.imm + op.imm) | |
83 | 109 | else: |
84 | 110 | rv.append(temp) |
85 | 111 | temp = op |
@@ -98,8 +124,8 @@ def loopish(ops): | ||
98 | 124 | elif (len(ops) == 4 and |
99 | 125 | isConstAdd(ops[0], -1) and isinstance(ops[2], Add) and |
100 | 126 | oppositeShifts(ops[1], ops[3])): |
101 | - return ZeroScaleAdd(ops[1].width, ops[2].imm) | |
102 | - return Loop(ops[:]) | |
127 | + return scaleAdd(ops[1].width, ops[2].imm) | |
128 | + return loop(ops[:]) | |
103 | 129 | |
104 | 130 | parseTable = { |
105 | 131 | ',': Input, '.': Output, |