package dev.w1zzrd.asm.analysis;
import dev.w1zzrd.asm.exception.StateAnalysisException;
import dev.w1zzrd.asm.signature.MethodSignature;
import dev.w1zzrd.asm.signature.TypeSignature;
import jdk.internal.org.objectweb.asm.Label;
import jdk.internal.org.objectweb.asm.Opcodes;
import jdk.internal.org.objectweb.asm.Type;
import jdk.internal.org.objectweb.asm.tree.*;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Stack;
import java.util.function.Predicate;
/**
* Method frame state analysis class.
* Ideally, this should allow for instruction optimization when weaving methods.
* Additionally, this could theoretically enable code to be woven in between the original instructions.
*/
public class FrameState {
/**
 * Lookup table describing which value type an instruction pushes onto the operand stack when it is invoked.
 * (See {@link jdk.internal.org.objectweb.asm.Frame#SIZE})
 * <p>
 * NOTE(review): the layout appears to follow JVM opcode order (index 0 = NOP, index 1 = ACONST_NULL, ...), i.e.
 * the character at index {@code n} would describe opcode {@code n} - confirm against the code reading this table.
 * <p>
 * Key:
 * <pre>
 * ?    No change
 * X    Requires special attention
 * L    Object
 * I    int
 * J    long
 * F    float
 * D    double
 * </pre>
 */
private static final String STACK_CLOBBER_PUSH =
"?LIIIIIIIJJFFFDDIIXXXIJFDLIIIIJJJJFFFFDDDDLLLLIJFDLIII???????????????????????????????????XXXXXX?IJFDIJFDIJFDIJFDIJFDIJFDIJIJIJIJIJIJXJFDIFDIJDIJFIIIIIIII?????????????????????????X?X?XXXXXLLLI?LI??XL????";
/**
 * Lookup table describing which value types an instruction pops from the operand stack when it is invoked.
 * (See {@link jdk.internal.org.objectweb.asm.Frame#SIZE})
 * <p>
 * NOTE(review): the layout appears to follow JVM opcode order, like the table referenced above, so the character
 * at index {@code n} would describe opcode {@code n} - confirm against the code reading this table.
 * <p>
 * Key (multi-value entries list the popped operand types, separated by commas):
 * <pre>
 * ?    None
 * X    Requires special attention
 * $    Cat1 computational type
 * L    Object
 * I    int
 * J    long
 * F    float
 * D    double
 * S    int/float
 * W    long/double
 * C    int, int
 * V    long, long
 * B    float, float
 * N    double, double
 * M    object, int
 * 0    object, object
 * 1    object, int, int
 * 2    object, int, long
 * 3    object, int, float
 * 4    object, int, double
 * 5    object, int, object
 * K    Cat1, Cat1
 * </pre>
 *
 * Cat1 computational types are, according to the JVM8 spec, essentially all 32-bit types (any type that occupies
 * 1 stack slot).
 */
private static final String STACK_CLOBBER_POP =
"??????????????????????????????????????????????MMMMMMMMIJFDLIIIIJJJJFFFFDDDDLLLL12345111$K$$$XXXKCVBNCVBNCVBNCVBNCVBNCVBNCVCVCVCVCVCV?IIIJJJFFFDDDIIIVBBNNIIIIIICCCCCC00?????IJFDL??XLXXXXXX?IILLLLLLXXLL??";
/**
 * Predicts the operand stack as it would look once the given instruction has been executed.
 * <p>
 * The method walks backward from {@code targetNode} to select ONE linear sequence of instructions that may lead
 * up to it, then replays that sequence from earliest to latest through {@code updateFrameState}. Conditionals
 * are never evaluated: when a label is reached, the walk continues from the earliest jump targeting that label
 * (if there is one), and it stops early at a full {@link FrameNode}, which acts as a checkpoint for the frame
 * state.
 *
 * @param targetNode Instruction to compute the frame state for; it is itself part of the simulation
 * @param locals Frame local variables, handed to every simulation step
 * @return The simulated operand stack after {@code targetNode}
 * @throws NullPointerException if {@code targetNode} is null
 */
public static Stack getFrameStateAt(AbstractInsnNode targetNode, List locals) {
    // A missing target has always been rejected with an NPE; keep failing fast before doing any work
    if (targetNode == null)
        throw new NullPointerException("targetNode");

    Stack stack = new Stack<>();

    // We traverse backward to select ONE possible sequence of instructions that may be executed.
    // This lets us simulate the stack for this sequence. This only works because we don't consider conditionals.
    // Because we ignore conditionals and because we have assisting FrameNodes, we sidestep the halting problem.
    // Since we're traversing backward, the latest instruction to be read will be the earliest to be executed,
    // which is exactly the order a stack hands them back in.
    Stack<AbstractInsnNode> simulate = new Stack<>();

    for (AbstractInsnNode check = targetNode; check != null; check = check.getPrevious()) {
        // If the node we're checking is a label, find the earliest jump to it.
        // This assumes that no compiler optimizations have been made based on multiple values at compile time,
        // since we can't trivially predict branches.
        if (check instanceof LabelNode) {
            JumpInsnNode jump = findEarliestJump((LabelNode) check);

            // Labels don't affect the stack, so a label nobody jumps to can safely be ignored
            if (jump == null)
                continue;

            // Resume the backward walk from the jump; the jump itself becomes part of the simulation
            check = jump;
        }

        // No need to check line numbers in a simulation
        if (check instanceof LineNumberNode)
            continue;

        // Add instruction to simulation list
        simulate.push(check);

        // No need to simulate the state before a full frame: this is like a "checkpoint" in the frame state
        if (check instanceof FrameNode && ((FrameNode) check).type == Opcodes.F_FULL)
            break;
    }

    // We now have a proposed set of instructions that might run if the instructions were to be called.
    // Next, we analyse the stack and locals throughout the execution of these instructions.
    while (!simulate.isEmpty())
        updateFrameState(simulate.pop(), stack, locals);

    // The stack and locals are now in the state they would be in after the target instruction is hit

    /*
     * NOTE: This code analysis strategy assumes that the program analyzed follows the behaviour of any regular JVM
     *       program. This will, for example, fail to predict the state of the following set of (paraphrased)
     *       instructions:
     *
     *
     * METHOD START:
     *     1: LOAD 1
     *     2: JUMP TO LABEL "X" IF LOADED VALUE == 0   // if(1 == 0)
     *     3: PUSH 2
     *     4: PUSH 69
     *     5: PUSH 420
     *     6: LABEL "X"
     *     7: POP            <----- Analysis requested of this node
     *
     *
     * This FrameState method will (falsely) predict that the stack is empty and that the POP will fail, since it
     * will trace execution back to the jump at line (2), since it cannot determine that the jump will always fail
     * and simply observes the jump as popping the value loaded at (1), thus meaning that the stack would be empty
     * at line (7). Whereas in actuality, the jump will fail and the stack will therefore contain three values.
     *
     * This is because a normal Java program would not allow this arrangement of instructions, since it would entail
     * a split in the state of the stack based on the outcome of the conditional jump. I don't know of any JVM
     * language that *would* produce this behaviour (and I'm 90% sure the JVM would complain about the lack of
     * FrameNodes in the above example), but if the FrameState *does* encounter code compiled by such a language,
     * this code CANNOT predict the state of such a stack and incorrect assumptions about the state of the stack may
     * occur.
     *
     * Also note that this kind of behaviour cannot be predicted due to the halting problem, so any program which
     * exhibits the aforementioned behaviour is inherently unpredictable.
     */
    return stack;
}
/**
 * Collects the local variables whose scope covers a given instruction.
 * <p>
 * A variable is kept only when both ends of its scope can be located relative to the instruction, the scope
 * starts at or before the instruction, and the scope ends at or after it.
 *
 * @param insn Instruction to get local variable scope for
 * @param allLocals All local variables in method
 * @return The variables from {@code allLocals} that are in scope at {@code insn}, in their original order
 */
public static List localsAt(AbstractInsnNode insn, List allLocals) {
    List<LocalVariableNode> inScope = new ArrayList<>();

    for (Object candidate : allLocals) {
        LocalVariableNode local = (LocalVariableNode) candidate;

        // Offset of the scope start relative to the instruction; the scope must be locatable and must not
        // begin after the instruction
        Integer startOffset = relativeIndexOf(insn, local.start);
        boolean startsInTime = startOffset != null && startOffset <= 0;

        if (startsInTime) {
            // The end is only looked up for variables that passed the start check; it must be locatable
            // and must not lie before the instruction
            Integer endOffset = relativeIndexOf(insn, local.end);
            boolean endsLateEnough = endOffset != null && endOffset >= 0;

            if (endsLateEnough)
                inScope.add(local);
        }
    }

    return inScope;
}
/**
 * Searches the instructions preceding a label for jumps that target it and picks the one that appears first
 * in the instruction list.
 *
 * @param node {@link LabelNode} to find jumps referencing
 * @return The earliest preceding jump instruction targeting {@code node}, or null if there is none
 */
private static JumpInsnNode findEarliestJump(LabelNode node) {
    JumpInsnNode earliest = null;
    AbstractInsnNode cursor = node;

    // Walk toward the head of the instruction list
    while (cursor != null) {
        if (cursor instanceof JumpInsnNode) {
            JumpInsnNode candidate = (JumpInsnNode) cursor;

            // Matches found later in this walk sit earlier in the list, so they replace previous matches
            if (candidate.label.equals(node))
                earliest = candidate;
        }

        cursor = cursor.getPrevious();
    }

    return earliest;
}
/**
* Updates the state of a simulated stack frame based on the effects of a given instruction. Effectively simulates
* the instruction to a certain degree
* @param instruction Instruction to "simulate"
* @param stack Frame stack values
* @param locals Frame local variables
*/
private static void updateFrameState(
AbstractInsnNode instruction,
Stack stack,
List locals
) {
if (instruction instanceof FrameNode) {
// Stack values are always updated at a FrameNode
stack.clear();
// This is VERY different from getType() declared in AbstractInsnNode
switch (((FrameNode) instruction).type) {
case Opcodes.F_NEW:
case Opcodes.F_FULL:
case Opcodes.F_SAME: // This feels like undocumented behaviour
// Since this is a full frame, we start anew
//locals.clear();
// Ascertain stack types
appendTypes(((FrameNode) instruction).stack, stack, true);
// Ascertain local types
//appendTypes(((FrameNode) instruction).local, locals, false);
break;
case Opcodes.F_APPEND:
//appendTypes(((FrameNode) instruction).local, locals, false);
break;
case Opcodes.F_SAME1:
appendTypes(((FrameNode) instruction).stack, stack, true);
break;
case Opcodes.F_CHOP:
/*
List