package dev.w1zzrd.asm.analysis; import dev.w1zzrd.asm.exception.StateAnalysisException; import dev.w1zzrd.asm.signature.MethodSignature; import dev.w1zzrd.asm.signature.TypeSignature; import jdk.internal.org.objectweb.asm.Label; import jdk.internal.org.objectweb.asm.Opcodes; import jdk.internal.org.objectweb.asm.Type; import jdk.internal.org.objectweb.asm.tree.*; import java.lang.reflect.Field; import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.Stack; import java.util.function.Predicate; /** * Method frame state analysis class. * Ideally, this should allow for instruction optimization when weaving methods. * Additionally, this could theoretically enable code to be woven in-between original instructions */ public class FrameState { /** * Stack clobbering pushed values after an instruction is invoked. * (See {@link jdk.internal.org.objectweb.asm.Frame#SIZE})
* Key:
* ? No change
* X Requires special attention
* L Object
* I int
* J long
* F float
* D double
*/ private static final String STACK_CLOBBER_PUSH = "?LIIIIIIIJJFFFDDIIXXXIJFDLIIIIJJJJFFFFDDDDLLLLIJFDLIII???????????????????????????????????XXXXXX?IJFDIJFDIJFDIJFDIJFDIJFDIJIJIJIJIJIJXJFDIFDIJDIJFIIIIIIII?????????????????????????X?X?XXXXXLLLI?LI??XL????"; /** * Stack clobbering popped values when an instruction is invoked. * (See {@link jdk.internal.org.objectweb.asm.Frame#SIZE})
* Key:
* ? None
* X Requires special attention
* $ Cat1 computational type
* L Object
* I int
* J long
* F float
* D double
* S int/float
* W long/double
* C int, int
* V long, long
* B float, float
* N double, double
* M object, int
* 0 object, object
* 1 object, int, int
* 2 object, int, long
* 3 object, int, float
* 4 object, int, double
* 5 object, int, object
* K Cat1, Cat1
*
* Cat1 computational types are, according to the JVM8 spec, essentially all 32-bit types (any type that occupies 1 stack slot) */ private static final String STACK_CLOBBER_POP = "??????????????????????????????????????????????MMMMMMMMIJFDLIIIIJJJJFFFFDDDDLLLL12345111$K$$$XXXKCVBNCVBNCVBNCVBNCVBNCVBNCVCVCVCVCVCV?IIIJJJFFFDDDIIIVBBNNIIIIIICCCCCC00?????IJFDL??XLXXXXXX?IILLLLLLXXLL??"; public static Stack getFrameStateAt(AbstractInsnNode targetNode, List locals) { Stack stack = new Stack<>(); AbstractInsnNode first = targetNode, tmp; // Computation is already O(n), no need to accept first instruction as an argument while ((tmp = first.getPrevious()) != null) first = tmp; // Now we traverse backward to select ONE possible sequence of instructions that may be executed // This lets us simulate the stack for this sequence. This only works because we don't consider conditionals // Because we ignore conditionals and because we have assisting FrameNodes, we sidestep the halting problem // Since we're traversing backward, the latest instruction to be read will be the earliest to be executed Stack simulate = new Stack<>(); for (AbstractInsnNode check = targetNode; check != null; check = check.getPrevious()) { // If the node we're checking is a label, find the earliest jump to it // This assumes that no compiler optimizations have been made based on multiple values at compile time // since we can't trivially predict branches, but I don't think the java compiler does either, so meh if (check instanceof LabelNode) { // Labels don't affect the stack, so we can safely ignore them JumpInsnNode jump = findEarliestJump((LabelNode) check); if (jump == null) continue; check = jump; } // No need to check line numbers in a simulation if (check instanceof LineNumberNode) continue; // Add instruction to simulation list simulate.add(check); // No need to simulate the state before a full frame: this is kinda like a "checkpoint" in the frame state if (check instanceof FrameNode && ((FrameNode) check).type == Opcodes.F_FULL) break; } int stackSize = 0; // We now have a proposed set of instructions that might run if the instructions were to be called // Next, we analyse the stack and locals throughout the execution of these instructions while (!simulate.isEmpty()) { updateFrameState(simulate.pop(), stack, locals); } // The stack and locals are now in the state they would be in after the target instruction is hit // QED or something... /* * NOTE: This code analysis strategy assumes that the program analyzed follows the behaviour of any regular JVM * program. This will, for example, fail to predict the state of the following set of (paraphrased) * instructions: * * * METHOD START: * 1: LOAD 1 * 2: JUMP TO LABEL "X" IF LOADED VALUE == 0 // if(1 == 0) * 3: PUSH 2 * 4: PUSH 69 * 5: PUSH 420 * 6: LABEL "X" * 7: POP <----- Analysis requested of this node * * * This FrameState method will (falsely) predict that the stack is empty and that the POP will fail, since it * will trace execution back to the jump at line (2), since it cannot determine that the jump will always fail * and simply observes the jump as popping the value loaded at (1), thus meaning that the stack would be empty * at line (7). Whereas in actuality, the jump will fail and the stack will therefore contain three values. * * This is because a normal Java program would not allow this arrangement of instructions, since it would entail * a split in the state of the stack based on the outcome of the conditional jump. I don't know of any JVM * language that *would* produce this behaviour (and I'm 90% sure the JVM would complain about the lack of * FrameNodes in the above example), but if the FrameState *does* encounter code compiled by such a language, * this code CANNOT predict the state of such a stack and incorrect assumptions about the state of the stack may * occur. * * Also note that this kind of behaviour cannot be predicted due to the halting problem, so any program which * exhibits the aforementioned behaviour is inherently unpredictable. */ return stack; } /** * Get a list of all local variables currently in scope at a given instruction * @param insn Instruction to get local variable scope for * @param allLocals All local variables in method * @return A subset of all local variables such that accessing any value in said subset would not be an error */ public static List localsAt(AbstractInsnNode insn, List allLocals) { ArrayList collect = new ArrayList<>(); for (LocalVariableNode vNode : allLocals) { // Find relative index of start of variable scope Integer start = relativeIndexOf(insn, vNode.start); // If start of scope could not be found, or it begins after the given instruction, it's not in scope if (start == null || start > 0) continue; // Find relative index of end of variable scope Integer end = relativeIndexOf(insn, vNode.end); // If end of scope could not be found, or it ends before the given instruction, it's not in scope if (end == null || end < 0) continue; // Scope starts at (or before) given instruction and ends at (or after) given instruction collect.add(vNode); } return collect; } /** * Attempts to find the earliest jump label referencing the given node in the instruction list * @param node {@link LabelNode} to find jumps referencing * @return A jump instruction node earlier in the instruction list or null if none could be found */ private static JumpInsnNode findEarliestJump(LabelNode node) { JumpInsnNode jump = null; // Traverse backward until we hit the beginning of the list for (AbstractInsnNode prev = node; prev != null; prev = prev.getPrevious()) if (prev instanceof JumpInsnNode && ((JumpInsnNode) prev).label.equals(node)) jump = (JumpInsnNode) prev; return jump; } /** * Updates the state of a simulated stack frame based on the effects of a given instruction. Effectively simulates * the instruction to a certain degree * @param instruction Instruction to "simulate" * @param stack Frame stack values * @param locals Frame local variables */ private static void updateFrameState( AbstractInsnNode instruction, Stack stack, List locals ) { if (instruction instanceof FrameNode) { // Stack values are always updated at a FrameNode stack.clear(); // This is VERY different from getType() declared in AbstractInsnNode switch (((FrameNode) instruction).type) { case Opcodes.F_NEW: case Opcodes.F_FULL: case Opcodes.F_SAME: // This feels like undocumented behaviour // Since this is a full frame, we start anew //locals.clear(); // Ascertain stack types appendTypes(((FrameNode) instruction).stack, stack, true); // Ascertain local types //appendTypes(((FrameNode) instruction).local, locals, false); break; case Opcodes.F_APPEND: //appendTypes(((FrameNode) instruction).local, locals, false); break; case Opcodes.F_SAME1: appendTypes(((FrameNode) instruction).stack, stack, true); break; case Opcodes.F_CHOP: /* List local = ((FrameNode) instruction).local; if (local != null) while (local.size() > locals.size()) locals.remove(locals.size() - 1); */ break; } } else clobberStack(instruction, stack, locals); } /** * Parse and append raw frame type declarations to the end of the given collection * @param types Raw frame types to parse * @param appendTo Collection to append types to * @param skipNulls Whether or not to short-circuit parsing when a null-valued type is found */ private static void appendTypes(List types, List appendTo, boolean skipNulls) { if (types == null) return; for (Object o : types) { if (o == null && skipNulls) break; else if (o == null) appendTo.add(null); else { TypeSignature sig = parseFrameSignature(o); appendTo.add(sig); if (sig.stackFrameElementWith() == 2) appendTo.add(new TypeSignature(sig.getSig().charAt(0), true)); } } } /** * Determine the {@link TypeSignature} of stack/local type declaration * @param o Type to parse * @return {@link TypeSignature} representing the given type declaration */ private static TypeSignature parseFrameSignature(Object o) { if (o instanceof String) // Fully qualified type return new TypeSignature("L"+o+";"); else if (o instanceof Integer) { // Primitive switch ((int)o) { case 0: // Top return new TypeSignature('V', true); case 1: // Int return new TypeSignature("I"); case 2: // Float return new TypeSignature("F"); case 3: // Double return new TypeSignature("D"); case 4: // Long return new TypeSignature("J"); case 5: // Null return new TypeSignature(); } } else if (o instanceof Label) { return new TypeSignature("Ljava/lang/Object;", 0, true); } throw new StateAnalysisException(String.format("Could not determine type signature for object %s", o)); } /** * Simulate stack-clobbering effects of invoking a given instruction with a given frame state * @param insn Instruction to simulate * @param stack Frame stack values * @param locals Frame local variables */ private static void clobberStack( AbstractInsnNode insn, List stack, List locals ) { // Look, before you go ahead and roast my code, just know that I have a "code first, think later" mentality, // so this entire method was essentially throw together and structured this way before I realised what I was // doing. If things look like they're implemented in a dumb way, it's probably because it is. There was // virtually no thought behind the implementation of this method. Now... let the roasting commence final int opcode = insn.getOpcode(); if (opcode >= 0 && opcode < STACK_CLOBBER_POP.length()) { // We have an instruction char pushType = STACK_CLOBBER_PUSH.charAt(opcode); char popType = STACK_CLOBBER_POP.charAt(opcode); // Yes, the switches in the conditional statements can be collapsed, but this keeps it clean (for now) // TODO: Collapse switch statements if (pushType == 'X' && popType == 'X') { // Complex argument and result // This behaviour is exhibited by 11 instructions in the JVM 8 spec int argCount = 0; MethodSignature msig = null; switch (opcode) { case Opcodes.DUP2: case Opcodes.DUP2_X1: case Opcodes.DUP2_X2: // Actually just operates on Cat2 values, but whatever stack.add(stack.size() - (opcode - 90), stack.get(stack.size() - 2)); stack.add(stack.size() - (opcode - 90), stack.get(stack.size() - 2)); break; case Opcodes.INVOKEDYNAMIC: msig = new MethodSignature(((InvokeDynamicInsnNode) insn).desc); argCount = -1; case Opcodes.INVOKEVIRTUAL: case Opcodes.INVOKESPECIAL: case Opcodes.INVOKEINTERFACE: ++argCount; case Opcodes.INVOKESTATIC: if (msig == null) msig = new MethodSignature(((MethodInsnNode)insn).desc); argCount += msig.getArgCount(); for (int i = 0; i < argCount; ++i) { // Longs and doubles pop 2 values from the stack if (i < msig.getArgCount() && msig.getArg(i).stackFrameElementWith() == 2) stack.remove(stack.size() - 1); // All args pop at least 1 value stack.remove(stack.size() - 1); } // For non-void methods, push return to stack if (!msig.getRet().isVoidType()) stack.add(msig.getRet()); break; case 196: // WIDE // WIDE instruction not expected in normal Java programs // TODO: Implement? //throw new NotImplementedException(); break; // Ignore instruction, since it wraps the immediately following instruction } } else if (pushType == 'X') { // Complex result // Technically IINC is classified here, but it can be ignored because this isn't a verification tool; // this just checks clobbering, which IINC does not do switch (opcode) { case Opcodes.DUP: case Opcodes.DUP_X1: case Opcodes.DUP_X2: stack.add(stack.size() - (opcode - 88), stack.get(stack.size() - 1)); break; case Opcodes.LDC: case 19: // LDC_W case 20: // LDC2_W { // I'm not 100% sure this actually works for LDC_W and LDC2_W LdcInsnNode ldc = (LdcInsnNode) insn; if (ldc.cst instanceof Type) { // Type objects in in context will always refer to method references, class literals or // array literals int sort = ((Type) ldc.cst).getSort(); switch (sort) { case Type.OBJECT: stack.add(new TypeSignature(((Type) ldc.cst).getDescriptor())); break; case Type.METHOD: stack.add(new TypeSignature(new MethodSignature(((Type) ldc.cst).getDescriptor()))); break; } } else if (ldc.cst instanceof String){ // Loading a string constant, I think stack.add(new TypeSignature("Ljava/lang/String;")); } else { // Some primitive boxed value // All the boxed primitives have a public static final field TYPE declaring their unboxed // type, so we just get the internal name of that field reflectively because I'm lazy // TODO: Un-reflect-ify this because it can literally be solved with if-elses instead try { Class cType = ((Class)ldc.cst.getClass().getField("TYPE").get(null)); char cLetter = long.class.equals(cType) ? 'j' : boolean.class.equals(cType) ? 'z' : cType.getName().charAt(0); stack.add(new TypeSignature(cLetter, false)); // For W pushes, add top value if (long.class.equals(cType) || double.class.equals(cType)) stack.add(new TypeSignature(cLetter, true)); } catch (NoSuchFieldException | IllegalAccessException e) { throw new RuntimeException(e); } } break; } case Opcodes.GETFIELD: stack.remove(stack.size() - 1); case Opcodes.GETSTATIC: stack.add(new TypeSignature(((FieldInsnNode) insn).desc)); break; } } else if (popType == 'X') { // Complex argument encompasses 3 instructions switch (opcode) { case Opcodes.PUTFIELD: case Opcodes.PUTSTATIC: { FieldInsnNode put = (FieldInsnNode) insn; // Get type signature TypeSignature sig = new TypeSignature(put.desc); // If type is Long or Double, we need to pop 2 elements if (sig.stackFrameElementWith() == 2) stack.remove(stack.size() - 1); // Pop element from stack stack.remove(stack.size() - 1); // If this was a non-static instruction, pop object reference too if (opcode == Opcodes.PUTFIELD) stack.remove(stack.size() - 1); break; } case Opcodes.MULTIANEWARRAY: { MultiANewArrayInsnNode marray = (MultiANewArrayInsnNode) insn; // Pop a value for each dimension for (int i = 0; i < marray.dims; ++i) stack.remove(stack.size() - 1); stack.add(new TypeSignature(marray.desc)); break; } } } else { // Trivial-ish argument and result trivialPop(insn, popType, stack, locals); trivialPush(insn, pushType, stack, locals); } } } /** * Simulate a "trivial" instruction which pops values from the operand stack * @param insn Instruction to simulate pushing for * @param type Classification of push type * @param stack Simulated operand stand types * @param locals Simulated frame local types */ private static void trivialPop(AbstractInsnNode insn, char type, List stack, List locals) { // TODO: Fix type naming scheme; this is actually going to make me cry // Yes, the fall-throughs are very intentional switch (type) { // Pops 4 values case 'V': case 'N': case '2': case '4': stack.remove(stack.size() - 1); // Pops 3 values case '1': case '3': case '5': stack.remove(stack.size() - 1); // Pops 2 values case 'D': case 'J': case 'W': case 'C': case 'B': case 'M': case '0': case 'K': stack.remove(stack.size() - 1); // Pops 1 value case 'I': case 'F': case 'L': case 'S': case '$': stack.remove(stack.size() - 1); break; } } /** * Simulate a "trivial" instruction which pushes values to the operand stack * @param insn Instruction to simulate pushing for * @param type Classification of push type * @param stack Simulated operand stand types * @param locals Simulated frame local types */ private static void trivialPush(AbstractInsnNode insn, char type, List stack, List locals) { // Pushing is a bit more tricky than popping because we have to resolve types (kind of) switch (type) { case 'I': case 'F': // Push single-entry primitive stack.add(new TypeSignature(Character.toString(type))); break; case 'D': case 'J': // Push two-entry primitive (value + top) stack.add(new TypeSignature(Character.toString(type))); stack.add(new TypeSignature(type, true)); break; case 'L': // Push an object type to the stack switch (insn.getOpcode()) { case Opcodes.ACONST_NULL: // Null type, I guess stack.add(new TypeSignature()); break; case Opcodes.ALOAD: case 42: // ALOAD_0 case 43: // ALOAD_1 case 44: // ALOAD_2 case 45: // ALOAD_3 // Push a local variable to the stack Optional targetVar = localsAt(insn, locals) .stream() .filter(it -> it.index == ((VarInsnNode) insn).var) .findFirst(); if (!targetVar.isPresent()) throw new StateAnalysisException(String.format( "Attempt to access a local variable out of scope: Opcode = %s", insn.getOpcode() )); stack.add(new TypeSignature(targetVar.get().desc)); break; case Opcodes.AALOAD: // Read an array element to the stack stack.remove(stack.size() - 1); // Pop array index // Pop array and push value // This assumes that the popped value is an array (as it should be) stack.add(stack.remove(stack.size() - 1).getArrayElementType()); break; case Opcodes.NEW: // Allocate a new object (should really be marked as uninitialized, but meh) // We'll burn that bridge when we get to it or something... stack.add(new TypeSignature(String.format("L%s;", ((TypeInsnNode) insn).desc))); break; case Opcodes.NEWARRAY: // Allocate a new, 1-dimensional, primitive array stack.remove(stack.size() - 1); stack.add(new TypeSignature( Character.toString("ZCFDBSIJ".charAt(((IntInsnNode) insn).operand - 4)), 1, false )); break; case Opcodes.ANEWARRAY: // Allocate a new, 1-dimensional, object array stack.remove(stack.size() - 1); stack.add(new TypeSignature(((TypeInsnNode) insn).desc, 1, false)); break; case Opcodes.CHECKCAST: // Cast an object to another type stack.remove(stack.size() - 1); stack.add(new TypeSignature(((TypeInsnNode) insn).desc)); break; } } } /** * Purely for debugging purposes. This method generates a collection of instruction names that match the given * functional stack-clobbering properties.
*
* For example:
* The WIDE instruction is classified as both a complex-push and complex-pop because determining how it clobbers * the stack requires determining which instruction it is wrapping and thereby what types are expected. * Depending on the bytecode, the wide instruction can pop between 0 (like WIDE ILOAD) and 2 (like WIDE LSTORE) * operands and may push between 0 (like WIDE ISTORE) and 2 (like WIDE DLOAD) operands or not touch the operand * stack at all (like WIDE IINC). * * @param complexPush Whether or not the instructions should have non-trivial results generated by execution * @param complexPop Whether or not the instructions should have non-trivial argument requirements for execution * @param insnP An instruction-code specific predicate for fine-tuned filtering * @return A collection of instruction names matching the given functional properties. For instructions named * "Opcode<...>", please refer to the comments in {@link Opcodes} as well as the official JVM specification * @see JVM8 instructions spec */ private static List getOpsByComplexity( boolean complexPush, boolean complexPop, Predicate insnP ) { ArrayList opcodes = new ArrayList<>(); for (int i = 0; i < FrameState.STACK_CLOBBER_PUSH.length(); ++i) if ((FrameState.STACK_CLOBBER_PUSH.charAt(i) == 'X' == complexPush) && (FrameState.STACK_CLOBBER_POP.charAt(i) == 'X' == complexPop)) opcodes.add(i); return opcodes.stream().filter(insnP == null ? it -> true : insnP).map(instrID -> { try { return java.util.Arrays .stream(Opcodes.class.getFields()) .filter(field -> { try { return java.lang.reflect.Modifier.isStatic(field.getModifiers()) && !field.getName().startsWith("ACC_") && !field.getName().startsWith("T_") && !field.getName().startsWith("H_") && !field.getName().startsWith("F_") && !field.getName().startsWith("V1_") && field.getType().equals(int.class) && field.get(null).equals(instrID); } catch (Throwable t) { throw new RuntimeException(t); } }) .map(Field::getName) .findFirst() .orElse(String.format("Opcode<%d>", instrID)); } catch(Throwable t) { throw new RuntimeException(t); } }).collect(java.util.stream.Collectors.toList()); } /** * Find the index of a given instruction relative to a starting point * @param current Starting point for search * @param find Instruction to find * @return Negative values for instructions previous to the current instruction, positive values for instructions * after the current instruction. Null if instruction could not be found */ private static Integer relativeIndexOf( AbstractInsnNode current, AbstractInsnNode find ) { // Check backward int idx = 0; AbstractInsnNode check = current; while (check != null) { if (check == find) return idx; --idx; check = check.getPrevious(); } // Check forward idx = 1; check = current.getNext(); while (check != null) { if (check == find) return idx; ++idx; check = check.getNext(); } // No match return null; } }