ollvm源码分析之指令替换(1)
参考⽹站:
ollvm
ollvm总体框架与llvm⼀致,如下图所⽰。
其中IR(intermediate representation)是前端语⾔⽣成的中间代码表⽰,也是Pass操作的对象,它包括四部分:Module
Function
BasicBlock
Instruction
OLLVM有三个Pass以实现混淆,分别为Substitution、BogusControlFlow、Flattening。它们位于Transforms/Obfuscation/目录。
Pass之Substitution
实现功能是:指令替换
1 替换
1.1 Add
变换前    变换后
a=b+c    a=b-(-c)
a=b+c    a= -(-b + (-c))
a=b+c    r = rand (); a = b + r; a = a + c; a = a - r
a=b+c    r = rand (); a = b - r; a = a + c; a = a + r
1.2 Sub
变换前    变换后
a=b-c    a=b+(-c)
a=b-c    r = rand (); a = b + r; a = a - c; a = a - r
a=b-c    r = rand (); a = b - r; a = a - c; a = a + r
1.3 And
变换前    变换后
a=b&c    a=(b^~c)&b
a=a&b    a = !(!a | !b) & (r | !r)
1.4 Or
变换前    变换后
a=b|c    a=(b&c)|(b^c)
a=a|b    a = [((!a & r) | (a & !r)) ^ ((!b & r) | (b & !r))] | [!(!a | !b) & (r | !r)]
1.5 Xor
变换前    变换后
a=a^b    a = (!a & b) | (a & !b)
a=a^b    a = (a ^ r) ^ (b ^ r),或 a = (!a & r | a & !r) ^ (!b & r | b & !r)
1.6 命令
命令    解析
-mllvm -sub    激活指令替换
-mllvm -sub_loop=3    若已被激活,进行3次替换,默认为1
2 代码分析
namespace {
// Function pass that rewrites Add/Sub/And/Or/Xor binary instructions into
// equivalent but less obvious instruction sequences. One handler per opcode
// is chosen at random from the tables below each time an instruction is
// substituted.
struct Substitution : public FunctionPass {
  static char ID; // Pass identification, replacement for typeid

  // Per-opcode dispatch tables of pointers to the handler member functions.
  void (Substitution::*funcAdd[NUMBER_ADD_SUBST])(BinaryOperator *bo);
  void (Substitution::*funcSub[NUMBER_SUB_SUBST])(BinaryOperator *bo);
  void (Substitution::*funcAnd[NUMBER_AND_SUBST])(BinaryOperator *bo);
  void (Substitution::*funcOr[NUMBER_OR_SUBST])(BinaryOperator *bo);
  void (Substitution::*funcXor[NUMBER_XOR_SUBST])(BinaryOperator *bo);

  // Forwarded to toObfuscate() by runOnFunction().
  bool flag;

  // The default constructor used to leave `flag` and every dispatch table
  // uninitialized, so a default-constructed pass read indeterminate values.
  // It now gets the same tables as the flag-taking constructor.
  // NOTE(review): `false` is chosen as the conservative default for `flag`;
  // confirm this matches the intended behaviour of toObfuscate().
  Substitution() : FunctionPass(ID), flag(false) { initHandlers(); }

  Substitution(bool flag) : FunctionPass(ID), flag(flag) { initHandlers(); }

  // Pass entry point.
  bool runOnFunction(Function &F);
  // Walks the function and applies a random handler to each candidate.
  bool substitute(Function *f);

  // Add handlers.
  void addNeg(BinaryOperator *bo);
  void addDoubleNeg(BinaryOperator *bo);
  void addRand(BinaryOperator *bo);
  void addRand2(BinaryOperator *bo);

  // Sub handlers.
  void subNeg(BinaryOperator *bo);
  void subRand(BinaryOperator *bo);
  void subRand2(BinaryOperator *bo);

  // And handlers.
  void andSubstitution(BinaryOperator *bo);
  void andSubstitutionRand(BinaryOperator *bo);

  // Or handlers.
  void orSubstitution(BinaryOperator *bo);
  void orSubstitutionRand(BinaryOperator *bo);

  // Xor handlers.
  void xorSubstitution(BinaryOperator *bo);
  void xorSubstitutionRand(BinaryOperator *bo);

private:
  // Fills the dispatch tables; shared by both constructors so that no
  // construction path leaves a table entry unset.
  void initHandlers() {
    funcAdd[0] = &Substitution::addNeg;
    funcAdd[1] = &Substitution::addDoubleNeg;
    funcAdd[2] = &Substitution::addRand;
    funcAdd[3] = &Substitution::addRand2;

    funcSub[0] = &Substitution::subNeg;
    funcSub[1] = &Substitution::subRand;
    funcSub[2] = &Substitution::subRand2;

    funcAnd[0] = &Substitution::andSubstitution;
    funcAnd[1] = &Substitution::andSubstitutionRand;

    funcOr[0] = &Substitution::orSubstitution;
    funcOr[1] = &Substitution::orSubstitutionRand;

    funcXor[0] = &Substitution::xorSubstitution;
    funcXor[1] = &Substitution::xorSubstitutionRand;
  }
};
}
定义了5个成员函数指针数组,在构造函数Substitution(bool flag)中进行初始化。Add对应4个处理函数,Sub对应3个处理函数,And对应2个处理函数,Or对应2个处理函数,Xor对应2个处理函数。
2.1 ⼊⼝函数 runOnFunction
继承FunctionPass,所以⼊⼝函数是runOnFunction。
// Pass entry point: validates the requested number of substitution rounds,
// asks toObfuscate() whether this function should be processed, and if so
// runs substitute() on it. Returns true only when substitute() was invoked.
bool Substitution::runOnFunction(Function &F) {
  // The -sub_loop value must request at least one round.
  if (ObfTimes < 1) {
    errs() << "Substitution application number -sub_loop=x must be x > 0";
    return false;
  }

  Function *tmp = &F;

  // Leave the function untouched when obfuscation is not requested for it.
  if (!toObfuscate(flag, tmp, "sub"))
    return false;

  substitute(tmp);
  return true;
}
首先验证了 -mllvm -sub_loop=x 这个编译参数的正确性,其必须大于0。然后使用toObfuscate(flag, tmp, "sub")函数判断是否进行混淆,若满足条件,则调用substitute(tmp)函数进行指令替换。
2.2 substitute函数
// Runs ObfTimes rounds over every instruction of `f`. In each round, every
// Add, Sub, And, Or and Xor binary instruction is handed to a handler picked
// at random from the matching dispatch table; all other binary opcodes are
// left untouched.
//
// Always returns false; runOnFunction() ignores the result and reports the
// modification itself.
bool Substitution::substitute(Function *f) {
  // Loop for the number of time we run the pass on the function
  int times = ObfTimes;
  do {
    for (Function::iterator bb = f->begin(); bb != f->end(); ++bb) {
      for (BasicBlock::iterator inst = bb->begin(); inst != bb->end();
           ++inst) {
        if (!inst->isBinaryOp())
          continue;

        switch (inst->getOpcode()) {
        case BinaryOperator::Add:
          // case BinaryOperator::FAdd:
          // Substitute with random add operation
          (this->*funcAdd[llvm::cryptoutils->get_range(NUMBER_ADD_SUBST)])(
              cast<BinaryOperator>(inst));
          ++Add;
          break;
        case BinaryOperator::Sub:
          // case BinaryOperator::FSub:
          // Substitute with random sub operation
          (this->*funcSub[llvm::cryptoutils->get_range(NUMBER_SUB_SUBST)])(
              cast<BinaryOperator>(inst));
          ++Sub;
          break;
        case BinaryOperator::Mul:
        case BinaryOperator::FMul:
          //++Mul;
          break;
        case BinaryOperator::UDiv:
        case BinaryOperator::SDiv:
        case BinaryOperator::FDiv:
          //++Div;
          break;
        case BinaryOperator::URem:
        case BinaryOperator::SRem:
        case BinaryOperator::FRem:
          //++Rem;
          break;
        case Instruction::Shl:
          //++Shi;
          break;
        case Instruction::LShr:
          //++Shi;
          break;
        case Instruction::AShr:
          //++Shi;
          break;
        case Instruction::And:
          // Index range comes from the table size, like Add/Sub above,
          // instead of a hard-coded 2.
          (this->*funcAnd[llvm::cryptoutils->get_range(NUMBER_AND_SUBST)])(
              cast<BinaryOperator>(inst));
          ++And;
          break;
        case Instruction::Or:
          (this->*funcOr[llvm::cryptoutils->get_range(NUMBER_OR_SUBST)])(
              cast<BinaryOperator>(inst));
          ++Or;
          break;
        case Instruction::Xor:
          (this->*funcXor[llvm::cryptoutils->get_range(NUMBER_XOR_SUBST)])(
              cast<BinaryOperator>(inst));
          ++Xor;
          break;
        default:
          break;
        } // End switch
      }   // End for basickblock
    }     // End for Function
  } while (--times > 0); // for times
  return false;
}
最外层的do…while循环根据设定的循环次数重复进行变换,内层两个for循环从外向内分别遍历当前函数的所有基本块、遍历每个基本块中的每条指令。内层if条件用于判断当前指令是否为二元运算指令(isBinaryOp)。然后根据Opcode判断是否需要执行替换操作,需要替换的分为Add、Sub、And、Or、Xor 5种操作。其中llvm::cryptoutils->get_range是一个随机函数,用于从之前定义的处理函数数组中随机选择一个处理函数。
下面我们以Sub的处理函数subNeg(BinaryOperator *bo)为例,其他处理函数的变换过程类似。
// Rewrites a subtraction `a = b - c` as `a = b + (-c)`.
//
// An integer Sub becomes Neg + Add; any other opcode reaching this handler
// is treated as the floating-point form and becomes FNeg + FAdd. Both new
// instructions are created with `bo` as their insertion-point argument, and
// every use of `bo` is then redirected to the new sum. The no-wrap flags of
// `bo` are not copied to the new instructions.
void Substitution::subNeg(BinaryOperator *bo) {
  const bool isIntegerSub = (bo->getOpcode() == Instruction::Sub);

  // Step 1: -c
  BinaryOperator *neg =
      isIntegerSub ? BinaryOperator::CreateNeg(bo->getOperand(1), "", bo)
                   : BinaryOperator::CreateFNeg(bo->getOperand(1), "", bo);

  // Step 2: b + (-c)
  BinaryOperator *op = BinaryOperator::Create(
      isIntegerSub ? Instruction::Add : Instruction::FAdd, bo->getOperand(0),
      neg, "", bo);

  bo->replaceAllUsesWith(op);
}
该函数将a=b-c变成a=b+(-c);
BinaryOperator::CreateNeg(bo->getOperand(1), "", bo)指将c变成-c;BinaryOperator::Create(Instruction::Add, bo->getOperand(0), op, "", bo)指b+(-c);
bo->replaceAllUsesWith(op)指将所有对原指令bo结果的使用替换为新生成的op。