GP-3307 Stack strings

This commit is contained in:
caheckman 2024-04-16 22:24:34 +00:00
parent 9b6ba66aa0
commit 5604178194
37 changed files with 1653 additions and 419 deletions

View File

@ -102,6 +102,7 @@ model {
include "blockaction.cc"
include "merge.cc"
include "double.cc"
include "constseq.cc"
include "coreaction.cc"
include "condexe.cc"
include "override.cc"

View File

@ -63,6 +63,7 @@ src/decompile/datatests/retstruct.xml||GHIDRA||||END|
src/decompile/datatests/sbyte.xml||GHIDRA||||END|
src/decompile/datatests/skipnext2.xml||GHIDRA||||END|
src/decompile/datatests/stackreturn.xml||GHIDRA||||END|
src/decompile/datatests/stackstring.xml||GHIDRA||||END|
src/decompile/datatests/statuscmp.xml||GHIDRA||||END|
src/decompile/datatests/switchhide.xml||GHIDRA||||END|
src/decompile/datatests/switchind.xml||GHIDRA||||END|

View File

@ -83,7 +83,7 @@ DECCORE=capability architecture options graph cover block cast typeop database c
type variable varmap jumptable emulate emulateutil flow userop multiprecision \
funcdata funcdata_block funcdata_op funcdata_varnode unionresolve pcodeinject \
heritage prefersplit rangeutil ruleaction subflow blockaction merge double \
transform coreaction condexe override dynamic crc32 prettyprint \
transform constseq coreaction condexe override dynamic crc32 prettyprint \
printlanguage printc printjava memstate opbehavior paramid signature $(COREEXT_NAMES)
# Files used for any project that use the sleigh decoder
SLEIGH= sleigh pcodeparse pcodecompile sleighbase slghsymbol \

View File

@ -1321,7 +1321,6 @@ void Architecture::parseCompilerConfig(DocumentStorage &store)
if (miter == protoModels.end()) { // If __thiscall doesn't exist we clone it off of the default
createModelAlias("__thiscall",defaultfp->getName());
}
userops.setDefaults(this);
initializeSegments();
PreferSplitManager::initialize(splitrecords);
types->setupSizes(); // If no data_organization was registered, set up default values

View File

@ -0,0 +1,491 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "constseq.hh"
#include "funcdata.hh"
namespace ghidra {
// Threshold used by collectCopyOps(), checkCopyInterference(), and formByteArray():
// a candidate with fewer COPYs/characters than this is rejected.
const int4 StringSequence::MINIMUM_SEQUENCE_LENGTH = 4;
/// \brief Set-up for recovering COPY ops into a memory range, given a Symbol and an Address being COPYed into
///
/// Starting at the data-type of the given Symbol, descend through its components toward the given
/// Address until the expected character data-type is reached.  The search only succeeds if the
/// character data-type is reached directly from an array, in which case the memory range is set to
/// cover that array.  The COPY ops writing into the range are then gathered and tested for viability.
/// If any step fails, the size of the memory range is zero when the constructor returns, so that
/// isValid() reports \b false.
/// \param fdata is the function containing the root COPY
/// \param ct is the specific data-type for which there should be an array
/// \param ent is the given Symbol
/// \param root is the COPY holding the constant
/// \param addr is the Address being COPYed into
StringSequence::StringSequence(Funcdata &fdata,Datatype *ct,SymbolEntry *ent,PcodeOp *root,const Address &addr)
  : data(fdata), rootOp(root), rootAddr(addr), entry(ent), size(0), charType(ct)
{
  if (entry->getAddr().getSpace() != addr.getSpace())
    return;
  int8 off = rootAddr.getOffset() - entry->getFirst();
  if (off >= entry->getSize())
    return;
  if (rootOp->getIn(0)->getOffset() == 0)
    return;				// Reject a root COPY whose constant is zero

  // Walk down through sub-components, remembering the immediate container of the current component
  Datatype *curType = entry->getSymbol()->getType();
  Datatype *containerType = (Datatype *)0;
  int8 containerOff = 0;
  while(curType != ct) {
    containerType = curType;
    containerOff = off;
    curType = curType->getSubType(off, &off);
    if (curType == (Datatype *)0)
      break;
  }
  if (curType != ct)
    return;				// Never reached the character data-type
  if (containerType == (Datatype *)0)
    return;				// Character data-type is the Symbol itself, not an array element
  if (containerType->getMetatype() != TYPE_ARRAY)
    return;

  startAddr = rootAddr - containerOff;
  size = ((TypeArray *)containerType)->numElements() * charType->getAlignSize();
  block = rootOp->getParent();
  if (collectCopyOps() && checkCopyInterference() && formByteArray())
    return;				// A viable sequence was found
  clear();
}
/// Discard the collected COPY ops and zero the region size, so that isValid() returns \b false.
void StringSequence::clear(void)

{
  moveOps.clear();
  size = 0;
}
/// Scan the Varnodes located within the array, in address order, and collect those written by a COPY
/// of a constant. The COPYs must be in the same basic block as the root COPY.
/// If the output size of any such COPY does not match the size of \b charType, return \b false.
/// If there is a COPY to the array element immediately before the root address, return \b false.
/// Otherwise COPYs to earlier addresses are skipped. No COPYs are collected after the first gap
/// (element with no COPY to it).
/// \return \b true if at least MINIMUM_SEQUENCE_LENGTH COPY ops of constants were recovered.
bool StringSequence::collectCopyOps(void)
{
Address endAddr = startAddr + (size - 1); // startAddr - endAddr bounds the formal array
Address beginAddr = startAddr; // Start search for COPYs at the start of the array
if (startAddr != rootAddr) {
beginAddr = rootAddr - charType->getAlignSize(); // or the first address before the root address (whichever is later)
}
VarnodeLocSet::const_iterator iter = data.beginLoc(beginAddr);
VarnodeLocSet::const_iterator enditer = data.endLoc(endAddr);
// diff is the byte offset (relative to startAddr) of the most recently accepted element, starting with the root
int4 diff = rootAddr.getOffset() - startAddr.getOffset();
while(iter != enditer) {
Varnode *vn = *iter;
++iter;
// Only consider Varnodes defined by a COPY of a constant within the root's basic block
if (!vn->isWritten()) continue;
PcodeOp *op = vn->getDef();
if (op->code() != CPUI_COPY) continue;
if (op->getParent() != block) continue;
if (!op->getIn(0)->isConstant()) continue;
if (vn->getSize() != charType->getSize())
return false; // COPY is the wrong size (has yet to be split)
int4 tmpDiff = vn->getOffset() - startAddr.getOffset();
if (tmpDiff < diff) {
if (tmpDiff + charType->getAlignSize() == diff)
return false; // COPY to previous element, the root COPY is not the first in sequence
continue;
}
else if (tmpDiff > diff) {
if (tmpDiff - diff < charType->getAlignSize())
continue; // Write lands between element boundaries, ignore it
if (tmpDiff - diff > charType->getAlignSize())
break; // Gap in COPYs
diff = tmpDiff; // Advanced by one character
}
// tmpDiff == diff at this point: a COPY to the current element (duplicates are rejected later by formByteArray)
moveOps.emplace_back(vn->getOffset(),op,-1);
}
return (moveOps.size() >= MINIMUM_SEQUENCE_LENGTH);
}
/// Examine every op strictly between the two given COPYs.  The output Varnodes themselves should be
/// verified to only be read outside of the basic block, so effectively only LOADs, STOREs, and CALLs
/// can really interfere.  An op whose evaluation type is PcodeOp::special is treated as interfering,
/// unless it is an INDIRECT, CALLOTHER, SEGMENTOP, CPOOLREF, or NEW.
/// \param startOp is the starting COPY
/// \param endOp is the ending COPY
/// \return \b true if there is no interference, \b false if there is possible interference
bool StringSequence::checkBetweenCopy(PcodeOp *startOp,PcodeOp *endOp)

{
  for(PcodeOp *curOp = startOp->nextOp(); curOp != endOp; curOp = curOp->nextOp()) {
    if (curOp->getEvalType() != PcodeOp::special)
      continue;
    switch(curOp->code()) {
      case CPUI_INDIRECT:
      case CPUI_CALLOTHER:
      case CPUI_SEGMENTOP:
      case CPUI_CPOOLREF:
      case CPUI_NEW:
	break;			// Special ops that are tolerated between COPYs
      default:
	return false;		// Any other special op may interfere
    }
  }
  return true;
}
/// Sort the COPY ops based on block order. Starting with the root COPY, walk backward until an interfering
/// gap is found or until the earliest COPY is reached. Similarly, walk forward until an interfering gap is found.
/// Truncate the COPY op array to be this smaller set. If too many were truncated, return \b false.
/// \return \b true if a maximal set of COPYs is found containing at the least the minimum number required
bool StringSequence::checkCopyInterference(void)
{
sort(moveOps.begin(),moveOps.end()); // Sort COPYs based on basic block order
int4 pos;
for(pos=0;pos<moveOps.size();++pos) {
if (moveOps[pos].op == rootOp) break;
}
if (pos == moveOps.size()) return false;
PcodeOp *curOp = moveOps[pos].op;
int4 startingPos,endingPos;
for(startingPos=pos-1;startingPos>=0;--startingPos) {
PcodeOp *prevOp = moveOps[startingPos].op;
if (!checkBetweenCopy(prevOp,curOp))
break;
curOp = prevOp;
}
startingPos += 1;
curOp = moveOps[pos].op;
for(endingPos=pos+1;endingPos < moveOps.size();++endingPos) {
PcodeOp *nextOp = moveOps[endingPos].op;
if (!checkBetweenCopy(curOp,nextOp))
break;
curOp = nextOp;
}
if (endingPos- startingPos < MINIMUM_SEQUENCE_LENGTH)
return false;
if (startingPos > 0) {
for(int4 i=startingPos;i<endingPos;++i) {
moveOps[i-startingPos] = moveOps[i];
}
}
moveOps.resize(endingPos-startingPos,WriteNode(0,(PcodeOp *)0,-1));
return true;
}
/// \brief Construct a Varnode, with data-type, that acts as a pointer (in)to the Symbol to the root Address
///
/// First, a PTRSUB is built from the base register to the Symbol. Then depending on its data-type, additional
/// PTRSUBs and PTRADDs are built to get from the start of the Symbol to the memory region holding the character data.
/// All the new Varnodes have the appropriate pointer data-type set. If the descent through the data-type
/// stops early with a residual byte offset, a final INT_ADD applies that offset. The final Varnode holding
/// the pointer to the memory region is returned.
/// \param insertPoint is the point before which all new PTRSUBs and PTRADDs are inserted
/// \return the Varnode holding the pointer to the root Address
Varnode *StringSequence::constructTypedPointer(PcodeOp *insertPoint)

{
  Varnode *spacePtr;
  AddrSpace *spc = rootAddr.getSpace();
  TypeFactory *types = data.getArch()->types;
  if (spc->getType() == IPTR_SPACEBASE)
    spacePtr = data.constructSpacebaseInput(spc);
  else
    spacePtr = data.constructConstSpacebase(spc);
  Datatype *baseType = entry->getSymbol()->getType();

  // Initial PTRSUB from the base pointer to the start of the Symbol
  PcodeOp *ptrsub = data.newOp(2, insertPoint->getAddr());
  data.opSetOpcode(ptrsub, CPUI_PTRSUB);
  data.opSetInput(ptrsub,spacePtr,0);
  uintb baseOff = AddrSpace::byteToAddress(entry->getFirst(),spc->getWordSize());	// Convert to address units
  data.opSetInput(ptrsub,data.newConstant(spacePtr->getSize(), baseOff),1);
  spacePtr = data.newUniqueOut(spacePtr->getSize(), ptrsub);
  data.opInsertBefore(ptrsub, insertPoint);
  TypePointer *curType = types->getTypePointerStripArray(spacePtr->getSize(), baseType, spc->getWordSize());
  spacePtr->updateType(curType, false, false);

  // curOff is the byte offset of the root Address relative to the component currently pointed at
  int8 curOff = rootAddr.getOffset() - entry->getFirst();
  while(baseType != charType) {
    int4 elSize = -1;			// Stays negative unless the current component is an array
    if (baseType->getMetatype() == TYPE_ARRAY)
      elSize = ((TypeArray *)baseType)->getBase()->getAlignSize();
    int8 newOff;			// Offset of the root Address within the sub-component
    baseType = baseType->getSubType(curOff, &newOff );
    if (baseType == (Datatype *)0) break;
    curOff -= newOff;			// Now the offset of the sub-component within its parent
    baseOff = AddrSpace::byteToAddress(curOff, spc->getWordSize());
    if (elSize >= 0) {
      if (curOff == 0) {	// Don't create a PTRADD( #0, ...)
	// spacePtr already has data-type with ARRAY stripped
	// baseType is already updated
	// The offset within the element must still be carried into the next iteration,
	// otherwise the descent would continue from offset 0 of the element.
	curOff = newOff;
	continue;
      }
      ptrsub = data.newOp(3, insertPoint->getAddr());
      data.opSetOpcode(ptrsub, CPUI_PTRADD);
      int8 numEl = curOff / elSize;
      data.opSetInput(ptrsub,data.newConstant(4, numEl),1);
      data.opSetInput(ptrsub,data.newConstant(4,elSize),2);
    }
    else {
      ptrsub = data.newOp(2, insertPoint->getAddr());
      data.opSetOpcode(ptrsub, CPUI_PTRSUB);
      data.opSetInput(ptrsub,data.newConstant(spacePtr->getSize(), baseOff), 1);
    }
    data.opSetInput(ptrsub,spacePtr,0);
    spacePtr = data.newUniqueOut(spacePtr->getSize(), ptrsub);
    data.opInsertBefore(ptrsub, insertPoint);
    curType = types->getTypePointerStripArray(spacePtr->getSize(), baseType, spc->getWordSize());
    spacePtr->updateType(curType, false, false);
    curOff = newOff;
  }
  if (curOff != 0) {
    // The data-type descent ended with a residual offset: apply it with a plain INT_ADD
    PcodeOp *addOp = data.newOp(2, insertPoint->getAddr());
    data.opSetOpcode(addOp, CPUI_INT_ADD);
    data.opSetInput(addOp, spacePtr, 0);
    baseOff = AddrSpace::byteToAddress(curOff, spc->getWordSize());
    data.opSetInput(addOp, data.newConstant(spacePtr->getSize(), baseOff), 1);
    spacePtr = data.newUniqueOut(spacePtr->getSize(), addOp);
    data.opInsertBefore(addOp, insertPoint);
    curType = types->getTypePointer(spacePtr->getSize(), charType, spc->getWordSize());
    spacePtr->updateType(curType, false, false);
  }
  return spacePtr;
}
/// Create an array of bytes from the root Varnode to the extent of the memory region.
/// Run through the COPYs and place their constant input into the array, honoring the endianness
/// reported by the root Address. If two COPYs write to the same position, return \b false.
/// If there are gaps in the byte array, remove any COPY that doesn't write to the contiguous
/// region in front of the root Varnode. A single zero-valued element ends the region and is kept
/// as part of it. Return \b false if the contiguous region is too small.
/// \return \b true if there exists enough COPYs that write into the region in front of the root Varnode without gaps
bool StringSequence::formByteArray(void)
{
int4 diff = rootAddr.getOffset() - startAddr.getOffset();
byteArray.resize(size-diff,0);
// used[] is indexed by byte position relative to the root: 0 = not written, 1 = non-zero element, 2 = zero element
vector<uint1> used(size-diff,0);
int4 elSize = charType->getSize();
bool isBigEndian = rootAddr.isBigEndian();
for(int4 i=0;i<moveOps.size();++i) {
int4 bytePos = moveOps[i].offset - rootAddr.getOffset();
if (used[bytePos] != 0)
return false; // Multiple COPYs to same place
uint8 val = moveOps[i].op->getIn(0)->getOffset();
used[bytePos] = (val == 0) ? 2 : 1; // Mark byte as used, a 2 indicates a null terminator
if (isBigEndian) {
// Most significant byte goes to the lowest position
for(int4 j=0;j<elSize;++j) {
uint1 b = (val >> (elSize-1-j)*8) & 0xff;
byteArray[bytePos+j] = b;
}
}
else {
// Least significant byte goes to the lowest position
for(int4 j=0;j<elSize;++j) {
byteArray[bytePos+j] = (uint1)val;
val >>= 8;
}
}
}
// Elements are spaced by the aligned size, which is what the used[] lookups below step by
int4 bigElSize = charType->getAlignSize();
int4 count;
for(count=0;count<moveOps.size();++count) {
uint1 val = used[ count * bigElSize ];
if (val != 1) { // Count number of characters not including null terminator
if (val == 2)
count += 1; // Allow a single null terminator
break;
}
}
if (count < MINIMUM_SEQUENCE_LENGTH)
return false;
if (count != moveOps.size()) {
// Keep only the COPYs that write below the end of the contiguous region, preserving their order
uintb maxOff = rootAddr.getOffset() + count * bigElSize;
vector<WriteNode> finalOps;
for(int4 i=0;i<moveOps.size();++i) {
if (moveOps[i].offset < maxOff)
finalOps.push_back(moveOps[i]);
}
moveOps.swap(finalOps);
}
return true;
}
/// Choose the built-in copy function that matches \b charType.  If \b charType is the \e char
/// data-type of the default character size, \b strncpy is selected.  If it is the \e char data-type of
/// the wide character size, \b wcsncpy is selected.  In both of these cases the number of elements
/// is passed back.  Otherwise \b memcpy is selected, and the number of bytes is passed back.
/// \param index will hold the number of elements (or bytes for \b memcpy) being copied
/// \return the id of the selected built-in function
uint4 StringSequence::selectStringCopyFunction(int4 &index)

{
  TypeFactory *typeFactory = data.getArch()->types;
  index = moveOps.size();		// Element count, correct for either string function
  if (charType == typeFactory->getTypeChar(typeFactory->getSizeOfChar()))
    return UserPcodeOp::BUILTIN_STRNCPY;
  if (charType == typeFactory->getTypeChar(typeFactory->getSizeOfWChar()))
    return UserPcodeOp::BUILTIN_WCSNCPY;
  index = moveOps.size() * charType->getSize();	// Fall back to a byte count
  return UserPcodeOp::BUILTIN_MEMCPY;
}
/// A built-in user-op that copies string data is created. Its first (destination) parameter is constructed
/// as a pointer to the array holding the character data, which may be nested in other arrays or structures.
/// The second (source) parameter is an \e internal \e string constructed from the \b byteArray. The
/// third parameter is the constant indicating the length of the string. The \e user-op, and the ops
/// building its pointer parameters, are inserted just before the first entry of \b moveOps, which is
/// the earliest COPY in the block moving a character into the memory region.
/// \return the constructed PcodeOp representing the copy, or null if the internal string Varnode could not be obtained
PcodeOp *StringSequence::buildStringCopy(void)
{
PcodeOp *insertPoint = moveOps[0].op; // Earliest COPY in the block
int4 numBytes = moveOps.size() * charType->getSize();
Architecture *glb = data.getArch();
TypeFactory *types = glb->types;
Datatype *charPtrType = types->getTypePointer(types->getSizeOfPointer(),charType,rootAddr.getSpace()->getWordSize());
Varnode *srcPtr = data.getInternalString(byteArray.data(), numBytes, charPtrType, insertPoint);
if (srcPtr == (Varnode *)0)
return (PcodeOp *)0; // Nothing has been added for the CALLOTHER yet, so just report failure
int4 index;
uint4 builtInId = selectStringCopyFunction(index);
glb->userops.registerBuiltin(builtInId);
// CALLOTHER inputs: 0 = user-op id, 1 = destination pointer, 2 = source string, 3 = length
PcodeOp *copyOp = data.newOp(4,insertPoint->getAddr());
data.opSetOpcode(copyOp, CPUI_CALLOTHER);
data.opSetInput(copyOp, data.newConstant(4, builtInId), 0);
Varnode *destPtr = constructTypedPointer(insertPoint);
data.opSetInput(copyOp, destPtr, 1);
data.opSetInput(copyOp, srcPtr, 2);
Varnode *lenVn = data.newConstant(4,index);
lenVn->updateType(copyOp->inputTypeLocal(3), false, false);
data.opSetInput(copyOp, lenVn, 3);
data.opInsertBefore(copyOp, insertPoint);
return copyOp;
}
/// \brief Analyze output descendants of the given PcodeOp being removed
///
/// Record any \b points where the output is being read, for later replacement.
/// Keep track of CPUI_PIECE ops whose input is from a PcodeOp being removed, and if both inputs are
/// visited, remove the input \e points and add the CPUI_PIECE to the list of PcodeOps being removed.
/// \param curNode is the given PcodeOp being removed
/// \param xref are the set of CPUI_PIECE ops with one input visited
/// \param points is the set of input points whose PcodeOp is being removed
/// \param deadOps is the current collection of PcodeOps being removed
void StringSequence::removeForward(const WriteNode &curNode,map<PcodeOp *,list<WriteNode>::iterator> &xref,
list<WriteNode> &points,vector<WriteNode> &deadOps)
{
Varnode *vn = curNode.op->getOut();
list<PcodeOp *>::const_iterator iter;
for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) {
PcodeOp *op = *iter;
map<PcodeOp *,list<WriteNode>::iterator>::iterator miter = xref.find(op);
if (miter != xref.end()) {
// We have seen the PIECE twice
uintb off = (*(*miter).second).offset;
if (curNode.offset < off)
off = curNode.offset;
points.erase((*miter).second);
deadOps.emplace_back(off,op,-1);
}
else {
int4 slot = op->getSlot(vn);
points.emplace_back(curNode.offset,op,slot);
if (op->code() == CPUI_PIECE) {
list<WriteNode>::iterator xrefIter = points.end();
--xrefIter;
xref[op] = xrefIter;
}
}
}
}
/// The COPY ops are removed. Any descendants of the COPY output are redefined with an INDIRECT around
/// the CALLOTHER op. If the COPYs feed into a PIECE op (as part of a CONCAT stack), the PIECE is removed
/// as well, which may cascade into removal of other PIECE ops in the stack.
/// \param replaceOp is the CALLOTHER op creating the INDIRECT effect
void StringSequence::removeCopyOps(PcodeOp *replaceOp)
{
map<PcodeOp *,list<WriteNode>::iterator> concatSet;
list<WriteNode> points;
vector<WriteNode> deadOps;
// Gather the reads of each COPY output, collecting PIECEs that have both inputs removed
for(int4 i=0;i<moveOps.size();++i) {
removeForward(moveOps[i],concatSet,points,deadOps);
}
// deadOps may grow while it is being processed, so walk it by index until no new entries appear
int4 pos = 0;
while(pos < deadOps.size()) {
removeForward(deadOps[pos],concatSet,points,deadOps);
pos += 1;
}
// Every remaining point reads the output of an op about to be destroyed: redefine that Varnode via INDIRECT
for(list<WriteNode>::iterator iter=points.begin();iter!=points.end();++iter) {
PcodeOp *op = (*iter).op;
Varnode *vn = op->getIn((*iter).slot);
// NOTE(review): presumably this test skips a Varnode already redefined for an earlier point -- confirm
if (vn->getDef()->code() != CPUI_INDIRECT) {
Varnode *newIn = data.newConstant(vn->getSize(),0);
PcodeOp *indOp = data.newOp(2, replaceOp->getAddr());
data.opSetOpcode(indOp,CPUI_INDIRECT);
data.opSetInput(indOp,newIn,0);
data.opSetInput(indOp,data.newVarnodeIop(replaceOp),1);
data.opSetOutput(indOp, vn);
data.markIndirectCreation(indOp, false);
data.opInsertBefore(indOp,replaceOp);
}
}
// Finally destroy the original COPYs and any PIECEs that were collected
for(int4 i=0;i<moveOps.size();++i)
data.opDestroy(moveOps[i].op);
for(int4 i=0;i<deadOps.size();++i)
data.opDestroy(deadOps[i].op);
}
/// The transform can only fail if the byte array does not encode a valid string, in which case \b false is returned.
/// Otherwise, a CALLOTHER representing \b memcpy is constructed taking the string constant as its \e source pointer.
/// The original COPY ops are removed.
/// \return \b true if the transform succeeded and the CALLOTHER is created
bool StringSequence::transform(void)
{
PcodeOp *memCpyOp = buildStringCopy();
if (memCpyOp == (PcodeOp *)0)
return false;
removeCopyOps(memCpyOp);
return true;
}
/// Append CPUI_COPY to the given list, as this Rule is rooted at COPY ops.
/// \param oplist is the list of op-codes being added to
void RuleStringSequence::getOpList(vector<uint4> &oplist) const

{
  const uint4 rootOpcode = CPUI_COPY;
  oplist.push_back(rootOpcode);
}
/// \class RuleStringSequence
/// \brief Replace a sequence of COPY ops moving single characters with a \b memcpy CALLOTHER copying a whole string
///
/// The root is a COPY of a constant into an address tied Varnode with a printable character data-type
/// that is not an opaque string, and which lies within a Symbol of the local scope.  Other COPYs in the
/// same basic block are searched for that, together with the root, form a sequence of characters that
/// can be interpreted as a single string.  The sequence of COPYs is replaced with a single
/// \b memcpy CALLOTHER.
int4 RuleStringSequence::applyOp(PcodeOp *op,Funcdata &data)

{
  if (!op->getIn(0)->isConstant())
    return 0;				// Must be copying a constant
  Varnode *destVn = op->getOut();
  Datatype *elemType = destVn->getType();
  if (!elemType->isCharPrint() || elemType->isOpaqueString() || !destVn->isAddrTied())
    return 0;				// Must be an address tied "char" Varnode
  Address destAddr = destVn->getAddr();
  SymbolEntry *entry = data.getScopeLocal()->queryContainer(destAddr, destVn->getSize(), op->getAddr());
  if (entry == (SymbolEntry *)0)
    return 0;				// No local Symbol contains the destination
  StringSequence sequence(data,elemType,entry,op,destAddr);
  return (sequence.isValid() && sequence.transform()) ? 1 : 0;
}
} // End namespace ghidra

View File

@ -0,0 +1,86 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// \file constseq.hh
/// \brief Classes for combining constants written to a contiguous region of memory
#ifndef __CONSTSEQ_HH__
#define __CONSTSEQ_HH__
#include "ruleaction.hh"
namespace ghidra {
/// \brief A class for collecting sequences of COPY ops that hold string data
///
/// Given a starting Address and a Symbol with a character array as a component, a class instance collects
/// a maximal set of COPY ops that can be treated as writing a single string into memory. Then, if the
/// transform() method is called, an explicit string is constructed, and the COPYs are replaced with a
/// \b memcpy CALLOTHER that takes the string as its source input.
///
/// All of the collection and checking is performed by the constructor. Use isValid() afterward to
/// determine if a usable sequence was found before calling transform().
class StringSequence {
public:
static const int4 MINIMUM_SEQUENCE_LENGTH; ///< Minimum number of sequential characters to trigger replacement with CALLOTHER
/// \brief Helper class holding a data-flow edge and optionally a memory offset being COPYed into or from
class WriteNode {
friend class StringSequence;
uintb offset; ///< Offset into the memory region
PcodeOp *op; ///< PcodeOp moving into/outof memory region
int4 slot; ///< either input slot (>=0) or output (-1)
public:
WriteNode(uintb off,PcodeOp *o,int4 sl) { offset = off; op = o; slot = sl; } ///< Constructor
/// \brief Compare two nodes by their order within a basic block
///
/// Only the sequence order of the two PcodeOps is compared; \b offset and \b slot play no part.
bool operator<(const WriteNode &node2) const { return op->getSeqNum().getOrder() < node2.op->getSeqNum().getOrder(); }
};
private:
Funcdata &data; ///< Function being analyzed
PcodeOp *rootOp; ///< The root PcodeOp
Address rootAddr; ///< Address within the memory region associated with the root PcodeOp
Address startAddr; ///< Starting address of the memory region
SymbolEntry *entry; ///< Symbol at the root Address
int4 size; ///< Size of the memory region in bytes (zero if the sequence is invalid)
Datatype *charType; ///< Element data-type
BlockBasic *block; ///< Basic block containing all the COPY ops
vector<WriteNode> moveOps; ///< COPYs into the array memory region
vector<uint1> byteArray; ///< Constants collected in a single array
bool collectCopyOps(void); ///< Collect ops COPYing constants into the memory region
bool checkBetweenCopy(PcodeOp *startOp,PcodeOp *endOp); ///< Check for interfering ops between the two given COPYs
bool checkCopyInterference(void); ///< Find maximal set of COPYs containing the root COPY with no interfering ops in between
bool formByteArray(void); ///< Put constant values from COPYs into a single byte array
uint4 selectStringCopyFunction(int4 &index); ///< Pick either strncpy, wcsncpy, or memcpy function used to copy string
PcodeOp *buildStringCopy(void); ///< Build the strncpy,wcsncpy, or memcpy function with string as input
// removeForward: record the reads of the output of an op being removed, cascading into PIECE ops
static void removeForward(const WriteNode &curNode,map<PcodeOp *,list<WriteNode>::iterator> &xref,
list<WriteNode> &points,vector<WriteNode> &deadOps);
void removeCopyOps(PcodeOp *replaceOp); ///< Remove all the COPY ops from the basic block
// constructTypedPointer: build the data-typed pointer to the root Address, inserting ops before the given point
Varnode *constructTypedPointer(PcodeOp *insertPoint);
public:
StringSequence(Funcdata &fdata,Datatype *ct,SymbolEntry *ent,PcodeOp *root,const Address &addr);
bool isValid(void) const { return size != 0; } ///< Return \b true if COPYs are found that look like a valid string
void clear(void); ///< Clear any resources used and mark the sequence as invalid
bool transform(void); ///< Transform COPYs into a single memcpy user-op
};
// Rule rooted at COPY ops that uses StringSequence to replace character COPYs with a string copy user-op.
// The Doxygen class description is attached to the applyOp() definition.
class RuleStringSequence : public Rule {
public:
RuleStringSequence(const string &g) : Rule( g, 0, "stringsequence") {} ///< Constructor
// Produce a copy of this Rule, but only if its group is in the given list; otherwise null is returned
virtual Rule *clone(const ActionGroupList &grouplist) const {
if (!grouplist.contains(getGroup())) return (Rule *)0;
return new RuleStringSequence(getGroup());
}
virtual void getOpList(vector<uint4> &oplist) const;
virtual int4 applyOp(PcodeOp *op,Funcdata &data);
};
} // End namespace ghidra
#endif

View File

@ -17,6 +17,7 @@
#include "condexe.hh"
#include "double.hh"
#include "subflow.hh"
#include "constseq.hh"
namespace ghidra {
@ -2277,28 +2278,17 @@ int4 ActionRestructureVarnode::apply(Funcdata &data)
return 0;
}
int4 ActionRestructureHigh::apply(Funcdata &data)
int4 ActionMappedLocalSync::apply(Funcdata &data)
{
if (!data.isHighOn()) return 0;
ScopeLocal *l1 = data.getScopeLocal();
#ifdef OPACTION_DEBUG
if ((flags&rule_debug)!=0)
l1->turnOnDebug();
#endif
l1->restructureHigh();
if (data.syncVarnodesWithSymbols(l1,true,true))
count += 1;
#ifdef OPACTION_DEBUG
if ((flags&rule_debug)==0) return 0;
l1->turnOffDebug();
ostringstream s;
data.getScopeLocal()->printEntries(s);
data.getArch()->printDebug(s.str());
#endif
if (l1->hasOverlapProbems())
data.warningHeader("Could not reconcile some variable overlaps");
return 0;
}
@ -2645,7 +2635,7 @@ int4 ActionSetCasts::castInput(PcodeOp *op,int4 slot,Funcdata &data,CastStrategy
{
Datatype *ct;
Varnode *vn,*vnout;
Varnode *vn,*vnout,*vnin;
PcodeOp *newop;
ct = op->getOpcode()->getInputCast(op,slot,castStrategy); // Input type expected by this operation
@ -2657,13 +2647,20 @@ int4 ActionSetCasts::castInput(PcodeOp *op,int4 slot,Funcdata &data,CastStrategy
return 0;
}
vn = op->getIn(slot);
vnin = vn = op->getIn(slot);
// Check to make sure we don't have a double cast
if (vn->isWritten() && (vn->getDef()->code() == CPUI_CAST)) {
if (vn->isImplied() && (vn->loneDescend() == op)) {
vn->updateType(ct,false,false);
if (vn->getType()==ct)
if (vn->isImplied()) {
if (vn->loneDescend() == op) {
vn->updateType(ct,false,false);
if (vn->getType()==ct)
return 1;
}
vnin = vn->getDef()->getIn(0); // Cast directly from input of previous cast
if (ct == vnin->getType()) { // If the earlier data-type is what the input expects
data.opSetInput(op, vnin, slot); // Just use the earlier Varnode
return 1;
}
}
}
else if (vn->isConstant()) {
@ -2682,14 +2679,14 @@ int4 ActionSetCasts::castInput(PcodeOp *op,int4 slot,Funcdata &data,CastStrategy
return 1;
}
newop = data.newOp(1,op->getAddr());
vnout = data.newUniqueOut(vn->getSize(),newop);
vnout = data.newUniqueOut(vnin->getSize(),newop);
vnout->updateType(ct,false,false);
vnout->setImplied();
#ifdef CPUI_STATISTICS
data.getArch()->stats->countCast();
#endif
data.opSetOpcode(newop,CPUI_CAST);
data.opSetInput(newop,vn,0);
data.opSetInput(newop,vnin,0);
data.opSetInput(op,vnout,slot);
data.opInsertBefore(newop,op); // Cast comes AFTER operation
if (ct->needsResolution()) {
@ -2924,6 +2921,7 @@ void ActionNameVars::linkSymbols(Funcdata &data,vector<Varnode *> &namerec)
linkSpacebaseSymbol(curvn, data, namerec);
}
TypeFactory *typeFactory = data.getArch()->types;
for(int4 i=0;i<manage->numSpaces();++i) { // Build a list of nameable highs
spc = manage->getSpace(i);
if (spc == (AddrSpace *)0) continue;
@ -2948,6 +2946,8 @@ void ActionNameVars::linkSymbols(Funcdata &data,vector<Varnode *> &namerec)
if (vn->getSize() == sym->getType()->getSize())
sym->getScope()->overrideSizeLockType(sym,high->getType());
}
if (vn->isAddrTied() && !sym->getScope()->isGlobal())
high->finalizeDatatype(typeFactory);
}
}
}
@ -4562,10 +4562,7 @@ int4 ActionInputPrototype::apply(Funcdata &data)
ParamActive active(false);
Varnode *vn;
// Clear any unlocked local variables because these are
// getting cleared anyway in the restructure and may be
// using symbol names that we want
data.getScopeLocal()->clearUnlockedCategory(-1);
data.getScopeLocal()->clearCategory(Symbol::fake_input);
data.getFuncProto().clearUnlockedInput();
if (!data.getFuncProto().isInputLocked()) {
VarnodeDefSet::const_iterator iter,enditer;
@ -5329,7 +5326,7 @@ void ActionDatabase::buildDefaultGroups(void)
"deadcode", "typerecovery", "stackptrflow",
"blockrecovery", "stackvars", "deadcontrolflow", "switchnorm",
"cleanup", "splitcopy", "splitpointer", "merge", "dynamic", "casts", "analysis",
"fixateglobals", "fixateproto",
"fixateglobals", "fixateproto", "constsequence",
"segment", "returnsplit", "nodejoin", "doubleload", "doubleprecis",
"unreachable", "subvar", "floatprecision",
"conditionalexe", "" };
@ -5585,6 +5582,7 @@ void ActionDatabase::universalAction(Architecture *conf)
actfullloop->addAction( new ActionActiveReturn("protorecovery") );
}
act->addAction( actfullloop );
act->addAction( new ActionMappedLocalSync("localrecovery") );
act->addAction( new ActionStartCleanUp("cleanup") );
{
actcleanup = new ActionPool(Action::rule_repeatapply,"cleanup");
@ -5599,6 +5597,7 @@ void ActionDatabase::universalAction(Architecture *conf)
actcleanup->addRule( new RuleSplitCopy("splitcopy") );
actcleanup->addRule( new RuleSplitLoad("splitpointer") );
actcleanup->addRule( new RuleSplitStore("splitpointer") );
actcleanup->addRule( new RuleStringSequence("constsequence"));
}
act->addAction( actcleanup );
@ -5620,7 +5619,6 @@ void ActionDatabase::universalAction(Architecture *conf)
act->addAction( new ActionCopyMarker("merge") );
act->addAction( new ActionOutputPrototype("localrecovery") );
act->addAction( new ActionInputPrototype("fixateproto") );
act->addAction( new ActionRestructureHigh("localrecovery") );
act->addAction( new ActionMapGlobals("fixateglobals") );
act->addAction( new ActionDynamicSymbols("dynamic") );
act->addAction( new ActionNameVars("merge") );

View File

@ -844,15 +844,15 @@ public:
virtual int4 apply(Funcdata &data);
};
/// \brief Create symbols that map out the local stack-frame for the function.
/// \brief Do final synchronization of symbols in the local scope with Varnodes
///
/// This produces the final set of symbols on the stack.
class ActionRestructureHigh : public Action {
/// Push data-types from the last local scope restructuring onto Varnodes
class ActionMappedLocalSync : public Action {
public:
ActionRestructureHigh(const string &g) : Action(0,"restructure_high",g) {} ///< Constructor
ActionMappedLocalSync(const string &g) : Action(0,"mapped_local_sync",g) {} ///< Constructor
virtual Action *clone(const ActionGroupList &grouplist) const {
if (!grouplist.contains(getGroup())) return (Action *)0;
return new ActionRestructureHigh(getGroup());
return new ActionMappedLocalSync(getGroup());
}
virtual int4 apply(Funcdata &data);
};

View File

@ -2837,6 +2837,8 @@ void ScopeInternal::setCategory(Symbol *sym,int4 cat,int4 ind)
while(category.size() <= sym->category)
category.push_back(vector<Symbol *>());
vector<Symbol *> &list(category[sym->category]);
if (cat > 0)
sym->catindex = list.size();
while(list.size() <= sym->catindex)
list.push_back((Symbol *)0);
list[sym->catindex] = sym;

View File

@ -213,7 +213,8 @@ public:
no_category = -1, ///< Symbol is not in a special category
function_parameter = 0, ///< The Symbol is a parameter to a function
equate = 1, ///< The Symbol holds \e equate information about a constant
union_facet = 2 ///< Symbol holding read or write facing union field information
union_facet = 2, ///< Symbol holding read or write facing union field information
fake_input = 3 ///< Temporary placeholder for an input symbol prior to formalizing parameters
};
Symbol(Scope *sc,const string &nm,Datatype *ct); ///< Construct given a name and data-type
@ -735,7 +736,7 @@ public:
///
/// \param sym is the given Symbol
/// \param cat is the \e category to set for the Symbol
/// \param ind is the index position to set (within the category)
/// \param ind is, for the function_parameter category, the index position to set, and is unused for other categories
virtual void setCategory(Symbol *sym,int4 cat,int4 ind)=0;
virtual SymbolEntry *addSymbol(const string &nm,Datatype *ct,

View File

@ -330,8 +330,7 @@ PcodeOp *FlowInfo::xrefControlFlow(list<PcodeOp *>::const_iterator oiter,bool &s
break;
case CPUI_CALLOTHER:
{
InjectedUserOp *userop = dynamic_cast<InjectedUserOp *>(glb->userops.getOp(op->getIn(0)->getOffset()));
if (userop != (InjectedUserOp *)0)
if (glb->userops.getOp(op->getIn(0)->getOffset())->getType() == UserPcodeOp::injected)
injectlist.push_back(op);
break;
}

View File

@ -297,6 +297,47 @@ Varnode *Funcdata::findSpacebaseInput(AddrSpace *id) const
return vn;
}
/// \brief Obtain the input Varnode for the base register of the given address space, creating it if needed
///
/// If findSpacebaseInput() already locates an input Varnode for the space, that Varnode is returned
/// unchanged. Otherwise the address space must have at least one base register associated with it,
/// or an exception is thrown. A new input Varnode is then built on the first base register, marked
/// with the Varnode::spacebase flag, and given a pointer to the TypeSpacebase data-type of the space.
/// \param id is the given address space
/// \return the input Varnode corresponding to the base register
Varnode *Funcdata::constructSpacebaseInput(AddrSpace *id)
{
  Varnode *existing = findSpacebaseInput(id);
  if (existing != (Varnode *)0)
    return existing;		// Already constructed, nothing more to do
  if (id->numSpacebase() == 0)
    throw LowlevelError("Unable to construct pointer into space: "+id->getName());

  const VarnodeData &baseReg(id->getSpacebase(0));
  Datatype *spaceType = glb->types->getTypeSpacebase(id,getAddress());
  Datatype *ptrType = glb->types->getTypePointer(baseReg.size,spaceType,id->getWordSize());

  // Build the raw Varnode, then formally register it as an input of the function
  Varnode *inputVn = setInputVarnode(newVarnode(baseReg.size,baseReg.getAddr(),ptrType));
  inputVn->setFlags(Varnode::spacebase);
  inputVn->updateType(ptrType,true,true);
  return inputVn;
}
/// \brief Build a constant Varnode standing for the \e base of the given global address space
///
/// The constant has value 0 and the size of an address in the space. It is assigned a pointer
/// to the TypeSpacebase data-type of the space and is marked with the Varnode::spacebase flag.
/// \param id is the given address space
/// \return the constant base Varnode
Varnode *Funcdata::constructConstSpacebase(AddrSpace *id)
{
  Datatype *spaceType = glb->types->getTypeSpacebase(id,Address());
  Datatype *ptrType = glb->types->getTypePointer(id->getAddrSize(),spaceType,id->getWordSize());

  Varnode *baseVn = newConstant(id->getAddrSize(),0);
  baseVn->updateType(ptrType,true,true);
  baseVn->setFlags(Varnode::spacebase);
  return baseVn;
}
/// \brief Convert a constant pointer into a \e ram CPUI_PTRSUB
///
/// A constant known to be a pointer into an address space like \b ram is converted

View File

@ -224,6 +224,8 @@ public:
void spacebase(void); ///< Mark registers that map to a virtual address space
Varnode *newSpacebasePtr(AddrSpace *id); ///< Construct a new \e spacebase register for a given address space
Varnode *findSpacebaseInput(AddrSpace *id) const;
Varnode *constructSpacebaseInput(AddrSpace *id);
Varnode *constructConstSpacebase(AddrSpace *id);
void spacebaseConstant(PcodeOp *op,int4 slot,SymbolEntry *entry,const Address &rampoint,uintb origval,int4 origsize);
int4 getHeritagePass(void) const { return heritage.getPass(); } ///< Get overall count of heritage passes
@ -428,6 +430,7 @@ public:
bool attemptDynamicMapping(SymbolEntry *entry,DynamicHash &dhash);
bool attemptDynamicMappingLate(SymbolEntry *entry,DynamicHash &dhash);
Merge &getMerge(void) { return covermerge; } ///< Get the Merge object for \b this function
Varnode *getInternalString(const uint1 *buf,int4 size,Datatype *ptrType,PcodeOp *readOp);
// op routines
PcodeOp *newOp(int4 inputs,const Address &pc); /// Allocate a new PcodeOp with Address

View File

@ -573,12 +573,12 @@ JumpTable::RecoveryMode Funcdata::earlyJumpTableFail(PcodeOp *op)
OpCode opc = op->code();
if (opc == CPUI_CALLOTHER) {
int4 id = (int4)op->getIn(0)->getOffset();
UserPcodeOp *userOp = glb->userops.getOp(id);
if (dynamic_cast<InjectedUserOp *>(userOp) != (InjectedUserOp *)0)
uint4 userOpType = glb->userops.getOp(id)->getType();
if (userOpType == UserPcodeOp::injected)
return JumpTable::success; // Don't try to back track through injection
if (dynamic_cast<JumpAssistOp *>(userOp) != (JumpAssistOp *)0)
if (userOpType == UserPcodeOp::jumpassist)
return JumpTable::success;
if (dynamic_cast<SegmentOp *>(userOp) != (SegmentOp *)0)
if (userOpType == UserPcodeOp::segment)
return JumpTable::success;
if (outhit)
return JumpTable::fail_callother; // Address formed via uninjected CALLOTHER, analysis will fail

View File

@ -632,7 +632,7 @@ bool Funcdata::replaceVolatile(Varnode *vn)
{
PcodeOp *newop;
if (vn->isWritten()) { // A written value
VolatileWriteOp *vw_op = glb->userops.getVolatileWrite();
UserPcodeOp *vw_op = glb->userops.registerBuiltin(UserPcodeOp::BUILTIN_VOLATILE_WRITE);
if (!vn->hasNoDescend()) throw LowlevelError("Volatile memory was propagated");
PcodeOp *defop = vn->getDef();
newop = newOp(3,defop->getAddr());
@ -651,7 +651,7 @@ bool Funcdata::replaceVolatile(Varnode *vn)
opInsertAfter(newop,defop); // Insert after defining op
}
else { // A read value
VolatileReadOp *vr_op = glb->userops.getVolatileRead();
UserPcodeOp *vr_op = glb->userops.registerBuiltin(UserPcodeOp::BUILTIN_VOLATILE_READ);
if (vn->hasNoDescend()) return false; // Dead
PcodeOp *readop = vn->loneDescend();
if (readop == (PcodeOp *)0)
@ -1002,7 +1002,6 @@ bool Funcdata::syncVarnodesWithSymbol(VarnodeLocSet::const_iterator &iter,uint4
if (ct != (Datatype *)0) {
if (vn->updateType(ct,false,false))
updateoccurred = true;
vn->getHigh()->finalizeDatatype(ct); // Permanently set the data-type on the HighVariable
}
} while(iter != enditer);
return updateoccurred;
@ -1312,6 +1311,41 @@ bool Funcdata::attemptDynamicMappingLate(SymbolEntry *entry,DynamicHash &dhash)
return true;
}
/// \brief Create Varnode (and associated PcodeOp) that will display as a string constant
///
/// The raw data for the encoded string is given. If the data encodes a legal string, the string
/// is stored in the StringManager, and a Varnode is created that will display in output as the
/// quoted string. A given pointer data-type is assigned to the new Varnode and also indicates
/// the character data-type associated with the encoding. Internally, a \e stringdata user-op is
/// also created (a CALLOTHER inserted immediately before the reading op) and its output is the
/// Varnode actually returned.
/// \param buf is the raw bytes of the encoded string
/// \param size is the number of bytes
/// \param ptrType is the given pointer to character data-type
/// \param readOp is the PcodeOp that will read the new Varnode
/// \return the new Varnode or null if \b ptrType is not a pointer or the encoding isn't a legal string
Varnode *Funcdata::getInternalString(const uint1 *buf,int4 size,Datatype *ptrType,PcodeOp *readOp)
{
  if (ptrType->getMetatype() != TYPE_PTR)
    return (Varnode *)0;
  Datatype *charType = ((TypePointer *)ptrType)->getPtrTo();
  const Address &addr(readOp->getAddr());
  // A zero hash is the manager's signal that the bytes could not be registered as a string
  uint8 hash = glb->stringManager->registerInternalStringData(addr, buf, size, charType);
  if (hash == 0)
    return (Varnode *)0;
  glb->userops.registerBuiltin(UserPcodeOp::BUILTIN_STRINGDATA);	// Make sure the user-op exists before referencing it
  PcodeOp *stringOp = newOp(2,addr);
  opSetOpcode(stringOp, CPUI_CALLOTHER);
  stringOp->clearFlag(PcodeOp::call);
  opSetInput(stringOp, newConstant(4, UserPcodeOp::BUILTIN_STRINGDATA), 0);	// Slot 0 identifies the user-op
  opSetInput(stringOp, newConstant(8, hash), 1);				// Slot 1 carries the hash used to look up the string
  Varnode *resVn = newUniqueOut(ptrType->getSize(), stringOp);
  resVn->updateType(ptrType, true, false);
  opInsertBefore(stringOp, readOp);
  return resVn;
}
/// Follow the Varnode back to see if it comes from the return address for \b this function.
/// If so, return \b true. The return address can flow through COPY, INDIRECT, and AND operations.
/// If there are any other operations in the flow path, or if a standard storage location for the

View File

@ -2082,8 +2082,10 @@ bool JumpAssisted::recoverModel(Funcdata *fd,PcodeOp *indop,uint4 matchsize,uint
if (assistOp->code() != CPUI_CALLOTHER) return false;
if (assistOp->numInput() < 3) return false;
int4 index = assistOp->getIn(0)->getOffset();
userop = dynamic_cast<JumpAssistOp *>(fd->getArch()->userops.getOp(index));
if (userop == (JumpAssistOp *)0) return false;
UserPcodeOp *tmpOp = fd->getArch()->userops.getOp(index);
if (tmpOp->getType() != UserPcodeOp::jumpassist)
return false;
userop = (JumpAssistOp *)tmpOp;
switchvn = assistOp->getIn(1); // The switch variable
for(int4 i=2;i<assistOp->numInput();++i)

View File

@ -1384,7 +1384,7 @@ void Merge::groupPartialRoot(Varnode *vn)
baseOffset = entry->getOffset();
}
PieceNode::gatherPieces(pieces, vn, vn->getDef(), baseOffset);
PieceNode::gatherPieces(pieces, vn, vn->getDef(), baseOffset, baseOffset);
bool throwOut = false;
for(int4 i=0;i<pieces.size();++i) {
Varnode *nodeVn = pieces[i].getVarnode();
@ -1496,6 +1496,14 @@ void Merge::markInternalCopies(void)
if (p2->getOffset() != p1->getOffset() + v3->getSize()) break;
}
data.opMarkNonPrinting(op);
if (v2->isImplied()) {
v2->clearImplied();
v2->setExplicit();
}
if (v3->isImplied()) {
v3->clearImplied();
v3->setExplicit();
}
break;
case CPUI_SUBPIECE:
v1 = op->getOut();
@ -1513,6 +1521,10 @@ void Merge::markInternalCopies(void)
if (p2->getOffset() + val != p1->getOffset()) break;
}
data.opMarkNonPrinting(op);
if (v2->isImplied()) {
v2->clearImplied();
v2->setExplicit();
}
break;
default:
break;

View File

@ -756,9 +756,9 @@ int4 PcodeOp::compareOrder(const PcodeOp *bop) const
/// whether a Varnode is a leaf of this tree.
/// \param rootVn is the given root of the CONCAT tree
/// \param vn is the Varnode to test as a leaf
/// \param typeOffset is byte offset of the test Varnode within fully concatenated value
/// \param relOffset is byte offset of the test Varnode within fully concatenated value (rooted at \b rootVn)
/// \return \b true if the test Varnode is a leaf of the tree
bool PieceNode::isLeaf(Varnode *rootVn,Varnode *vn,int4 typeOffset)
bool PieceNode::isLeaf(Varnode *rootVn,Varnode *vn,int4 relOffset)
{
if (vn->isMapped() && rootVn->getSymbolEntry() != vn->getSymbolEntry()) {
@ -770,7 +770,7 @@ bool PieceNode::isLeaf(Varnode *rootVn,Varnode *vn,int4 typeOffset)
PcodeOp *op = vn->loneDescend();
if (op == (PcodeOp *)0) return true;
if (vn->isAddrTied()) {
Address addr = rootVn->getAddr() + typeOffset;
Address addr = rootVn->getAddr() + relOffset;
if (vn->getAddr() != addr) return true;
}
return false;
@ -820,17 +820,18 @@ Varnode *PieceNode::findRoot(Varnode *vn)
/// \param stack holds the markup for each node of the tree
/// \param rootVn is the given root of the tree
/// \param op is the current PIECE op to explore as part of the tree
/// \param baseOffset is the offset associated with the output of the current PIECE op
void PieceNode::gatherPieces(vector<PieceNode> &stack,Varnode *rootVn,PcodeOp *op,int4 baseOffset)
/// \param baseOffset is the offset associated with the output of the current PIECE op within the data-type
/// \param rootOffset is the offset of the \b rootVn within the data-type
void PieceNode::gatherPieces(vector<PieceNode> &stack,Varnode *rootVn,PcodeOp *op,int4 baseOffset,int4 rootOffset)
{
for(int4 i=0;i<2;++i) {
Varnode *vn = op->getIn(i);
int4 offset = (rootVn->getSpace()->isBigEndian() == (i==1)) ? baseOffset + op->getIn(1-i)->getSize() : baseOffset;
bool res = isLeaf(rootVn,vn,offset);
bool res = isLeaf(rootVn,vn,offset-rootOffset);
stack.emplace_back(op,i,offset,res);
if (!res)
gatherPieces(stack,rootVn,vn->getDef(),offset);
gatherPieces(stack,rootVn,vn->getDef(),offset,rootOffset);
}
}

View File

@ -284,7 +284,7 @@ public:
Varnode *getVarnode(void) const { return pieceOp->getIn(slot); } ///< Get the Varnode representing \b this piece
static bool isLeaf(Varnode *rootVn,Varnode *vn,int4 typeOffset);
static Varnode *findRoot(Varnode *vn);
static void gatherPieces(vector<PieceNode> &stack,Varnode *rootVn,PcodeOp *op,int4 baseOffset);
static void gatherPieces(vector<PieceNode> &stack,Varnode *rootVn,PcodeOp *op,int4 baseOffset,int4 rootOffset);
};
/// A map from sequence number (SeqNum) to PcodeOp

View File

@ -675,15 +675,7 @@ void PrintC::opCallother(const PcodeOp *op)
{
UserPcodeOp *userop = glb->userops.getOp(op->getIn(0)->getOffset());
uint4 display = userop->getDisplay();
if (display == UserPcodeOp::annotation_assignment) {
pushOp(&assignment,op);
pushVn(op->getIn(2),op,mods);
pushVn(op->getIn(1),op,mods);
}
else if (display == UserPcodeOp::no_operator) {
pushVn(op->getIn(1),op,mods);
}
else { // Emit using functional syntax
if (display == 0) { // Emit using functional syntax
string nm = op->getOpcode()->getOperatorName(op);
pushOp(&function_call,op);
pushAtom(Atom(nm,optoken,EmitMarkup::funcname_color,op));
@ -698,6 +690,28 @@ void PrintC::opCallother(const PcodeOp *op)
else
pushAtom(Atom(EMPTY_STRING,blanktoken,EmitMarkup::no_color)); // Push empty token for void
}
else if (display == UserPcodeOp::annotation_assignment) {
pushOp(&assignment,op);
pushVn(op->getIn(2),op,mods);
pushVn(op->getIn(1),op,mods);
}
else if (display == UserPcodeOp::no_operator) {
pushVn(op->getIn(1),op,mods);
}
else if (display == UserPcodeOp::display_string) {
const Varnode *vn = op->getOut();
Datatype *ct = vn->getType();
ostringstream str;
if (ct->getMetatype() == TYPE_PTR) {
ct = ((TypePointer *)ct)->getPtrTo();
if (!printCharacterConstant(str,op->getIn(1)->getAddr(),ct))
str << "\"badstring\"";
}
else
str << "\"badstring\"";
pushAtom(Atom(str.str(),vartoken,EmitMarkup::const_color,op,vn));
}
}
void PrintC::opConstructor(const PcodeOp *op,bool withNew)

View File

@ -7060,7 +7060,7 @@ int4 RulePieceStructure::applyOp(PcodeOp *op,Funcdata &data)
vector<PieceNode> stack;
for(;;) {
PieceNode::gatherPieces(stack, outvn, op, baseOffset);
PieceNode::gatherPieces(stack, outvn, op, baseOffset, baseOffset);
if (!findReplaceZext(stack, ct, data)) // Check for INT_ZEXT leaves that need to be converted to PIECEs
break;
stack.clear(); // If we found some, regenerate the tree

View File

@ -272,7 +272,7 @@ AddrSpace *SleighBase::decodeSlaSpace(Decoder &decoder,const Translate *trans)
else if (attribId == sla::ATTRIB_SIZE)
addressSize = decoder.readSignedInteger();
else if (attribId == sla::ATTRIB_WORDSIZE)
wordsize = decoder.readUnsignedInteger();
wordsize = decoder.readSignedInteger();
else if (attribId == sla::ATTRIB_BIGENDIAN) {
bigEnd = decoder.readBool();
}

View File

@ -15,6 +15,7 @@
*/
#include "stringmanage.hh"
#include "architecture.hh"
#include "crc32.hh"
namespace ghidra {
@ -24,6 +25,85 @@ ElementId ELEM_BYTES = ElementId("bytes",83);
ElementId ELEM_STRING = ElementId("string",84);
ElementId ELEM_STRINGMANAGE = ElementId("stringmanage",85);
/// Decode the buffer one code point at a time and emit each as UTF8 to the stream.
/// Decoding stops at the first zero code point (the terminator), at the end of the buffer,
/// or once \b maximumChars characters have been written, whichever comes first.
/// \param s is the output stream
/// \param buffer is the given byte buffer
/// \param size is the number of bytes in the buffer
/// \param charsize specifies the encoding (1=UTF8 2=UTF16 4=UTF32)
/// \param bigend is \b true if (UTF16 and UTF32) are big endian encoded
/// \return \b true if the byte array contains valid unicode
bool StringManager::writeUnicode(ostream &s,const uint1 *buffer,int4 size,int4 charsize,bool bigend)
{
  int4 step = charsize;		// Bytes consumed by the most recent code point (updated by getCodepoint)
  int4 emitted = 0;		// Number of characters written so far
  for(int4 pos=0;pos<size;pos+=step) {
    int4 cp = getCodepoint(buffer+pos,charsize,bigend,step);
    if (cp < 0)
      return false;		// Invalid encoding
    if (cp == 0)
      break;			// Terminator
    writeUtf8(s, cp);
    if (++emitted >= maximumChars)
      break;			// Reached the truncation limit
  }
  return true;
}
/// \brief Translate and assign raw string data to a StringData object
///
/// The string data is provided as raw bytes. If the encoding is single byte and the string is
/// shorter than \b maximumChars, the bytes are copied verbatim. Otherwise the data is translated
/// to UTF-8, truncated to the \b maximumChars allowed by the manager, and explicitly null terminated.
/// The encoding is expected to have been validated already by checkCharacters(). If translation
/// nevertheless fails, the StringData object is left unmodified.
/// \param data is the StringData object to populate
/// \param buf is the raw byte array
/// \param size is the number of bytes in the array
/// \param charsize is the size of unicode encoding
/// \param numChars is the number of characters in the encoding as returned by checkCharacters()
/// \param bigend is \b true if UTF-16 and UTF-32 elements are big endian encoded
void StringManager::assignStringData(StringData &data,const uint1 *buf,int4 size,int4 charsize,int4 numChars,bool bigend)
{
  if (charsize == 1 && numChars < maximumChars) {
    data.byteData.reserve(size);
    data.byteData.assign(buf,buf+size);
  }
  else {
    // We need to translate to UTF8 and/or truncate
    ostringstream s;
    if (!writeUnicode(s, buf, size, charsize, bigend))
      return;
    string resString = s.str();
    int4 newSize = resString.size();
    data.byteData.reserve(newSize + 1);
    const uint1 *ptr = (const uint1 *)resString.c_str();
    data.byteData.assign(ptr,ptr+newSize);
    // Append the terminator as a real element: reserve() only raises capacity, so indexing
    // byteData[newSize] here would write past the end of the vector (undefined behavior)
    data.byteData.push_back(0);
  }
  data.isTruncated = (numChars >= maximumChars);
}
/// \brief Combine an Address and the contents of a byte array into a single 64-bit hash
///
/// A 32-bit CRC is accumulated over the bytes, starting from a fixed seed, and is then
/// XORed into the upper 32 bits of the offset of the Address.
/// \param addr is the specific Address
/// \param buf is a pointer to the array of bytes
/// \param size is the number of bytes in the array
/// \return the 64-bit hash
uint8 StringManager::calcInternalHash(const Address &addr,const uint1 *buf,int4 size)
{
  uint4 crc = 0x7b7c66a9;		// Fixed starting value for the CRC register
  int4 pos = 0;
  while(pos < size) {
    crc = crc_update(crc, buf[pos]);
    pos += 1;
  }
  uint8 upperBits = ((uint8)crc) << 32;
  uint8 hash = addr.getOffset();
  hash ^= upperBits;
  return hash;
}
/// \param max is the maximum number of characters to allow before truncating string
StringManager::StringManager(int4 max)
@ -91,6 +171,33 @@ bool StringManager::isString(const Address &addr,Datatype *charType)
return !buffer.empty();
}
/// \brief Register string data under a code address or other location that doesn't normally hold a string
///
/// The given byte buffer is validated as a string in the encoding implied by the character data-type.
/// If it is not a legal encoding, 0 is returned and nothing is stored. Otherwise a hash is built from
/// the associated Address and the raw bytes, the (translated) string is stored in the map under the
/// constant Address formed from that hash, replacing any previous contents, and the hash is returned.
/// The registered string can be retrieved later by using the hash as a constant Address.
/// \param addr is the address to associate with the string data
/// \param buf is a pointer to the array of raw bytes encoding the string
/// \param size is the number of bytes in the array
/// \param charType is a character data-type indicating the encoding
/// \return a hash associated with the string or 0
uint8 StringManager::registerInternalStringData(const Address &addr,const uint1 *buf,int4 size,Datatype *charType)
{
  const int4 elemSize = charType->getSize();
  const bool bigEndian = addr.isBigEndian();
  const int4 charCount = checkCharacters(buf, size, elemSize, bigEndian);
  if (charCount < 0)
    return 0;			// Not a legal encoding

  const uint8 hash = calcInternalHash(addr, buf, size);
  Address key = addr.getSpace()->getManager()->getConstant(hash);
  StringData &entry( stringMap[key] );
  // Reset the entry in case the same hash was registered before
  entry.byteData.clear();
  entry.isTruncated = false;
  assignStringData(entry, buf, size, elemSize, charCount, bigEndian);
  return hash;
}
/// Encode \<stringmanage> element, with \<string> children.
/// \param encoder is the stream encoder
void StringManager::encode(Encoder &encoder) const
@ -204,6 +311,33 @@ inline int4 StringManager::readUtf16(const uint1 *buf,bool bigend)
return codepoint;
}
/// \brief Validate a buffer as unicode and count its characters
///
/// The buffer is decoded one code point at a time until a zero code point (terminator) or
/// the end of the buffer is reached. Any code point that fails to decode makes the whole
/// buffer invalid. A null buffer pointer is also reported as invalid.
/// If the string is encoded in UTF8 or ASCII, we get (on average) a bit of check
/// per character. For UTF16, the surrogate reserved area gives at least some check.
/// \param buf is the byte array to check
/// \param size is the size of the buffer in bytes
/// \param charsize is the UTF encoding (1=UTF8, 2=UTF16, 4=UTF32)
/// \param bigend is \b true if the (UTF16 and UTF32) characters are big endian encoded
/// \return the number of characters or -1 if there is an invalid encoding
int4 StringManager::checkCharacters(const uint1 *buf,int4 size,int4 charsize,bool bigend)
{
  if (buf == (const uint1 *)0)
    return -1;
  int4 numChars = 0;
  int4 step = charsize;		// Bytes consumed by the most recent code point (updated by getCodepoint)
  for(int4 pos=0;pos<size;pos+=step) {
    int4 cp = getCodepoint(buf+pos,charsize,bigend,step);
    if (cp < 0)
      return -1;		// Invalid encoding
    if (cp == 0)
      break;			// Terminator, not counted as a character
    numChars += 1;
  }
  return numChars;
}
/// One or more bytes is consumed from the array, and the number of bytes used is passed back.
/// \param buf is a pointer to the bytes in the character array
/// \param charsize is 1 for UTF8, 2 for UTF16, or 4 for UTF32
@ -328,80 +462,12 @@ const vector<uint1> &StringManagerUnicode::getStringData(const Address &addr,Dat
return stringData.byteData; // Return the empty buffer
}
int4 numChars = checkCharacters(testBuffer, curBufferSize, charsize);
int4 numChars = checkCharacters(testBuffer, curBufferSize, charsize, addr.isBigEndian());
if (numChars < 0)
return stringData.byteData; // Return the empty buffer (invalid encoding)
if (charsize == 1 && numChars < maximumChars) {
stringData.byteData.reserve(curBufferSize);
stringData.byteData.assign(testBuffer,testBuffer+curBufferSize);
}
else {
// We need to translate to UTF8 and/or truncate
ostringstream s;
if (!writeUnicode(s, testBuffer, curBufferSize, charsize))
return stringData.byteData; // Return the empty buffer
string resString = s.str();
int4 newSize = resString.size();
stringData.byteData.reserve(newSize + 1);
const uint1 *ptr = (const uint1 *)resString.c_str();
stringData.byteData.assign(ptr,ptr+newSize);
stringData.byteData[newSize] = 0; // Make sure there is a null terminator
}
stringData.isTruncated = (numChars >= maximumChars);
assignStringData(stringData, testBuffer, curBufferSize, charsize, numChars, addr.isBigEndian());
isTrunc = stringData.isTruncated;
return stringData.byteData;
}
/// Check that the given buffer contains valid unicode.
/// If the string is encoded in UTF8 or ASCII, we get (on average) a bit of check
/// per character. For UTF16, the surrogate reserved area gives at least some check.
/// \param buf is the byte array to check
/// \param size is the size of the buffer in bytes
/// \param charsize is the UTF encoding (1=UTF8, 2=UTF16, 4=UTF32)
/// \return the number of characters or -1 if there is an invalid encoding
int4 StringManagerUnicode::checkCharacters(const uint1 *buf,int4 size,int4 charsize) const
{
if (buf == (const uint1 *)0) return -1;
bool bigend = glb->translate->isBigEndian();
int4 i=0;
int4 count=0;
int4 skip = charsize;
while(i<size) {
int4 codepoint = getCodepoint(buf+i,charsize,bigend,skip);
if (codepoint < 0) return -1;
if (codepoint == 0) break;
count += 1;
i += skip;
}
return count;
}
/// Assume the buffer contains a null terminated unicode encoded string.
/// Write the characters out (as UTF8) to the stream.
/// \param s is the output stream
/// \param buffer is the given byte buffer
/// \param size is the number of bytes in the buffer
/// \param charsize specifies the encoding (1=UTF8 2=UTF16 4=UTF32)
/// \return \b true if the byte array contains valid unicode
bool StringManagerUnicode::writeUnicode(ostream &s,uint1 *buffer,int4 size,int4 charsize)
{
bool bigend = glb->translate->isBigEndian();
int4 i=0;
int4 count=0;
int4 skip = charsize;
while(i<size) {
int4 codepoint = getCodepoint(buffer+i,charsize,bigend,skip);
if (codepoint < 0) return false;
if (codepoint == 0) break; // Terminator
writeUtf8(s, codepoint);
i += skip;
count += 1;
if (count >= maximumChars)
break;
}
return true;
}
} // End namespace ghidra

View File

@ -33,9 +33,10 @@ extern ElementId ELEM_STRINGMANAGE; ///< Marshaling element \<stringmanage>
/// \brief Storage for decoding and storing strings associated with an address
///
/// Looks at data in the loadimage to determine if it represents a "string".
/// Decodes the string for presentation in the output.
/// Stores the decoded string until its needed for presentation.
/// Looks at data in the loadimage to determine if it represents a "string". Decodes the string for
/// presentation in the output. Stores the decoded string until it is needed for presentation. Strings are
/// associated with their starting address in memory. An \e internal string (that is not in the loadimage) can
/// be registered with the manager and will be associated with a constant.
class StringManager {
protected:
/// \brief String data (a sequence of bytes) stored by StringManager
@ -46,6 +47,9 @@ protected:
};
map<Address,StringData> stringMap; ///< Map from address to string data
int4 maximumChars; ///< Maximum characters in a string before truncating
bool writeUnicode(ostream &s,const uint1 *buffer,int4 size,int4 charsize,bool bigend); ///< Translate/copy unicode to UTF8
void assignStringData(StringData &data,const uint1 *buf,int4 size,int4 charsize,int4 numChars,bool bigend);
static uint8 calcInternalHash(const Address &addr,const uint1 *buf,int4 size);
public:
StringManager(int4 max); ///< Constructor
virtual ~StringManager(void); ///< Destructor
@ -64,12 +68,14 @@ public:
/// \return the byte array of UTF8 data
virtual const vector<uint1> &getStringData(const Address &addr,Datatype *charType,bool &isTrunc)=0;
uint8 registerInternalStringData(const Address &addr,const uint1 *buf,int4 size,Datatype *charType);
void encode(Encoder &encoder) const; ///< Encode cached strings to a stream
void decode(Decoder &decoder); ///< Restore string cache from a stream
static bool hasCharTerminator(const uint1 *buffer,int4 size,int4 charsize); ///< Check for a unicode string terminator
static int4 readUtf16(const uint1 *buf,bool bigend); ///< Read a UTF16 code point from a byte array
static void writeUtf8(ostream &s,int4 codepoint); ///< Write unicode character to stream in UTF8 encoding
static int4 checkCharacters(const uint1 *buf,int4 size,int4 charsize,bool bigend);
static int4 getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4 &skip); ///< Extract next \e unicode \e codepoint
};
@ -80,13 +86,11 @@ public:
class StringManagerUnicode : public StringManager {
Architecture *glb; ///< Underlying architecture
uint1 *testBuffer; ///< Temporary buffer for pulling in loadimage bytes
int4 checkCharacters(const uint1 *buf,int4 size,int4 charsize) const; ///< Make sure buffer has valid bounded set of unicode
public:
StringManagerUnicode(Architecture *g,int4 max); ///< Constructor
virtual ~StringManagerUnicode(void);
virtual const vector<uint1> &getStringData(const Address &addr,Datatype *charType,bool &isTrunc);
bool writeUnicode(ostream &s,uint1 *buffer,int4 size,int4 charsize); ///< Translate/copy unicode to UTF8
};
} // End namespace ghidra

View File

@ -2844,7 +2844,11 @@ TransformVar *LaneDivide::setReplacement(Varnode *vn,int4 numLanes,int4 skipLane
// if (vn->isFree())
// return (TransformVar *)0;
if (vn->isTypeLock() && vn->getType()->getMetatype() != TYPE_PARTIALSTRUCT) {
if (vn->isTypeLock()) {
type_metatype meta = vn->getType()->getMetatype();
if (meta > TYPE_ARRAY)
return (TransformVar *)0; // Don't split a primitive type
if (meta == TYPE_STRUCT || meta == TYPE_UNION)
return (TransformVar *)0;
}

View File

@ -30,10 +30,10 @@ AttributeId ATTRIB_ARRAYSIZE = AttributeId("arraysize",48);
AttributeId ATTRIB_CHAR = AttributeId("char",49);
AttributeId ATTRIB_CORE = AttributeId("core",50);
AttributeId ATTRIB_ENUM = AttributeId("enum",51);
AttributeId ATTRIB_ENUMSIGNED = AttributeId("enumsigned",52);
AttributeId ATTRIB_ENUMSIZE = AttributeId("enumsize",53);
AttributeId ATTRIB_INTSIZE = AttributeId("intsize",54);
AttributeId ATTRIB_LONGSIZE = AttributeId("longsize",55);
//AttributeId ATTRIB_ENUMSIGNED = AttributeId("enumsigned",52); // deprecated
//AttributeId ATTRIB_ENUMSIZE = AttributeId("enumsize",53); // deprecated
//AttributeId ATTRIB_INTSIZE = AttributeId("intsize",54); // deprecated
//AttributeId ATTRIB_LONGSIZE = AttributeId("longsize",55); // deprecated
AttributeId ATTRIB_OPAQUESTRING = AttributeId("opaquestring",56);
AttributeId ATTRIB_SIGNED = AttributeId("signed",57);
AttributeId ATTRIB_STRUCTALIGN = AttributeId("structalign",58);
@ -42,7 +42,7 @@ AttributeId ATTRIB_VARLENGTH = AttributeId("varlength",60);
//ElementId ELEM_ABSOLUTE_MAX_ALIGNMENT = ElementId("absolute_max_alignment", 37);
//ElementId ELEM_BITFIELD_PACKING = ElementId("bitfield_packing", 38);
//ElementId ELEM_CHAR_SIZE = ElementId("char_size", 39);
ElementId ELEM_CHAR_SIZE = ElementId("char_size", 39);
//ElementId ELEM_CHAR_TYPE = ElementId("char_type", 40);
ElementId ELEM_CORETYPES = ElementId("coretypes",41);
ElementId ELEM_DATA_ORGANIZATION = ElementId("data_organization", 42);
@ -68,7 +68,7 @@ ElementId ELEM_TYPE = ElementId("type",60);
ElementId ELEM_TYPEGRP = ElementId("typegrp",62);
ElementId ELEM_TYPEREF = ElementId("typeref",63);
//ElementId ELEM_USE_MS_CONVENTION = ElementId("use_MS_convention", 64);
//ElementId ELEM_WCHAR_SIZE = ElementId("wchar_size", 65);
ElementId ELEM_WCHAR_SIZE = ElementId("wchar_size", 65);
//ElementId ELEM_ZERO_LENGTH_BOUNDARY = ElementId("zero_length_boundary", 66);
// Some default routines for displaying data
@ -211,8 +211,8 @@ Datatype *Datatype::nearestArrayedComponentBackward(int8 off,int8 *newoff,int8 *
int4 Datatype::compare(const Datatype &op,int4 level) const
{
if (size != op.size) return (op.size - size);
if (submeta != op.submeta) return (submeta < op.submeta) ? -1 : 1;
if (size != op.size) return (op.size - size);
return 0;
}
@ -2898,6 +2898,8 @@ TypeFactory::TypeFactory(Architecture *g)
glb = g;
sizeOfInt = 0;
sizeOfLong = 0;
sizeOfChar = 0;
sizeOfWChar = 0;
sizeOfPointer = 0;
sizeOfAltPointer = 0;
enumsize = 0;
@ -2916,6 +2918,8 @@ void TypeFactory::clearCache(void)
typecache10 = (Datatype *)0;
typecache16 = (Datatype *)0;
type_nochar = (Datatype *)0;
for(i=0;i<5;++i)
charcache[i] = (Datatype *)0;
}
/// Set up default values for size of "int", structure alignment, and enums
@ -2935,6 +2939,10 @@ void TypeFactory::setupSizes(void)
if (sizeOfLong == 0) {
sizeOfLong = (sizeOfInt == 4) ? 8 : sizeOfInt;
}
if (sizeOfChar == 0)
sizeOfChar = 1;
if (sizeOfWChar == 0)
sizeOfWChar = 2;
if (sizeOfPointer == 0)
sizeOfPointer = glb->getDefaultDataSpace()->getAddrSize();
SegmentOp *segOp = glb->getSegmentOp(glb->getDefaultDataSpace());
@ -3003,11 +3011,15 @@ void TypeFactory::cacheCoreTypes(void)
// fallthru
case TYPE_UINT:
if (ct->isEnumType()) break; // Conceivably an enumeration
if (ct->isASCII()) { // Char is preferred over other int types
typecache[ct->getSize()][ct->getMetatype()-TYPE_FLOAT] = ct;
if (ct->isCharPrint()) {
if (ct->getSize() < 5)
charcache[ct->getSize()] = ct;
if (ct->isASCII()) { // Char is preferred over other int types
typecache[ct->getSize()][ct->getMetatype()-TYPE_FLOAT] = ct;
}
// Other character types (UTF16,UTF32) are not preferred
break;
}
if (ct->isCharPrint()) break; // Other character types (UTF16,UTF32) are not preferred
// fallthru
case TYPE_VOID:
case TYPE_UNKNOWN:
@ -3519,6 +3531,20 @@ Datatype *TypeFactory::getBase(int4 s,type_metatype m,const string &n)
return findAdd(tmp);
}
/// If a \e core character data-type of the given size exists, it is returned.
/// Otherwise an exception is thrown. Sizes outside the range of the character cache
/// (negative, or 5 bytes and larger) are never looked up and always produce the exception.
/// \param s is the size in bytes of the desired character data-type
/// \return the cached character data-type of the requested size
Datatype *TypeFactory::getTypeChar(int4 s)
{
  // Guard both ends of the range: a negative size must not be used to index the cache
  if (s >= 0 && s < 5) {
    Datatype *res = charcache[s];
    if (res != (Datatype *)0)
      return res;
  }
  throw LowlevelError("Request for unsupported character data-type");
}
/// Retrieve or create the core "code" Datatype object
/// This has no prototype attached to it and is appropriate for anonymous function pointers.
/// \return the TypeCode object
@ -3983,10 +4009,6 @@ void TypeFactory::encode(Encoder &encoder) const
dependentOrder(deporder); // Put types in correct order
encoder.openElement(ELEM_TYPEGRP);
encoder.writeSignedInteger(ATTRIB_INTSIZE, sizeOfInt);
encoder.writeSignedInteger(ATTRIB_LONGSIZE, sizeOfLong);
encoder.writeSignedInteger(ATTRIB_ENUMSIZE, enumsize);
encoder.writeBool(ATTRIB_ENUMSIGNED, (enumtype==TYPE_INT));
for(iter=deporder.begin();iter!=deporder.end();++iter) {
if ((*iter)->getName().size()==0) continue; // Don't save anonymous types
if ((*iter)->isCoreType()) { // If this would be saved as a coretype
@ -4305,15 +4327,7 @@ void TypeFactory::decode(Decoder &decoder)
{
uint4 elemId = decoder.openElement(ELEM_TYPEGRP);
string metastring;
sizeOfInt = decoder.readSignedInteger(ATTRIB_INTSIZE);
sizeOfLong = decoder.readSignedInteger(ATTRIB_LONGSIZE);
enumsize = decoder.readSignedInteger(ATTRIB_ENUMSIZE);
if (decoder.readBool(ATTRIB_ENUMSIGNED))
enumtype = TYPE_INT;
else
enumtype = TYPE_UINT;
while(decoder.peekElement() != 0)
decodeTypeNoRef(decoder,false);
decoder.closeElement(elemId);
@ -4355,6 +4369,12 @@ void TypeFactory::decodeDataOrganization(Decoder &decoder)
else if (subId == ELEM_POINTER_SIZE) {
sizeOfPointer = decoder.readSignedInteger(ATTRIB_VALUE);
}
else if (subId == ELEM_CHAR_SIZE) {
sizeOfChar = decoder.readSignedInteger(ATTRIB_VALUE);
}
else if (subId == ELEM_WCHAR_SIZE) {
sizeOfWChar = decoder.readSignedInteger(ATTRIB_VALUE);
}
else if (subId == ELEM_SIZE_ALIGNMENT_MAP) {
decodeAlignmentMap(decoder);
}

View File

@ -28,10 +28,10 @@ extern AttributeId ATTRIB_ARRAYSIZE; ///< Marshaling attribute "arraysize"
extern AttributeId ATTRIB_CHAR; ///< Marshaling attribute "char"
extern AttributeId ATTRIB_CORE; ///< Marshaling attribute "core"
extern AttributeId ATTRIB_ENUM; ///< Marshaling attribute "enum"
extern AttributeId ATTRIB_ENUMSIGNED; ///< Marshaling attribute "enumsigned"
extern AttributeId ATTRIB_ENUMSIZE; ///< Marshaling attribute "enumsize"
extern AttributeId ATTRIB_INTSIZE; ///< Marshaling attribute "intsize"
extern AttributeId ATTRIB_LONGSIZE; ///< Marshaling attribute "longsize"
//extern AttributeId ATTRIB_ENUMSIGNED; ///< Marshaling attribute "enumsigned" deprecated
//extern AttributeId ATTRIB_ENUMSIZE; ///< Marshaling attribute "enumsize" deprecated
//extern AttributeId ATTRIB_INTSIZE; ///< Marshaling attribute "intsize" deprecated
//extern AttributeId ATTRIB_LONGSIZE; ///< Marshaling attribute "longsize" deprecated
extern AttributeId ATTRIB_OPAQUESTRING; ///< Marshaling attribute "opaquestring"
extern AttributeId ATTRIB_SIGNED; ///< Marshaling attribute "signed"
extern AttributeId ATTRIB_STRUCTALIGN; ///< Marshaling attribute "structalign"
@ -40,7 +40,7 @@ extern AttributeId ATTRIB_VARLENGTH; ///< Marshaling attribute "varlength"
//extern ElementId ELEM_ABSOLUTE_MAX_ALIGNMENT; ///< Marshaling element \<absolute_max_alignment>
//extern ElementId ELEM_BITFIELD_PACKING; ///< Marshaling element \<bitfield_packing>
//extern ElementId ELEM_CHAR_SIZE; ///< Marshaling element \<char_size>
extern ElementId ELEM_CHAR_SIZE; ///< Marshaling element \<char_size>
//extern ElementId ELEM_CHAR_TYPE; ///< Marshaling element \<char_type>
extern ElementId ELEM_CORETYPES; ///< Marshaling element \<coretypes>
extern ElementId ELEM_DATA_ORGANIZATION; ///< Marshaling element \<data_organization>
@ -66,7 +66,7 @@ extern ElementId ELEM_TYPE; ///< Marshaling element \<type>
extern ElementId ELEM_TYPEGRP; ///< Marshaling element \<typegrp>
extern ElementId ELEM_TYPEREF; ///< Marshaling element \<typeref>
//extern ElementId ELEM_USE_MS_CONVENTION; ///< Marshaling element \<use_MS_convention>
//extern ElementId ELEM_WCHAR_SIZE; ///< Marshaling element \<wchar_size>
extern ElementId ELEM_WCHAR_SIZE; ///< Marshaling element \<wchar_size>
//extern ElementId ELEM_ZERO_LENGTH_BOUNDARY; ///< Marshaling element \<zero_length_boundary>
/// Print a hex dump of a data buffer to stream
@ -555,6 +555,7 @@ class TypePartialStruct : public Datatype {
public:
TypePartialStruct(const TypePartialStruct &op); ///< Construct from another TypePartialStruct
TypePartialStruct(Datatype *contain,int4 off,int4 sz,Datatype *strip); ///< Constructor
int4 getOffset(void) const { return offset; } ///< Get the byte offset into the containing data-type
Datatype *getParent(void) const { return container; } ///< Get the data-type containing \b this piece
virtual void printRaw(ostream &s) const;
virtual Datatype *getSubType(int8 off,int8 *newoff) const;
@ -580,6 +581,7 @@ protected:
public:
TypePartialUnion(const TypePartialUnion &op); ///< Construct from another TypePartialUnion
TypePartialUnion(TypeUnion *contain,int4 off,int4 sz,Datatype *strip); ///< Constructor
int4 getOffset(void) const { return offset; } ///< Get the byte offset into the containing data-type
TypeUnion *getParentUnion(void) const { return container; } ///< Get the union which \b this is part of
virtual void printRaw(ostream &s) const;
virtual const TypeField *findTruncation(int8 off,int4 sz,const PcodeOp *op,int4 slot,int8 &newoff) const;
@ -697,8 +699,10 @@ public:
/// \brief Container class for all Datatype objects in an Architecture
class TypeFactory {
int4 sizeOfInt; ///< Size of the core "int" datatype
int4 sizeOfLong; ///< Size of the core "long" datatype
int4 sizeOfInt; ///< Size of the core "int" data-type
int4 sizeOfLong; ///< Size of the core "long" data-type
int4 sizeOfChar; ///< Size of the core "char" data-type
int4 sizeOfWChar; ///< Size of the core "wchar_t" data-type
int4 sizeOfPointer; ///< Size of pointers (into default data address space)
int4 sizeOfAltPointer; ///< Size of alternate pointers used by architecture (if not 0)
int4 enumsize; ///< Size of an enumerated type
@ -710,6 +714,7 @@ class TypeFactory {
Datatype *typecache10; ///< Specially cached 10-byte float type
Datatype *typecache16; ///< Specially cached 16-byte float type
Datatype *type_nochar; ///< Same dimensions as char but acts and displays as an INT
Datatype *charcache[5]; ///< Cached character data-types
Datatype *findNoName(Datatype &ct); ///< Find data-type (in this container) by function
void insert(Datatype *newtype); ///< Insert pointer into the cross-reference sets
Datatype *findAdd(Datatype &ct); ///< Find data-type in this container or add it
@ -740,6 +745,8 @@ public:
int4 getPrimitiveAlignSize(uint4 size) const; ///< Get the aligned size of a primitive data-type
int4 getSizeOfInt(void) const { return sizeOfInt; } ///< Get the size of the default "int"
int4 getSizeOfLong(void) const { return sizeOfLong; } ///< Get the size of the default "long"
int4 getSizeOfChar(void) const { return sizeOfChar; } ///< Get the size of the default "char"
int4 getSizeOfWChar(void) const { return sizeOfWChar; } ///< Get the size of the default "wchar_t"
int4 getSizeOfPointer(void) const { return sizeOfPointer; } ///< Get the size of pointers
int4 getSizeOfAltPointer(void) const { return sizeOfAltPointer; } ///< Get size of alternate pointers (or 0)
Architecture *getArch(void) const { return glb; } ///< Get the Architecture object
@ -759,6 +766,7 @@ public:
Datatype *getBaseNoChar(int4 s,type_metatype m); ///< Get atomic type excluding "char"
Datatype *getBase(int4 s,type_metatype m); ///< Get atomic type
Datatype *getBase(int4 s,type_metatype m,const string &n); ///< Get named atomic type
Datatype *getTypeChar(int4 s); ///< Get a character data-type by size
TypeCode *getTypeCode(void); ///< Get an "anonymous" function data-type
TypePointer *getTypePointerStripArray(int4 s,Datatype *pt,uint4 ws); ///< Construct a pointer data-type, stripping an ARRAY level
TypePointer *getTypePointer(int4 s,Datatype *pt,uint4 ws); ///< Construct an absolute pointer data-type

View File

@ -767,42 +767,20 @@ string TypeOpCallother::getOperatorName(const PcodeOp *op) const
Datatype *TypeOpCallother::getInputLocal(const PcodeOp *op,int4 slot) const
{
if (!op->doesSpecialPropagation())
return TypeOp::getInputLocal(op,slot);
Architecture *glb = tlst->getArch();
VolatileWriteOp *vw_op = glb->userops.getVolatileWrite(); // Check if this a volatile write op
if ((vw_op->getIndex() == op->getIn(0)->getOffset()) && (slot == 2)) { // And we are requesting slot 2
const Address &addr ( op->getIn(1)->getAddr() ); // Address of volatile memory
int4 size = op->getIn(2)->getSize(); // Size of memory being written
uint4 vflags = 0;
SymbolEntry *entry = glb->symboltab->getGlobalScope()->queryProperties(addr,size,op->getAddr(),vflags);
if (entry != (SymbolEntry *)0) {
Datatype *res = entry->getSizedType(addr,size);
if (res != (Datatype *)0)
return res;
}
}
UserPcodeOp *userOp = tlst->getArch()->userops.getOp(op->getIn(0)->getOffset());
Datatype *res = userOp->getInputLocal(op, slot);
if (res != (Datatype *)0)
return res;
return TypeOp::getInputLocal(op,slot);
}
Datatype *TypeOpCallother::getOutputLocal(const PcodeOp *op) const
{
if (!op->doesSpecialPropagation())
return TypeOp::getOutputLocal(op);
Architecture *glb = tlst->getArch();
VolatileReadOp *vr_op = glb->userops.getVolatileRead(); // Check if this a volatile read op
if (vr_op->getIndex() == op->getIn(0)->getOffset()) {
const Address &addr ( op->getIn(1)->getAddr() ); // Address of volatile memory
int4 size = op->getOut()->getSize(); // Size of memory being written
uint4 vflags = 0;
SymbolEntry *entry = glb->symboltab->getGlobalScope()->queryProperties(addr,size,op->getAddr(),vflags);
if (entry != (SymbolEntry *)0) {
Datatype *res = entry->getSizedType(addr,size);
if (res != (Datatype *)0)
return res;
}
}
UserPcodeOp *userOp = tlst->getArch()->userops.getOp(op->getIn(0)->getOffset());
Datatype *res = userOp->getOutputLocal(op);
if (res != (Datatype *)0)
return res;
return TypeOp::getOutputLocal(op);
}

View File

@ -27,12 +27,61 @@ ElementId ELEM_CONSTRESOLVE = ElementId("constresolve",127);
ElementId ELEM_JUMPASSIST = ElementId("jumpassist",128);
ElementId ELEM_SEGMENTOP = ElementId("segmentop",129);
const uint4 UserPcodeOp::BUILTIN_STRINGDATA = 0x10000000;
const uint4 UserPcodeOp::BUILTIN_VOLATILE_READ = 0x10000001;
const uint4 UserPcodeOp::BUILTIN_VOLATILE_WRITE = 0x10000002;
const uint4 UserPcodeOp::BUILTIN_MEMCPY = 0x10000003;
const uint4 UserPcodeOp::BUILTIN_STRNCPY = 0x10000004;
const uint4 UserPcodeOp::BUILTIN_WCSNCPY = 0x10000005;
/// Base-class implementation: unconditionally throws a LowlevelError whose message includes the
/// name of \b this user-op.  Neither parameter is examined.  VolatileReadOp and VolatileWriteOp
/// define their own versions of this method.
/// \param vn is the annotation Varnode (unused here)
/// \param op is the CALLOTHER reading the annotation (unused here)
/// \return never returns normally
int4 UserPcodeOp::extractAnnotationSize(const Varnode *vn,const PcodeOp *op)
{
throw LowlevelError("Unexpected annotation input for CALLOTHER " + name);
}
/// \brief Constructor given specific input/output data-types
///
/// An optional output data-type for the CALLOTHER can be specified and up to 4 input data-types
/// associated with the first 4 inputs to the CALLOTHER (after the userop id in slot 0).
/// Each input data-type is stored at the position matching its parameter, so a null entry
/// leaves that particular input unspecified without shifting the data-types that follow it.
/// Trailing null entries are not stored.
/// \param nm is the name to associate with the user-op
/// \param g is the Architecture owning the new user-op
/// \param ind is the id associated with the user-op
/// \param out is the data-type to associate with the CALLOTHER output (may be null)
/// \param in0 is the first input data-type (may be null)
/// \param in1 is the second input data-type (may be null)
/// \param in2 is the third input data-type (may be null)
/// \param in3 is the fourth input data-type (may be null)
DatatypeUserOp::DatatypeUserOp(const string &nm,Architecture *g,int4 ind,Datatype *out,
			       Datatype *in0,Datatype *in1,Datatype *in2,Datatype *in3)
  : UserPcodeOp(nm,g,datatype,ind)
{
  outType = out;
  Datatype *provided[4] = { in0, in1, in2, in3 };
  int4 count = 4;
  // Drop only the trailing nulls; interior nulls must stay as placeholders so that
  // inTypes[k] always describes CALLOTHER input slot k+1
  while(count > 0 && provided[count-1] == (Datatype *)0)
    count -= 1;
  inTypes.assign(provided,provided + count);
}
/// Return the output data-type given at construction, regardless of the specific PcodeOp.
/// \param op is the instantiation of the user-op (not examined)
/// \return the stored output data-type, which may be null
Datatype *DatatypeUserOp::getOutputLocal(const PcodeOp *op) const
{
return outType;
}
/// Look up the stored data-type for the given CALLOTHER input slot.  Slot 0 carries the
/// user-op id, so the stored input data-types start at slot 1.
/// \param op is the instantiation of the user-op (not examined)
/// \param slot is the input slot of the CALLOTHER
/// \return the stored data-type, or null if none is stored for the slot
Datatype *DatatypeUserOp::getInputLocal(const PcodeOp *op,int4 slot) const

{
  int4 index = slot - 1;		// Shift past the user-op id held in slot 0
  if (index < 0 || index >= inTypes.size())
    return (Datatype *)0;
  return inTypes[index];
}
void InjectedUserOp::decode(Decoder &decoder)
{
@ -76,6 +125,21 @@ string VolatileReadOp::getOperatorName(const PcodeOp *op) const
return appendSize(name,op->getOut()->getSize());
}
/// If the CALLOTHER is marked for special propagation, query the global scope for a symbol
/// entry at the address given by input 1, using the size of the output.  If an entry is found,
/// the result of its getSizedType() for that address and size is returned.
/// \param op is the instantiation of the volatile read
/// \return the data-type taken from the symbol entry, or null if unspecified
Datatype *VolatileReadOp::getOutputLocal(const PcodeOp *op) const

{
  if (op->doesSpecialPropagation()) {
    const Address &memAddr = op->getIn(1)->getAddr();	// Address of volatile memory
    int4 readSize = op->getOut()->getSize();		// Size of memory being read
    uint4 props = 0;
    SymbolEntry *match = glb->symboltab->getGlobalScope()->queryProperties(memAddr,readSize,op->getAddr(),props);
    if (match != (SymbolEntry *)0)
      return match->getSizedType(memAddr,readSize);
  }
  return (Datatype *)0;
}
int4 VolatileReadOp::extractAnnotationSize(const Varnode *vn,const PcodeOp *op)
{
@ -92,17 +156,32 @@ string VolatileWriteOp::getOperatorName(const PcodeOp *op) const
return appendSize(name,op->getIn(2)->getSize());
}
/// Only slot 2 (the value being written) of a CALLOTHER marked for special propagation
/// is given a data-type.  The global scope is queried for a symbol entry at the address
/// given by input 1, using the size of input 2.  If an entry is found, the result of its
/// getSizedType() for that address and size is returned.
/// \param op is the instantiation of the volatile write
/// \param slot is the input slot being queried
/// \return the data-type taken from the symbol entry, or null if unspecified
Datatype *VolatileWriteOp::getInputLocal(const PcodeOp *op,int4 slot) const

{
  if (op->doesSpecialPropagation() && slot == 2) {
    const Address &memAddr = op->getIn(1)->getAddr();	// Address of volatile memory
    int4 writeSize = op->getIn(2)->getSize();		// Size of memory being written
    uint4 props = 0;
    SymbolEntry *match = glb->symboltab->getGlobalScope()->queryProperties(memAddr,writeSize,op->getAddr(),props);
    if (match != (SymbolEntry *)0)
      return match->getSizedType(memAddr,writeSize);
  }
  return (Datatype *)0;
}
/// The size assigned to the annotation is the size of the CALLOTHER's input in slot 2.
/// \param vn is the annotation Varnode (not examined)
/// \param op is the CALLOTHER performing the volatile write
/// \return the size in bytes of input 2
int4 VolatileWriteOp::extractAnnotationSize(const Varnode *vn,const PcodeOp *op)
{
return op->getIn(2)->getSize(); // Get size from the 3rd parameter of write function
}
/// \param g is the owning Architecture for this instance of the segment operation
/// \param nm is the low-level name of the segment operation
/// \param g is the owning Architecture for this instance of the segment operation
/// \param ind is the constant id identifying the specific CALLOTHER variant
SegmentOp::SegmentOp(Architecture *g,const string &nm,int4 ind)
: TermPatternOp(g,nm,ind)
SegmentOp::SegmentOp(const string &nm,Architecture *g,int4 ind)
: TermPatternOp(nm,g,segment,ind)
{
constresolve.space = (AddrSpace *)0;
}
@ -212,7 +291,7 @@ void SegmentOp::decode(Decoder &decoder)
/// \param g is the Architecture owning this set of jump assist scripts
JumpAssistOp::JumpAssistOp(Architecture *g)
: UserPcodeOp(g,"",0)
: UserPcodeOp("",g,jumpassist,0)
{
index2case = -1;
index2addr = -1;
@ -273,11 +352,22 @@ void JumpAssistOp::decode(Decoder &decoder)
useropindex = base->getIndex(); // Get the index from the core userop
}
/// The op is always named "stringdata", has class type \e string_data, and uses the
/// built-in id BUILTIN_STRINGDATA.  The \e display_string flag is set on construction.
/// \param g is the Architecture owning the user-op
InternalStringOp::InternalStringOp(Architecture *g)
: UserPcodeOp("stringdata",g,string_data,BUILTIN_STRINGDATA)
{
flags |= display_string;
}
/// The output data-type is whatever data-type is currently assigned to the op's output Varnode.
/// \param op is the instantiation of the user-op
/// \return the data-type of the output Varnode
Datatype *InternalStringOp::getOutputLocal(const PcodeOp *op) const
{
return op->getOut()->getType();
}
UserOpManage::UserOpManage(void)
{
vol_read = (VolatileReadOp *)0;
vol_write = (VolatileWriteOp *)0;
glb = (Architecture *)0;
}
UserOpManage::~UserOpManage(void)
@ -290,37 +380,38 @@ UserOpManage::~UserOpManage(void)
if (userop != (UserPcodeOp *)0)
delete userop;
}
map<uint4,UserPcodeOp *>::iterator oiter;
for(oiter=builtinmap.begin();oiter!=builtinmap.end();++oiter) {
delete (*oiter).second;
}
}
/// Every user defined p-code op is initially assigned an UnspecializedPcodeOp description,
/// which may get overridden later.
/// \param glb is the Architecture from which to draw user defined operations
void UserOpManage::initialize(Architecture *glb)
/// \param g is the Architecture from which to draw user defined operations
void UserOpManage::initialize(Architecture *g)
{
glb = g;
vector<string> basicops;
glb->translate->getUserOpNames(basicops);
for(uint4 i=0;i<basicops.size();++i) {
if (basicops[i].size()==0) continue;
UserPcodeOp *userop = new UnspecializedPcodeOp(glb,basicops[i],i);
UserPcodeOp *userop = new UnspecializedPcodeOp(basicops[i],glb,i);
registerOp(userop);
}
}
/// Establish defaults for necessary operators not already defined.
/// Currently this forces volatile read/write operations to exist.
/// \param glb is the owning Architecture
void UserOpManage::setDefaults(Architecture *glb)
{
if (vol_read == (VolatileReadOp *)0) {
VolatileReadOp *volread = new VolatileReadOp(glb,"read_volatile",useroplist.size(), false);
registerOp(volread);
}
if (vol_write == (VolatileWriteOp *)0) {
VolatileWriteOp *volwrite = new VolatileWriteOp(glb,"write_volatile",useroplist.size(), false);
registerOp(volwrite);
}
/// Retrieve a user-op description object by index.  Indices within the bounds of the
/// main list are resolved there; any other index is looked up among the built-in ops.
/// \param i is the index
/// \return the indicated user-op description, or null if a larger index matches no built-in op
UserPcodeOp *UserOpManage::getOp(uint4 i) const {
  if (i >= useroplist.size()) {
    // Not an ordinary user-op index, so try the built-in ids
    map<uint4,UserPcodeOp *>::const_iterator pos = builtinmap.find(i);
    return (pos != builtinmap.end()) ? (*pos).second : (UserPcodeOp *)0;
  }
  return useroplist[i];
}
/// \param nm is the low-level operation name
@ -334,6 +425,64 @@ UserPcodeOp *UserOpManage::getOp(const string &nm) const
return (*iter).second;
}
/// Retrieve a built-in user-op given its id.  If a record is already present in the built-in map
/// it is returned as is.  Otherwise a default form of the record is instantiated, stored in the
/// map and returned.  An unrecognized id causes an exception to be thrown.
/// \param i is the built-in id
/// \return the matching user-op record
UserPcodeOp *UserOpManage::registerBuiltin(uint4 i)

{
  map<uint4,UserPcodeOp *>::const_iterator pos = builtinmap.find(i);
  if (pos != builtinmap.end())
    return (*pos).second;
  UserPcodeOp *builtinOp;
  switch(i) {
    case UserPcodeOp::BUILTIN_STRINGDATA:
      builtinOp = new InternalStringOp(glb);
      break;
    case UserPcodeOp::BUILTIN_VOLATILE_READ:
      builtinOp = new VolatileReadOp("read_volatile",glb,false);
      break;
    case UserPcodeOp::BUILTIN_VOLATILE_WRITE:
      builtinOp = new VolatileWriteOp("write_volatile",glb,false);
      break;
    case UserPcodeOp::BUILTIN_MEMCPY:		// Copy through "void" pointers
    case UserPcodeOp::BUILTIN_STRNCPY:		// Copy "char" elements
    case UserPcodeOp::BUILTIN_WCSNCPY:		// Copy "wchar_t" elements
    {
      // The three copy ops differ only in name and in the element the pointers refer to
      int4 ptrSize = glb->types->getSizeOfPointer();
      int4 wordSize = glb->getDefaultDataSpace()->getWordSize();
      Datatype *elemType;
      const char *opName;
      if (i == UserPcodeOp::BUILTIN_MEMCPY) {
	elemType = glb->types->getTypeVoid();
	opName = "builtin_memcpy";
      }
      else if (i == UserPcodeOp::BUILTIN_STRNCPY) {
	elemType = glb->types->getTypeChar(glb->types->getSizeOfChar());
	opName = "builtin_strncpy";
      }
      else {
	elemType = glb->types->getTypeChar(glb->types->getSizeOfWChar());
	opName = "builtin_wcsncpy";
      }
      Datatype *ptrType = glb->types->getTypePointer(ptrSize,elemType,wordSize);
      Datatype *intType = glb->types->getBase(4,TYPE_INT);
      // Output and first two inputs are pointers, the third input is a 4-byte integer
      builtinOp = new DatatypeUserOp(opName,glb,i,ptrType,ptrType,ptrType,intType);
      break;
    }
    default:
      throw LowlevelError("Bad built-in userop id");
  }
  builtinmap[i] = builtinOp;
  return builtinOp;
}
/// Add the description to the mapping by index and the mapping by name. Make some basic
/// sanity checks for conflicting values and duplicate operations and throw an
/// exception if there's a problem.
@ -375,19 +524,6 @@ void UserOpManage::registerOp(UserPcodeOp *op)
segmentop[index] = s_op;
return;
}
VolatileReadOp *tmpVolRead = dynamic_cast<VolatileReadOp *>(op);
if (tmpVolRead != (VolatileReadOp *)0) {
if (vol_read != (VolatileReadOp *)0)
throw LowlevelError("Multiple volatile reads registered");
vol_read = tmpVolRead;
return;
}
VolatileWriteOp *tmpVolWrite = dynamic_cast<VolatileWriteOp *>(op);
if (tmpVolWrite != (VolatileWriteOp *)0) {
if (vol_write != (VolatileWriteOp *)0)
throw LowlevelError("Multiple volatile writes registered");
vol_write = tmpVolWrite;
}
}
/// Create a SegmentOp description object based on the element and
@ -398,7 +534,7 @@ void UserOpManage::decodeSegmentOp(Decoder &decoder,Architecture *glb)
{
SegmentOp *s_op;
s_op = new SegmentOp(glb,"",useroplist.size());
s_op = new SegmentOp("",glb,useroplist.size());
try {
s_op->decode(decoder);
registerOp(s_op);
@ -435,20 +571,15 @@ void UserOpManage::decodeVolatile(Decoder &decoder,Architecture *glb)
}
if (readOpName.size() == 0 || writeOpName.size() == 0)
throw LowlevelError("Missing inputop/outputop attributes in <volatile> element");
VolatileReadOp *vr_op = new VolatileReadOp(glb,readOpName,useroplist.size(),functionalDisplay);
try {
registerOp(vr_op);
} catch(LowlevelError &err) {
delete vr_op;
throw err;
}
VolatileWriteOp *vw_op = new VolatileWriteOp(glb,writeOpName,useroplist.size(),functionalDisplay);
try {
registerOp(vw_op);
} catch(LowlevelError &err) {
delete vw_op;
throw err;
}
map<uint4,UserPcodeOp *>::const_iterator iter;
if (builtinmap.find(UserPcodeOp::BUILTIN_VOLATILE_READ) != builtinmap.end())
throw LowlevelError("read_volatile user-op registered more than once");
if (builtinmap.find(UserPcodeOp::BUILTIN_VOLATILE_WRITE) != builtinmap.end())
throw LowlevelError("write_volatile user-op registered more than once");
VolatileReadOp *vr_op = new VolatileReadOp(readOpName,glb,functionalDisplay);
builtinmap[UserPcodeOp::BUILTIN_VOLATILE_READ] = vr_op;
VolatileWriteOp *vw_op = new VolatileWriteOp(writeOpName,glb,functionalDisplay);
builtinmap[UserPcodeOp::BUILTIN_VOLATILE_WRITE] = vw_op;
}
/// Create an InjectedUserOp description object based on the element
@ -458,7 +589,7 @@ void UserOpManage::decodeVolatile(Decoder &decoder,Architecture *glb)
void UserOpManage::decodeCallOtherFixup(Decoder &decoder,Architecture *glb)
{
InjectedUserOp *op = new InjectedUserOp(glb,"",0,0);
InjectedUserOp *op = new InjectedUserOp("",glb,0,0);
try {
op->decode(decoder);
registerOp(op);
@ -505,7 +636,7 @@ void UserOpManage::manualCallOtherFixup(const string &useropname,const string &o
throw LowlevelError("Cannot fixup userop: "+useropname);
int4 injectid = glb->pcodeinjectlib->manualCallOtherFixup(useropname,outname,inname,snippet);
InjectedUserOp *op = new InjectedUserOp(glb,useropname,userop->getIndex(),injectid);
InjectedUserOp *op = new InjectedUserOp(useropname,glb,userop->getIndex(),injectid);
try {
registerOp(op);
} catch(LowlevelError &err) {

View File

@ -49,19 +49,40 @@ public:
/// \brief Enumeration of different boolean properties that can be assigned to a CALLOTHER
enum userop_flags {
annotation_assignment = 1, ///< Displayed as assignment, `in1 = in2`, where the first parameter is an annotation
no_operator = 2 ///< Don't emit special token, just emit the first input parameter as expression
no_operator = 2, ///< Don't emit special token, just emit the first input parameter as expression
display_string = 4 ///< Emit as a string constant
};
/// \brief User-op class encoded as an enum
enum userop_type {
unspecialized = 1, ///< Encoding for UnspecializedPcodeOp
injected = 2, ///< InjectedUserOp
volatile_read = 3, ///< VolatileReadOp
volatile_write = 4, ///< VolatileWriteOp
segment = 5, ///< SegmentOp
jumpassist = 6, ///< JumpAssistOp
string_data = 7, ///< InternalStringOp
datatype = 8 ///< DatatypeUserOp
};
static const uint4 BUILTIN_STRINGDATA; ///< Built-in id for the InternalStringOp
static const uint4 BUILTIN_VOLATILE_READ; ///< Built-in id for VolatileReadOp
static const uint4 BUILTIN_VOLATILE_WRITE; ///< Built-in id for VolatileWriteOp
static const uint4 BUILTIN_MEMCPY; ///< Built-in id for memcpy
static const uint4 BUILTIN_STRNCPY; ///< Built-in id for strncpy
static const uint4 BUILTIN_WCSNCPY; ///< Built-in id for wcsncpy
protected:
string name; ///< Low-level name of p-code operator
int4 useropindex; ///< Index passed in the CALLOTHER op
Architecture *glb; ///< Architecture owning the user defined op
uint4 type; ///< Encoded class type (userop_type)
int4 useropindex; ///< Index passed in the CALLOTHER op
uint4 flags; ///< Boolean attributes of the CALLOTHER
public:
UserPcodeOp(Architecture *g,const string &nm,int4 ind) {
name = nm; useropindex = ind; glb = g; flags = 0; } ///< Construct from name and index
UserPcodeOp(const string &nm,Architecture *g,uint4 tp,int4 ind) {
name = nm; glb = g; type = tp; useropindex = ind; flags = 0; } ///< Construct from name and index
const string &getName(void) const { return name; } ///< Get the low-level name of the p-code op
uint4 getType(void) const { return type; } ///< Get the encoded class type
int4 getIndex(void) const { return useropindex; } ///< Get the constant id of the op
uint4 getDisplay(void) const { return (flags & (annotation_assignment | no_operator)); } ///< Get display type (0=functional)
uint4 getDisplay(void) const {
return (flags & (annotation_assignment | no_operator | display_string)); } ///< Get display type (0=functional)
virtual ~UserPcodeOp(void) {} ///< Destructor
/// \brief Get the symbol representing this operation in decompiled code
@ -73,6 +94,19 @@ public:
virtual string getOperatorName(const PcodeOp *op) const {
return name; }
/// \brief Return the output data-type of the user-op if specified
///
/// \param op is the instantiation of the user-op
/// \return the data-type or null to indicate the data-type is unspecified
virtual Datatype *getOutputLocal(const PcodeOp *op) const { return (Datatype *)0; }
/// \brief Return the input data-type to the user-op in the given slot
///
/// \param op is the instantiation of the user-op
/// \param slot is the given input slot
/// \return the data-type or null to indicate the data-type is unspecified
virtual Datatype *getInputLocal(const PcodeOp *op,int4 slot) const { return (Datatype *)0; }
/// \brief Assign a size to an annotation input to \b this userop
///
/// Assuming an annotation refers to a special symbol accessed by \b this operation, retrieve the
@ -95,8 +129,23 @@ public:
/// but still has an unknown effect.
class UnspecializedPcodeOp : public UserPcodeOp {
public:
UnspecializedPcodeOp(Architecture *g,const string &nm,int4 ind)
: UserPcodeOp(g,nm,ind) {} ///< Constructor
UnspecializedPcodeOp(const string &nm,Architecture *g,int4 ind)
: UserPcodeOp(nm,g,unspecialized,ind) {} ///< Constructor
virtual void decode(Decoder &decoder) {}
};
/// \brief Generic user defined operation that provides input/output data-types
///
/// The CALLOTHER acts as a source of data-type information within data-flow.
/// The data-types are fixed at construction and are reported through
/// getOutputLocal() and getInputLocal().
class DatatypeUserOp : public UserPcodeOp {
Datatype *outType; ///< Data-type of the output
vector<Datatype *> inTypes; ///< Data-type of the input(s)
public:
DatatypeUserOp(const string &nm,Architecture *g,int4 ind,Datatype *out,
Datatype *in0=(Datatype *)0,Datatype *in1=(Datatype *)0,
Datatype *in2=(Datatype *)0,Datatype *in3=(Datatype *)0); ///< Constructor given specific input/output data-types
virtual Datatype *getOutputLocal(const PcodeOp *op) const; ///< Get the stored output data-type
virtual Datatype *getInputLocal(const PcodeOp *op,int4 slot) const; ///< Get the stored data-type for an input slot
virtual void decode(Decoder &decoder) {} ///< Nothing is read from the stream for this user-op
};
@ -109,8 +158,8 @@ public:
class InjectedUserOp : public UserPcodeOp {
uint4 injectid; ///< The id of the injection object (to which this op maps)
public:
InjectedUserOp(Architecture *g,const string &nm,int4 ind,int4 injid)
: UserPcodeOp(g,nm,ind) { injectid = injid; } ///< Constructor
InjectedUserOp(const string &nm,Architecture *g,int4 ind,int4 injid)
: UserPcodeOp(nm,g,injected,ind) { injectid = injid; } ///< Constructor
uint4 getInjectId(void) const { return injectid; } ///< Get the id of the injection object
virtual void decode(Decoder &decoder);
};
@ -126,8 +175,8 @@ class VolatileOp : public UserPcodeOp {
protected:
static string appendSize(const string &base,int4 size); ///< Append a suffix to a string encoding a specific size
public:
VolatileOp(Architecture *g,const string &nm,int4 ind)
: UserPcodeOp(g,nm,ind) { } ///< Constructor
VolatileOp(const string &nm,Architecture *g,uint4 tp,int4 ind)
: UserPcodeOp(nm,g,tp,ind) { } ///< Constructor
virtual void decode(Decoder &decoder) {} ///< Currently volatile ops only need their name
};
@ -138,9 +187,10 @@ public:
/// is the actual value read from memory.
class VolatileReadOp : public VolatileOp {
public:
VolatileReadOp(Architecture *g,const string &nm,int4 ind,bool functional)
: VolatileOp(g,nm,ind) { flags = functional ? 0 : no_operator; } ///< Constructor
VolatileReadOp(const string &nm,Architecture *g,bool functional)
: VolatileOp(nm,g,volatile_read,BUILTIN_VOLATILE_READ) { flags = functional ? 0 : no_operator; } ///< Constructor
virtual string getOperatorName(const PcodeOp *op) const;
virtual Datatype *getOutputLocal(const PcodeOp *op) const;
virtual int4 extractAnnotationSize(const Varnode *vn,const PcodeOp *op);
};
@ -152,9 +202,10 @@ public:
/// - The Varnode value being written to the memory
class VolatileWriteOp : public VolatileOp {
public:
VolatileWriteOp(Architecture *g,const string &nm,int4 ind,bool functional)
: VolatileOp(g,nm,ind) { flags = functional ? 0 : annotation_assignment; } ///< Constructor
VolatileWriteOp(const string &nm,Architecture *g,bool functional)
: VolatileOp(nm,g,volatile_write,BUILTIN_VOLATILE_WRITE) { flags = functional ? 0 : annotation_assignment; } ///< Constructor
virtual string getOperatorName(const PcodeOp *op) const;
virtual Datatype *getInputLocal(const PcodeOp *op,int4 slot) const;
virtual int4 extractAnnotationSize(const Varnode *vn,const PcodeOp *op);
};
@ -170,7 +221,7 @@ public:
/// constant inputs (matching the format determined by unify()).
class TermPatternOp : public UserPcodeOp {
public:
TermPatternOp(Architecture *g,const string &nm,int4 ind) : UserPcodeOp(g,nm,ind) {} ///< Constructor
TermPatternOp(const string &nm,Architecture *g,uint4 tp,int4 ind) : UserPcodeOp(nm,g,tp,ind) {} ///< Constructor
virtual int4 getNumVariableTerms(void) const=0; ///< Get the number of input Varnodes expected
/// \brief Gather the formal input Varnode objects given the root PcodeOp
@ -218,7 +269,7 @@ class SegmentOp : public TermPatternOp {
bool supportsfarpointer; ///< Is \b true if the joined pair base:near acts as a \b far pointer
VarnodeData constresolve; ///< How to resolve constant near pointers
public:
SegmentOp(Architecture *g,const string &nm,int4 ind); ///< Constructor
SegmentOp(const string &nm,Architecture *g,int4 ind); ///< Constructor
AddrSpace *getSpace(void) const { return spc; } ///< Get the address space being pointed to
bool hasFarPointerSupport(void) const { return supportsfarpointer; } ///< Return \b true, if \b this op supports far pointers
int4 getBaseSize(void) const { return baseinsize; } ///< Get size in bytes of the base/segment value
@ -254,6 +305,17 @@ public:
virtual void decode(Decoder &decoder);
};
/// \brief An op that displays as an internal string
///
/// The user op takes no input parameters. In the decompiler output, it displays as a quoted string. The
/// string is associated with the address assigned to the user op and is pulled from StringManager as \e internal.
class InternalStringOp : public UserPcodeOp {
public:
InternalStringOp(Architecture *g); ///< Constructor
virtual Datatype *getOutputLocal(const PcodeOp *op) const; ///< Get the data-type of the op's output Varnode
virtual void decode(Decoder &decoder) {} ///< Nothing is read from the stream for this user-op
};
/// \brief Manager/container for description objects (UserPcodeOp) of user defined p-code ops
///
/// The description objects are referenced by the CALLOTHER constant id, (or by name during initialization).
@ -262,28 +324,22 @@ public:
/// may reassign a more specialized description object by parsing specific tags using
/// one of \b this class's decode* methods.
class UserOpManage {
Architecture *glb; ///< Architecture this manager is associated with
vector<UserPcodeOp *> useroplist; ///< Description objects indexed by CALLOTHER constant id
map<uint4,UserPcodeOp *> builtinmap; ///< Map from builtin ids to description objects
map<string,UserPcodeOp *> useropmap; ///< A map from the name of the user defined operation to a description object
vector<SegmentOp *> segmentop; ///< Segment operations supported by this Architecture
VolatileReadOp *vol_read; ///< (Single) volatile read operation
VolatileWriteOp *vol_write; ///< (Single) volatile write operation
void registerOp(UserPcodeOp *op); ///< Insert a new UserPcodeOp description object in the map(s)
public:
UserOpManage(void); ///< Construct an empty manager
~UserOpManage(void); ///< Destructor
void initialize(Architecture *glb); ///< Initialize description objects for all user defined ops
void setDefaults(Architecture *glb); ///< Create any required operations if they weren't explicitly defined
void initialize(Architecture *g); ///< Initialize description objects for all user defined ops
int4 numSegmentOps(void) const { return segmentop.size(); } ///< Number of segment operations supported
/// Look up a user-op description object by its position in the description list
/// \param i is the index of the desired description
/// \return the description stored at that index, or null if the index is beyond the end of the list
UserPcodeOp *getOp(int4 i) const {
  const bool inRange = (i < useroplist.size());
  return inRange ? useroplist[i] : (UserPcodeOp *)0;
}
UserPcodeOp *getOp(uint4 i) const; ///< Retrieve a user-op description object by index
UserPcodeOp *getOp(const string &nm) const; ///< Retrieve description by name
UserPcodeOp *getOp(const string &nm) const; ///< Retrieve description by name
UserPcodeOp *registerBuiltin(uint4 i); ///< Make sure an active record exists for the given built-in op
/// Retrieve a segment-op description object by index
/// \param i is the index
@ -293,8 +349,6 @@ public:
return segmentop[i];
}
VolatileReadOp *getVolatileRead(void) const { return vol_read; } ///< Get (the) volatile read description
VolatileWriteOp *getVolatileWrite(void) const { return vol_write; } ///< Get (the) volatile write description
void decodeSegmentOp(Decoder &decoder,Architecture *glb); ///< Parse a \<segmentop> element
void decodeVolatile(Decoder &decoder,Architecture *glb); ///< Parse a \<volatile> element
void decodeCallOtherFixup(Decoder &decoder,Architecture *glb); ///< Parse a \<callotherfixup> element

View File

@ -533,16 +533,25 @@ SymbolEntry *HighVariable::getSymbolEntry(void) const
return (SymbolEntry *)0;
}
/// The data-type is set and its dirtying mechanism is disabled. The data-type will not change, unless
/// this method is called again.
/// \param tp is the data-type to set
void HighVariable::finalizeDatatype(Datatype *tp)
/// If there is an associated Symbol, its data-type (or the appropriate piece) is assigned
/// to \b this. The dirtying mechanism is disabled so that data-type cannot change.
/// \param typeFactory is the factory used to construct any required piece
void HighVariable::finalizeDatatype(TypeFactory *typeFactory)
{
if (symbol == (Symbol *)0) return;
Datatype *cur = symbol->getType();
int4 off = symboloffset;
if (off < 0)
off = 0;
int4 sz = inst[0]->getSize();
Datatype *tp = typeFactory->getExactPiece(cur, off, sz);
if (tp == (Datatype *)0 || tp->getMetatype() == TYPE_UNKNOWN)
return;
type = tp;
if (type->hasStripped()) {
if (type->getMetatype() == TYPE_PARTIALUNION) {
if (symbol != (Symbol *)0 && symboloffset != -1) {
if (symboloffset != -1) {
type_metatype meta = symbol->getType()->getMetatype();
if (meta != TYPE_STRUCT && meta != TYPE_UNION) // If partial union does not have a bigger backing symbol
type = type->getStripped(); // strip the partial union

View File

@ -177,7 +177,7 @@ public:
int4 getSymbolOffset(void) const { return symboloffset; } ///< Get the Symbol offset associated with \b this
int4 numInstances(void) const { return inst.size(); } ///< Get the number of member Varnodes \b this has
Varnode *getInstance(int4 i) const { return inst[i]; } ///< Get the i-th member Varnode
void finalizeDatatype(Datatype *tp); ///< Set a final datatype for \b this variable
void finalizeDatatype(TypeFactory *typeFactory); ///< Set a final data-type matching the associated Symbol
void groupWith(int4 off,HighVariable *hi2); ///< Put \b this and another HighVariable in the same intersection group
void establishGroupSymbolOffset(void); ///< Transfer \b symbol offset of \b this to the VariableGroup

View File

@ -23,6 +23,35 @@ AttributeId ATTRIB_MAIN = AttributeId("main",134);
ElementId ELEM_LOCALDB = ElementId("localdb",228);
/// This is assumed to be \e open. If \b this is a primitive integer or float, and if the other range
/// is just a constant being COPYed, return \b true, even if the constant is bigger.
/// \param b is the other range to test for absorption
/// \return \b true if the other range can be absorbed as a constant
bool RangeHint::isConstAbsorbable(const RangeHint *b) const
{
if ((b->flags & copy_constant) == 0)
return false;
if (b->isTypeLock())
return false;
if (b->size < size)
return false;
type_metatype meta = type->getMetatype();
if (meta != TYPE_INT && meta != TYPE_UINT && meta != TYPE_BOOL && meta != TYPE_FLOAT)
return false;
type_metatype bMeta = b->type->getMetatype();
if (bMeta != TYPE_UNKNOWN && bMeta != TYPE_INT && bMeta != TYPE_UINT)
return false;
intb end = sstart;
if (highind > 0)
end += highind * type->getAlignSize();
else
end += size;
if (b->sstart > end)
return false;
return true;
}
/// \brief Can the given intersecting RangeHint coexist with \b this at their given offsets
///
/// Determine if the data-type information in the two ranges \e line \e up
@ -44,21 +73,26 @@ bool RangeHint::reconcile(const RangeHint *b) const
mod += a->type->getAlignSize();
Datatype *sub = a->type;
while((sub!=(Datatype *)0)&&(sub->getAlignSize() > b->type->getAlignSize()))
while((sub!=(Datatype *)0)&&(sub->getAlignSize() > b->type->getAlignSize())) {
sub = sub->getSubType(mod,&mod);
}
if (sub == (Datatype *)0) return false;
if (mod != 0) return false;
if (sub->getAlignSize() == b->type->getAlignSize()) return true;
if ((b->flags & Varnode::typelock)!=0) return false;
// If we reach here, component sizes do not match
// Check for data-types we want to protect more
if (sub != (Datatype *)0) {
if (sub->getAlignSize() == b->type->getAlignSize()) return true;
// If we reach here, b overlaps multiple components of a
}
// If we reach here, component sizes do not match. Check for data-types we want to protect more
if (b->rangeType == RangeType::open && b->isConstAbsorbable(a))
return true;
if (b->isTypeLock()) return false;
type_metatype meta = a->type->getMetatype();
if (meta != TYPE_STRUCT && meta != TYPE_UNION) {
if (meta != TYPE_ARRAY || ((TypeArray *)(a->type))->getBase()->getMetatype() == TYPE_UNKNOWN)
if (meta != TYPE_ARRAY || ((TypeArray *)(a->type))->getBase()->getMetatype() != TYPE_UNKNOWN)
return false;
}
// For structures, unions, and arrays, test if b looks like a partial data-type
// For structures, unions, and arrays, test if b looks like a partial/combined data-type
meta = b->type->getMetatype();
if (meta == TYPE_UNKNOWN || meta == TYPE_INT || meta == TYPE_UINT) {
return true;
@ -95,19 +129,29 @@ bool RangeHint::preferred(const RangeHint *b,bool reconcile) const
if (start != b->start)
return true; // Something must occupy a->start to b->start
// Prefer the locked type
if ((b->flags & Varnode::typelock)!=0) {
if ((flags & Varnode::typelock)==0)
if (b->isTypeLock()) {
if (!isTypeLock())
return false;
}
else if ((flags & Varnode::typelock)!=0)
else if (isTypeLock())
return true;
if (!reconcile) { // If the ranges don't reconcile
if (rangeType == open && b->rangeType != open) // Throw out the open range
return false;
if (b->rangeType == open && rangeType != open)
if (rangeType == open && b->rangeType != open) {
if (!reconcile)
return false; // Throw out open range
if (isConstAbsorbable(b))
return true;
}
else if (b->rangeType == open && rangeType != open) {
if (!reconcile)
return true; // Throw out open range
if (b->isConstAbsorbable(this))
return false;
}
else if (rangeType == fixed && b->rangeType == fixed) {
if (size != b->size && !reconcile)
return (size > b->size);
}
return (0>type->typeOrder(*b->type)); // Prefer the more specific
}
@ -127,8 +171,12 @@ bool RangeHint::attemptJoin(RangeHint *b)
{
if (rangeType != open) return false;
if (highind < 0) return false;
if (b->rangeType == endpoint) return false; // Don't merge with bounding range
if (isConstAbsorbable(b)) {
absorb(b);
return true;
}
if (highind < 0) return false;
Datatype *settype = type; // Assume we will keep this data-type
if (settype->getAlignSize() != b->type->getAlignSize()) return false;
if (settype != b->type) {
@ -151,9 +199,8 @@ bool RangeHint::attemptJoin(RangeHint *b)
else if (aTestType != bTestType) // If they are both not unknown, they must be the same
return false;
}
if ((flags & Varnode::typelock)!=0) return false;
if ((b->flags & Varnode::typelock)!=0) return false;
if (flags != b->flags) return false;
if (isTypeLock()) return false;
if (b->isTypeLock()) return false;
intb diffsz = b->sstart - sstart;
if ((diffsz % settype->getAlignSize()) != 0) return false;
diffsz /= settype->getAlignSize();
@ -170,16 +217,34 @@ bool RangeHint::attemptJoin(RangeHint *b)
void RangeHint::absorb(RangeHint *b)
{
if (b->rangeType == open && type->getAlignSize() == b->type->getAlignSize()) {
rangeType = open;
if (0 <= b->highind) { // If b has array indexing
intb diffsz = b->sstart - sstart;
diffsz /= type->getAlignSize();
int4 trialhi = b->highind + diffsz;
if (b->rangeType == open) {
if (type->getAlignSize() == b->type->getAlignSize()) { // Compatible element data-type
rangeType = open;
if (0 <= b->highind) { // If b has array indexing
intb diffsz = b->sstart - sstart;
diffsz /= type->getAlignSize();
int4 trialhi = b->highind + diffsz;
if (highind < trialhi)
highind = trialhi;
}
}
else if (start == b->start) {
type_metatype meta = type->getMetatype();
if (meta != TYPE_STRUCT && meta != TYPE_UNION)
rangeType = open;
}
}
else if ((b->flags & copy_constant)!=0 && rangeType == open) {
intb diffsz = b->sstart - sstart + b->size;
if (diffsz > size) {
int4 trialhi = diffsz / type->getAlignSize();
if (highind < trialhi)
highind = trialhi;
}
}
if ((flags & copy_constant)!=0 && (b->flags & copy_constant)==0) {
flags ^= copy_constant;
}
}
/// Given that \b this and the other RangeHint intersect, redefine \b this so that it
@ -206,12 +271,12 @@ bool RangeHint::merge(RangeHint *b,AddrSpace *space,TypeFactory *typeFactory)
}
else {
didReconcile = false;
resType = ((flags & Varnode::typelock) != 0) ? 0 : 2;
resType = isTypeLock() ? 0 : 2;
}
// Check for really problematic cases
if (!didReconcile) {
if ((flags & Varnode::typelock)!=0) {
if ((b->flags & Varnode::typelock)!=0)
if (isTypeLock()) {
if (b->isTypeLock())
throw LowlevelError("Overlapping forced variable types : " + type->getName() + " " + b->type->getName());
if (start != b->start)
return false; // Discard b entirely
@ -219,8 +284,7 @@ bool RangeHint::merge(RangeHint *b,AddrSpace *space,TypeFactory *typeFactory)
}
if (resType == 0) {
if (didReconcile)
absorb(b);
absorb(b);
}
else if (resType == 1) {
RangeHint copyRange = *this;
@ -250,7 +314,7 @@ bool RangeHint::merge(RangeHint *b,AddrSpace *space,TypeFactory *typeFactory)
return false;
}
/// Compare (signed) offset, size, RangeType, type lock, and high index, in that order.
/// Compare (signed) offset, size, RangeType, flags, and high index, in that order.
/// Datatype is \e not compared.
/// \param op2 is the other RangeHint to compare with \b this
/// \return -1, 0, or 1 depending on if \b this comes before, is equal to, or comes after
@ -263,10 +327,8 @@ int4 RangeHint::compare(const RangeHint &op2) const
return (size < op2.size) ? -1 : 1; // Small sizes come first
if (rangeType != op2.rangeType)
return (rangeType < op2.rangeType) ? -1 : 1;
uint4 thisLock = flags & Varnode::typelock;
uint4 op2Lock = op2.flags & Varnode::typelock;
if (thisLock != op2Lock)
return (thisLock < op2Lock) ? -1 : 1;
if (flags != op2.flags)
return (flags < op2.flags) ? -1 : 1;
if (highind != op2.highind)
return (highind < op2.highind) ? -1 : 1;
return 0;
@ -284,6 +346,7 @@ ScopeLocal::ScopeLocal(uint8 id,AddrSpace *spc,Funcdata *fd,Architecture *g) : S
maxParamOffset = 0;
rangeLocked = false;
stackGrowsNegative = true;
overlapProblems = false;
restrictScope(fd);
}
@ -522,7 +585,7 @@ bool ScopeLocal::adjustFit(RangeHint &a) const
{
if (a.size==0) return false; // Nothing to fit
if ((a.flags & Varnode::typelock)!=0) return false; // Already entered
if (a.isTypeLock()) return false; // Already entered
Address addr(space,a.start);
uintb maxsize = getRangeTree().longestFit(addr,a.size);
if (maxsize==0) return false;
@ -773,11 +836,14 @@ uintb AliasChecker::gatherOffset(Varnode *vn)
case CPUI_PTRADD:
othervn = def->getIn(2);
retval = gatherOffset(def->getIn(0));
// We need to treat PTRADD exactly as if it were encoded as an ADD and MULT
// Because a plain MULT truncates the ADD tree
// We only follow getIn(1) if the PTRADD multiply is by 1
if (othervn->isConstant() && (othervn->getOffset()==1))
retval = retval + gatherOffset(def->getIn(1));
if (def->getIn(1)->isConstant())
retval = retval + def->getIn(1)->getOffset() * othervn->getOffset();
else if (othervn->getOffset()==1) {
// We need to treat PTRADD exactly as if it were encoded as an ADD and MULT
// Because a plain MULT truncates the ADD tree
// We only follow getIn(1) if the PTRADD multiply is by 1
retval = retval + gatherOffset(def->getIn(1));
}
break;
case CPUI_SEGMENTOP:
retval = gatherOffset(def->getIn(2));
@ -849,6 +915,43 @@ void MapState::addRange(uintb st,Datatype *ct,uint4 fl,RangeHint::RangeType rt,i
#endif
}
/// Most data-types are added as a single \e fixed range.  Partial data-types are treated specially:
///   - A partial struct starting at offset 0 of a structure adds an \e open range of the whole structure.
///   - A partial struct whose parent is an array adds an \e open range of the array element (if it is known).
///   - Either form of partial struct also adds a \e fixed range of unknown data-type if any flags are set.
///   - A partial union at offset 0 adds an \e open range of the parent union; otherwise nothing is added.
///
/// \param start is the starting offset of the range
/// \param ct is the data-type
/// \param flags indicates any boolean properties applied to the range
/// \param types is the TypeFactory used to construct unknown data-types
void MapState::addFixedType(uintb start,Datatype *ct,uint4 flags,TypeFactory *types)
{
  const type_metatype meta = ct->getMetatype();

  if (meta == TYPE_PARTIALUNION) {
    TypePartialUnion *partialUnion = (TypePartialUnion *)ct;
    if (partialUnion->getOffset() == 0) {	// Initial fields of the union are moved here
      Datatype *parentUnion = partialUnion->getParentUnion();
      addRange(start,parentUnion,0,RangeHint::open,-1);
    }
    return;
  }

  if (meta != TYPE_PARTIALSTRUCT) {		// Ordinary data-type
    addRange(start,ct,flags,RangeHint::fixed,-1);
    return;
  }

  TypePartialStruct *partialStruct = (TypePartialStruct *)ct;
  Datatype *parent = partialStruct->getParent();
  const type_metatype parentMeta = parent->getMetatype();
  if (parentMeta == TYPE_STRUCT) {
    if (partialStruct->getOffset() == 0)	// Initial fields of the structure are moved here
      addRange(start,parent,0,RangeHint::open,-1);
  }
  else if (parentMeta == TYPE_ARRAY) {		// Elements of an array are moved here
    Datatype *element = ((TypeArray *)parent)->getBase();
    if (element->getMetatype() != TYPE_UNKNOWN)
      addRange(start,element,0,RangeHint::open,-1);
  }
  if (flags == 0)
    return;
  // Flags are set (constant COPY), so generate a fixed reference as well
  Datatype *unknownType = types->getBase(partialStruct->getSize(), TYPE_UNKNOWN);
  addRange(start,unknownType,flags,RangeHint::fixed,-1);
}
/// Assuming a sorted list, from among a sequence of RangeHints with the same start and size, select
/// the most specific data-type. Set all elements to use this data-type, and eliminate duplicates.
void MapState::reconcileDatatypes(void)
@ -863,7 +966,7 @@ void MapState::reconcileDatatypes(void)
int4 curPos = 1;
while(curPos < maplist.size()) {
RangeHint *curHint = maplist[curPos++];
if (curHint->start == startHint->start && curHint->size == startHint->size) {
if (curHint->start == startHint->start && curHint->size == startHint->size && curHint->flags == startHint->flags) {
Datatype *curDatatype = curHint->type;
if (curDatatype->typeOrder(*startDatatype) < 0) // Take the most specific variant of data-type
startDatatype = curDatatype;
@ -947,7 +1050,8 @@ void MapState::gatherSymbols(const EntryMap *rangemap)
// if ((*iter).isPiece()) continue; // This should probably never happen
uintb start = (*riter).getAddr().getOffset();
Datatype *ct = sym->getType();
addRange(start,ct,sym->getFlags(),RangeHint::fixed,-1);
uint4 flags = sym->isTypeLocked() ? RangeHint::typelock : 0;
addRange(start,ct,flags,RangeHint::fixed,-1);
}
}
@ -974,6 +1078,42 @@ bool MapState::initialize(void)
return true;
}
/// A read is \e not considered active if it is:
///   - a marker op whose output has the same storage address as the Varnode
///   - a PIECE whose output places the Varnode at the storage address it already has
///   - a SUBPIECE (any data-type information comes from its output instead)
///
/// Any other op reading the Varnode counts as an active read.
/// \param vn is the given Varnode to test
/// \return \b true if there is an active operation reading the Varnode
bool MapState::isReadActive(Varnode *vn)
{
  for(list<PcodeOp *>::const_iterator iter=vn->beginDescend();iter!=vn->endDescend();++iter) {
    PcodeOp *reader = *iter;
    if (reader->isMarker()) {
      if (vn->getAddr() != reader->getOut()->getAddr())
	return true;			// Value is moving to different storage
      continue;
    }
    switch(reader->code()) {
      case CPUI_SUBPIECE:
	// Any data-type information comes from the output Varnode, so we ignore input
	break;
      case CPUI_PIECE:
      {
	Address pieceAddr = reader->getOut()->getAddr();
	// Input that occupies the start of the output's storage
	Varnode *firstVn = reader->getIn(pieceAddr.isBigEndian() ? 0 : 1);
	if (firstVn != vn)		// vn is the other input, which follows the first
	  pieceAddr = pieceAddr + firstVn->getSize();
	if (vn->getAddr() != pieceAddr)
	  return true;
	break;
      }
      default:
	return true;
    }
  }
  return false;
}
/// Add a RangeHint corresponding to each Varnode stored in the address space
/// for the given function. The current knowledge of the Varnode's data-type
/// is included as part of the hint.
@ -983,53 +1123,85 @@ void MapState::gatherVarnodes(const Funcdata &fd)
{
VarnodeLocSet::const_iterator riter,iterend;
Varnode *vn;
TypeFactory *types = fd.getArch()->types;
riter = fd.beginLoc(spaceid);
iterend = fd.endLoc(spaceid);
while(riter != iterend) {
vn = *riter++;
if (vn->isFree()) continue;
uintb start = vn->getOffset();
Datatype *ct = vn->getType();
// Assume parents are present so partials aren't needed
if (ct->getMetatype() == TYPE_PARTIALSTRUCT) continue;
if (ct->getMetatype() == TYPE_PARTIALUNION) continue;
// Do not force Varnode flags on the entry
// as the flags were inherited from the previous
// (now obsolete) entry
addRange(start,ct,0,RangeHint::fixed,-1);
// Do not force Varnode flags on the entry
// as the flags were inherited from the previous
// (now obsolete) entry
if (!vn->isWritten()) {
if (isReadActive(vn))
addFixedType(vn->getOffset(), vn->getType(), 0, types);
continue;
}
PcodeOp *op = vn->getDef();
switch(op->code()) {
case CPUI_INDIRECT:
{
Varnode *invn = op->getIn(0);
if (vn->getAddr() != invn->getAddr() || isReadActive(vn)) {
addFixedType(vn->getOffset(), vn->getType(), 0, types);
}
break;
}
case CPUI_MULTIEQUAL:
{
int4 i;
for(i=0;i<op->numInput();++i) {
Varnode *invn = op->getIn(i);
if (vn->getAddr() != invn->getAddr())
break;
}
if (i != op->numInput() || isReadActive(vn))
addFixedType(vn->getOffset(), vn->getType(), 0, types);
break;
}
case CPUI_PIECE:
{
// Treat PIECE as two COPYs
Address addr = vn->getAddr();
int4 slot = addr.isBigEndian() ? 0 : 1;
Varnode *inFirst = op->getIn(slot);
if (inFirst->getAddr() != addr)
addFixedType(addr.getOffset(),inFirst->getType(), 0, types);
addr = addr + inFirst->getSize();
Varnode *inSecond = op->getIn(1-slot);
if (inSecond->getAddr() != addr)
addFixedType(addr.getOffset(),inSecond->getType(), 0, types);
if (isReadActive(vn))
addFixedType(vn->getOffset(),vn->getType(), 0, types);
break;
}
case CPUI_SUBPIECE:
{
// Don't treat SUBPIECE as an active write if it is just copying to the same storage location
Address addr = op->getIn(0)->getAddr();
int4 trunc;
if (addr.isBigEndian()) {
trunc = op->getIn(0)->getSize() - vn->getSize() - (int4)op->getIn(1)->getOffset();
}
else {
trunc = (int4)op->getIn(1)->getOffset();
}
addr = addr + trunc;
if (addr != vn->getAddr() || isReadActive(vn)) {
addFixedType(vn->getOffset(), vn->getType(), 0, types);
}
break;
}
case CPUI_COPY:
addFixedType(vn->getOffset(), vn->getType(), op->getIn(0)->isConstant() ? RangeHint::copy_constant : 0, types);
break;
default:
addFixedType(vn->getOffset(), vn->getType(), 0, types);
break;
}
}
}
/// Add a RangeHint corresponding to each HighVariable that is mapped to our
/// address space for the given function.
/// \param fd is the given function
void MapState::gatherHighs(const Funcdata &fd)
{
vector<HighVariable *> varvec;
VarnodeLocSet::const_iterator riter,iterend;
Varnode *vn;
HighVariable *high;
riter = fd.beginLoc(spaceid);
iterend = fd.endLoc(spaceid);
while(riter != iterend) {
vn = *riter++;
high = vn->getHigh();
if (high == (HighVariable *)0) continue;
if (high->isMark()) continue;
if (!high->isAddrTied()) continue;
vn = high->getTiedVarnode(); // Original vn may not be good representative
high->setMark();
varvec.push_back(high);
uintb start = vn->getOffset();
Datatype *ct = high->getType(); // Get type from high
if (ct->getMetatype() == TYPE_PARTIALUNION) continue;
addRange(start,ct,0,RangeHint::fixed,-1);
}
for(int4 i=0;i<varvec.size();++i)
varvec[i]->clearMark();
}
/// For any Varnode that looks like a pointer into our address space, create an
/// \e open RangeHint. The size of the object may not be known.
/// \param fd is the given function
@ -1092,12 +1264,12 @@ void ScopeLocal::restructureVarnode(bool aliasyes)
state.gatherVarnodes(*fd); // Gather stack type information from varnodes
state.gatherOpen(*fd);
state.gatherSymbols(maptable[space->getIndex()]);
restructure(state);
overlapProblems = restructure(state);
// At some point, processing mapped input symbols may be folded
// into the above gather/restructure process, but for now
// we just define fake symbols so that mark_unaliased will work
clearUnlockedCategory(0);
clearUnlockedCategory(Symbol::function_parameter);
fakeInputSymbols();
state.sortAlias();
@ -1109,30 +1281,6 @@ void ScopeLocal::restructureVarnode(bool aliasyes)
annotateRawStackPtr(); // Add a special placeholder PTRSUB
}
/// Lay out the stack Symbols using HighVariable information.
/// Any unlocked entries are cleared first.  Hints are then gathered from HighVariables, pointer
/// references, and existing Symbols, and merged into a new layout.  If the merge reports
/// overlap problems, a warning is attached to the function header.
void ScopeLocal::restructureHigh(void)
{
  clearUnlockedCategory(-1);			// Clear out any unlocked entries

  // Organize list of ranges to insert
  MapState state(space,getRangeTree(),fd->getFuncProto().getParamRange(),
		 glb->types->getBase(1,TYPE_UNKNOWN));
#ifdef OPACTION_DEBUG
  if (debugon)
    state.turnOnDebug(glb);
#endif

  state.gatherHighs(*fd);			// Gather stack type information from highs
  state.gatherOpen(*fd);
  state.gatherSymbols(maptable[space->getIndex()]);

  if (restructure(state))
    fd->warningHeader("Could not reconcile some variable overlaps");
}
/// RangeHints from the given collection are merged into a definitive set of Symbols
/// for \b this scope. Overlapping or open RangeHints are adjusted to form a disjoint
/// cover of the mapped portion of the address space. Names for the disjoint cover elements
@ -1285,12 +1433,12 @@ void ScopeLocal::fakeInputSymbols(void)
int4 size = (endpoint - addr.getOffset()) + 1;
Datatype *ct = fd->getArch()->types->getBase(size,TYPE_UNKNOWN);
try {
addSymbol("",ct,addr,usepoint)->getSymbol();
Symbol *sym = addSymbol("",ct,addr,usepoint)->getSymbol();
setCategory(sym, Symbol::fake_input, -1);
}
catch(LowlevelError &err) {
fd->warningHeader(err.explain);
}
// setCategory(sym,0,index);
}
}
}

View File

@ -97,6 +97,11 @@ public:
open = 1, ///< An array with a (possibly unknown) number of elements
endpoint = 2 ///< An (artificial) boundary to the range of bytes getting analyzed
};
/// \brief Boolean properties for the range
///
/// Each property is a distinct bit, so properties can be combined in a single flags
/// value and tested individually with a bitwise AND.
enum {
typelock = 1, ///< Data-type for the range is locked
copy_constant = 2 ///< Only a constant is COPYed into the range
};
private:
uintb start; ///< Starting offset of \b this range of bytes
int4 size; ///< Number of bytes in a single element of this range
@ -109,6 +114,8 @@ public:
RangeHint(void) {} ///< Uninitialized constructor
RangeHint(uintb st,int4 sz,intb sst,Datatype *ct,uint4 fl,RangeType rt,int4 hi) {
start=st; size=sz; sstart=sst; type=ct; flags=fl; rangeType = rt; highind=hi; } ///< Initialized constructor
bool isTypeLock(void) const { return ((flags & typelock)!=0); } ///< Is the data-type for \b this range locked
bool isConstAbsorbable(const RangeHint *b) const; ///< Can another range be absorbed into \b this as a constant
bool reconcile(const RangeHint *b) const;
bool contain(const RangeHint *b) const;
bool preferred(const RangeHint *b,bool reconcile) const;
@ -173,7 +180,9 @@ class MapState {
AliasChecker checker; ///< A collection of pointer Varnodes into our address space
void addGuard(const LoadGuard &guard,OpCode opc,TypeFactory *typeFactory); ///< Add LoadGuard record as a hint to the collection
void addRange(uintb st,Datatype *ct,uint4 fl,RangeHint::RangeType rt,int4 hi); ///< Add a hint to the collection
void addFixedType(uintb start,Datatype *ct,uint4 flags,TypeFactory *types); ///< Add a fixed reference to a specific data-type
void reconcileDatatypes(void); ///< Decide on data-type for RangeHints at the same address
static bool isReadActive(Varnode *vn); ///< Is the given Varnode read by an active operation
public:
#ifdef OPACTION_DEBUG
mutable bool debugon;
@ -188,7 +197,6 @@ public:
const vector<uintb> &getAlias(void) { return checker.getAlias(); } ///< Get the list of alias starting offsets
void gatherSymbols(const EntryMap *rangemap); ///< Add Symbol information as hints to the collection
void gatherVarnodes(const Funcdata &fd); ///< Add stack Varnodes as hints to the collection
void gatherHighs(const Funcdata &fd); ///< Add HighVariables as hints to the collection
void gatherOpen(const Funcdata &fd); ///< Add pointer references as hints to the collection
RangeHint *next(void) { return *iter; } ///< Get the current RangeHint in the collection
bool getNext(void) { ++iter; if (iter==maplist.end()) return false; return true; } ///< Advance the iterator, return \b true if another hint is available
@ -210,6 +218,7 @@ class ScopeLocal : public ScopeInternal {
uintb maxParamOffset; ///< Maximum offset of parameter passed (to a called function) on the stack
bool stackGrowsNegative; ///< Marked \b true if the stack is considered to \e grow towards smaller offsets
bool rangeLocked; ///< True if the subset of addresses \e mapped to \b this scope has been locked
bool overlapProblems; ///< True if the last \b restructure had overlapping variable problems
bool adjustFit(RangeHint &a) const; ///< Make the given RangeHint fit in the current Symbol map
void createEntry(const RangeHint &a); ///< Create a Symbol entry corresponding to the given (fitted) RangeHint
bool restructure(MapState &state); ///< Merge hints into a formal Symbol layout of the address space
@ -225,6 +234,9 @@ public:
AddrSpace *getSpaceId(void) const { return space; } ///< Get the associated (stack) address space
/// \brief Return \b true if \b restructure analysis discovered overlapping variables
bool hasOverlapProbems(void) const { return overlapProblems; }
/// \brief Is this a storage location for \e unaffected registers
///
/// \param vn is the Varnode storing an \e unaffected register
@ -245,7 +257,6 @@ public:
int4 &index,uint4 flags) const;
void resetLocalWindow(void); ///< Reset the set of addresses that are considered mapped by the scope to the default
void restructureVarnode(bool aliasyes); ///< Layout mapped symbols based on Varnode information
void restructureHigh(void); ///< Layout mapped symbols based on HighVariable information
SymbolEntry *remapSymbol(Symbol *sym,const Address &addr,const Address &usepoint);
SymbolEntry *remapSymbolDynamic(Symbol *sym,uint8 hash,const Address &usepoint);
void recoverNameRecommendationsForSymbols(void);

View File

@ -0,0 +1,122 @@
<decompilertest>
<binaryimage arch="x86:LE:64:default:gcc">
<bytechunk space="ram" offset="0x100000" readonly="true">
f30f1efa4883ec28660f6f0540020000
b8616c00004889e766894424140f2904
24c744241062796520c64424166ce8cd
0f0000488d7c240ce8c30f00004883c4
28c3
</bytechunk>
<bytechunk space="ram" offset="0x100050" readonly="true">
f30f1efa48b868656c6c6f2077685548
83ec10488d6c240348894424034889ef
c744240b69726c00c644240f00e87e0f
00004889efc644240f0048b877686972
6c6564204889442403c744240b706561
73e85a0f00004883c4105dc3
</bytechunk>
<bytechunk space="ram" offset="0x1000b0" readonly="true">
f30f1efa4883ec38660f6f05a0010000
4889e70f290424660f6f05a10100000f
29442410660f6f05a40100000f294424
20e8220f00004883c438c3
</bytechunk>
<bytechunk space="ram" offset="0x1000f0" readonly="true">
f30f1efa48b86f6e652074776f204883
ec38893c244889e74889442408b84521
0000c7442410544852456689442414c6
4424160089742404e8e30e00004883c4
38c3
</bytechunk>
<bytechunk space="ram" offset="0x100140" readonly="true">
f30f1efa55b8730000005389fb4883ec
28660f6f05370100004889e566894424
104889ef0f290424e8930e00004889ef
885c2409e8870e00004883c4285b5dc3
</bytechunk>
<bytechunk space="ram" offset="0x100180" readonly="true">
f30f1efa4883ec28b92000000048b84d
6573736167653a4889042466894c2408
85ff7524ba0a0000004889e7c7442409
5a45524f668954240de8420e00004883
c428c3000000000048b8504f53495449
56454889e74889442409b80900000066
89442411e8170e00004883c428c3
</bytechunk>
<bytechunk space="ram" offset="0x1001f0" readonly="true">
f30f1efa83ff647e3748b852616e6765
206578c6051c0e000000488905070e00
00b865640000c705000e000063656564
668905fd0d0000c3
</bytechunk>
<bytechunk space="ram" offset="0x100230" readonly="true">
48b876616c6964c2a300488905d70d00
00c3
</bytechunk>
<bytechunk space="ram" offset="0x100250" readonly="true">
68656c6c6f20776f726c6400676f6f64
</bytechunk>
<bytechunk space="ram" offset="0x100260" readonly="true">
48000000450000004c0000004c000000
4f00000020000000570000004f000000
520000004c0000004400000000000000
</bytechunk>
<bytechunk space="ram" offset="0x100290" readonly="true">
736c656570696e6720646f67206c6965
</bytechunk>
<symbol space="ram" offset="0x100000" name="nullbetween"/>
<symbol space="ram" offset="0x100050" name="twomessages"/>
<symbol space="ram" offset="0x1000b0" name="utf32message"/>
<symbol space="ram" offset="0x1000f0" name="stringInStruct"/>
<symbol space="ram" offset="0x100140" name="alterString"/>
<symbol space="ram" offset="0x100180" name="conditionalString"/>
<symbol space="ram" offset="0x1001f0" name="globalString"/>
<symbol space="ram" offset="0x101000" name="customPrint"/>
<symbol space="ram" offset="0x101008" name="customPrintWide"/>
<symbol space="ram" offset="0x101010" name="receiveStruct"/>
</binaryimage>
<script>
<com>option readonly on</com>
<com>parse line struct stringstruct { int4 a; int4 b; char warning[32]; int4 c; };</com>
<com>parse line extern void customPrint(char *);</com>
<com>parse line extern void customPrintWide(wchar4 *);</com>
<com>parse line extern void receiveStruct(stringstruct *);</com>
<com>map addr r0x101018 char globstring[32]</com>
<com>lo fu nullbetween</com>
<com>decompile</com>
<com>print C</com>
<com>lo fu twomessages</com>
<com>decompile</com>
<com>print C</com>
<com>lo fu utf32message</com>
<com>decompile</com>
<com>print C</com>
<com>lo fu stringInStruct</com>
<com>decompile</com>
<com>print C</com>
<com>lo fu alterString</com>
<com>decompile</com>
<com>print C</com>
<com>lo fu conditionalString</com>
<com>decompile</com>
<com>print C</com>
<com>lo fu globalString</com>
<com>decompile</com>
<com>print C</com>
<com>quit</com>
</script>
<stringmatch name="Stack string #1" min="1" max="1">builtin_strncpy\(acStack_28.*"goodbye all",0xb\);</stringmatch>
<stringmatch name="Stack string #2" min="1" max="1">builtin_strncpy\(acStack_28,.*"hello world",0xc\);</stringmatch>
<stringmatch name="Stack string #3" min="1" max="1">builtin_strncpy\(acStack_15,.*"hello whirl",0xc\);</stringmatch>
<stringmatch name="Stack string #4" min="1" max="1">builtin_strncpy\(acStack_15,.*"whirled peas",0xd\);</stringmatch>
<stringmatch name="Stack string #5" min="1" max="1">acStack_15\[0xc\] = 0;</stringmatch>
<stringmatch name="Stack string #6" min="1" max="1">builtin_wcsncpy\(awStack_38,.*"HELLO WORLD",0xc\);</stringmatch>
<stringmatch name="Stack string #7" min="1" max="1">builtin_strncpy\(sStack_38\.warning,"one two THREE!",0xf\);</stringmatch>
<stringmatch name="Stack string #8" min="1" max="1">builtin_strncpy\(acStack_38,"sleeping dog lies",0x12\);</stringmatch>
<stringmatch name="Stack string #9" min="1" max="1">acStack_38\[9\] = param_1;</stringmatch>
<stringmatch name="Stack string #10" min="1" max="1">builtin_strncpy\(acStack_28,"Message: ",9\);</stringmatch>
<stringmatch name="Stack string #11" min="1" max="1">builtin_strncpy\(acStack_28 \+ 9,"ZERO\\n",6\);</stringmatch>
<stringmatch name="Stack string #12" min="1" max="1">builtin_strncpy\(acStack_28 \+ 9,"POSITIVE\\t",10\);</stringmatch>
<stringmatch name="Stack string #13" min="1" max="1">builtin_strncpy\(globstring,"Range exceeded",0xf\);</stringmatch>
<stringmatch name="Stack string #14" min="1" max="1">builtin_strncpy\(globstring,"valid&#xc2;&#xa3;",8\);</stringmatch>
</decompilertest>

View File

@ -29,7 +29,8 @@ import ghidra.app.plugin.processors.sleigh.symbol.ContextSymbol;
import ghidra.app.plugin.processors.sleigh.symbol.Symbol;
import ghidra.app.util.DataTypeDependencyOrderer;
import ghidra.program.model.address.*;
import ghidra.program.model.data.*;
import ghidra.program.model.data.BuiltIn;
import ghidra.program.model.data.DataType;
import ghidra.program.model.lang.*;
import ghidra.program.model.listing.*;
import ghidra.program.model.mem.MemoryAccessException;
@ -331,16 +332,9 @@ public class DecompileDebug {
}
private void dumpDataTypes(OutputStream debugStream) throws IOException {
DataOrganization dataOrganization = program.getCompilerSpec().getDataOrganization();
int intSize = dataOrganization.getIntegerSize();
int longSize = dataOrganization.getLongSize();
XmlEncode encoder = new XmlEncode();
encoder.openElement(ELEM_TYPEGRP);
encoder.writeSignedInteger(ATTRIB_INTSIZE, intSize);
encoder.writeSignedInteger(ATTRIB_LONGSIZE, longSize);
encoder.writeSignedInteger(ATTRIB_STRUCTALIGN, 4);
encoder.writeSignedInteger(ATTRIB_ENUMSIZE, 4);
encoder.writeBool(ATTRIB_ENUMSIGNED, false);
// structalign should come out of pcodelanguage.getCompilerSpec()
DataTypeDependencyOrderer TypeOrderer =
new DataTypeDependencyOrderer(program.getDataTypeManager(), dtypes);

View File

@ -115,10 +115,10 @@ public record AttributeId(String name, int id) {
public static final AttributeId ATTRIB_CHAR = new AttributeId("char", 49);
public static final AttributeId ATTRIB_CORE = new AttributeId("core", 50);
public static final AttributeId ATTRIB_ENUM = new AttributeId("enum", 51);
public static final AttributeId ATTRIB_ENUMSIGNED = new AttributeId("enumsigned", 52);
public static final AttributeId ATTRIB_ENUMSIZE = new AttributeId("enumsize", 53);
public static final AttributeId ATTRIB_INTSIZE = new AttributeId("intsize", 54);
public static final AttributeId ATTRIB_LONGSIZE = new AttributeId("longsize", 55);
// public static final AttributeId ATTRIB_ENUMSIGNED = new AttributeId("enumsigned", 52); // deprecated
// public static final AttributeId ATTRIB_ENUMSIZE = new AttributeId("enumsize", 53); // deprecated
// public static final AttributeId ATTRIB_INTSIZE = new AttributeId("intsize", 54); // deprecated
// public static final AttributeId ATTRIB_LONGSIZE = new AttributeId("longsize", 55); // deprecated
public static final AttributeId ATTRIB_OPAQUESTRING = new AttributeId("opaquestring", 56);
public static final AttributeId ATTRIB_SIGNED = new AttributeId("signed", 57);
public static final AttributeId ATTRIB_STRUCTALIGN = new AttributeId("structalign", 58);