From 3e78eaa99b8a0a91f6585c5bc80e7daddca2b3b3 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Sat, 22 Feb 2025 02:43:25 +0000 Subject: [PATCH 01/50] add more debug info --- src/lib/Reachable.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index cb20afa..4974fc0 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -351,6 +351,7 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { auto *Pred = *PI; if (reachable.insert(Pred).second) { + RA_DEBUG("Adding Pred: " << *Pred << "\n"); worklist.push_back(Pred); } } From f70340d325a37b68334cc565d6ba77d0d20c0fcb Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Sat, 22 Feb 2025 02:44:13 +0000 Subject: [PATCH 02/50] update ground truth of test 5 --- src/tests/BBtargets5.txt | 2 +- src/tests/ground_truth_distance5.txt | 14 +++++++------- src/tests/ground_truth_policy5.txt | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/tests/BBtargets5.txt b/src/tests/BBtargets5.txt index aeed917..ef40f3a 100644 --- a/src/tests/BBtargets5.txt +++ b/src/tests/BBtargets5.txt @@ -1 +1 @@ -5.c:16 \ No newline at end of file +5.c:14 \ No newline at end of file diff --git a/src/tests/ground_truth_distance5.txt b/src/tests/ground_truth_distance5.txt index 781abdc..cd9ea95 100644 --- a/src/tests/ground_truth_distance5.txt +++ b/src/tests/ground_truth_distance5.txt @@ -1,7 +1,7 @@ -628124478,5.c:35,0.000000 -3546980649,5.c:23,1000.000000 -628120118,5.c:31,0.000000 -628088539,5.c:25,0.000000 -628053689,5.c:16,0.000000 -628083092,5.c:20,0.000000 -628090717,5.c:27,0.000000 +628122300,5.c:33,0.000000 +3546908775,5.c:21,-0.000000 +628092893,5.c:29,0.000000 +628086361,5.c:23,0.000000 +628088539,5.c:25,-0.000000 +628051511,5.c:14,0.000000 +628089624,5.c:26,0.000000 diff --git a/src/tests/ground_truth_policy5.txt b/src/tests/ground_truth_policy5.txt index 12321e3..a151eb8 100644 --- 
a/src/tests/ground_truth_policy5.txt +++ b/src/tests/ground_truth_policy5.txt @@ -1,2 +1,2 @@ -3546980649,inf,0.000000 +3546908775,-0.000000,0.000000 ########## From 87884110508a2184e8496a66742cc2c18112c1ff Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Tue, 25 Feb 2025 01:01:02 +0000 Subject: [PATCH 03/50] add unit test 8 --- src/tests/8.c | 34 ++++++++++++++++++++++++++++ src/tests/BBtargets8.txt | 1 + src/tests/ground_truth_distance8.txt | 4 ++++ src/tests/ground_truth_policy8.txt | 2 ++ 4 files changed, 41 insertions(+) create mode 100644 src/tests/8.c create mode 100644 src/tests/BBtargets8.txt create mode 100644 src/tests/ground_truth_distance8.txt create mode 100644 src/tests/ground_truth_policy8.txt diff --git a/src/tests/8.c b/src/tests/8.c new file mode 100644 index 0000000..8e094a1 --- /dev/null +++ b/src/tests/8.c @@ -0,0 +1,34 @@ +// RUN: %clang -O0 -g -emit-llvm -c %s -o %t.bc +// RUN: %KAMain %t.bc --dump-distance=%t.distance.txt --dump-policy=%t.policy.txt --target-list=%S/BBtargets8.txt --entry-list=%S/entry.txt +// RUN: diff %t.distance.txt %S/ground_truth_distance8.txt +// RUN: diff %t.policy.txt %S/ground_truth_policy8.txt + +/* + Simple C program for reachability analysis testing with multiple conditional + branches and return edges. + + The expected outcome is that KAMain, when run over the generated LLVM bitcode, + will produce a distance file and a policy file that match the provided ground truth. 
+*/ + +int target() { + return 0; + } + +int somethingelse() { + return 0; +} + +void foo(int i) { + if (i) + target(); + else + somethingelse(); +} + +int main() { + int i = 0; + foo(i); + foo(i+1); + return 0; +} diff --git a/src/tests/BBtargets8.txt b/src/tests/BBtargets8.txt new file mode 100644 index 0000000..7aacca9 --- /dev/null +++ b/src/tests/BBtargets8.txt @@ -0,0 +1 @@ +8.c:15 \ No newline at end of file diff --git a/src/tests/ground_truth_distance8.txt b/src/tests/ground_truth_distance8.txt new file mode 100644 index 0000000..7f02cdc --- /dev/null +++ b/src/tests/ground_truth_distance8.txt @@ -0,0 +1,4 @@ +3929396380,8.c:30,1000.000000 +819947467,8.c:22,1000.000000 +3929364797,8.c:24,0.000000 +3929329949,8.c:15,0.000000 diff --git a/src/tests/ground_truth_policy8.txt b/src/tests/ground_truth_policy8.txt new file mode 100644 index 0000000..db369f6 --- /dev/null +++ b/src/tests/ground_truth_policy8.txt @@ -0,0 +1,2 @@ +819947467,1000.000000,0.000000 +########## From 4b1c47d05460e5896c668189406581668f123658 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 18 Apr 2025 20:44:02 +0000 Subject: [PATCH 04/50] fixup --- src/lib/KAMain.cc | 2 +- src/lib/Reachable.cc | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/lib/KAMain.cc b/src/lib/KAMain.cc index 39258f2..e293ac5 100644 --- a/src/lib/KAMain.cc +++ b/src/lib/KAMain.cc @@ -214,7 +214,7 @@ int main(int argc, char **argv) { TyPMCGPass TyCG(&GlobalCtx); TyCG.run(GlobalCtx.Modules); - ReachableCallGraphPass RCGPass(&GlobalCtx, TargetList, EntryList, false); + ReachableCallGraphPass RCGPass(&GlobalCtx, TargetList, EntryList, true); RCGPass.run(GlobalCtx.Modules); if (!DumpBidMapping.empty() && !DumpFuncInfo.empty()){ diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 6229952..b51a19a 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -162,7 +162,8 @@ bool ReachableCallGraphPass::findCalleesByType(CallInst *CI, FuncSet &FS) { // just compare 
known args if (F->getFunctionType()->isVarArg()) { //errs() << "VarArg: " << F->getName() << "\n"; - KA_ERR("VarArg address taken function\n"); + WARNING("VarArg address taken function\n"); + continue; } else if (F->arg_size() != CS.arg_size()) { RA_DEBUG("ArgNum mismatch: " << F->getName() << "\n"); continue; @@ -501,7 +502,7 @@ void ReachableCallGraphPass::run(ModuleList &modules) { FuncSet &Callees = UseTypeBasedCallGraph ? calleeByType[CI] : Ctx->Callees[CI]; RA_LOG("\tfrom indirect call @" << CF->getName() << ", callee size = " << Callees.size() << "\n"); // XXX: skip potentially imprecise callsites? - if (Callees.size() > 50) { + if (itr->second.size() > 5 && Callees.size() > 50) { RA_DEBUG("Skip indirect call with too many callees\n"); continue; } @@ -724,14 +725,15 @@ void ReachableCallGraphPass::dumpDistance(std::ostream &OS, bool dumpSolution, b for (auto &I : *BB) { // check for callees if (const CallBase *CI = dyn_cast(&I)) { - auto itr = Ctx->Callees.find(CI); - if (itr == Ctx->Callees.end() && UseTypeBasedCallGraph) { - itr = calleeByType.find(CI); - } - for (auto F: itr->second) { + FuncSet &Callees = CI->isIndirectCall() ? + (UseTypeBasedCallGraph ? 
calleeByType[CI] : Ctx->Callees[CI]) + : Ctx->Callees[CI]; + for (auto F: Callees) { auto *FBB = &F->getEntryBlock(); - if (distances.find(FBB) != distances.end() && visited.insert(FBB).second) { - worklist.push_back(FBB); + if (distances.find(FBB) != distances.end()) { + RA_DEBUG("callee: " << F->getName() << " reachable \n"); + if (visited.insert(FBB).second) + worklist.push_back(FBB); } } } From b6bcc4ad312f6ca14948f6884b2f14cdd84747b9 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Tue, 11 Feb 2025 01:31:19 +0000 Subject: [PATCH 05/50] replace function multiple defination assertion with warning --- src/lib/KAMain.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/lib/KAMain.cc b/src/lib/KAMain.cc index d3ce13c..a43644c 100644 --- a/src/lib/KAMain.cc +++ b/src/lib/KAMain.cc @@ -140,7 +140,12 @@ void doBasicInitialization(Module *M) { // external linkage always ends up with the function name auto FID = F.getGUID(); if (!F.isDeclaration() && !F.empty()) { - assert(GlobalCtx.Funcs.count(FID) == 0); + if (GlobalCtx.Funcs.count(FID) != 0) { + WARNING("Function " << F.getName() + << " has been defined multiple times, previously in " + << GlobalCtx.Funcs[FID]->getParent()->getModuleIdentifier() + << ", and now in " << M->getModuleIdentifier() << "\n"); + } GlobalCtx.Funcs[FID] = &F; } else { GlobalCtx.ExtFuncs[FID] = &F; From b3efeade08a268d10ccd62bbe128c114c3b9dae9 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Tue, 11 Feb 2025 03:24:02 +0000 Subject: [PATCH 06/50] dump bid and func_guid info --- src/lib/KAMain.cc | 11 ++++ src/lib/Reachable.cc | 134 +++++++++++++++++++++++++++++++++++++++++-- src/lib/Reachable.h | 1 + 3 files changed, 142 insertions(+), 4 deletions(-) diff --git a/src/lib/KAMain.cc b/src/lib/KAMain.cc index a43644c..45b061b 100644 --- a/src/lib/KAMain.cc +++ b/src/lib/KAMain.cc @@ -54,6 +54,11 @@ cl::opt DumpPolicy( cl::opt DumpDistance( "dump-distance", cl::desc("Dump distance"), cl::init("")); +cl::opt 
DumpBidMapping( + "dump-bid-mapping", cl::desc("Dump basic block ID mapping, format: bid,fun_GUID,filepath:linenum"), cl::init("")); + +cl::opt DumpFuncInfo( + "dump-func-info", cl::desc("Dump function info, format: fun_GUID,fun_name,filepath,start_linenum,end_linenum"), cl::init("")); GlobalContext GlobalCtx; @@ -211,6 +216,12 @@ int main(int argc, char **argv) { ReachableCallGraphPass RCGPass(&GlobalCtx, TargetList, EntryList, false); RCGPass.run(GlobalCtx.Modules); + + if (!DumpBidMapping.empty() && !DumpFuncInfo.empty()){ + std::ofstream bbLocs(DumpBidMapping); + std::ofstream funcInfo(DumpFuncInfo); + RCGPass.dumpIDMapping(GlobalCtx.Modules, bbLocs, funcInfo); + } if (!DumpPolicy.empty()) { std::ofstream policy(DumpPolicy); RCGPass.dumpPolicy(policy); diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 6b0d4c0..6229952 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -7,26 +7,43 @@ */ -#include #include +#include +#include +#include #include -#include #include -#include #include -#include +#include +#include #include +#include +#include +#include +#include +#include +#include #include #include #include #include +#include +#include #include +#include #include "Reachable.h" #include "Annotation.h" #include "PointTo.h" + +#if defined(LLVM34) +#include "llvm/DebugInfo.h" +#else +#include "llvm/IR/DebugInfo.h" +#endif + #define RA_LOG(stmt) KA_LOG(2, "Reachable: " << stmt) #define RA_DEBUG(stmt) KA_LOG(3, "Reachable: " << stmt) @@ -596,6 +613,84 @@ std::string ReachableCallGraphPass::getSourceLocation(const BasicBlock *BB) { return "NoLoc:0"; } +/// \brief Retrieve the first available debug location in \p BB that is not +/// inside /usr/ and store the **absolute, normalized path** in \p Filename. +/// Sets \p Line and \p Col accordingly. +/// +/// This version does: +/// 1) Loops over instructions in \p BB +/// 2) Checks the debug location (and possibly inlined-at location) +/// 3) Builds an absolute, normalized path (resolving "." 
and "..") +/// 4) Skips if the path is empty, line=0, or the path starts with "/usr/" +/// 5) Returns the first valid debug info found +void getDebugLocationFullPath(const BasicBlock &BB, + std::string &Filename, + unsigned &Line, + unsigned &Col) { + Filename.clear(); + Line = 0; + Col = 0; + + // We don't want paths that point to system libraries in /usr/ + static const std::string Xlibs("/usr/"); + + // Iterate over instructions in the basic block + for (auto &Inst : BB) { + if (DILocation *Loc = Inst.getDebugLoc()) { + // Extract directory & filename + std::string Dir = Loc->getDirectory().str(); + std::string File = Loc->getFilename().str(); + unsigned L = Loc->getLine(); + unsigned C = Loc->getColumn(); + + // If there's no filename, check the inlined location + if (File.empty()) { + if (DILocation *inlinedAt = Loc->getInlinedAt()) { + Dir = inlinedAt->getDirectory().str(); + File = inlinedAt->getFilename().str(); + L = inlinedAt->getLine(); + C = inlinedAt->getColumn(); + } + } + + // Skip if still no filename or line==0 + if (File.empty() || L == 0) + continue; + + // Build an absolute path in a SmallString + llvm::SmallString<256> FullPath; + + // 1) If Dir is already absolute, just start with that. + // Otherwise, use the current working directory as a base. + if (!Dir.empty() && llvm::sys::path::is_absolute(Dir)) { + FullPath = Dir; + } else { + llvm::sys::fs::current_path(FullPath); // get the current working dir + if (!Dir.empty()) { + llvm::sys::path::append(FullPath, Dir); + } + } + + // 2) Append the filename + llvm::sys::path::append(FullPath, File); + + // 3) Remove dot segments (both "." 
and "..") + llvm::sys::path::remove_dots(FullPath, /*remove_dot_dot=*/true); + + // Now FullPath is absolute & normalized + // Check if it's in /usr/ + if (StringRef(FullPath).startswith(Xlibs)) + continue; // skip system-libs + + // Found a valid location => set output vars + Filename = FullPath.str().str(); // convert to std::string + Line = L; + Col = C; + break; // stop after the first valid location + } + } +} + void ReachableCallGraphPass::dumpDistance(std::ostream &OS, bool dumpSolution, bool dumpUnreachable) { std::deque worklist; std::unordered_set visited; @@ -730,6 +825,37 @@ void ReachableCallGraphPass::dumpPolicy(std::ostream &OS) { } } +void ReachableCallGraphPass::dumpIDMapping(ModuleList &modules, std::ostream &bbLocs, std::ostream &funcInfo) { + ModuleList::iterator i, e; + for (i = modules.begin(), e = modules.end(); i != e; ++i) { + Module *M = i->first; + for (auto &F : *M) { + unsigned minLine = std::numeric_limits::max(); + unsigned maxLine = 0; + std::string filepath; + + for (auto &BB : F) { + unsigned line = 0; + unsigned col = 0; + getDebugLocationFullPath(BB, filepath, line, col); + uint32_t bb_id = getBasicBlockId(&BB); + + if (line < minLine && line > 0) { + minLine = line; + } + if (line > maxLine && line > 0) { + maxLine = line; + } + if (!filepath.empty() && line != 0) + bbLocs << bb_id << "," << F.getGUID() << "," << filepath << ":" << line << "\n"; + + } + if (!filepath.empty() && minLine != std::numeric_limits::max() && maxLine != 0) + funcInfo << F.getGUID() << "," << F.getName().str() << "," << filepath << "," << minLine << "," << maxLine << "\n"; + } + } +} + void ReachableCallGraphPass::dumpCallees() { RES_REPORT("\n[dumpCallees]\n"); raw_ostream &OS = outs(); diff --git a/src/lib/Reachable.h b/src/lib/Reachable.h index b434d75..32ef06d 100644 --- a/src/lib/Reachable.h +++ b/src/lib/Reachable.h @@ -47,6 +47,7 @@ class ReachableCallGraphPass { // debug void dumpDistance(std::ostream &OS, bool dumpSolution = false, bool 
dumpUnreachable = false); void dumpPolicy(std::ostream &OS); + void dumpIDMapping(ModuleList &modules, std::ostream &bbLocs, std::ostream &funcInfo); void dumpCallees(); void dumpCallers(); }; From 1ad51d44a6d39da1b714a39696ef8a7d5118a9a8 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Tue, 18 Feb 2025 03:51:42 +0000 Subject: [PATCH 07/50] do not dump unreachable BB yet --- src/lib/KAMain.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/KAMain.cc b/src/lib/KAMain.cc index 45b061b..39258f2 100644 --- a/src/lib/KAMain.cc +++ b/src/lib/KAMain.cc @@ -228,7 +228,7 @@ int main(int argc, char **argv) { } if (!DumpDistance.empty()) { std::ofstream distance(DumpDistance); - RCGPass.dumpDistance(distance, true, true); + RCGPass.dumpDistance(distance, true, false); } return 0; From ab83e469e933677f9eb7424256b8249dfa54d9d3 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 18 Apr 2025 20:44:02 +0000 Subject: [PATCH 08/50] fixup --- src/lib/KAMain.cc | 2 +- src/lib/Reachable.cc | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/lib/KAMain.cc b/src/lib/KAMain.cc index 39258f2..e293ac5 100644 --- a/src/lib/KAMain.cc +++ b/src/lib/KAMain.cc @@ -214,7 +214,7 @@ int main(int argc, char **argv) { TyPMCGPass TyCG(&GlobalCtx); TyCG.run(GlobalCtx.Modules); - ReachableCallGraphPass RCGPass(&GlobalCtx, TargetList, EntryList, false); + ReachableCallGraphPass RCGPass(&GlobalCtx, TargetList, EntryList, true); RCGPass.run(GlobalCtx.Modules); if (!DumpBidMapping.empty() && !DumpFuncInfo.empty()){ diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 6229952..b51a19a 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -162,7 +162,8 @@ bool ReachableCallGraphPass::findCalleesByType(CallInst *CI, FuncSet &FS) { // just compare known args if (F->getFunctionType()->isVarArg()) { //errs() << "VarArg: " << F->getName() << "\n"; - KA_ERR("VarArg address taken function\n"); + WARNING("VarArg address taken 
function\n"); + continue; } else if (F->arg_size() != CS.arg_size()) { RA_DEBUG("ArgNum mismatch: " << F->getName() << "\n"); continue; @@ -501,7 +502,7 @@ void ReachableCallGraphPass::run(ModuleList &modules) { FuncSet &Callees = UseTypeBasedCallGraph ? calleeByType[CI] : Ctx->Callees[CI]; RA_LOG("\tfrom indirect call @" << CF->getName() << ", callee size = " << Callees.size() << "\n"); // XXX: skip potentially imprecise callsites? - if (Callees.size() > 50) { + if (itr->second.size() > 5 && Callees.size() > 50) { RA_DEBUG("Skip indirect call with too many callees\n"); continue; } @@ -724,14 +725,15 @@ void ReachableCallGraphPass::dumpDistance(std::ostream &OS, bool dumpSolution, b for (auto &I : *BB) { // check for callees if (const CallBase *CI = dyn_cast(&I)) { - auto itr = Ctx->Callees.find(CI); - if (itr == Ctx->Callees.end() && UseTypeBasedCallGraph) { - itr = calleeByType.find(CI); - } - for (auto F: itr->second) { + FuncSet &Callees = CI->isIndirectCall() ? + (UseTypeBasedCallGraph ? calleeByType[CI] : Ctx->Callees[CI]) + : Ctx->Callees[CI]; + for (auto F: Callees) { auto *FBB = &F->getEntryBlock(); - if (distances.find(FBB) != distances.end() && visited.insert(FBB).second) { - worklist.push_back(FBB); + if (distances.find(FBB) != distances.end()) { + RA_DEBUG("callee: " << F->getName() << " reachable \n"); + if (visited.insert(FBB).second) + worklist.push_back(FBB); } } } From 5a97be65b755ed7a9fc18faeb0717a5ae95d6d6a Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 13 Jun 2025 20:58:53 +0000 Subject: [PATCH 09/50] dump branch target BIDs in policy. 
Format: bid, dis_FT, dis_TT, FT_bid, TT_bid --- src/lib/CallGraph.cc | 2 -- src/lib/Reachable.cc | 8 ++++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lib/CallGraph.cc b/src/lib/CallGraph.cc index 19f2155..bebef08 100644 --- a/src/lib/CallGraph.cc +++ b/src/lib/CallGraph.cc @@ -426,7 +426,6 @@ bool CallGraphPass::runOnFunction(Function *F) { #pragma clang diagnostic pop #endif // normal handling - bool isNull = false; Value *ptr = I->getOperand(0); NodeIndex ptrNode = NF.getValueNodeFor(ptr); auto itr = funcPtsGraph.find(ptrNode); @@ -438,7 +437,6 @@ bool CallGraphPass::runOnFunction(Function *F) { CG_LOG("Load: source obj: " << idx << "\n"); if (idx == NF.getNullObjectNode() && itr->second.find_next(idx) == end) { CG_LOG("Loading from null obj, ptr = " << ptrNode << "\n"); - isNull = true; // XXX funcPtsGraph[valNode].insert(idx); break; diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index b51a19a..751b399 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -768,6 +768,9 @@ void ReachableCallGraphPass::dumpPolicy(std::ostream &OS) { continue; auto TT = branch->getSuccessor(0); auto FT = branch->getSuccessor(1); + uint32_t TT_bid = getBasicBlockId(TT); + uint32_t FT_bid = getBasicBlockId(FT); + bool reached = false; OS << getBasicBlockId(BB) << ","; auto itr = distances.find(FT); @@ -779,11 +782,12 @@ void ReachableCallGraphPass::dumpPolicy(std::ostream &OS) { } itr = distances.find(TT); if (itr != distances.end()) { - OS << itr->second * 1000 << "\n"; + OS << itr->second * 1000 << ","; reached = true; } else { - OS << "inf\n"; + OS << "inf,"; } + OS << FT_bid << "," << TT_bid << "\n"; if (!reached) { bool hasCall = false; for (auto &I : *BB) { From 47e8539a2576754e2d844328b538f0d6b997b327 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Sat, 21 Jun 2025 01:51:06 +0000 Subject: [PATCH 10/50] fixup --- src/lib/Reachable.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/Reachable.cc 
b/src/lib/Reachable.cc index 19df75c..57d38cd 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -1021,7 +1021,7 @@ void ReachableCallGraphPass::dumpPolicy(std::ostream &OS) { << "\nAnd no call in the BB\n"); } } else { - OS << getBasicBlockId(BB) << "," << tdist << "," << fdist << FT_bid << "," << TT_bid << BBIDs[BB] << "\n"; + OS << getBasicBlockId(BB) << "," << tdist << "," << fdist << "," << FT_bid << "," << TT_bid << "," << BBIDs[BB] << "\n"; } } From 27ad8d5de03ccd164d44cba719b02bb161cb7464 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Fri, 18 Jul 2025 08:41:19 +0000 Subject: [PATCH 11/50] dump distances for indirect callees --- src/lib/Reachable.cc | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 57d38cd..ef60ce9 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -922,23 +922,27 @@ void ReachableCallGraphPass::dumpDistance(std::ostream &OS, bool dumpSolution, b for (auto &I : *BB) { // check for callees if (const CallBase *CI = dyn_cast(&I)) { - if (CI->isInlineAsm() || CI->isIndirectCall()) { - continue; // skip inline asm + if (CI->isInlineAsm()) { + // skip inline asm calls + continue; } auto itr = Ctx->Callees.find(CI); + // lookup indirect callees if (itr == Ctx->Callees.end() && UseTypeBasedCallGraph) { itr = calleeByType.find(CI); - if (itr == calleeByType.end()) { - WARNING("No callees for " << *CI << "\n"); - continue; // no callees - } } - for (auto F: itr->second) { + if (itr == Ctx->Callees.end()) { + WARNING("No callees for " << *CI << "\n"); + continue; + } + for (auto *F : itr->second) { + if (F->isDeclaration() || F->empty()) { + continue; + } auto *FBB = &F->getEntryBlock(); - if (distances.find(FBB) != distances.end()) { - RA_DEBUG("callee: " << F->getName() << " reachable \n"); - if (visited.insert(FBB).second) - worklist.push_back(FBB); + if (distances.find(FBB) != distances.end() && visited.insert(FBB).second) { + 
RA_DEBUG("callee: " << F->getName() << " reachable \n"); + worklist.push_back(FBB); } } } From 1e75cdf9940e016c61ca33874f33f29c05cf128e Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Mon, 21 Jul 2025 06:31:34 +0000 Subject: [PATCH 12/50] Allow other routes back propagate to the entry when computing distances. --- src/lib/Reachable.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index ef60ce9..88ce475 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -640,8 +640,8 @@ void ReachableCallGraphPass::run(ModuleList &modules) { if (BB == &F->getEntryBlock()) { if (entryBBs.find(BB) != entryBBs.end()) { RA_LOG("Entry func " << F->getName() << " is reachable\n"); - break; - // continue; + // break; + continue; } auto itr = Ctx->Callers.find(F); if (itr == Ctx->Callers.end()) { From 3be9951884dd2be20269fa49e2029feef679f630 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 23 Jul 2025 22:41:32 +0000 Subject: [PATCH 13/50] annotate every BB with an id --- src/lib/Reachable.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 88ce475..541eb50 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -1113,6 +1113,8 @@ bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string su continue; // skip unreachable BBs if (BB.getFirstInsertionPt() == BB.end()) continue; // skip empty BBs + auto *BBID = ConstantInt::get(Int64Ty, BBIDs[&BB]); + term->setMetadata("bbid", MD); // annotate reachable basic block with ID and distance if (reachableBBs.count(&BB)) { // check if we have a distance @@ -1121,7 +1123,6 @@ bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string su dist *= 1000.0; // instrument a call to trace distance IRBuilder<> IRB(&*BB.getFirstInsertionPt()); - auto *BBID = ConstantInt::get(Int64Ty, BBIDs[&BB]); auto *Dist = ConstantInt::get(Int64Ty, (uint64_t)dist); 
IRB.CreateCall(TraceDistanceFunc, {BBID, Dist})->setCannotMerge(); @@ -1129,7 +1130,6 @@ bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string su auto term = BB.getTerminator(); MDNode *MD = MDNode::get(M->getContext(), {ConstantAsMetadata::get(BBID)}); - term->setMetadata("bbid", MD); } } } From 7cf310dc86a8392687a6dbf8118d4d03c55f8209 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 23 Jul 2025 23:31:37 +0000 Subject: [PATCH 14/50] Add max callstack depth threshold to ReachableCallGraphPass to cap caller propagation and avoid infinite loops. --- src/lib/Reachable.cc | 31 ++++++++++++++++++++++--------- src/lib/Reachable.h | 5 ++++- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 541eb50..2090182 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -389,8 +389,16 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor } RA_DEBUG(F->getName() << " is reachable\n"); - for (auto CI : itr->second) { - auto CBB = CI->getParent(); + unsigned currDepth = callDepth[BB]; + for (auto *CI : itr->second) { + auto *CBB = CI->getParent(); + unsigned newDepth = currDepth + 1; + if (newDepth > maxCallStackDepth) { + RA_LOG("Max depth reached (" << maxCallStackDepth + << ") for function " << F->getName() << ", skipping caller\n"); + continue; // do not propagate beyond threshold + } + callDepth[CBB] = newDepth; // record depth before enqueue // go through instructions, handle additional callees bool willReturn = true; bool added = false; @@ -473,11 +481,17 @@ void ReachableCallGraphPass::run(ModuleList &modules) { std::deque worklist; RA_DEBUG("\n\n=== Collecting exit BBs ===\n\n"); worklist.insert(worklist.end(), exitBBs.begin(), exitBBs.end()); + callDepth.clear(); + for (auto *BB : exitBBs) { + callDepth[BB] = 0; + } collectReachable(worklist, exitBBs); // now do a BFS search on the target list, find all reachable BBs first RA_LOG("\n\n=== Collecting reachable BBs 
===\n\n"); + callDepth.clear(); for (const auto &kv : distances) { + callDepth[kv.first] = 0; worklist.push_back(kv.first); } collectReachable(worklist, reachableBBs); @@ -690,7 +704,7 @@ void ReachableCallGraphPass::run(ModuleList &modules) { FuncSet &Callees = UseTypeBasedCallGraph ? calleeByType[CI] : Ctx->Callees[CI]; RA_LOG("\tfrom indirect call @" << CF->getName() << ", callee size = " << Callees.size() << "\n"); // XXX: skip potentially imprecise callsites? - if (itr->second.size() > 5 && Callees.size() > 50) { + if (Callees.size() > 50) { RA_DEBUG("Skip indirect call with too many callees\n"); continue; } @@ -745,7 +759,8 @@ ReachableCallGraphPass::ReachableCallGraphPass(GlobalContext *Ctx_, bool propagateRet) : Ctx(Ctx_), UseTypeBasedCallGraph(typeBased), PropagateThroughReturnEdgees(propagateRet), - nextBBID(1000) { + nextBBID(1000), + maxCallStackDepth(15) { // parse target list // format: filename:line_number if (!TargetList.empty()) { @@ -916,8 +931,8 @@ void ReachableCallGraphPass::dumpDistance(std::ostream &OS, bool dumpSolution, b currentDist = dist; RA_LOG("Best option: " << BB->getParent()->getName() << " at " << currentDist << "\n"); } - OS << getBasicBlockId(BB) << "," << getSourceLocation(BB) << "," - << distances[BB] * 1000 << "," << BBIDs[BB] << "\n"; + OS << BBIDs[BB] << "," << getBasicBlockId(BB) << "," << getSourceLocation(BB) << "," + << distances[BB] * 1000 << "\n"; for (auto &I : *BB) { // check for callees @@ -990,8 +1005,6 @@ void ReachableCallGraphPass::dumpPolicy(std::ostream &OS) { continue; auto TT = branch->getSuccessor(0); auto FT = branch->getSuccessor(1); - uint32_t TT_bid = getBasicBlockId(TT); - uint32_t FT_bid = getBasicBlockId(FT); bool reached = false; std::string tdist; @@ -1025,7 +1038,7 @@ void ReachableCallGraphPass::dumpPolicy(std::ostream &OS) { << "\nAnd no call in the BB\n"); } } else { - OS << getBasicBlockId(BB) << "," << tdist << "," << fdist << "," << FT_bid << "," << TT_bid << "," << BBIDs[BB] << "\n"; + OS 
<< BBIDs[BB] << "," << tdist << "," << fdist << "," << BBIDs[FT] << "," << BBIDs[TT] << "\n"; } } diff --git a/src/lib/Reachable.h b/src/lib/Reachable.h index 3356c93..deb0b77 100644 --- a/src/lib/Reachable.h +++ b/src/lib/Reachable.h @@ -26,9 +26,12 @@ class ReachableCallGraphPass { const bool UseTypeBasedCallGraph; const bool PropagateThroughReturnEdgees; - + std::unordered_map BBIDs; uint64_t nextBBID; + // Maximum call stack depth to propagate across callers + const unsigned maxCallStackDepth; + std::unordered_map callDepth; std::vector > targetList; std::vector entryList; From 65c36c28cb35bf3fbab254bc93d00e4509e1a771 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 23 Jul 2025 23:37:43 +0000 Subject: [PATCH 15/50] fixup --- src/lib/Reachable.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 2090182..52ed9af 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -1126,7 +1126,12 @@ bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string su continue; // skip unreachable BBs if (BB.getFirstInsertionPt() == BB.end()) continue; // skip empty BBs + + // add an annotation for other instrumentation auto *BBID = ConstantInt::get(Int64Ty, BBIDs[&BB]); + auto term = BB.getTerminator(); + MDNode *MD = MDNode::get(M->getContext(), + {ConstantAsMetadata::get(BBID)}); term->setMetadata("bbid", MD); // annotate reachable basic block with ID and distance if (reachableBBs.count(&BB)) { @@ -1138,11 +1143,6 @@ bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string su IRBuilder<> IRB(&*BB.getFirstInsertionPt()); auto *Dist = ConstantInt::get(Int64Ty, (uint64_t)dist); IRB.CreateCall(TraceDistanceFunc, {BBID, Dist})->setCannotMerge(); - - // add an annotation for other instrumentation - auto term = BB.getTerminator(); - MDNode *MD = MDNode::get(M->getContext(), - {ConstantAsMetadata::get(BBID)}); } } } From 
7a6ed7aac36c50122a7bd40e7bd05ce781086fbf Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Thu, 24 Jul 2025 00:13:55 +0000 Subject: [PATCH 16/50] also check callstack length when computing distances --- src/lib/Reachable.cc | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 52ed9af..0e767d0 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -513,7 +513,9 @@ void ReachableCallGraphPass::run(ModuleList &modules) { // now calculate distances in a bottom-up manner std::unordered_set queued; std::unordered_set queuedCalls; + callDepth.clear(); for (const auto &kv : distances) { + callDepth[kv.first] = 0; worklist.push_back(kv.first); queued.insert(kv.first); } @@ -522,6 +524,10 @@ void ReachableCallGraphPass::run(ModuleList &modules) { auto *BB = worklist.front(); worklist.pop_front(); queued.erase(BB); + unsigned currDepth = callDepth[BB]; + if (currDepth >= maxCallStackDepth) { + continue; // do not propagate beyond threshold + } if (PropagateThroughReturnEdgees) { // go through instructions, looking for calls auto hasCalls = BBswithCalls.find(BB); @@ -582,6 +588,7 @@ void ReachableCallGraphPass::run(ModuleList &modules) { RA_DEBUG("Propagate distance " << dist << " to callee: " << F->getName() << "\n"); distances[&TBB] = dist; if (queued.insert(&TBB).second) { + callDepth[&TBB] = currDepth + 1; worklist.push_back(&TBB); } added = true; @@ -597,8 +604,10 @@ void ReachableCallGraphPass::run(ModuleList &modules) { // there is another callsite but no propagation is needed // simulate the propagation by adding the callsite to the queue queuedCalls.insert(propagate); - if (queued.insert(BB).second) - worklist.push_back(BB); + if (queued.insert(BB).second){ + callDepth[BB] = currDepth + 1; + worklist.push_back(BB); + } continue; } } else { @@ -645,8 +654,10 @@ void ReachableCallGraphPass::run(ModuleList &modules) { if (itr == distances.end() || itr->second > dist) { // 
RA_DEBUG("Adding Pred: " << *Pred << " with prob " << prob << "\n"); distances[Pred] = dist; - if (queued.insert(Pred).second) + if (queued.insert(Pred).second){ + callDepth[Pred] = currDepth; worklist.push_back(Pred); + } } } // entry block has no predecessor, add caller @@ -673,7 +684,7 @@ void ReachableCallGraphPass::run(ModuleList &modules) { continue; } } - + // check callers RA_LOG(F->getName() << " is reachable from " << itr->second.size() << " callers\n"); auto dist = distances[BB]; for (auto CI : itr->second) { @@ -694,8 +705,10 @@ void ReachableCallGraphPass::run(ModuleList &modules) { } else { distances[CBB] = dist; } - if (queued.insert(CBB).second) + if (queued.insert(CBB).second){ + callDepth[CBB] = currDepth + 1; worklist.push_back(CBB); + } } } else { // indirect call is tricky, treat like predecessors @@ -741,8 +754,10 @@ void ReachableCallGraphPass::run(ModuleList &modules) { } else { distances[CBB] = dist; } - if (queued.insert(CBB).second) + if (queued.insert(CBB).second){ + callDepth[CBB] = currDepth + 1; worklist.push_back(CBB); + } } } } From 5fc4c49f65fd71a5afe236d970a665fffaf3abdd Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Thu, 24 Jul 2025 04:22:13 +0000 Subject: [PATCH 17/50] add an option to control callstack length --- src/lib/KAMain.cc | 6 +++++- src/lib/Reachable.cc | 4 ++-- src/lib/Reachable.h | 3 ++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/lib/KAMain.cc b/src/lib/KAMain.cc index b504ee2..44a332b 100644 --- a/src/lib/KAMain.cc +++ b/src/lib/KAMain.cc @@ -42,6 +42,9 @@ cl::list InputFilenames( cl::opt VerboseLevel( "verbose", cl::desc("Verbose level"), cl::init(0)); +cl::opt CallStackLen( + "call-stack-len", cl::desc("The maximum call stack length from entry to the targets"), cl::init(10)); + cl::opt UseTypeBasedCallGraph( "type-based-callgraph", cl::desc("Use type-based call graph"), cl::init(false)); @@ -222,7 +225,8 @@ int main(int argc, char **argv) { TyCG.run(GlobalCtx.Modules); } - 
ReachableCallGraphPass RCGPass(&GlobalCtx, TargetList, EntryList, UseTypeBasedCallGraph); + ReachableCallGraphPass RCGPass(&GlobalCtx, TargetList, EntryList, + UseTypeBasedCallGraph, false, CallStackLen); RCGPass.run(GlobalCtx.Modules); if (!DumpBidMapping.empty() && !DumpFuncInfo.empty()){ diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 0e767d0..da7ed89 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -771,11 +771,11 @@ void ReachableCallGraphPass::run(ModuleList &modules) { ReachableCallGraphPass::ReachableCallGraphPass(GlobalContext *Ctx_, std::string &TargetList, std::string &EntryList, bool typeBased, - bool propagateRet) + bool propagateRet, unsigned CallStackLen) : Ctx(Ctx_), UseTypeBasedCallGraph(typeBased), PropagateThroughReturnEdgees(propagateRet), nextBBID(1000), - maxCallStackDepth(15) { + maxCallStackDepth(CallStackLen) { // parse target list // format: filename:line_number if (!TargetList.empty()) { diff --git a/src/lib/Reachable.h b/src/lib/Reachable.h index deb0b77..2f753c4 100644 --- a/src/lib/Reachable.h +++ b/src/lib/Reachable.h @@ -46,7 +46,8 @@ class ReachableCallGraphPass { public: ReachableCallGraphPass(GlobalContext *Ctx_, std::string &TargetList, - std::string &EntryList, bool typeBased = true, bool propagateRet = false); + std::string &EntryList, bool typeBased = true, + bool propagateRet = false, unsigned CallStackLen = 10); virtual bool doInitialization(llvm::Module *); virtual bool doFinalization(llvm::Module *); virtual void run(ModuleList &modules); From b81c62249d4616b56b1ee67f0a54d0d349a5c484 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Thu, 24 Jul 2025 07:06:28 +0000 Subject: [PATCH 18/50] Guard against empty distances in annotateModules --- src/lib/Reachable.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index da7ed89..0d38517 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -1118,11 +1118,14 @@ void 
ReachableCallGraphPass::dumpIDMapping(ModuleList &modules, std::ostream &bb bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string suffix) { ModuleList::iterator i, e; - double max_dist = std::max_element(distances.begin(), distances.end(), - [](const std::pair &a, - const std::pair &b) { - return a.second < b.second; - })->second; + double max_dist = INFINITY; + if (!distances.empty()) { + max_dist = std::max_element(distances.begin(), distances.end(), + [](const std::pair &a, + const std::pair &b) { + return a.second < b.second; + })->second; + } for (i = modules.begin(), e = modules.end(); i != e; ++i) { Module *M = i->first; From 471c03ce4c2e0a28caea75a3d6e9d53e4c2b1e64 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Fri, 25 Jul 2025 23:13:56 +0000 Subject: [PATCH 19/50] Sort dumpDistance output by ascending distance and simplify iteration --- src/lib/Reachable.cc | 98 +++++++++++++------------------------------- 1 file changed, 28 insertions(+), 70 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 0d38517..846b271 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -919,86 +919,44 @@ void getDebugLocationFullPath(const BasicBlock &BB, } void ReachableCallGraphPass::dumpDistance(std::ostream &OS, bool dumpSolution, bool dumpUnreachable) { - std::deque worklist; - std::unordered_set visited; - double currentDist = std::numeric_limits::max();; - for (auto BB : entryBBs) { - if (distances.find(BB) != distances.end()) { - RA_LOG("Entry BB of " << BB->getParent()->getName() << " is reachable\n"); - worklist.push_back(BB); - visited.insert(BB); - } - } - if (worklist.empty()) { - WARNING("Target not reachable from entry BBs\n"); - return; - } - - // set precision - OS << std::fixed << std::setprecision(6); - - // dump reachable bb - while (!worklist.empty()) { - auto *BB = worklist.front(); - worklist.pop_front(); - auto dist = distances[BB]; - if (dumpSolution && (dist < currentDist)) { - currentDist = 
dist; - RA_LOG("Best option: " << BB->getParent()->getName() << " at " << currentDist << "\n"); - } - OS << BBIDs[BB] << "," << getBasicBlockId(BB) << "," << getSourceLocation(BB) << "," - << distances[BB] * 1000 << "\n"; - - for (auto &I : *BB) { - // check for callees - if (const CallBase *CI = dyn_cast(&I)) { - if (CI->isInlineAsm()) { - // skip inline asm calls - continue; - } - auto itr = Ctx->Callees.find(CI); - // lookup indirect callees - if (itr == Ctx->Callees.end() && UseTypeBasedCallGraph) { - itr = calleeByType.find(CI); - } - if (itr == Ctx->Callees.end()) { - WARNING("No callees for " << *CI << "\n"); - continue; - } - for (auto *F : itr->second) { - if (F->isDeclaration() || F->empty()) { - continue; - } - auto *FBB = &F->getEntryBlock(); - if (distances.find(FBB) != distances.end() && visited.insert(FBB).second) { - RA_DEBUG("callee: " << F->getName() << " reachable \n"); - worklist.push_back(FBB); - } - } - } + // Set precision for output + OS << std::fixed << std::setprecision(6); + + // Copy and sort distances by ascending value + std::vector> sorted; + sorted.reserve(distances.size()); + for (const auto &entry : distances) { + sorted.emplace_back(entry.first, entry.second); } - for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { - auto *Succ = *SI; - if (distances.find(Succ) != distances.end() && visited.insert(Succ).second) { - worklist.push_back(Succ); - } + std::sort(sorted.begin(), sorted.end(), + [](const auto &a, const auto &b) { return a.second < b.second; }); + + // Output sorted distance entries + for (const auto &pair : sorted) { + const BasicBlock *BB = pair.first; + double dist = pair.second; + OS << BBIDs[BB] << "," + << getBasicBlockId(BB) << "," + << getSourceLocation(BB) << "," + << (dist * 1000) << "\n"; } - } - // dump unreachable bb + + // If dumpUnreachable is enabled, output unreachable basic blocks if (dumpUnreachable) { - for (auto BB : exitBBs) { + for (const auto *BB : exitBBs) { if (distances.find(BB) 
== distances.end()) { OS << BBIDs[BB] << "," << getBasicBlockId(BB) << "," << getSourceLocation(BB) << ",-1\n"; } } } - // dump the covered functions - std::unordered_set reachedFunctions; - for (auto BB : reachableBBs) { - reachedFunctions.insert(BB->getParent()); + + // Dump the covered functions + std::unordered_set reachedFunctions; + for (const auto &entry : distances) { + reachedFunctions.insert(entry.first->getParent()); } OS << "##########\n"; - for (auto *F : reachedFunctions) { + for (const auto *F : reachedFunctions) { OS << "fun:" << F->getName().str() << "\n"; } } From b2cbc16eb0edc7cf79edd437410681a97bbdd374 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Tue, 29 Jul 2025 06:19:11 +0000 Subject: [PATCH 20/50] more exit functions --- src/lib/Annotation.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lib/Annotation.cc b/src/lib/Annotation.cc index ae7a42d..3a63335 100644 --- a/src/lib/Annotation.cc +++ b/src/lib/Annotation.cc @@ -249,7 +249,10 @@ bool isExitFn(StringRef name) { if (name.equals("exit") || name.equals("_exit") || name.equals("_Exit") || + name.equals("quick_exit") || name.equals("exit_group") || + name.equals("terminate") || + name.equals("abort") || name.equals("panic") || name.equals("BUG") || name.equals("BUG_ON")) From 75e7f531a9b1859e421e4a74f56299c415e3193c Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Tue, 29 Jul 2025 06:41:46 +0000 Subject: [PATCH 21/50] find and dump critical BBs from reachable and unreachable BBs. 
--- src/lib/KAMain.cc | 13 ++++++-- src/lib/Reachable.cc | 78 +++++++++++++++++++++++++++++++------------- src/lib/Reachable.h | 7 ++-- 3 files changed, 70 insertions(+), 28 deletions(-) diff --git a/src/lib/KAMain.cc b/src/lib/KAMain.cc index 44a332b..06678c0 100644 --- a/src/lib/KAMain.cc +++ b/src/lib/KAMain.cc @@ -55,13 +55,16 @@ cl::opt EntryList( "entry-list", cl::desc("Entry list"), cl::init("")); cl::opt DumpPolicy( - "dump-policy", cl::desc("Dump static policy"), cl::init("")); + "dump-policy", cl::desc("Dump policy, format: bid,true_distance,false_distance,false_bid,true_bid"), cl::init("")); cl::opt DumpDistance( - "dump-distance", cl::desc("Dump distance"), cl::init("")); + "dump-distance", cl::desc("Dump distances, format: bid,bb_hash,loc,distance"), cl::init("")); + +cl::opt DumpCriticalBBs( + "dump-critical-branch", cl::desc("Dump critical basic blocks, format: critical_bid, exit_bid_1, exit_bid_2, ..."), cl::init("")); cl::opt DumpBidMapping( - "dump-bid-mapping", cl::desc("Dump basic block ID mapping, format: bid,fun_GUID,filepath:linenum"), cl::init("")); + "dump-bid-mapping", cl::desc("Dump basic block ID mapping, format: bid,bb_hash,fun_GUID,filepath:linenum"), cl::init("")); cl::opt DumpFuncInfo( "dump-func-info", cl::desc("Dump function info, format: fun_GUID,fun_name,filepath,start_linenum,end_linenum"), cl::init("")); @@ -245,6 +248,10 @@ int main(int argc, char **argv) { if (!DumpAnnotatedIR.empty()) { RCGPass.annotateModules(GlobalCtx.Modules, DumpAnnotatedIR); } + if (!DumpCriticalBBs.empty()) { + std::ofstream criticalBBs(DumpCriticalBBs); + RCGPass.dumpCriticalBBs(criticalBBs); + } return 0; } diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 846b271..68d2488 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -220,18 +220,22 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { RA_LOG("### Run on function: " << F->getName() << "\n"); for (auto &BB : *F) { + // assign a BB ID + if (BBIDs.find(&BB) == 
BBIDs.end()) { + BBIDs[&BB] = nextBBID++; + if (auto *SI = dyn_cast(BB.getTerminator())) { + // assign a unique ID to the switch case + nextBBID += SI->getNumCases(); + } + } + // treat any BB ending in llvm::UnreachableInst as an "exit" + if (isa(BB.getTerminator())) { + RA_LOG("Non ret BB: " << BB.getName() << "\n"); + exitBBs.insert(&BB); + } for (auto &i : BB) { Instruction *I = &i; - // assign a BB ID - if (BBIDs.find(&BB) == BBIDs.end()) { - BBIDs[&BB] = nextBBID++; - if (auto *SI = dyn_cast(BB.getTerminator())) { - // assign a unique ID to the switch case - nextBBID += SI->getNumCases(); - } - } - if (UseTypeBasedCallGraph) { if (CallBase *CI = dyn_cast(I)) { if (Function *CF = CI->getCalledFunction()) { @@ -240,7 +244,7 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { Changed |= Ctx->Callees[CI].insert(RCF).second; Changed |= Ctx->Callers[RCF].insert(CI).second; // check for call to exit functions - if (isExitFn(RCF->getName())) { + if (isExitFn(RCF->getName()) || CF->doesNotReturn()) { RA_LOG("Exit Call: " << *CI << "\n"); exitBBs.insert(CI->getParent()); } @@ -356,7 +360,8 @@ bool ReachableCallGraphPass::doFinalization(Module *M) { } void ReachableCallGraphPass::collectReachable(std::deque &worklist, - std::unordered_set &reachable) { + std::unordered_set &reachable, + const std::unordered_set &others) { while (!worklist.empty()) { auto *BB = worklist.front(); worklist.pop_front(); @@ -365,6 +370,12 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor auto *Pred = *PI; if (reachable.insert(Pred).second) { RA_DEBUG("Adding Pred: " << *Pred << "\n"); + // if the predecessor is reachable to the target + // stop propagating unreachable BB through it + if (others.find(Pred) != others.end()) { + criticalBBs[Pred].push_back(BB); + break; + } worklist.push_back(Pred); } } @@ -402,7 +413,8 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor // go through instructions, handle additional callees bool willReturn = true; bool added 
= false; - if (true /*PropagateThroughReturnEdgees*/) { + // do not PropagateThroughReturnEdgees when computing unreachable BBs + if (true /*PropagateThroughReturnEdgees*/ && others.empty()) { // always propagate reachability through return edges auto hasCalls = BBswithCalls.find(CBB); assert(hasCalls != BBswithCalls.end()); @@ -438,11 +450,18 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor if (added) break; // one callsite at a time } } - } + } // end of PropagateThroughReturnEdgees + if (willReturn && !added) { // if all callsites have been processed, add the CBB RA_DEBUG("\tadding caller: " << CI->getFunction()->getName() << "\n"); if (reachable.insert(CBB).second) { + // if the caller BB CBB is reachable to the target + // do not propagate unreachable BB through this call sites + if (others.find(CBB) != others.end()) { + criticalBBs[CBB].push_back(BB); + continue; + } worklist.push_back(CBB); } } @@ -477,17 +496,8 @@ void ReachableCallGraphPass::run(ModuleList &modules) { return; } - // do a BFS search on the call graph to find BB that can reach exits + // do a BFS search on the target list, find all reachable BBs first std::deque worklist; - RA_DEBUG("\n\n=== Collecting exit BBs ===\n\n"); - worklist.insert(worklist.end(), exitBBs.begin(), exitBBs.end()); - callDepth.clear(); - for (auto *BB : exitBBs) { - callDepth[BB] = 0; - } - collectReachable(worklist, exitBBs); - - // now do a BFS search on the target list, find all reachable BBs first RA_LOG("\n\n=== Collecting reachable BBs ===\n\n"); callDepth.clear(); for (const auto &kv : distances) { @@ -496,6 +506,16 @@ void ReachableCallGraphPass::run(ModuleList &modules) { } collectReachable(worklist, reachableBBs); + // do a BFS search on the call graph to find BB that can reach exits + worklist.clear(); + RA_DEBUG("\n\n=== Collecting exit BBs ===\n\n"); + callDepth.clear(); + for (auto *BB : exitBBs) { + callDepth[BB] = 0; + worklist.push_back(BB); + } + collectReachable(worklist, exitBBs, 
reachableBBs); + // check if target is reachable bool reached = false; for (auto &entry : entryBBs) { @@ -1051,6 +1071,9 @@ void ReachableCallGraphPass::dumpIDMapping(ModuleList &modules, std::ostream &bb unsigned minLine = std::numeric_limits::max(); unsigned maxLine = 0; std::string filepath; + if (F.isDeclaration() || F.empty() || F.isIntrinsic()) { + continue; // skip declaration and intrinsic + } for (auto &BB : F) { unsigned line = 0; @@ -1074,6 +1097,15 @@ void ReachableCallGraphPass::dumpIDMapping(ModuleList &modules, std::ostream &bb } } +void ReachableCallGraphPass::dumpCriticalBBs(std::ostream &OS) { + for (auto const &[BB, exits] : criticalBBs) { + OS << BBIDs[BB]; + for (auto *exitBB : exits) + OS << "," << BBIDs[exitBB]; + OS << "\n"; + } +} + bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string suffix) { ModuleList::iterator i, e; double max_dist = INFINITY; diff --git a/src/lib/Reachable.h b/src/lib/Reachable.h index 2f753c4..7363f6d 100644 --- a/src/lib/Reachable.h +++ b/src/lib/Reachable.h @@ -42,6 +42,7 @@ class ReachableCallGraphPass { using CallSequence = std::vector; std::unordered_map BBswithCalls; std::unordered_map callDistances; + std::unordered_map> criticalBBs; std::unordered_set reachableIndirectCalls; public: @@ -54,11 +55,13 @@ class ReachableCallGraphPass { // simple bfs pass void collectReachable(std::deque &worklist, - std::unordered_set &reachable); + std::unordered_set &reachable, + const std::unordered_set &others = {}); // debug - void dumpDistance(std::ostream &OS, bool dumpSolution = false, bool dumpUnreachable = false); void dumpPolicy(std::ostream &OS); + void dumpCriticalBBs(std::ostream &OS); + void dumpDistance(std::ostream &OS, bool dumpSolution = false, bool dumpUnreachable = false); void dumpIDMapping(ModuleList &modules, std::ostream &bbLocs, std::ostream &funcInfo); bool annotateModules(ModuleList &modules, std::string suffix=".annotated.bc"); void dumpCallees(); From 
f0e47543fb3b7ef778bdabeba93238c2045b90e8 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Tue, 29 Jul 2025 07:00:54 +0000 Subject: [PATCH 22/50] disable __taint_trace_distance instrumentation, only annotate bbid --- src/lib/Reachable.cc | 45 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 68d2488..6fa1115 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -230,7 +230,7 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { } // treat any BB ending in llvm::UnreachableInst as an "exit" if (isa(BB.getTerminator())) { - RA_LOG("Non ret BB: " << BB.getName() << "\n"); + RA_LOG("Unreachable Inst BB: " << BBIDs[&BB] << "\n"); exitBBs.insert(&BB); } for (auto &i : BB) { @@ -383,7 +383,6 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor auto *F = BB->getParent(); if (BB == &F->getEntryBlock()) { if (entryBBs.find(BB) != entryBBs.end()) { - RA_LOG("Entry func " << F->getName() << " is reachable\n"); continue; } auto itr = Ctx->Callers.find(F); @@ -1108,23 +1107,23 @@ void ReachableCallGraphPass::dumpCriticalBBs(std::ostream &OS) { bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string suffix) { ModuleList::iterator i, e; - double max_dist = INFINITY; - if (!distances.empty()) { - max_dist = std::max_element(distances.begin(), distances.end(), - [](const std::pair &a, - const std::pair &b) { - return a.second < b.second; - })->second; - } + // double max_dist = INFINITY; + // if (!distances.empty()) { + // max_dist = std::max_element(distances.begin(), distances.end(), + // [](const std::pair &a, + // const std::pair &b) { + // return a.second < b.second; + // })->second; + // } for (i = modules.begin(), e = modules.end(); i != e; ++i) { Module *M = i->first; auto ModName = M->getName().str(); auto NewName = ModName + suffix; - auto VoidTy = Type::getVoidTy(M->getContext()); + // auto VoidTy = 
Type::getVoidTy(M->getContext()); auto Int64Ty = Type::getInt64Ty(M->getContext()); - FunctionCallee TraceDistanceFunc = M->getOrInsertFunction( - "__taint_trace_distance", VoidTy, Int64Ty, Int64Ty); + // FunctionCallee TraceDistanceFunc = M->getOrInsertFunction( + // "__taint_trace_distance", VoidTy, Int64Ty, Int64Ty); for (auto &F : *M) { if (F.isDeclaration() || F.empty() || F.isIntrinsic()) { continue; // skip declaration and intrinsic @@ -1142,16 +1141,16 @@ bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string su {ConstantAsMetadata::get(BBID)}); term->setMetadata("bbid", MD); // annotate reachable basic block with ID and distance - if (reachableBBs.count(&BB)) { - // check if we have a distance - auto itr = distances.find(&BB); - double dist = (itr != distances.end()) ? itr->second : max_dist; - dist *= 1000.0; - // instrument a call to trace distance - IRBuilder<> IRB(&*BB.getFirstInsertionPt()); - auto *Dist = ConstantInt::get(Int64Ty, (uint64_t)dist); - IRB.CreateCall(TraceDistanceFunc, {BBID, Dist})->setCannotMerge(); - } + // if (reachableBBs.count(&BB)) { + // // check if we have a distance + // auto itr = distances.find(&BB); + // double dist = (itr != distances.end()) ? 
itr->second : max_dist; + // dist *= 1000.0; + // // instrument a call to trace distance + // IRBuilder<> IRB(&*BB.getFirstInsertionPt()); + // auto *Dist = ConstantInt::get(Int64Ty, (uint64_t)dist); + // IRB.CreateCall(TraceDistanceFunc, {BBID, Dist})->setCannotMerge(); + // } } } // verify From cb12c80fbe7c09ae62865bbf0d45b3058616fa9e Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 30 Jul 2025 01:04:14 +0000 Subject: [PATCH 23/50] rewrite propagateThroughReturnEdgees --- src/lib/KAMain.cc | 2 +- src/lib/Reachable.cc | 273 +++++++++++++++++-------------------------- src/lib/Reachable.h | 13 ++- 3 files changed, 114 insertions(+), 174 deletions(-) diff --git a/src/lib/KAMain.cc b/src/lib/KAMain.cc index 06678c0..0524fbb 100644 --- a/src/lib/KAMain.cc +++ b/src/lib/KAMain.cc @@ -229,7 +229,7 @@ int main(int argc, char **argv) { } ReachableCallGraphPass RCGPass(&GlobalCtx, TargetList, EntryList, - UseTypeBasedCallGraph, false, CallStackLen); + UseTypeBasedCallGraph, CallStackLen); RCGPass.run(GlobalCtx.Modules); if (!DumpBidMapping.empty() && !DumpFuncInfo.empty()){ diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 6fa1115..a5dea5b 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -359,17 +359,100 @@ bool ReachableCallGraphPass::doFinalization(Module *M) { return false; } +void ReachableCallGraphPass::propagateThroughReturnEdgees( + std::unordered_set &reachable, + const BasicBlock* CBB) { + auto hasCalls = BBswithCalls.find(CBB); + if (hasCalls == BBswithCalls.end()) return; + + std::deque worklist; + unsigned currDepth = callDepth[CBB]; + if (currDepth >= maxCallStackDepth) { + RA_LOG("Max depth reached (" << maxCallStackDepth + << ") for BB " << BBIDs[CBB] << ", skipping propagation\n"); + return; // do not propagate beyond threshold + } + + CallSequence calls = hasCalls->second; + for (size_t i = calls.size(); i-- > 0; ) { + // find the current callsite and there are additional callees before it + const llvm::CallBase* CI = 
calls[i]; + // Look up callees for this call site with proper map/iterator logic + // Unified lookup of direct or type-based callees + const FuncSet *callees = nullptr; + if (auto it = Ctx->Callees.find(CI); it != Ctx->Callees.end()) { + callees = &it->second; + } else if (UseTypeBasedCallGraph) { + if (auto it2 = calleeByType.find(CI); it2 != calleeByType.end()) { + callees = &it2->second; + } + } + if (!callees) { + RA_DEBUG("No callee for " << *CI << "\n"); + continue; + } + + // Iterate over each callee function + for (auto *F : *callees) { + if (isExitFn(F->getName()) || F->doesNotReturn()) { + RA_DEBUG("DoesNotReturn: " << F->getName() << "\n"); + break; // no further propagation for no-return functions + } + // add exit block(s) as reachable + for (auto &TBB : *F) { + if (isa(TBB.getTerminator())) { + continue; // skip unreachable BBs + } + if (isa(TBB.getTerminator())) { + RA_LOG("Adding callee: " << F->getName() << "\n"); + if (reachable.find(&TBB) != reachable.end()) { + continue; // already added + } + callDepth[&TBB] = currDepth + 1; + if (reachable.insert(&TBB).second) { + worklist.push_back(&TBB); + } + } + } // end of BBs for this callee F + } // end of candidate callees for this callsite CI + } // end of all callsites for this CBB + + // only back propagate through predecessors to prevent fake call edges. 
+ while (!worklist.empty()) { + const BasicBlock *BB = worklist.front(); + worklist.pop_front(); + RA_DEBUG("Propagating through: " << BBIDs[BB] << "\n"); + // add predecessors + for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + const BasicBlock *Pred = *PI; + if (reachable.find(Pred) != reachable.end()) { + continue; // already added + } + if (reachable.insert(Pred).second) { + RA_DEBUG("Adding Pred: " << BBIDs[Pred] << "\n"); + propagateThroughReturnEdgees(reachable, Pred); + } + } + } +} + void ReachableCallGraphPass::collectReachable(std::deque &worklist, std::unordered_set &reachable, const std::unordered_set &others) { while (!worklist.empty()) { auto *BB = worklist.front(); worklist.pop_front(); + // add callee when computing reachable BBs + bool isComputingReachable = others.empty(); + if (isComputingReachable) { + // do not PropagateThroughReturnEdgees when computing unreachable BBs + propagateThroughReturnEdgees(reachable, BB); + } // add predecessors for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { auto *Pred = *PI; if (reachable.insert(Pred).second) { - RA_DEBUG("Adding Pred: " << *Pred << "\n"); + RA_DEBUG("Adding Pred: " << BBIDs[Pred] << "\n"); // if the predecessor is reachable to the target // stop propagating unreachable BB through it if (others.find(Pred) != others.end()) { @@ -409,60 +492,16 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor continue; // do not propagate beyond threshold } callDepth[CBB] = newDepth; // record depth before enqueue - // go through instructions, handle additional callees - bool willReturn = true; - bool added = false; - // do not PropagateThroughReturnEdgees when computing unreachable BBs - if (true /*PropagateThroughReturnEdgees*/ && others.empty()) { - // always propagate reachability through return edges - auto hasCalls = BBswithCalls.find(CBB); - assert(hasCalls != BBswithCalls.end()); - auto calls = hasCalls->second; - for (auto i = calls.size() - 1; i > 0; 
--i) { - if (calls[i] == CI) { - // find the current callsite and there are additional callees before it - auto PCI = calls[i - 1]; - auto fitr = Ctx->Callees.find(PCI); - if (fitr == Ctx->Callees.end()) { - if (UseTypeBasedCallGraph) { - fitr = calleeByType.find(PCI); - } - } - // any callsite here is guaranteed to have a callee - for (auto F : fitr->second) { - if (F->doesNotReturn()) { - RA_DEBUG("DoesNotReturn: " << F->getName() << "\n"); - willReturn = false; - break; // not need to continue - } - // add exit block(s) as reachable - for (auto &TBB : *F) { - if (isa(TBB.getTerminator())) { - RA_LOG("Adding callee: " << F->getName() << "\n"); - if (reachable.insert(&TBB).second) { - worklist.push_back(&TBB); - added = true; - } - } - } - } - if (added) break; // one callsite at a time - } - } - } // end of PropagateThroughReturnEdgees - - if (willReturn && !added) { - // if all callsites have been processed, add the CBB - RA_DEBUG("\tadding caller: " << CI->getFunction()->getName() << "\n"); - if (reachable.insert(CBB).second) { - // if the caller BB CBB is reachable to the target - // do not propagate unreachable BB through this call sites - if (others.find(CBB) != others.end()) { - criticalBBs[CBB].push_back(BB); - continue; - } - worklist.push_back(CBB); + // if all callsites have been processed, add the CBB + RA_DEBUG("\tadding caller: " << CI->getFunction()->getName() << "\n"); + if (reachable.insert(CBB).second) { + // if the caller BB CBB is reachable to the target + // do not propagate unreachable BB through this call sites + if (others.find(CBB) != others.end()) { + criticalBBs[CBB].push_back(BB); + continue; } + worklist.push_back(CBB); } } // end of callers } // end of entry block @@ -506,8 +545,8 @@ void ReachableCallGraphPass::run(ModuleList &modules) { collectReachable(worklist, reachableBBs); // do a BFS search on the call graph to find BB that can reach exits - worklist.clear(); RA_DEBUG("\n\n=== Collecting exit BBs ===\n\n"); + worklist.clear(); 
callDepth.clear(); for (auto *BB : exitBBs) { callDepth[BB] = 0; @@ -547,99 +586,6 @@ void ReachableCallGraphPass::run(ModuleList &modules) { if (currDepth >= maxCallStackDepth) { continue; // do not propagate beyond threshold } - if (PropagateThroughReturnEdgees) { - // go through instructions, looking for calls - auto hasCalls = BBswithCalls.find(BB); - if (hasCalls != BBswithCalls.end()) { - auto &calls = hasCalls->second; - bool finished = false; - const CallBase *propagate = nullptr; - double dist = NAN; - for (auto i = calls.size() - 1;; --i) { - // iterate through all callsites, in reverse order - auto *CI = calls[i]; - if (queuedCalls.find(CI) != queuedCalls.end()) { - // if the reachability comes from the callee - RA_DEBUG("Find current callsite: " << *CI << "\n"); - queuedCalls.erase(CI); - if (i > 0) { - // there are additional callees before the current callsite - // we need to propagate the reachability to them - RA_DEBUG("Propagate to additional callees\n"); - // get the distance of current callsite - auto itr = callDistances.find(CI); - assert(itr != callDistances.end()); - dist = itr->second; - // record the callsite to be propagated to - propagate = calls[i - 1]; - } else { - // all callees have been processed - finished = true; - } - break; // always break if coming from callee - } - if (i == 0) break; - } - if (!finished) { - // if not finished, we either have more callsite(s) to process, - // or the reachability is coming from the successor, - if (propagate == nullptr) { - // in the later case, we want to propagate BB distance to the last callsite - RA_DEBUG("Propagate BB distance to last callsite\n"); - propagate = calls.back(); - dist = distances[BB]; - } - // propagate to return sites in the callee - auto fitr = Ctx->Callees.find(propagate); - if (fitr == Ctx->Callees.end()) { - if (UseTypeBasedCallGraph) { - fitr = calleeByType.find(propagate); - } - } - bool added = false; - // any callsite here is guaranteed to have a callee - for (auto F 
: fitr->second) { - // add exit block(s) as reachable - for (auto &TBB : *F) { - if (isa(TBB.getTerminator())) { - auto itr = distances.find(&TBB); - if (itr == distances.end() || itr->second > dist) { - RA_DEBUG("Propagate distance " << dist << " to callee: " << F->getName() << "\n"); - distances[&TBB] = dist; - if (queued.insert(&TBB).second) { - callDepth[&TBB] = currDepth + 1; - worklist.push_back(&TBB); - } - added = true; - } - } - } - } - if (added) { - // if we have propagated reachability to a callee, it will come back, - // so we don't need to propagate to predecessors for now - continue; - } else { - // there is another callsite but no propagation is needed - // simulate the propagation by adding the callsite to the queue - queuedCalls.insert(propagate); - if (queued.insert(BB).second){ - callDepth[BB] = currDepth + 1; - worklist.push_back(BB); - } - continue; - } - } else { - // if all callsites have been processed, use the distance of the first - // callsite as the distance of the BB - RA_DEBUG("All callees processed\n"); - auto itr = callDistances.find(calls.front()); - assert(itr != callDistances.end()); - dist = itr->second; - distances[BB] = dist; - } - } - } // check predecessors for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { auto *Pred = *PI; @@ -718,12 +664,7 @@ void ReachableCallGraphPass::run(ModuleList &modules) { auto itr2 = callDistances.find(CI); if (itr2 == callDistances.end() || itr2->second > dist) { RA_DEBUG("Adding direct caller: " << CI->getFunction()->getName() << "\n"); - if (PropagateThroughReturnEdgees) { - callDistances[CI] = dist; - queuedCalls.insert(CI); - } else { - distances[CBB] = dist; - } + distances[CBB] = dist; if (queued.insert(CBB).second){ callDepth[CBB] = currDepth + 1; worklist.push_back(CBB); @@ -767,12 +708,7 @@ void ReachableCallGraphPass::run(ModuleList &modules) { auto itr2 = callDistances.find(CI); if (itr2 == callDistances.end() || itr2->second > dist) { RA_DEBUG("Adding indirect 
caller: " << CI->getFunction()->getName() << "\n"); - if (PropagateThroughReturnEdgees) { - callDistances[CI] = dist; - queuedCalls.insert(CI); - } else { distances[CBB] = dist; - } if (queued.insert(CBB).second){ callDepth[CBB] = currDepth + 1; worklist.push_back(CBB); @@ -788,13 +724,16 @@ void ReachableCallGraphPass::run(ModuleList &modules) { } } -ReachableCallGraphPass::ReachableCallGraphPass(GlobalContext *Ctx_, - std::string &TargetList, std::string &EntryList, bool typeBased, - bool propagateRet, unsigned CallStackLen) - : Ctx(Ctx_), UseTypeBasedCallGraph(typeBased), - PropagateThroughReturnEdgees(propagateRet), - nextBBID(1000), - maxCallStackDepth(CallStackLen) { +ReachableCallGraphPass::ReachableCallGraphPass( + GlobalContext *Ctx_, + std::string &TargetList, + std::string &EntryList, + bool typeBased, + unsigned CallStackLen) + : Ctx(Ctx_), + UseTypeBasedCallGraph(typeBased), + nextBBID(1000), + maxCallStackDepth(CallStackLen) { // parse target list // format: filename:line_number if (!TargetList.empty()) { @@ -1129,7 +1068,7 @@ bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string su continue; // skip declaration and intrinsic } for (auto &BB : F) { - if (isa(BB.getFirstNonPHIOrDbgOrLifetime())) + if (isa(BB.getTerminator())) continue; // skip unreachable BBs if (BB.getFirstInsertionPt() == BB.end()) continue; // skip empty BBs diff --git a/src/lib/Reachable.h b/src/lib/Reachable.h index 7363f6d..13f6fcf 100644 --- a/src/lib/Reachable.h +++ b/src/lib/Reachable.h @@ -25,7 +25,6 @@ class ReachableCallGraphPass { CallerMap callerByType; const bool UseTypeBasedCallGraph; - const bool PropagateThroughReturnEdgees; std::unordered_map BBIDs; uint64_t nextBBID; @@ -48,15 +47,17 @@ class ReachableCallGraphPass { public: ReachableCallGraphPass(GlobalContext *Ctx_, std::string &TargetList, std::string &EntryList, bool typeBased = true, - bool propagateRet = false, unsigned CallStackLen = 10); + unsigned CallStackLen = 10); virtual bool 
doInitialization(llvm::Module *); virtual bool doFinalization(llvm::Module *); virtual void run(ModuleList &modules); - // simple bfs pass - void collectReachable(std::deque &worklist, - std::unordered_set &reachable, - const std::unordered_set &others = {}); + // BFS pass + void collectReachable(std::deque &worklist, + std::unordered_set &reachable, + const std::unordered_set &others = {}); + void propagateThroughReturnEdgees(std::unordered_set &reachable, + const BasicBlock *CBB); // debug void dumpPolicy(std::ostream &OS); From e7ac35b337633b00f3151f3dbf811ceacda0953a Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 30 Jul 2025 02:19:59 +0000 Subject: [PATCH 24/50] do not readd reachable BB --- src/lib/Reachable.cc | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index a5dea5b..075ceb8 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -431,9 +431,10 @@ void ReachableCallGraphPass::propagateThroughReturnEdgees( if (reachable.insert(Pred).second) { RA_DEBUG("Adding Pred: " << BBIDs[Pred] << "\n"); propagateThroughReturnEdgees(reachable, Pred); + worklist.push_back(Pred); } - } - } + } // end of processing predecessors of this BB + } // end of propagation through predecessors } void ReachableCallGraphPass::collectReachable(std::deque &worklist, @@ -450,7 +451,10 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor } // add predecessors for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { - auto *Pred = *PI; + const BasicBlock *Pred = *PI; + if (reachable.find(Pred) != reachable.end()) { + continue; // already added + } if (reachable.insert(Pred).second) { RA_DEBUG("Adding Pred: " << BBIDs[Pred] << "\n"); // if the predecessor is reachable to the target @@ -491,7 +495,9 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor << ") for function " << F->getName() << ", skipping caller\n"); continue; // do not propagate beyond threshold 
} - callDepth[CBB] = newDepth; // record depth before enqueue + if (reachable.find(CBB) != reachable.end()) { + continue; // already added + } // if all callsites have been processed, add the CBB RA_DEBUG("\tadding caller: " << CI->getFunction()->getName() << "\n"); if (reachable.insert(CBB).second) { @@ -501,6 +507,7 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor criticalBBs[CBB].push_back(BB); continue; } + callDepth[CBB] = newDepth; // record depth before enqueue worklist.push_back(CBB); } } // end of callers From 84a2dd5ca2dfcbe5423e7ac40ca856178f399dcf Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 30 Jul 2025 02:32:32 +0000 Subject: [PATCH 25/50] add author info --- src/lib/KAMain.cc | 1 + src/lib/Reachable.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/src/lib/KAMain.cc b/src/lib/KAMain.cc index 0524fbb..a15527f 100644 --- a/src/lib/KAMain.cc +++ b/src/lib/KAMain.cc @@ -5,6 +5,7 @@ * Copyright (C) 2015 Byoungyoung Lee * Copyright (C) 2016 Kangjie Lu * Copyright (C) 2015 - 2024 Chengyu Song + * Copyrigth (C) 2024 - 2025 Haochen Zeng * * For licensing details see LICENSE */ diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 075ceb8..5ea63bc 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -2,6 +2,7 @@ * Reachability-based Call Graph Analysis * * Copyrigth (C) 2024 - 2025 Chengyu Song + * Copyrigth (C) 2024 - 2025 Haochen Zeng * * For licensing details see LICENSE */ From 3365902f4bc9d2ea19f182721a93cca80095cf1b Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 30 Jul 2025 02:33:27 +0000 Subject: [PATCH 26/50] add png_error to isExitFn list --- src/lib/Annotation.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib/Annotation.cc b/src/lib/Annotation.cc index 3a63335..3e97c36 100644 --- a/src/lib/Annotation.cc +++ b/src/lib/Annotation.cc @@ -254,6 +254,7 @@ bool isExitFn(StringRef name) { name.equals("terminate") || name.equals("abort") || name.equals("panic") || + name.equals("png_error") 
|| name.equals("BUG") || name.equals("BUG_ON")) return true; From 3dfc4f0476257842e00860f64a4d66ccdfca1ad2 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 30 Jul 2025 06:02:10 +0000 Subject: [PATCH 27/50] dump unreachable BBs --- src/lib/KAMain.cc | 2 +- src/lib/Reachable.cc | 76 +++++++++++++++++++++++--------------------- src/lib/Reachable.h | 3 +- 3 files changed, 42 insertions(+), 39 deletions(-) diff --git a/src/lib/KAMain.cc b/src/lib/KAMain.cc index a15527f..2ea5141 100644 --- a/src/lib/KAMain.cc +++ b/src/lib/KAMain.cc @@ -244,7 +244,7 @@ int main(int argc, char **argv) { } if (!DumpDistance.empty()) { std::ofstream distance(DumpDistance); - RCGPass.dumpDistance(distance, true, false); + RCGPass.dumpDistance(distance, true); } if (!DumpAnnotatedIR.empty()) { RCGPass.annotateModules(GlobalCtx.Modules, DumpAnnotatedIR); diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 5ea63bc..6bfbe3b 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -170,7 +170,7 @@ bool ReachableCallGraphPass::isCompatibleType(Type *T1, Type *T2) { bool ReachableCallGraphPass::findCalleesByType(CallBase *CB, FuncSet &FS) { bool Changed = false; - RA_LOG("Handle indirect call: " << *CB << "\n"); + RA_DEBUG("Handle indirect call: " << *CB << "\n"); for (const Function *F : Ctx->AddressTakenFuncs) { // just compare known args if (F->getFunctionType()->isVarArg()) { @@ -231,7 +231,7 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { } // treat any BB ending in llvm::UnreachableInst as an "exit" if (isa(BB.getTerminator())) { - RA_LOG("Unreachable Inst BB: " << BBIDs[&BB] << "\n"); + RA_DEBUG("Unreachable Inst BB: " << BBIDs[&BB] << "\n"); exitBBs.insert(&BB); } for (auto &i : BB) { @@ -246,7 +246,7 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { Changed |= Ctx->Callers[RCF].insert(CI).second; // check for call to exit functions if (isExitFn(RCF->getName()) || CF->doesNotReturn()) { - RA_LOG("Exit Call: " << *CI << "\n"); + RA_DEBUG("Exit 
Call: " << *CI << "\n"); exitBBs.insert(CI->getParent()); } } else if (!CI->isInlineAsm()) { @@ -367,7 +367,7 @@ void ReachableCallGraphPass::propagateThroughReturnEdgees( if (hasCalls == BBswithCalls.end()) return; std::deque worklist; - unsigned currDepth = callDepth[CBB]; + unsigned currDepth = retDepth[CBB]; if (currDepth >= maxCallStackDepth) { RA_LOG("Max depth reached (" << maxCallStackDepth << ") for BB " << BBIDs[CBB] << ", skipping propagation\n"); @@ -399,41 +399,40 @@ void ReachableCallGraphPass::propagateThroughReturnEdgees( RA_DEBUG("DoesNotReturn: " << F->getName() << "\n"); break; // no further propagation for no-return functions } + RA_LOG(F->getName() << " is reachable through ret edge to the targets\n"); // add exit block(s) as reachable for (auto &TBB : *F) { if (isa(TBB.getTerminator())) { continue; // skip unreachable BBs } if (isa(TBB.getTerminator())) { - RA_LOG("Adding callee: " << F->getName() << "\n"); + RA_DEBUG("Adding callee: " << F->getName() << "\n"); if (reachable.find(&TBB) != reachable.end()) { continue; // already added } - callDepth[&TBB] = currDepth + 1; - if (reachable.insert(&TBB).second) { - worklist.push_back(&TBB); - } + retDepth[&TBB] = currDepth + 1; + worklist.push_back(&TBB); } } // end of BBs for this callee F } // end of candidate callees for this callsite CI } // end of all callsites for this CBB - // only back propagate through predecessors to prevent fake call edges. + // BFS back propagate through predecessors + // Do not add callers to prevent fake call edges. 
while (!worklist.empty()) { const BasicBlock *BB = worklist.front(); worklist.pop_front(); - RA_DEBUG("Propagating through: " << BBIDs[BB] << "\n"); - // add predecessors + reachable.insert(BB); + // process the current BBs + propagateThroughReturnEdgees(reachable, BB); + RA_DEBUG("Propagating Ret BB through: " << BBIDs[BB] << "\n"); + // add its predecessors for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { const BasicBlock *Pred = *PI; if (reachable.find(Pred) != reachable.end()) { continue; // already added } - if (reachable.insert(Pred).second) { - RA_DEBUG("Adding Pred: " << BBIDs[Pred] << "\n"); - propagateThroughReturnEdgees(reachable, Pred); - worklist.push_back(Pred); - } + worklist.push_back(Pred); } // end of processing predecessors of this BB } // end of propagation through predecessors } @@ -441,11 +440,11 @@ void ReachableCallGraphPass::propagateThroughReturnEdgees( void ReachableCallGraphPass::collectReachable(std::deque &worklist, std::unordered_set &reachable, const std::unordered_set &others) { + bool isComputingReachable = others.empty(); while (!worklist.empty()) { auto *BB = worklist.front(); worklist.pop_front(); // add callee when computing reachable BBs - bool isComputingReachable = others.empty(); if (isComputingReachable) { // do not PropagateThroughReturnEdgees when computing unreachable BBs propagateThroughReturnEdgees(reachable, BB); @@ -453,17 +452,16 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor // add predecessors for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { const BasicBlock *Pred = *PI; - if (reachable.find(Pred) != reachable.end()) { - continue; // already added + // if the predecessor is reachable to the target + // stop propagating unreachable BB through it + if (others.find(Pred) != others.end()) { + criticalBBs[Pred].push_back(BB); + continue; } - if (reachable.insert(Pred).second) { - RA_DEBUG("Adding Pred: " << BBIDs[Pred] << "\n"); - // if the predecessor is 
reachable to the target - // stop propagating unreachable BB through it - if (others.find(Pred) != others.end()) { - criticalBBs[Pred].push_back(BB); - break; - } + if (reachable.find(Pred) != reachable.end()) { + continue; // already added + } else if(reachable.insert(Pred).second) { + RA_DEBUG("Adding " << BBIDs[BB] << "'s Pred: " << BBIDs[Pred] << "\n"); worklist.push_back(Pred); } } @@ -486,7 +484,11 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor } } - RA_DEBUG(F->getName() << " is reachable\n"); + if (isComputingReachable) { + RA_LOG(F->getName() << " is reachable through call edge to the targets\n"); + }else { + RA_LOG(F->getName() << " is reachable to the exit\n"); + } unsigned currDepth = callDepth[BB]; for (auto *CI : itr->second) { auto *CBB = CI->getParent(); @@ -546,18 +548,18 @@ void ReachableCallGraphPass::run(ModuleList &modules) { std::deque worklist; RA_LOG("\n\n=== Collecting reachable BBs ===\n\n"); callDepth.clear(); + retDepth.clear(); for (const auto &kv : distances) { - callDepth[kv.first] = 0; worklist.push_back(kv.first); } collectReachable(worklist, reachableBBs); // do a BFS search on the call graph to find BB that can reach exits - RA_DEBUG("\n\n=== Collecting exit BBs ===\n\n"); + RA_LOG("\n\n=== Collecting exit BBs ===\n\n"); worklist.clear(); callDepth.clear(); + retDepth.clear(); for (auto *BB : exitBBs) { - callDepth[BB] = 0; worklist.push_back(BB); } collectReachable(worklist, exitBBs, reachableBBs); @@ -637,7 +639,6 @@ void ReachableCallGraphPass::run(ModuleList &modules) { auto *F = BB->getParent(); if (BB == &F->getEntryBlock()) { if (entryBBs.find(BB) != entryBBs.end()) { - RA_LOG("Entry func " << F->getName() << " is reachable\n"); // break; continue; } @@ -884,7 +885,7 @@ void getDebugLocationFullPath(const BasicBlock &BB, } } -void ReachableCallGraphPass::dumpDistance(std::ostream &OS, bool dumpSolution, bool dumpUnreachable) { +void ReachableCallGraphPass::dumpDistance(std::ostream &OS, bool 
dumpUnreachable) { // Set precision for output OS << std::fixed << std::setprecision(6); @@ -910,9 +911,10 @@ void ReachableCallGraphPass::dumpDistance(std::ostream &OS, bool dumpSolution, b // If dumpUnreachable is enabled, output unreachable basic blocks if (dumpUnreachable) { for (const auto *BB : exitBBs) { - if (distances.find(BB) == distances.end()) { - OS << BBIDs[BB] << "," << getBasicBlockId(BB) << "," << getSourceLocation(BB) << ",-1\n"; - } + OS << BBIDs[BB] << "," + << getBasicBlockId(BB) << "," + << getSourceLocation(BB) + << ",-1\n"; } } diff --git a/src/lib/Reachable.h b/src/lib/Reachable.h index 13f6fcf..21c3aa7 100644 --- a/src/lib/Reachable.h +++ b/src/lib/Reachable.h @@ -31,6 +31,7 @@ class ReachableCallGraphPass { // Maximum call stack depth to propagate across callers const unsigned maxCallStackDepth; std::unordered_map callDepth; + std::unordered_map retDepth; std::vector > targetList; std::vector entryList; @@ -62,7 +63,7 @@ class ReachableCallGraphPass { // debug void dumpPolicy(std::ostream &OS); void dumpCriticalBBs(std::ostream &OS); - void dumpDistance(std::ostream &OS, bool dumpSolution = false, bool dumpUnreachable = false); + void dumpDistance(std::ostream &OS, bool dumpUnreachable = false); void dumpIDMapping(ModuleList &modules, std::ostream &bbLocs, std::ostream &funcInfo); bool annotateModules(ModuleList &modules, std::string suffix=".annotated.bc"); void dumpCallees(); From 9c8cd6c2cc6ba1c0dece91c6aafdf421e2611532 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 30 Jul 2025 06:44:55 +0000 Subject: [PATCH 28/50] python script to verify critical branch result --- src/tools/verify_critical_BBs.py | 80 ++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 src/tools/verify_critical_BBs.py diff --git a/src/tools/verify_critical_BBs.py b/src/tools/verify_critical_BBs.py new file mode 100644 index 0000000..3de89d4 --- /dev/null +++ b/src/tools/verify_critical_BBs.py @@ -0,0 +1,80 @@ +#!/usr/bin/env 
python3 +""" + Reachability-based Call Graph Analysis + + Copyrigth (C) 2024 - 2025 Haochen Zeng + + For licensing details see LICENSE +""" + +import os +import sys + +# Path to your bid -> location mapping +mapping_file = 'bid_loc_mapping.txt' +bid_file = "critical_BBs.txt" + +def get_bids(): + bids = [] + lines = [] + with open(bid_file) as fd: + lines = fd.readlines() + for l in lines: + items = l.split(',') + assert len(items) >= 2 + bids.append(int(items[0].strip())) + return bids + +def load_mappings(path): + """Read mapping_file into a dict: bid → (filepath, line_no).""" + m = {} + with open(path, 'r') as f: + for raw in f: + line = raw.strip() + if not line: + continue + # split only into 4 parts so the filepath:linenumber stays together + bid, _, _, loc = line.split(',', 3) + if ':' not in loc: + continue + file_path, lineno = loc.rsplit(':', 1) + try: + lineno = int(lineno) + except ValueError: + continue + m[int(bid)] = (file_path, lineno) + return m + +def show_context(file_path, line_no): + """Print the line_no ± ctx lines from file_path.""" + if not os.path.exists(file_path): + print(f"[ERROR] File not found: {file_path}", file=sys.stderr) + return + with open(file_path, 'r') as f: + lines = f.readlines() + # zero-based indices + idx = line_no - 1 + start = max(0, idx - 2) + end = min(len(lines), idx + 10 + 1) + + print(f"\n--- BID context: {os.path.basename(file_path)}:{line_no} ---") + print("```") + for i in range(start, end): + prefix = "=> " if i == idx else " " + # Pad line numbers for readability + print(f"{prefix}{i+1:4d}: {lines[i].rstrip()}") + print("```") + +def main(): + # load all mappings at once + mappings = load_mappings(mapping_file) + critical_bids = get_bids() + for bid in critical_bids: + if bid not in mappings: + print(f"[WARN] No mapping found for bid {bid}", file=sys.stderr) + continue + filepath, lineno = mappings[bid] + show_context(filepath, lineno) + +if __name__ == '__main__': + main() From 
a56bd7767bd733d050f7849f84d3289be4bc6949 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 30 Jul 2025 08:08:35 +0000 Subject: [PATCH 29/50] instrument __taint_trace_divergence callback for detecting divergent point --- src/lib/Reachable.cc | 60 ++++++++++++++++++++++++++------------------ src/lib/Reachable.h | 4 +++ 2 files changed, 40 insertions(+), 24 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 6bfbe3b..c5b4d0d 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -817,7 +817,7 @@ std::string ReachableCallGraphPass::getSourceLocation(const BasicBlock *BB) { /// 3) Builds an absolute, normalized path (resolving "." and "..") /// 4) Skips if the path is empty, line=0, or the path starts with "/usr/" /// 5) Returns the first valid debug info found -void getDebugLocationFullPath(const BasicBlock &BB, +void ReachableCallGraphPass::getDebugLocationFullPath(const BasicBlock &BB, std::string &Filename, unsigned &Line, unsigned &Col) { @@ -886,27 +886,25 @@ void getDebugLocationFullPath(const BasicBlock &BB, } void ReachableCallGraphPass::dumpDistance(std::ostream &OS, bool dumpUnreachable) { - // Set precision for output - OS << std::fixed << std::setprecision(6); - - // Copy and sort distances by ascending value - std::vector> sorted; - sorted.reserve(distances.size()); - for (const auto &entry : distances) { - sorted.emplace_back(entry.first, entry.second); - } - std::sort(sorted.begin(), sorted.end(), - [](const auto &a, const auto &b) { return a.second < b.second; }); - - // Output sorted distance entries - for (const auto &pair : sorted) { - const BasicBlock *BB = pair.first; - double dist = pair.second; - OS << BBIDs[BB] << "," - << getBasicBlockId(BB) << "," - << getSourceLocation(BB) << "," - << (dist * 1000) << "\n"; - } + // Set precision for output + OS << std::fixed << std::setprecision(6); + // Copy and sort distances by ascending value + std::vector> sorted; + sorted.reserve(distances.size()); + for (const 
auto &entry : distances) { + sorted.emplace_back(entry.first, entry.second); + } + std::sort(sorted.begin(), sorted.end(), + [](const auto &a, const auto &b) { return a.second < b.second; }); + // Output sorted distance entries + for (const auto &pair : sorted) { + const BasicBlock *BB = pair.first; + double dist = pair.second; + OS << BBIDs[BB] << "," + << getBasicBlockId(BB) << "," + << getSourceLocation(BB) << "," + << (dist * 1000) << "\n"; + } // If dumpUnreachable is enabled, output unreachable basic blocks if (dumpUnreachable) { @@ -930,7 +928,6 @@ void ReachableCallGraphPass::dumpDistance(std::ostream &OS, bool dumpUnreachable } void ReachableCallGraphPass::dumpPolicy(std::ostream &OS) { - // set precision OS << std::fixed << std::setprecision(6); @@ -1055,6 +1052,12 @@ void ReachableCallGraphPass::dumpCriticalBBs(std::ostream &OS) { } bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string suffix) { + std::unordered_set inverseCriticalBBs; + for (const auto &[k,v] : criticalBBs) { + for (const auto *exitBB : v) { + inverseCriticalBBs.insert(exitBB); + } + } ModuleList::iterator i, e; // double max_dist = INFINITY; // if (!distances.empty()) { @@ -1069,10 +1072,12 @@ bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string su Module *M = i->first; auto ModName = M->getName().str(); auto NewName = ModName + suffix; - // auto VoidTy = Type::getVoidTy(M->getContext()); + auto VoidTy = Type::getVoidTy(M->getContext()); auto Int64Ty = Type::getInt64Ty(M->getContext()); // FunctionCallee TraceDistanceFunc = M->getOrInsertFunction( // "__taint_trace_distance", VoidTy, Int64Ty, Int64Ty); + FunctionCallee TraceFunc = M->getOrInsertFunction( + "__taint_trace_divergence", VoidTy, Int64Ty); for (auto &F : *M) { if (F.isDeclaration() || F.empty() || F.isIntrinsic()) { continue; // skip declaration and intrinsic @@ -1089,6 +1094,13 @@ bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string su MDNode *MD = 
MDNode::get(M->getContext(), {ConstantAsMetadata::get(BBID)}); term->setMetadata("bbid", MD); + + // instrument __taint_trace_divergence callback + if (inverseCriticalBBs.count(&BB)) { + IRBuilder<> IRB(&*BB.getFirstInsertionPt()); + auto *CI = IRB.CreateCall(TraceFunc, {BBID}); + CI->setCannotMerge(); + } // annotate reachable basic block with ID and distance // if (reachableBBs.count(&BB)) { // // check if we have a distance diff --git a/src/lib/Reachable.h b/src/lib/Reachable.h index 21c3aa7..2a53753 100644 --- a/src/lib/Reachable.h +++ b/src/lib/Reachable.h @@ -18,6 +18,10 @@ class ReachableCallGraphPass { bool isCompatibleType(llvm::Type *T1, llvm::Type *T2); bool findCalleesByType(llvm::CallBase*, FuncSet&); std::string getSourceLocation(const llvm::BasicBlock *BB); + void getDebugLocationFullPath(const BasicBlock &BB, + std::string &Filename, + unsigned &Line, + unsigned &Col); GlobalContext *Ctx; From 6baa75d10bd31d1f51da6014a30a59d42d62f6c1 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Thu, 31 Jul 2025 22:08:15 +0000 Subject: [PATCH 30/50] set global flag has_reached_target to true once target has reached --- src/lib/Reachable.cc | 20 ++++++++++++++++++++ src/lib/Reachable.h | 1 + 2 files changed, 21 insertions(+) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index c5b4d0d..733b1b5 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -276,6 +276,7 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { if (f.find(target.first) != std::string::npos && loc.getLine() == target.second) { RA_LOG("Target I: " << *I << "\n"); distances[I->getParent()] = 0.0; + targetBBs.insert(I->getParent()); reachableBBs.insert(I->getParent()); } } @@ -1074,6 +1075,16 @@ bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string su auto NewName = ModName + suffix; auto VoidTy = Type::getVoidTy(M->getContext()); auto Int64Ty = Type::getInt64Ty(M->getContext()); + auto *BoolTy = Type::getInt1Ty(M->getContext()); + auto *TrueVal = 
ConstantInt::getTrue(BoolTy); + auto *FalseVal = ConstantInt::getFalse(BoolTy); + GlobalVariable *HasReachedTarget = cast( + M->getOrInsertGlobal("has_reached_target", BoolTy)); + HasReachedTarget->setLinkage(GlobalValue::LinkOnceODRLinkage); + HasReachedTarget->setComdat(M->getOrInsertComdat(HasReachedTarget->getName())); + if (!HasReachedTarget->hasInitializer()) + HasReachedTarget->setInitializer(FalseVal); + // FunctionCallee TraceDistanceFunc = M->getOrInsertFunction( // "__taint_trace_distance", VoidTy, Int64Ty, Int64Ty); FunctionCallee TraceFunc = M->getOrInsertFunction( @@ -1101,6 +1112,15 @@ bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string su auto *CI = IRB.CreateCall(TraceFunc, {BBID}); CI->setCannotMerge(); } + // Instrument code to set has_reached_target to true + for (const llvm::BasicBlock* tb : targetBBs) { + if (tb == &BB) { + IRBuilder<> IRB(BB.getTerminator()); + IRB.CreateStore(TrueVal, HasReachedTarget)->setMetadata( + M->getMDKindID("nosanitize"), MDNode::get(M->getContext(), None)); + break; + } + } // annotate reachable basic block with ID and distance // if (reachableBBs.count(&BB)) { // // check if we have a distance diff --git a/src/lib/Reachable.h b/src/lib/Reachable.h index 2a53753..ac0068d 100644 --- a/src/lib/Reachable.h +++ b/src/lib/Reachable.h @@ -39,6 +39,7 @@ class ReachableCallGraphPass { std::vector > targetList; std::vector entryList; + std::unordered_set targetBBs; std::unordered_set reachableBBs; std::unordered_map distances; std::unordered_set exitBBs; From 2225fee408486dde5099569b6a3d0ce7af2bca02 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Sat, 2 Aug 2025 06:23:44 +0000 Subject: [PATCH 31/50] more robust unreachable exitBBs detection --- src/lib/Reachable.cc | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 733b1b5..aec01d7 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc 
@@ -229,8 +229,9 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { nextBBID += SI->getNumCases(); } } - // treat any BB ending in llvm::UnreachableInst as an "exit" - if (isa(BB.getTerminator())) { + auto* TI = BB.getTerminator(); + // treat any BB ending in llvm::UnreachableInst and exception as an "exit" + if (isa(TI) || isa(TI)) { RA_DEBUG("Unreachable Inst BB: " << BBIDs[&BB] << "\n"); exitBBs.insert(&BB); } @@ -345,14 +346,38 @@ bool ReachableCallGraphPass::doInitialization(Module *M) { isEntry = (itr != entryList.end()); } if (isEntry) { + // Record entry block entryBBs.insert(&F.getEntryBlock()); + // Compute the maximum source line number for this function + unsigned maxLine = 0; for (auto &BB : F) { - if (isa(BB.getTerminator())) { - exitBBs.insert(&BB); + for (auto &I : BB) { + if (auto DL = I.getDebugLoc()) { + maxLine = std::max(maxLine, DL.getLine()); + } } } - } - } + // Seed exitBBs with normal exit terminators + for (auto &BB : F) { + auto *TI = BB.getTerminator(); + if (isa(TI) || isa(TI) || + isa(TI) || TI->getNumSuccessors() == 0) { + exitBBs.insert(&BB); + } + if (maxLine > 0) { + // Also include any BB whose debug line equals the function's last line + for (auto &I : BB) { + if (auto DL = I.getDebugLoc()) { + if (DL.getLine() == maxLine) { + exitBBs.insert(&BB); + break; + } + } + } + } + } // end of finding exitBBs + } // end of entry function processing + } // end of processing all functions in this Module return false; } @@ -554,6 +579,8 @@ void ReachableCallGraphPass::run(ModuleList &modules) { worklist.push_back(kv.first); } collectReachable(worklist, reachableBBs); + for (const auto *BB : exitBBs) + reachableBBs.erase(BB); // do a BFS search on the call graph to find BB that can reach exits RA_LOG("\n\n=== Collecting exit BBs ===\n\n"); From 6f1267a6af1652fc0d1de287d5c74b43d33f91c9 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Sat, 2 Aug 2025 06:42:08 +0000 Subject: [PATCH 32/50] skip any unreachable Preds when computing 
distances. --- src/lib/Reachable.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index aec01d7..0663c04 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -629,6 +629,10 @@ void ReachableCallGraphPass::run(ModuleList &modules) { auto *Pred = *PI; double numSucc = 0.0; double prob = 0.0; + if (reachableBBs.find(Pred) == reachableBBs.end()) { + RA_DEBUG("Skip unreachable Pred: " << *Pred << "\n"); + continue; + } for (auto SI = succ_begin(Pred), SE = succ_end(Pred); SI != SE; ++SI) { auto *Succ = *SI; numSucc += 1.0; From ef388a7b9d23c38b8f0ea74898fac839cef71cfa Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Sat, 2 Aug 2025 07:11:18 +0000 Subject: [PATCH 33/50] more friendly logs --- src/lib/Reachable.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 0663c04..00a835d 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -425,7 +425,10 @@ void ReachableCallGraphPass::propagateThroughReturnEdgees( RA_DEBUG("DoesNotReturn: " << F->getName() << "\n"); break; // no further propagation for no-return functions } - RA_LOG(F->getName() << " is reachable through ret edge to the targets\n"); + static std::unordered_set Seen; + if (Seen.insert(F).second) { + RA_LOG(F->getName() << " is reachable through ret edge to the targets\n"); + } // add exit block(s) as reachable for (auto &TBB : *F) { if (isa(TBB.getTerminator())) { @@ -648,7 +651,7 @@ void ReachableCallGraphPass::run(ModuleList &modules) { } prob /= numSucc; if (prob == 0.0) { - WARNING("prob dropped to 0 for basic block in " << BB->getParent()->getName() << "\n"); + WARNING("prob dropped to 0 for BB "<< getSourceLocation(BB) << " in " << BB->getParent()->getName() << "\n"); RA_DEBUG("\t " << *BB << "\n"); continue; } @@ -691,7 +694,10 @@ void ReachableCallGraphPass::run(ModuleList &modules) { } } // check callers - RA_LOG(F->getName() << " is reachable from " << 
itr->second.size() << " callers\n"); + static std::unordered_set Seen; + if (Seen.insert(F).second) { + RA_LOG(F->getName() << " is reachable from " << itr->second.size() << " callers\n"); + } auto dist = distances[BB]; for (auto CI : itr->second) { auto CBB = CI->getParent(); From 57ba1f979e19c0ffedaf407179d334504eebbe99 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Tue, 12 Aug 2025 04:08:04 +0000 Subject: [PATCH 34/50] Reachable: use iterative BFS with queued dedup in propagateThroughReturnEdgees to avoid missed cases and duplicate processing --- src/lib/Reachable.cc | 149 ++++++++++++++++++++++++------------------- 1 file changed, 84 insertions(+), 65 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 00a835d..b03f700 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -387,83 +387,102 @@ bool ReachableCallGraphPass::doFinalization(Module *M) { } void ReachableCallGraphPass::propagateThroughReturnEdgees( - std::unordered_set &reachable, - const BasicBlock* CBB) { - auto hasCalls = BBswithCalls.find(CBB); - if (hasCalls == BBswithCalls.end()) return; - + std::unordered_set &reachable, + const BasicBlock* startBB) { + // Pure iterative BFS to avoid recursion and ensure no cases are skipped. 
std::deque worklist; - unsigned currDepth = retDepth[CBB]; - if (currDepth >= maxCallStackDepth) { - RA_LOG("Max depth reached (" << maxCallStackDepth - << ") for BB " << BBIDs[CBB] << ", skipping propagation\n"); - return; // do not propagate beyond threshold + std::unordered_set queued; + + if (startBB == nullptr) { + return; } - CallSequence calls = hasCalls->second; - for (size_t i = calls.size(); i-- > 0; ) { - // find the current callsite and there are additional callees before it - const llvm::CallBase* CI = calls[i]; - // Look up callees for this call site with proper map/iterator logic - // Unified lookup of direct or type-based callees - const FuncSet *callees = nullptr; - if (auto it = Ctx->Callees.find(CI); it != Ctx->Callees.end()) { - callees = &it->second; - } else if (UseTypeBasedCallGraph) { - if (auto it2 = calleeByType.find(CI); it2 != calleeByType.end()) { - callees = &it2->second; - } - } - if (!callees) { - RA_DEBUG("No callee for " << *CI << "\n"); - continue; - } - - // Iterate over each callee function - for (auto *F : *callees) { - if (isExitFn(F->getName()) || F->doesNotReturn()) { - RA_DEBUG("DoesNotReturn: " << F->getName() << "\n"); - break; // no further propagation for no-return functions - } - static std::unordered_set Seen; - if (Seen.insert(F).second) { - RA_LOG(F->getName() << " is reachable through ret edge to the targets\n"); - } - // add exit block(s) as reachable - for (auto &TBB : *F) { - if (isa(TBB.getTerminator())) { - continue; // skip unreachable BBs - } - if (isa(TBB.getTerminator())) { - RA_DEBUG("Adding callee: " << F->getName() << "\n"); - if (reachable.find(&TBB) != reachable.end()) { - continue; // already added - } - retDepth[&TBB] = currDepth + 1; - worklist.push_back(&TBB); - } - } // end of BBs for this callee F - } // end of candidate callees for this callsite CI - } // end of all callsites for this CBB + // Seed + queued.insert(startBB); + worklist.push_back(startBB); - // BFS back propagate through 
predecessors // Do not add callers to prevent fake call edges. while (!worklist.empty()) { const BasicBlock *BB = worklist.front(); worklist.pop_front(); - reachable.insert(BB); - // process the current BBs - propagateThroughReturnEdgees(reachable, BB); + + // Mark reachable once dequeued to keep BFS semantics + if (!reachable.insert(BB).second) { + // Already processed + } + + // Check depth budget for return-edge expansion + unsigned currDepth = 0; + if (auto it = retDepth.find(BB); it != retDepth.end()) { + currDepth = it->second; + } + if (currDepth < maxCallStackDepth) { + // If this BB has interesting callsites, push callee return blocks + if (auto hasCalls = BBswithCalls.find(BB); hasCalls != BBswithCalls.end()) { + const CallSequence &calls = hasCalls->second; + for (size_t i = calls.size(); i-- > 0; ) { + const llvm::CallBase* CI = calls[i]; + + // Unified lookup of direct or type-based callees + const FuncSet *callees = nullptr; + if (auto it = Ctx->Callees.find(CI); it != Ctx->Callees.end()) { + callees = &it->second; + } else if (UseTypeBasedCallGraph) { + if (auto it2 = calleeByType.find(CI); it2 != calleeByType.end()) { + callees = &it2->second; + } + } + if (!callees) { + RA_DEBUG("No callee for " << *CI << "\n"); + continue; + } + + for (auto *F : *callees) { + if (isExitFn(F->getName()) || F->doesNotReturn()) { + RA_DEBUG("DoesNotReturn: " << F->getName() << "\n"); + break; // stop on no-return functions + } + static std::unordered_set Seen; + if (Seen.insert(F).second) { + RA_LOG(F->getName() << " is reachable through ret edge to the targets\n"); + } + for (auto &TBB : *F) { + if (isa(TBB.getTerminator())) { + continue; + } + if (isa(TBB.getTerminator())) { + if (!reachable.count(&TBB) && queued.insert(&TBB).second) { + RA_DEBUG("Adding callee: " << F->getName() << "\n"); + retDepth[&TBB] = currDepth + 1; + worklist.push_back(&TBB); + } + } + } + } + } + } + } else { + RA_LOG("Max depth reached (" << maxCallStackDepth + << ") for BB " << BBIDs[BB] 
<< ", skipping propagation\n"); + } + RA_DEBUG("Propagating Ret BB through: " << BBIDs[BB] << "\n"); - // add its predecessors + + // Add CFG predecessors to continue backward propagation for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { const BasicBlock *Pred = *PI; - if (reachable.find(Pred) != reachable.end()) { - continue; // already added + if (reachable.count(Pred)) { + continue; // already processed } - worklist.push_back(Pred); - } // end of processing predecessors of this BB - } // end of propagation through predecessors + if (queued.insert(Pred).second) { + // keep same ret-depth across normal CFG edges + if (currDepth != 0) { + retDepth[Pred] = currDepth; + } + worklist.push_back(Pred); + } + } + } } void ReachableCallGraphPass::collectReachable(std::deque &worklist, From 05f16e991223906637e1ac368ebece527284f398 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Tue, 12 Aug 2025 04:21:04 +0000 Subject: [PATCH 35/50] Reachable: never treat entry blocks as exits; skip removing entry blocks from reachable when filtering exitBBs --- src/lib/Reachable.cc | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index b03f700..04b37b3 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -359,9 +359,14 @@ bool ReachableCallGraphPass::doInitialization(Module *M) { } // Seed exitBBs with normal exit terminators for (auto &BB : F) { + // Never treat the entry block as an exit block + if (&BB == &F.getEntryBlock()) { + continue; + } auto *TI = BB.getTerminator(); - if (isa(TI) || isa(TI) || - isa(TI) || TI->getNumSuccessors() == 0) { + if (isa(TI) + || isa(TI) + || isa(TI)) { exitBBs.insert(&BB); } if (maxLine > 0) { @@ -601,8 +606,13 @@ void ReachableCallGraphPass::run(ModuleList &modules) { worklist.push_back(kv.first); } collectReachable(worklist, reachableBBs); - for (const auto *BB : exitBBs) + // Remove exit blocks from reachable set, but never remove entry 
blocks + for (const auto *BB : exitBBs) { + if (entryBBs.find(BB) != entryBBs.end()) { + continue; + } reachableBBs.erase(BB); + } // do a BFS search on the call graph to find BB that can reach exits RA_LOG("\n\n=== Collecting exit BBs ===\n\n"); From fe24ea39fdf10438442867e144e86cb2e84761f6 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Tue, 12 Aug 2025 04:42:40 +0000 Subject: [PATCH 36/50] more logs --- src/lib/Reachable.cc | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 04b37b3..ffda599 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -348,6 +348,7 @@ bool ReachableCallGraphPass::doInitialization(Module *M) { if (isEntry) { // Record entry block entryBBs.insert(&F.getEntryBlock()); + RA_LOG("[init] Entry function detected: " << F.getName() << "\n"); // Compute the maximum source line number for this function unsigned maxLine = 0; for (auto &BB : F) { @@ -368,6 +369,7 @@ bool ReachableCallGraphPass::doInitialization(Module *M) { || isa(TI) || isa(TI)) { exitBBs.insert(&BB); + RA_LOG("[init] ExitByTerm added: " << F.getName() << " @ " << getSourceLocation(&BB) << "\n"); } if (maxLine > 0) { // Also include any BB whose debug line equals the function's last line @@ -375,6 +377,7 @@ bool ReachableCallGraphPass::doInitialization(Module *M) { if (auto DL = I.getDebugLoc()) { if (DL.getLine() == maxLine) { exitBBs.insert(&BB); + RA_LOG("[init] ExitByMaxLine added: " << F.getName() << " @ " << getSourceLocation(&BB) << " (maxLine=" << maxLine << ")\n"); break; } } @@ -501,6 +504,7 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor if (isComputingReachable) { // do not PropagateThroughReturnEdgees when computing unreachable BBs propagateThroughReturnEdgees(reachable, BB); + RA_LOG("[collectReachable] After ret-edge, reachable size=" << reachable.size() << ", current BB=" << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); } // 
add predecessors for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { @@ -532,7 +536,11 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor found = (itr != callerByType.end()); } if (!found) { - WARNING("No caller for " << F->getName() << "\n"); + static std::unordered_set WarnedNoCaller1; + if (WarnedNoCaller1.insert(F).second) { + std::string context_str = isComputingReachable ? "Reachable Analysis: " : "Unreachable Analysis: "; + WARNING(context_str << "No caller for " << F->getName() << "\n"); + } continue; } } @@ -596,6 +604,10 @@ void ReachableCallGraphPass::run(ModuleList &modules) { WARNING("No entry BBs found\n"); return; } + RA_LOG("[run] Num entry BBs: " << entryBBs.size() << "\n"); + for (auto *EBB : entryBBs) { + RA_LOG("[run] Entry BB: " << BBIDs[EBB] << " @ " << getSourceLocation(EBB) << " of function " << EBB->getParent()->getName() << "\n"); + } // do a BFS search on the target list, find all reachable BBs first std::deque worklist; @@ -606,12 +618,16 @@ void ReachableCallGraphPass::run(ModuleList &modules) { worklist.push_back(kv.first); } collectReachable(worklist, reachableBBs); + RA_LOG("[run] reachableBBs after target-backward: " << reachableBBs.size() << "\n"); // Remove exit blocks from reachable set, but never remove entry blocks for (const auto *BB : exitBBs) { if (entryBBs.find(BB) != entryBBs.end()) { + RA_LOG("[run] Skip removing entry from reachable: BB " << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); continue; } - reachableBBs.erase(BB); + if (reachableBBs.erase(BB)) { + RA_LOG("[run] Removed exit BB from reachable: " << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); + } } // do a BFS search on the call graph to find BB that can reach exits @@ -620,9 +636,11 @@ void ReachableCallGraphPass::run(ModuleList &modules) { callDepth.clear(); retDepth.clear(); for (auto *BB : exitBBs) { + RA_LOG("[run] Seed exit BB: " << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); worklist.push_back(BB); 
} collectReachable(worklist, exitBBs, reachableBBs); + RA_LOG("[run] exitBBs reachable to target size: " << exitBBs.size() << "\n"); // check if target is reachable bool reached = false; @@ -631,6 +649,9 @@ void ReachableCallGraphPass::run(ModuleList &modules) { RA_LOG("\n\n=== Target is reachable from entry ===\n\n"); reached = true; } + else { + RA_LOG("[run] Entry not in reachableBBs: " << BBIDs[entry] << " @ " << getSourceLocation(entry) << " func " << entry->getParent()->getName() << "\n"); + } } if (!reached) { @@ -652,6 +673,7 @@ void ReachableCallGraphPass::run(ModuleList &modules) { auto *BB = worklist.front(); worklist.pop_front(); queued.erase(BB); + RA_DEBUG("[distance] Pop BB: " << BBIDs[BB] << " @ " << getSourceLocation(BB) << ", depth=" << callDepth[BB] << "\n"); unsigned currDepth = callDepth[BB]; if (currDepth >= maxCallStackDepth) { continue; // do not propagate beyond threshold @@ -696,6 +718,7 @@ void ReachableCallGraphPass::run(ModuleList &modules) { if (queued.insert(Pred).second){ callDepth[Pred] = currDepth; worklist.push_back(Pred); + RA_DEBUG("[distance] Enqueue Pred: " << BBIDs[Pred] << " @ " << getSourceLocation(Pred) << ", dist=" << dist*1000 << "\n"); } } } @@ -714,10 +737,9 @@ void ReachableCallGraphPass::run(ModuleList &modules) { found = (itr != callerByType.end()); } if (!found) { - if (!F->getName().equals("main")) { + static std::unordered_set WarnedNoCaller2; + if (WarnedNoCaller2.insert(F).second) { WARNING("No caller for " << F->getName() << "\n"); - } else { - RA_LOG("main is reached\n"); } continue; } @@ -725,7 +747,7 @@ void ReachableCallGraphPass::run(ModuleList &modules) { // check callers static std::unordered_set Seen; if (Seen.insert(F).second) { - RA_LOG(F->getName() << " is reachable from " << itr->second.size() << " callers\n"); + RA_DEBUG(F->getName() << " is reachable from " << itr->second.size() << " callers\n"); } auto dist = distances[BB]; for (auto CI : itr->second) { @@ -744,6 +766,7 @@ void 
ReachableCallGraphPass::run(ModuleList &modules) { if (queued.insert(CBB).second){ callDepth[CBB] = currDepth + 1; worklist.push_back(CBB); + RA_DEBUG("[distance] Enqueue Caller CBB: " << BBIDs[CBB] << " @ " << getSourceLocation(CBB) << ", from F=" << F->getName() << "\n"); } } } else { @@ -751,7 +774,7 @@ void ReachableCallGraphPass::run(ModuleList &modules) { // for each call site, check if all its callees have been processed double prob = 0.0; FuncSet &Callees = UseTypeBasedCallGraph ? calleeByType[CI] : Ctx->Callees[CI]; - RA_LOG("\tfrom indirect call @" << CF->getName() << ", callee size = " << Callees.size() << "\n"); + RA_DEBUG("\tfrom indirect call @" << CF->getName() << ", callee size = " << Callees.size() << "\n"); // XXX: skip potentially imprecise callsites? if (Callees.size() > 50) { RA_DEBUG("Skip indirect call with too many callees\n"); From 410a08c907ef796ef3bc769f72378561913e6ef7 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Tue, 12 Aug 2025 04:51:31 +0000 Subject: [PATCH 37/50] Reachability: decouple return-edge propagation from CFG/caller BFS Make propagateThroughReturnEdgees collect ret-edge-only BBs without mutating main worklist/reachable Accumulate in collectReachable and merge after BFS; preserve retDepth budget --- src/lib/Reachable.cc | 146 ++++++++++++++++++++----------------------- src/lib/Reachable.h | 4 +- 2 files changed, 69 insertions(+), 81 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index ffda599..144db43 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -395,100 +395,80 @@ bool ReachableCallGraphPass::doFinalization(Module *M) { } void ReachableCallGraphPass::propagateThroughReturnEdgees( - std::unordered_set &reachable, + std::unordered_set &retReachable, const BasicBlock* startBB) { - // Pure iterative BFS to avoid recursion and ensure no cases are skipped. - std::deque worklist; - std::unordered_set queued; - + // Only collect BBs via return-edges. 
Do not touch the main worklist or callers. if (startBB == nullptr) { return; } - // Seed - queued.insert(startBB); - worklist.push_back(startBB); - - // Do not add callers to prevent fake call edges. - while (!worklist.empty()) { - const BasicBlock *BB = worklist.front(); - worklist.pop_front(); + std::deque local; + std::unordered_set visited; + visited.insert(startBB); + local.push_back(startBB); - // Mark reachable once dequeued to keep BFS semantics - if (!reachable.insert(BB).second) { - // Already processed - } + while (!local.empty()) { + const BasicBlock *BB = local.front(); + local.pop_front(); - // Check depth budget for return-edge expansion unsigned currDepth = 0; if (auto it = retDepth.find(BB); it != retDepth.end()) { currDepth = it->second; } - if (currDepth < maxCallStackDepth) { - // If this BB has interesting callsites, push callee return blocks - if (auto hasCalls = BBswithCalls.find(BB); hasCalls != BBswithCalls.end()) { - const CallSequence &calls = hasCalls->second; - for (size_t i = calls.size(); i-- > 0; ) { - const llvm::CallBase* CI = calls[i]; - - // Unified lookup of direct or type-based callees - const FuncSet *callees = nullptr; - if (auto it = Ctx->Callees.find(CI); it != Ctx->Callees.end()) { - callees = &it->second; - } else if (UseTypeBasedCallGraph) { - if (auto it2 = calleeByType.find(CI); it2 != calleeByType.end()) { - callees = &it2->second; - } - } - if (!callees) { - RA_DEBUG("No callee for " << *CI << "\n"); + if (currDepth >= maxCallStackDepth) { + RA_LOG("Max depth reached (" << maxCallStackDepth + << ") for BB " << BBIDs[BB] << ", skipping ret-edge propagation\n"); + continue; + } + + // If this BB has interesting callsites, push callee return blocks + auto hasCalls = BBswithCalls.find(BB); + if (hasCalls == BBswithCalls.end()) { + continue; + } + const CallSequence &calls = hasCalls->second; + for (size_t i = calls.size(); i-- > 0; ) { + const llvm::CallBase* CI = calls[i]; + // Unified lookup of direct or type-based 
callees + const FuncSet *callees = nullptr; + if (auto it = Ctx->Callees.find(CI); it != Ctx->Callees.end()) { + callees = &it->second; + } else if (UseTypeBasedCallGraph) { + if (auto it2 = calleeByType.find(CI); it2 != calleeByType.end()) { + callees = &it2->second; + } + } + if (!callees) { + RA_DEBUG("No callee for " << *CI << "\n"); + continue; + } + + for (auto *F : *callees) { + if (isExitFn(F->getName()) || F->doesNotReturn()) { + RA_DEBUG("DoesNotReturn: " << F->getName() << "\n"); + break; // stop on no-return functions + } + static std::unordered_set Seen; + if (Seen.insert(F).second) { + RA_LOG(F->getName() << " is reachable through ret edge to the targets\n"); + } + for (auto &TBB : *F) { + if (isa(TBB.getTerminator())) { continue; } - - for (auto *F : *callees) { - if (isExitFn(F->getName()) || F->doesNotReturn()) { - RA_DEBUG("DoesNotReturn: " << F->getName() << "\n"); - break; // stop on no-return functions - } - static std::unordered_set Seen; - if (Seen.insert(F).second) { - RA_LOG(F->getName() << " is reachable through ret edge to the targets\n"); - } - for (auto &TBB : *F) { - if (isa(TBB.getTerminator())) { - continue; - } - if (isa(TBB.getTerminator())) { - if (!reachable.count(&TBB) && queued.insert(&TBB).second) { - RA_DEBUG("Adding callee: " << F->getName() << "\n"); - retDepth[&TBB] = currDepth + 1; - worklist.push_back(&TBB); - } + if (isa(TBB.getTerminator())) { + if (retReachable.insert(&TBB).second) { + retDepth[&TBB] = currDepth + 1; + // Keep exploring ret-edges from new return blocks as well + if (visited.insert(&TBB).second) { + local.push_back(&TBB); } + RA_DEBUG("[ret] add callee ret-BB: " << F->getName() + << " -> " << BBIDs[&TBB] << "\n"); } } } } - } else { - RA_LOG("Max depth reached (" << maxCallStackDepth - << ") for BB " << BBIDs[BB] << ", skipping propagation\n"); - } - - RA_DEBUG("Propagating Ret BB through: " << BBIDs[BB] << "\n"); - - // Add CFG predecessors to continue backward propagation - for (auto PI = 
pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { - const BasicBlock *Pred = *PI; - if (reachable.count(Pred)) { - continue; // already processed - } - if (queued.insert(Pred).second) { - // keep same ret-depth across normal CFG edges - if (currDepth != 0) { - retDepth[Pred] = currDepth; - } - worklist.push_back(Pred); - } } } } @@ -497,14 +477,16 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor std::unordered_set &reachable, const std::unordered_set &others) { bool isComputingReachable = others.empty(); + // Accumulator for ret-edge-only BBs across the whole BFS + std::unordered_set retEdgeAccum; while (!worklist.empty()) { auto *BB = worklist.front(); worklist.pop_front(); // add callee when computing reachable BBs if (isComputingReachable) { - // do not PropagateThroughReturnEdgees when computing unreachable BBs - propagateThroughReturnEdgees(reachable, BB); - RA_LOG("[collectReachable] After ret-edge, reachable size=" << reachable.size() << ", current BB=" << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); + // collect ret-edge-only BBs into accumulator; do not mutate 'reachable' here + propagateThroughReturnEdgees(retEdgeAccum, BB); + RA_DEBUG("[collectReachable] ret-edge accum size=" << retEdgeAccum.size() << ", from BB=" << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); } // add predecessors for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { @@ -577,6 +559,12 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor } // end of callers } // end of entry block } + // Merge ret-edge-only BBs after BFS completes + if (isComputingReachable) { + for (const BasicBlock *RBB : retEdgeAccum) { + reachable.insert(RBB); + } + } } void ReachableCallGraphPass::run(ModuleList &modules) { diff --git a/src/lib/Reachable.h b/src/lib/Reachable.h index ac0068d..1e2cab0 100644 --- a/src/lib/Reachable.h +++ b/src/lib/Reachable.h @@ -62,8 +62,8 @@ class ReachableCallGraphPass { void collectReachable(std::deque &worklist, 
std::unordered_set &reachable, const std::unordered_set &others = {}); - void propagateThroughReturnEdgees(std::unordered_set &reachable, - const BasicBlock *CBB); + void propagateThroughReturnEdgees(std::unordered_set &retReachable, + const BasicBlock *startBB); // debug void dumpPolicy(std::ostream &OS); From 530d8390902ec226e21c147ddcaf1d71bd22aca1 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 13 Aug 2025 18:42:00 +0000 Subject: [PATCH 38/50] more robust processing --- src/tools/verify_critical_BBs.py | 64 ++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/src/tools/verify_critical_BBs.py b/src/tools/verify_critical_BBs.py index 3de89d4..178d989 100644 --- a/src/tools/verify_critical_BBs.py +++ b/src/tools/verify_critical_BBs.py @@ -10,42 +10,45 @@ import os import sys -# Path to your bid -> location mapping -mapping_file = 'bid_loc_mapping.txt' -bid_file = "critical_BBs.txt" - def get_bids(): bids = [] - lines = [] - with open(bid_file) as fd: - lines = fd.readlines() - for l in lines: - items = l.split(',') - assert len(items) >= 2 - bids.append(int(items[0].strip())) + with open(bid_file, 'r') as fd: + for raw in fd: + line = raw.strip() + if not line or line.startswith('#'): + continue + first_field = line.split(',', 1)[0].strip() + try: + bids.append(int(first_field)) + except ValueError: + print(f"[WARN] Skip invalid bid line: {line}", file=sys.stderr) return bids def load_mappings(path): """Read mapping_file into a dict: bid → (filepath, line_no).""" - m = {} + mappings = {} with open(path, 'r') as f: for raw in f: line = raw.strip() - if not line: + if not line or line.startswith('#'): + continue + parts = line.split(',') + if len(parts) < 2: continue - # split only into 4 parts so the filepath:linenumber stays together - bid, _, _, loc = line.split(',', 3) + bid_str = parts[0].strip() + loc = parts[-1].strip() if ':' not in loc: continue - file_path, lineno = loc.rsplit(':', 1) + file_path, 
lineno_str = loc.rsplit(':', 1) try: - lineno = int(lineno) + bid = int(bid_str) + lineno = int(lineno_str) except ValueError: continue - m[int(bid)] = (file_path, lineno) - return m + mappings[bid] = (file_path, lineno) + return mappings -def show_context(file_path, line_no): +def show_context(file_path, line_no, bid): """Print the line_no ± ctx lines from file_path.""" if not os.path.exists(file_path): print(f"[ERROR] File not found: {file_path}", file=sys.stderr) @@ -57,7 +60,7 @@ def show_context(file_path, line_no): start = max(0, idx - 2) end = min(len(lines), idx + 10 + 1) - print(f"\n--- BID context: {os.path.basename(file_path)}:{line_no} ---") + print(f"\n--- BID({bid}) context: {os.path.basename(file_path)}:{line_no}---") print("```") for i in range(start, end): prefix = "=> " if i == idx else " " @@ -66,6 +69,21 @@ def show_context(file_path, line_no): print("```") def main(): + # Path to your bid -> location mapping + global mapping_file, bid_file + target_program = "" + if len(sys.argv) > 1: + target_program = sys.argv[1].strip() + "_" + mapping_file = f'{target_program}bid_loc_mapping.txt' + bid_file = f"{target_program}critical_BBs.txt" + + if not os.path.exists(mapping_file): + print(f"[ERROR] Mapping file not found: {mapping_file}", file=sys.stderr) + sys.exit(1) + if not os.path.exists(bid_file): + print(f"[ERROR] Critical BIDs file not found: {bid_file}", file=sys.stderr) + sys.exit(1) + # load all mappings at once mappings = load_mappings(mapping_file) critical_bids = get_bids() @@ -74,7 +92,7 @@ def main(): print(f"[WARN] No mapping found for bid {bid}", file=sys.stderr) continue filepath, lineno = mappings[bid] - show_context(filepath, lineno) + show_context(filepath, lineno, bid) if __name__ == '__main__': - main() + main() \ No newline at end of file From 8701f887115aac66b62ad1a3c65c705c827c9055 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 13 Aug 2025 19:09:34 +0000 Subject: [PATCH 39/50] Fix: Improve debug location path handling 
for system libs --- src/lib/Reachable.cc | 120 +++++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 43 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 144db43..ae02e0d 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -903,62 +903,96 @@ void ReachableCallGraphPass::getDebugLocationFullPath(const BasicBlock &BB, Line = 0; Col = 0; - // We don't want paths that point to system libraries in /usr/ + // We don't want paths that point to system libraries static const std::string Xlibs("/usr/"); + auto isSystemLikePath = [](StringRef P) -> bool { + if (P.empty()) return false; + // Consider any path that is exactly /usr/... or contains /usr/ segment + // as system-like (covers sysroot cases like /toolchain/sysroot/usr/...) + if (P.startswith("/usr/")) return true; + return P.contains("/usr/"); + }; // Iterate over instructions in the basic block for (auto &Inst : BB) { if (DILocation *Loc = Inst.getDebugLoc()) { - // Extract directory & filename - std::string Dir = Loc->getDirectory().str(); - std::string File = Loc->getFilename().str(); - unsigned L = Loc->getLine(); - unsigned C = Loc->getColumn(); - - // If there's no filename, check the inlined location - if (File.empty()) { - if (DILocation *inlinedAt = Loc->getInlinedAt()) { - Dir = inlinedAt->getDirectory().str(); - File = inlinedAt->getFilename().str(); - L = inlinedAt->getLine(); - C = inlinedAt->getColumn(); + // Fallback: remember the first valid system-lib location if no user code is found + std::string systemFallbackPath; + unsigned systemFallbackLine = 0; + unsigned systemFallbackCol = 0; + + // Walk inlined-at chain from inner to outer to prefer user code call sites + for (DILocation *Cur = Loc; Cur != nullptr; Cur = Cur->getInlinedAt()) { + std::string Dir = Cur->getDirectory().str(); + std::string File = Cur->getFilename().str(); + unsigned L = Cur->getLine(); + unsigned C = Cur->getColumn(); + + // Skip if missing filename or invalid line 
+ if (File.empty() || L == 0) + continue; + + // Normalize suspicious relative system paths like "usr/..." to "/usr/..." + if (!Dir.empty() && !llvm::sys::path::is_absolute(Dir) && llvm::StringRef(Dir).startswith("usr/")) { + Dir = "/" + Dir; + } + if (!File.empty() && !llvm::sys::path::is_absolute(File) && llvm::StringRef(File).startswith("usr/")) { + File = "/" + File; } - } - // Skip if still no filename or line==0 - if (File.empty() || L == 0) - continue; + // Build an absolute path in a SmallString + llvm::SmallString<256> FullPath; - // Build an absolute path in a SmallString - llvm::SmallString<256> FullPath; - - // 1) If Dir is already absolute, just start with that. - // Otherwise, use the current working directory as a base. - if (!Dir.empty() && llvm::sys::path::is_absolute(Dir)) { - FullPath = Dir; - } else { - llvm::sys::fs::current_path(FullPath); // get the current working dir - if (!Dir.empty()) { - llvm::sys::path::append(FullPath, Dir); + // If File itself is absolute, prefer it directly + if (!File.empty() && llvm::sys::path::is_absolute(File)) { + FullPath = File; + } else { + // If Dir is already absolute, start with that. Otherwise base on CWD. + if (!Dir.empty() && llvm::sys::path::is_absolute(Dir)) { + FullPath = Dir; + } else { + llvm::sys::fs::current_path(FullPath); + if (!Dir.empty()) { + llvm::sys::path::append(FullPath, Dir); + } + } + // Append the filename (relative) + llvm::sys::path::append(FullPath, File); } - } - // 2) Append the filename - llvm::sys::path::append(FullPath, File); + // Normalize dots + llvm::sys::path::remove_dots(FullPath, /*remove_dot_dot=*/true); - // 3) Remove dot segments (both "." 
and "..") - llvm::sys::path::remove_dots(FullPath, /*remove_dot_dot=*/true); + // Skip if system-like, but record the first one as a fallback + StringRef FullRef(FullPath); + if (isSystemLikePath(FullRef)) { + if (systemFallbackPath.empty()) { + systemFallbackPath = FullPath.str().str(); + systemFallbackLine = L; + systemFallbackCol = C; + } + continue; + } - // Now FullPath is absolute & normalized - // Check if it's in /usr/ - if (StringRef(FullPath).startswith(Xlibs)) - continue; // skip system-libs + // Found a valid location => set output vars + Filename = FullPath.str().str(); + Line = L; + Col = C; + break; + } - // Found a valid location => set output vars - Filename = FullPath.str().str(); // convert to std::string - Line = L; - Col = C; - break; // stop after the first valid location + // If we selected a valid non-system frame, stop scanning instructions + if (!Filename.empty()) + break; + + // If not found in this instruction's inlined chain, but we have a + // system fallback recorded, use it and stop. 
+ if (Filename.empty() && !systemFallbackPath.empty()) { + Filename = systemFallbackPath; + Line = systemFallbackLine; + Col = systemFallbackCol; + break; + } } } } From 0750b2288b803b8f478cb6a7e5d862a79989af22 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 13 Aug 2025 21:26:20 +0000 Subject: [PATCH 40/50] better logging on unreachable BBs --- src/lib/Reachable.cc | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index ae02e0d..b0d295c 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -234,6 +234,10 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { if (isa(TI) || isa(TI)) { RA_DEBUG("Unreachable Inst BB: " << BBIDs[&BB] << "\n"); exitBBs.insert(&BB); + RA_LOG("[add-exit] by terminator: BB " << BBIDs[&BB] + << " @ " << getSourceLocation(&BB) + << " func " << F->getName() + << " term=" << TI->getOpcodeName() << "\n"); } for (auto &i : BB) { Instruction *I = &i; @@ -246,9 +250,19 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { Changed |= Ctx->Callees[CI].insert(RCF).second; Changed |= Ctx->Callers[RCF].insert(CI).second; // check for call to exit functions - if (isExitFn(RCF->getName()) || CF->doesNotReturn()) { + bool __isExitFn = isExitFn(RCF->getName()); + bool __doesNotReturn = CF->doesNotReturn(); + if (__isExitFn || __doesNotReturn) { RA_DEBUG("Exit Call: " << *CI << "\n"); exitBBs.insert(CI->getParent()); + RA_LOG("[add-exit] by call: BB " << BBIDs[CI->getParent()] + << " @ " << getSourceLocation(CI->getParent()) + << " func " << F->getName() + << " callee=" << RCF->getName() + << " reason=" << (__isExitFn ? "isExitFn" : "") + << ((__isExitFn && __doesNotReturn) ? "+" : "") + << (__doesNotReturn ? 
"doesNotReturn" : "") + << "\n"); } } else if (!CI->isInlineAsm()) { // indirect call @@ -369,7 +383,8 @@ bool ReachableCallGraphPass::doInitialization(Module *M) { || isa(TI) || isa(TI)) { exitBBs.insert(&BB); - RA_LOG("[init] ExitByTerm added: " << F.getName() << " @ " << getSourceLocation(&BB) << "\n"); + RA_LOG("[init] ExitByTerm added: " << F.getName() << " BB " << BBIDs[&BB] + << " @ " << getSourceLocation(&BB) << "\n"); } if (maxLine > 0) { // Also include any BB whose debug line equals the function's last line @@ -377,7 +392,8 @@ bool ReachableCallGraphPass::doInitialization(Module *M) { if (auto DL = I.getDebugLoc()) { if (DL.getLine() == maxLine) { exitBBs.insert(&BB); - RA_LOG("[init] ExitByMaxLine added: " << F.getName() << " @ " << getSourceLocation(&BB) << " (maxLine=" << maxLine << ")\n"); + RA_LOG("[init] ExitByMaxLine added: " << F.getName() << " BB " << BBIDs[&BB] + << " @ " << getSourceLocation(&BB) << " (maxLine=" << maxLine << ")\n"); break; } } @@ -501,6 +517,14 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor continue; // already added } else if(reachable.insert(Pred).second) { RA_DEBUG("Adding " << BBIDs[BB] << "'s Pred: " << BBIDs[Pred] << "\n"); + // When computing exit BBs (others is not empty), log propagation reason + if (!isComputingReachable) { + RA_LOG("[add-exit] by pred-edge: add BB " << BBIDs[Pred] + << " @ " << getSourceLocation(Pred) + << " func " << Pred->getParent()->getName() + << " from Succ " << BBIDs[BB] + << " @ " << getSourceLocation(BB) << "\n"); + } worklist.push_back(Pred); } } @@ -555,6 +579,13 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor } callDepth[CBB] = newDepth; // record depth before enqueue worklist.push_back(CBB); + // When computing exit BBs (others is not empty), log propagation via caller edge + if (!isComputingReachable) { + RA_LOG("[add-exit] by caller-edge: add BB " << BBIDs[CBB] + << " @ " << getSourceLocation(CBB) + << " func " << CBB->getParent()->getName() + << " 
via call into callee " << F->getName() << "\n"); + } } } // end of callers } // end of entry block From 943710a6343d5a0cf2b8832e0f646f6627bcbdf0 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 13 Aug 2025 22:00:50 +0000 Subject: [PATCH 41/50] Remove reachable BB from exitBBs --- src/lib/Reachable.cc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index b0d295c..5393d2c 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -638,14 +638,10 @@ void ReachableCallGraphPass::run(ModuleList &modules) { } collectReachable(worklist, reachableBBs); RA_LOG("[run] reachableBBs after target-backward: " << reachableBBs.size() << "\n"); - // Remove exit blocks from reachable set, but never remove entry blocks for (const auto *BB : exitBBs) { - if (entryBBs.find(BB) != entryBBs.end()) { - RA_LOG("[run] Skip removing entry from reachable: BB " << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); - continue; - } - if (reachableBBs.erase(BB)) { - RA_LOG("[run] Removed exit BB from reachable: " << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); + if (reachableBBs.find(BB) != reachableBBs.end()) { + RA_LOG("[run] Removing reachable BB from exitBBs" << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); + exitBBs.erase(BB); } } From aa4c1afb244de8a6244b7389b0674eff6a3c9c84 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 13 Aug 2025 22:49:04 +0000 Subject: [PATCH 42/50] make sure no intersection between reachable and unreachable BB sets. 
--- src/lib/Reachable.cc | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 5393d2c..31b31cb 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -222,7 +222,7 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { RA_LOG("### Run on function: " << F->getName() << "\n"); for (auto &BB : *F) { // assign a BB ID - if (BBIDs.find(&BB) == BBIDs.end()) { + if (BBIDs.find(&BB) == BBIDs.end() || BBIDs[&BB] == 0) { BBIDs[&BB] = nextBBID++; if (auto *SI = dyn_cast(BB.getTerminator())) { // assign a unique ID to the switch case @@ -383,8 +383,7 @@ bool ReachableCallGraphPass::doInitialization(Module *M) { || isa(TI) || isa(TI)) { exitBBs.insert(&BB); - RA_LOG("[init] ExitByTerm added: " << F.getName() << " BB " << BBIDs[&BB] - << " @ " << getSourceLocation(&BB) << "\n"); + RA_LOG("[init] ExitByTerm added: " << F.getName() << " BB @ " << getSourceLocation(&BB) << "\n"); } if (maxLine > 0) { // Also include any BB whose debug line equals the function's last line @@ -392,8 +391,7 @@ bool ReachableCallGraphPass::doInitialization(Module *M) { if (auto DL = I.getDebugLoc()) { if (DL.getLine() == maxLine) { exitBBs.insert(&BB); - RA_LOG("[init] ExitByMaxLine added: " << F.getName() << " BB " << BBIDs[&BB] - << " @ " << getSourceLocation(&BB) << " (maxLine=" << maxLine << ")\n"); + RA_LOG("[init] ExitByMaxLine added: " << F.getName() << " BB @ " << getSourceLocation(&BB) << " (maxLine=" << maxLine << ")\n"); break; } } @@ -503,6 +501,8 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor // collect ret-edge-only BBs into accumulator; do not mutate 'reachable' here propagateThroughReturnEdgees(retEdgeAccum, BB); RA_DEBUG("[collectReachable] ret-edge accum size=" << retEdgeAccum.size() << ", from BB=" << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); + }else if (others.find(BB) != others.end()) { + continue; } // add predecessors for (auto PI = 
pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { @@ -565,18 +565,18 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor << ") for function " << F->getName() << ", skipping caller\n"); continue; // do not propagate beyond threshold } + // If this caller basic block is already known reachable-to-target, + // mark critical and skip adding to exit set. + if (others.find(CBB) != others.end()) { + criticalBBs[CBB].push_back(BB); + continue; + } if (reachable.find(CBB) != reachable.end()) { continue; // already added } // if all callsites have been processed, add the CBB RA_DEBUG("\tadding caller: " << CI->getFunction()->getName() << "\n"); if (reachable.insert(CBB).second) { - // if the caller BB CBB is reachable to the target - // do not propagate unreachable BB through this call sites - if (others.find(CBB) != others.end()) { - criticalBBs[CBB].push_back(BB); - continue; - } callDepth[CBB] = newDepth; // record depth before enqueue worklist.push_back(CBB); // When computing exit BBs (others is not empty), log propagation via caller edge @@ -638,9 +638,16 @@ void ReachableCallGraphPass::run(ModuleList &modules) { } collectReachable(worklist, reachableBBs); RA_LOG("[run] reachableBBs after target-backward: " << reachableBBs.size() << "\n"); - for (const auto *BB : exitBBs) { - if (reachableBBs.find(BB) != reachableBBs.end()) { - RA_LOG("[run] Removing reachable BB from exitBBs" << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); + { + std::vector toErase; + toErase.reserve(exitBBs.size()); + for (const auto *BB : exitBBs) { + if (reachableBBs.find(BB) != reachableBBs.end()) { + toErase.push_back(BB); + } + } + for (const auto *BB : toErase) { + RA_LOG("[run] Removing reachable BB from exitBBs " << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); exitBBs.erase(BB); } } From 8348f1f87b693db31ab23a57fb967df9e3815c04 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Wed, 13 Aug 2025 23:32:45 +0000 Subject: [PATCH 43/50] reachable: count resume as 
exit only for developer EH (skip cleanup LPs, require user dbg loc) --- src/lib/Reachable.cc | 344 +++++++++++++++++++++++++------------------ src/lib/Reachable.h | 5 - 2 files changed, 200 insertions(+), 149 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 31b31cb..1e8faef 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -56,6 +56,191 @@ using namespace llvm; +static std::string getSourceLocation(const BasicBlock *BB) { + for (const auto &I : *BB) { + auto loc = I.getDebugLoc(); + if (loc && loc.getLine() != 0) { + // Get the filename from the debug location + std::string f = loc->getFilename().str(); + // If filename is empty, get it from the parent function + if (f.empty()) { + f = BB->getParent()->getParent()->getSourceFileName(); + } + // Remove leading "./" if present + if (f.find("./") == 0) { + f = f.substr(2); + } + // Extract the base filename by finding the last '/' or '\\' + size_t pos = f.find_last_of("/\\"); + if (pos != std::string::npos) { + f = f.substr(pos + 1); + } + return f + ":" + std::to_string(loc.getLine()); + } + } + return "NoLoc:0"; +} + +/// \brief Retrieve the first available debug location in \p BB that is not +/// inside /usr/ and store the **absolute, normalized path** in \p Filename. +/// Sets \p Line and \p Col accordingly. +/// +/// This version does: +/// 1) Loops over instructions in \p BB +/// 2) Checks the debug location (and possibly inlined-at location) +/// 3) Builds an absolute, normalized path (resolving "." 
and "..") +/// 4) Skips if the path is empty, line=0, or the path starts with "/usr/" +/// 5) Returns the first valid debug info found +static void getDebugLocationFullPath(const BasicBlock &BB, + std::string &Filename, + unsigned &Line, + unsigned &Col) { + Filename.clear(); + Line = 0; + Col = 0; + + // We don't want paths that point to system libraries + static const std::string Xlibs("/usr/"); + auto isSystemLikePath = [](StringRef P) -> bool { + if (P.empty()) return false; + // Consider any path that is exactly /usr/... or contains /usr/ segment + // as system-like (covers sysroot cases like /toolchain/sysroot/usr/...) + if (P.startswith("/usr/")) return true; + return P.contains("/usr/"); + }; + + // Iterate over instructions in the basic block + for (auto &Inst : BB) { + if (DILocation *Loc = Inst.getDebugLoc()) { + // Fallback: remember the first valid system-lib location if no user code is found + std::string systemFallbackPath; + unsigned systemFallbackLine = 0; + unsigned systemFallbackCol = 0; + + // Walk inlined-at chain from inner to outer to prefer user code call sites + for (DILocation *Cur = Loc; Cur != nullptr; Cur = Cur->getInlinedAt()) { + std::string Dir = Cur->getDirectory().str(); + std::string File = Cur->getFilename().str(); + unsigned L = Cur->getLine(); + unsigned C = Cur->getColumn(); + + // Skip if missing filename or invalid line + if (File.empty() || L == 0) + continue; + + // Normalize suspicious relative system paths like "usr/..." to "/usr/..." 
+ if (!Dir.empty() && !llvm::sys::path::is_absolute(Dir) && llvm::StringRef(Dir).startswith("usr/")) { + Dir = "/" + Dir; + } + if (!File.empty() && !llvm::sys::path::is_absolute(File) && llvm::StringRef(File).startswith("usr/")) { + File = "/" + File; + } + + // Build an absolute path in a SmallString + llvm::SmallString<256> FullPath; + + // If File itself is absolute, prefer it directly + if (!File.empty() && llvm::sys::path::is_absolute(File)) { + FullPath = File; + } else { + // If Dir is already absolute, start with that. Otherwise base on CWD. + if (!Dir.empty() && llvm::sys::path::is_absolute(Dir)) { + FullPath = Dir; + } else { + llvm::sys::fs::current_path(FullPath); + if (!Dir.empty()) { + llvm::sys::path::append(FullPath, Dir); + } + } + // Append the filename (relative) + llvm::sys::path::append(FullPath, File); + } + + // Normalize dots + llvm::sys::path::remove_dots(FullPath, /*remove_dot_dot=*/true); + + // Skip if system-like, but record the first one as a fallback + StringRef FullRef(FullPath); + if (isSystemLikePath(FullRef)) { + if (systemFallbackPath.empty()) { + systemFallbackPath = FullPath.str().str(); + systemFallbackLine = L; + systemFallbackCol = C; + } + continue; + } + + // Found a valid location => set output vars + Filename = FullPath.str().str(); + Line = L; + Col = C; + break; + } + + // If we selected a valid non-system frame, stop scanning instructions + if (!Filename.empty()) + break; + + // If not found in this instruction's inlined chain, but we have a + // system fallback recorded, use it and stop. 
+ if (Filename.empty() && !systemFallbackPath.empty()) { + Filename = systemFallbackPath; + Line = systemFallbackLine; + Col = systemFallbackCol; + break; + } + } + } +} + +// === Helpers to distinguish developer-introduced EH from compiler cleanups === +static bool isPureCleanupLP(const llvm::BasicBlock *BB) { + // Look for a landingpad as the first non-PHI instruction; treat a pure + // `cleanup` landingpad with zero clauses as compiler-generated cleanup. + for (const llvm::Instruction &I : *BB) { + if (I.getOpcode() == llvm::Instruction::PHI) continue; // skip PHIs + if (auto *LPI = llvm::dyn_cast(&I)) { + return LPI->isCleanup() && LPI->getNumClauses() == 0; + } + break; // first non-PHI wasn't a landingpad + } + return false; +} + +static bool hasUserDebugLocation(const llvm::BasicBlock *BB, std::string &OutPath) { + OutPath.clear(); + unsigned L = 0, C = 0; + getDebugLocationFullPath(*BB, OutPath, L, C); + if (OutPath.empty()) return false; + llvm::StringRef P(OutPath); + // Be conservative: treat anything under /usr/ as non-user code + if (P.contains("/usr/")) return false; + return true; +} + +static bool isDeveloperExceptionBB(const llvm::BasicBlock *BB) { + // Only consider blocks that actually resume unwinding + if (!llvm::isa(BB->getTerminator())) + return false; + + // If this is a pure cleanup landing pad, it's almost certainly compiler-gen + if (isPureCleanupLP(BB)) + return false; + + // Require a non-system debug location + std::string P; + if (!hasUserDebugLocation(BB, P)) + return false; + +#if LLVM_VERSION_MAJOR >= 15 + if (auto DL = BB->getTerminator()->getDebugLoc()) { + if (DL->isImplicitCode()) + return false; // compiler-synthesized + } +#endif + return true; +} + Function* ReachableCallGraphPass::getFuncDef(Function *F) { FuncMap::iterator it = Ctx->Funcs.find(F->getGUID()); if (it != Ctx->Funcs.end()) @@ -230,14 +415,18 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { } } auto* TI = BB.getTerminator(); - // treat any BB ending 
in llvm::UnreachableInst and exception as an "exit" - if (isa(TI) || isa(TI)) { - RA_DEBUG("Unreachable Inst BB: " << BBIDs[&BB] << "\n"); + // Treat unreachable as exit; treat resume (EH) as exit only when it's + // likely developer-introduced (not compiler cleanup). + bool isDevEH = isa(TI) && isDeveloperExceptionBB(&BB); + if (isa(TI) || isDevEH) { + RA_DEBUG((isDevEH ? "Developer EH BB: " : "Unreachable Inst BB: ") << BBIDs[&BB] << "\n"); exitBBs.insert(&BB); RA_LOG("[add-exit] by terminator: BB " << BBIDs[&BB] << " @ " << getSourceLocation(&BB) << " func " << F->getName() - << " term=" << TI->getOpcodeName() << "\n"); + << " term=" << TI->getOpcodeName() + << (isDevEH ? ", reason=developer-exception" : "") + << "\n"); } for (auto &i : BB) { Instruction *I = &i; @@ -381,9 +570,13 @@ bool ReachableCallGraphPass::doInitialization(Module *M) { auto *TI = BB.getTerminator(); if (isa(TI) || isa(TI) - || isa(TI)) { - exitBBs.insert(&BB); - RA_LOG("[init] ExitByTerm added: " << F.getName() << " BB @ " << getSourceLocation(&BB) << "\n"); + || (isa(TI) && isDeveloperExceptionBB(&BB))) { + { + bool isDevEH = isa(TI) && isDeveloperExceptionBB(&BB); + exitBBs.insert(&BB); + RA_LOG("[init] ExitByTerm added: " << F.getName() << " BB @ " << getSourceLocation(&BB) + << (isDevEH ? 
" (developer-exception)" : "") << "\n"); + } } if (maxLine > 0) { // Also include any BB whose debug line equals the function's last line @@ -894,143 +1087,6 @@ ReachableCallGraphPass::ReachableCallGraphPass( } } -std::string ReachableCallGraphPass::getSourceLocation(const BasicBlock *BB) { - for (const auto &I : *BB) { - auto loc = I.getDebugLoc(); - if (loc && loc.getLine() != 0) { - // Get the filename from the debug location - std::string f = loc->getFilename().str(); - // If filename is empty, get it from the parent function - if (f.empty()) { - f = BB->getParent()->getParent()->getSourceFileName(); - } - // Remove leading "./" if present - if (f.find("./") == 0) { - f = f.substr(2); - } - // Extract the base filename by finding the last '/' or '\\' - size_t pos = f.find_last_of("/\\"); - if (pos != std::string::npos) { - f = f.substr(pos + 1); - } - return f + ":" + std::to_string(loc.getLine()); - } - } - return "NoLoc:0"; -} - -/// \brief Retrieve the first available debug location in \p BB that is not -/// inside /usr/ and store the **absolute, normalized path** in \p Filename. -/// Sets \p Line and \p Col accordingly. -/// -/// This version does: -/// 1) Loops over instructions in \p BB -/// 2) Checks the debug location (and possibly inlined-at location) -/// 3) Builds an absolute, normalized path (resolving "." and "..") -/// 4) Skips if the path is empty, line=0, or the path starts with "/usr/" -/// 5) Returns the first valid debug info found -void ReachableCallGraphPass::getDebugLocationFullPath(const BasicBlock &BB, - std::string &Filename, - unsigned &Line, - unsigned &Col) { - Filename.clear(); - Line = 0; - Col = 0; - - // We don't want paths that point to system libraries - static const std::string Xlibs("/usr/"); - auto isSystemLikePath = [](StringRef P) -> bool { - if (P.empty()) return false; - // Consider any path that is exactly /usr/... or contains /usr/ segment - // as system-like (covers sysroot cases like /toolchain/sysroot/usr/...) 
- if (P.startswith("/usr/")) return true; - return P.contains("/usr/"); - }; - - // Iterate over instructions in the basic block - for (auto &Inst : BB) { - if (DILocation *Loc = Inst.getDebugLoc()) { - // Fallback: remember the first valid system-lib location if no user code is found - std::string systemFallbackPath; - unsigned systemFallbackLine = 0; - unsigned systemFallbackCol = 0; - - // Walk inlined-at chain from inner to outer to prefer user code call sites - for (DILocation *Cur = Loc; Cur != nullptr; Cur = Cur->getInlinedAt()) { - std::string Dir = Cur->getDirectory().str(); - std::string File = Cur->getFilename().str(); - unsigned L = Cur->getLine(); - unsigned C = Cur->getColumn(); - - // Skip if missing filename or invalid line - if (File.empty() || L == 0) - continue; - - // Normalize suspicious relative system paths like "usr/..." to "/usr/..." - if (!Dir.empty() && !llvm::sys::path::is_absolute(Dir) && llvm::StringRef(Dir).startswith("usr/")) { - Dir = "/" + Dir; - } - if (!File.empty() && !llvm::sys::path::is_absolute(File) && llvm::StringRef(File).startswith("usr/")) { - File = "/" + File; - } - - // Build an absolute path in a SmallString - llvm::SmallString<256> FullPath; - - // If File itself is absolute, prefer it directly - if (!File.empty() && llvm::sys::path::is_absolute(File)) { - FullPath = File; - } else { - // If Dir is already absolute, start with that. Otherwise base on CWD. 
- if (!Dir.empty() && llvm::sys::path::is_absolute(Dir)) { - FullPath = Dir; - } else { - llvm::sys::fs::current_path(FullPath); - if (!Dir.empty()) { - llvm::sys::path::append(FullPath, Dir); - } - } - // Append the filename (relative) - llvm::sys::path::append(FullPath, File); - } - - // Normalize dots - llvm::sys::path::remove_dots(FullPath, /*remove_dot_dot=*/true); - - // Skip if system-like, but record the first one as a fallback - StringRef FullRef(FullPath); - if (isSystemLikePath(FullRef)) { - if (systemFallbackPath.empty()) { - systemFallbackPath = FullPath.str().str(); - systemFallbackLine = L; - systemFallbackCol = C; - } - continue; - } - - // Found a valid location => set output vars - Filename = FullPath.str().str(); - Line = L; - Col = C; - break; - } - - // If we selected a valid non-system frame, stop scanning instructions - if (!Filename.empty()) - break; - - // If not found in this instruction's inlined chain, but we have a - // system fallback recorded, use it and stop. 
- if (Filename.empty() && !systemFallbackPath.empty()) { - Filename = systemFallbackPath; - Line = systemFallbackLine; - Col = systemFallbackCol; - break; - } - } - } -} - void ReachableCallGraphPass::dumpDistance(std::ostream &OS, bool dumpUnreachable) { // Set precision for output OS << std::fixed << std::setprecision(6); diff --git a/src/lib/Reachable.h b/src/lib/Reachable.h index 1e2cab0..400866f 100644 --- a/src/lib/Reachable.h +++ b/src/lib/Reachable.h @@ -17,11 +17,6 @@ class ReachableCallGraphPass { bool runOnFunction(llvm::Function*); bool isCompatibleType(llvm::Type *T1, llvm::Type *T2); bool findCalleesByType(llvm::CallBase*, FuncSet&); - std::string getSourceLocation(const llvm::BasicBlock *BB); - void getDebugLocationFullPath(const BasicBlock &BB, - std::string &Filename, - unsigned &Line, - unsigned &Col); GlobalContext *Ctx; From 79ef2a6f1977e39cd422d1fcb6d189c90b7cbda7 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Thu, 14 Aug 2025 00:13:15 +0000 Subject: [PATCH 44/50] update entry function list --- src/lib/Annotation.cc | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/lib/Annotation.cc b/src/lib/Annotation.cc index 3e97c36..ae2da48 100644 --- a/src/lib/Annotation.cc +++ b/src/lib/Annotation.cc @@ -229,18 +229,8 @@ bool isAllocFn(StringRef name, int *size, int *flag) { bool isEntryFn(StringRef name) { if (name.equals("main") || - name.startswith("do_syscall_") || - name.endswith("do_softirq") || - name.equals("start_kernel") || - name.equals("init") || - name.equals("module_init") || - name.equals("module_exit") || - name.equals("init_module") || - name.equals("cleanup_module") || - name.equals("do_init_module") || - name.equals("do_cleanup_module") || - name.equals("do_one_initcall") || - name.equals("do_one_initcall_sync")) + name.startswith("LLVMFuzzerTestOneInput") || + name.startswith("FuzzerTestOneInput")) return true; else return false; } From 38bea690ac9ebf2e1f15ea24286e3e480710b45a Mon Sep 17 00:00:00 
2001 From: Haochen Zeng Date: Thu, 14 Aug 2025 02:10:37 +0000 Subject: [PATCH 45/50] sha bi AI, shan wo dai ma, xia xie yi tong --- src/lib/Reachable.cc | 48 ++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 1e8faef..fc235b9 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -425,7 +425,7 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { << " @ " << getSourceLocation(&BB) << " func " << F->getName() << " term=" << TI->getOpcodeName() - << (isDevEH ? ", reason=developer-exception" : "") + << (isDevEH ? ", reason=developer-exception" : "UnreachableInst") << "\n"); } for (auto &i : BB) { @@ -575,7 +575,7 @@ bool ReachableCallGraphPass::doInitialization(Module *M) { bool isDevEH = isa(TI) && isDeveloperExceptionBB(&BB); exitBBs.insert(&BB); RA_LOG("[init] ExitByTerm added: " << F.getName() << " BB @ " << getSourceLocation(&BB) - << (isDevEH ? " (developer-exception)" : "") << "\n"); + << (isDevEH ? 
" (developer-exception)" : "UnreachableInst or ReturnInst") << "\n"); } } if (maxLine > 0) { @@ -610,13 +610,12 @@ void ReachableCallGraphPass::propagateThroughReturnEdgees( } std::deque local; - std::unordered_set visited; - visited.insert(startBB); local.push_back(startBB); while (!local.empty()) { const BasicBlock *BB = local.front(); local.pop_front(); + retReachable.insert(BB); unsigned currDepth = 0; if (auto it = retDepth.find(BB); it != retDepth.end()) { @@ -628,6 +627,19 @@ void ReachableCallGraphPass::propagateThroughReturnEdgees( continue; } + // Add CFG predecessors to continue backward propagation + for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + const BasicBlock *Pred = *PI; + if (retReachable.count(Pred)) { + continue; // already processed + } + // keep same ret-depth across normal CFG edges + if (currDepth != 0) { + retDepth[Pred] = currDepth; + } + local.push_back(Pred); + } + // If this BB has interesting callsites, push callee return blocks auto hasCalls = BBswithCalls.find(BB); if (hasCalls == BBswithCalls.end()) { @@ -660,24 +672,20 @@ void ReachableCallGraphPass::propagateThroughReturnEdgees( RA_LOG(F->getName() << " is reachable through ret edge to the targets\n"); } for (auto &TBB : *F) { - if (isa(TBB.getTerminator())) { - continue; - } if (isa(TBB.getTerminator())) { - if (retReachable.insert(&TBB).second) { - retDepth[&TBB] = currDepth + 1; - // Keep exploring ret-edges from new return blocks as well - if (visited.insert(&TBB).second) { - local.push_back(&TBB); - } - RA_DEBUG("[ret] add callee ret-BB: " << F->getName() - << " -> " << BBIDs[&TBB] << "\n"); + if (retReachable.count(&TBB)) { + continue; // already processed } + retDepth[&TBB] = currDepth + 1; + // Keep exploring ret-edges from new return blocks as well + local.push_back(&TBB); + RA_DEBUG("[ret] add callee ret-BB: " << F->getName() + << " -> " << BBIDs[&TBB] << "\n"); } - } - } - } - } + } // end of propagate through return BBs + } // end of propagate 
through potential callees + } // end of propagate through all call sites + } // end of local worklist } void ReachableCallGraphPass::collectReachable(std::deque &worklist, @@ -840,7 +848,7 @@ void ReachableCallGraphPass::run(ModuleList &modules) { } } for (const auto *BB : toErase) { - RA_LOG("[run] Removing reachable BB from exitBBs " << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); + RA_LOG("[run] Removing BB from exitBBs " << BBIDs[BB] << " @ " << getSourceLocation(BB) << "\n"); exitBBs.erase(BB); } } From 744c7f0762cb4520fa5ccbab6a4d23634f82cc9a Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Thu, 14 Aug 2025 04:59:29 +0000 Subject: [PATCH 46/50] add more BB to exitBBs from unknow reachable BB in normal functions --- src/lib/Reachable.cc | 17 +++++++++++++++-- src/lib/Reachable.h | 1 + 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index fc235b9..9875366 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -482,6 +482,7 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { distances[I->getParent()] = 0.0; targetBBs.insert(I->getParent()); reachableBBs.insert(I->getParent()); + reachableFuns.insert(F); } } } @@ -662,16 +663,17 @@ void ReachableCallGraphPass::propagateThroughReturnEdgees( continue; } - for (auto *F : *callees) { + for (const auto *F : *callees) { if (isExitFn(F->getName()) || F->doesNotReturn()) { RA_DEBUG("DoesNotReturn: " << F->getName() << "\n"); break; // stop on no-return functions } static std::unordered_set Seen; if (Seen.insert(F).second) { + reachableFuns.insert(F); RA_LOG(F->getName() << " is reachable through ret edge to the targets\n"); } - for (auto &TBB : *F) { + for (const auto &TBB : *F) { if (isa(TBB.getTerminator())) { if (retReachable.count(&TBB)) { continue; // already processed @@ -753,6 +755,7 @@ void ReachableCallGraphPass::collectReachable(std::deque &wor } if (isComputingReachable) { + reachableFuns.insert(F); RA_LOG(F->getName() << " 
is reachable through call edge to the targets\n"); }else { RA_LOG(F->getName() << " is reachable to the exit\n"); @@ -839,7 +842,17 @@ void ReachableCallGraphPass::run(ModuleList &modules) { } collectReachable(worklist, reachableBBs); RA_LOG("[run] reachableBBs after target-backward: " << reachableBBs.size() << "\n"); + + // add more BB to exitBBs { + for (const auto *F: reachableFuns){ + for (const auto &TBB : *F) { + if (isa(TBB.getTerminator()) && reachableBBs.find(&TBB) == reachableBBs.end()) { + exitBBs.insert(&TBB); + } + } + } + std::vector toErase; toErase.reserve(exitBBs.size()); for (const auto *BB : exitBBs) { diff --git a/src/lib/Reachable.h b/src/lib/Reachable.h index 400866f..d8d4c32 100644 --- a/src/lib/Reachable.h +++ b/src/lib/Reachable.h @@ -36,6 +36,7 @@ class ReachableCallGraphPass { std::vector entryList; std::unordered_set targetBBs; std::unordered_set reachableBBs; + std::unordered_set reachableFuns; std::unordered_map distances; std::unordered_set exitBBs; std::unordered_set entryBBs; From aa9843202044e4eb3b0911f27d827569c4040687 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Thu, 14 Aug 2025 07:41:11 +0000 Subject: [PATCH 47/50] move logic from doInitialization to runOnFunction --- src/lib/Reachable.cc | 105 +++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 55 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 9875366..e26173a 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -405,6 +405,55 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { bool Changed = false; RA_LOG("### Run on function: " << F->getName() << "\n"); + + // if no entry specified, use the common one + // collect the exit block of the entry function too + bool isEntry = false; + if (entryList.empty()) { + isEntry = isEntryFn(F->getName()); + } else { + auto itr = std::find(entryList.begin(), entryList.end(), F->getName().str()); + isEntry = (itr != entryList.end()); + } + if (isEntry) { + // 
Record entry block + entryBBs.insert(&F->getEntryBlock()); + RA_LOG("[init] Entry function detected: " << F->getName() << "\n"); + // Compute the maximum source line number for this function (first pass) + unsigned maxLine = 0; + for (const auto &BB : *F) { + for (const auto &I : BB) { + if (auto DL = I.getDebugLoc()) { + maxLine = std::max(maxLine, DL.getLine()); + } + } + } + // Seed exitBBs (second pass) + for (const auto &BB : *F) { + // Never treat the entry block as an exit block + if (&BB == &F->getEntryBlock()) { + continue; + } + auto *TI = BB.getTerminator(); + if (isa(TI) || isa(TI)) { + exitBBs.insert(&BB); + RA_LOG("[init] ExitByTerm added: " << F->getName() << " BB @ " << getSourceLocation(&BB) << "\n"); + } + if (maxLine > 0) { + // Also include any BB whose debug line equals the function's last line + for (const auto &I : BB) { + if (auto DL = I.getDebugLoc()) { + if (DL.getLine() == maxLine) { + exitBBs.insert(&BB); + RA_LOG("[init] ExitByMaxLine added: " << F->getName() << " BB @ " << getSourceLocation(&BB) << " (maxLine=" << maxLine << ")\n"); + break; + } + } + } + } + } + } + for (auto &BB : *F) { // assign a BB ID if (BBIDs.find(&BB) == BBIDs.end() || BBIDs[&BB] == 0) { @@ -539,61 +588,7 @@ bool ReachableCallGraphPass::doInitialization(Module *M) { } } } - - // if no entry specified, use the common one - // collect the exit block of the entry function too - bool isEntry = false; - if (entryList.empty()) { - isEntry = isEntryFn(F.getName()); - } else { - auto itr = std::find(entryList.begin(), entryList.end(), F.getName().str()); - isEntry = (itr != entryList.end()); - } - if (isEntry) { - // Record entry block - entryBBs.insert(&F.getEntryBlock()); - RA_LOG("[init] Entry function detected: " << F.getName() << "\n"); - // Compute the maximum source line number for this function - unsigned maxLine = 0; - for (auto &BB : F) { - for (auto &I : BB) { - if (auto DL = I.getDebugLoc()) { - maxLine = std::max(maxLine, DL.getLine()); - } - } - } - // 
Seed exitBBs with normal exit terminators - for (auto &BB : F) { - // Never treat the entry block as an exit block - if (&BB == &F.getEntryBlock()) { - continue; - } - auto *TI = BB.getTerminator(); - if (isa(TI) - || isa(TI) - || (isa(TI) && isDeveloperExceptionBB(&BB))) { - { - bool isDevEH = isa(TI) && isDeveloperExceptionBB(&BB); - exitBBs.insert(&BB); - RA_LOG("[init] ExitByTerm added: " << F.getName() << " BB @ " << getSourceLocation(&BB) - << (isDevEH ? " (developer-exception)" : "UnreachableInst or ReturnInst") << "\n"); - } - } - if (maxLine > 0) { - // Also include any BB whose debug line equals the function's last line - for (auto &I : BB) { - if (auto DL = I.getDebugLoc()) { - if (DL.getLine() == maxLine) { - exitBBs.insert(&BB); - RA_LOG("[init] ExitByMaxLine added: " << F.getName() << " BB @ " << getSourceLocation(&BB) << " (maxLine=" << maxLine << ")\n"); - break; - } - } - } - } - } // end of finding exitBBs - } // end of entry function processing - } // end of processing all functions in this Module + } return false; } From 60a6f5323a730b4f191dcd43bcd5d726eeb9c2dc Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Thu, 14 Aug 2025 08:13:35 +0000 Subject: [PATCH 48/50] fixup --- src/lib/Reachable.cc | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index e26173a..78240b3 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -435,10 +435,6 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { continue; } auto *TI = BB.getTerminator(); - if (isa(TI) || isa(TI)) { - exitBBs.insert(&BB); - RA_LOG("[init] ExitByTerm added: " << F->getName() << " BB @ " << getSourceLocation(&BB) << "\n"); - } if (maxLine > 0) { // Also include any BB whose debug line equals the function's last line for (const auto &I : BB) { @@ -467,7 +463,7 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { // Treat unreachable as exit; treat resume (EH) as exit only when it's // 
likely developer-introduced (not compiler cleanup). bool isDevEH = isa(TI) && isDeveloperExceptionBB(&BB); - if (isa(TI) || isDevEH) { + if (isa(TI) || isa(TI) || isDevEH) { RA_DEBUG((isDevEH ? "Developer EH BB: " : "Unreachable Inst BB: ") << BBIDs[&BB] << "\n"); exitBBs.insert(&BB); RA_LOG("[add-exit] by terminator: BB " << BBIDs[&BB] @@ -838,16 +834,8 @@ void ReachableCallGraphPass::run(ModuleList &modules) { collectReachable(worklist, reachableBBs); RA_LOG("[run] reachableBBs after target-backward: " << reachableBBs.size() << "\n"); - // add more BB to exitBBs + // clean exitBBs { - for (const auto *F: reachableFuns){ - for (const auto &TBB : *F) { - if (isa(TBB.getTerminator()) && reachableBBs.find(&TBB) == reachableBBs.end()) { - exitBBs.insert(&TBB); - } - } - } - std::vector toErase; toErase.reserve(exitBBs.size()); for (const auto *BB : exitBBs) { From e703907a64e10e26d2fd73ae48e470428fb4e198 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Sun, 17 Aug 2025 01:06:22 +0000 Subject: [PATCH 49/50] =?UTF-8?q?dump=20caller=E2=86=92callee=20and=20call?= =?UTF-8?q?ee=E2=86=92caller=20mappings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib/KAMain.cc | 16 +++++ src/lib/Reachable.cc | 148 ++++++++++++++++++++++++------------------- src/lib/Reachable.h | 8 ++- 3 files changed, 104 insertions(+), 68 deletions(-) diff --git a/src/lib/KAMain.cc b/src/lib/KAMain.cc index 2ea5141..c24eb4a 100644 --- a/src/lib/KAMain.cc +++ b/src/lib/KAMain.cc @@ -70,6 +70,16 @@ cl::opt DumpBidMapping( cl::opt DumpFuncInfo( "dump-func-info", cl::desc("Dump function info, format: fun_GUID,fun_name,filepath,start_linenum,end_linenum"), cl::init("")); +cl::opt DumpCallerCallee( + "dump-caller-callee", + cl::desc("Dump caller → callee mapping, format: caller_GUID,callee_GUID,..."), + cl::init("")); + +cl::opt DumpCalleeCaller( + "dump-callee-caller", + cl::desc("Dump callee → caller mapping, format: callee_GUID,caller_GUID,..."), + 
cl::init("")); + cl::opt DumpAnnotatedIR( "dump-annotated-ir", cl::desc("Dump annotated IR"), cl::init("")); @@ -238,6 +248,12 @@ int main(int argc, char **argv) { std::ofstream funcInfo(DumpFuncInfo); RCGPass.dumpIDMapping(GlobalCtx.Modules, bbLocs, funcInfo); } + if (!DumpCallerCallee.empty() && !DumpCalleeCaller.empty()){ + std::ofstream callercallee(DumpCallerCallee); + std::ofstream calleecaller(DumpCalleeCaller); + RCGPass.dumpCallees(callercallee); + RCGPass.dumpCallers(calleecaller); + } if (!DumpPolicy.empty()) { std::ofstream policy(DumpPolicy); RCGPass.dumpPolicy(policy); diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index 78240b3..ee5b4b4 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -434,7 +434,6 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { if (&BB == &F->getEntryBlock()) { continue; } - auto *TI = BB.getTerminator(); if (maxLine > 0) { // Also include any BB whose debug line equals the function's last line for (const auto &I : BB) { @@ -1357,74 +1356,91 @@ bool ReachableCallGraphPass::annotateModules(ModuleList &modules, std::string su return true; } -void ReachableCallGraphPass::dumpCallees() { - RES_REPORT("\n[dumpCallees]\n"); - raw_ostream &OS = outs(); - OS << "Num of Callees: " << calleeByType.size() << "\n"; - for (CalleeMap::iterator i = calleeByType.begin(), - e = calleeByType.end(); i != e; ++i) { - - auto CI = i->first; - FuncSet &v = i->second; - // only dump indirect call? - if (CI->isInlineAsm() || CI->getCalledFunction() /*|| v.empty()*/) - continue; - - // OS << "CS:" << *CI << "\n"; - // const DebugLoc &LOC = CI->getDebugLoc(); - // OS << "LOC: "; - // LOC.print(OS); - // OS << "^@^"; - std::string prefix = "<" + CI->getParent()->getParent()->getParent()->getName().str() + ">" - + CI->getParent()->getParent()->getName().str() + "::"; -#if 1 - for (FuncSet::iterator j = v.begin(), ej = v.end(); - j != ej; ++j) { - //OS << "\t" << ((*j)->hasInternalLinkage() ? 
"f" : "F") - // << " " << (*j)->getName() << "\n"; - OS << prefix << *CI << "\t"; - OS << (*j)->getName() << "\n"; - } -#endif - // OS << "\n"; - - // v = Ctx->Callees[CI]; - // OS << "Callees: "; - // for (FuncSet::iterator j = v.begin(), ej = v.end(); - // j != ej; ++j) { - // OS << (*j)->getName() << "::"; - // } - // OS << "\n"; - if (v.empty()) { -#if LLVM_VERSION_MAJOR > 10 - OS << "!!EMPTY =>" << *CI->getCalledOperand()<<"\n"; -#else - OS << "!!EMPTY =>" << *CI->getCalledValue()<<"\n"; -#endif - OS<< "Uninitialized function pointer is dereferenced!\n"; - } +void ReachableCallGraphPass::dumpCallees(std::ostream &calleeInfo) { + RA_LOG("\n\n=== Dumping caller->callees ===\n\n"); + // Build caller -> set{callees} using direct edges first. + // If a caller has no direct callees recorded, fall back to type-based. + std::unordered_map> caller2callees; + for (const auto &kv : Ctx->Callees) { + const CallBase *CI = kv.first; + const FuncSet &FS = kv.second; + const Function *CallerF = CI->getFunction(); + auto &CalSet = caller2callees[CallerF]; + for (const Function *CalleeF : FS) { + CalSet.insert(CalleeF); + } + } + if (UseTypeBasedCallGraph) { + for (const auto &kv : calleeByType) { // calleeByType: CallBase* -> FuncSet + const CallBase *CI = kv.first; + const Function *CallerF = CI->getFunction(); + auto findIt = caller2callees.find(CallerF); + bool hasDirect = (findIt != caller2callees.end() && !findIt->second.empty()); + if (hasDirect) { + continue; // already have direct callees for this caller + } + const FuncSet &FS = kv.second; + auto &CalSet = caller2callees[CallerF]; + for (const Function *CalleeF : FS) { + CalSet.insert(CalleeF); + } + } + } + // Emit lines: callerGUID,calleeGUID,calleeGUID,... 
for callers that have any callees + for (const auto &kv : caller2callees) { + const Function *CallerF = kv.first; + const auto &Callees = kv.second; + if (Callees.empty()) { + continue; + } + calleeInfo << CallerF->getGUID(); + for (const Function *CF : Callees) { + calleeInfo << ',' << CF->getGUID(); } - RES_REPORT("\n[End of dumpCallees]\n"); + calleeInfo << '\n'; + } } -void ReachableCallGraphPass::dumpCallers() { - RES_REPORT("\n[dumpCallers]\n"); - for (auto M : Ctx->Callers) { - const Function *F = M.first; - CallInstSet &CIS = M.second; - RES_REPORT("F : " << getScopeName(F) << "\n"); - - for (auto *CI : CIS) { - auto CallerF = CI->getParent()->getParent(); - RES_REPORT("\t"); - if (CallerF && CallerF->hasName()) { - RES_REPORT("(" << getScopeName(CallerF) << ") "); - } else { - RES_REPORT("(anonymous) "); - } - - RES_REPORT(*CI << "\n"); +void ReachableCallGraphPass::dumpCallers(std::ostream &callerInfo) { + RA_LOG("\n\n=== Dumping callee->callers ===\n\n"); + // Collect all callees that have recorded callers (direct or type-based) + std::unordered_set allCallees; + for (const auto &kv : Ctx->Callers) { + allCallees.insert(kv.first); + } + if (UseTypeBasedCallGraph) { + for (const auto &kv : callerByType) { + allCallees.insert(kv.first); + } + } + // For each callee, emit one line: calleeGUID,callerGUID,callerGUID,... 
+ for (const Function *Callee : allCallees) { + std::unordered_set callerFns; + // Direct callers + bool has_direct_callers = false; + if (auto it = Ctx->Callers.find(Callee); it != Ctx->Callers.end()) { + for (const CallBase *CI : it->second) { + callerFns.insert(CI->getFunction()); + has_direct_callers = true; + } + } + // Fallback to Type-based (indirect) callers + if (!has_direct_callers && UseTypeBasedCallGraph) { + if (auto it2 = callerByType.find(Callee); it2 != callerByType.end()) { + for (const CallBase *CI : it2->second) { + callerFns.insert(CI->getFunction()); } + } + } + if (callerFns.empty()) { + // No callers recorded for this callee + continue; } - RES_REPORT("\n[End of dumpCallers]\n"); + + callerInfo << Callee->getGUID(); + for (const Function *CallerF : callerFns) { + callerInfo << ',' << CallerF->getGUID(); + } + callerInfo << '\n'; + } } diff --git a/src/lib/Reachable.h b/src/lib/Reachable.h index d8d4c32..50dd914 100644 --- a/src/lib/Reachable.h +++ b/src/lib/Reachable.h @@ -67,8 +67,12 @@ class ReachableCallGraphPass { void dumpDistance(std::ostream &OS, bool dumpUnreachable = false); void dumpIDMapping(ModuleList &modules, std::ostream &bbLocs, std::ostream &funcInfo); bool annotateModules(ModuleList &modules, std::string suffix=".annotated.bc"); - void dumpCallees(); - void dumpCallers(); + // dumpCallees CSV format: + // one line per *caller* function: callerGUID,calleeGUID[,calleeGUID...] + void dumpCallees(std::ostream &calleeInfo); + // dumpCallers CSV format: + // one line per *callee* function: calleeGUID,callerGUID[,callerGUID...] 
+ void dumpCallers(std::ostream &callerInfo); }; #endif From 3f5dbfdf270814353e90c675a683abbe5ccef6d4 Mon Sep 17 00:00:00 2001 From: Haochen Zeng Date: Tue, 26 Aug 2025 19:09:40 +0000 Subject: [PATCH 50/50] remove return ins from exitBBs --- src/lib/Reachable.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/Reachable.cc b/src/lib/Reachable.cc index ee5b4b4..f3403a9 100644 --- a/src/lib/Reachable.cc +++ b/src/lib/Reachable.cc @@ -462,7 +462,7 @@ bool ReachableCallGraphPass::runOnFunction(Function *F) { // Treat unreachable as exit; treat resume (EH) as exit only when it's // likely developer-introduced (not compiler cleanup). bool isDevEH = isa(TI) && isDeveloperExceptionBB(&BB); - if (isa(TI) || isa(TI) || isDevEH) { + if (isa(TI) || isDevEH) { RA_DEBUG((isDevEH ? "Developer EH BB: " : "Unreachable Inst BB: ") << BBIDs[&BB] << "\n"); exitBBs.insert(&BB); RA_LOG("[add-exit] by terminator: BB " << BBIDs[&BB]