Introduction
This book teaches modern C++ features (C++11, C++14, C++17, C++20, C++23) through minimal examples written for reverse engineers.
The source code is available here.
🚧 The book is still under construction. New chapters will be added and the existing ones might be modified.
References
- modern-cpp-features
- C++ reference
- C++ Standards Committee Papers
- C++23 Language Features
- C++23 Library Features
- C++23
Move semantics
Use case
Efficiently transfer ownership of large binary buffers when parsing Mach-O segments.
Explanation
When analyzing large binaries, we do not want to copy megabytes of data. Move semantics lets us transfer ownership of buffers efficiently. The new owner steals the pointer, leaving the old object empty.
Code
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>
// A named segment together with its raw bytes. Every constructor logs to
// stdout so the example shows which one the compiler selected.
struct Segment {
    std::string name;
    std::vector<uint8_t> data;

    // Builds a segment called `n` that holds `size` bytes, each set to 0xFF.
    Segment(const std::string &n, size_t size) : name(n), data(size, 0xFF) {
        std::cout << "Constructed segment: " << name;
        std::cout << " (" << size << " bytes).\n";
    }

    // Copy constructor: duplicates both the name and the byte buffer.
    Segment(const Segment &other) : name(other.name), data(other.data) {
        std::cout << "Copied segment.\n";
    }

    // Move constructor: takes over the name and the buffer held by `other`.
    Segment(Segment &&other) noexcept
        : name(std::move(other.name)), data(std::move(other.data)) {
        std::cout << "Moved segment.\n";
    }
};
// Creates a 1024 * 1024 byte "__TEXT" segment and returns it by value.
Segment loadSegment() {
    Segment text("__TEXT", 1024 * 1024);
    // Return the local by name. Wrapping it in std::move would prevent the
    // compiler from eliding the copy/move (NRVO).
    return text;
}
int main() {
std::cout << "Loading segment.\n";
Segment text = loadSegment();
std::cout << "Transferring to analyzer.\n";
Segment analyzer_seg = std::move(text);
std::cout << "Original segment name: " << text.name << "\n";
std::cout << "Analyzer segment name: " << analyzer_seg.name << "\n";
return 0;
}
Output
$ ./src/c++11/build/move-semantics
Loading segment.
Constructed segment: __TEXT (1048576 bytes).
Transferring to analyzer.
Moved segment.
Original segment name:
Analyzer segment name: __TEXT
Rvalue references
Use case
Accept temporary disassembly results without copying.
Explanation
The key line is cache_ = std::move(instrs). Inside the function, instrs is an lvalue, so we need std::move. The rvalue reference parameter (&&) only controls what can be passed in, it does not automatically move.
Code
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>
#include "tracked-vector.hpp"
// One disassembled instruction.
struct Instruction {
    std::uint64_t address; // Address of the instruction.
    std::string mnemonic;  // Operation name, e.g. "mov".
    std::string operands;  // Operand text, e.g. "x29, sp".
};
// Keeps the most recently consumed batch of instructions.
class Disassembler {
public:
    // Takes ownership of `instrs`. The && parameter only accepts rvalues
    // (temporaries or objects passed through std::move).
    void consumeInstructions(TrackedVector<Instruction> &&instrs) {
        // `instrs` has a name, so inside this function it is an lvalue and
        // must be cast back to an rvalue to select move assignment.
        cache_ = std::move(instrs);
        std::cout << "Cached " << cacheSize() << " instructions.\n";
    }

    // Number of instructions currently cached.
    size_t cacheSize() const { return cache_.size(); }

private:
    TrackedVector<Instruction> cache_;
};
// Produces a one-instruction result ("mov x29, sp") located at `addr`.
TrackedVector<Instruction> disassemble(uint64_t addr) {
    TrackedVector<Instruction> instructions;
    instructions.push_back({addr, "mov", "x29, sp"});
    // Return the local by name. An explicit std::move here would prevent the
    // compiler from eliding the copy/move (NRVO).
    return instructions;
}
int main() {
Disassembler dis;
// Consuming temporary.
dis.consumeInstructions(disassemble(0x10000000));
// Consuming via std::move().
TrackedVector<Instruction> saved = disassemble(0x10001000);
dis.consumeInstructions(std::move(saved));
// Compile error.
// `an rvalue reference cannot be bound to an lvalue`
// dis.consumeInstructions(saved);
return 0;
}
Output
$ ./src/c++11/build/rvalue-references
Move assignment.
Cached 1 instructions.
Move assignment.
Cached 1 instructions.
Forwarding references
Use case
Generic wrapper that logs function calls while preserving argument types.
Explanation
T&& where T is a deduced template parameter is a forwarding reference (it binds to both lvalues and rvalues). Combined with std::forward, it preserves whether the original argument was an lvalue or an rvalue.
Code
#include <cstdint>
#include <iostream>
#include <utility>
// Overload chosen for lvalue arguments; prints the address in hex.
void readMemory(uint64_t &addr) {
    std::cout << "readMemory (lvalue ref): 0x";
    std::cout << std::hex << addr << "\n";
}

// Overload chosen for rvalue arguments; prints the address in hex.
void readMemory(uint64_t &&addr) {
    std::cout << "readMemory (rvalue ref): 0x";
    std::cout << std::hex << addr << "\n";
}
// Function object that wraps the readMemory overload set, so it can be passed
// around as a single callable. The argument is forwarded, which lets overload
// resolution pick the lvalue or rvalue version of readMemory.
struct ReadMemoryWrapper {
    template <typename T>
    void operator()(T &&addr) const {
        readMemory(std::forward<T>(addr));
    }
};
// Logs the call and then invokes `func` with `arg`.
//
// Both `func` and `arg` are forwarding references, so both are passed on with
// std::forward: an lvalue stays an lvalue and an rvalue stays an rvalue. This
// matters for the callable as well, because a function object may overload
// operator() on its & / && ref-qualifier.
//
// name: label printed before the call.
// func: callable to invoke.
// arg:  single argument handed to `func`.
// Returns whatever the forwarded call returns.
template <typename Func, typename Arg>
auto hookWrapper(const char *name, Func &&func, Arg &&arg)
    -> decltype(std::forward<Func>(func)(std::forward<Arg>(arg))) {
    std::cout << "Calling " << name << ".\n";
    return std::forward<Func>(func)(std::forward<Arg>(arg));
}
int main() {
uint64_t address = 0x10001000;
std::cout << "Direct calls:\n";
readMemory(address);
readMemory(0x10002000);
std::cout << "Calls via hook wrapper:\n";
// Helper functor so the proper readMemory function is called.
ReadMemoryWrapper readMemoryWrapper;
// Deduction:
// Func = lambda&
// Func&& = lambda& && = lambda& (collapsed, T& && --> T&)
// Arg = uint64_t&
// Arg&& = uint64_t& && = uint64_t& (collapsed, T& && --> T&)
hookWrapper("readMemory", readMemoryWrapper, address);
// Deduction:
// Func = lambda&
// Func&& = lambda& && = lambda& (collapsed, T& && --> T&)
// Arg = uint64_t
// Arg&& = uint64_t&&
hookWrapper("readMemory", readMemoryWrapper, 0x10002000);
return 0;
}
Output
$ ./src/c++11/build/forwarding-references
Direct calls:
readMemory (lvalue ref): 0x10001000
readMemory (rvalue ref): 0x10002000
Calls via hook wrapper:
Calling readMemory.
readMemory (lvalue ref): 0x10001000
Calling readMemory.
readMemory (rvalue ref): 0x10002000
Variadic templates
Use case
Generic hook wrapper that intercepts calls with any number of arguments.
Explanation
typename... Args declares a parameter pack that captures zero or more types. Args... args captures the actual arguments. args... is used to expand them.
Code
#include <cstdint>
#include <iostream>
// One-argument overload: prints the address in hex.
void readMemory(uint64_t addr) {
    std::cout << std::hex;
    std::cout << "addr=0x" << addr << "\n";
}

// Two-argument overload: prints the address and the length, both in hex.
void readMemory(uint64_t addr, size_t len) {
    std::cout << std::hex;
    std::cout << "addr=0x" << addr;
    std::cout << " len=0x" << len << "\n";
}
// Logs the call together with the number of arguments, then passes every
// argument on to readMemory.
//   typename... Args : pack of argument types (zero or more)
//   Args... args     : pack of argument values
//   args...          : expands the pack at the call site
template <typename... Args>
void hookWrapper(const char *name, Args... args) {
    std::cout << "Calling " << name << " (" << sizeof...(Args) << " args).\n";
    readMemory(args...);
}
// Calls the wrapper with one and with two forwarded arguments.
int main() {
    // One argument after the name: resolves to readMemory(addr).
    hookWrapper("readMemory", 0x10001000);
    // Two arguments after the name: resolves to readMemory(addr, len).
    hookWrapper("readMemory", 0x10001000, 0x100);
    return 0;
}
Output
$ ./src/c++11/build/variadic-templates
Calling readMemory (1 args).
addr=0x10001000
Calling readMemory (2 args).
addr=0x10001000 len=0x100
Initializer lists
Use case
Define known suspicious API patterns concisely.
Explanation
Initializer lists let us pass {a, b, c} syntax to functions. They do not allocate heap memory (unlike e.g. std::vector). Use them for read-only literal data.
Code
#include <initializer_list>
#include <iostream>
#include <string>
#include <vector>
// A library name plus the functions from it that are of interest.
struct ImportPattern {
    std::string dylib;
    std::vector<std::string> functions;

    // `d` is taken by value and moved into place; the initializer list is
    // copied element by element into the vector.
    ImportPattern(std::string d, std::initializer_list<std::string> funcs)
        : dylib(std::move(d)), functions(funcs.begin(), funcs.end()) {}
};
void checkSuspiciousImports(std::initializer_list<ImportPattern> patterns) {
std::cout << "Checking " << patterns.size() << " suspicious patterns:\n";
for (const auto &p : patterns) {
std::cout << " " << p.dylib << ": ";
for (const auto &f : p.functions) {
std::cout << f << " ";
}
std::cout << "\n";
}
}
// Passes a single pattern written entirely with nested braces.
int main() {
    // Reading the braces from the outside in:
    //   outer  -> std::initializer_list<ImportPattern>
    //   middle -> ImportPattern constructor call
    //   inner  -> std::initializer_list<std::string>
    checkSuspiciousImports({{"libSystem.B.dylib", {"ptrace", "sysctl"}}});
    return 0;
}
Output
$ ./src/c++11/build/initializer-lists
Checking 1 suspicious patterns:
libSystem.B.dylib: ptrace sysctl
Static assertions
Use case
Validate struct layouts match binary format requirements at compile time.
Explanation
static_assert catches mistakes at compile time.
Code
#include <cstdint>
#include <iostream>
// https://stackoverflow.com/questions/21092415/force-c-structure-to-pack-tightly
// https://en.wikipedia.org/wiki/Mach-O
// Eight consecutive 32-bit fields, declared packed so the compiler inserts no
// padding between them.
struct __attribute__((packed)) MachHeader64 {
    std::uint32_t magic;
    std::uint32_t cputype;
    std::uint32_t cpusubtype;
    std::uint32_t filetype;
    std::uint32_t numofcmds;
    std::uint32_t sizeofcmds;
    std::uint32_t flags;
    std::uint32_t reserved;
};

// 8 fields * 4 bytes. The build fails if the layout ever changes.
static_assert(sizeof(MachHeader64) == 32, "MachHeader64 must be 32 bytes.");
// Prints the header size. Reaching this point at all means the static
// assertion held when the program was compiled.
int main() {
    std::cout << "MachHeader64 size: " << sizeof(MachHeader64);
    std::cout << " bytes\n";
    std::cout << "All static assertions passed at compile time.\n";
    return 0;
}
Output
$ ./src/c++11/build/static-assertions
MachHeader64 size: 32 bytes
All static assertions passed at compile time.
auto
Use case
Simplify iterator-heavy code when walking symbol tables.
Explanation
auto lets the compiler deduce types. Reduces verbosity with STL containers and iterators.
Code
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
// Looks up one known and one unknown address in a small symbol table.
int main() {
    std::map<uint64_t, std::string> symbols = {
        {0x10001000, "_main"},
        {0x10002000, "_helper"},
    };

    // Returns the symbol name at `addr`, or "<unknown>" when there is none.
    auto findSymbol = [&](uint64_t addr) -> std::string {
        // Without auto the type would have to be spelled out as
        // std::map<uint64_t, std::string>::iterator.
        auto it = symbols.find(addr);
        if (it == symbols.end()) {
            return "<unknown>";
        }
        return it->second;
    };

    std::cout << "Lookup 0x10001000: " << findSymbol(0x10001000) << "\n";
    std::cout << "Lookup 0x20000000: " << findSymbol(0x20000000) << "\n";
    return 0;
}
Output
$ ./src/c++11/build/auto
Lookup 0x10001000: _main
Lookup 0x20000000: <unknown>
Lambda expressions
Use case
Custom predicates for filtering/searching binary data.
Explanation
Lambdas are inline function objects. They keep related code together, can capture variables ([] captures nothing, [=] copies locals, [&] references locals) and avoid polluting the namespace.
Code
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>
// One symbol table entry.
struct Symbol {
    std::uint64_t address; // Address of the symbol.
    std::string name;      // Symbol name, e.g. "_main".
    bool exported;         // True when the symbol is exported.
};
int main() {
std::vector<Symbol> symbols = {{0x1000, "_main", true},
{0x1100, "_private_helper", false},
{0x1200, "_public_api", true}};
auto isExported = [](const Symbol &s) { return s.exported; };
uint64_t searchStart = 0x1100;
uint64_t searchEnd = 0x1250;
auto inRange = [searchStart, searchEnd](const Symbol &s) {
return s.address >= searchStart && s.address < searchEnd;
};
std::cout << "Exported symbols:\n";
for (const auto &s : symbols) {
if (isExported(s)) {
std::cout << " 0x" << std::hex << s.address << " " << s.name << "\n";
}
}
std::cout << "Symbols in range:\n";
auto count = std::count_if(symbols.begin(), symbols.end(), inRange);
std::cout << " Found " << count << " symbols.\n";
return 0;
}
Output
$ ./src/c++11/build/lambda-expressions
Exported symbols:
0x1000 _main
0x1200 _public_api
Symbols in range:
Found 2 symbols.
decltype
Use case
Write generic code that works with different binary formats (32-bit vs 64-bit).
Explanation
decltype(expr) gives us the type of an expression. In generic code, we often cannot know the type ahead of time (it depends on template parameters). decltype(bin.getEntryPoint()) becomes uint32_t or uint64_t depending on which binary type we pass in.
Code
// https://stackoverflow.com/questions/12084040/what-is-the-difference-between-decltype-and-auto-as-a-placeholder-type-for-varia
#include <cstdint>
#include <iostream>
// Stand-in for a 32-bit binary: its entry point is a 32-bit value.
struct MachO32 {
    std::uint32_t getEntryPoint() { return 0x1000; }
};

// Stand-in for a 64-bit binary: its entry point is a 64-bit value.
struct MachO64 {
    std::uint64_t getEntryPoint() { return 0x10001000; }
};
// Returns the entry point of `bin`. The return type is whatever
// bin.getEntryPoint() yields, so it follows the binary type passed in.
template <typename BinaryT>
auto findEntry(BinaryT &bin) -> decltype(bin.getEntryPoint()) {
    return bin.getEntryPoint();
}
// Runs the same generic function against both binary types.
int main() {
    MachO32 bin32;
    MachO64 bin64;

    // findEntry returns uint32_t for bin32 and uint64_t for bin64.
    std::cout << "32-bit entry: 0x" << std::hex << findEntry(bin32) << "\n";
    std::cout << "64-bit entry: 0x" << std::hex << findEntry(bin64) << "\n";
    return 0;
}
Output
$ ./src/c++11/build/decltype
32-bit entry: 0x1000
64-bit entry: 0x10001000
Type aliases
Use case
Readable type names for binary analysis data structures.
Explanation
using aliases are more readable than typedef and work with templates.
Code
#include <cstdint>
#include <iostream>
#include <map>
// Domain names for the types used in this example.
using Address = std::uint64_t;
using Name = std::string;
// Maps an address to the name of the symbol located there.
using SymbolTable = std::map<Address, Name>;
// Fills a symbol table and prints every entry as "0x<address>:<name>".
int main() {
    SymbolTable symbols;
    // emplace builds each entry in place from the given arguments, see
    // https://stackoverflow.com/questions/14788261/c-stdvector-emplace-vs-insert
    symbols.emplace(0x10001000, "_main");
    symbols.emplace(0x10002000, "_helper");

    std::cout << "Symbols:\n";
    for (const auto &entry : symbols) {
        std::cout << "0x" << std::hex << entry.first;
        std::cout << ":" << entry.second << "\n";
    }
    return 0;
}
Output
$ ./src/c++11/build/type-aliases
Symbols:
0x10001000:_main
0x10002000:_helper
nullptr
Use case
Unambiguous null checks when functions are overloaded.
Explanation
NULL is just an integer constant (typically 0), so the compiler cannot choose between the uint64_t and const char * overloads. nullptr has type std::nullptr_t, which only converts to pointer types. This makes the call unambiguous.
Code
#include <cstdint>
#include <iostream>
// Integer overload: prints the address in hex.
void analyze(uint64_t addr) {
    std::cout << "Analyzing address: 0x";
    std::cout << std::hex << addr << "\n";
}

// Pointer overload: prints the symbol name, or "(null)" for a null pointer.
void analyze(const char *symbol) {
    const char *shown = symbol ? symbol : "(null)";
    std::cout << "Analyzing symbol: " << shown << "\n";
}
// Exercises both overloads, then shows that nullptr picks the pointer one.
int main() {
    analyze(0x10001000); // integer overload
    analyze("_main");    // pointer overload

    // With NULL the call is rejected as ambiguous:
    //   more than one instance of overloaded function "analyze" matches the
    //   argument list
    //     function "analyze(uint64_t addr)"
    //     function "analyze(const char *symbol)"
    // analyze(NULL);

    // nullptr only converts to pointer types, so this is the pointer overload.
    analyze(nullptr);
    return 0;
}
Output
$ ./src/c++11/build/nullptr
Analyzing address: 0x10001000
Analyzing symbol: _main
Analyzing symbol: (null)
Strongly-typed enums
Use case
Type-safe Mach-O constants that do not pollute namespace.
Explanation
enum class creates type-safe enums. Values are scoped (Type::Value), do not implicitly convert to int and will not clash with other enums.
Code
#include <cstdint>
#include <iostream>
// C enum
// enum OldFileType { Executable = 2, Dylib = 6};
// enum class
// https://en.wikipedia.org/wiki/Mach-O
// Scoped enum with a fixed 32-bit underlying type.
enum class FileType : std::uint32_t {
    Executable = 2,
    Dylib = 6,
};

// A second scoped enum; its values cannot be mixed up with FileType.
enum class CPUType : std::uint32_t {
    X86_64 = 0x01000007,
    ARM64 = 0x0100000C,
};
int main() {
FileType ft = FileType::Executable;
// CPUType cpu = CPUType::ARM64;
// a value of type "FileType" cannot be used to initialize an entity of type "int"
// int y = ft;
// no operator "==" matches these operands
// if (ft == cpu) {}
// no operator "==" matches these operands
// if (ft == 2) {}
uint32_t raw = static_cast<uint32_t>(ft);
std::cout << "FileType raw value: " << raw << "\n";
return 0;
}
Output
$ ./src/c++11/build/strongly-typed-enums
FileType raw value: 2
Attributes
Use case
Document that a function never returns (might help compiler optimization).
Explanation
[[noreturn]] tells the compiler a function never returns normally (it throws, calls abort, loops forever etc.). This mainly serves as documentation and an optimization hint. In practice, modern compilers are already very smart and adding [[noreturn]] does not help them.
Code
#include <cstdint>
#include <iostream>
// Always throws std::runtime_error carrying `msg`; control never returns to
// the caller, which is what [[noreturn]] states.
[[noreturn]] void throwError(const char *msg) {
    throw std::runtime_error(msg);
}
// Returns the entry point for the magic value 0xFEEDFACF
// (https://en.wikipedia.org/wiki/Mach-O); any other magic ends in throwError.
uint64_t getEntryPoint(uint32_t magic) {
    if (magic != 0xFEEDFACF) {
        // throwError is [[noreturn]], so control never falls through to the
        // return statement below on this path.
        throwError("Invalid magic.");
    }
    return 0x10001000;
}
// Resolves the entry point for a valid magic and then for an invalid one.
// The second lookup throws, which ends the loop and lands in the handler.
int main() {
    const uint32_t magics[] = {0xFEEDFACF, 0xDEADBEEF};
    try {
        for (uint32_t magic : magics) {
            uint64_t entry = getEntryPoint(magic);
            std::cout << "Entry: 0x" << std::hex << entry << "\n";
        }
    } catch (const std::exception &e) {
        std::cout << "Caught: " << e.what() << "\n";
    }
    return 0;
}
Output
$ ./src/c++11/build/attributes
Entry: 0x10001000
Caught: Invalid magic.
constexpr
Use case
Compile-time buffer sizes and calculations.
Explanation
constexpr values and functions can be evaluated at compile time. Array sizes, templates and static_assert all require compile-time constants. The same constexpr function works at compile time (for smallBuffer size) and runtime (with count variable).
Code
#include <cstdint>
#include <iostream>
// Instruction size in bytes used by every calculation below.
constexpr size_t ARM64_INST_SIZE = 4;
// Number of instructions the large buffer has room for.
constexpr size_t MAX_INSTS = 100;

// Bytes needed for `instCount` instructions. Being constexpr, the call is
// evaluated at compile time when the argument is a constant expression and
// at runtime otherwise.
constexpr size_t calcBufferSize(size_t instCount) {
    return instCount * ARM64_INST_SIZE;
}

// Size of the large buffer, computed at compile time.
constexpr size_t BUFFER_SIZE = calcBufferSize(MAX_INSTS);
// Uses the constants and the constexpr function as array bounds and once with
// a runtime argument.
int main() {
    // Array bounds have to be compile-time constants.
    uint8_t buffer[BUFFER_SIZE];
    uint8_t smallBuffer[calcBufferSize(10)];

    // The same function also accepts a value known only at runtime.
    size_t count = 50;
    size_t runtimeSize = calcBufferSize(count);

    std::cout << "Buffer size: " << sizeof(buffer) << "\n"
              << "Small buffer size: " << sizeof(smallBuffer) << "\n"
              << "Runtime calc: " << runtimeSize << "\n";
    return 0;
}
Output
$ ./src/c++11/build/constexpr
Buffer size: 400
Small buffer size: 40
Runtime calc: 200
Delegating constructors
Use case
Constructors with default values that reuse common initialization.
Explanation
Delegating constructors let one constructor call another using the initializer list syntax : ConstructorName(args). This helps to avoid duplicating initialization code (all paths go through the primary constructor).
Code
#include <cstdint>
#include <iostream>
// A section described by its address, size and name.
struct Section {
    uint64_t addr;
    size_t size;
    const char *name;

    // Delegating constructor: supplies "__TEXT" as the name and hands the
    // actual work to the primary constructor.
    Section(uint64_t a, size_t s) : Section(a, s, "__TEXT") {}

    // Primary constructor: initializes every member and logs the result with
    // the address and size in hex.
    Section(uint64_t a, size_t s, const char *n) : addr(a), size(s), name(n) {
        std::cout << "Created: " << name;
        std::cout << std::hex << " - 0x" << addr << " - 0x" << size << "\n";
    }
};
int main() {
Section a(0x1000, 0x100, "__DATA");
Section b(0x2000, 0x200);
return 0;
}
Output
$ ./src/c++11/build/delegating-constructors
Created: __DATA - 0x1000 - 0x100
Created: __TEXT - 0x2000 - 0x200
User-defined literals
Use case
Readable size constants without manual multiplication.
Explanation
User-defined literals let us write 64_KB instead of 64 * 1024. The suffix must start with an underscore: suffixes without one are reserved for the standard library.
Code
#include <iostream>
// 4_KB -> 4 * 1024. Integer literal operators receive the value as
// unsigned long long.
constexpr size_t operator""_KB(unsigned long long kb) {
    return 1024ULL * kb;
}

// 8_MB -> 8 * 1024 * 1024.
constexpr size_t operator""_MB(unsigned long long mb) {
    return 1024ULL * 1024ULL * mb;
}
// Prints two sizes written with the user-defined literal suffixes.
int main() {
    const size_t stackSize = 8_MB;
    const size_t pageSize = 4_KB;

    std::cout << "Stack size: " << stackSize << " bytes.\n"
              << "Page size: " << pageSize << " bytes.\n";
    return 0;
}
Output
$ ./src/c++11/build/user-defined-literals
Stack size: 8388608 bytes.
Page size: 4096 bytes.
Explicit virtual overrides
Use case
Catch typos when overriding virtual functions.
Explanation
override tells the compiler “we intend to override a base class function”. If we misspell the function name or get the signature wrong, we get a compile error (instead of silently creating a new function that never gets called).
Code
#include <iostream>
// Base class of the analyzers; derived classes override analyze().
struct Analyzer {
    // A class with virtual functions may be destroyed through a base pointer.
    // The virtual destructor makes that well-defined.
    virtual ~Analyzer() = default;

    // Default analysis: only prints a message.
    virtual void analyze() { std::cout << "Running base analyzer.\n"; }
};
// Analyzer specialization that replaces the base implementation.
struct MachOAnalyzer : Analyzer {
    // `override` makes the compiler verify that a matching virtual function
    // exists in the base class.
    void analyze() override {
        std::cout << "Running MachO analyzer.\n";
    }

    // A misspelled name is rejected instead of silently adding a new function:
    //   member function declared with 'override' does not override a base
    //   class member
    // void analize() override {}
};
int main() {
MachOAnalyzer m;
m.analyze();
return 0;
}
Output
$ ./src/c++11/build/explicit-virtual-overrides
Running MachO analyzer.
Final specifier
Use case
Prevent overriding of security-critical methods.
Explanation
final on a method prevents derived classes from overriding it. final on a class prevents inheritance entirely.
Code
#include <iostream>
// Base class of the validators; the default implementation rejects.
struct Validator {
    // A class with virtual functions may be destroyed through a base pointer.
    // The virtual destructor makes that well-defined.
    virtual ~Validator() = default;

    // Returns false; derived classes override this.
    virtual bool validate() { return false; }
};
// Overrides validate() and marks it final, so classes deriving from
// SignatureValidator cannot replace it again.
struct SignatureValidator : Validator {
    bool validate() override final {
        return true;
    }
};
// Deriving from SignatureValidator is allowed; overriding its final
// validate() is not.
// NOTE(review): the name looks like a typo for "ExtendedValidator"; it is
// kept unchanged here so existing references keep working.
struct ExtendedBalidator : SignatureValidator {
    // Rejected by the compiler:
    //   cannot override 'final' function "SignatureValidator::validate"
    // bool validate() override {}
};
// `final` on the class itself: nothing may derive from SecureValidator.
struct SecureValidator final : Validator {
    bool validate() override {
        return true;
    }
};

// Rejected by the compiler:
//   a 'final' class type cannot be used as a base class
// struct ExtendedSecureValidator : SecureValidator {};
int main() {
SignatureValidator v;
std::cout << "Valid: " << std::boolalpha << v.validate() << "\n";
return 0;
}
Output
$ ./src/c++11/build/final-specifier
Valid: true
Default functions
Use case
Explicitly request compiler-generated special member functions (constructors, assignment operators and destructors). We implement a custom destructor for logging.
Explanation
The destructor is only for logging, but declaring it suppresses the implicitly generated move operations. = default requests all operations back. Since std::vector handles deep copy and move correctly, the defaulted operations are fine. Following the rule of 5, we declare all of them.
Code
#include <iostream>
#include <string>
#include <vector>
// Holds a list of instruction strings. The destructor exists only to log;
// because it is user-declared, the copy and move operations are requested
// back explicitly with = default.
class DisassemblyResult {
public:
    DisassemblyResult() = default;

    // Copy operations.
    DisassemblyResult(const DisassemblyResult &) = default;
    DisassemblyResult &operator=(const DisassemblyResult &) = default;

    // Move operations.
    DisassemblyResult(DisassemblyResult &&) = default;
    DisassemblyResult &operator=(DisassemblyResult &&) = default;

    // Logs how many instructions the object held when it was destroyed.
    ~DisassemblyResult() {
        std::cout << "Cleanup: " << count() << " instructions.\n";
    }

    // Appends a copy of `s`.
    void add(const std::string &s) { instructions.push_back(s); }

    // Number of stored instructions.
    size_t count() const { return instructions.size(); }

private:
    std::vector<std::string> instructions;
};
// Copies and moves a result, then prints how many instructions each object
// holds. The destructors log on exit.
int main() {
    DisassemblyResult a;
    a.add("stp x29, x30, [sp, #-16]!");
    a.add("mov x29, sp");

    DisassemblyResult b(a);            // defaulted copy constructor
    DisassemblyResult c(std::move(a)); // defaulted move constructor

    // `a` is read after the move on purpose, to show its moved-from state.
    std::cout << "a: " << a.count();
    std::cout << ", b: " << b.count();
    std::cout << ", c:" << c.count() << "\n";
    return 0;
}
Output
$ ./src/c++11/build/default-functions
a: 0, b: 2, c:2
Cleanup: 2 instructions.
Cleanup: 2 instructions.
Cleanup: 0 instructions.
Deleted functions
Use case
Prevent copying of resources that cannot be safely duplicated.
Explanation
= delete disables a function entirely. Copying is deleted here because unique_ptr cannot be copied. Attempting to copy gives a compile error. Move is still allowed since it transfers ownership safely. According to the rule of 5, we implement all.
Code
#include <iostream>
#include <memory>
// Owns a heap-allocated byte buffer through std::unique_ptr. The buffer can be
// moved but not copied, because unique_ptr itself cannot be copied.
class CodeBuffer {
    std::unique_ptr<uint8_t[]> data;
    size_t size;

public:
    // Allocates `s` bytes and logs the allocation.
    CodeBuffer(size_t s) : data(new uint8_t[s]), size(s) {
        std::cout << "Allocated " << size << " bytes.\n";
    }

    // Logs the size being released. The memory itself is released by the
    // unique_ptr member.
    ~CodeBuffer() { std::cout << "Freed " << size << " bytes.\n"; }

    // Move constructor: takes over the buffer and leaves `other` with size 0.
    CodeBuffer(CodeBuffer &&other) noexcept
        : data(std::move(other.data)), size(other.size) {
        other.size = 0;
    }

    // Move assignment: takes over the buffer and leaves `other` with size 0.
    // It is noexcept like the move constructor: nothing in the body can
    // throw, and code that checks for non-throwing moves can rely on it.
    CodeBuffer &operator=(CodeBuffer &&other) noexcept {
        if (this != &other) {
            data = std::move(other.data);
            size = other.size;
            other.size = 0;
        }
        return *this;
    }

    // Copy constructor.
    CodeBuffer(const CodeBuffer &) = delete;
    // Copy assignment.
    CodeBuffer &operator=(const CodeBuffer &) = delete;
};
// Allocates one buffer and transfers it to a second object.
int main() {
    CodeBuffer a(1024);

    // Copying is rejected by the compiler:
    //   function "CodeBuffer::CodeBuffer(const CodeBuffer &)" cannot be
    //   referenced -- it is a deleted function
    // CodeBuffer b = a;

    // Moving is allowed: `b` takes over the allocation made for `a`.
    CodeBuffer b(std::move(a));
    return 0;
}
Output
$ ./src/c++11/build/deleted-functions
Allocated 1024 bytes.
Freed 1024 bytes.
Freed 0 bytes.
Range-based for loops
Use case
Iterate over binary sections without manual indexing.
Explanation
for (auto& x : container) iterates over all elements. Use const auto& for read-only access and auto& to modify elements. Works with any container that has begin()/end().
Code
#include <cstdint>
#include <iostream>
#include <vector>
// Prints a list of addresses, shifts every one of them in place and prints
// the list again.
int main() {
    std::vector<uint64_t> addresses = {0x1000, 0x1100, 0x1200};

    // Read-only traversal: const auto & neither copies nor modifies.
    auto printAll = [&addresses]() {
        for (const auto &address : addresses) {
            std::cout << "0x" << std::hex << address << "\n";
        }
    };

    std::cout << "Addresses:\n";
    printAll();

    // Mutable traversal: auto & refers to the element stored in the vector.
    for (auto &address : addresses) {
        // Apply ASLR slide/offset.
        address += 0x10000000;
    }

    std::cout << "After slide/offset:\n";
    printAll();
    return 0;
}
Output
$ ./src/c++11/build/range-based-for-loops
Addresses:
0x1000
0x1100
0x1200
After slide/offset:
0x10001000
0x10001100
0x10001200
Special member functions for move semantics
This is covered in Deleted functions.
Converting constructors
Use case
Implicit conversion from brace-init-list to constructor arguments.
Explanation
Braces {} convert values into constructor arguments. Unlike (), braces prevent narrowing conversions (e.g. from double to int). If a constructor accepts std::initializer_list, it takes priority over other constructors when using {}.
Code
#include <cstdint>
#include <initializer_list>
#include <iostream>
// A symbol described by address and size. Each of the three constructors logs
// its own message, so the example shows which one a given initialization
// syntax selects.
struct Symbol {
    uint64_t addr;
    uint64_t size;

    // Address only; the size becomes 0.
    Symbol(uint64_t a) : addr(a), size(0) { std::cout << "Single arg.\n"; }

    // Address and size.
    Symbol(uint64_t a, uint64_t s) : addr(a), size(s) {
        std::cout << "Two args.\n";
    }

    // Brace-initialization prefers this constructor. The first element is the
    // address and the second is the size; a missing element becomes 0.
    // The size is read only when a second element exists: a one-element list
    // such as {0x3000} must not be dereferenced past its end.
    Symbol(std::initializer_list<uint64_t> list)
        : addr(list.size() > 0 ? *list.begin() : 0),
          size(list.size() > 1 ? *(list.begin() + 1) : 0) {
        std::cout << "initializer_list\n";
    }
};
// Initializes a Symbol with each syntax to show which constructor is chosen.
int main() {
// Parentheses with two arguments: the two-argument constructor.
Symbol s1(0x1000, 0x100);
// Braces: the initializer_list constructor takes priority.
Symbol s2{0x2000, 0x200};
// Copy-list-initialization with one element: initializer_list constructor.
Symbol s3 = {0x3000};
// Braces reject narrowing, so this line does not compile:
// invalid narrowing conversion from "double" to "unsigned long"
// Symbol s4 {1000.0};
// Parentheses allow the double to convert: the single-argument constructor.
Symbol s5(1000.0);
return 0;
}
Output
$ ./src/c++11/build/converting-constructors
Two args.
initializer_list
initializer_list
Single arg.
Explicit conversion functions
Use case
Safe boolean checks without accidental integer conversion.
Explanation
explicit operator bool() allows if (obj) checks but prevents conversion to int. Without explicit, code like int x = result would compile silently (and might cause bugs).
Code
#include <cstdint>
#include <iostream>
// Result of an analysis run, usable directly in a condition.
struct AnalysisResult {
    bool valid;          // True when the analysis succeeded.
    std::uint64_t entry; // Entry point reported by the analysis.

    // `explicit` permits `if (result)` but blocks implicit conversions such
    // as `int x = result`.
    explicit operator bool() const {
        return valid;
    }
};
int main() {
AnalysisResult r{true, 0x1000};
if (r) {
std::cout << "Valid.\n";
}
// no suitable conversion function from "AnalysisResult" to "int" exists
// int x = r;
return 0;
}
Output
$ ./src/c++11/build/explicit-conversion-functions
Valid.
Inline namespaces
Use case
API versioning with default version.
Explanation
inline namespace makes its contents accessible from the parent namespace. Analyzer::run() calls V2::run() because V2 is marked inline. Old code can still explicitly use V1.
Code
#include <cstdint>
#include <iostream>
namespace Analyzer {

// Current version. `inline` makes its members visible as Analyzer::<name>,
// so Analyzer::run() resolves to V2::run().
inline namespace V2 {
void run() {
    std::cout << "V2\n";
}
} // namespace V2

// Previous version. Only reachable through the explicit Analyzer::V1:: prefix.
namespace V1 {
void run() {
    std::cout << "V1\n";
}
} // namespace V1

} // namespace Analyzer
// Calls run() through the parent namespace and through each version.
int main() {
    Analyzer::run();     // resolves to the inline namespace V2
    Analyzer::V1::run(); // explicit old version
    Analyzer::V2::run(); // explicit current version
    return 0;
}
Output
$ ./src/c++11/build/inline-namespaces
V2
V1
V2
Non-static data member initializers
Use case
Default values for struct members.
Explanation
Before C++11, defaults had to be set in every constructor. Now we can initialize members at their declaration.
Code
#include <cstdint>
#include <iostream>
// Before C++11.
// Pre-C++11 style: the defaults live in the constructor's initializer list.
struct SectionOld {
    std::uint64_t addr;
    std::uint64_t size;

    SectionOld()
        : addr(0),
          size(0x1000) {}
};
// C++11.
// C++11 style: every member carries its default at the point of declaration,
// so no constructor has to be written.
struct Section {
    std::uint64_t addr = 0;
    std::uint64_t size = 0x1000;
};
int main() {
Section s1;
std::cout << "addr: 0x" << std::hex << s1.addr << ", size: 0x" << std::hex
<< s1.size << "\n";
return 0;
}
Output
$ ./src/c++11/build/non-static-data-member-initializers
addr: 0x0, size: 0x1000
Right angle brackets
Use case
Nested templates without extra spaces.
Explanation
Before C++11, >> in nested templates was parsed as right-shift operator, requiring a space (vector<int> >).
Code
#include <cstdint>
#include <iostream>
#include <map>
#include <vector>
// Builds a map whose value type is itself a template and prints the number
// of items stored under each key.
int main() {
    // The closing `>>` no longer needs a space between the brackets.
    std::map<uint64_t, std::vector<int>> data;
    data.emplace(0x1000, std::vector<int>{1, 2});
    data.emplace(0x2000, std::vector<int>{3});

    for (const auto &entry : data) {
        const auto &items = entry.second;
        std::cout << "0x" << std::hex << entry.first;
        std::cout << " : " << items.size() << " items.\n";
    }
    return 0;
}
Output
$ ./src/c++11/build/right-angle-brackets
0x1000 : 2 items.
0x2000 : 1 items.
Ref-qualified member functions
Use case
Different behavior for temporary vs persistent objects.
Explanation
& and && after member functions specify whether *this is an lvalue or rvalue. Return references from lvalues (persistent objects), move from rvalues (temporary objects).
Code
#include <iostream>
#include <vector>
// Exposes its data differently depending on whether the Buffer object is an
// lvalue or an rvalue.
class Buffer {
public:
    // Called on rvalues (temporaries): the contents are moved out, since the
    // object is about to go away.
    std::vector<int> data() && {
        std::cout << "rvalue: moving.\n";
        return std::move(data_);
    }

    // Called on lvalues (named objects): returns a read-only reference.
    const std::vector<int> &data() const & {
        std::cout << "lvalue: returning ref.\n";
        return data_;
    }

private:
    std::vector<int> data_ = {1, 2, 3};
};
// Returns a temporary Buffer, so calls made directly on the result select the
// && overloads.
Buffer makeBuffer() {
    return Buffer();
}
int main() {
Buffer b;
auto &ref = b.data();
(void)ref;
auto owned = makeBuffer().data();
return 0;
}
Output
$ ./src/c++11/build/ref-qualified-member-functions
lvalue: returning ref.
rvalue: moving.
Trailing return types
Use case
Return types that depend on template parameters.
Explanation
auto f() -> Type is an alternative syntax for specifying return types. Required in C++11 when the return type depends on parameters (like decltype(a + b)). Also works with lambdas: []() -> Type {}.
Code
#include <cstdint>
#include <iostream>
// Adds two values of possibly different types. The return type is the type of
// the expression `a + b`; it has to be written after the parameter list
// because `a` and `b` are not in scope before it.
template <typename Lhs, typename Rhs>
auto add(Lhs a, Rhs b) -> decltype(a + b) {
    return a + b;
}
// A lambda can spell out its return type with the same trailing syntax.
auto getEntry = []() -> std::uint64_t {
    return 0x10001000;
};
// Adds two pairs of literals with different types and prints the results
// together with the lambda's value, all in hex.
int main() {
    auto r1 = add(0x1000, 0x100);       // int + int
    auto r2 = add(0x1000ULL, 0x100ULL); // unsigned long long operands

    std::cout << std::hex;
    std::cout << "r1: 0x" << r1 << "\n";
    std::cout << "r2: 0x" << r2 << "\n";
    std::cout << "entry: 0x" << getEntry() << "\n";
    return 0;
}
Output
$ ./src/c++11/build/trailing-return-types
r1: 0x1100
r2: 0x1100
entry: 0x10001000
noexcept
Use case
Enable efficient std::vector reallocation by promising moves will not throw.
Explanation
noexcept promises a function will not throw exceptions. std::vector only uses move during reallocation if the move constructor is noexcept (otherwise it copies for exception safety). Always mark move operations noexcept when possible.
Code
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>
// Owns a raw byte array. The copy and the move constructor both log, so the
// example shows which of them std::vector uses when it reallocates.
struct CodeBuffer {
    uint8_t *data;
    size_t size;

    // Allocates `n` bytes and logs the allocation.
    CodeBuffer(size_t n) : data(new uint8_t[n]), size(n) {
        std::cout << "Allocated " << n << " bytes.\n";
    }

    // Releases the array. delete[] on the null pointer left behind by a move
    // is a no-op.
    ~CodeBuffer() { delete[] data; }

    // Copy constructor. If noexcept is not added to the move constructor,
    // this is what std::vector calls during reallocation.
    CodeBuffer(const CodeBuffer &other)
        : data(new uint8_t[other.size]), size(other.size) {
        // A moved-from buffer has data == nullptr and size == 0. memcpy must
        // not be handed a null pointer, even for zero bytes, so skip the call
        // when there is nothing to copy.
        if (size != 0) {
            std::memcpy(data, other.data, size);
        }
        std::cout << "Copied.\n";
    }

    // Move constructor: takes over the array and leaves `other` empty.
    // noexcept is what allows std::vector to move instead of copy.
    CodeBuffer(CodeBuffer &&other) noexcept : data(other.data), size(other.size) {
        other.data = nullptr;
        other.size = 0;
        std::cout << "Moved.\n";
    }

    CodeBuffer &operator=(const CodeBuffer &) = delete;
    CodeBuffer &operator=(CodeBuffer &&) = delete;
};
// Appends three buffers to a vector. Each time the vector grows, the existing
// elements are transferred to the new storage and log how that happened.
int main() {
    std::vector<CodeBuffer> buffers;
    const size_t sizes[] = {100, 200, 300};
    for (size_t bytes : sizes) {
        // Constructs the CodeBuffer in place from the size.
        buffers.emplace_back(bytes);
    }
    return 0;
}
Output
$ ./src/c++11/build/noexcept
Allocated 100 bytes.
Allocated 200 bytes.
Moved.
Allocated 300 bytes.
Moved.
Moved.
char32_t and char16_t
Use case
Handle Unicode strings in binary resources (Windows PE uses UTF-16).
Explanation
char16_t and char32_t are fixed-width Unicode types. Use u"..." for UTF-16 and U"..." for UTF-32.
Code
#include <cstdint>
#include <iostream>
// Declares one UTF-16 and one UTF-32 string and prints the element size of
// each character type.
int main() {
    // u"..." yields char16_t elements, U"..." yields char32_t elements.
    char16_t utf16[] = u"MZ";
    char32_t utf32[] = U"MZ";
    // Only the types matter here; silence unused-variable warnings.
    (void)utf16;
    (void)utf32;

    std::cout << "UTF-16 element size: " << sizeof(char16_t) << " bytes.\n"
              << "UTF-32 element size: " << sizeof(char32_t) << " bytes.\n";
    return 0;
}
Output
$ ./src/c++11/build/char32_t-and-char16_t
UTF-16 element size: 2 bytes.
UTF-32 element size: 4 bytes.
Raw string literals
Use case
Avoid escape hell in byte patterns.
Explanation
R"()" is a raw string (no escape sequences processed). Essential for regex patterns, YARA rules and byte signatures.
Code
#include <iostream>
#include <string>
// Prints the same byte pattern written as an escaped literal and as a raw
// literal, then prints a multi-line rule held in a raw literal.
int main() {
// Ordinary literal: every backslash has to be doubled.
std::string pattern1 = "\\xFD\\x7B\\xBF\\xA9";
// Raw literal: the text between R"( and )" is taken verbatim.
std::string pattern2 = R"(\xFD\x7B\xBF\xA9)";
std::cout << "Escaped: " << pattern1 << "\n";
std::cout << "Raw: " << pattern2 << "\n";
// Line breaks inside a raw literal are part of the string.
std::string rule = R"(
rule MachO {
strings: $magic = { CE FA ED FE }
}
)";
std::cout << rule << "\n";
return 0;
}
Output
$ ./src/c++11/build/raw-string-literals
Escaped: \xFD\x7B\xBF\xA9
Raw: \xFD\x7B\xBF\xA9
rule MachO {
strings: $magic = { CE FA ED FE }
}
std::move
Use case
Transfer ownership of data without copying.
Explanation
std::move casts an lvalue to an rvalue reference (enabling the move constructor/assignment). The data is transferred, not copied (the original vector is left empty).
Code
#include <iostream>
#include <string>
#include <vector>
// Moves a vector of disassembly strings into a second vector and prints the
// element counts of both before and after the transfer.
int main() {
  std::vector<std::string> instructions = {"stp x29, x30, [sp, #-16]!",
                                           "mov x29, sp", "ret"};
  std::cout << "Before move: " << instructions.size() << " instructions.\n";

  // std::move only casts to an rvalue; the vector's move constructor does the
  // actual transfer of the underlying storage.
  std::vector<std::string> cache(std::move(instructions));

  const auto leftBehind = instructions.size();
  const auto transferred = cache.size();
  std::cout << "After move:\n";
  std::cout << " original: " << leftBehind << "\n";
  std::cout << " cache: " << transferred << "\n";
  return 0;
}
Output
$ ./src/c++11/build/std-move
Before move: 3 instructions.
After move:
original: 0
cache: 3
std::forward
Use case
Preserve lvalue/rvalue when passing arguments through a wrapper.
Explanation
std::forward<T>(arg) preserves the original value category. Without it, arg would always be treated as an lvalue (it has a name). Use it in templates that pass arguments to other functions.
Code
#include <iostream>
#include <string>
#include <utility>
// Overload chosen for lvalues (and const rvalues).
void process(const std::string &s) {
  std::cout << "lvalue: " << s << "\n";
}

// Overload chosen for non-const rvalues.
void process(std::string &&s) {
  std::cout << "rvalue: " << s << "\n";
}

// Forwards arg to process() with the value category the caller used.
// T deduces to std::string& for lvalues and std::string for rvalues, so
// std::forward<T> yields an lvalue or an rvalue reference respectively.
template <typename T>
void wrapper(T &&arg) {
  process(std::forward<T>(arg));
}
// Calls wrapper() once with a named string and once with a temporary to show
// that each call reaches a different process() overload.
int main() {
  std::string symbol = "_main";
  wrapper(symbol);                 // lvalue.
  wrapper(std::string("_helper")); // rvalue.
  return 0;
}
Output
$ ./src/c++11/build/std-forward
lvalue: _main
rvalue: _helper
std::thread
Use case
Parallel analysis of binary segments.
Explanation
std::thread runs a function in a new thread. Pass the function and its arguments to the constructor. Call join() to wait for completion. Use std::mutex with std::lock_guard to prevent mixed output.
Code
#include <iostream>
#include <mutex>
#include <thread>
// Guards std::cout so lines written by different threads do not interleave.
std::mutex printMutex;

// Prints the id of the calling thread together with the segment name.
// The lock is held for the whole function and released when guard goes out
// of scope.
void analyze(const char *segment) {
  const std::lock_guard<std::mutex> guard(printMutex);
  const std::thread::id self = std::this_thread::get_id();
  std::cout << "Thread " << self << " analyzing " << segment << "\n";
}
// Starts one worker thread per segment, waits for both and reports completion.
int main() {
  std::thread textWorker(analyze, "__TEXT");
  std::thread dataWorker(analyze, "__DATA");

  // join() blocks until the corresponding thread has finished.
  textWorker.join();
  dataWorker.join();

  std::cout << "Done.\n";
  return 0;
}
Output
$ ./src/c++11/build/std-thread
Thread 0x16f573000 analyzing __TEXT
Thread 0x16f5ff000 analyzing __DATA
Done.
std::to_string
Use case
Build strings from numbers for logging.
Explanation
std::to_string converts numbers to std::string.
Code
#include <cstdint>
#include <iostream>
#include <string>
// Builds a log line from two numeric counters with std::to_string and prints
// it.
int main() {
  const int segments = 4;
  const size_t symbols = 127;

  // Assemble the message piece by piece instead of one long + chain.
  std::string msg = "Found ";
  msg += std::to_string(segments);
  msg += " segments, ";
  msg += std::to_string(symbols);
  msg += " symbols.";

  std::cout << msg << "\n";
  return 0;
}
Output
$ ./src/c++11/build/std-to_string
Found 4 segments, 127 symbols.
Type traits
Use case
Compile-time checks for safe binary I/O.
Explanation
Type traits query type properties at compile-time. is_trivially_copyable checks if a type is safe for memcpy (POD/Plain Old Data structs are, but classes with a std::string member are not). static_assert can be used to catch mistakes before runtime.
Code
#include <cstdint>
#include <iostream>
#include <string>
#include <type_traits>
// https://en.wikipedia.org/wiki/Mach-O
// Plain struct made of two fixed-width integers. main() below reports it as
// trivially copyable and static_asserts that property.
struct MachHeader {
uint32_t magic;
uint32_t cputype;
};
// Counter-example for the trait: the std::string member gives the class
// non-trivial copy operations, so it is not trivially copyable.
class Symbol {
public:
  Symbol(const std::string &n) : name(n) {}

private:
  std::string name;
};
// Prints whether MachHeader and Symbol are trivially copyable and enforces
// the property for MachHeader at compile time.
int main() {
  // Checked by the compiler; its position inside main() does not matter.
  static_assert(std::is_trivially_copyable<MachHeader>::value,
                "MachHeader must be safe for memcpy.");

  const bool headerCopyable = std::is_trivially_copyable<MachHeader>::value;
  const bool symbolCopyable = std::is_trivially_copyable<Symbol>::value;

  // boolalpha is sticky, so setting it once covers both lines.
  std::cout << std::boolalpha;
  std::cout << "MachHeader trivially copyable: " << headerCopyable << "\n";
  std::cout << "Symbol trivially copyable: " << symbolCopyable << "\n";
  return 0;
}
Output
$ ./src/c++11/build/type-traits
MachHeader trivially copyable: true
Symbol trivially copyable: false
Smart pointers
Use case
Automatic memory management for parsed binary structures.
Explanation
unique_ptr has exclusive ownership (cannot copy, only move). shared_ptr allows multiple owners with reference counting. A shared_ptr holds the managed object and a reference counter. Accessing the reference counter is thread safe but manipulating the managed object is not thread-safe. Both auto-delete when the last owner goes out of scope (no manual delete needed).
Code
#include <cstdint>
#include <iostream>
#include <memory>
// Load command stand-in that logs its construction and destruction so the
// lifetime managed by the smart pointers becomes visible in the output.
struct LoadCommand {
  uint32_t cmd;

  LoadCommand(uint32_t c) : cmd{c} {
    std::cout << " Created.\n";
  }

  ~LoadCommand() {
    std::cout << " Destroyed.\n";
  }
};
int main() {
std::cout << "unique_ptr\n";
{
// https://en.wikipedia.org/wiki/Mach-O
std::unique_ptr<LoadCommand> lc(new LoadCommand(0x19));
// function "std::__1::unique_ptr<_Tp, _Dp>::unique_ptr(const std::__1::unique_ptr<LoadCommand, std::__1::default_delete<LoadCommand>> &) [with _Tp=LoadCommand, _Dp=std::__1::default_delete<LoadCommand>]" (declared implicitly) cannot be referenced -- it is a deleted function
// auto copy = lc;
auto moved = std::move(lc);
}
std::cout << "shared_ptr\n";
{
// https://en.wikipedia.org/wiki/Mach-O
auto a = std::make_shared<LoadCommand>(0x02);
auto b = a;
std::cout << " Owners: " << a.use_count() << "\n";
}
return 0;
}
Output
$ ./src/c++11/build/smart-pointers
unique_ptr
Created.
Destroyed.
shared_ptr
Created.
Owners: 2
Destroyed.
std::chrono
Use case
Benchmark analysis operations.
Explanation
std::chrono provides type-safe time handling. Get the current time with now(), subtract to get duration and cast to desired units with duration_cast. Use high_resolution_clock for benchmarking.
Code
#include <chrono>
#include <iostream>
// Dummy workload for the timing example: adds up the integers 0..999999.
// The accumulator is volatile so the compiler cannot optimize the loop away.
// The total (about 5 * 10^11) does not fit in a 32-bit int, so an unsigned
// 64-bit accumulator is used to avoid signed overflow (undefined behaviour).
void analyze() {
  volatile unsigned long long sum = 0;
  for (unsigned long long i = 0; i < 1000000; ++i) {
    // Spelled out as read + write; compound assignment on a volatile object
    // is deprecated in C++20.
    sum = sum + i;
  }
}
// Times one call to analyze() and prints the elapsed time in milliseconds and
// microseconds.
int main() {
  using Clock = std::chrono::high_resolution_clock;

  const Clock::time_point begin = Clock::now();
  analyze();
  const Clock::time_point finish = Clock::now();

  // duration_cast truncates to the requested unit.
  const auto elapsed = finish - begin;
  const auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(elapsed);
  const auto us = std::chrono::duration_cast<std::chrono::microseconds>(elapsed);

  std::cout << "Elapsed: " << ms.count() << " ms (" << us.count() << " us).\n";
  return 0;
}
Output
$ ./src/c++11/build/std-chrono
Elapsed: 2 ms (2883 us).
Tuples
Use case
Return multiple values from a function.
Explanation
std::tuple bundles multiple values of different types. Use std::make_tuple to create, std::get<N> to access by index.
Code
#include <cstdint>
#include <iostream>
#include <tuple>
// Returns a fixed (found, entry address, symbol name) triple.
std::tuple<bool, uint64_t, const char *> findEntry() {
  const bool found = true;
  const uint64_t entry = 0x10001000;
  const char *symbol = "_main";
  return std::tuple<bool, uint64_t, const char *>(found, entry, symbol);
}
// Fetches the tuple from findEntry() and prints each element, accessed by
// index with std::get<N>.
int main() {
  const auto entryInfo = findEntry();

  const bool found = std::get<0>(entryInfo);
  const uint64_t address = std::get<1>(entryInfo);
  const char *symbol = std::get<2>(entryInfo);

  std::cout << "Found: " << std::boolalpha << found << "\n";
  std::cout << "Entry: 0x" << std::hex << address << "\n";
  std::cout << "Symbol: " << symbol << "\n";
  return 0;
}
Output
$ ./src/c++11/build/tuples
Found: true
Entry: 0x10001000
Symbol: _main
std::tie
Use case
Unpack tuples into separate variables.
Explanation
std::tie creates a tuple of references, allowing us to unpack a returned tuple into separate variables. We can use std::ignore to skip values we do not need. (Note: C++17 added structured bindings auto [a, b, c] = ... as a cleaner alternative).
Code
#include <cstdint>
#include <iostream>
#include <tuple>
// Returns a fixed (address, size, name) triple describing the __TEXT segment.
std::tuple<uint64_t, uint64_t, const char *> getSegmentInfo() {
  const uint64_t address = 0x10000000;
  const uint64_t size = 0x4000;
  const char *name = "__TEXT";
  return std::tuple<uint64_t, uint64_t, const char *>(address, size, name);
}
// Unpacks the address and the name returned by getSegmentInfo() into two
// locals and prints them; the size in the middle is skipped.
int main() {
  uint64_t base = 0;
  const char *segName = nullptr;

  // std::tie builds a tuple of references to the locals; std::ignore
  // discards the element at its position.
  std::tie(base, std::ignore, segName) = getSegmentInfo();

  std::cout << segName << " @ 0x" << std::hex << base << "\n";
  return 0;
}
Output
$ ./src/c++11/build/std-tie
__TEXT @ 0x10000000
std::array
Use case
Fixed-size buffer with bounds checking and STL support.
Explanation
std::array is a fixed-size container that wraps a C array. Unlike raw arrays, it knows its size, works with STL algorithms and has .at() for bounds-checked access. Unchecked operator[] access (as with a raw T[N] array) can be used as well (e.g. for performance critical applications).
Code
#include <algorithm>
#include <array>
#include <cstdint>
#include <iostream>
// Stores the four bytes of one ARM64 instruction in a std::array and prints
// the size plus the first and last byte.
int main() {
  // $ echo "stp x29, x30, [sp, #-16]\!" | llvm-mc -triple=aarch64 -show-encoding
  // stp x29, x30, [sp, #-16]! // encoding: [0xfd,0x7b,0xbf,0xa9]
  std::array<uint8_t, 4> prologue = {0xFD, 0x7B, 0xBF, 0xA9};

  std::cout << "Size: " << prologue.size() << "\n";

  // uint8_t would be streamed as a character, so widen to int first.
  const int first = prologue.at(0); // bounds-checked access
  const int last = prologue[3];     // unchecked access
  std::cout << "First: 0x" << std::hex << first << "\n";
  std::cout << "Last: 0x" << std::hex << last << "\n";
  return 0;
}
Output
$ ./src/c++11/build/std-array
Size: 4
First: 0xfd
Last: 0xa9
Unordered containers
Use case
O(1) symbol lookups by address.
Explanation
unordered_map and unordered_set use hash tables for O(1) average lookup, insert and delete. They might be faster than map/set (which are O(log n)), but their elements are not kept in sorted order.
Code
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <unordered_set>
// Looks up a symbol by address in an unordered_map and counts unique visited
// addresses with an unordered_set.
int main() {
  std::unordered_map<uint64_t, const char *> symbols;
  symbols.emplace(0x10001000, "_main");

  const auto hit = symbols.find(0x10001000);
  if (hit != symbols.end()) {
    std::cout << "Found: " << hit->second << "\n";
  }

  std::unordered_set<uint64_t> visited;
  visited.insert(0x10001000);
  // Inserting the same key again leaves the set unchanged.
  visited.insert(0x10001000);

  std::cout << "Visited: " << visited.size() << " unique addresses.\n";
  return 0;
}
Output
$ ./src/c++11/build/unordered-containers
Found: _main
Visited: 1 unique addresses.
std::make_shared
Use case
Efficient creation of shared pointers.
Explanation
std::make_shared<T>(arg) creates a shared_ptr in one allocation (managed object + control block). More efficient and exception-safe than shared_ptr<T>(new T(arg)) (which does 2 allocations).
Code
#include <iostream>
#include <memory>
// Symbol stand-in that logs its construction and destruction so the single
// shared object can be followed in the output.
struct Symbol {
  const char *name;

  Symbol(const char *n) : name{n} {
    std::cout << "Created.\n";
  }

  ~Symbol() {
    std::cout << "Destroyed.\n";
  }
};
int main() {
auto sym = std::make_shared<Symbol>("_main");
auto copy = sym;
std::cout << "Ref count: " << sym.use_count() << "\n";
return 0;
}
Output
$ ./src/c++11/build/std-make_shared
Created.
Ref count: 2
Destroyed.
std::ref
Use case
Pass references to threads (which copy arguments by default).
Explanation
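std::thread copies its arguments by default. std::ref(x) creates a std::reference_wrapper, a small copyable object that holds a pointer to x and converts back to a reference, so the thread function operates on the original variable.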
Code
#include <functional>
#include <iostream>
#include <thread>
// Increments the caller's counter in place through the reference.
void analyze(int &counter) {
  counter += 1;
}
// Runs analyze() on a worker thread against a local counter and prints the
// counter after the thread has finished.
int main() {
  int count = 0;

  // std::ref is needed here: the thread would otherwise work on a copy.
  std::thread worker(analyze, std::ref(count));
  worker.join();

  std::cout << "Count: " << count << "\n";
  return 0;
}
Output
$ ./src/c++11/build/std-ref
Count: 1
std::async
Use case
Run analysis tasks in parallel and collect the results.
Explanation
std::async runs a function asynchronously and returns a std::future. We can call .get() to wait for and retrieve the result. (Use std::launch::async to force a new thread and std::launch::deferred for lazy evaluation.)
Code
#include <future>
#include <iostream>
// Announces the segment being analyzed and returns a fixed result of 100.
int analyze(const char *segment) {
  const int result = 100;
  std::cout << "Analyzing " << segment << "\n";
  return result;
}
// Launches two analyses asynchronously, waits for both results and prints
// them.
int main() {
  // std::launch::async requests that each task runs on its own thread.
  std::future<int> textTask = std::async(std::launch::async, analyze, "__TEXT");
  std::future<int> dataTask = std::async(std::launch::async, analyze, "__DATA");

  // get() blocks until the corresponding task has produced its value.
  const int textResult = textTask.get();
  const int dataResult = dataTask.get();

  std::cout << "Results: " << textResult << ", " << dataResult << "\n";
  return 0;
}
Output
$ ./src/c++11/build/std-async
Analyzing __TEXT
Analyzing __DATA
Results: 100, 100
std::begin/std::end
Use case
Write generic code that works with both C arrays and containers.
Explanation
std::begin() and std::end() work with both C arrays and STL containers. We can write one template that handles both. Without them, C arrays require separate handling.
Code
#include <algorithm>
#include <array>
#include <cstdint>
#include <iostream>
// Prints every element of data as a hex number followed by a space, then a
// newline. std::begin/std::end make the same code accept C arrays and
// containers. Note that std::hex stays set on std::cout afterwards.
template <typename T>
void hexDump(const T &data) {
  auto cur = std::begin(data);
  const auto last = std::end(data);
  while (cur != last) {
    // Widen to int so byte-sized elements are not printed as characters.
    std::cout << std::hex << static_cast<int>(*cur) << " ";
    ++cur;
  }
  std::cout << "\n";
}
// Dumps one instruction stored in a C array and one stored in a std::array
// through the same hexDump template.
int main() {
  // $ echo "stp x29, x30, [sp, #-16]\!" | llvm-mc -triple=aarch64 -show-encoding
  // stp x29, x30, [sp, #-16]! // encoding: [0xfd,0x7b,0xbf,0xa9]
  uint8_t stpBytes[] = {0xFD, 0x7B, 0xBF, 0xA9};

  // $ echo "ret" | llvm-mc -triple=aarch64 -show-encoding
  // ret // encoding: [0xc0,0x03,0x5f,0xd6]
  std::array<uint8_t, 4> retBytes = {0xC0, 0x03, 0x5F, 0xD6};

  hexDump(stpBytes);
  hexDump(retBytes);
  return 0;
}
Output
$ ./src/c++11/build/std-begin-end
fd 7b bf a9
c0 3 5f d6
Binary literals
Use case
Define ARM64 instruction masks and opcodes clearly.
Explanation
Binary literals (0b...) make bit patterns readable. ' can be used as a separator.
Code
#include <cstdint>
#include <iostream>
// Checks whether an instruction word is a BL by masking its top six bits and
// comparing them with the BL opcode pattern.
int main() {
  // https://developer.arm.com/documentation/ddi0602/2025-12/Base-Instructions/BL--Branch-with-link-?lang=en
  // The mask keeps bits 31..26; the remaining 26 bits hold the branch offset.
  const uint32_t blMask = 0b1111'1100'0000'0000'0000'0000'0000'0000;
  const uint32_t blOpcode = 0b1001'0100'0000'0000'0000'0000'0000'0000;

  // $ echo "bl #0x40" | llvm-mc -arch=aarch64 -show-encoding
  // bl #64 // encoding: [0x10,0x00,0x00,0x94]
  const uint32_t blInsn = 0x94000010;

  const uint32_t opcodeBits = blInsn & blMask;
  const bool isBl = (opcodeBits == blOpcode);
  std::cout << "Is BL: " << std::boolalpha << isBl << "\n";
  return 0;
}
Output
$ ./src/c++14/build/binary-literals
Is BL: true
Generic lambda expressions
Use case
Generic filtering for any container type.
Explanation
auto in lambda parameters makes the closure’s operator() a template (so one lambda works with any type).
Code
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>
// Calls one generic lambda with two different integer types.
int main() {
  // The auto parameter turns the closure's operator() into a template, so it
  // is instantiated once per argument type.
  const auto hexPrint = [](auto val) {
    std::cout << "0x" << std::hex << val << "\n";
  };

  // The UL suffix makes this an unsigned long (64 bits wide on LP64
  // platforms such as macOS and Linux).
  hexPrint(0x10001000UL);
  // Too large for int, so the hex literal has type unsigned int.
  hexPrint(0xFEEDFACF);
  return 0;
}
Output
$ ./src/c++14/build/generic-lambda-expressions
0x10001000
0xfeedfacf
Lambda capture initializers
Use case
Move unique_ptr into a lambda for async analysis.
Explanation
[x = expr] initializes a capture with any expression. Enables moving unique_ptr into lambdas and creating computed captures.
Code
#include <cstdint>
#include <iostream>
#include <memory>
#include <vector>
// Result record that carries only the entry point address.
struct AnalysisResult {
  uint64_t entryPoint;

  AnalysisResult(uint64_t e) : entryPoint{e} {}
};
int main() {
auto result = std::make_unique<AnalysisResult>(0x10001000);
auto task = [r = std::move(result)]() {
std::cout << "Entry: 0x" << std::hex << r->entryPoint << "\n";
};
std::cout << "Original moved: " << std::boolalpha << (result == nullptr)
<< "\n";
task();
int multiplier = 4;
auto getSize = [pageSize = 4096 * multiplier]() { return pageSize; };
std::cout << "Size: " << std::dec << getSize() << "\n";
return 0;
}
Output
$ ./src/c++14/build/lambda-capture-initializers
Original moved: true
Entry: 0x10001000
Size: 16384
Return type deduction
Use case
Simplify template functions that return complex types.
Explanation
An auto return type lets the compiler deduce the type from the return statements. Cleaner than trailing return types. Plain auto deduces a value type (references and const are stripped); use auto& to return references.
Code
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>
// C++11 needed a trailing return type:
// template<typename Container>
// auto getFirst(Container& c) -> decltype(c.front()) {
// return c.front();
// }
//
// C++14: the return type is deduced from the return statement. Plain auto
// deduces a value type, so this version returns a copy of the first element
// (the C++11 form above returns whatever c.front() returns, i.e. a reference
// for standard containers).
template <typename Container>
auto getFirst(Container &c) {
  auto head = c.front();
  return head;
}
// Fetches the first address from a vector through getFirst() and prints it.
int main() {
  std::vector<uint64_t> addresses = {0x1000, 0x2000};

  const auto first = getFirst(addresses);

  std::cout << "First address: 0x" << std::hex << first << "\n";
  return 0;
}
Output
$ ./src/c++14/build/return-type-deduction
First address: 0x1000
decltype(auto)
Use case
Perfect forwarding of return types in wrappers.
Explanation
decltype(auto) deduces the exact type (including references and const). auto strips them.
Code
#include <cstdint>
#include <iostream>
// Global entry point that the accessors below expose.
uint64_t entry = 0x10001000;

// Returns a reference to the global, so callers can assign through it.
uint64_t &getRef() {
  return entry;
}

// Returns the current value of the global as a copy.
uint64_t getVal() {
  const uint64_t snapshot = entry;
  return snapshot;
}

// auto drops the reference: this wrapper returns uint64_t by value.
auto wrapperAuto() {
  return getRef();
}

// decltype(auto) keeps the exact type of the returned expression: uint64_t&.
decltype(auto) wrapperDecltype() {
  return getRef();
}
// Assigns through the reference returned by wrapperDecltype() and prints the
// updated global.
int main() {
  // Does not compile: wrapperAuto() returns a temporary value, which is not
  // a modifiable lvalue.
  // wrapperAuto() = 0x10002000;

  // Compiles: the call yields uint64_t&, so the assignment writes to entry.
  wrapperDecltype() = 0x10003000;

  std::cout << "After decltype: 0x" << std::hex << entry << "\n";
  return 0;
}
Output
$ ./src/c++14/build/decltype-auto
After decltype: 0x10003000
Relaxing constraints on constexpr functions
Use case
Compile-time instruction decoding with loops and conditionals.
Explanation
constexpr now allows loops, ifs, multiple returns and local variables (non-exhaustive list).
Code
#include <cstdint>
#include <iostream>
// Maps a register number to its alias: 29 -> "fp", 30 -> "lr", anything else
// -> "x?". Multiple return statements in a constexpr function need C++14.
constexpr const char *getRegName(uint32_t reg) {
  switch (reg) {
  case 29:
    return "fp";
  case 30:
    return "lr";
  default:
    return "x?";
  }
}
// Extracts the Rd field, i.e. the low five bits of the instruction word.
constexpr uint32_t decodeRd(uint32_t insn) {
  constexpr uint32_t rdMask = 0x1F;
  return insn & rdMask;
}
int main() {
// echo "mov x29, sp" | llvm-mc -arch=aarch64 -show-encoding
// mov x29, sp // encoding: [0xfd,0x03,0x00,0x91]
constexpr uint32_t insn = 0x910003fd;
// https://developer.arm.com/documentation/ddi0602/2025-12/Base-Instructions/MOV--register---Move-register-value--an-alias-of-ORR--shifted-register--?lang=en
constexpr auto rd = decodeRd(insn);
static_assert(rd == 29, "Rd should be x29");
std::cout << "Rd: " << getRegName(rd) << "\n";
return 0;
}
Output
$ ./src/c++14/build/relaxing-constraints-on-constexpr-functions
Rd: fp
Variable templates
Use case
Architecture specific constants.
Explanation
Variable templates allow templated constants. Cleaner than static class members or constexpr functions.
Code
#include <cstdint>
#include <iostream>
// Page size constant available for any type T; the value is 0x1000 for
// every instantiation.
template <typename T> constexpr T pageSize = 0x1000;
// Primary template: the base address defaults to 0 for types without an
// explicit specialization below.
template <typename T> constexpr T machoBase = 0;
// template<> marks an explicit specialization (for type uint64_t).
template <> constexpr uint64_t machoBase<uint64_t> = 0x10000000;
// template<> marks an explicit specialization (for type uint32_t).
template <> constexpr uint32_t machoBase<uint32_t> = 0x1000;
int main() {
std::cout << std::hex;
std::cout << "Page size: 0x" << pageSize<uint64_t> << "\n";
std::cout << "64-bit base: 0x" << machoBase<uint64_t> << "\n";
std::cout << "32-bit base: 0x" << machoBase<uint32_t> << "\n";
return 0;
}
Output
$ ./src/c++14/build/variable-templates
Page size: 0x1000
64-bit base: 0x10000000
32-bit base: 0x1000
[[deprecated]] attribute
Use case
Mark old analysis APIs for removal.
Explanation
[[deprecated]] generates compiler warnings when deprecated items are used.
Code
#include <cstdint>
#include <iostream>
// Legacy entry point. The attribute makes the compiler warn at every call
// site and show the message pointing to the replacement.
[[deprecated("Use analyzeV2() instead.")]]
void analyze(uint64_t addr) {
  std::cout << "Old analysis: 0x";
  std::cout << std::hex << addr << "\n";
}
// Current entry point: prints the address being analyzed in hex.
void analyzeV2(uint64_t addr) {
  std::cout << "New analysis: 0x";
  std::cout << std::hex << addr << "\n";
}
// Calls only the non-deprecated API so the example builds without warnings.
int main() {
  const uint64_t target = 0x10001000;
  analyzeV2(target);

  // Uncomment to see deprecation warning.
  // analyze(0x10001000);
  return 0;
}
Output
$ ./src/c++14/build/deprecated-attribute
New analysis: 0x10001000
User-defined literals for standard library types
Use case
Readable time durations for analysis timeouts.
Explanation
Some standard library literals: h, s, ms, us. Requires using namespace std::chrono_literals.
Code
#include <chrono>
#include <iostream>
#include <string>
#include <thread>
using namespace std::chrono_literals;
// Prints the timeout as a millisecond count. Coarser durations such as
// seconds convert to milliseconds implicitly at the call site.
void analyzeWithTimeout(std::chrono::milliseconds timeout) {
  const auto millis = timeout.count();
  std::cout << "Timeout: " << millis << "ms\n";
}
// Passes one timeout written in milliseconds and one written in seconds.
int main() {
  // The ms and s suffixes come from std::chrono_literals.
  const auto shortTimeout = 500ms; // std::chrono::milliseconds
  const auto longTimeout = 2s;     // std::chrono::seconds

  analyzeWithTimeout(shortTimeout);
  // seconds -> milliseconds is a lossless, implicit conversion.
  analyzeWithTimeout(longTimeout);
  return 0;
}
Output
$ ./src/c++14/build/user-defined-literals-for-standard-library-types
Timeout: 500ms
Timeout: 2000ms
Compile-time integer sequences
Use case
Unpack tuple of segment info into function arguments.
Explanation
std::index_sequence and std::make_index_sequence<N> generate compile-time sequences 0, 1, 2, ..., N-1. Use with parameter pack expansion to unpack tuples or iterate arrays at compile-time.
Code
#include <cstdint>
#include <iostream>
#include <tuple>
#include <utility>
// Prints one segment as "<name> @ 0x<addr> (size: 0x<size>)".
void printSegment(const char *name, uint64_t addr, uint64_t size) {
  std::cout << name << " @ 0x" << std::hex << addr << " (size: 0x" << size
            << ")\n";
}

// Step 2: I... is 0, 1, 2 for a tuple with 3 elements.
template <typename Tuple, std::size_t... I>
void callImpl(Tuple &&t, std::index_sequence<I...>) {
  // std::get<I>(t) expands to: std::get<0>(t), std::get<1>(t), std::get<2>(t).
  // Each std::get touches a different element, so forwarding t once per
  // index never reads an element that was already moved from.
  printSegment(std::get<I>(std::forward<Tuple>(t))...);
}

// Step 1: create index sequence from tuple size.
template <typename Tuple>
void callWithTuple(Tuple &&t) {
  // decay_t strips the reference so tuple_size sees the plain tuple type.
  constexpr std::size_t count = std::tuple_size<std::decay_t<Tuple>>::value;
  // make_index_sequence<3>{} creates index_sequence<0, 1, 2>.
  // t is a forwarding reference: std::forward keeps rvalue tuples rvalues
  // (a bare `t` would always be passed on as an lvalue).
  callImpl(std::forward<Tuple>(t), std::make_index_sequence<count>{});
}
// Packs the segment description into a tuple and lets callWithTuple() spread
// it over printSegment()'s parameters.
int main() {
  const char *name = "__TEXT";
  auto seg = std::make_tuple(name, 0x10000000, 0x4000);

  // Unpacks to: printSegment("__TEXT", 0x10000000, 0x4000);
  callWithTuple(seg);
  return 0;
}
Output
$ ./src/c++14/build/compile-time-integer-sequences
__TEXT @ 0x10000000 (size: 0x4000)
std::make_unique
Use case
Safe creation of unique_ptr for parsed structures.
Explanation
std::make_unique<T>(args) creates a unique_ptr<T> without using new. It is exception-safe and cleaner than unique_ptr<T>(new T(args)). Equivalent to what make_shared is for shared_ptr.
Code
#include <cstdint>
#include <iostream>
#include <memory>
// https://en.wikipedia.org/wiki/Mach-O
// Load command stand-in that logs its command id (in hex) when it is created
// and when it is destroyed.
struct LoadCommand {
  uint32_t cmd;
  uint32_t size;

  LoadCommand(uint32_t c, uint32_t s) : cmd{c}, size{s} {
    std::cout << "Created LC 0x" << std::hex << cmd << "\n";
  }

  ~LoadCommand() {
    std::cout << "Destroyed LC 0x" << std::hex << cmd << "\n";
  }
};
int main() {
auto segment = std::make_unique<LoadCommand>(0x19, 72);
std::cout << "Segment size: " << std::dec << segment->size << "\n";
return 0;
}
Output
$ ./src/c++14/build/std-make_unique
Created LC 0x19
Segment size: 72
Destroyed LC 0x19
Template argument deduction for class templates
Use case
Simpler container initialization.
Explanation
Class template arguments can now be deduced from constructor arguments, similarly to function templates.
Code
#include <cstdint>
#include <iostream>
#include <vector>
// Non-owning (pointer, length) pair. With C++17, T can be deduced from the
// pointer passed to the constructor.
template <typename T>
struct Buffer {
  T *data;
  size_t size;

  Buffer(T *d, size_t s) : data{d}, size{s} {}
};
int main() {
// $ echo "stp x29, x30, [sp, #-16]\!" | llvm-mc -triple=aarch64 -show-encoding
// stp x29, x30, [sp, #-16]! // encoding: [0xfd,0x7b,0xbf,0xa9]
uint8_t bytes[] = {0xFD, 0x7B, 0xBF, 0xA9};
// C++14
// Buffer<uint8_t> buf(bytes, 4);
// C++17
Buffer buf(bytes, 4);
std::cout << "Buffer size: " << buf.size << "\n";
return 0;
}
Output
$ ./src/c++17/build/template-argument-deduction-for-class-templates
Buffer size: 4
Non-type template parameters with auto
Use case
Compile-time buffer sizes with any constant type.
Explanation
auto lets one template accept any constant type (int, unsigned, char etc.). Before C++17, we would need template<int N> or template<size_t N> (separate templates for each type).
Code
#include <cstdint>
#include <iostream>
// Fixed-size byte buffer. N may be a constant of any integral type; size
// exposes it with that same type.
template <auto N>
struct Buffer {
  static constexpr auto size = N;
  uint8_t data[N];
};
int main() {
Buffer<64> small;
Buffer<0x1000> page;
std::cout << "Small: " << small.size << "\n";
std::cout << "Page: 0x" << std::hex << page.size << "\n";
return 0;
}
Output
$ ./src/c++17/build/non-type-template-parameters-with-auto
Small: 64
Page: 0x1000
Folding expressions
Use case
Variadic logging or combining flags.
Explanation
Fold expressions expand parameter packs over an operator. Unary fold has one operator ((pack | ...)). Binary fold has two operators plus an init value ((init << ... << pack)).
Code
#include <cstdint>
#include <iostream>
// Streams every argument to std::cout in order, then a newline. Stream
// manipulators such as std::hex can be passed like any other argument.
template <typename... Args>
void log(Args... args) {
  // Binary fold: init op ... op pack (2 operators).
  // Expands to: ((std::cout << arg1) << arg2) << arg3.
  auto &out = (std::cout << ... << args);
  out << "\n";
}
// ORs all flags together. At least one flag is required: a unary fold over
// | has no value for an empty pack.
template <typename... Flags>
constexpr auto combineFlags(Flags... flags) {
  // Unary fold: pack op ... (1 operator).
  // Expands to: flag1 | (flag2 | flag3).
  const auto merged = (flags | ...);
  return merged;
}
// Exercises both folds: variadic logging and OR-ing Mach-O header flags.
int main() {
  log("Entry: 0x", std::hex, 0x10001000);

  // https://github.com/apple-oss-distributions/xnu/blob/f6217f891ac0bb64f3d375211650a4c1ff8ca1ea/EXTERNAL_HEADERS/mach-o/loader.h#L145
  // https://github.com/apple-oss-distributions/xnu/blob/f6217f891ac0bb64f3d375211650a4c1ff8ca1ea/EXTERNAL_HEADERS/mach-o/loader.h#L190
  constexpr uint32_t MH_TWOLEVEL = 0x80;
  constexpr uint32_t MH_PIE = 0x20000;

  // combineFlags is constexpr, so the result is computed at compile time.
  constexpr auto headerFlags = combineFlags(MH_PIE, MH_TWOLEVEL);
  std::cout << "Flags: 0x" << std::hex << headerFlags << "\n";
  return 0;
}
Output
$ ./src/c++17/build/folding-expressions
Entry: 0x10001000
Flags: 0x20080
New rules for auto deduction from braced-init-list
Use case
Cleaner initialization.
Explanation
auto x{value} now deduces to the value’s type, not std::initializer_list.
Code
#include <iostream>
// Contrasts auto deduction for direct-list-initialization (one element, no
// =) with copy-list-initialization (with =).
int main() {
  // C++17: a single element in braces deduces the element type, int here.
  auto single{0x1000};
  // Before C++17 the same form deduced std::initializer_list<int>:
  // auto x{42};

  // With = the deduced type is still std::initializer_list<int>.
  auto list = {0x1100, 0x1200, 0x1300};

  std::cout << "addr0: 0x" << std::hex << single << "\n";
  std::cout << "addr1.size: " << list.size() << "\n";
  return 0;
}
Output
$ ./src/c++17/build/new-rules-for-auto-deduction-from-braced-init-list
addr0: 0x1000
addr1.size: 3
constexpr lambda
Use case
Compile-time instruction decoding.
Explanation
Lambdas can now be constexpr.
Code
#include <cstdint>
#include <iostream>
// Decodes the destination register of "mov x29, sp" at compile time with a
// lambda and prints it.
int main() {
  // $ echo "mov x29, sp" | llvm-mc -triple=aarch64 -show-encoding
  // mov x29, sp // encoding: [0xfd,0x03,0x00,0x91]
  constexpr uint32_t insn = 0x910003fd;

  // https://developer.arm.com/documentation/ddi0602/2025-12/Base-Instructions/MOV--register---Move-register-value--an-alias-of-ORR--shifted-register--?lang=en
  // Rd occupies the low five bits of the instruction word.
  constexpr auto decodeRd = [](uint32_t word) { return word & 0x1F; };

  // Calling the lambda in a constant expression requires C++17.
  constexpr auto rd = decodeRd(insn);
  static_assert(rd == 29, "Rd should be x29");

  std::cout << "Rd: x" << rd << "\n";
  return 0;
}
Output
$ ./src/c++17/build/constexpr-lambda
Rd: x29
Lambda capture this by value
Use case
Safe callbacks that outlive the object.
Explanation
[*this] captures a copy of the object. Safe even after the original object is destroyed.
Code
#include <cstdint>
#include <functional>
#include <iostream>
// Hands out a callback that reports the base address the analyzer had when
// the callback was created.
struct Analyzer {
  uint64_t baseAddr = 0x10000000;

  auto getCallback() {
    // [*this] stores a copy of the whole object inside the closure, so the
    // callback neither sees later changes nor depends on this object's
    // lifetime.
    auto snapshot = [*this]() { return baseAddr; };
    return snapshot;
  }
};
int main() {
std::function<uint64_t()> callback;
{
Analyzer a;
callback = a.getCallback();
a.baseAddr = 0xDEAD;
}
std::cout << "Base: 0x" << std::hex << callback() << "\n";
return 0;
}
Output
$ ./src/c++17/build/lambda-capture-this-by-value
Base: 0x10000000
inline variables
Use case
Header-only constants without ODR issues.
Explanation
inline variables can be defined in headers without causing multiple definition errors.
Code
#include "macho_constants.hpp"
#include <iostream>
// Prints the 64-bit Mach-O magic constant provided by macho_constants.hpp
// (the header itself is not shown here).
int main() {
  std::cout << "64-bit magic: 0x";
  std::cout << std::hex << MH_MAGIC_64 << "\n";
  return 0;
}
Output
$ ./src/c++17/build/inline-variables
64-bit magic: 0xfeedfacf
Nested namespaces
Use case
Cleaner namespace declarations.
Explanation
Nested namespaces can be declared with :: separator in a single line.
Code
#include <cstdint>
#include <iostream>
// C++17: a single declaration opens all three nested namespaces.
namespace re::macho::arm64 {
// $ echo "nop" | llvm-mc -triple=aarch64 -show-encoding
// nop // encoding: [0x1f,0x20,0x03,0xd5]
// The encoding lists the bytes in memory (little-endian) order, so the
// 32-bit instruction word reads 0xD503201F.
constexpr uint32_t NOP = 0xD503201F;
} // namespace re::macho::arm64
// Before C++17:
// namespace re { namespace macho { namespace arm64 {...}}}
// Prints the NOP encoding through its fully qualified name.
int main() {
  constexpr uint32_t nop = re::macho::arm64::NOP;
  std::cout << "NOP: 0x" << std::hex << nop << "\n";
  return 0;
}
Output
$ ./src/c++17/build/nested-namespaces
NOP: 0xd503201f
Structured bindings
Use case
Unpack tuples, pairs, arrays and structs cleanly.
Explanation
auto [a, b, c] = expr unpacks tuple-like objects and aggregate types (e.g. plain structs with public members) into named variables. Cleaner than std::tie and std::get.
Code
#include <cstdint>
#include <iostream>
#include <map>
// Lookup result. A plain aggregate with public members, which is what allows
// it to be unpacked with a structured binding.
struct Symbol {
  bool found;
  uint64_t addr;
  const char *name;
};

// Stub lookup: ignores the requested name and always reports _main at
// 0x10001000.
Symbol findSymbol([[maybe_unused]] const char *name) {
  Symbol hit{true, 0x10001000, "_main"};
  return hit;
}
// Uses structured bindings twice: on a returned struct and on the key/value
// pairs of a map.
int main() {
  // One name per public member of Symbol, in declaration order.
  auto [found, addr, name] = findSymbol("_main");
  std::cout << name << " @ 0x" << std::hex << addr << "\n";

  std::map<uint64_t, const char *> symbols{{0x10001000, "_main"},
                                           {0x10002000, "_helper"}};
  // Each map element is a pair; bind its key and value by const reference.
  for (const auto &[address, label] : symbols) {
    std::cout << label << " @ 0x" << std::hex << address << "\n";
  }
  return 0;
}
Output
$ ./src/c++17/build/structured-bindings
_main @ 0x10001000
_main @ 0x10001000
_helper @ 0x10002000
Selection statements with initializer
Use case
Limit variable scope in conditionals.
Explanation
if (init; condition) declares variables scoped to the if/else block.
Code
#include <cstdint>
#include <iostream>
#include <map>
// Looks up an address in a map with the iterator scoped to the if statement.
int main() {
  std::map<uint64_t, const char *> symbols{{0x10001000, "_main"}};

  // hit is declared in the init part, so it is visible only inside the
  // if/else and does not leak into the rest of main().
  if (const auto hit = symbols.find(0x10001000); hit != symbols.end()) {
    std::cout << "Found: " << hit->second << "\n";
  }
  return 0;
}
Output
$ ./src/c++17/build/selection-statements-with-initializer
Found: _main
constexpr if
Use case
Compile-time branching for 32/64-bit handling.
Explanation
if constexpr evaluates at compile-time.
Code
#include <cstdint>
#include <iostream>
#include <type_traits>
// Prints addr in hex with a "64-bit" prefix for 8-byte types and a "32-bit"
// prefix for every other type.
template <typename T>
void printAddr(T addr) {
  // Only the branch selected at compile time is instantiated.
  if constexpr (sizeof(T) == 8) {
    std::cout << "64-bit: 0x";
  } else {
    std::cout << "32-bit: 0x";
  }
  std::cout << std::hex << addr << "\n";
}
// Calls printAddr() once with a 64-bit and once with a 32-bit address.
int main() {
  const uint64_t wide = 0x10001000;
  const uint32_t narrow = 0x1000;

  printAddr(wide);
  printAddr(narrow);
  return 0;
}
Output
$ ./src/c++17/build/constexpr-if
64-bit: 0x10001000
32-bit: 0x1000
UTF-8 character literals
Use case
Explicit UTF-8 encoding.
Explanation
u8'x' creates a UTF-8 encoded character literal.
Code
#include <iostream>
// Stores a UTF-8 character literal in a char and prints it.
int main() {
  // u8'X' must fit in a single UTF-8 code unit.
  const char letter = u8'X';
  std::cout << "Char: " << letter << "\n";
  return 0;
}
Output
$ ./src/c++17/build/utf-8-character-literals
Char: X
Direct-list-initialization of enums
Use case
Initialize scoped enum from raw value (without cast).
Explanation
Scoped enums can now be initialized with braces from their underlying type.
Code
#include <cstdint>
#include <iostream>
// https://en.wikipedia.org/wiki/Mach-O
// Scoped enum with a fixed underlying type and no named enumerators; main()
// creates values directly from raw integers with brace initialization.
enum class MHFileType : uint32_t {};
int main() {
MHFileType exe{0x2};
std::cout << "Filetype: 0x" << std::hex << static_cast<uint32_t>(exe) << "\n";
return 0;
}
Output
$ ./src/c++17/build/direct-list-initialization-of-enums
Filetype: 0x2
[[fallthrough]], [[nodiscard]], [[maybe_unused]] attributes
Use case
Express intent and catch bugs.
Explanation
[[fallthrough]] documents intentional fallthroughs. [[nodiscard]] warns if return value is ignored. [[maybe_unused]] suppresses unused warnings.
Code
#include <cstdint>
#include <iostream>
// https://en.wikipedia.org/wiki/Mach-O
// Returns true when magic equals 0xFEEDFACF. [[nodiscard]] makes the compiler
// warn when a caller drops the result.
[[nodiscard]] bool validate(uint32_t magic) {
  constexpr uint32_t expectedMagic = 0xFEEDFACF;
  return magic == expectedMagic;
}
// https://developer.arm.com/documentation/ddi0602/2025-12/Base-Instructions/B--Branch-?lang=en
// https://developer.arm.com/documentation/ddi0602/2025-12/Base-Instructions/BL--Branch-with-link-?lang=en
// Classifies an instruction word by its top six bits. BL prints
// "Recording call." and then, like B, "Analyzing branch."; every other
// instruction prints nothing. addr is accepted but not used yet.
void analyzeInstr(uint32_t instr, [[maybe_unused]] uint64_t addr) {
  // Bits 31..26: 0b100101 (0x25) for BL, 0b000101 (0x05) for B.
  const uint32_t op = (instr >> 26) & 0x3F;
  switch (op) {
  // BL: a call is also a branch, so record it and continue below.
  case 0x25:
    std::cout << "Recording call.\n";
    [[fallthrough]];
  // B.
  case 0x05:
    std::cout << "Analyzing branch.\n";
    break;
  default:
    break;
  }
}
// Validates a magic number, then feeds one BL and one B instruction to
// analyzeInstr().
int main() {
  // The result is used, so [[nodiscard]] stays silent.
  const bool magicOk = validate(0xFEEDFACF);
  std::cout << "Magic is valid: " << std::boolalpha << magicOk << "\n";

  // $ echo "bl 0x40" | llvm-mc -triple=aarch64 -show-encoding
  // bl #64 // encoding: [0x10,0x00,0x00,0x94]
  const uint32_t blInsn = 0x94000010;
  analyzeInstr(blInsn, 0x1000);

  // $ echo "b 0x40" | llvm-mc -triple=aarch64 -show-encoding
  // b #64 // encoding: [0x10,0x00,0x00,0x14]
  const uint32_t bInsn = 0x14000010;
  analyzeInstr(bInsn, 0x2000);
  return 0;
}
Output
$ ./src/c++17/build/fallthrough-nodiscard-maybe_unused
Magic is valid: true
Analyzing branch.
Recording call.
Analyzing branch.
__has_include
Use case
Portable header detection.
Explanation
__has_include checks if a header exists at preprocessing time.
Code
#include <iostream>
// Selects the BINARY_FORMAT string at preprocessing time from the first
// header that is available: the Mach-O loader header, then elf.h, otherwise
// "unknown".
#if __has_include(<mach-o/loader.h>)
#define BINARY_FORMAT "Mach-O (macOS/iOS)"
#elif __has_include(<elf.h>)
#define BINARY_FORMAT "ELF (Linux)"
#else
#define BINARY_FORMAT "unknown"
#endif
// Prints the binary format chosen by the preprocessor checks above.
int main() {
  const char *format = BINARY_FORMAT;
  std::cout << "Binary format: " << format << "\n";
  return 0;
}
Output
$ ./src/c++17/build/has-include
Binary format: Mach-O (macOS/iOS)
Class template argument deduction
Use case
Cleaner standard library usage.
Explanation
CTAD lets us omit template arguments when the compiler can deduce them from constructor arguments.
Code
#include <iostream>
#include <vector>
// Creates a vector without naming its element type and prints its size.
int main() {
  // CTAD deduces std::vector<int> from the int initializers.
  const std::vector addresses{0x1000, 0x1100, 0x1200};

  const auto count = addresses.size();
  std::cout << "Vector size: " << count << " \n";
  return 0;
}
Output
$ ./src/c++17/build/class-template-argument-deduction
Vector size: 3
std::variant
Use case
Type-safe union for different value types.
Explanation
std::variant is a type-safe union. Use std::get<T> or std::visit to access values.
Code
#include <cstdint>
#include <iostream>
#include <string>
#include <variant>
using SymbolValue = std::variant<uint64_t, std::string>;
int main() {
// auto&& is a forwarding reference.
// Same rules as T&& where T is deduced.
// auto&& a = lvalue; // Deduces to T& (T&& & --> T&).
// auto&& b = rvalue; // Deduces to T&& (T&& && --> T&&).
auto printer = [](auto &&arg) {
// decay_t gives us the type we would get if passed by value (auto x).
using T = std::decay_t<decltype(arg)>;
if constexpr (std::is_same_v<T, uint64_t>) {
std::cout << "Address: 0x" << std::hex << arg << "\n";
} else if constexpr (std::is_same_v<T, std::string>) {
std::cout << "Name: " << arg << "\n";
}
};
SymbolValue val = uint64_t{0x10001000};
std::visit(printer, val);
val = std::string("_main");
std::visit(printer, val);
return 0;
}
Output
$ ./src/c++17/build/std-variant
Address: 0x10001000
Name: _main
std::optional
Use case
Return value that may or may not exist.
Explanation
std::optional<T> holds either a value or nothing. Cleaner than returning sentinel values or pointers.
Code
#include <cstdint>
#include <iostream>
#include <optional>
// Looks up a symbol by name.
// Returns a fixed address for names that start with '_', and std::nullopt
// otherwise (including for a null or empty name).
std::optional<uint64_t> findSymbol(const char *name) {
  // Check for nullptr before reading the first character.
  if (name == nullptr || name[0] != '_') {
    return std::nullopt;
  }
  return 0x10001000;
}
int main() {
  // All addresses below are printed in hexadecimal.
  std::cout << std::hex;

  const auto found = findSymbol("_main");
  if (found.has_value()) {
    std::cout << "Found: 0x" << found.value() << "\n";
  }

  // value_or supplies a fallback when the optional is empty.
  const auto fallback = findSymbol("invalid").value_or(0);
  std::cout << "Default: 0x" << fallback << "\n";
  return 0;
}
Output
$ ./src/c++17/build/std-optional
Found: 0x10001000
Default: 0x0
std::any
Use case
Store arbitrary metadata on symbols.
Explanation
std::any stores any single value with type safety at access time. Unlike void*, it knows its type.
Code
#include <any>
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
int main() {
  std::map<std::string, std::any> metadata;
  metadata.emplace("address", uint64_t{0x10001000});
  metadata.emplace("name", std::string{"_main"});
  metadata.emplace("isExported", true);

  // Fetch the entries once and reuse the references.
  std::any &address = metadata["address"];
  std::any &name = metadata["name"];

  std::cout << "Address: 0x" << std::hex << std::any_cast<uint64_t>(address)
            << "\n";
  // The printed type name is implementation-defined (mangled on Itanium ABI).
  // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-builtin
  std::cout << "Address type: " << address.type().name() << "\n";
  std::cout << "Name: " << std::any_cast<std::string>(name) << "\n";
  return 0;
}
Output
$ ./src/c++17/build/std-any
Address: 0x10001000
Address type: y
Name: _main
std::string_view
Use case
Non-owning string reference for parsing.
Explanation
std::string_view is a non-owning view into a string. No copies, works with any string source.
Code
#include <iostream>
#include <string_view>
// Prints the symbol name without its leading underscore, if it has one.
void parseSymbol(std::string_view sym) {
  const bool hasUnderscore = !sym.empty() && sym.front() == '_';
  const std::string_view stripped = hasUnderscore ? sym.substr(1) : sym;
  std::cout << "Symbol: " << stripped << "\n";
}
int main() {
  // A string literal converts to std::string_view implicitly.
  parseSymbol("_main");

  // So does a std::string.
  const std::string owned = "_helper";
  parseSymbol(owned);

  // A char array can be viewed as pointer + length (terminator excluded).
  char raw[] = "_func";
  parseSymbol(std::string_view(raw, sizeof(raw) - 1));
  return 0;
}
Output
$ ./src/c++17/build/std-string_view
Symbol: main
Symbol: helper
Symbol: func
std::invoke
Use case
Uniformly call any callable.
Explanation
std::invoke calls any callable uniformly (functions, lambdas, member functions etc.).
Code
#include <cstdint>
#include <functional>
#include <iostream>
// Holds the base address used by the example.
struct Analyzer {
  uint64_t base = 0x10000000;
  // Read-only accessor; const so it can be called on const objects too.
  uint64_t getBase() const { return base; }
};
int main() {
auto toOffset = [](uint64_t addr, uint64_t base) { return addr - base; };
std::cout << "Offset: 0x" << std::hex
<< std::invoke(toOffset, 0x10001000, 0x10000000) << "\n";
Analyzer a;
std::cout << "Base: 0x" << std::hex << std::invoke(&Analyzer::getBase, a)
<< "\n";
return 0;
}
Output
$ ./src/c++17/build/std-invoke
Offset: 0x1000
Base: 0x10000000
std::apply
Use case
Call functions with tuple arguments.
Explanation
std::apply unpacks a tuple and calls a function with its elements as arguments. Simpler than index_sequence.
Code
#include <cstdint>
#include <iostream>
#include <tuple>
// Prints a segment as "<name> @ 0x<addr> (size: 0x<size>)" in hexadecimal.
// Leaves std::cout in hex mode.
void printSegment(const char *name, uint64_t addr, uint64_t size) {
  std::cout << name << " @ 0x";
  std::cout << std::hex << addr;
  std::cout << " (size: 0x" << size << ")\n";
}
int main() {
  // Bundle the three arguments of printSegment into a tuple.
  const auto segment = std::make_tuple("__TEXT", 0x10000000, 0x4000);
  // std::apply unpacks the tuple into the call.
  std::apply(printSegment, segment);
  return 0;
}
Output
$ ./src/c++17/build/std-apply
__TEXT @ 0x10000000 (size: 0x4000)
std::filesystem
Use case
Binary file operations.
Explanation
std::filesystem provides portable file system operations (paths, queries, directory iteration etc.).
Code
#include <filesystem>
#include <iostream>
namespace fs = std::filesystem;
int main() {
fs::path app = "/Applications/Firefox.app/Contents/MacOS/firefox";
if (fs::exists(app)) {
std::cout << "Binary: " << app.filename() << "\n";
std::cout << "Size: " << fs::file_size(app) << " bytes.\n";
}
fs::path frameworks = "/System/Library/Frameworks";
int count = 0;
for (const auto &entry : fs::directory_iterator(frameworks)) {
if (entry.path().extension() == ".framework") {
std::cout << entry.path().filename() << "\n";
++count;
if (count == 3) {
break;
}
}
}
return 0;
}
Output
$ ./src/c++17/build/std-filesystem
Binary: "firefox"
Size: 174304 bytes.
"WiFiAware.framework"
"JavaRuntimeSupport.framework"
"MetricKit.framework"
std::byte
Use case
Explicit byte type for binary data.
Explanation
std::byte is for raw bytes. Only bitwise ops are allowed (no arithmetic). Cleaner intent than char or uint8_t.
Code
#include <cstddef>
#include <cstdint>
#include <iostream>
int main() {
  // https://en.wikipedia.org/wiki/Mach-O
  // 64-bit: 0xCF (0xFEEDFACF).
  // 32-bit: 0xCE (0xFEEDFACE).
  const std::byte magic{0xCF};
  const std::byte lowNibble{0x0F};
  // Only bitwise operators are defined for std::byte.
  const std::byte masked = magic & lowNibble;
  const bool is64 = (masked == lowNibble);
  std::cout << "64-bit: " << std::boolalpha << is64 << "\n";
  return 0;
}
Output
$ ./src/c++17/build/std-byte
64-bit: true
Splicing for maps and sets
Use case
Move entries between symbol tables without copying.
Explanation
extract removes and returns a node handle for single entries. merge transfers all nodes from one container to another. Both avoid copying data.
Code
#include <cstdint>
#include <iostream>
#include <map>
int main() {
  using SymbolTable = std::map<uint64_t, const char *>;
  SymbolTable text = {{0x1000, "_main"}, {0x1100, "_helper"}};
  SymbolTable data = {{0x2000, "_global"}, {0x2100, "_buffer"}};

  // Move a single node from text into data.
  data.insert(text.extract(0x1100));
  // Move every remaining node from text into data.
  data.merge(text);

  std::cout << "text size: " << text.size() << "\n";
  std::cout << "data size: " << data.size() << "\n";

  std::cout << std::hex;
  for (auto it = data.cbegin(); it != data.cend(); ++it) {
    std::cout << "0x" << it->first << ": " << it->second << "\n";
  }
  return 0;
}
Output
$ ./src/c++17/build/splicing-for-maps-and-sets
text size: 0
data size: 4
0x1000: _main
0x1100: _helper
0x2000: _global
0x2100: _buffer
Parallel algorithms
PSTL is still WIP. Additionally, it requires the -fexperimental-library build flag. Therefore, it is not covered here yet.
std::sample
Use case
Random sampling from symbol table.
Explanation
std::sample randomly selects n elements from a range (without replacement, meaning once an element is picked, it cannot be picked again).
Code
#include <algorithm>
#include <iostream>
#include <random>
#include <vector>
int main() {
  const std::vector<const char *> symbols = {"_main", "_helper", "_init",
                                             "_fini", "_start"};
  // Seed the engine from std::random_device.
  std::mt19937 rng{std::random_device{}()};

  // Pick two distinct symbols.
  std::vector<const char *> picked;
  std::sample(symbols.begin(), symbols.end(), std::back_inserter(picked), 2,
              rng);

  for (const char *name : picked) {
    std::cout << name << "\n";
  }
  return 0;
}
Output
$ ./src/c++17/build/std-sample
_init
_fini
std::clamp
Use case
Bound values to valid ranges.
Explanation
std::clamp(v, lo, hi) returns v bounded to [lo, hi].
Code
#include <algorithm>
#include <cstdint>
#include <iostream>
int main() {
  // Valid offset range.
  constexpr uint64_t kLowest = 0x1000;
  constexpr uint64_t kHighest = 0x4000;

  const uint64_t requested = 0x5000;
  const uint64_t clamped = std::clamp(requested, kLowest, kHighest);
  std::cout << "Clamped: 0x" << std::hex << clamped << "\n";
  return 0;
}
Output
$ ./src/c++17/build/std-clamp
Clamped: 0x4000
std::reduce
Use case
Sum segment sizes, calculate total pages needed.
Explanation
std::reduce sums elements (parallelizable). std::transform_reduce transforms each element first, then reduces. Both require the reduction operation to be associative (grouping does not matter) and commutative (order does not matter).
Code
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>
int main() {
  const std::vector<uint64_t> sizes{0x1234, 0x2345, 0x3456};

  const auto total = std::reduce(sizes.begin(), sizes.end());
  std::cout << "Raw total: 0x" << std::hex << total << "\n";

  // Number of 4k pages a segment of the given size occupies.
  // Adding 0xFFF (page size - 1) before dividing rounds up.
  // (0x1234 + 0xFFF) / 0x1000 = 0x2233 / 0x1000 = 2 pages.
  const auto pagesFor = [](uint64_t size) { return (size + 0xFFF) / 0x1000; };

  const auto pages = std::transform_reduce(sizes.begin(), sizes.end(),
                                           uint64_t{0}, std::plus{}, pagesFor);
  std::cout << "Pages needed: " << std::dec << pages << "\n";
  return 0;
}
Output
$ ./src/c++17/build/std-reduce
Raw total: 0x69cf
Pages needed: 9
Prefix sum algorithms
Use case
Compute string table offsets.
Explanation
exclusive_scan = start offsets. inclusive_scan = end offsets. transform_* variants transform before scanning.
Code
#include <iostream>
#include <numeric>
#include <vector>
int main() {
  // Strings: "_main", "_helper", "_start".
  const std::vector<size_t> lens{5, 7, 6};
  std::vector<size_t> out(lens.size());

  // Prints the current contents of out on one line.
  const auto dump = [&out] {
    for (const auto &s : out) {
      std::cout << s << " ";
    }
    std::cout << "\n";
  };
  // Length of a string including its null terminator.
  const auto withTerminator = [](size_t l) { return l + 1; };

  // The init value determines the accumulator type, so pass size_t{0} rather
  // than the int literal 0.
  std::exclusive_scan(lens.begin(), lens.end(), out.begin(), size_t{0});
  dump();

  std::inclusive_scan(lens.begin(), lens.end(), out.begin());
  dump();

  // Add null terminator, then scan.
  std::transform_exclusive_scan(lens.begin(), lens.end(), out.begin(),
                                size_t{0}, std::plus{}, withTerminator);
  dump();

  std::transform_inclusive_scan(lens.begin(), lens.end(), out.begin(),
                                std::plus{}, withTerminator);
  dump();
  return 0;
}
Output
$ ./src/c++17/build/prefix-sum-algorithms
0 5 12
5 12 18
0 6 14
6 14 21
std::gcd and std::lcm
Use case
Detect cipher block size, find XOR key period.
Explanation
std::gcd and std::lcm compute greatest common divisor and least common multiple. GCD of ciphertext lengths suggests the block size. LCM of XOR key lengths reveals when the combined keystream repeats (might be useful for breaking multi-key XOR).
Code
#include <iostream>
#include <numeric>
int main() {
  // Observed ciphertext lengths.
  const size_t lenA = 48;
  const size_t lenB = 64;
  const size_t lenC = 80;
  // gcd is associative, so fold the three lengths pairwise.
  const auto blockSize = std::gcd(lenA, std::gcd(lenB, lenC));
  std::cout << "Block size: " << blockSize << " bytes.\n";

  // Lengths of the two XOR keys.
  const size_t firstKeyLen = 7;
  const size_t secondKeyLen = 5;
  const auto period = std::lcm(firstKeyLen, secondKeyLen);
  std::cout << "Pattern repeats every " << period << " bytes.\n";
  return 0;
}
Output
$ ./src/c++17/build/std-gcd-std-lcm
Block size: 16 bytes.
Pattern repeats every 35 bytes.
std::not_fn
Use case
Negate predicates.
Explanation
std::not_fn wraps a callable and negates its result.
Code
#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>
int main() {
  const std::vector<uint64_t> addrs{0x1000, 0x0, 0x2000, 0x0, 0x3000};

  // Negate the "is zero" predicate instead of writing a second lambda.
  const auto isNonZero = std::not_fn([](uint64_t a) { return a == 0; });
  const auto nonZero = std::count_if(addrs.cbegin(), addrs.cend(), isNonZero);

  std::cout << "Non-zero: " << nonZero << "\n";
  return 0;
}
Output
$ ./src/c++17/build/std-not_fn
Non-zero: 3
String conversion to/from numbers
Use case
Fast, non-throwing number parsing.
Explanation
std::from_chars and std::to_chars are fast, non-allocating conversions.
Code
#include <charconv>
#include <cstdint>
#include <iostream>
#include <string_view>
int main() {
  const std::string_view str = "10001000";
  // Initialized so that a failed parse never leaves an indeterminate value.
  uint64_t addr = 0;
  // ptr points to where parsing stops.
  const auto [ptr, ec] =
      std::from_chars(str.data(), str.data() + str.size(), addr, 16);
  // std::errc{} --> success (no error).
  // Similar to errno but type-safe.
  if (ec != std::errc{}) {
    std::cout << "Failed to parse \"" << str << "\".\n";
    return 1;
  }
  std::cout << "Parsed 0x" << std::hex << addr << "\n";

  // 16 hex digits are enough for any uint64_t.
  char buf[20];
  const auto [end, ec2] = std::to_chars(buf, buf + sizeof(buf), addr, 16);
  // to_chars reports a too-small buffer through ec2; only print on success.
  if (ec2 == std::errc{}) {
    std::cout << "String: " << std::string_view(buf, end - buf) << "\n";
  }
  return 0;
}
Output
$ ./src/c++17/build/string-conversion-to-from-numbers
Parsed 0x10001000
String: 10001000
Rounding functions for chrono durations and timepoints
Use case
Benchmark analysis passes.
Explanation
floor rounds down, ceil rounds up, round rounds to nearest.
Code
#include <chrono>
#include <iostream>
#include <thread>
int main() {
  using namespace std::chrono;
  // steady_clock is monotonic, which interval measurements need;
  // high_resolution_clock may be an alias of the adjustable system_clock.
  const auto start = steady_clock::now();
  std::this_thread::sleep_for(1500ms);
  const auto elapsed = steady_clock::now() - start;

  std::cout << "floor: " << floor<seconds>(elapsed).count() << "s.\n";
  std::cout << "ceil: " << ceil<seconds>(elapsed).count() << "s.\n";
  std::cout << "round: " << round<seconds>(elapsed).count() << "s.\n";
  std::cout << "exact: " << round<milliseconds>(elapsed).count() << "ms.\n";
  return 0;
}
Output
$ ./src/c++17/build/rounding-functions-for-chrono-durations-and-timepoints
floor: 1s.
ceil: 2s.
round: 2s.
exact: 1503ms.
Coroutines
Coroutines make cooperative concurrency possible. According to C++ Support in Clang, coroutines are only partially supported. Therefore, they are not covered here yet.
Concepts
Use case
Constrain template parameters for type safety.
Explanation
Concepts use readable constraints. The compiler error now says exactly which concept failed and why.
Code
#include <concepts>
#include <cstdint>
#include <iostream>
#include <type_traits>
// Satisfied by unsigned arithmetic types.
template <typename T>
concept Unsigned = std::is_unsigned_v<T>;

// Prints value as "0x<hex>" followed by a newline.
template <Unsigned T> void printHex(T value) {
  // Unary plus promotes one-byte types (e.g. uint8_t) to int so they are
  // printed as numbers instead of characters; wider types are unchanged.
  std::cout << "0x" << std::hex << +value << "\n";
}
int main() {
  const uint64_t addr = 0x10001000;
  printHex(addr);
  // A signed argument does not satisfy Unsigned. Enabling the commented-out
  // call at the end of this function produces the following diagnostic:
  /*
  concepts.cpp:x:x: error: no matching function for call to
  'printHex'
  xx | printHex(-1);
  | ^~~~~~~~
  concepts.cpp:x:x: note: candidate template ignored: constraints
  not satisfied [with T = int]
  x | template <Unsigned T> void printHex(T value) {
  | ^
  concepts.cpp:x:x: note: because 'int' does not satisfy 'Unsigned'
  x | template <Unsigned T> void printHex(T value) {
  | ^
  concepts.cpp:x:x: note: because 'std::is_unsigned_v<int>'
  evaluated to false
  x | concept Unsigned = std::is_unsigned_v<T>;
  | ^
  1 error generated.
  */
  // printHex(-1);
  return 0;
}
Output
$ ./src/c++20/build/concepts
0x10001000
requires clause
Use case
Add constraints inline without defining a named concept.
Explanation
requires clause adds constraints without a named concept.
Code
#include <cstdint>
#include <iostream>
#include <type_traits>
// Returns the len-bit field of value that starts at bit start (bit 0 is the
// least significant bit). Bits past the most significant bit read as zero.
// A non-positive len or an out-of-range start yields 0.
template <typename T>
  requires std::is_integral_v<T> && (sizeof(T) >= 4)
T extractBits(T value, int start, int len) {
  // Work on the unsigned representation so shifts are well-defined.
  using U = std::make_unsigned_t<T>;
  constexpr int kBits = static_cast<int>(sizeof(T) * 8);
  if (len <= 0 || start < 0 || start >= kBits) {
    return T{0};
  }
  const U shifted = static_cast<U>(static_cast<U>(value) >> start);
  // Shifting by the full width is undefined, so treat it as "keep all bits".
  if (len >= kBits) {
    return static_cast<T>(shifted);
  }
  const U mask = static_cast<U>((U{1} << len) - 1);
  return static_cast<T>(shifted & mask);
}
int main() {
  // $ echo "bl 0x40" | llvm-mc -triple=aarch64 -show-encoding
  // bl #64 // encoding: [0x10,0x00,0x00,0x94]
  const uint32_t blInstr = 0x94000010;
  // The low 26 bits of BL hold the branch immediate.
  // https://developer.arm.com/documentation/ddi0602/2025-12/Base-Instructions/BL--Branch-with-link-?lang=en
  constexpr int kImmStart = 0;
  constexpr int kImmBits = 26;
  const auto imm = extractBits(blInstr, kImmStart, kImmBits);
  std::cout << "Offset: 0x" << std::hex << imm << "\n";
  return 0;
}
Output
$ ./src/c++20/build/requires-clause
Offset: 0x10
Three-way comparison (spaceship operator)
Use case
Auto-generate all comparison operators.
Explanation
<=> with = default generates all comparison operators from member-wise comparison (comparing each member in order, one by one).
Code
#include <compare>
#include <cstdint>
#include <iostream>
// A symbol with an address and a name.
// The defaulted operator<=> compares the members in declaration order: addr
// first, then name. Because name is a const char *, it is compared as a
// pointer value, not by string contents.
struct Symbol {
uint64_t addr;
const char *name;
auto operator<=>(const Symbol &) const = default;
};
int main() {
Symbol main{0x10001000, "_main"};
Symbol helper{0x10002000, "_helper"};
std::cout << std::boolalpha;
std::cout << "main < helper: " << (main < helper) << "\n";
std::cout << "main == helper: " << (main == helper) << "\n";
return 0;
}
Output
$ ./src/c++20/build/three-way-comparison
main < helper: true
main == helper: false
Designated initializers
Use case
Initialize structs by field name for clarity.
Explanation
Name fields explicitly. Order must match declaration. Unspecified fields are zero-initialized.
Code
#include <cstdint>
#include <iostream>
// https://en.wikipedia.org/wiki/Mach-O
// Reduced header used by the example: only magic, filetype and numofcmds are
// declared; the remaining fields are left commented out.
struct MachHeader64 {
uint32_t magic;
// uint32_t cputype;
// uint32_t cpusubtype;
uint32_t filetype;
uint32_t numofcmds;
// uint32_t sizeofcmds;
// uint32_t flags;
// uint32_t reserved;
};
int main() {
MachHeader64 hdr{
.magic = 0xFEEDFACF,
.filetype = 0x2, // MH_EXECUTE
.numofcmds = 0x0,
};
std::cout << "Magic: 0x" << std::hex << hdr.magic << "\n";
std::cout << "Num of cmds: " << std::dec << hdr.numofcmds << "\n";
return 0;
}
Output
$ ./src/c++20/build/designated-initializers
Magic: 0xfeedfacf
Num of cmds: 0
Template syntax for lambdas
Use case
Access template parameter inside lambda.
Explanation
[]<typename T> gives access to T inside lambda.
Code
#include <cstdint>
#include <iostream>
#include <vector>
int main() {
  // The explicit template parameter names the element type, so sizeof(Elem)
  // can be used directly inside the lambda.
  const auto analyze = []<typename Elem>(const std::vector<Elem> &items) {
    std::cout << "Element size: " << sizeof(Elem) << " bytes.\n";
    for (auto it = items.begin(); it != items.end(); ++it) {
      std::cout << "Analyzing 0x" << std::hex << *it << ".\n";
    }
  };

  const std::vector<uint64_t> targets{0x10001000, 0x10002000};
  analyze(targets);
  return 0;
}
Output
$ ./src/c++20/build/template-syntax-for-lambdas
Element size: 8 bytes.
Analyzing 0x10001000.
Analyzing 0x10002000.
Range-based for loop with initializer
Use case
Limit scope of loop variable.
Explanation
Like C++17 if with initializer, but for range-based for loops.
Code
#include <cstdint>
#include <iostream>
#include <vector>
int main() {
  std::cout << std::hex;
  // The vector is declared in the loop's init-statement, so it only exists
  // for the duration of the loop.
  for (const std::vector<uint64_t> table{0x1000, 0x1100, 0x1200};
       const uint64_t entry : table) {
    std::cout << "0x" << entry << "\n";
  }
  return 0;
}
Output
$ ./src/c++20/build/range-based-for-loop-with-initializer
0x1000
0x1100
0x1200
[[likely]] and [[unlikely]] attributes
Use case
Hint to optimizer for branch prediction.
Explanation
Hints for branch prediction. Does not change semantics (meaning of the code), may improve performance.
Code
#include <cstdint>
#include <iostream>
// https://en.wikipedia.org/wiki/Mach-O
// Returns true when the first four bytes at data hold a Mach-O magic number
// (read in host byte order). Returns false for a null pointer.
// The caller must provide at least four readable bytes.
bool isMachO(const uint8_t *data) {
  if (data == nullptr) {
    return false;
  }
  // Copy byte by byte instead of dereferencing a reinterpret_cast'ed
  // uint32_t pointer: data may be unaligned, and that read would violate
  // strict aliasing.
  uint32_t magic = 0;
  auto *raw = reinterpret_cast<unsigned char *>(&magic);
  for (unsigned i = 0; i < sizeof(magic); ++i) {
    raw[i] = data[i];
  }
  if (magic == 0xFEEDFACF) [[likely]] {
    return true;
  } else if (magic == 0xFEEDFACE) [[unlikely]] {
    return true;
  }
  return false;
}
int main() {
uint8_t data[] = {0xCF, 0xFA, 0xED, 0xFE};
std::cout << "Is Mach-O: " << std::boolalpha << isMachO(data) << "\n";
return 0;
}
Output
$ ./src/c++20/build/likely-unlikely-attributes
Is Mach-O: true
Deprecate implicit capture of this
Use case
Avoid dangling pointer bugs with lambdas.
Explanation
[=] captures this by pointer. Object dies –> dangling pointer. C++20 requires explicit this or *this.
Code
#include <cstdint>
#include <functional>
#include <iostream>
// Shows the ways a lambda returned from a member function can capture the
// enclosing object.
struct Analyzer {
uint64_t base = 0x10000000;
// implicit capture of 'this' with a capture default of '=' is deprecated
// auto getBadLogger() {
// return [=]() { return base; };
// }
// Captures the this pointer explicitly: the returned lambda reads base
// through the original object, which must still be alive when it is called.
auto getGoodLogger() {
return [=, this]() { return base; };
}
// Copy of *this.
// Safe if object dies.
auto getSafeLogger() {
return [=, *this]() { return base; };
}
};
int main() {
auto goodLogger = Analyzer{}.getGoodLogger();
std::cout << "Base: 0x" << std::hex << goodLogger() << "\n";
auto safeLogger = Analyzer{}.getSafeLogger();
std::cout << "Base: 0x" << std::hex << safeLogger() << "\n";
return 0;
}
Output
$ ./src/c++20/build/deprecate-implicit-capture-of-this
Base: 0x10000000
Base: 0x10000000
Class types in non-type template parameters
Use case
Use structs as template parameters.
Explanation
Structs with only public members, no pointers and constexpr-compatible types can be NTTP. Keeps related values together (mask + value) instead of separate template params. Compiler fully inlines (no runtime overhead).
Code
#include <cstdint>
#include <iostream>
// Instruction pattern: an instruction matches when (instr & mask) == value.
struct Opcode {
uint32_t mask;
uint32_t value;
};
// Returns true when instr matches the pattern given as template argument.
template <Opcode op> bool matches(uint32_t instr) {
  const uint32_t relevantBits = instr & op.mask;
  return relevantBits == op.value;
}
int main() {
  // Mask selects bits 31..26; value is the BL opcode in those bits.
  // https://developer.arm.com/documentation/ddi0602/2025-12/Base-Instructions/BL--Branch-with-link-?lang=en
  constexpr Opcode BL{0xFC000000, 0x94000000};
  // $ echo "bl 0x40" | llvm-mc -triple=aarch64 -show-encoding
  // bl #64 // encoding: [0x10,0x00,0x00,0x94]
  const uint32_t encoded = 0x94000010;
  const bool isBl = matches<BL>(encoded);
  if (isBl) {
    std::cout << "Matched BL.\n";
  }
  return 0;
}
Output
$ ./src/c++20/build/class-types-in-non-type-template-parameters
Matched BL.
constexpr virtual functions
Use case
Polymorphism at compile-time.
Explanation
Virtual functions can be constexpr in C++20, enabling compile-time polymorphism.
Code
#include <cstdint>
#include <iostream>
// Abstract architecture description with a constexpr virtual interface.
struct Arch {
// Pointer size in bytes.
virtual constexpr uint32_t pointerSize() const = 0;
virtual constexpr ~Arch() = default;
};
// Architecture with 8-byte pointers.
struct ARM64 : Arch {
constexpr uint32_t pointerSize() const override { return 8; }
};
// Architecture with 4-byte pointers.
struct ARM32 : Arch {
constexpr uint32_t pointerSize() const override { return 4; }
};
// Returns the number of bytes needed for count pointers on architecture T.
// T must be default-constructible and provide pointerSize().
template <typename T> constexpr uint64_t stackAlloc(int count) {
  const T arch{};
  // Widen before multiplying so the product is computed in 64 bits instead
  // of wrapping at 32 bits.
  return static_cast<uint64_t>(arch.pointerSize()) *
         static_cast<uint64_t>(count);
}
int main() {
constexpr auto size = stackAlloc<ARM64>(4);
static_assert(size == 32, "Stack alloc is expected to be 32 bytes.");
std::cout << "Stack alloc: " << size << " bytes.\n";
return 0;
}
Output
$ ./src/c++20/build/constexpr-virtual-functions
Stack alloc: 32 bytes.
explicit(bool)
Use case
Conditionally make constructor explicit.
Explanation
explicit(condition) makes constructor explicit only when condition is true.
Code
#include <cstdint>
#include <iostream>
#include <type_traits>
// Wraps a 64-bit address value.
struct Address {
uint64_t value;
// Implicit for integral, explicit for others.
// The argument is converted to uint64_t with static_cast.
template <typename T>
explicit(!std::is_integral_v<T>) Address(T v)
: value(static_cast<uint64_t>(v)) {}
};
// Prints the wrapped address in hexadecimal.
void analyze(Address addr) {
  const uint64_t raw = addr.value;
  std::cout << "Analyzing: 0x" << std::hex << raw << "\n";
}
int main() {
analyze(0x1000);
// no suitable constructor exists to convert from "double" to "Address"
// analyze(3000.0);
analyze(Address{4096.0});
return 0;
}
Output
$ ./src/c++20/build/explicit-bool
Analyzing: 0x1000
Analyzing: 0x1000
consteval (immediate functions)
Use case
Force compile-time evaluation.
Explanation
consteval = must be compile-time. constexpr = can be compile-time.
Code
#include <cstdint>
#include <iostream>
// https://developer.arm.com/documentation/ddi0602/2025-12/Base-Instructions/BL--Branch-with-link-?lang=en
// $ echo "bl 0x40" | llvm-mc -triple=aarch64 -show-encoding
// bl #64 // encoding: [0x10,0x00,0x00,0x94]
// Encodes a BL instruction for the given byte offset.
consteval uint32_t makeBlOpcode(uint32_t offset) {
  constexpr uint32_t kBlBase = 0x94000000;
  constexpr uint32_t kImm26Mask = 0x03FFFFFF;
  // The immediate is stored in units of 4 bytes.
  const uint32_t imm26 = (offset >> 2) & kImm26Mask;
  return kBlBase | imm26;
}
int main() {
  // makeBlOpcode is consteval, so this call is evaluated by the compiler.
  constexpr uint32_t encoded = makeBlOpcode(0x40);
  std::cout << "BL 0x40: 0x";
  std::cout << std::hex << encoded << "\n";
  return 0;
}
Output
$ ./src/c++20/build/consteval
BL 0x40: 0x94000010
using enum
Use case
Import enum values into scope.
Explanation
using enum brings all enum values into scope. No more LoadCommand:: prefix inside switch.
Code
#include <cstdint>
#include <iostream>
// https://github.com/apple-oss-distributions/xnu/blob/f6217f891ac0bb64f3d375211650a4c1ff8ca1ea/EXTERNAL_HEADERS/mach-o/loader.h
// Subset of the Mach-O load command identifiers.
enum class LoadCommand : uint32_t {
  LC_SYMTAB = 0x2,
  LC_SEGMENT_64 = 0x19,
};
// Returns the name of a load command, or "UNKNOWN" for unlisted values.
const char *lcName(LoadCommand cmd) {
  // Bring the enumerators into scope so they need no LoadCommand:: prefix.
  using enum LoadCommand;
  if (cmd == LC_SEGMENT_64) {
    return "LC_SEGMENT_64";
  }
  if (cmd == LC_SYMTAB) {
    return "LC_SYMTAB";
  }
  return "UNKNOWN";
}
int main() {
  // Outside lcName the enumerator still needs its scope.
  const char *label = lcName(LoadCommand::LC_SEGMENT_64);
  std::cout << label << "\n";
  return 0;
}
Output
$ ./src/c++20/build/using-enum
LC_SEGMENT_64
Lambda capture of parameter pack
Use case
Capture variadic args in a lambda.
Explanation
...args in the capture expands the pack. Each element is captured separately.
Code
#include <cstdint>
#include <iostream>
// Returns a lambda that prints all captured arguments, each followed by a
// space, in hexadecimal mode, and then a newline.
template <typename... Args> auto makeLogger(Args &&...args) {
  return [... items = std::forward<Args>(args)]() {
    std::cout << std::hex;
    const auto emit = [](const auto &item) { std::cout << item << " "; };
    (emit(items), ...);
    std::cout << "\n";
  };
}
int main() {
  // The four arguments are captured by the returned lambda and printed when
  // it is called.
  const auto logSymbol = makeLogger("Symbol:", "_main", "@", 0x10001000);
  logSymbol();
  return 0;
}
Output
$ ./src/c++20/build/lambda-capture-of-parameter-pack
Symbol: _main @ 10001000
char8_t
Use case
Distinct type for UTF-8 strings.
Explanation
char8_t is a distinct type for UTF-8. Prevents accidentally mixing UTF-8 with other encodings.
Code
#include <cstdint>
#include <iostream>
int main() {
// https://www.utf8-chartable.de/
const char8_t *utf8_str = u8"_main\u0023";
std::cout << "Size: " << sizeof(char8_t) << " byte.\n";
std::cout << reinterpret_cast<const char *>(utf8_str) << "\n";
return 0;
}
Output
$ ./src/c++20/build/char8_t
Size: 1 byte.
_main#
constinit
Use case
Ensure static variable is initialized at compile-time.
Explanation
constinit = compile-time init. Prevents static initialization order bugs.
Code
#include <cstdint>
#include <iostream>
// constinit requires the initializer to be a constant expression; the variable
// itself stays mutable.
constinit uint64_t g_baseAddr = 0x10000000;
int main() {
  // Print the global in hexadecimal.
  std::cout << "Base: 0x";
  std::cout << std::hex << g_baseAddr << "\n";
  return 0;
}
Output
$ ./src/c++20/build/constinit
Base: 0x10000000
__VA_OPT__
Use case
Clean variadic macros without trailing comma issues.
Explanation
__VA_OPT__(,) inserts comma only when __VA_ARGS__ is non-empty.
Code
#include <cstdint>
#include <format>
#include <iostream>
// Formats fmt with the optional arguments via std::format and writes the
// result plus a newline to std::cout.
// __VA_OPT__(, ) emits the comma only when variadic arguments are present.
#define LOG(fmt, ...) \
std::cout << std::format(fmt __VA_OPT__(, ) __VA_ARGS__) << "\n"
int main() {
  // If LOG used a plain comma instead of __VA_OPT__(,):
  // std::cout << std::format(fmt, __VA_ARGS__) << "\n"
  // a call without extra arguments would expand to:
  // std::cout << std::format("Analysis started.", ) << "\n"
  // which does not compile.
  LOG("Analysis started.");
  // With extra arguments the comma is emitted.
  LOG("Found branch at {:#x}", 0x1000);
  return 0;
}
Output
$ ./src/c++20/build/va-opt
Analysis started.
Found branch at 0x1000
Concepts library
Use case
Use standard library concepts for constraints.
Explanation
Standard library provides ready-to-use concepts.
Code
#include <concepts>
#include <cstdint>
#include <iostream>
// https://en.cppreference.com/w/cpp/concepts.html#Standard_library_concepts
// Rounds value up to the next multiple of alignment.
// The bit trick assumes alignment is a power of two.
template <std::integral T> T alignTo(T value, T alignment) {
  // https://en.wikipedia.org/wiki/Data_structure_alignment
  // aligned = (offset + (align - 1)) & ~(align - 1)
  const auto lowBits = alignment - 1;
  return (value + lowBits) & ~lowBits;
}
// Calls callback for start, start + step, ... while the address is below end.
// A step of 0 visits nothing instead of looping forever.
template <std::invocable<uint64_t> F>
void forEachAddress(uint64_t start, uint64_t end, uint64_t step, F callback) {
  if (step == 0) {
    return;
  }
  for (uint64_t addr = start; addr < end;) {
    callback(addr);
    // Stop before addr + step could reach end or wrap around past
    // UINT64_MAX.
    if (end - addr <= step) {
      break;
    }
    addr += step;
  }
}
// Polymorphic base type for instructions.
struct Instruction {
virtual ~Instruction() = default;
};
// An instruction type derived from Instruction.
struct BranchInstr : Instruction {};
// Accepts any type derived from Instruction and logs that it is analyzed.
template <std::derived_from<Instruction> T>
void analyze([[maybe_unused]] const T &instr) {
  std::cout << "Analyzing instruction." << "\n";
}
int main() {
std::cout << "Aligned: 0x" << std::hex << alignTo(0x1234, 0x1000) << "\n";
forEachAddress(0x1000, 0x1010, 4, [](uint64_t addr) {
std::cout << "0x" << std::hex << addr << "\n";
});
BranchInstr br;
analyze(br);
return 0;
}
Output
$ ./src/c++20/build/concepts-library
Aligned: 0x2000
0x1000
0x1004
0x1008
0x100c
Analyzing instruction.
std::format
Use case
Type-safe printf-style formatting.
Explanation
std::format combines printf convenience with type safety.
Code
#include <cstdint>
#include <format>
#include <iostream>
int main() {
  const char *symbol = "_main";
  const uint64_t address = 0x10001000;
  // {} uses the default formatting; {:#x} prints hex with a 0x prefix.
  const std::string line =
      std::format("Symbol '{}' at {:#x}", symbol, address);
  std::cout << line << "\n";
  return 0;
}
Output
$ ./src/c++20/build/std-format
Symbol '_main' at 0x10001000
Synchronized buffered outputstream
Use case
Thread-safe logging without mixed output.
Explanation
std::osyncstream buffers output and flushes atomically. Without it, concurrent cout writes mix randomly. Note that it is not supported by Apple currently (look for “Synchronized buffered”).
Code
#include <cstdint>
#include <iostream>
#include <thread>
#include <vector>
// Detect std::osyncstream through the library feature-test macro instead of
// hard-coding a platform check, so any standard library without <syncstream>
// falls back to the mutex.
#include <version>
#if defined(__cpp_lib_syncbuf) && __has_include(<syncstream>)
#include <syncstream>
#define HAS_SYNCSTREAM 1
#else
#include <mutex>
#define HAS_SYNCSTREAM 0
// Serializes access to std::cout in the fallback path.
std::mutex cout_mutex;
#endif
// Logs one line with the calling thread's id, the segment name and its
// address in hexadecimal.
void analyzeSegment(uint64_t addr, const char *name) {
#if HAS_SYNCSTREAM
// The temporary osyncstream wraps std::cout for this single statement.
std::osyncstream{std::cout} << "Thread " << std::this_thread::get_id()
<< ": analyzing " << name << " @ 0x" << std::hex
<< addr << "\n";
#else
// Fallback: hold cout_mutex while writing to std::cout directly.
std::lock_guard lock(cout_mutex);
std::cout << "Thread " << std::this_thread::get_id() << ": analyzing " << name
<< " @ 0x" << std::hex << addr << "\n";
#endif
}
int main() {
std::vector<std::jthread> threads;
threads.emplace_back(analyzeSegment, 0x10001000, "__TEXT");
threads.emplace_back(analyzeSegment, 0x10002000, "__DATA");
threads.emplace_back(analyzeSegment, 0x10003000, "__LINKEDIT");
return 0;
}
Output
$ ./src/c++20/build/synchronised-buffered-outputstream
Thread 139842723919552: analyzing __TEXT @ 0x10001000
Thread 139842715526848: analyzing __DATA @ 0x10002000
Thread 139842707134144: analyzing __LINKEDIT @ 0x10003000
std::span
Use case
Non-owning view of contiguous memory.
Explanation
std::span replaces pointer+length pairs. Non-owning, works with any contiguous container.
Code
#include <cstdint>
#include <iostream>
#include <span>
#include <vector>
// Prints every byte of data in hexadecimal, each followed by a space, and
// ends the line.
void hexDump(std::span<const uint8_t> data) {
  for (auto it = data.begin(); it != data.end(); ++it) {
    // Cast to int so the byte is printed as a number, not a character.
    const int value = static_cast<int>(*it);
    std::cout << std::hex << value << " ";
  }
  std::cout << "\n";
}
int main() {
  // https://github.com/apple-oss-distributions/xnu/blob/f6217f891ac0bb64f3d375211650a4c1ff8ca1ea/EXTERNAL_HEADERS/mach-o/loader.h#L84
  const std::vector<uint8_t> magic{0xCF, 0xFA, 0xED, 0xFE};
  const uint8_t zeros[] = {0x00, 0x00, 0x00, 0x00};

  // From a vector.
  hexDump(magic);
  // From a C array.
  hexDump(zeros);
  // From pointer + length.
  hexDump({magic.data(), 2});
  return 0;
}
Output
$ ./src/c++20/build/std-span
cf fa ed fe
0 0 0 0
cf fa
Bit operations
Use case
Portable bit manipulation.
Explanation
<bit> header provides portable bit operations. Before C++20, for example, we had to use __builtin_popcount(x) (gcc/clang) vs __popcnt(x) (MSVC).
Code
#include <bit>
#include <cstdint>
#include <iostream>
int main() {
  // Bits 31, 29 and 7 are set.
  const uint32_t word = 0b1010'0000'0000'0000'0000'0000'1000'0000;
  // https://en.cppreference.com/w/cpp/header/bit.html
  const auto setBits = std::popcount(word);
  const auto singleBit = std::has_single_bit(word);
  const auto leadingZeros = std::countl_zero(word);
  const auto trailingZeros = std::countr_zero(word);
  const auto width = std::bit_width(word);

  std::cout << "popcount: " << setBits << "\n";
  std::cout << "has_single_bit: " << singleBit << "\n";
  std::cout << "countl_zero: " << leadingZeros << "\n";
  std::cout << "countr_zero: " << trailingZeros << "\n";
  std::cout << "bit_width: " << width << "\n";
  return 0;
}
Output
$ ./src/c++20/build/bit-operations
popcount: 3
has_single_bit: 0
countl_zero: 0
countr_zero: 7
bit_width: 32
Math constants
Use case
Standard mathematical constants.
Explanation
No more #define PI 3.14159....
Code
#include <iostream>
#include <numbers>
int main() {
  const double radius = 16.0;
  // C = 2 * pi * r
  const double circumference = 2 * std::numbers::pi * radius;
  std::cout << "Circumference: ";
  std::cout << circumference << "\n";
  return 0;
}
Output
$ ./src/c++20/build/math-constants
Circumference: 100.531
std::is_constant_evaluated
Use case
Different code paths for compile-time and runtime.
Explanation
std::is_constant_evaluated returns true during constant evaluation. Enables different implementation for compile-time vs runtime.
Code
#include <cstdint>
#include <iostream>
#include <type_traits>
// https://en.wikipedia.org/wiki/Mach-O
// Returns true for the 64-bit and 32-bit Mach-O magic numbers.
// Logs a message when the call is evaluated at runtime.
constexpr bool isMachO(uint32_t magic) {
  const bool valid = (magic == 0xFEEDFACF) || (magic == 0xFEEDFACE);
  if (!std::is_constant_evaluated()) {
    std::cout << "Checking magic at runtime.\n";
  }
  return valid;
}
int main() {
  // Initializing a constexpr variable forces constant evaluation.
  constexpr bool compileTime = isMachO(0xFEEDFACF);
  // A plain variable is initialized at runtime here, so the message prints.
  bool runTime = isMachO(0xFEEDFACF);
  std::cout << std::boolalpha;
  std::cout << "a: " << compileTime << ", b: " << runTime << "\n";
  return 0;
}
Output
$ ./src/c++20/build/std-is_constant_evaluated
Checking magic at runtime.
a: true, b: true
std::make_shared array support
Use case
Allocate shared arrays for buffers.
Explanation
std::make_shared<T[]>(n) allocates an array with shared ownership. Before C++20, we needed shared_ptr<T[]>(new T[n]).
Code
#include <cstdint>
#include <iostream>
#include <memory>
int main() {
  // Shared array of 0x1000 bytes.
  auto buffer = std::make_shared<uint8_t[]>(0x1000);
  // https://en.wikipedia.org/wiki/Mach-O
  buffer[0] = 0xCF;
  buffer[1] = 0xFA;
  buffer[2] = 0xED;
  buffer[3] = 0xFE;

  // Assemble the little-endian bytes into one value before printing.
  // Printing the bytes one by one would drop the leading zero of any byte
  // below 0x10.
  const uint32_t magic = (uint32_t{buffer[3]} << 24) |
                         (uint32_t{buffer[2]} << 16) |
                         (uint32_t{buffer[1]} << 8) | uint32_t{buffer[0]};
  std::cout << "Magic: 0x" << std::hex << magic << "\n";
  return 0;
}
Output
$ ./src/c++20/build/std-make_shared-array-support
Magic: 0xfeedfacf
starts_with and ends_with on strings
Use case
Check string prefixes/suffixes.
Explanation
Before C++20: s.find("_OBJC_") == 0 or s.substr(0, 6) == "_OBJC_". Now just s.starts_with("_OBJC_").
Code
#include <iostream>
#include <string>
#include <string_view>
// Prints `sym` followed by a category chosen from its prefix/suffix.
// Checks run in order: "_OBJC_" prefix, "_Z" prefix, "_block_invoke" suffix;
// anything else is reported as "Other".
void classifySymbol(std::string_view sym) {
  const char *verdict = ": Other.\n";
  if (sym.starts_with("_OBJC_")) {
    verdict = ": ObjC metadata.\n";
  } else if (sym.starts_with("_Z")) {
    verdict = ": C++ mangled.\n";
  } else if (sym.ends_with("_block_invoke")) {
    verdict = ": ObjC block.\n";
  }
  std::cout << sym << verdict;
}
// Classifies three sample symbols, one per recognized category.
int main() {
  const std::string_view samples[] = {
      // https://stackoverflow.com/questions/12323417/symbol-not-found-objc-class-nsobject
      "_OBJC_CLASS_$_NSObject",
      // https://en.wikipedia.org/wiki/Name_mangling
      "_ZN9org8wikipedia7Article6formatEv",
      // https://apple-dev.groups.io/g/xcode/topic/symbolic_breakpoints_some/34200828
      "-[UIPresentationController runTransitionForCurrentState]_block_invoke",
  };
  for (const std::string_view symbol : samples) {
    classifySymbol(symbol);
  }
  return 0;
}
Output
$ ./src/c++20/build/starts_with-ends_with
_OBJC_CLASS_$_NSObject: ObjC metadata.
_ZN9org8wikipedia7Article6formatEv: C++ mangled.
-[UIPresentationController runTransitionForCurrentState]_block_invoke: ObjC block.
contains for associative containers
Use case
Check if key exists without verbose iterator check.
Explanation
.contains() replaces the verbose find() != end() idiom.
Code
#include <cstdint>
#include <iostream>
#include <map>
#include <set>
int main() {
std::map<uint64_t, const char *> symbols{{0x10001000, "_main"},
{0x10002000, "_helper"}};
std::set<uint64_t> breakpoints{0x10001000, 0x10002000};
uint64_t addr = 0x10001000;
if (symbols.contains(addr)) {
std::cout << "Symbol found: " << symbols[addr] << "\n";
}
if (breakpoints.contains(addr)) {
std::cout << "Breakpoint hit!\n";
}
return 0;
}
Output
$ ./src/c++20/build/contains-for-associative-containers
Symbol found: _main
Breakpoint hit!
std::bit_cast
Use case
Safer reinterpretation of bits.
Explanation
std::bit_cast copies bits to a new object. Also constexpr-friendly and catches size mismatches at compile-time.
Code
#include <array>
#include <bit>
#include <cstdint>
#include <iostream>
// Reinterprets four bytes as a uint32_t with std::bit_cast and prints it.
int main() {
  // https://en.wikipedia.org/wiki/Mach-O
  const std::array<uint8_t, 4> raw = {0xCF, 0xFA, 0xED, 0xFE};
  const auto word = std::bit_cast<uint32_t>(raw);
  std::cout << "Magic: 0x";
  std::cout << std::hex << word << "\n";
  return 0;
}
Output
$ ./src/c++20/build/std-bit_cast
Magic: 0xfeedfacf
std::midpoint
Use case
Calculate midpoint without overflow.
Explanation
(a + b) / 2 can overflow. std::midpoint handles this correctly.
Code
#include <cstdint>
#include <iostream>
#include <numeric>
// Prints an address range and its midpoint in hex.
int main() {
  const uint64_t lo = 0x10000000;
  const uint64_t hi = 0x10001000;
  const uint64_t middle = std::midpoint(lo, hi);
  std::cout << std::hex;
  std::cout << "Range: 0x" << lo << " - 0x" << hi << "\n";
  std::cout << "Midpoint: 0x" << middle << "\n";
  return 0;
}
Output
$ ./src/c++20/build/std-midpoint
Range: 0x10000000 - 0x10001000
Midpoint: 0x10000800
std::to_array
Use case
Convert C array to std::array.
Explanation
std::to_array deduces size automatically. Useful for converting legacy C arrays.
Code
#include <array>
#include <cstdint>
#include <iostream>
// Converts a C array and a braced list to std::array via std::to_array and
// prints the deduced size and the opcode values.
int main() {
  // https://en.wikipedia.org/wiki/Mach-O
  uint8_t magic[] = {0xCF, 0xFA, 0xED, 0xFE};
  const auto magicArray = std::to_array(magic);
  std::cout << "Size: " << magicArray.size() << "\n";
  // $ echo "mov x29, sp" | llvm-mc -arch=aarch64 -show-encoding
  // mov x29, sp // encoding: [0xfd,0x03,0x00,0x91]
  // $ echo "bl 0x40" | llvm-mc -arch=aarch64 -show-encoding
  // bl #64 // encoding: [0x10,0x00,0x00,0x94]
  const auto opcodes = std::to_array<uint32_t>({0x910003FD, 0x94000010});
  std::cout << std::hex;
  for (const uint32_t word : opcodes) {
    std::cout << "0x" << word << "\n";
  }
  return 0;
}
Output
$ ./src/c++20/build/std-to_array
Size: 4
0x910003fd
0x94000010
std::bind_front
Use case
Partial function application (bind first N args).
Explanation
std::bind_front binds leading arguments.
Code
#include <cstdint>
#include <functional>
#include <iostream>
// Writes "[level] 0x<addr in hex>: msg" plus a newline to std::cout.
// Note: std::hex stays set on std::cout after the call.
void logMsg(const char *level, uint64_t addr, const char *msg) {
  std::cout << "[" << level << "] 0x";
  std::cout << std::hex << addr;
  std::cout << ": " << msg << "\n";
}
int main() {
auto logError = std::bind_front(logMsg, "ERROR");
auto logInfo = std::bind_front(logMsg, "INFO");
logError(0x10001000, "Invalid instruction.");
logInfo(0x10001000, "Entry point.");
return 0;
}
Output
$ ./src/c++20/build/std-bind_front
[ERROR] 0x10001000: Invalid instruction.
[INFO] 0x10001000: Entry point.
std::erase/std::erase_if (uniform container erasure)
Use case
Simplified container element removal.
Explanation
Before C++20: v.erase(std::remove_if(v.begin(), v.end(), pred), v.end()). Now just std::erase_if(v, pred).
Code
#include <iostream>
#include <string>
#include <vector>
// Removes one symbol by value and one by predicate, reporting the number of
// removed elements each time, then prints what is left.
int main() {
  std::vector<std::string> symbols{"_main", "__stub_helper", "_helper"};
  const auto removedByValue = std::erase(symbols, "_main");
  std::cout << "Removed " << removedByValue << " element(s).\n";
  const auto isStub = [](const std::string &name) {
    return name.starts_with("__stub");
  };
  const auto removedByPredicate = std::erase_if(symbols, isStub);
  std::cout << "Removed " << removedByPredicate << " element(s).\n";
  for (const std::string &name : symbols) {
    std::cout << name << "\n";
  }
  return 0;
}
Output
$ ./src/c++20/build/std-erase-std-erase_if
Removed 1 element(s).
Removed 1 element(s).
_helper
Three-way comparison helpers (spaceship operator helpers)
Use case
Convert spaceship result to bool.
Explanation
std::is_eq, std::is_lt, std::is_gt, std::is_lteq, std::is_gteq convert comparison result to bool.
Code
#include <compare>
#include <cstdint>
#include <iostream>
// Symbol described only by its address.
struct Symbol {
uint64_t addr;
// Defaulted three-way comparison; addr is the only member, so it decides
// the ordering.
auto operator<=>(const Symbol &) const = default;
};
int main() {
Symbol a{0x1000}, b{0x2000}, c{0x1000};
auto cmp = a <=> b;
std::cout << std::boolalpha;
std::cout << "is_lt: " << std::is_lt(cmp) << "\n";
std::cout << "is_eq: " << std::is_eq(cmp) << "\n";
std::cout << "is_gt:" << std::is_gt(cmp) << "\n";
std::cout << "is_eq(a <=> c): " << std::is_eq(a <=> c) << "\n";
std::cout << "is_lteq(a <=> b): " << std::is_lteq(a <=> b) << "\n";
std::cout << "is_gteq(b <=> a): " << std::is_gteq(b <=> a) << "\n";
return 0;
}
Output
$ ./src/c++20/build/three-way-comparison-helpers
is_lt: true
is_eq: false
is_gt:false
is_eq(a <=> c): true
is_lteq(a <=> b): true
is_gteq(b <=> a): true
std::lexicographical_compare_three_way
Use case
Compare byte sequences with three-way result.
Explanation
Lexicographical compare = compare element by element until one differs. Returns three-way result (less, equal, greater) instead of just bool. Useful for sorting/comparing binary data.
Code
#include <algorithm>
#include <compare>
#include <cstdint>
#include <iostream>
#include <vector>
// Compares byte sequences element by element and prints whether the first is
// greater than the second and equal to the third.
int main() {
  using Bytes = std::vector<uint8_t>;
  // https://en.wikipedia.org/wiki/Mach-O
  const Bytes magic1 = {0xCF, 0xFA, 0xED, 0xFE};
  const Bytes magic2 = {0xCE, 0xFA, 0xED, 0xFE};
  const Bytes magic3 = {0xCF, 0xFA, 0xED, 0xFE};
  const auto threeWay = [](const Bytes &lhs, const Bytes &rhs) {
    return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(),
                                                  rhs.begin(), rhs.end());
  };
  std::cout << std::boolalpha;
  std::cout << "magic1 > magic2: " << std::is_gt(threeWay(magic1, magic2))
            << "\n";
  std::cout << "magic1 == magic3: " << std::is_eq(threeWay(magic1, magic3))
            << "\n";
  return 0;
}
Output
$ ./src/c++20/build/std-lexicographical_compare_three_way
magic1 > magic2: true
magic1 == magic3: true
std::views
Use case
Composable, lazy sequence operations.
Explanation
Views are lazy, no copies until we iterate. Pipe | chains operations. Common views: filter, transform, take, drop, reverse, split, join.
Code
#include <cstdint>
#include <iostream>
#include <ranges>
#include <vector>
// Keeps addresses below 0x2000, rebases them by 0x1000 and prints each in hex.
int main() {
  std::vector<uint64_t> addresses{0x1000, 0x1010, 0x2000, 0x1020, 0x3000};
  const auto belowLimit = [](auto a) { return a < 0x2000; };
  const auto rebase = [](auto a) { return a - 0x1000; };
  std::cout << std::hex;
  for (const auto offset : addresses | std::views::filter(belowLimit) |
                               std::views::transform(rebase)) {
    std::cout << "0x" << offset << "\n";
  }
  return 0;
}
Output
$ ./src/c++20/build/std-views
0x0
0x10
0x20
std::ranges
Use case
Simplified algorithm calls. Pass container directly, use projections instead of lambdas.
Explanation
std::ranges:: algorithms take containers directly (no .begin()/.end()). Projections (&Symbol::addr) extract the field to compare (replacing simple lambdas).
Code
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <ranges>
#include <vector>
// Address/name pair describing one symbol.
struct Symbol {
uint64_t addr;
const char *name;
};
// Sorts symbols by address using a projection and prints them in order.
int main() {
  std::vector<Symbol> symbols{
      {0x1030, "_helper"}, {0x1000, "_main"}, {0x1020, "_init"}};
  // `{}` selects the algorithm's default comparator (std::ranges::less), so
  // the example does not depend on <functional> being pulled in for std::less.
  std::ranges::sort(symbols, {}, &Symbol::addr);
  for (const auto &s : symbols) {
    std::cout << s.name << " @ 0x" << std::hex << s.addr << "\n";
  }
  return 0;
}
Output
$ ./src/c++20/build/std-ranges
_main @ 0x1000
_init @ 0x1020
_helper @ 0x1030
if consteval
Use case
Different code paths for compile-time vs runtime.
Explanation
if consteval lets us use different error handling: throw at compile-time (causes compile error), abort()/cerr at runtime. Replaces if (std::is_constant_evaluated()).
Code
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <print>
// XORs `encoded` with `key` and returns the result if it passes a simplified
// validity check (bits 25..28 must not all be zero). On failure: throws
// during constant evaluation (a compile error), logs and aborts at runtime.
constexpr uint32_t decodeOpcode(uint32_t encoded, uint32_t key) {
  const uint32_t plain = encoded ^ key;
  // https://developer.arm.com/documentation/ddi0602/2025-12/Index-by-Encoding
  // Simplified check.
  const bool invalid = ((plain >> 25) & 0xF) == 0;
  if (invalid) {
    if consteval {
      throw("Invalid opcode detected at compile time.");
    } else {
      std::cerr << "Invalid opcode: " << std::hex << plain << "\n";
      std::abort();
    }
  }
  return plain;
}
// Decodes the same XOR-encoded opcode once at compile time and once at
// runtime and prints both results.
int main() {
  // https://github.com/jgamblin/Mirai-Source-Code/blob/3273043e1ef9c0bb41bd9fcdc5317f7b797a2a94/mirai/bot/table.c#L13
  constexpr uint32_t key = 0xDEADBEEF;
  // $ echo "bl 0x40" | llvm-mc -triple=aarch64 -show-encoding
  // bl #64 // encoding: [0x10,0x00,0x00,0x94]
  constexpr uint32_t encodedBl = 0x94000010 ^ key;
  constexpr uint32_t atCompileTime = decodeOpcode(encodedBl, key);
  uint32_t userInput = encodedBl;
  uint32_t atRuntime = decodeOpcode(userInput, key);
  std::println("{:#x}, {:#x}", atCompileTime, atRuntime);
  return 0;
}
Output
$ ./src/c++23/build/if-consteval
0x94000010, 0x94000010
Deducing this
Use case
Avoid duplicating const/non-const member function overloads.
Explanation
this Self&& self makes the object parameter explicit. Compiler deduces const/ref qualifiers automatically. Eliminates boilerplate overloads.
Code
#include <cstdint>
#include <print>
#include <utility> // std::forward
#include <vector>
// Owns a byte buffer and exposes it through a single accessor that uses an
// explicit object parameter ("deducing this").
class Section {
  std::vector<uint8_t> data_;

public:
  Section(std::initializer_list<uint8_t> d) : data_(d) {}
  // Before C++23: needed const and non-const overloads.
  // std::vector<uint8_t> &data() { return data_; }
  // const std::vector<uint8_t> &data() const { return data_; }
  // C++23: one function handles both.
  // Instead of an implicit this pointer,
  // we declare the object parameter explicitly.
  // The compiler deduces Self based on how we call it.
  // When the argument is an lvalue:
  // Self&& --> Section& && --> Section&
  // When the argument is an rvalue:
  // Self&& --> Section&&
  // auto&& follows the same reference collapsing rules.
  template <typename Self> auto &&data(this Self &&self) {
    // std::forward preserves the value category (lvalue/rvalue) and constness.
    return std::forward<Self>(self).data_;
  }
};
// Calls data() on a mutable and on a const Section: writes through the
// first, reads from the second and prints the byte read.
int main() {
  // https://en.wikipedia.org/wiki/Mach-O
  Section writable{0xCF, 0xFA, 0xED, 0xFE};
  const Section readOnly{0xCF, 0xFA, 0xED, 0xFE};
  writable.data()[0] = 0xFF;
  const auto firstByte = readOnly.data()[0];
  std::println("{:#x}", firstByte);
  return 0;
}
Output
$ ./src/c++23/build/deducing-this
0xcf
Multidimensional [] operator
Use case
2D memory views (matrixes).
Explanation
Before C++23, operator[] was defined to take exactly one parameter. Now it accepts multiple parameters directly.
Code
#include <cstdint>
#include <print>
#include <vector>
// Non-owning row-major 2D view over a flat byte buffer, indexed with the
// C++23 multi-argument operator[]. No bounds checking is performed.
class MemoryView2D {
  uint8_t *base_;
  // Number of columns per row.
  size_t stride_;

public:
  MemoryView2D(uint8_t *d, size_t w) : base_(d), stride_(w) {}
  // Returns a reference to the byte at (row, col).
  uint8_t &operator[](size_t row, size_t col) {
    const size_t offset = row * stride_ + col;
    return base_[offset];
  }
};
// Views a flat 16x16 byte buffer as 2D, writes three cells and prints one.
int main() {
  constexpr size_t kSide = 16;
  // 1D array.
  std::vector<uint8_t> storage(kSide * kSide, 0);
  // 2D view.
  MemoryView2D grid(storage.data(), kSide);
  grid[0, 0] = 0xCF;
  grid[0, 1] = 0xFA;
  grid[1, 0] = 0xED;
  std::println("{:#x}", grid[0, 0]);
  return 0;
}
Output
$ ./src/c++23/build/multidimensional-subscript-operator
0xcf
[[assume]] attribute
Use case
Help compiler optimize with known invariants (a condition that is always true at a certain point in your code).
Explanation
[[assume(expr)]] tells the compiler the expression is always true. Enables aggressive optimizations. UB if assumption is violated at runtime.
Code
#include <cstdint>
#include <print>
// https://en.wikipedia.org/wiki/Mach-O
// Returns true when `magic` is 0xFEEDFACF. The caller must pass one of the
// two magic values named below: [[assume]] makes any other input undefined
// behavior.
bool is64Bit(uint32_t magic) {
  constexpr uint32_t kMagic32 = 0xFEEDFACE;
  constexpr uint32_t kMagic64 = 0xFEEDFACF;
  [[assume(magic == kMagic32 || magic == kMagic64)]];
  return magic == kMagic64;
}
// Prints whether the sample magic is the 64-bit one.
int main() {
  const uint32_t magic = 0xFEEDFACF;
  const bool wide = is64Bit(magic);
  std::println("64-bit: {}", wide);
  return 0;
}
Output
$ ./src/c++23/build/assume-attribute
64-bit: true
size_t literal suffix
Use case
Avoid signed/unsigned comparison warnings.
Explanation
0uz = std::size_t, 0z = signed version of std::size_t. Integer literals are explained here.
Code
#include <cstdint>
#include <print>
#include <vector>
// Fills a 256-byte buffer with its own indices using size_t-typed literals
// and prints the element at index 10.
int main() {
  std::vector<uint8_t> bytes(256);
  // Before C++23:
  // warning: comparison of integers of different signs
  // for (int i = 0; i < buffer.size(); ++i) {}
  // C++23
  for (auto pos = 0uz, count = bytes.size(); pos < count; ++pos) {
    bytes[pos] = static_cast<uint8_t>(pos);
  }
  const auto probe = 10uz;
  std::println("{}", bytes[probe]);
  return 0;
}
Output
$ ./src/c++23/build/size_t-literal-suffix
10
#elifdef/#elifndef
Use case
Cleaner platform detection macros.
Explanation
Before C++23: #elif defined(x). Now: #elifdef x.
Code
#include <print>
#ifdef __aarch64__
#define ARCH "ARM64"
#elifdef __x86_64__
#define ARCH "x86_64"
#else
#define ARCH "unknown"
#endif
// Prints the architecture string selected by the preprocessor above.
int main() {
  const char *const arch = ARCH;
  std::println("Arch: {}", arch);
  return 0;
}
Output
$ ./src/c++23/build/elifdef-elifndef
Arch: ARM64
static operator()
Use case
Stateless function objects without object overhead.
Explanation
static operator() means no this pointer needed. This enables certain compiler optimizations. Also works for lambdas: []() static { ... }.
Code
#include <algorithm>
#include <cstdint>
#include <print>
#include <vector>
// Stateless predicate: true when `opcode` is the AArch64 NOP encoding.
struct IsNop {
  static bool operator()(uint32_t opcode) {
    // $ echo "nop" | llvm-mc -arch=aarch64 -show-encoding
    // nop // encoding: [0x1f,0x20,0x03,0xd5]
    constexpr uint32_t kNop = 0xD503201F;
    return opcode == kNop;
  }
};
// Counts the NOPs in a two-opcode sample using the IsNop predicate.
int main() {
  const std::vector<uint32_t> opcodes = {
      0xD503201F,
      // $ echo "bl 0x40" | llvm-mc -arch=aarch64 -show-encoding
      // bl #64 // encoding: [0x10,0x00,0x00,0x94]
      0x94000010,
  };
  const auto nops = std::count_if(opcodes.cbegin(), opcodes.cend(), IsNop{});
  std::println("NOP count: {}", nops);
  return 0;
}
Output
$ ./src/c++23/build/static-call-operator
NOP count: 1
auto(x) (decay copy)
Use case
Create explicit copy in expressions.
Explanation
auto(x) creates a decayed prvalue copy. Useful in generic code.
Code
#include <algorithm>
#include <cstdint> // uint64_t
#include <print>
#include <vector>
// Sorts a copy of `addrs` made inline with auto(x), leaving the original
// untouched, and prints the first element of each.
int main() {
  std::vector<uint64_t> addrs = {0x1050, 0x1010, 0x1040, 0x1020};
  // Before C++23: need temporary variable.
  // auto copy = addrs;
  // C++23: inline decay copy.
  auto sorted = [](auto v) {
    std::sort(v.begin(), v.end());
    return v;
  }(auto(addrs));
  std::println("Original first: {:#x}", addrs[0]);
  std::println("Sorted first: {:#x}", sorted[0]);
  return 0;
}
Output
$ ./src/c++23/build/auto-x-decay-copy
Original first: 0x1050
Sorted first: 0x1010
Lambda without parentheses
Use case
Cleaner lambdas with no parameters.
Explanation
[]{} is now valid instead of [](){}. Small but reduces visual noise.
Code
#include <print>
#include <thread>
// Defines and invokes a parameterless lambda written without `()`.
int main() {
  const auto runAnalysis = [] {
    std::println("Running analysis.");
  };
  runAnalysis();
  return 0;
}
Output
$ ./src/c++23/build/lambda-without-parentheses
Running analysis.
#warning directive
Use case
Emit compiler warnings from preprocessor.
Explanation
#warning emits a warning during compilation.
Code
#include <cstdint>
#if INTPTR_MAX == INT32_MAX
#warning "32-bit build. Address space limited."
#endif
// Nothing happens at runtime; the example is about the compile-time warning.
int main() {
  return 0;
}
Output
$ ./src/c++23/build/warning-directive
static_assert(false) in templates
Use case
Error on invalid template instantiation.
Explanation
static_assert(false) inside a template is deferred until instantiation.
Code
#include <cstdint>
#include <print>
// Swaps the bytes of a 1- or 2-byte value. Instantiating it with any other
// size trips the static_assert in the final branch.
template <typename T> T byteswap(T val) {
  constexpr auto width = sizeof(T);
  if constexpr (width == 1) {
    // A single byte has nothing to swap.
    return val;
  } else if constexpr (width == 2) {
    return (val >> 8 | val << 8);
  } else {
    static_assert(false, "Unsupported size.");
  }
}
// Exercises the two supported sizes; the unsupported one is left commented
// out because it fails to compile.
int main() {
  const auto oneByte = byteswap(uint8_t{0x12});
  const auto twoBytes = byteswap(uint16_t{0x12});
  std::println("Swapped: {:#x}", oneByte);
  std::println("Swapped: {:#x}", twoBytes);
  // error: static assertion failed: Unsupported size.
  // byteswap(uint32_t{0x12});
  return 0;
}
Output
$ ./src/c++23/build/static_assert-false-in-templates
Swapped: 0x12
Swapped: 0x1200
import std;
Use case
Import entire standard library with one statement.
Explanation
import std; replaces all #include <...> headers. Faster compilation, no macro leakage.
Standard library modules are either unsupported or only partially supported at the moment (search for “Standard Library Modules”). Therefore, they are not covered here yet.
std::print/std::println
Use case
Formatted output without << chaining.
Explanation
std::println = std::format + newline + stdout. Replaces verbose std::cout << ... << "\n" chains.
Code
#include <cstdint>
#include <print>
// Prints an address and an opcode with std::println format specifiers.
int main() {
  // $ echo "bl 0x40" | llvm-mc -triple=aarch64 -show-encoding
  // bl #64 // encoding: [0x10,0x00,0x00,0x94]
  const uint32_t opcode = 0x94000010;
  const uint64_t entry = 0x10001000;
  // https://en.cppreference.com/w/cpp/utility/format/spec.html
  std::println("Entry point: {:#x}", entry);
  std::println("Opcode: {:#x} at {:#x}", opcode, entry);
  return 0;
}
Output
$ ./src/c++23/build/std-print-std-println
Entry point: 0x10001000
Opcode: 0x94000010 at 0x10001000
std::expected
Use case
Error handling without exceptions
Explanation
std::expected<T, E> holds either a value or an error. Like Rust’s Result<T, E>. Better than std::optional when you need error info.
Code
#include <array> // std::array used in main
#include <cstdint>
#include <cstring>
#include <expected>
#include <print>
#include <utility>
// Reasons parseMagic can reject its input.
enum class ParseError {
  InvalidMagic,
  TooShort,
};
// Copies the first four bytes of `data` into a uint32_t (native byte order)
// and returns it if it is one of the two Mach-O magics checked below.
// Returns ParseError::TooShort when fewer than four bytes are available and
// ParseError::InvalidMagic when the value is not recognized.
std::expected<uint32_t, ParseError> parseMagic(const uint8_t *data,
                                               size_t size) {
  if (size < 4) {
    return std::unexpected(ParseError::TooShort);
  }
  uint32_t magic;
  // memcpy avoids an unaligned/aliasing read through a casted pointer.
  std::memcpy(&magic, data, sizeof(magic));
  // https://en.wikipedia.org/wiki/Mach-O
  if (magic != 0xFEEDFACF && magic != 0xFEEDFACE) {
    return std::unexpected(ParseError::InvalidMagic);
  }
  return magic;
}
// Parses a four-byte sample and prints either the magic or the numeric value
// of the error.
int main() {
  std::array<uint8_t, 4> buffer = {0xCF, 0xFA, 0xED, 0xFE};
  auto result = parseMagic(buffer.data(), buffer.size());
  if (result) {
    std::println("Magic: {:#x}", result.value());
  } else {
    std::println("Error: {}", std::to_underlying(result.error()));
  }
  return 0;
}
Output
$ ./src/c++23/build/std-expected
Magic: 0xfeedfacf
std::flat_map
Use case
Cache-friendly sorted map for lookup tables.
Explanation
std::flat_map uses sorted vectors internally instead of tree nodes (std::map). Better cache locality, smaller memory footprint (compared to the tree where each node stores extra pointers). Drop-in replacement for std::map. Better for read-heavy use, worse for frequent insertions (elements have to be shifted).
Code
// std::flat_map is not fully implemented everywhere yet.
// https://en.cppreference.com/w/cpp/compiler_support/23.html
#if defined(__APPLE__)
#include <cstdint>
#include <flat_map>
#include <print>
#include <string>
// Builds a small address-to-name table, looks one address up and then
// prints every entry.
int main() {
  std::flat_map<uint64_t, std::string> table;
  // Sorts on insert and shifts elements.
  table.emplace(0x10001000, "_main");
  table.emplace(0x10002000, "_helper");
  table.emplace(0x10003000, "_cleanup");
  // Fast lookup. Uses binary search.
  const auto hit = table.find(0x10001000);
  if (hit != table.end()) {
    std::println("{:#x}: {}", hit->first, hit->second);
  }
  // Iterate over memory sequentially. Best case for CPU cache.
  for (const auto &[address, label] : table) {
    std::println("{:#x}: {}", address, label);
  }
  return 0;
}
#else
// Fallback for platforms where the example is not built.
int main() {
  return 0;
}
#endif
Output
$ ./src/c++23/build/std-flat_map
0x10001000: _main
0x10001000: _main
0x10002000: _helper
0x10003000: _cleanup
std::mdspan
Use case
Multidimensional view of contiguous memory.
Explanation
std::mdspan = multidimensional std::span. Non-owning view with arbitrary dimensions.
Code
// std::mdspan is not fully implemented everywhere yet.
// https://en.cppreference.com/w/cpp/compiler_support/23.html
#ifdef __APPLE__
#include <array> // std::array
#include <cstdint>
#include <mdspan>
#include <print>
// Views 16 contiguous bytes as a 2x8 matrix and prints one row per line.
int main() {
  // https://en.wikipedia.org/wiki/Mach-O
  // clang-format off
  std::array<uint8_t, 16> data = {0xCF, 0xFA, 0xED, 0xFE, 0x00, 0x00, 0x00, 0x00,
                                  0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08};
  // clang-format on
  std::mdspan view(data.data(), 2, 8);
  // view.extent(0) is 2 (rows)
  // view.extent(1) is 8 (columns)
  for (size_t row = 0; row < view.extent(0); ++row) {
    for (size_t col = 0; col < view.extent(1); ++col) {
      std::print("{:02x}", view[row, col]);
    }
    std::println("");
  }
  return 0;
}
#else
// Fallback for platforms where the example is not built.
int main() { return 0; }
#endif
Output
$ ./src/c++23/build/std-mdspan
cffaedfe00000000
0102030405060708
std::ranges::contains
Use case
Check if element exists in range.
Explanation
Simple contains() for ranges. No more find() != end() idiom.
Code
#include <algorithm>
#include <cstdint> // uint8_t
#include <print>
#include <string>
#include <vector>
// Checks for a single element with std::ranges::contains and for a byte
// pattern with std::ranges::contains_subrange.
int main() {
  std::vector<std::string> imports = {"_printf", "_malloc", "_free", "_exit"};
  // Before C++23:
  // bool found = std::find(imports.begin(), imports.end(), "_malloc") != imports.end();
  // C++23:
  if (std::ranges::contains(imports, "_malloc")) {
    std::println("Uses malloc.");
  }
  // $ echo "bl 0x40" | llvm-mc -triple=aarch64 -show-encoding
  // bl #64 // encoding: [0x10,0x00,0x00,0x94]
  // The pattern uses the byte order shown by the encoding above, which is how
  // the instruction appears in a raw code buffer.
  std::vector<uint8_t> bytes = {0x00, 0x10, 0x00, 0x00, 0x94, 0x00};
  std::vector<uint8_t> bl = {0x10, 0x00, 0x00, 0x94};
  if (std::ranges::contains_subrange(bytes, bl)) {
    std::println("Found BL instruction.");
  }
  return 0;
}
Output
$ ./src/c++23/build/std-ranges-contains
Uses malloc.
Found BL instruction.
std::ranges::fold_left
Use case
Accumulate with explicit direction.
Explanation
fold_left = std::accumulate for ranges. Also fold_right, fold_left_first (uses the first element as initial value).
Code
#include <algorithm>
#include <cstdint>
#include <functional> // std::plus, std::bit_xor
#include <print>
#include <vector>
// Folds section sizes into a total and XORs four bytes into a checksum.
int main() {
  std::vector<size_t> sectionSizes = {0x1000, 0x2000, 0x500, 0x800};
  auto total = std::ranges::fold_left(sectionSizes, 0uz, std::plus{});
  std::println("Total size: {:#x}", total);
  // https://en.wikipedia.org/wiki/Mach-O
  std::vector<uint8_t> data = {0xCF, 0xFA, 0xED, 0xFE};
  auto checksum = std::ranges::fold_left(data, uint8_t{0}, std::bit_xor{});
  std::println("Checksum: {:#x}", checksum);
  return 0;
}
Output
$ ./src/c++23/build/std-ranges-fold_left
Total size: 0x3d00
Checksum: 0x26
std::byteswap
Use case
Endianness conversion.
Explanation
Before C++23: builtins. Now portable std::byteswap.
Code
#include <bit>
#include <cstdint>
#include <print>
// Prints a 32-bit magic before and after reversing its bytes.
int main() {
  // https://en.wikipedia.org/wiki/Mach-O
  constexpr uint32_t bigEndian = 0xFEEDFACF;
  constexpr uint32_t littleEndian = std::byteswap(bigEndian);
  std::println("BE: {:#x}", bigEndian);
  std::println("LE: {:#x}", littleEndian);
  return 0;
}
Output
$ ./src/c++23/build/std-byteswap
BE: 0xfeedfacf
LE: 0xcffaedfe
std::to_underlying
Use case
Get underlying value of enum.
Explanation
Shorter than static_cast<std::underlying_type_t<E>>(e). Works with scoped enums.
Code
#include <cstdint>
#include <print>
#include <utility>
// https://github.com/apple-oss-distributions/xnu/blob/f6217f891ac0bb64f3d375211650a4c1ff8ca1ea/EXTERNAL_HEADERS/mach-o/loader.h#L110
// Scoped enum over uint32_t with four Mach-O file types; see the loader.h
// link above for the reference definitions.
enum class MachOFileType : uint32_t {
Execute = 0x2,
Dylib = 0x6,
Bundle = 0x8,
Dsym = 0xa
};
// Prints the numeric value behind a scoped enumerator.
int main() {
  const MachOFileType kind = MachOFileType::Dylib;
  // Before C++23:
  // auto val = static_cast<std::underlying_type_t<MachOFileType>>(type);
  // C++23:
  const auto raw = std::to_underlying(kind);
  std::println("File type: {:#x}", raw);
  return 0;
}
Output
$ ./src/c++23/build/std-to_underlying
File type: 0x6
std::unreachable
Use case
Mark unreachable code for optimization.
Explanation
std::unreachable = UB if reached, but enables certain optimizations (e.g. compiler can eliminate dead code paths).
Code
#include <cstdint>
#include <print>
#include <utility>
// Architectures known to archName.
enum class Arch { ARM64, X86_64 };
// Maps an Arch enumerator to its lowercase name. Every enumerator is handled,
// so the trailing std::unreachable() is only hit (as undefined behavior) if
// `a` holds a value outside the enum.
const char *archName(Arch a) {
  if (a == Arch::ARM64) {
    return "arm64";
  }
  if (a == Arch::X86_64) {
    return "x86_64";
  }
  std::unreachable();
}
// Prints the name of one architecture.
int main() {
  const char *const name = archName(Arch::ARM64);
  std::println("{}", name);
  return 0;
}
Output
$ ./src/c++23/build/std-unreachable
arm64
Monadic operations for std::optional
Use case
Chain operations that might fail.
Explanation
transform = apply a function and wrap the result in an optional (if empty, stays empty). and_then = apply a function that returns an optional and flatten the result (avoids optional<optional<T>>). or_else = provide a fallback if empty. No more nested if (opt.has_value()) checks.
Code
#include <cstdint>
#include <optional>
#include <print>
#include <string>
// Returns 0x10001000 for "_main" and an empty optional for any other name.
std::optional<uint64_t> findSymbol(const std::string &name) {
  std::optional<uint64_t> address;
  if (name == "_main") {
    address = 0x10001000;
  }
  return address;
}
// Returns "main" for address 0x10001000 and an empty optional otherwise.
std::optional<std::string> demangleName(uint64_t addr) {
  std::optional<std::string> name;
  if (addr == 0x10001000) {
    name = "main";
  }
  return name;
}
// Chains transform / and_then / or_else on the optional returned by
// findSymbol and prints the final string. or_else guarantees a value, so the
// dereference below is safe.
int main() {
  const auto addOffset = [](uint64_t addr) { return addr + 0x10; };
  const auto lookupName = [](uint64_t addr) {
    return demangleName(addr - 0x10);
  };
  const auto fallback = []() -> std::optional<std::string> {
    return "unknown";
  };
  const auto result = findSymbol("_main")
                          .transform(addOffset)
                          .and_then(lookupName)
                          .or_else(fallback);
  std::println("Result: {}", *result);
  return 0;
}
Output
$ ./src/c++23/build/monadic-operations
Result: main
std::stacktrace
Use case
Capture callstack for debugging/logging.
Explanation
std::stacktrace::current() captures the call stack.
Code
// std::stacktrace is not fully implemented everywhere yet.
// https://en.cppreference.com/w/cpp/compiler_support/23.html
#if defined(__linux__)
#include <cstdint> // uint64_t
#include <print>
#include <stacktrace>
#include <string_view>
// Prints the address being analyzed followed by the current call stack.
void analyzeFunction(uint64_t addr) {
  std::println("Analyzing {:#x}", addr);
  std::println("Callstack:\n{}", std::stacktrace::current());
}
// Exists to add one more frame to the captured stack; `name` is unused.
void processSection([[maybe_unused]] std::string_view name) {
  analyzeFunction(0x10001000);
}
int main() {
  processSection("__TEXT");
  return 0;
}
#else
// Fallback for platforms where the example is not built.
int main() { return 0; }
#endif
Output
$ ./src/c++23/build/std-stacktrace
Analyzing 0x10001000
Callstack:
0# analyzeFunction(unsigned long) at :0
1# processSection(std::basic_string_view<char, std::char_traits<char> >) at :0
2# main at :0
3# __libc_start_call_main at ../sysdeps/nptl/libc_start_call_main.h:58
4# __libc_start_main_impl at ../csu/libc-start.c:360
5# _start at :0
6#
std::views::zip
Use case
Iterate multiple ranges together.
Explanation
std::views::zip combines multiple ranges into tuples. Iterates in “lockstep”. Stops at shortest range.
Code
#include <cstdint>
#include <print>
#include <ranges>
#include <string>
#include <vector>
// Walks three parallel vectors in lockstep and prints one line per symbol.
int main() {
  std::vector<uint64_t> addresses = {0x1000, 0x1100, 0x1200};
  std::vector<std::string> names = {"_main", "_helper", "_exit"};
  std::vector<size_t> sizes = {0x100, 0x50, 0x50};
  const auto rows = std::views::zip(addresses, names, sizes);
  for (auto &&[address, label, length] : rows) {
    std::println("{:#x}: {} (size: {:#x})", address, label, length);
  }
  return 0;
}
Output
$ ./src/c++23/build/std-views-zip
0x1000: _main (size: 0x100)
0x1100: _helper (size: 0x50)
0x1200: _exit (size: 0x50)
std::views::chunk
Use case
Process data in fixed-size blocks.
Explanation
chunk(n) splits range into n-sized subranges. Last chunk may be smaller.
Code
// std::views::chunk is not fully implemented everywhere yet.
// https://en.cppreference.com/w/cpp/compiler_support/23.html
#if defined(__linux__)
#include <cstdint>
#include <print>
#include <ranges>
#include <vector>
// Prints 16 bytes as hex, one group of four per line.
int main() {
  std::vector<uint8_t> bytes = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                                0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
  constexpr int kGroupSize = 4;
  for (auto &&group : bytes | std::views::chunk(kGroupSize)) {
    for (const uint8_t value : group) {
      std::print("{:02x}", value);
    }
    std::println("");
  }
  return 0;
}
#else
// Fallback for platforms where the example is not built.
int main() {
  return 0;
}
#endif
Output
$ ./src/c++23/build/std-views-chunk
00010203
04050607
08090a0b
0c0d0e0f
std::views::enumerate
Use case
Index + value iteration.
Explanation
enumerate provides (index, value) pairs. No manual counter needed.
Code
// std::views::enumerate is not fully implemented everywhere yet.
// https://en.cppreference.com/w/cpp/compiler_support/23.html
#if defined(__linux__)
#include <print>
#include <ranges>
#include <string>
#include <vector>
// Prints each section name together with its index.
int main() {
  std::vector<std::string> sections = {"__TEXT", "__DATA", "__LINKEDIT"};
  for (auto &&[position, section] : sections | std::views::enumerate) {
    std::println("Section {}: {}", position, section);
  }
  return 0;
}
#else
// Fallback for platforms where the example is not built.
int main() {
  return 0;
}
#endif
Output
$ ./src/c++23/build/std-views-enumerate
Section 0: __TEXT
Section 1: __DATA
Section 2: __LINKEDIT
std::move_only_function
Use case
Type-erased callable that captures move-only types.
Explanation
Like std::function but does not require copyable callable.
Code
// std::move_only_function is not fully implemented everywhere yet.
// https://en.cppreference.com/w/cpp/compiler_support/23.html
#if defined(__linux__)
#include <cstdint> // uint64_t
#include <functional>
#include <memory>
#include <print>
#include <utility> // std::move
// Stores a lambda that owns a std::unique_ptr (and is therefore move-only)
// in a std::move_only_function and invokes it.
int main() {
  auto addr = std::make_unique<uint64_t>(0x1000);
  std::move_only_function<void()> task = [a = std::move(addr)]() {
    std::println("Address: {:#x}", *a);
  };
  task();
  return 0;
}
#else
// Fallback for platforms where the example is not built.
int main() { return 0; }
#endif
Output
$ ./src/c++23/build/std-move_only_function
Address: 0x1000
std::string::contains
Use case
Check if string contains substring.
Explanation
contains() returns bool directly. Cleaner than find() != npos.
Code
#include <print>
#include <string>
// Reports whether the sample symbol contains the substring "OBJC".
int main() {
  const std::string symbol = "_OBJC_CLASS_$_NSObject";
  // Before C++23:
  // if (symbol.find("OBJC") != std::string::npos) {}
  // C++23:
  const bool isObjC = symbol.contains("OBJC");
  if (isObjC) {
    std::println("Objective-C symbol.");
  }
  return 0;
}
Output
$ ./src/c++23/build/std-string-contains
Objective-C symbol.
constexpr std::unique_ptr
Use case
Smart pointers in compile-time code.
Explanation
std::unique_ptr is now fully constexpr.
Code
#include <array>
#include <cstdint> // uint32_t
#include <memory>
// Returns true when `magic` equals one of the two Mach-O magics stored in a
// heap array owned by std::unique_ptr. The array is allocated and released
// inside the call, which is what allows use in constant expressions.
constexpr bool isValidMagic(uint32_t magic) {
  auto valid = std::make_unique<uint32_t[]>(2);
  // https://en.wikipedia.org/wiki/Mach-O
  valid[0] = 0xFEEDFACF;
  valid[1] = 0xFEEDFACE;
  for (int i = 0; i < 2; ++i) {
    if (valid[i] == magic)
      return true;
  }
  return false;
}
int main() {
static_assert(isValidMagic(0xFEEDFACF), "Invalid magic.");
return 0;
}
Output
$ ./src/c++23/build/constexpr-std-unique_ptr
Formatting ranges and tuples
Use case
Print containers directly with std::print/std::format.
Explanation
std::print and std::format now support ranges and tuples. Format specs like :#x apply to elements.
Code
// Formatting ranges is not fully implemented everywhere yet.
// https://en.cppreference.com/w/cpp/compiler_support/23.html
#if defined(__APPLE__)
#include <map>
#include <print>
#include <string> // std::string
#include <tuple>
#include <vector>
// Prints a vector and a map directly with std::println.
int main() {
  std::vector<int> addrs = {0x1000, 0x1100, 0x1200};
  std::map<std::string, int> symbols = {{"_main", 0x1000}, {"_helper", 0x1100}};
  // The spec after the second ':' ("#x") is applied to each element.
  std::println("Addresses: {::#x}", addrs);
  std::println("Symbols: {}", symbols);
  return 0;
}
#else
// Fallback for platforms where the example is not built.
int main() { return 0; }
#endif
Output
$ ./src/c++23/build/formatting-ranges-and-tuples
Addresses: [0x1000, 0x1100, 0x1200]
Symbols: {"_helper": 4352, "_main": 4096}