Loading...
Loading...
C++ library for reducing tail latency in RAM reads by hedging across multiple DRAM channels with uncorrelated refresh schedules
npx skill4agent add aradotso/trending-skills tailslayer-dram-hedged-reads
Skill by ara.so — Daily 2026 Skills collection.
git clone https://github.com/LaurieWired/tailslayer.git
cp -r tailslayer/include/tailslayer /your/project/include/
#include <tailslayer/hedged_reader.hpp>
git clone https://github.com/LaurieWired/tailslayer.git
cd tailslayer
make
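./tailslayer_example
tailslayer::HedgedReader<T, SignalFn, WorkFn, SignalArgs, WorkArgs>
| Parameter | Description |
|---|---|
| `T` | Value type stored and read |
| `SignalFn` | Function that waits for a trigger and returns the index to read |
| `WorkFn` | Function called with the value immediately after read |
| `SignalArgs` | (optional) `tailslayer::ArgList<...>` of arguments forwarded to `SignalFn` |
| `WorkArgs` | (optional) `tailslayer::ArgList<...>` of arguments forwarded to `WorkFn` |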
HedgedReader(
uint64_t channel_offset = DEFAULT_OFFSET, // undocumented channel scrambling offset
uint64_t channel_bit = DEFAULT_BIT, // bit used for channel selection
std::size_t n_replicas = 2 // number of DRAM channel replicas
)
reader.insert(T value); // Insert value, replicated across all channels
reader.start_workers(); // Launch per-channel worker threads (blocking)
tailslayer::pin_to_core(core_id); // Pin calling thread to a specific core
tailslayer::CORE_MAIN // Constant: recommended core for main thread
#include <tailslayer/hedged_reader.hpp>
#include <cstdint>
#include <cstdio>
// 1. Define your signal function — waits for your event, returns index to read
//
// Spins on an externally defined trigger flag and, once it is raised, returns
// the externally defined index and acknowledges the event by clearing the flag.
//
// NOTE(review): `volatile` only prevents the compiler from caching the loads;
// it is not a cross-thread synchronization primitive. If the flag and index are
// written by another thread, consider std::atomic with release/acquire
// ordering — confirm against how the producer publishes them.
[[gnu::always_inline]] inline std::size_t my_signal() {
    // Example: busy-wait for an external flag, then return the index
    extern volatile std::size_t g_index;
    extern volatile bool g_trigger;

    while (!g_trigger) {}

    // Capture the index BEFORE acknowledging the trigger. Once the flag is
    // cleared, a producer that waits for the acknowledgement is free to publish
    // the next index, so reading afterwards could return the wrong slot.
    const std::size_t index = g_index;
    g_trigger = false;
    return index;
}
// 2. Define your work function — receives the read value immediately
//
// Prints the value it is handed as an unsigned integer on stdout.
template <typename T>
[[gnu::always_inline]] inline void my_work(T val) {
    // Keep this as short as possible: it is invoked with the freshly read value.
    const auto as_unsigned = static_cast<unsigned>(val);
    std::printf("Read value: %u\n", as_unsigned);
}
int main() {
using T = uint8_t;
// Pin main thread to recommended core
tailslayer::pin_to_core(tailslayer::CORE_MAIN);
// Construct reader with 2 replicas (default)
tailslayer::HedgedReader<T, my_signal, my_work<T>> reader{};
// Insert data — replicated across both DRAM channels automatically
reader.insert(0x43);
reader.insert(0x44);
// Launch workers — blocks; workers spin until signal fires
reader.start_workers();
return 0;
}
tailslayer::ArgList<...>
#include <tailslayer/hedged_reader.hpp>
// Signal function with args
//
// Placeholder showing the signature of a signal function that receives
// forwarded arguments; it ignores both and always selects index 0.
[[gnu::always_inline]] inline std::size_t my_signal(int threshold, int channel) {
    // A real implementation would consult threshold and channel here.
    static_cast<void>(threshold);
    static_cast<void>(channel);

    constexpr std::size_t kIndexToRead = 0;
    return kIndexToRead;
}
// Work function with args
//
// Multiplies the read value (converted to int) by `multiplier` and stores the
// product in a volatile local so the computation is not optimized away.
template <typename T>
[[gnu::always_inline]] inline void my_work(T val, int multiplier) {
    const int widened = static_cast<int>(val);
    volatile int sink = widened * multiplier;
    // Read the volatile back and discard it; nothing else consumes the product.
    static_cast<void>(sink);
}
int main() {
using T = uint8_t;
tailslayer::pin_to_core(tailslayer::CORE_MAIN);
tailslayer::HedgedReader<
T,
my_signal,
my_work<T>,
tailslayer::ArgList<10, 1>, // args forwarded to my_signal: threshold=10, channel=1
tailslayer::ArgList<2> // args forwarded to my_work: multiplier=2
> reader{};
reader.insert(0xAB);
reader.start_workers();
}
// Example: 4 replicas, custom channel bit 8 (common for AMD/Intel)
tailslayer::HedgedReader<T, my_signal, my_work<T>> reader{
/* channel_offset */ 0,
/* channel_bit */ 8,
/* n_replicas */ 4
};
Note: N-way (more than 2 replicas) hedging requires using the benchmark code in `discovery/benchmark/`. The main library header currently exposes 2 channels by default.
cd discovery/benchmark
make
sudo chrt -f 99 ./hedged_read_cpp --all --channel-bit 8
| Flag | Description |
|---|---|
| `--all` | Run all channel configurations |
| `--channel-bit` | Specify the DRAM channel selection bit (try 6, 7, or 8 for your platform) |
cd discovery
gcc -O2 -o trefi_probe trefi_probe.c
sudo ./trefi_probe
| Platform | Typical Channel Bit | Notes |
|---|---|---|
| AMD (Zen) | 6 or 7 | Verify with benchmark |
| Intel | 6, 7, or 8 | Run benchmark with `--all` |
| AWS Graviton | 8 | Confirmed working |
// Pre-load order book prices into hedged reader
// Signal on market data arrival, process immediately
// Spins until the externally defined tick flag is raised, then returns the
// externally defined order-book index and acknowledges the tick by clearing
// the flag.
//
// NOTE(review): `volatile` does not order accesses between threads; if the
// flag and index are written by a market-data thread, std::atomic with
// release/acquire ordering is the safer choice — confirm against the producer.
[[gnu::always_inline]] inline std::size_t await_market_signal() {
    extern volatile std::size_t g_book_idx;
    extern volatile bool g_tick;

    while (!g_tick) {
        // Spin-wait hint. __builtin_ia32_pause() exists only on x86, so select
        // the equivalent per architecture; elsewhere fall back to a plain spin
        // rather than failing to compile.
#if defined(__x86_64__) || defined(__i386__)
        __builtin_ia32_pause();
#elif defined(__aarch64__)
        __asm__ __volatile__("yield" ::: "memory");
#endif
    }

    // Capture the index BEFORE acknowledging the tick: once the flag is
    // cleared, a producer waiting on the acknowledgement may publish the next
    // index, and a later read could return the wrong book slot.
    const std::size_t book_idx = g_book_idx;
    g_tick = false;
    return book_idx;
}
// Work function for the market-data example: forwards the freshly read price
// straight to submit_order, which is defined elsewhere in the program.
template <typename T>
[[gnu::always_inline]] inline void process_price(T price) {
    // Local declaration of the externally defined order-submission routine.
    void submit_order(T);

    // Hand the price over immediately; nothing else happens on this path.
    submit_order(price);
}
int main() {
tailslayer::pin_to_core(tailslayer::CORE_MAIN);
tailslayer::HedgedReader<uint64_t, await_market_signal, process_price<uint64_t>> reader{};
for (uint64_t price : preloaded_prices) {
reader.insert(price);
}
reader.start_workers();
}
// Each insert automatically maps to correct DRAM channel via address calculation
// Access is via logical index — tailslayer manages physical placement
tailslayer::HedgedReader<uint32_t, my_signal, my_work<uint32_t>> reader{};
std::vector<uint32_t> lut = {100, 200, 300, 400};
for (auto v : lut) {
reader.insert(v);
}
reader.start_workers();
`--channel-bit`
`--all`
`isolcpus=`
`sudo chrt -f 99 ./your_binary`
`include/tailslayer/hedged_reader.hpp`
`-std=c++17`
`start_workers()`
`insert()`
`insert()`
tailslayer/
├── include/tailslayer/
│ └── hedged_reader.hpp # Main library header (copy this)
├── tailslayer_example.cpp # Usage example
├── discovery/
│ ├── trefi_probe.c # DRAM refresh spike timing tool
│ └── benchmark/ # N-way channel hedging benchmark
└── Makefile