From d8a4af44150ab0f6763711b437f2719f17c4f41c Mon Sep 17 00:00:00 2001 From: smcio Date: Tue, 13 Jan 2026 17:00:32 +0000 Subject: [PATCH 01/28] rewards: ensure global stake cache is updated during partitioned epoch rewards --- pkg/replay/transaction.go | 1 - pkg/rewards/rewards.go | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pkg/replay/transaction.go b/pkg/replay/transaction.go index be9035ce..a9f58690 100644 --- a/pkg/replay/transaction.go +++ b/pkg/replay/transaction.go @@ -184,7 +184,6 @@ func recordStakeDelegation(acct *accounts.Account) { if isEmpty || isUninitialized { global.DeleteStakeCacheItem(acct.Key) } else { - //mlog.Log.Debugf("added stake delegation record for %s: %v", acct.Key, acct) stakeState, err := sealevel.UnmarshalStakeState(acct.Data) if err == nil { delegation := stakeState.Stake.Stake.Delegation diff --git a/pkg/rewards/rewards.go b/pkg/rewards/rewards.go index d83fcffa..7a2968f3 100644 --- a/pkg/rewards/rewards.go +++ b/pkg/rewards/rewards.go @@ -11,6 +11,7 @@ import ( "github.com/Overclock-Validator/mithril/pkg/accounts" "github.com/Overclock-Validator/mithril/pkg/accountsdb" "github.com/Overclock-Validator/mithril/pkg/features" + "github.com/Overclock-Validator/mithril/pkg/global" "github.com/Overclock-Validator/mithril/pkg/rpcclient" "github.com/Overclock-Validator/mithril/pkg/safemath" "github.com/Overclock-Validator/mithril/pkg/sealevel" @@ -226,6 +227,11 @@ func DistributeStakingRewardsForPartition(acctsDb *accountsdb.AccountsDb, partit accts[idx] = stakeAcct distributedLamports.Add(reward.StakerRewards) + + // update the stake cache + delegationToCache := stakeState.Stake.Stake.Delegation + delegationToCache.CreditsObserved = stakeState.Stake.Stake.CreditsObserved + global.PutStakeCacheItem(stakePk, &delegationToCache) }) for idx, stakePk := range partition.Pubkeys() { From 85c53d4d65c1c66c1ecd0b575e57335aa864552c Mon Sep 17 00:00:00 2001 From: smcio Date: Tue, 13 Jan 2026 20:16:38 +0000 Subject: [PATCH 02/28] update vote cache when validator identity is changed --- pkg/replay/transaction.go | 7 +++++++ pkg/sealevel/execution_ctx.go | 19 ++++++++++--------- pkg/sealevel/vote_program.go | 4 ++++ 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/pkg/replay/transaction.go b/pkg/replay/transaction.go index a9f58690..55f046d5 100644 --- a/pkg/replay/transaction.go +++ b/pkg/replay/transaction.go @@ -240,6 +240,13 @@ func recordStakeAndVoteAccounts(slotCtx *sealevel.SlotCtx, execCtx *sealevel.Exe recordStakeDelegation(acct) } } + + if len(execCtx.ModifiedValidatorIdentities) != 0 { + for _, nodePubkey := range execCtx.ModifiedValidatorIdentities { + voteStateVersions := global.VoteCacheItem(nodePubkey) + global.PutVoteCacheItem(nodePubkey, voteStateVersions) + } + } } func handleFailedTx(slotCtx *sealevel.SlotCtx, tx *solana.Transaction, instrs []sealevel.Instruction, computeBudgetLimits *sealevel.ComputeBudgetLimits, instrErr error, rentStateErr error) (*fees.TxFeeInfo, error) { diff --git a/pkg/sealevel/execution_ctx.go b/pkg/sealevel/execution_ctx.go index 1b5b9a40..80ce8ac0 100644 --- a/pkg/sealevel/execution_ctx.go +++ b/pkg/sealevel/execution_ctx.go @@ -16,15 +16,16 @@ import ( ) type ExecutionCtx struct { - Log Logger - Accounts accounts.Accounts - TransactionContext *TransactionCtx - Features features.Features - ComputeMeter cu.ComputeMeter - Blockhash [32]byte - PrevLamportsPerSignature uint64 - SlotCtx *SlotCtx - ModifiedVoteStates map[solana.PublicKey]*VoteStateVersions + Log Logger + Accounts accounts.Accounts + 
TransactionContext *TransactionCtx + Features features.Features + ComputeMeter cu.ComputeMeter + Blockhash [32]byte + PrevLamportsPerSignature uint64 + SlotCtx *SlotCtx + ModifiedVoteStates map[solana.PublicKey]*VoteStateVersions + ModifiedValidatorIdentities []solana.PublicKey } type SlotBank struct { diff --git a/pkg/sealevel/vote_program.go b/pkg/sealevel/vote_program.go index 490100fb..2f58b9ff 100644 --- a/pkg/sealevel/vote_program.go +++ b/pkg/sealevel/vote_program.go @@ -1198,6 +1198,10 @@ func VoteProgramUpdateValidatorIdentity(execCtx *ExecutionCtx, voteAcct *Borrowe voteState.NodePubkey = nodePubkey err = setVoteAccountState(execCtx, voteAcct, voteState, f) + if err == nil { + execCtx.ModifiedValidatorIdentities = append(execCtx.ModifiedValidatorIdentities, nodePubkey) + } + return err } From 6b237258b83fdc80c3baa6e449a5cb1a801bbe59 Mon Sep 17 00:00:00 2001 From: smcio Date: Tue, 13 Jan 2026 20:40:34 +0000 Subject: [PATCH 03/28] Revert "update vote cache when validator identity is changed" This reverts commit 85c53d4d65c1c66c1ecd0b575e57335aa864552c. --- pkg/replay/transaction.go | 7 ------- pkg/sealevel/execution_ctx.go | 19 +++++++++---------- pkg/sealevel/vote_program.go | 4 ---- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/pkg/replay/transaction.go b/pkg/replay/transaction.go index 55f046d5..a9f58690 100644 --- a/pkg/replay/transaction.go +++ b/pkg/replay/transaction.go @@ -240,13 +240,6 @@ func recordStakeAndVoteAccounts(slotCtx *sealevel.SlotCtx, execCtx *sealevel.Exe recordStakeDelegation(acct) } } - - if len(execCtx.ModifiedValidatorIdentities) != 0 { - for _, nodePubkey := range execCtx.ModifiedValidatorIdentities { - voteStateVersions := global.VoteCacheItem(nodePubkey) - global.PutVoteCacheItem(nodePubkey, voteStateVersions) - } - } } func handleFailedTx(slotCtx *sealevel.SlotCtx, tx *solana.Transaction, instrs []sealevel.Instruction, computeBudgetLimits *sealevel.ComputeBudgetLimits, instrErr error, rentStateErr error) (*fees.TxFeeInfo, error) { diff --git a/pkg/sealevel/execution_ctx.go b/pkg/sealevel/execution_ctx.go index 80ce8ac0..1b5b9a40 100644 --- a/pkg/sealevel/execution_ctx.go +++ b/pkg/sealevel/execution_ctx.go @@ -16,16 +16,15 @@ import ( ) type ExecutionCtx struct { - Log Logger - Accounts accounts.Accounts - TransactionContext *TransactionCtx - Features features.Features - ComputeMeter cu.ComputeMeter - Blockhash [32]byte - PrevLamportsPerSignature uint64 - SlotCtx *SlotCtx - ModifiedVoteStates map[solana.PublicKey]*VoteStateVersions - ModifiedValidatorIdentities []solana.PublicKey + Log Logger + Accounts accounts.Accounts + TransactionContext *TransactionCtx + Features features.Features + ComputeMeter cu.ComputeMeter + Blockhash [32]byte + PrevLamportsPerSignature uint64 + SlotCtx *SlotCtx + ModifiedVoteStates map[solana.PublicKey]*VoteStateVersions } type SlotBank struct { diff --git a/pkg/sealevel/vote_program.go b/pkg/sealevel/vote_program.go index 2f58b9ff..490100fb 100644 --- a/pkg/sealevel/vote_program.go +++ b/pkg/sealevel/vote_program.go @@ -1198,10 +1198,6 @@ func VoteProgramUpdateValidatorIdentity(execCtx *ExecutionCtx, voteAcct *Borrowe voteState.NodePubkey = nodePubkey err = setVoteAccountState(execCtx, voteAcct, voteState, f) - if err == nil { - execCtx.ModifiedValidatorIdentities = append(execCtx.ModifiedValidatorIdentities, nodePubkey) - } - return err } From dfe858007a44c427cb511d8279d651d3221c1141 Mon Sep 17 00:00:00 2001 From: smcio Date: Tue, 13 Jan 2026 20:48:04 +0000 Subject: [PATCH 04/28] fix possible corner-case 
with calculation of leader schedule --- pkg/replay/epoch.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pkg/replay/epoch.go b/pkg/replay/epoch.go index f84ebaad..fa596dca 100644 --- a/pkg/replay/epoch.go +++ b/pkg/replay/epoch.go @@ -150,7 +150,12 @@ func handleEpochTransition(acctsDb *accountsdb.AccountsDb, rpcc *rpcclient.RpcCl updateEpochStakesAndRefreshVoteCache(leaderScheduleEpoch, block, epochSchedule, f, acctsDb, prevSlotCtx.Slot) if global.ManageLeaderSchedule() { - _, err = PrepareLeaderScheduleLocalFromVoteCache(newEpoch, epochSchedule, "") + if len(global.EpochStakesVoteAccts(newEpoch)) > 0 { + _, err = PrepareLeaderScheduleLocal(newEpoch, epochSchedule, "") + } else { + _, err = PrepareLeaderScheduleLocalFromVoteCache(newEpoch, epochSchedule, "") + } + if err != nil { panic(err) } From d45ea13e3a54a8a5f8b3e181dbb749cd6b21210f Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Wed, 14 Jan 2026 10:19:14 -0600 Subject: [PATCH 05/28] perf(simd186): memoize accounts to avoid double-clone in loadAndValidateTxAccts When SIMD-186 is active, loadAndValidateTxAcctsSimd186 was loading each account twice: once for size accumulation (Pass 1) and once for building TransactionAccounts (Pass 2). Each GetAccount call clones the account, causing 2x allocations and data copies per account per transaction. Changes: - Add acctCache slice to store accounts from Pass 1 - Reuse cached accounts in Pass 2 instead of re-cloning - Replace programIdIdxs slice with isProgramIdx boolean mask for O(1) lookup (eliminates slices.Contains linear scan in hot loop) - Reuse cache in program validation loop via tx.Message.Instructions index Impact: ~50% reduction in account allocations/copies per transaction, reduced GC pressure during high-throughput replay. 
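
Illustrative sketch of the two-pass pattern described above (hypothetical stand-in
names; the real code uses slotCtx.GetAccount, accounts.Account, and txAcctMetas):

```go
package sketch

// Minimal sketch of the memoization described in this commit message. The account
// type and the load callback are hypothetical stand-ins for accounts.Account and
// slotCtx.GetAccount (which clones on every call).
type account struct{ data []byte }

func loadTwoPass(keys []string, programIdxs []int, load func(string) *account) []*account {
	// Pass 1: load (clone) each account exactly once, indexed the same way as keys.
	cache := make([]*account, len(keys))
	for i, k := range keys {
		cache[i] = load(k)
	}

	// Boolean mask gives O(1) "is this index a program id?" lookups,
	// replacing a slices.Contains linear scan in the hot loop.
	isProgram := make([]bool, len(keys))
	for _, idx := range programIdxs {
		if idx >= 0 && idx < len(isProgram) {
			isProgram[idx] = true
		}
	}

	// Pass 2: reuse the cached accounts instead of loading (cloning) them again.
	out := make([]*account, 0, len(keys))
	for i := range keys {
		_ = isProgram[i] // the real code branches here for the read-only program (dummy) case
		out = append(out, cache[i])
	}
	return out
}
```
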
Co-Authored-By: Claude Opus 4.5 --- pkg/replay/accounts.go | 49 ++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/pkg/replay/accounts.go b/pkg/replay/accounts.go index 7cbd518f..344f40b2 100644 --- a/pkg/replay/accounts.go +++ b/pkg/replay/accounts.go @@ -219,11 +219,16 @@ func loadAndValidateTxAcctsSimd186(slotCtx *sealevel.SlotCtx, acctMetasPerInstr return nil, err } - for _, pubkey := range acctKeys { + // Memoize accounts loaded in Pass 1 to avoid re-cloning in Pass 2 + // Use slice indexed by account position (same ordering as txAcctMetas) + acctCache := make([]*accounts.Account, len(acctKeys)) + + for i, pubkey := range acctKeys { acct, err := slotCtx.GetAccount(pubkey) if err != nil { panic("should be impossible - programming error") } + acctCache[i] = acct // Cache by index for reuse in Pass 2 err = accumulator.collectAcct(acct) if err != nil { return nil, err @@ -235,11 +240,15 @@ func loadAndValidateTxAcctsSimd186(slotCtx *sealevel.SlotCtx, acctMetasPerInstr return nil, err } - var programIdIdxs []uint64 + // Use boolean mask for O(1) program index lookup + isProgramIdx := make([]bool, len(acctKeys)) instructionAcctPubkeys := make(map[solana.PublicKey]struct{}) for instrIdx, instr := range tx.Message.Instructions { - programIdIdxs = append(programIdIdxs, uint64(instr.ProgramIDIndex)) + i := int(instr.ProgramIDIndex) + if i >= 0 && i < len(isProgramIdx) { + isProgramIdx[i] = true + } ias := acctMetasPerInstr[instrIdx] for _, ia := range ias { instructionAcctPubkeys[ia.Pubkey] = struct{}{} @@ -251,21 +260,17 @@ func loadAndValidateTxAcctsSimd186(slotCtx *sealevel.SlotCtx, acctMetasPerInstr for idx, acctMeta := range txAcctMetas { var acct *accounts.Account + cached := acctCache[idx] // Reuse account from Pass 1 _, instrContainsAcctMeta := instructionAcctPubkeys[acctMeta.PublicKey] if acctMeta.PublicKey == sealevel.SysvarInstructionsAddr { acct = instrsAcct - } else if !slotCtx.Features.IsActive(features.DisableAccountLoaderSpecialCase) && slices.Contains(programIdIdxs, uint64(idx)) && !acctMeta.IsWritable && !instrContainsAcctMeta { - tmp, err := slotCtx.GetAccount(acctMeta.PublicKey) - if err != nil { - return nil, err - } - acct = &accounts.Account{Key: acctMeta.PublicKey, Owner: tmp.Owner, Executable: true, IsDummy: true} + } else if !slotCtx.Features.IsActive(features.DisableAccountLoaderSpecialCase) && isProgramIdx[idx] && !acctMeta.IsWritable && !instrContainsAcctMeta { + // Dummy account case - only need owner from cached account + acct = &accounts.Account{Key: acctMeta.PublicKey, Owner: cached.Owner, Executable: true, IsDummy: true} } else { - acct, err = slotCtx.GetAccount(acctMeta.PublicKey) - if err != nil { - return nil, err - } + // Normal case - use cached account directly + acct = cached } acctsForTx = append(acctsForTx, *acct) @@ -278,16 +283,24 @@ func loadAndValidateTxAcctsSimd186(slotCtx *sealevel.SlotCtx, acctMetasPerInstr removeAcctsExecutableFlagChecks := slotCtx.Features.IsActive(features.RemoveAccountsExecutableFlagChecks) - for _, instr := range instrs { + for instrIdx, instr := range instrs { if instr.ProgramId == addresses.NativeLoaderAddr { continue } - programAcct, err := slotCtx.GetAccount(instr.ProgramId) - if err != nil { - programAcct, err = slotCtx.GetAccountFromAccountsDb(instr.ProgramId) + // Use cached account via ProgramIDIndex from tx.Message + programIdx := int(tx.Message.Instructions[instrIdx].ProgramIDIndex) + programAcct := acctCache[programIdx] + + // Fallback if not in cache 
(shouldn't happen for valid txs) + if programAcct == nil { + var err error + programAcct, err = slotCtx.GetAccount(instr.ProgramId) if err != nil { - return nil, TxErrProgramAccountNotFound + programAcct, err = slotCtx.GetAccountFromAccountsDb(instr.ProgramId) + if err != nil { + return nil, TxErrProgramAccountNotFound + } } } From d02ca7d627f819b3de653fe0e083a233c46a4cc4 Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 12:10:48 -0600 Subject: [PATCH 06/28] perf: optimize reward distribution memory and pool usage - Add MarshalStakeStakeInto to write stake state directly into existing buffer, eliminating ~600MB of allocations during reward distribution - Remove unnecessary ants.Release() calls that were tearing down global ants state after each partition (4 occurrences) - Add InRewardsWindow flag to AccountsDb to skip caching stake accounts during reward distribution (prevents cache pollution from 1.25M one-shot reads) Co-Authored-By: Claude Opus 4.5 --- pkg/accountsdb/accountsdb.go | 11 ++++++++++- pkg/replay/rewards.go | 4 ++++ pkg/rewards/rewards.go | 7 +------ pkg/sealevel/stake_state.go | 30 ++++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 7 deletions(-) diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index 7cd2e866..2af8739d 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -27,6 +27,11 @@ type AccountsDb struct { VoteAcctCache otter.Cache[solana.PublicKey, *accounts.Account] CommonAcctsCache otter.Cache[solana.PublicKey, *accounts.Account] ProgramCache otter.Cache[solana.PublicKey, *ProgramCacheEntry] + + // InRewardsWindow is set during partitioned epoch rewards distribution. + // When true, stake accounts are not cached in CommonAcctsCache since they're + // one-shot reads that would evict genuinely hot accounts. + InRewardsWindow bool } // silentLogger implements pebble.Logger but discards all messages. @@ -196,8 +201,12 @@ func (accountsDb *AccountsDb) GetAccount(slot uint64, pubkey solana.PublicKey) ( acct.Slot = acctIdxEntry.Slot - if solana.PublicKeyFromBytes(acct.Owner[:]) == addresses.VoteProgramAddr { + owner := solana.PublicKeyFromBytes(acct.Owner[:]) + if owner == addresses.VoteProgramAddr { accountsDb.VoteAcctCache.Set(pubkey, acct) + } else if owner == addresses.StakeProgramAddr && accountsDb.InRewardsWindow { + // During reward distribution, stake accounts are one-shot reads that would + // evict genuinely hot accounts from the cache. Skip caching them. 
} else { accountsDb.CommonAcctsCache.Set(pubkey, acct) } diff --git a/pkg/replay/rewards.go b/pkg/replay/rewards.go index fd24d045..f202ce5e 100644 --- a/pkg/replay/rewards.go +++ b/pkg/replay/rewards.go @@ -218,6 +218,9 @@ func distributePartitionedEpochRewardsForSlot(acctsDb *accountsdb.AccountsDb, ep epochRewards.MustUnmarshalWithDecoder(decoder) partitionIdx := currentBlockHeight - epochRewards.DistributionStartingBlockHeight + + // Set flag to prevent stake account cache pollution during one-shot reward reads + acctsDb.InRewardsWindow = true distributedAccts, parentDistributedAccts, distributedLamports := rewards.DistributeStakingRewardsForPartition(acctsDb, partitionedEpochRewardsInfo.RewardPartitions.Partition(partitionIdx), partitionedEpochRewardsInfo.StakingRewards, currentSlot) parentDistributedAccts = append(parentDistributedAccts, epochRewardsAcct.Clone()) @@ -226,6 +229,7 @@ func distributePartitionedEpochRewardsForSlot(acctsDb *accountsdb.AccountsDb, ep if partitionedEpochRewardsInfo.NumRewardPartitionsRemaining == 0 { epochRewards.Active = false + acctsDb.InRewardsWindow = false } writer := new(bytes.Buffer) diff --git a/pkg/rewards/rewards.go b/pkg/rewards/rewards.go index 7a2968f3..b248f10b 100644 --- a/pkg/rewards/rewards.go +++ b/pkg/rewards/rewards.go @@ -163,7 +163,6 @@ func DistributeVotingRewards(acctsDb *accountsdb.AccountsDb, validatorRewards ma wg.Wait() workerPool.Release() - ants.Release() err := acctsDb.StoreAccounts(updatedAccts, slot) if err != nil { @@ -213,11 +212,10 @@ func DistributeStakingRewardsForPartition(acctsDb *accountsdb.AccountsDb, partit stakeState.Stake.Stake.CreditsObserved = reward.NewCreditsObserved stakeState.Stake.Stake.Delegation.StakeLamports = safemath.SaturatingAddU64(stakeState.Stake.Stake.Delegation.StakeLamports, uint64(reward.StakerRewards)) - newStakeStateBytes, err := sealevel.MarshalStakeStake(stakeState) + err = sealevel.MarshalStakeStakeInto(stakeState, stakeAcct.Data) if err != nil { panic(fmt.Sprintf("unable to serialize new stake account state in distributing partitioned rewards: %s", err)) } - copy(stakeAcct.Data, newStakeStateBytes) // update lamports in stake account stakeAcct.Lamports, err = safemath.CheckedAddU64(stakeAcct.Lamports, uint64(reward.StakerRewards)) @@ -242,7 +240,6 @@ func DistributeStakingRewardsForPartition(acctsDb *accountsdb.AccountsDb, partit wg.Wait() workerPool.Release() - ants.Release() err := acctsDb.StoreAccounts(accts, slot) if err != nil { @@ -356,7 +353,6 @@ func CalculateStakeRewardsAndPartitions(pointsPerStakeAcct map[solana.PublicKey] } wg.Wait() partitionCalcWorkerPool.Release() - ants.Release() return stakeInfoResults, validatorRewards, partitions } @@ -503,7 +499,6 @@ func CalculateStakePoints( wg.Wait() workerPool.Release() - ants.Release() return pointsAccum.CalculatedStakePoints(), pointsAccum.TotalPoints() } diff --git a/pkg/sealevel/stake_state.go b/pkg/sealevel/stake_state.go index 873e6c9e..9151d7fd 100644 --- a/pkg/sealevel/stake_state.go +++ b/pkg/sealevel/stake_state.go @@ -2,6 +2,7 @@ package sealevel import ( "bytes" + "fmt" "math" "github.com/Overclock-Validator/mithril/pkg/features" @@ -765,6 +766,35 @@ func MarshalStakeStake(state *StakeStateV2) ([]byte, error) { } } +// fixedSliceWriter implements io.Writer over a fixed-size byte slice, +// avoiding allocation during serialization. 
+type fixedSliceWriter struct { + buf []byte + pos int +} + +func (w *fixedSliceWriter) Write(p []byte) (int, error) { + if w.pos+len(p) > len(w.buf) { + return 0, fmt.Errorf("write exceeds buffer: pos=%d, write=%d, cap=%d", w.pos, len(p), len(w.buf)) + } + copy(w.buf[w.pos:], p) + w.pos += len(p) + return len(p), nil +} + +// MarshalStakeStakeInto writes the stake state directly into dst, avoiding allocation. +// dst must be at least StakeStateV2Size (200) bytes. +func MarshalStakeStakeInto(state *StakeStateV2, dst []byte) error { + if len(dst) < StakeStateV2Size { + return fmt.Errorf("destination buffer too small: %d < %d", len(dst), StakeStateV2Size) + } + + writer := &fixedSliceWriter{buf: dst[:StakeStateV2Size], pos: 0} + encoder := bin.NewBinEncoder(writer) + + return state.MarshalWithEncoder(encoder) +} + func setStakeAccountState(acct *BorrowedAccount, stakeState *StakeStateV2, f features.Features) error { stakeStateBytes, err := MarshalStakeStake(stakeState) if err != nil { From 4c64df0b93beddd91c75f56bff15520d677f0dd2 Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 12:45:56 -0600 Subject: [PATCH 07/28] perf: reward distribution optimizations + thread safety - Add MarshalStakeStakeInto for zero-allocation stake serialization - Add InRewardsWindow atomic.Bool to skip stake account caching during rewards - Cache bypass on both read and write paths (prevents cache thrashing) - Remove unnecessary ants.Release() calls (4x) - Add docs/TODO.md tracking known issues Co-Authored-By: Claude Opus 4.5 --- docs/TODO.md | 88 ++++++++++++++++++++++++++++++++++++ pkg/accountsdb/accountsdb.go | 15 ++++-- pkg/replay/rewards.go | 6 +-- pkg/sealevel/stake_state.go | 8 +--- 4 files changed, 103 insertions(+), 14 deletions(-) create mode 100644 docs/TODO.md diff --git a/docs/TODO.md b/docs/TODO.md new file mode 100644 index 00000000..ee806050 --- /dev/null +++ b/docs/TODO.md @@ -0,0 +1,88 @@ +# TODO / Known Issues + +Identified on branch `perf/reward-distribution-optimizations` at commit `3b2ad67` +dev HEAD at time of identification: `a25b2e3` +Date: 2026-01-13 + +--- + +## Failing Tests + +### 1. Address Lookup Table Tests - `InstrErrUnsupportedProgramId` + +**File:** `pkg/sealevel/address_lookup_table_test.go` +**Test:** `TestExecute_AddrLookupTable_Program_Test_Create_Lookup_Table_Idempotent` (and likely all other ALT tests) + +**Root Cause:** `AddressLookupTableAddr` and `StakeProgramAddr` were accidentally removed from `resolveNativeProgramById` switch in `pkg/sealevel/native_programs_common.go`. + +| Program | Removed In | Commit Date | Commit Message | +|---------|------------|-------------|----------------| +| `AddressLookupTableAddr` | `d47c16b` | May 16, 2025 | "many optimisations and changes" | +| `StakeProgramAddr` | `e890f9e` | Jul 26, 2025 | "snapshot download, stake program migration, refactoring" | + +**Fix:** Add these cases back to the switch in `resolveNativeProgramById`: +```go +case a.StakeProgramAddr: + return StakeProgramExecute, a.StakeProgramAddrStr, nil +case a.AddressLookupTableAddr: + return AddressLookupTableExecute, a.AddressLookupTableProgramAddrStr, nil +``` + +--- + +### 2. 
Bank Hash Test - Nil Pointer Dereference + +**File:** `pkg/replay/hash_test.go` +**Test:** `Test_Compute_Bank_Hash` + +**Error:** +``` +panic: runtime error: invalid memory address or nil pointer dereference +pkg/replay/hash.go:227 - shouldIncludeEah(0x0, 0x0) +``` + +**Root Cause:** Test passes `nil` for the first argument to `shouldIncludeEah`, which dereferences it without a nil check. + +**Fix:** Either add nil check in `shouldIncludeEah` or fix the test to pass valid arguments. + +--- + +## Agave/Firedancer Parity Issues + +### 3. Missing "Burned Rewards" Semantics in Reward Distribution + +**File:** `pkg/rewards/rewards.go` (lines 180-230) + +**Problem:** Mithril does not implement "burn" semantics for per-account failures during partitioned reward distribution. This diverges from both Agave and Firedancer. + +**Current Mithril behavior:** +- `GetAccount` error → panic (aborts replay) +- `UnmarshalStakeState` error → silent skip (reward lost, not counted) +- `MarshalStakeStakeInto` error → panic (aborts replay) +- Lamport overflow → panic (aborts replay) + +**Agave behavior** (`distribution.rs:260`): +- `build_updated_stake_reward` returns `DistributionError::UnableToSetState` or `AccountNotFound` +- Caller logs error and adds to `lamports_burned` +- Continues processing remaining accounts + +**Firedancer behavior** (`fd_rewards.c:958`): +- `distribute_epoch_reward_to_stake_acc` returns non-zero on decode/non-stake/etc. +- Caller increments `lamports_burned` and continues + +**Failure scenarios that should burn (not panic):** +- Account missing / not found +- Stake state decode fails (including short/invalid data) +- Account isn't a stake account +- Lamport add overflows +- `set_state`/encode fails (e.g., data too small) + +**Fix required:** +1. Add `lamports_burned` tracking to reward distribution +2. Change panics to log + burn + continue +3. `epochRewards.Distribute()` should receive `distributedLamports` (successful) separately from burned amount +4. Ensure `SysvarEpochRewards.DistributedRewards` advances correctly (may need to include burned in total) + +**Note:** The current silent skip on `UnmarshalStakeState` error reduces `distributedLamports` but doesn't track it as burned, which may cause `SysvarEpochRewards` to diverge from Agave/FD. + +--- diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index 2af8739d..66f3eef2 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -30,8 +30,9 @@ type AccountsDb struct { // InRewardsWindow is set during partitioned epoch rewards distribution. // When true, stake accounts are not cached in CommonAcctsCache since they're - // one-shot reads that would evict genuinely hot accounts. - InRewardsWindow bool + // one-shot reads/writes that would evict genuinely hot accounts. + // Atomic for safe concurrent access from RPC goroutines. + InRewardsWindow atomic.Bool } // silentLogger implements pebble.Logger but discards all messages. @@ -204,7 +205,7 @@ func (accountsDb *AccountsDb) GetAccount(slot uint64, pubkey solana.PublicKey) ( owner := solana.PublicKeyFromBytes(acct.Owner[:]) if owner == addresses.VoteProgramAddr { accountsDb.VoteAcctCache.Set(pubkey, acct) - } else if owner == addresses.StakeProgramAddr && accountsDb.InRewardsWindow { + } else if owner == addresses.StakeProgramAddr && accountsDb.InRewardsWindow.Load() { // During reward distribution, stake accounts are one-shot reads that would // evict genuinely hot accounts from the cache. Skip caching them. 
} else { @@ -224,13 +225,17 @@ func (accountsDb *AccountsDb) StoreAccounts(accts []*accounts.Account, slot uint accountsDb.storeAccountsInternal(accts, slot) + inRewardsWindow := accountsDb.InRewardsWindow.Load() for _, acct := range accts { if acct == nil { continue } - // if vote account, do not serialize up and write into accountsdb - just save it in cache. - if solana.PublicKeyFromBytes(acct.Owner[:]) == addresses.VoteProgramAddr { + owner := solana.PublicKeyFromBytes(acct.Owner[:]) + if owner == addresses.VoteProgramAddr { accountsDb.VoteAcctCache.Set(acct.Key, acct) + } else if owner == addresses.StakeProgramAddr && inRewardsWindow { + // During reward distribution, stake accounts are one-shot writes that would + // evict genuinely hot accounts from the cache. Skip caching them. } else { accountsDb.CommonAcctsCache.Set(acct.Key, acct) } diff --git a/pkg/replay/rewards.go b/pkg/replay/rewards.go index f202ce5e..2d025e77 100644 --- a/pkg/replay/rewards.go +++ b/pkg/replay/rewards.go @@ -219,8 +219,8 @@ func distributePartitionedEpochRewardsForSlot(acctsDb *accountsdb.AccountsDb, ep partitionIdx := currentBlockHeight - epochRewards.DistributionStartingBlockHeight - // Set flag to prevent stake account cache pollution during one-shot reward reads - acctsDb.InRewardsWindow = true + // Set flag to prevent stake account cache pollution during one-shot reward reads/writes + acctsDb.InRewardsWindow.Store(true) distributedAccts, parentDistributedAccts, distributedLamports := rewards.DistributeStakingRewardsForPartition(acctsDb, partitionedEpochRewardsInfo.RewardPartitions.Partition(partitionIdx), partitionedEpochRewardsInfo.StakingRewards, currentSlot) parentDistributedAccts = append(parentDistributedAccts, epochRewardsAcct.Clone()) @@ -229,7 +229,7 @@ func distributePartitionedEpochRewardsForSlot(acctsDb *accountsdb.AccountsDb, ep if partitionedEpochRewardsInfo.NumRewardPartitionsRemaining == 0 { epochRewards.Active = false - acctsDb.InRewardsWindow = false + acctsDb.InRewardsWindow.Store(false) } writer := new(bytes.Buffer) diff --git a/pkg/sealevel/stake_state.go b/pkg/sealevel/stake_state.go index 9151d7fd..d45043f4 100644 --- a/pkg/sealevel/stake_state.go +++ b/pkg/sealevel/stake_state.go @@ -783,13 +783,9 @@ func (w *fixedSliceWriter) Write(p []byte) (int, error) { } // MarshalStakeStakeInto writes the stake state directly into dst, avoiding allocation. -// dst must be at least StakeStateV2Size (200) bytes. +// dst should be at least StakeStateV2Size (200) bytes for valid stake accounts. 
func MarshalStakeStakeInto(state *StakeStateV2, dst []byte) error { - if len(dst) < StakeStateV2Size { - return fmt.Errorf("destination buffer too small: %d < %d", len(dst), StakeStateV2Size) - } - - writer := &fixedSliceWriter{buf: dst[:StakeStateV2Size], pos: 0} + writer := &fixedSliceWriter{buf: dst, pos: 0} encoder := bin.NewBinEncoder(writer) return state.MarshalWithEncoder(encoder) From fc63bb820313eab0c05830777fbed9ca3eec2995 Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 13:05:53 -0600 Subject: [PATCH 08/28] perf: reuse worker pool across reward partitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add WorkerPool field to PartitionedRewardDistributionInfo - Add rewardDistributionTask struct to carry per-task context - Create pool once on first partition, reuse for all 243 partitions - Release pool when NumRewardPartitionsRemaining == 0 - Eliminates 243× pool create/destroy cycles during rewards Co-Authored-By: Claude Opus 4.5 --- pkg/replay/rewards.go | 10 ++- pkg/rewards/rewards.go | 152 +++++++++++++++++++++++++---------------- 2 files changed, 103 insertions(+), 59 deletions(-) diff --git a/pkg/replay/rewards.go b/pkg/replay/rewards.go index 2d025e77..a84a2af9 100644 --- a/pkg/replay/rewards.go +++ b/pkg/replay/rewards.go @@ -219,9 +219,16 @@ func distributePartitionedEpochRewardsForSlot(acctsDb *accountsdb.AccountsDb, ep partitionIdx := currentBlockHeight - epochRewards.DistributionStartingBlockHeight + // Initialize shared worker pool on first partition (reused across all 243 partitions) + if partitionedEpochRewardsInfo.WorkerPool == nil { + if err := partitionedEpochRewardsInfo.InitWorkerPool(); err != nil { + panic(fmt.Sprintf("unable to initialize reward distribution worker pool: %s", err)) + } + } + // Set flag to prevent stake account cache pollution during one-shot reward reads/writes acctsDb.InRewardsWindow.Store(true) - distributedAccts, parentDistributedAccts, distributedLamports := rewards.DistributeStakingRewardsForPartition(acctsDb, partitionedEpochRewardsInfo.RewardPartitions.Partition(partitionIdx), partitionedEpochRewardsInfo.StakingRewards, currentSlot) + distributedAccts, parentDistributedAccts, distributedLamports := rewards.DistributeStakingRewardsForPartition(acctsDb, partitionedEpochRewardsInfo.RewardPartitions.Partition(partitionIdx), partitionedEpochRewardsInfo.StakingRewards, currentSlot, partitionedEpochRewardsInfo.WorkerPool) parentDistributedAccts = append(parentDistributedAccts, epochRewardsAcct.Clone()) epochRewards.Distribute(distributedLamports) @@ -230,6 +237,7 @@ func distributePartitionedEpochRewardsForSlot(acctsDb *accountsdb.AccountsDb, ep if partitionedEpochRewardsInfo.NumRewardPartitionsRemaining == 0 { epochRewards.Active = false acctsDb.InRewardsWindow.Store(false) + partitionedEpochRewardsInfo.ReleaseWorkerPool() } writer := new(bytes.Buffer) diff --git a/pkg/rewards/rewards.go b/pkg/rewards/rewards.go index b248f10b..8ae03406 100644 --- a/pkg/rewards/rewards.go +++ b/pkg/rewards/rewards.go @@ -36,6 +36,87 @@ type PartitionedRewardDistributionInfo struct { Credits map[solana.PublicKey]CalculatedStakePoints RewardPartitions Partitions StakingRewards map[solana.PublicKey]*CalculatedStakeRewards + WorkerPool *ants.PoolWithFunc +} + +// rewardDistributionTask carries all context needed for processing one stake account. +// Used with the shared worker pool to avoid per-partition pool creation overhead. 
+type rewardDistributionTask struct { + acctsDb *accountsdb.AccountsDb + slot uint64 + stakingRewards map[solana.PublicKey]*CalculatedStakeRewards + accts []*accounts.Account + parentAccts []*accounts.Account + distributedLamports *atomic.Uint64 + wg *sync.WaitGroup + idx int + pubkey solana.PublicKey +} + +// rewardDistributionWorker is the shared worker function for stake reward distribution. +func rewardDistributionWorker(i interface{}) { + task := i.(*rewardDistributionTask) + defer task.wg.Done() + + reward, ok := task.stakingRewards[task.pubkey] + if !ok { + return + } + + stakeAcct, err := task.acctsDb.GetAccount(task.slot, task.pubkey) + if err != nil { + panic(fmt.Sprintf("unable to get acct %s from acctsdb for partitioned epoch rewards distribution in slot %d", task.pubkey, task.slot)) + } + task.parentAccts[task.idx] = stakeAcct.Clone() + + stakeState, err := sealevel.UnmarshalStakeState(stakeAcct.Data) + if err != nil { + return + } + + stakeState.Stake.Stake.CreditsObserved = reward.NewCreditsObserved + stakeState.Stake.Stake.Delegation.StakeLamports = safemath.SaturatingAddU64(stakeState.Stake.Stake.Delegation.StakeLamports, uint64(reward.StakerRewards)) + + err = sealevel.MarshalStakeStakeInto(stakeState, stakeAcct.Data) + if err != nil { + panic(fmt.Sprintf("unable to serialize new stake account state in distributing partitioned rewards: %s", err)) + } + + stakeAcct.Lamports, err = safemath.CheckedAddU64(stakeAcct.Lamports, uint64(reward.StakerRewards)) + if err != nil { + panic(fmt.Sprintf("overflow in partitioned epoch rewards distribution in slot %d to acct %s: %s", task.slot, task.pubkey, err)) + } + + task.accts[task.idx] = stakeAcct + task.distributedLamports.Add(reward.StakerRewards) + + // update the stake cache + delegationToCache := stakeState.Stake.Stake.Delegation + delegationToCache.CreditsObserved = stakeState.Stake.Stake.CreditsObserved + global.PutStakeCacheItem(task.pubkey, &delegationToCache) +} + +// InitWorkerPool creates the shared worker pool for reward distribution. +// Call once at the start of partitioned rewards, before processing any partition. +func (info *PartitionedRewardDistributionInfo) InitWorkerPool() error { + if info.WorkerPool != nil { + return nil + } + size := runtime.GOMAXPROCS(0) * 8 + pool, err := ants.NewPoolWithFunc(size, rewardDistributionWorker) + if err != nil { + return err + } + info.WorkerPool = pool + return nil +} + +// ReleaseWorkerPool releases the shared pool. Call when NumRewardPartitionsRemaining == 0. 
+func (info *PartitionedRewardDistributionInfo) ReleaseWorkerPool() { + if info.WorkerPool != nil { + info.WorkerPool.Release() + info.WorkerPool = nil + } } type CalculatedStakePoints struct { @@ -172,75 +253,30 @@ func DistributeVotingRewards(acctsDb *accountsdb.AccountsDb, validatorRewards ma return updatedAccts, parentUpdatedAccts, totalVotingRewards.Load() } -type idxAndPubkey struct { - idx int - pubkey solana.PublicKey -} - -func DistributeStakingRewardsForPartition(acctsDb *accountsdb.AccountsDb, partition *Partition, stakingRewards map[solana.PublicKey]*CalculatedStakeRewards, slot uint64) ([]*accounts.Account, []*accounts.Account, uint64) { +func DistributeStakingRewardsForPartition(acctsDb *accountsdb.AccountsDb, partition *Partition, stakingRewards map[solana.PublicKey]*CalculatedStakeRewards, slot uint64, workerPool *ants.PoolWithFunc) ([]*accounts.Account, []*accounts.Account, uint64) { var distributedLamports atomic.Uint64 accts := make([]*accounts.Account, partition.NumPubkeys()) parentAccts := make([]*accounts.Account, partition.NumPubkeys()) var wg sync.WaitGroup - size := runtime.GOMAXPROCS(0) * 8 - workerPool, _ := ants.NewPoolWithFunc(size, func(i interface{}) { - defer wg.Done() - - ip := i.(idxAndPubkey) - idx := ip.idx - stakePk := ip.pubkey - - reward, ok := stakingRewards[stakePk] - if !ok { - return - } - - stakeAcct, err := acctsDb.GetAccount(slot, stakePk) - if err != nil { - panic(fmt.Sprintf("unable to get acct %s from acctsdb for partitioned epoch rewards distribution in slot %d", stakePk, slot)) - } - parentAccts[idx] = stakeAcct.Clone() - - // update the delegation in the stake account state - stakeState, err := sealevel.UnmarshalStakeState(stakeAcct.Data) - if err != nil { - return - } - - stakeState.Stake.Stake.CreditsObserved = reward.NewCreditsObserved - stakeState.Stake.Stake.Delegation.StakeLamports = safemath.SaturatingAddU64(stakeState.Stake.Stake.Delegation.StakeLamports, uint64(reward.StakerRewards)) - - err = sealevel.MarshalStakeStakeInto(stakeState, stakeAcct.Data) - if err != nil { - panic(fmt.Sprintf("unable to serialize new stake account state in distributing partitioned rewards: %s", err)) - } - - // update lamports in stake account - stakeAcct.Lamports, err = safemath.CheckedAddU64(stakeAcct.Lamports, uint64(reward.StakerRewards)) - if err != nil { - panic(fmt.Sprintf("overflow in partitioned epoch rewards distribution in slot %d to acct %s: %s", slot, stakePk, err)) - } - - accts[idx] = stakeAcct - distributedLamports.Add(reward.StakerRewards) - - // update the stake cache - delegationToCache := stakeState.Stake.Stake.Delegation - delegationToCache.CreditsObserved = stakeState.Stake.Stake.CreditsObserved - global.PutStakeCacheItem(stakePk, &delegationToCache) - }) - for idx, stakePk := range partition.Pubkeys() { - ip := idxAndPubkey{idx: idx, pubkey: stakePk} + task := &rewardDistributionTask{ + acctsDb: acctsDb, + slot: slot, + stakingRewards: stakingRewards, + accts: accts, + parentAccts: parentAccts, + distributedLamports: &distributedLamports, + wg: &wg, + idx: idx, + pubkey: stakePk, + } wg.Add(1) - workerPool.Invoke(ip) + workerPool.Invoke(task) } wg.Wait() - workerPool.Release() - err := acctsDb.StoreAccounts(accts, slot) if err != nil { panic(fmt.Sprintf("error updating accounts for partitioned epoch rewards in slot %d: %s", slot, err)) From 4c2dea667c00e9b1c75776cb9a09a6520f39ee68 Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 13:37:35 -0600 Subject: [PATCH 09/28] perf: 
always exclude stake accounts from CommonAcctsCache Simplifies cache logic by always skipping stake accounts, not just during rewards. Stake accounts are rarely accessed outside the ~243 slot reward window, and caching them would evict genuinely hot accounts. - Remove InRewardsWindow flag (no longer needed) - Add explanatory comments on cache strategy Co-Authored-By: Claude Opus 4.5 --- pkg/accountsdb/accountsdb.go | 26 +++++++++++--------------- pkg/replay/rewards.go | 3 --- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index 66f3eef2..3b4cf8af 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -24,15 +24,12 @@ type AccountsDb struct { BankHashStore *pebble.DB AcctsDir string LargestFileId atomic.Uint64 - VoteAcctCache otter.Cache[solana.PublicKey, *accounts.Account] - CommonAcctsCache otter.Cache[solana.PublicKey, *accounts.Account] + VoteAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Vote accounts cached separately (frequently accessed) + CommonAcctsCache otter.Cache[solana.PublicKey, *accounts.Account] // General accounts (excludes vote & stake) ProgramCache otter.Cache[solana.PublicKey, *ProgramCacheEntry] - - // InRewardsWindow is set during partitioned epoch rewards distribution. - // When true, stake accounts are not cached in CommonAcctsCache since they're - // one-shot reads/writes that would evict genuinely hot accounts. - // Atomic for safe concurrent access from RPC goroutines. - InRewardsWindow atomic.Bool + // Note: Stake accounts are intentionally NOT cached. They're rarely accessed outside + // epoch rewards, and during the ~243 slot reward window, ~1.25M stake accounts would + // completely thrash any reasonably-sized cache, evicting genuinely hot accounts. } // silentLogger implements pebble.Logger but discards all messages. @@ -205,9 +202,9 @@ func (accountsDb *AccountsDb) GetAccount(slot uint64, pubkey solana.PublicKey) ( owner := solana.PublicKeyFromBytes(acct.Owner[:]) if owner == addresses.VoteProgramAddr { accountsDb.VoteAcctCache.Set(pubkey, acct) - } else if owner == addresses.StakeProgramAddr && accountsDb.InRewardsWindow.Load() { - // During reward distribution, stake accounts are one-shot reads that would - // evict genuinely hot accounts from the cache. Skip caching them. + } else if owner == addresses.StakeProgramAddr { + // Stake accounts are not cached - they're rarely accessed outside rewards, + // and during rewards they're one-shot reads that would evict hot accounts. } else { accountsDb.CommonAcctsCache.Set(pubkey, acct) } @@ -225,7 +222,6 @@ func (accountsDb *AccountsDb) StoreAccounts(accts []*accounts.Account, slot uint accountsDb.storeAccountsInternal(accts, slot) - inRewardsWindow := accountsDb.InRewardsWindow.Load() for _, acct := range accts { if acct == nil { continue @@ -233,9 +229,9 @@ func (accountsDb *AccountsDb) StoreAccounts(accts []*accounts.Account, slot uint owner := solana.PublicKeyFromBytes(acct.Owner[:]) if owner == addresses.VoteProgramAddr { accountsDb.VoteAcctCache.Set(acct.Key, acct) - } else if owner == addresses.StakeProgramAddr && inRewardsWindow { - // During reward distribution, stake accounts are one-shot writes that would - // evict genuinely hot accounts from the cache. Skip caching them. + } else if owner == addresses.StakeProgramAddr { + // Stake accounts are not cached - they're rarely accessed outside rewards, + // and during rewards they're one-shot writes that would evict hot accounts. 
} else { accountsDb.CommonAcctsCache.Set(acct.Key, acct) } diff --git a/pkg/replay/rewards.go b/pkg/replay/rewards.go index a84a2af9..60aebffb 100644 --- a/pkg/replay/rewards.go +++ b/pkg/replay/rewards.go @@ -226,8 +226,6 @@ func distributePartitionedEpochRewardsForSlot(acctsDb *accountsdb.AccountsDb, ep } } - // Set flag to prevent stake account cache pollution during one-shot reward reads/writes - acctsDb.InRewardsWindow.Store(true) distributedAccts, parentDistributedAccts, distributedLamports := rewards.DistributeStakingRewardsForPartition(acctsDb, partitionedEpochRewardsInfo.RewardPartitions.Partition(partitionIdx), partitionedEpochRewardsInfo.StakingRewards, currentSlot, partitionedEpochRewardsInfo.WorkerPool) parentDistributedAccts = append(parentDistributedAccts, epochRewardsAcct.Clone()) @@ -236,7 +234,6 @@ func distributePartitionedEpochRewardsForSlot(acctsDb *accountsdb.AccountsDb, ep if partitionedEpochRewardsInfo.NumRewardPartitionsRemaining == 0 { epochRewards.Active = false - acctsDb.InRewardsWindow.Store(false) partitionedEpochRewardsInfo.ReleaseWorkerPool() } From 9f3c4d7c46d1feed6e884649ddb5af1f9d757bdd Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 13:47:39 -0600 Subject: [PATCH 10/28] Add separate 2k-entry StakeAcctCache for stake accounts Stake accounts now have their own dedicated small LRU cache (2k entries) instead of competing with CommonAcctsCache. This prevents stake accounts from evicting genuinely hot non-stake accounts while still providing some caching benefit for stake accounts accessed multiple times outside the rewards period. Co-Authored-By: Claude Opus 4.5 --- pkg/accountsdb/accountsdb.go | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index 3b4cf8af..f4a83df6 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -26,10 +26,13 @@ type AccountsDb struct { LargestFileId atomic.Uint64 VoteAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Vote accounts cached separately (frequently accessed) CommonAcctsCache otter.Cache[solana.PublicKey, *accounts.Account] // General accounts (excludes vote & stake) + StakeAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Stake accounts cached separately (small 2k cache) ProgramCache otter.Cache[solana.PublicKey, *ProgramCacheEntry] - // Note: Stake accounts are intentionally NOT cached. They're rarely accessed outside - // epoch rewards, and during the ~243 slot reward window, ~1.25M stake accounts would - // completely thrash any reasonably-sized cache, evicting genuinely hot accounts. + // Note: Stake accounts have their own small cache (2k entries) separate from CommonAcctsCache. + // During the ~243 slot reward window, ~1.25M stake accounts are touched exactly once each, + // so caching provides no benefit there. But outside rewards, some stake accounts may be + // accessed multiple times, and having a separate cache prevents them from evicting hot + // non-stake accounts from CommonAcctsCache. } // silentLogger implements pebble.Logger but discards all messages. @@ -129,6 +132,15 @@ func (accountsDb *AccountsDb) InitCaches() { if err != nil { panic(err) } + + accountsDb.StakeAcctCache, err = otter.MustBuilder[solana.PublicKey, *accounts.Account](2000). + Cost(func(key solana.PublicKey, acct *accounts.Account) uint32 { + return 1 + }). 
+ Build() + if err != nil { + panic(err) + } } type ProgramCacheEntry struct { @@ -154,6 +166,11 @@ func (accountsDb *AccountsDb) GetAccount(slot uint64, pubkey solana.PublicKey) ( return cachedAcct, nil } + cachedAcct, hasAcct = accountsDb.StakeAcctCache.Get(pubkey) + if hasAcct { + return cachedAcct, nil + } + cachedAcct, hasAcct = accountsDb.CommonAcctsCache.Get(pubkey) if hasAcct { return cachedAcct, nil @@ -203,8 +220,8 @@ func (accountsDb *AccountsDb) GetAccount(slot uint64, pubkey solana.PublicKey) ( if owner == addresses.VoteProgramAddr { accountsDb.VoteAcctCache.Set(pubkey, acct) } else if owner == addresses.StakeProgramAddr { - // Stake accounts are not cached - they're rarely accessed outside rewards, - // and during rewards they're one-shot reads that would evict hot accounts. + // Stake accounts have their own small cache to prevent evicting hot non-stake accounts + accountsDb.StakeAcctCache.Set(pubkey, acct) } else { accountsDb.CommonAcctsCache.Set(pubkey, acct) } @@ -230,8 +247,8 @@ func (accountsDb *AccountsDb) StoreAccounts(accts []*accounts.Account, slot uint if owner == addresses.VoteProgramAddr { accountsDb.VoteAcctCache.Set(acct.Key, acct) } else if owner == addresses.StakeProgramAddr { - // Stake accounts are not cached - they're rarely accessed outside rewards, - // and during rewards they're one-shot writes that would evict hot accounts. + // Stake accounts have their own small cache to prevent evicting hot non-stake accounts + accountsDb.StakeAcctCache.Set(acct.Key, acct) } else { accountsDb.CommonAcctsCache.Set(acct.Key, acct) } From 3e04c1a48a370135fda27e89895dd2b695959496 Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 14:12:24 -0600 Subject: [PATCH 11/28] Make AccountsDB LRU cache sizes configurable via config.toml Added [tuning.cache] section with: - vote_acct_lru: Vote account data cache (default 5000) - stake_acct_lru: Stake account data cache (default 2000) - common_acct_lru: General account cache (default 10000) - program_lru: Compiled BPF program cache (default 5000) Config comments clarify these are different from the global vote/stake caches used for leader schedule (which are unbounded maps storing vote state and delegations, not full account data). 
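
Usage sketch (mirrors the node.go wiring in this patch): any key left out of
[tuning.cache] resolves to 0 via config.GetInt, and InitCaches then substitutes
the built-in default noted in the comments.

```go
accountsDb.InitCaches(
	config.GetInt("tuning.cache.vote_acct_lru"),   // default 5000
	config.GetInt("tuning.cache.stake_acct_lru"),  // default 2000
	config.GetInt("tuning.cache.common_acct_lru"), // default 10000
	config.GetInt("tuning.cache.program_lru"),     // default 5000
)
```
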
Co-Authored-By: Claude Opus 4.5 --- cmd/mithril/node/node.go | 14 ++++++++++++-- config.example.toml | 26 ++++++++++++++++++++++++++ pkg/accountsdb/accountsdb.go | 29 ++++++++++++++++++++++++----- pkg/config/config.go | 12 ++++++++++++ 4 files changed, 74 insertions(+), 7 deletions(-) diff --git a/cmd/mithril/node/node.go b/cmd/mithril/node/node.go index 0ad87901..1c2affd3 100644 --- a/cmd/mithril/node/node.go +++ b/cmd/mithril/node/node.go @@ -855,7 +855,12 @@ func runVerifyRange(c *cobra.Command, args []string) { klog.Fatalf("end slot cannot be lower than start slot") } mlog.Log.Infof("will replay startSlot=%d endSlot=%d", startSlot, endSlot) - accountsDb.InitCaches() + accountsDb.InitCaches( + config.GetInt("tuning.cache.vote_acct_lru"), + config.GetInt("tuning.cache.stake_acct_lru"), + config.GetInt("tuning.cache.common_acct_lru"), + config.GetInt("tuning.cache.program_lru"), + ) metricsWriter, metricsWriterCleanup, err := createBufWriter(metricsPath) if err != nil { @@ -1610,7 +1615,12 @@ postBootstrap: } liveEndSlot := uint64(math.MaxUint64) - accountsDb.InitCaches() + accountsDb.InitCaches( + config.GetInt("tuning.cache.vote_acct_lru"), + config.GetInt("tuning.cache.stake_acct_lru"), + config.GetInt("tuning.cache.common_acct_lru"), + config.GetInt("tuning.cache.program_lru"), + ) metricsWriter, metricsWriterCleanup, err := createBufWriter(metricsPath) if err != nil { diff --git a/config.example.toml b/config.example.toml index 488cf479..4d31bfa2 100644 --- a/config.example.toml +++ b/config.example.toml @@ -263,6 +263,32 @@ name = "mithril" # Filename to write CPU profile (for offline analysis) # cpu_profile_path = "/tmp/cpuprof.pprof" + # [tuning.cache] - AccountsDB LRU Cache Sizes + # + # These control the LRU caches for fast account data reads during replay. + # Values are NUMBER OF ENTRIES, not bytes. + # + # NOTE: These are DIFFERENT from the global vote/stake caches used for + # leader schedule building. Those are unbounded maps that store vote STATE + # (voting history, credits) and stake DELEGATIONS. These LRU caches store + # full ACCOUNT data for frequently-accessed accounts. + # + # Larger caches = fewer disk reads, but more memory usage. + # Memory per entry is ~200-1000 bytes depending on account data size. + [tuning.cache] + # Vote account data cache - number of entries (frequently accessed during replay) + vote_acct_lru = 5000 + + # Stake account data cache - number of entries (separated to avoid evicting + # hot accounts during epoch rewards when ~1.25M stake accounts are touched once each) + stake_acct_lru = 2000 + + # General account data cache - number of entries (everything except vote & stake) + common_acct_lru = 10000 + + # Compiled BPF program cache - number of entries + program_lru = 5000 + # ============================================================================ # [debug] - Debug Logging # ============================================================================ diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index f4a83df6..1337f436 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -104,9 +104,25 @@ func (accountsDb *AccountsDb) CloseDb() { mlog.Log.Infof("CloseDb: done\n") // extra newline for spacing after close } -func (accountsDb *AccountsDb) InitCaches() { +// InitCaches initializes the LRU caches with the given sizes. +// Pass 0 for any size to use a reasonable builtin value. 
+func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, commonSize, programSize int) { + // Apply builtin values when config not set + if voteSize <= 0 { + voteSize = 5000 + } + if stakeSize <= 0 { + stakeSize = 2000 + } + if commonSize <= 0 { + commonSize = 10000 + } + if programSize <= 0 { + programSize = 5000 + } + var err error - accountsDb.VoteAcctCache, err = otter.MustBuilder[solana.PublicKey, *accounts.Account](5000). + accountsDb.VoteAcctCache, err = otter.MustBuilder[solana.PublicKey, *accounts.Account](voteSize). Cost(func(key solana.PublicKey, acct *accounts.Account) uint32 { return 1 }). @@ -115,7 +131,7 @@ func (accountsDb *AccountsDb) InitCaches() { panic(err) } - accountsDb.ProgramCache, err = otter.MustBuilder[solana.PublicKey, *ProgramCacheEntry](5000). + accountsDb.ProgramCache, err = otter.MustBuilder[solana.PublicKey, *ProgramCacheEntry](programSize). Cost(func(key solana.PublicKey, progEntry *ProgramCacheEntry) uint32 { return 1 }). @@ -124,7 +140,7 @@ func (accountsDb *AccountsDb) InitCaches() { panic(err) } - accountsDb.CommonAcctsCache, err = otter.MustBuilder[solana.PublicKey, *accounts.Account](10000). + accountsDb.CommonAcctsCache, err = otter.MustBuilder[solana.PublicKey, *accounts.Account](commonSize). Cost(func(key solana.PublicKey, acct *accounts.Account) uint32 { return 1 }). @@ -133,7 +149,7 @@ func (accountsDb *AccountsDb) InitCaches() { panic(err) } - accountsDb.StakeAcctCache, err = otter.MustBuilder[solana.PublicKey, *accounts.Account](2000). + accountsDb.StakeAcctCache, err = otter.MustBuilder[solana.PublicKey, *accounts.Account](stakeSize). Cost(func(key solana.PublicKey, acct *accounts.Account) uint32 { return 1 }). @@ -141,6 +157,9 @@ func (accountsDb *AccountsDb) InitCaches() { if err != nil { panic(err) } + + mlog.Log.Infof("AccountsDB caches initialized: vote=%d stake=%d common=%d program=%d", + voteSize, stakeSize, commonSize, programSize) } type ProgramCacheEntry struct { diff --git a/pkg/config/config.go b/pkg/config/config.go index ef953e00..3424ae18 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -43,6 +43,17 @@ type DebugConfig struct { AccountWrites []string `toml:"account_writes" mapstructure:"account_writes"` // was: debugacctwrites } +// CacheConfig holds LRU cache sizing for AccountsDB +// These are DIFFERENT from the global vote/stake caches used for leader schedule - +// those are unbounded maps holding vote state and delegation info. +// These LRU caches store full account data for fast reads during replay. 
+type CacheConfig struct { + VoteAcctLRU int `toml:"vote_acct_lru" mapstructure:"vote_acct_lru"` // Vote account data (default: 5000) + StakeAcctLRU int `toml:"stake_acct_lru" mapstructure:"stake_acct_lru"` // Stake account data (default: 2000) + CommonAcctLRU int `toml:"common_acct_lru" mapstructure:"common_acct_lru"` // All other accounts (default: 10000) + ProgramLRU int `toml:"program_lru" mapstructure:"program_lru"` // Compiled BPF programs (default: 5000) +} + // DevelopmentConfig holds development/tuning configuration (matches Firedancer [development] section) type DevelopmentConfig struct { ZstdDecoderConcurrency int `toml:"zstd_decoder_concurrency" mapstructure:"zstd_decoder_concurrency"` // was: zstd-decoder-concurrency @@ -52,6 +63,7 @@ type DevelopmentConfig struct { UsePool bool `toml:"use_pool" mapstructure:"use_pool"` // was: use-pool Pprof PprofConfig `toml:"pprof" mapstructure:"pprof"` Debug DebugConfig `toml:"debug" mapstructure:"debug"` + Cache CacheConfig `toml:"cache" mapstructure:"cache"` } // ReportingConfig holds metrics/reporting configuration (matches Firedancer [reporting] section) From fa1a5ca07e9522c6f9e1889428cc739f185f348e Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 14:54:06 -0600 Subject: [PATCH 12/28] Improve snapshot log message consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - "Using snapshot file:" → "Using full snapshot:" - "Parsing manifest from {path}" → "Parsing full/incremental snapshot manifest..." - Remove redundant path repetition after initial "Using" lines Co-Authored-By: Claude Opus 4.5 --- cmd/mithril/node/node.go | 2 +- pkg/snapshot/build_db.go | 8 ++++---- pkg/snapshot/build_db_with_incr.go | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cmd/mithril/node/node.go b/cmd/mithril/node/node.go index 1c2affd3..fce20fbe 100644 --- a/cmd/mithril/node/node.go +++ b/cmd/mithril/node/node.go @@ -1165,7 +1165,7 @@ func runLive(c *cobra.Command, args []string) { // Handle explicit --snapshot flag (bypasses all auto-discovery, does NOT delete snapshot files) if snapshotArchivePath != "" { - mlog.Log.Infof("Using snapshot file: %s", snapshotArchivePath) + mlog.Log.Infof("Using full snapshot: %s", snapshotArchivePath) // Parse full snapshot slot from filename for validation fullSnapshotSlot := parseSlotFromSnapshotName(filepath.Base(snapshotArchivePath)) diff --git a/pkg/snapshot/build_db.go b/pkg/snapshot/build_db.go index 31054b98..bc5e6f45 100644 --- a/pkg/snapshot/build_db.go +++ b/pkg/snapshot/build_db.go @@ -164,21 +164,21 @@ func BuildAccountsDbPaths( // Clean any leftover artifacts from previous incomplete runs (e.g., Ctrl+C) CleanAccountsDbDir(accountsDbDir) - mlog.Log.Infof("Parsing manifest from %s", snapshotFile) + mlog.Log.Infof("Parsing full snapshot manifest...") manifest, err := UnmarshalManifestFromSnapshot(ctx, snapshotFile, accountsDbDir) if err != nil { return nil, nil, fmt.Errorf("reading snapshot manifest: %v", err) } - mlog.Log.Infof("Parsed manifest from full snapshot") + mlog.Log.Infof("Parsed full snapshot manifest") var incrementalManifest *SnapshotManifest if incrementalSnapshotFile != "" { - mlog.Log.Infof("Parsing manifest from %s", incrementalSnapshotFile) + mlog.Log.Infof("Parsing incremental snapshot manifest...") incrementalManifest, err = UnmarshalManifestFromSnapshot(ctx, incrementalSnapshotFile, accountsDbDir) if err != nil { return nil, nil, fmt.Errorf("reading incremental 
snapshot manifest: %v", err) } - mlog.Log.Infof("Parsed manifest from incremental snapshot") + mlog.Log.Infof("Parsed incremental snapshot manifest") } start := time.Now() diff --git a/pkg/snapshot/build_db_with_incr.go b/pkg/snapshot/build_db_with_incr.go index f60a9bbd..c942685f 100644 --- a/pkg/snapshot/build_db_with_incr.go +++ b/pkg/snapshot/build_db_with_incr.go @@ -50,12 +50,12 @@ func BuildAccountsDbAuto( // Clean any leftover artifacts from previous incomplete runs (e.g., Ctrl+C) CleanAccountsDbDir(accountsDbDir) - mlog.Log.Infof("Parsing manifest from %s", fullSnapshotFile) + mlog.Log.Infof("Parsing full snapshot manifest...") manifest, err := UnmarshalManifestFromSnapshot(ctx, fullSnapshotFile, accountsDbDir) if err != nil { return nil, nil, fmt.Errorf("reading snapshot manifest: %v", err) } - mlog.Log.Infof("Parsed manifest from full snapshot") + mlog.Log.Infof("Parsed full snapshot manifest") start := time.Now() @@ -171,7 +171,7 @@ func BuildAccountsDbAuto( } incrSnapshotStart := time.Now() - mlog.Log.Infof("Parsing manifest from %s", incrementalSnapshotPath) + mlog.Log.Infof("Parsing incremental snapshot manifest...") incrementalManifestCopy, err := UnmarshalManifestFromSnapshot(ctx, incrementalSnapshotPath, accountsDbDir) if err != nil { mlog.Log.Errorf("reading incremental snapshot manifest: %v", err) @@ -179,7 +179,7 @@ func BuildAccountsDbAuto( } // Copy the manifest so the worker pool's pointer has the value. *incrementalManifest = *incrementalManifestCopy - mlog.Log.Infof("Parsed manifest from incremental snapshot") + mlog.Log.Infof("Parsed incremental snapshot manifest") // Determine save path for incremental snapshot if streaming from HTTP var incrSavePath string From e227175ac9ad7cca002ee3c675adcb7801bb926e Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 15:38:54 -0600 Subject: [PATCH 13/28] fix: prevent stale cache entries when account owner changes Add cacheAccount() helper that evicts from ALL caches before inserting into the correct one based on owner. This prevents stale data when an account changes owner (e.g., stake account closed becomes system-owned). Previously, GetAccount would find stale entries in the old cache since it checks specialized caches first. Co-Authored-By: Claude Opus 4.5 --- pkg/accountsdb/accountsdb.go | 38 +++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index 1337f436..81c26a61 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -179,6 +179,24 @@ func (accountsDb *AccountsDb) RemoveProgramFromCache(pubkey solana.PublicKey) { accountsDb.ProgramCache.Delete(pubkey) } +// cacheAccount evicts stale entries from all caches, then inserts into the correct +// cache based on owner. This prevents stale data when an account changes owner +// (e.g., stake account closed becomes system-owned). 
+func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account) { + accountsDb.VoteAcctCache.Delete(acct.Key) + accountsDb.StakeAcctCache.Delete(acct.Key) + accountsDb.CommonAcctsCache.Delete(acct.Key) + + owner := solana.PublicKeyFromBytes(acct.Owner[:]) + if owner == addresses.VoteProgramAddr { + accountsDb.VoteAcctCache.Set(acct.Key, acct) + } else if owner == addresses.StakeProgramAddr { + accountsDb.StakeAcctCache.Set(acct.Key, acct) + } else { + accountsDb.CommonAcctsCache.Set(acct.Key, acct) + } +} + func (accountsDb *AccountsDb) GetAccount(slot uint64, pubkey solana.PublicKey) (*accounts.Account, error) { cachedAcct, hasAcct := accountsDb.VoteAcctCache.Get(pubkey) if hasAcct { @@ -235,15 +253,7 @@ func (accountsDb *AccountsDb) GetAccount(slot uint64, pubkey solana.PublicKey) ( acct.Slot = acctIdxEntry.Slot - owner := solana.PublicKeyFromBytes(acct.Owner[:]) - if owner == addresses.VoteProgramAddr { - accountsDb.VoteAcctCache.Set(pubkey, acct) - } else if owner == addresses.StakeProgramAddr { - // Stake accounts have their own small cache to prevent evicting hot non-stake accounts - accountsDb.StakeAcctCache.Set(pubkey, acct) - } else { - accountsDb.CommonAcctsCache.Set(pubkey, acct) - } + accountsDb.cacheAccount(acct) return acct, err } @@ -262,15 +272,7 @@ func (accountsDb *AccountsDb) StoreAccounts(accts []*accounts.Account, slot uint if acct == nil { continue } - owner := solana.PublicKeyFromBytes(acct.Owner[:]) - if owner == addresses.VoteProgramAddr { - accountsDb.VoteAcctCache.Set(acct.Key, acct) - } else if owner == addresses.StakeProgramAddr { - // Stake accounts have their own small cache to prevent evicting hot non-stake accounts - accountsDb.StakeAcctCache.Set(acct.Key, acct) - } else { - accountsDb.CommonAcctsCache.Set(acct.Key, acct) - } + accountsDb.cacheAccount(acct) } return nil From c7691eed8a5948e3c8215b7a2184db13a205f992 Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 15:55:36 -0600 Subject: [PATCH 14/28] perf: skip caching new stake accounts during reward distribution Add InRewardsWindow flag to AccountsDb. When true, stake accounts are only updated in cache if already present - new entries are not added. During the ~243 slot reward window, ~1.25M stake accounts are touched exactly once each. Without this optimization, every access would evict an older entry (cache thrash with 0% hit rate). With this change: - Hot stake accounts (already cached) get updated with new reward data - Cold stake accounts (1.25M - 2k) stay out of cache - Zero thrashing, zero evictions of hot entries Co-Authored-By: Claude Opus 4.5 --- pkg/accountsdb/accountsdb.go | 34 ++++++++++++++++++++++++---------- pkg/replay/rewards.go | 4 ++++ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index 81c26a61..4f8fb73b 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -28,11 +28,11 @@ type AccountsDb struct { CommonAcctsCache otter.Cache[solana.PublicKey, *accounts.Account] // General accounts (excludes vote & stake) StakeAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Stake accounts cached separately (small 2k cache) ProgramCache otter.Cache[solana.PublicKey, *ProgramCacheEntry] + InRewardsWindow bool // When true, only update existing stake cache entries (don't add new ones) // Note: Stake accounts have their own small cache (2k entries) separate from CommonAcctsCache. 
- // During the ~243 slot reward window, ~1.25M stake accounts are touched exactly once each, - // so caching provides no benefit there. But outside rewards, some stake accounts may be - // accessed multiple times, and having a separate cache prevents them from evicting hot - // non-stake accounts from CommonAcctsCache. + // During the ~243 slot reward window, ~1.25M stake accounts are touched exactly once each. + // When InRewardsWindow is true, we only update existing cache entries - we don't add new ones. + // This prevents cache thrash while preserving hot stake accounts. } // silentLogger implements pebble.Logger but discards all messages. @@ -179,20 +179,34 @@ func (accountsDb *AccountsDb) RemoveProgramFromCache(pubkey solana.PublicKey) { accountsDb.ProgramCache.Delete(pubkey) } -// cacheAccount evicts stale entries from all caches, then inserts into the correct +// cacheAccount evicts stale entries from other caches, then inserts into the correct // cache based on owner. This prevents stale data when an account changes owner // (e.g., stake account closed becomes system-owned). +// +// During rewards window (InRewardsWindow=true), stake accounts are only updated if +// already cached - new entries are not added. This prevents cache thrash from the +// ~1.25M one-shot stake account accesses while preserving hot entries. func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account) { - accountsDb.VoteAcctCache.Delete(acct.Key) - accountsDb.StakeAcctCache.Delete(acct.Key) - accountsDb.CommonAcctsCache.Delete(acct.Key) - owner := solana.PublicKeyFromBytes(acct.Owner[:]) + if owner == addresses.VoteProgramAddr { + accountsDb.StakeAcctCache.Delete(acct.Key) + accountsDb.CommonAcctsCache.Delete(acct.Key) accountsDb.VoteAcctCache.Set(acct.Key, acct) } else if owner == addresses.StakeProgramAddr { - accountsDb.StakeAcctCache.Set(acct.Key, acct) + accountsDb.VoteAcctCache.Delete(acct.Key) + accountsDb.CommonAcctsCache.Delete(acct.Key) + // During rewards: only update existing entries, don't add new ones + if accountsDb.InRewardsWindow { + if _, exists := accountsDb.StakeAcctCache.Get(acct.Key); exists { + accountsDb.StakeAcctCache.Set(acct.Key, acct) + } + } else { + accountsDb.StakeAcctCache.Set(acct.Key, acct) + } } else { + accountsDb.VoteAcctCache.Delete(acct.Key) + accountsDb.StakeAcctCache.Delete(acct.Key) accountsDb.CommonAcctsCache.Set(acct.Key, acct) } } diff --git a/pkg/replay/rewards.go b/pkg/replay/rewards.go index 60aebffb..f76a73cf 100644 --- a/pkg/replay/rewards.go +++ b/pkg/replay/rewards.go @@ -226,7 +226,11 @@ func distributePartitionedEpochRewardsForSlot(acctsDb *accountsdb.AccountsDb, ep } } + // During reward distribution, don't add new stake accounts to cache (prevents thrash). + // Only existing hot entries are updated. 
+ acctsDb.InRewardsWindow = true distributedAccts, parentDistributedAccts, distributedLamports := rewards.DistributeStakingRewardsForPartition(acctsDb, partitionedEpochRewardsInfo.RewardPartitions.Partition(partitionIdx), partitionedEpochRewardsInfo.StakingRewards, currentSlot, partitionedEpochRewardsInfo.WorkerPool) + acctsDb.InRewardsWindow = false parentDistributedAccts = append(parentDistributedAccts, epochRewardsAcct.Clone()) epochRewards.Distribute(distributedLamports) From 4f55841e42c48a9da3ce433df89d1d84e3eb4b0e Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 16:22:31 -0600 Subject: [PATCH 15/28] perf: add cache hit/miss profiling to 100-slot summary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track cache hits and misses by type (common/stake/vote) and size bucket (small ≤256B, medium 257-4096B, large >4096B) to help identify caching opportunities. Output format: cache: 95.2% hit rate | hits: common 12500, stake 800, vote 4200 | misses: common 523 (s:412 m:89 l:22), stake 150 (s:150 m:0 l:0), vote 12 Atomic counters add negligible overhead (~3-10ns per increment). Co-Authored-By: Claude Opus 4.5 --- pkg/accountsdb/accountsdb.go | 80 ++++++++++++++++++++++++++++++++++++ pkg/replay/block.go | 18 +++++++- 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index 4f8fb73b..e1e35c15 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -46,6 +46,80 @@ var ( ErrNoAccount = errors.New("ErrNoAccount") ) +// Cache hit/miss counters for profiling +// Miss counters are by size bucket: Small ≤256 bytes, Medium 257-4096 bytes, Large >4096 bytes +var ( + // Cache hits (total per cache type) + CommonCacheHits atomic.Uint64 + StakeCacheHits atomic.Uint64 + VoteCacheHits atomic.Uint64 + + // Common cache misses (non-stake, non-vote accounts) + CommonCacheMissSmall atomic.Uint64 + CommonCacheMissMedium atomic.Uint64 + CommonCacheMissLarge atomic.Uint64 + + // Stake cache misses + StakeCacheMissSmall atomic.Uint64 + StakeCacheMissMedium atomic.Uint64 + StakeCacheMissLarge atomic.Uint64 + + // Vote cache misses (for completeness) + VoteCacheMissSmall atomic.Uint64 + VoteCacheMissMedium atomic.Uint64 + VoteCacheMissLarge atomic.Uint64 +) + +// CacheStats holds cache hit/miss counts for reporting +type CacheStats struct { + // Hits per cache type + CommonHits, StakeHits, VoteHits uint64 + // Misses by size bucket + CommonMissSmall, CommonMissMedium, CommonMissLarge uint64 + StakeMissSmall, StakeMissMedium, StakeMissLarge uint64 + VoteMissSmall, VoteMissMedium, VoteMissLarge uint64 +} + +// GetAndResetCacheStats returns current cache hit/miss counts and resets them +func GetAndResetCacheStats() CacheStats { + return CacheStats{ + CommonHits: CommonCacheHits.Swap(0), + StakeHits: StakeCacheHits.Swap(0), + VoteHits: VoteCacheHits.Swap(0), + CommonMissSmall: CommonCacheMissSmall.Swap(0), + CommonMissMedium: CommonCacheMissMedium.Swap(0), + CommonMissLarge: CommonCacheMissLarge.Swap(0), + StakeMissSmall: StakeCacheMissSmall.Swap(0), + StakeMissMedium: StakeCacheMissMedium.Swap(0), + StakeMissLarge: StakeCacheMissLarge.Swap(0), + VoteMissSmall: VoteCacheMissSmall.Swap(0), + VoteMissMedium: VoteCacheMissMedium.Swap(0), + VoteMissLarge: VoteCacheMissLarge.Swap(0), + } +} + +// recordCacheMiss increments the appropriate cache miss counter based on owner and size +func recordCacheMiss(owner solana.PublicKey, dataLen 
uint64) { + // Classify by size bucket + var small, medium, large *atomic.Uint64 + + if owner == addresses.VoteProgramAddr { + small, medium, large = &VoteCacheMissSmall, &VoteCacheMissMedium, &VoteCacheMissLarge + } else if owner == addresses.StakeProgramAddr { + small, medium, large = &StakeCacheMissSmall, &StakeCacheMissMedium, &StakeCacheMissLarge + } else { + small, medium, large = &CommonCacheMissSmall, &CommonCacheMissMedium, &CommonCacheMissLarge + } + + if dataLen <= 256 { + small.Add(1) + } else if dataLen <= 4096 { + medium.Add(1) + } else { + large.Add(1) + } +} + func OpenDb(accountsDbDir string) (*AccountsDb, error) { // check for existence of the 'accounts' directory, which holds the appendvecs appendVecsDir := fmt.Sprintf("%s/accounts", accountsDbDir) @@ -214,16 +288,19 @@ func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account) { func (accountsDb *AccountsDb) GetAccount(slot uint64, pubkey solana.PublicKey) (*accounts.Account, error) { cachedAcct, hasAcct := accountsDb.VoteAcctCache.Get(pubkey) if hasAcct { + VoteCacheHits.Add(1) return cachedAcct, nil } cachedAcct, hasAcct = accountsDb.StakeAcctCache.Get(pubkey) if hasAcct { + StakeCacheHits.Add(1) return cachedAcct, nil } cachedAcct, hasAcct = accountsDb.CommonAcctsCache.Get(pubkey) if hasAcct { + CommonCacheHits.Add(1) return cachedAcct, nil } @@ -267,6 +344,9 @@ func (accountsDb *AccountsDb) GetAccount(slot uint64, pubkey solana.PublicKey) ( acct.Slot = acctIdxEntry.Slot + // Record cache miss by owner type and size bucket (for profiling) + recordCacheMiss(solana.PublicKeyFromBytes(acct.Owner[:]), uint64(len(acct.Data))) + accountsDb.cacheAccount(acct) return acct, err diff --git a/pkg/replay/block.go b/pkg/replay/block.go index fce171ed..dac557f1 100644 --- a/pkg/replay/block.go +++ b/pkg/replay/block.go @@ -1690,7 +1690,23 @@ func ReplayBlocks( mlog.Log.InfofPrecise(" execution: median %.3fs, min %.3fs, max %.3fs | wait: median %.3fs, min %.3fs, max %.3fs | replay total: median %.3fs", medExec, minExec, maxExec, medWait, minWait, maxWait, medTotal) - // Line 4: RPC/fetch debugging info + // Line 4: Cache hit/miss stats + cs := accountsdb.GetAndResetCacheStats() + commonMiss := cs.CommonMissSmall + cs.CommonMissMedium + cs.CommonMissLarge + stakeMiss := cs.StakeMissSmall + cs.StakeMissMedium + cs.StakeMissLarge + voteMiss := cs.VoteMissSmall + cs.VoteMissMedium + cs.VoteMissLarge + totalHits := cs.CommonHits + cs.StakeHits + cs.VoteHits + totalMiss := commonMiss + stakeMiss + voteMiss + if totalHits+totalMiss > 0 { + hitRate := float64(totalHits) / float64(totalHits+totalMiss) * 100 + mlog.Log.InfofPrecise(" cache: %.1f%% hit rate | hits: common %d, stake %d, vote %d | misses: common %d (s:%d m:%d l:%d), stake %d (s:%d m:%d l:%d), vote %d", + hitRate, cs.CommonHits, cs.StakeHits, cs.VoteHits, + commonMiss, cs.CommonMissSmall, cs.CommonMissMedium, cs.CommonMissLarge, + stakeMiss, cs.StakeMissSmall, cs.StakeMissMedium, cs.StakeMissLarge, + voteMiss) + } + + // Line 5: RPC/fetch debugging info if fetchStats.Attempts > 0 { retryRate := float64(fetchStats.Retries) / float64(fetchStats.Attempts) * 100 prefetch := fetchStats.BufferDepth + fetchStats.ReorderBufLen From cdb5125bab45f429b489fdaff62efc14d03548eb Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 16:36:53 -0600 Subject: [PATCH 16/28] perf: split common cache into small/large account caches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - SmallAcctCache (500k 
entries, ~100MB): accounts ≤256 bytes - LargeAcctCache (10k entries): accounts >256 bytes Small accounts (token accounts ~165B, etc.) now have dedicated cache that won't be evicted by larger accounts. 500k entries is cheap at ~100MB since small accounts average ~200 bytes. Updated 100-slot summary to show per-cache hits/misses with size breakdown for large cache misses (medium 257-4096B vs large >4096B). Co-Authored-By: Claude Opus 4.5 --- cmd/mithril/node/node.go | 6 +- config.example.toml | 8 +- pkg/accountsdb/accountsdb.go | 145 +++++++++++++++++------------------ pkg/config/config.go | 9 ++- pkg/replay/block.go | 17 ++-- 5 files changed, 93 insertions(+), 92 deletions(-) diff --git a/cmd/mithril/node/node.go b/cmd/mithril/node/node.go index fce20fbe..8e268bbe 100644 --- a/cmd/mithril/node/node.go +++ b/cmd/mithril/node/node.go @@ -858,7 +858,8 @@ func runVerifyRange(c *cobra.Command, args []string) { accountsDb.InitCaches( config.GetInt("tuning.cache.vote_acct_lru"), config.GetInt("tuning.cache.stake_acct_lru"), - config.GetInt("tuning.cache.common_acct_lru"), + config.GetInt("tuning.cache.small_acct_lru"), + config.GetInt("tuning.cache.large_acct_lru"), config.GetInt("tuning.cache.program_lru"), ) @@ -1618,7 +1619,8 @@ postBootstrap: accountsDb.InitCaches( config.GetInt("tuning.cache.vote_acct_lru"), config.GetInt("tuning.cache.stake_acct_lru"), - config.GetInt("tuning.cache.common_acct_lru"), + config.GetInt("tuning.cache.small_acct_lru"), + config.GetInt("tuning.cache.large_acct_lru"), config.GetInt("tuning.cache.program_lru"), ) diff --git a/config.example.toml b/config.example.toml index 4d31bfa2..2638440e 100644 --- a/config.example.toml +++ b/config.example.toml @@ -283,8 +283,12 @@ name = "mithril" # hot accounts during epoch rewards when ~1.25M stake accounts are touched once each) stake_acct_lru = 2000 - # General account data cache - number of entries (everything except vote & stake) - common_acct_lru = 10000 + # Small account data cache - accounts ≤256 bytes (token accounts, etc.) 
+ # Large size (500k) is cheap since small accounts are ~200 bytes each (~100MB total) + small_acct_lru = 500000 + + # Large account data cache - accounts >256 bytes + large_acct_lru = 10000 # Compiled BPF program cache - number of entries program_lru = 5000 diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index e1e35c15..0c4c6e5f 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -24,15 +24,12 @@ type AccountsDb struct { BankHashStore *pebble.DB AcctsDir string LargestFileId atomic.Uint64 - VoteAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Vote accounts cached separately (frequently accessed) - CommonAcctsCache otter.Cache[solana.PublicKey, *accounts.Account] // General accounts (excludes vote & stake) - StakeAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Stake accounts cached separately (small 2k cache) + VoteAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Vote accounts (frequently accessed) + StakeAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Stake accounts (small 2k cache) + SmallAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Small accounts ≤256 bytes (500k entries, ~100MB) + LargeAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Large accounts >256 bytes (10k entries) ProgramCache otter.Cache[solana.PublicKey, *ProgramCacheEntry] InRewardsWindow bool // When true, only update existing stake cache entries (don't add new ones) - // Note: Stake accounts have their own small cache (2k entries) separate from CommonAcctsCache. - // During the ~243 slot reward window, ~1.25M stake accounts are touched exactly once each. - // When InRewardsWindow is true, we only update existing cache entries - we don't add new ones. - // This prevents cache thrash while preserving hot stake accounts. } // silentLogger implements pebble.Logger but discards all messages. 
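As a rough standalone sketch of the owner/size routing this split introduces (the pickCache helper below is hypothetical and only summarises the dispatch; it assumes it sits in pkg/accountsdb alongside the code above, where otter, accounts, addresses and solana are already imported). The 500k small-cache default follows from simple arithmetic: 500k entries at roughly 200 bytes of data each is on the order of 100MB.

	// pickCache mirrors the routing described in the commit message: vote and
	// stake accounts keep their dedicated caches, and all other accounts are
	// split at the 256-byte boundary.
	func pickCache(db *AccountsDb, owner solana.PublicKey, dataLen int) otter.Cache[solana.PublicKey, *accounts.Account] {
		switch {
		case owner == addresses.VoteProgramAddr:
			return db.VoteAcctCache
		case owner == addresses.StakeProgramAddr:
			return db.StakeAcctCache
		case dataLen <= 256:
			return db.SmallAcctCache
		default:
			return db.LargeAcctCache
		}
	}
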
@@ -47,76 +44,56 @@ var ( ) // Cache hit/miss counters for profiling -// Miss counters are by size bucket: Small ≤256 bytes, Medium 257-4096 bytes, Large >4096 bytes var ( - // Cache hits (total per cache type) - CommonCacheHits atomic.Uint64 - StakeCacheHits atomic.Uint64 - VoteCacheHits atomic.Uint64 - - // Common cache misses (non-stake, non-vote accounts) - CommonCacheMissSmall atomic.Uint64 - CommonCacheMissMedium atomic.Uint64 - CommonCacheMissLarge atomic.Uint64 - - // Stake cache misses - StakeCacheMissSmall atomic.Uint64 - StakeCacheMissMedium atomic.Uint64 - StakeCacheMissLarge atomic.Uint64 - - // Vote cache misses (for completeness) - VoteCacheMissSmall atomic.Uint64 - VoteCacheMissMedium atomic.Uint64 - VoteCacheMissLarge atomic.Uint64 + // Cache hits per cache type + SmallCacheHits atomic.Uint64 // Small accounts ≤256 bytes + LargeCacheHits atomic.Uint64 // Large accounts >256 bytes + StakeCacheHits atomic.Uint64 + VoteCacheHits atomic.Uint64 + + // Cache misses + SmallCacheMisses atomic.Uint64 // Small accounts ≤256 bytes + LargeCacheMissMedium atomic.Uint64 // Large cache: 257-4096 bytes + LargeCacheMissLarge atomic.Uint64 // Large cache: >4096 bytes + StakeCacheMisses atomic.Uint64 + VoteCacheMisses atomic.Uint64 ) // CacheStats holds cache hit/miss counts for reporting type CacheStats struct { - // Hits per cache type - CommonHits, StakeHits, VoteHits uint64 - // Misses by size bucket - CommonMissSmall, CommonMissMedium, CommonMissLarge uint64 - StakeMissSmall, StakeMissMedium, StakeMissLarge uint64 - VoteMissSmall, VoteMissMedium, VoteMissLarge uint64 + SmallHits, LargeHits, StakeHits, VoteHits uint64 + SmallMisses uint64 + LargeMissMedium, LargeMissLarge uint64 // Size breakdown for large cache + StakeMisses, VoteMisses uint64 } // GetAndResetCacheStats returns current cache hit/miss counts and resets them func GetAndResetCacheStats() CacheStats { return CacheStats{ - CommonHits: CommonCacheHits.Swap(0), - StakeHits: StakeCacheHits.Swap(0), - VoteHits: VoteCacheHits.Swap(0), - CommonMissSmall: CommonCacheMissSmall.Swap(0), - CommonMissMedium: CommonCacheMissMedium.Swap(0), - CommonMissLarge: CommonCacheMissLarge.Swap(0), - StakeMissSmall: StakeCacheMissSmall.Swap(0), - StakeMissMedium: StakeCacheMissMedium.Swap(0), - StakeMissLarge: StakeCacheMissLarge.Swap(0), - VoteMissSmall: VoteCacheMissSmall.Swap(0), - VoteMissMedium: VoteCacheMissMedium.Swap(0), - VoteMissLarge: VoteCacheMissLarge.Swap(0), + SmallHits: SmallCacheHits.Swap(0), + LargeHits: LargeCacheHits.Swap(0), + StakeHits: StakeCacheHits.Swap(0), + VoteHits: VoteCacheHits.Swap(0), + SmallMisses: SmallCacheMisses.Swap(0), + LargeMissMedium: LargeCacheMissMedium.Swap(0), + LargeMissLarge: LargeCacheMissLarge.Swap(0), + StakeMisses: StakeCacheMisses.Swap(0), + VoteMisses: VoteCacheMisses.Swap(0), } } // recordCacheMiss increments the appropriate cache miss counter based on owner and size func recordCacheMiss(owner solana.PublicKey, dataLen uint64) { - // Classify by size bucket - var small, medium, large *atomic.Uint64 - if owner == addresses.VoteProgramAddr { - small, medium, large = &VoteCacheMissSmall, &VoteCacheMissMedium, &VoteCacheMissLarge + VoteCacheMisses.Add(1) } else if owner == addresses.StakeProgramAddr { - small, medium, large = &StakeCacheMissSmall, &StakeCacheMissMedium, &StakeCacheMissLarge - } else { - small, medium, large = &CommonCacheMissSmall, &CommonCacheMissMedium, &CommonCacheMissLarge - } - - if dataLen <= 256 { - small.Add(1) + StakeCacheMisses.Add(1) + } else if dataLen <= 256 { + 
SmallCacheMisses.Add(1) } else if dataLen <= 4096 { - medium.Add(1) + LargeCacheMissMedium.Add(1) // 257-4096 bytes } else { - large.Add(1) + LargeCacheMissLarge.Add(1) // >4096 bytes } } @@ -180,7 +157,7 @@ func (accountsDb *AccountsDb) CloseDb() { // InitCaches initializes the LRU caches with the given sizes. // Pass 0 for any size to use a reasonable builtin value. -func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, commonSize, programSize int) { +func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, smallSize, largeSize, programSize int) { // Apply builtin values when config not set if voteSize <= 0 { voteSize = 5000 @@ -188,8 +165,11 @@ func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, commonSize, progra if stakeSize <= 0 { stakeSize = 2000 } - if commonSize <= 0 { - commonSize = 10000 + if smallSize <= 0 { + smallSize = 500000 // 500k small accounts (~100MB) + } + if largeSize <= 0 { + largeSize = 10000 // 10k large accounts } if programSize <= 0 { programSize = 5000 @@ -214,7 +194,16 @@ func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, commonSize, progra panic(err) } - accountsDb.CommonAcctsCache, err = otter.MustBuilder[solana.PublicKey, *accounts.Account](commonSize). + accountsDb.SmallAcctCache, err = otter.MustBuilder[solana.PublicKey, *accounts.Account](smallSize). + Cost(func(key solana.PublicKey, acct *accounts.Account) uint32 { + return 1 + }). + Build() + if err != nil { + panic(err) + } + + accountsDb.LargeAcctCache, err = otter.MustBuilder[solana.PublicKey, *accounts.Account](largeSize). Cost(func(key solana.PublicKey, acct *accounts.Account) uint32 { return 1 }). @@ -232,8 +221,8 @@ func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, commonSize, progra panic(err) } - mlog.Log.Infof("AccountsDB caches initialized: vote=%d stake=%d common=%d program=%d", - voteSize, stakeSize, commonSize, programSize) + mlog.Log.Infof("AccountsDB caches initialized: vote=%d stake=%d small=%d large=%d program=%d", + voteSize, stakeSize, smallSize, largeSize, programSize) } type ProgramCacheEntry struct { @@ -254,7 +243,7 @@ func (accountsDb *AccountsDb) RemoveProgramFromCache(pubkey solana.PublicKey) { } // cacheAccount evicts stale entries from other caches, then inserts into the correct -// cache based on owner. This prevents stale data when an account changes owner +// cache based on owner and size. This prevents stale data when an account changes owner // (e.g., stake account closed becomes system-owned). 
// // During rewards window (InRewardsWindow=true), stake accounts are only updated if @@ -263,13 +252,15 @@ func (accountsDb *AccountsDb) RemoveProgramFromCache(pubkey solana.PublicKey) { func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account) { owner := solana.PublicKeyFromBytes(acct.Owner[:]) + // Always evict from all caches first to prevent stale entries + accountsDb.VoteAcctCache.Delete(acct.Key) + accountsDb.StakeAcctCache.Delete(acct.Key) + accountsDb.SmallAcctCache.Delete(acct.Key) + accountsDb.LargeAcctCache.Delete(acct.Key) + if owner == addresses.VoteProgramAddr { - accountsDb.StakeAcctCache.Delete(acct.Key) - accountsDb.CommonAcctsCache.Delete(acct.Key) accountsDb.VoteAcctCache.Set(acct.Key, acct) } else if owner == addresses.StakeProgramAddr { - accountsDb.VoteAcctCache.Delete(acct.Key) - accountsDb.CommonAcctsCache.Delete(acct.Key) // During rewards: only update existing entries, don't add new ones if accountsDb.InRewardsWindow { if _, exists := accountsDb.StakeAcctCache.Get(acct.Key); exists { @@ -278,10 +269,10 @@ func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account) { } else { accountsDb.StakeAcctCache.Set(acct.Key, acct) } + } else if len(acct.Data) <= 256 { + accountsDb.SmallAcctCache.Set(acct.Key, acct) } else { - accountsDb.VoteAcctCache.Delete(acct.Key) - accountsDb.StakeAcctCache.Delete(acct.Key) - accountsDb.CommonAcctsCache.Set(acct.Key, acct) + accountsDb.LargeAcctCache.Set(acct.Key, acct) } } @@ -298,9 +289,15 @@ func (accountsDb *AccountsDb) GetAccount(slot uint64, pubkey solana.PublicKey) ( return cachedAcct, nil } - cachedAcct, hasAcct = accountsDb.CommonAcctsCache.Get(pubkey) + cachedAcct, hasAcct = accountsDb.SmallAcctCache.Get(pubkey) + if hasAcct { + SmallCacheHits.Add(1) + return cachedAcct, nil + } + + cachedAcct, hasAcct = accountsDb.LargeAcctCache.Get(pubkey) if hasAcct { - CommonCacheHits.Add(1) + LargeCacheHits.Add(1) return cachedAcct, nil } diff --git a/pkg/config/config.go b/pkg/config/config.go index 3424ae18..4644b2b3 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -48,10 +48,11 @@ type DebugConfig struct { // those are unbounded maps holding vote state and delegation info. // These LRU caches store full account data for fast reads during replay. 
type CacheConfig struct { - VoteAcctLRU int `toml:"vote_acct_lru" mapstructure:"vote_acct_lru"` // Vote account data (default: 5000) - StakeAcctLRU int `toml:"stake_acct_lru" mapstructure:"stake_acct_lru"` // Stake account data (default: 2000) - CommonAcctLRU int `toml:"common_acct_lru" mapstructure:"common_acct_lru"` // All other accounts (default: 10000) - ProgramLRU int `toml:"program_lru" mapstructure:"program_lru"` // Compiled BPF programs (default: 5000) + VoteAcctLRU int `toml:"vote_acct_lru" mapstructure:"vote_acct_lru"` // Vote account data (default: 5000) + StakeAcctLRU int `toml:"stake_acct_lru" mapstructure:"stake_acct_lru"` // Stake account data (default: 2000) + SmallAcctLRU int `toml:"small_acct_lru" mapstructure:"small_acct_lru"` // Small accounts ≤256 bytes (default: 500000) + LargeAcctLRU int `toml:"large_acct_lru" mapstructure:"large_acct_lru"` // Large accounts >256 bytes (default: 10000) + ProgramLRU int `toml:"program_lru" mapstructure:"program_lru"` // Compiled BPF programs (default: 5000) } // DevelopmentConfig holds development/tuning configuration (matches Firedancer [development] section) diff --git a/pkg/replay/block.go b/pkg/replay/block.go index dac557f1..4befbdfd 100644 --- a/pkg/replay/block.go +++ b/pkg/replay/block.go @@ -1692,18 +1692,15 @@ func ReplayBlocks( // Line 4: Cache hit/miss stats cs := accountsdb.GetAndResetCacheStats() - commonMiss := cs.CommonMissSmall + cs.CommonMissMedium + cs.CommonMissLarge - stakeMiss := cs.StakeMissSmall + cs.StakeMissMedium + cs.StakeMissLarge - voteMiss := cs.VoteMissSmall + cs.VoteMissMedium + cs.VoteMissLarge - totalHits := cs.CommonHits + cs.StakeHits + cs.VoteHits - totalMiss := commonMiss + stakeMiss + voteMiss + largeMiss := cs.LargeMissMedium + cs.LargeMissLarge + totalHits := cs.SmallHits + cs.LargeHits + cs.StakeHits + cs.VoteHits + totalMiss := cs.SmallMisses + largeMiss + cs.StakeMisses + cs.VoteMisses if totalHits+totalMiss > 0 { hitRate := float64(totalHits) / float64(totalHits+totalMiss) * 100 - mlog.Log.InfofPrecise(" cache: %.1f%% hit rate | hits: common %d, stake %d, vote %d | misses: common %d (s:%d m:%d l:%d), stake %d (s:%d m:%d l:%d), vote %d", - hitRate, cs.CommonHits, cs.StakeHits, cs.VoteHits, - commonMiss, cs.CommonMissSmall, cs.CommonMissMedium, cs.CommonMissLarge, - stakeMiss, cs.StakeMissSmall, cs.StakeMissMedium, cs.StakeMissLarge, - voteMiss) + mlog.Log.InfofPrecise(" cache: %.1f%% hit | hits: small %d, large %d, stake %d, vote %d | miss: small %d, large %d (m:%d l:%d), stake %d, vote %d", + hitRate, cs.SmallHits, cs.LargeHits, cs.StakeHits, cs.VoteHits, + cs.SmallMisses, largeMiss, cs.LargeMissMedium, cs.LargeMissLarge, + cs.StakeMisses, cs.VoteMisses) } // Line 5: RPC/fetch debugging info From fad0841c10250443902cc325d004e17b7e0cce40 Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 16:56:53 -0600 Subject: [PATCH 17/28] Add granular cache miss size buckets for profiling Track cache misses by size to inform cache threshold decisions: - 257-512 bytes: evaluate expanding small cache threshold - 513-4K bytes: medium accounts - 4K-64K bytes: large accounts - >64K bytes: huge accounts (evaluate excluding from cache) Co-Authored-By: Claude Opus 4.5 --- pkg/accountsdb/accountsdb.go | 47 ++++++++++++++++++++++-------------- pkg/replay/block.go | 9 ++++--- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index 0c4c6e5f..b48ba58c 100644 --- 
a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -51,34 +51,41 @@ var ( StakeCacheHits atomic.Uint64 VoteCacheHits atomic.Uint64 - // Cache misses - SmallCacheMisses atomic.Uint64 // Small accounts ≤256 bytes - LargeCacheMissMedium atomic.Uint64 // Large cache: 257-4096 bytes - LargeCacheMissLarge atomic.Uint64 // Large cache: >4096 bytes - StakeCacheMisses atomic.Uint64 - VoteCacheMisses atomic.Uint64 + // Cache misses - granular size buckets + SmallCacheMisses atomic.Uint64 // ≤256 bytes + LargeCacheMiss257to512 atomic.Uint64 // 257-512 bytes (to evaluate small threshold) + LargeCacheMiss513to4K atomic.Uint64 // 513-4096 bytes + LargeCacheMiss4Kto64K atomic.Uint64 // 4097-65536 bytes + LargeCacheMissHuge atomic.Uint64 // >65536 bytes (to see if huge accounts are common) + StakeCacheMisses atomic.Uint64 + VoteCacheMisses atomic.Uint64 ) // CacheStats holds cache hit/miss counts for reporting type CacheStats struct { SmallHits, LargeHits, StakeHits, VoteHits uint64 SmallMisses uint64 - LargeMissMedium, LargeMissLarge uint64 // Size breakdown for large cache + LargeMiss257to512 uint64 // 257-512 bytes (evaluate small threshold) + LargeMiss513to4K uint64 // 513-4096 bytes + LargeMiss4Kto64K uint64 // 4097-65536 bytes + LargeMissHuge uint64 // >65536 bytes StakeMisses, VoteMisses uint64 } // GetAndResetCacheStats returns current cache hit/miss counts and resets them func GetAndResetCacheStats() CacheStats { return CacheStats{ - SmallHits: SmallCacheHits.Swap(0), - LargeHits: LargeCacheHits.Swap(0), - StakeHits: StakeCacheHits.Swap(0), - VoteHits: VoteCacheHits.Swap(0), - SmallMisses: SmallCacheMisses.Swap(0), - LargeMissMedium: LargeCacheMissMedium.Swap(0), - LargeMissLarge: LargeCacheMissLarge.Swap(0), - StakeMisses: StakeCacheMisses.Swap(0), - VoteMisses: VoteCacheMisses.Swap(0), + SmallHits: SmallCacheHits.Swap(0), + LargeHits: LargeCacheHits.Swap(0), + StakeHits: StakeCacheHits.Swap(0), + VoteHits: VoteCacheHits.Swap(0), + SmallMisses: SmallCacheMisses.Swap(0), + LargeMiss257to512: LargeCacheMiss257to512.Swap(0), + LargeMiss513to4K: LargeCacheMiss513to4K.Swap(0), + LargeMiss4Kto64K: LargeCacheMiss4Kto64K.Swap(0), + LargeMissHuge: LargeCacheMissHuge.Swap(0), + StakeMisses: StakeCacheMisses.Swap(0), + VoteMisses: VoteCacheMisses.Swap(0), } } @@ -90,10 +97,14 @@ func recordCacheMiss(owner solana.PublicKey, dataLen uint64) { StakeCacheMisses.Add(1) } else if dataLen <= 256 { SmallCacheMisses.Add(1) + } else if dataLen <= 512 { + LargeCacheMiss257to512.Add(1) } else if dataLen <= 4096 { - LargeCacheMissMedium.Add(1) // 257-4096 bytes + LargeCacheMiss513to4K.Add(1) + } else if dataLen <= 65536 { + LargeCacheMiss4Kto64K.Add(1) } else { - LargeCacheMissLarge.Add(1) // >4096 bytes + LargeCacheMissHuge.Add(1) // >64KB } } diff --git a/pkg/replay/block.go b/pkg/replay/block.go index 4befbdfd..df8a5e37 100644 --- a/pkg/replay/block.go +++ b/pkg/replay/block.go @@ -1692,14 +1692,15 @@ func ReplayBlocks( // Line 4: Cache hit/miss stats cs := accountsdb.GetAndResetCacheStats() - largeMiss := cs.LargeMissMedium + cs.LargeMissLarge + largeMissTotal := cs.LargeMiss257to512 + cs.LargeMiss513to4K + cs.LargeMiss4Kto64K + cs.LargeMissHuge totalHits := cs.SmallHits + cs.LargeHits + cs.StakeHits + cs.VoteHits - totalMiss := cs.SmallMisses + largeMiss + cs.StakeMisses + cs.VoteMisses + totalMiss := cs.SmallMisses + largeMissTotal + cs.StakeMisses + cs.VoteMisses if totalHits+totalMiss > 0 { hitRate := float64(totalHits) / float64(totalHits+totalMiss) * 100 - mlog.Log.InfofPrecise(" cache: 
%.1f%% hit | hits: small %d, large %d, stake %d, vote %d | miss: small %d, large %d (m:%d l:%d), stake %d, vote %d", + // Show size breakdown: 257-512 (could expand small), 513-4K, 4K-64K, >64K (huge) + mlog.Log.InfofPrecise(" cache: %.1f%% hit | hits: small %d, large %d, stake %d, vote %d | miss: small %d, large %d [257-512:%d 513-4K:%d 4K-64K:%d >64K:%d], stake %d, vote %d", hitRate, cs.SmallHits, cs.LargeHits, cs.StakeHits, cs.VoteHits, - cs.SmallMisses, largeMiss, cs.LargeMissMedium, cs.LargeMissLarge, + cs.SmallMisses, largeMissTotal, cs.LargeMiss257to512, cs.LargeMiss513to4K, cs.LargeMiss4Kto64K, cs.LargeMissHuge, cs.StakeMisses, cs.VoteMisses) } From ef0efce946339c7f799569e1d4941802cb3bf4fa Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 17:23:56 -0600 Subject: [PATCH 18/28] Add cache fill stats to 100-slot summary Shows current cache size vs capacity to determine if misses are from cold cache (warming up) or eviction pressure (cache too small). Example output: cache fill: small 45000/500000 (9%), large 8500/10000 (85%), stake 1200/2000, vote 4800/5000 Co-Authored-By: Claude Opus 4.5 --- pkg/accountsdb/accountsdb.go | 22 ++++++++++++++++++++++ pkg/replay/block.go | 9 ++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index b48ba58c..e44081d6 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -89,6 +89,28 @@ func GetAndResetCacheStats() CacheStats { } } +// CacheFillStats holds current cache fill levels +type CacheFillStats struct { + SmallSize, SmallCap int + LargeSize, LargeCap int + StakeSize, StakeCap int + VoteSize, VoteCap int +} + +// GetCacheFillStats returns current cache fill levels (size/capacity) +func (accountsDb *AccountsDb) GetCacheFillStats() CacheFillStats { + return CacheFillStats{ + SmallSize: accountsDb.SmallAcctCache.Size(), + SmallCap: accountsDb.SmallAcctCache.Capacity(), + LargeSize: accountsDb.LargeAcctCache.Size(), + LargeCap: accountsDb.LargeAcctCache.Capacity(), + StakeSize: accountsDb.StakeAcctCache.Size(), + StakeCap: accountsDb.StakeAcctCache.Capacity(), + VoteSize: accountsDb.VoteAcctCache.Size(), + VoteCap: accountsDb.VoteAcctCache.Capacity(), + } +} + // recordCacheMiss increments the appropriate cache miss counter based on owner and size func recordCacheMiss(owner solana.PublicKey, dataLen uint64) { if owner == addresses.VoteProgramAddr { diff --git a/pkg/replay/block.go b/pkg/replay/block.go index df8a5e37..eed0401a 100644 --- a/pkg/replay/block.go +++ b/pkg/replay/block.go @@ -1698,10 +1698,17 @@ func ReplayBlocks( if totalHits+totalMiss > 0 { hitRate := float64(totalHits) / float64(totalHits+totalMiss) * 100 // Show size breakdown: 257-512 (could expand small), 513-4K, 4K-64K, >64K (huge) - mlog.Log.InfofPrecise(" cache: %.1f%% hit | hits: small %d, large %d, stake %d, vote %d | miss: small %d, large %d [257-512:%d 513-4K:%d 4K-64K:%d >64K:%d], stake %d, vote %d", + mlog.Log.InfofPrecise(" cache: %.1f%% hit | hits: small %d, large %d, stake %d, vote %d | miss: small %d, large %d [257-512:%d 512-4K:%d 4K-64K:%d >64K:%d], stake %d, vote %d", hitRate, cs.SmallHits, cs.LargeHits, cs.StakeHits, cs.VoteHits, cs.SmallMisses, largeMissTotal, cs.LargeMiss257to512, cs.LargeMiss513to4K, cs.LargeMiss4Kto64K, cs.LargeMissHuge, cs.StakeMisses, cs.VoteMisses) + + // Cache fill stats + cf := acctsDb.GetCacheFillStats() + mlog.Log.InfofPrecise(" cache fill: small %d/%d (%.0f%%), large %d/%d 
(%.0f%%), stake %d/%d, vote %d/%d", + cf.SmallSize, cf.SmallCap, float64(cf.SmallSize)/float64(cf.SmallCap)*100, + cf.LargeSize, cf.LargeCap, float64(cf.LargeSize)/float64(cf.LargeCap)*100, + cf.StakeSize, cf.StakeCap, cf.VoteSize, cf.VoteCap) } // Line 5: RPC/fetch debugging info From 81438be10f97fbfef485bc876ecf59afcf4c1a25 Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 18:02:40 -0600 Subject: [PATCH 19/28] perf: restructure account cache into small/medium/huge tiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Small: ≤512 bytes (500k entries) - Medium: 512-64KB (20k entries) - Huge: >64KB (500 entries, mostly programs) Adds granular miss stats for huge range: 64K-256K, 256K-1M, >1M Co-Authored-By: Claude Opus 4.5 --- cmd/mithril/node/node.go | 6 +- config.example.toml | 11 ++- pkg/accountsdb/accountsdb.go | 151 +++++++++++++++++++++-------------- pkg/config/config.go | 11 +-- pkg/replay/block.go | 18 ++--- 5 files changed, 119 insertions(+), 78 deletions(-) diff --git a/cmd/mithril/node/node.go b/cmd/mithril/node/node.go index 8e268bbe..4c330e9f 100644 --- a/cmd/mithril/node/node.go +++ b/cmd/mithril/node/node.go @@ -859,7 +859,8 @@ func runVerifyRange(c *cobra.Command, args []string) { config.GetInt("tuning.cache.vote_acct_lru"), config.GetInt("tuning.cache.stake_acct_lru"), config.GetInt("tuning.cache.small_acct_lru"), - config.GetInt("tuning.cache.large_acct_lru"), + config.GetInt("tuning.cache.medium_acct_lru"), + config.GetInt("tuning.cache.huge_acct_lru"), config.GetInt("tuning.cache.program_lru"), ) @@ -1620,7 +1621,8 @@ postBootstrap: config.GetInt("tuning.cache.vote_acct_lru"), config.GetInt("tuning.cache.stake_acct_lru"), config.GetInt("tuning.cache.small_acct_lru"), - config.GetInt("tuning.cache.large_acct_lru"), + config.GetInt("tuning.cache.medium_acct_lru"), + config.GetInt("tuning.cache.huge_acct_lru"), config.GetInt("tuning.cache.program_lru"), ) diff --git a/config.example.toml b/config.example.toml index 2638440e..fd725649 100644 --- a/config.example.toml +++ b/config.example.toml @@ -283,12 +283,15 @@ name = "mithril" # hot accounts during epoch rewards when ~1.25M stake accounts are touched once each) stake_acct_lru = 2000 - # Small account data cache - accounts ≤256 bytes (token accounts, etc.) - # Large size (500k) is cheap since small accounts are ~200 bytes each (~100MB total) + # Small account data cache - accounts ≤512 bytes (token accounts, etc.) 
+ # Large size (500k) is cheap since small accounts are ~500 bytes each small_acct_lru = 500000 - # Large account data cache - accounts >256 bytes - large_acct_lru = 10000 + # Medium account data cache - accounts 512-64KB + medium_acct_lru = 20000 + + # Huge account data cache - accounts >64KB (mostly programs) + huge_acct_lru = 500 # Compiled BPF program cache - number of entries program_lru = 5000 diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index e44081d6..09273d11 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -26,8 +26,9 @@ type AccountsDb struct { LargestFileId atomic.Uint64 VoteAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Vote accounts (frequently accessed) StakeAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Stake accounts (small 2k cache) - SmallAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Small accounts ≤256 bytes (500k entries, ~100MB) - LargeAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Large accounts >256 bytes (10k entries) + SmallAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Small accounts ≤512 bytes (500k entries) + MediumAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Medium accounts 512-64KB (20k entries) + HugeAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Huge accounts >64KB (500 entries, mostly programs) ProgramCache otter.Cache[solana.PublicKey, *ProgramCacheEntry] InRewardsWindow bool // When true, only update existing stake cache entries (don't add new ones) } @@ -46,68 +47,77 @@ var ( // Cache hit/miss counters for profiling var ( // Cache hits per cache type - SmallCacheHits atomic.Uint64 // Small accounts ≤256 bytes - LargeCacheHits atomic.Uint64 // Large accounts >256 bytes - StakeCacheHits atomic.Uint64 - VoteCacheHits atomic.Uint64 - - // Cache misses - granular size buckets - SmallCacheMisses atomic.Uint64 // ≤256 bytes - LargeCacheMiss257to512 atomic.Uint64 // 257-512 bytes (to evaluate small threshold) - LargeCacheMiss513to4K atomic.Uint64 // 513-4096 bytes - LargeCacheMiss4Kto64K atomic.Uint64 // 4097-65536 bytes - LargeCacheMissHuge atomic.Uint64 // >65536 bytes (to see if huge accounts are common) - StakeCacheMisses atomic.Uint64 - VoteCacheMisses atomic.Uint64 + SmallCacheHits atomic.Uint64 // Small accounts ≤512 bytes + MediumCacheHits atomic.Uint64 // Medium accounts 512-64KB + HugeCacheHits atomic.Uint64 // Huge accounts >64KB + StakeCacheHits atomic.Uint64 + VoteCacheHits atomic.Uint64 + + // Cache misses per cache type + SmallCacheMisses atomic.Uint64 // ≤512 bytes + MediumCacheMisses atomic.Uint64 // 512-64KB + HugeCacheMisses atomic.Uint64 // >64KB (total) + StakeCacheMisses atomic.Uint64 + VoteCacheMisses atomic.Uint64 + + // Granular miss breakdown within huge range (>64KB) + HugeMiss64Kto256K atomic.Uint64 // 64KB-256KB + HugeMiss256Kto1M atomic.Uint64 // 256KB-1MB + HugeMissOver1M atomic.Uint64 // >1MB ) // CacheStats holds cache hit/miss counts for reporting type CacheStats struct { - SmallHits, LargeHits, StakeHits, VoteHits uint64 - SmallMisses uint64 - LargeMiss257to512 uint64 // 257-512 bytes (evaluate small threshold) - LargeMiss513to4K uint64 // 513-4096 bytes - LargeMiss4Kto64K uint64 // 4097-65536 bytes - LargeMissHuge uint64 // >65536 bytes - StakeMisses, VoteMisses uint64 + SmallHits, MediumHits, HugeHits, StakeHits, VoteHits uint64 + SmallMisses, MediumMisses, HugeMisses uint64 + StakeMisses, VoteMisses uint64 + // Granular breakdown within huge range + 
HugeMiss64Kto256K uint64 // 64KB-256KB + HugeMiss256Kto1M uint64 // 256KB-1MB + HugeMissOver1M uint64 // >1MB } // GetAndResetCacheStats returns current cache hit/miss counts and resets them func GetAndResetCacheStats() CacheStats { return CacheStats{ SmallHits: SmallCacheHits.Swap(0), - LargeHits: LargeCacheHits.Swap(0), + MediumHits: MediumCacheHits.Swap(0), + HugeHits: HugeCacheHits.Swap(0), StakeHits: StakeCacheHits.Swap(0), VoteHits: VoteCacheHits.Swap(0), SmallMisses: SmallCacheMisses.Swap(0), - LargeMiss257to512: LargeCacheMiss257to512.Swap(0), - LargeMiss513to4K: LargeCacheMiss513to4K.Swap(0), - LargeMiss4Kto64K: LargeCacheMiss4Kto64K.Swap(0), - LargeMissHuge: LargeCacheMissHuge.Swap(0), + MediumMisses: MediumCacheMisses.Swap(0), + HugeMisses: HugeCacheMisses.Swap(0), StakeMisses: StakeCacheMisses.Swap(0), VoteMisses: VoteCacheMisses.Swap(0), + HugeMiss64Kto256K: HugeMiss64Kto256K.Swap(0), + HugeMiss256Kto1M: HugeMiss256Kto1M.Swap(0), + HugeMissOver1M: HugeMissOver1M.Swap(0), } } // CacheFillStats holds current cache fill levels type CacheFillStats struct { - SmallSize, SmallCap int - LargeSize, LargeCap int - StakeSize, StakeCap int - VoteSize, VoteCap int + SmallSize, SmallCap int + MediumSize, MediumCap int + HugeSize, HugeCap int + StakeSize, StakeCap int + VoteSize, VoteCap int } // GetCacheFillStats returns current cache fill levels (size/capacity) func (accountsDb *AccountsDb) GetCacheFillStats() CacheFillStats { return CacheFillStats{ - SmallSize: accountsDb.SmallAcctCache.Size(), - SmallCap: accountsDb.SmallAcctCache.Capacity(), - LargeSize: accountsDb.LargeAcctCache.Size(), - LargeCap: accountsDb.LargeAcctCache.Capacity(), - StakeSize: accountsDb.StakeAcctCache.Size(), - StakeCap: accountsDb.StakeAcctCache.Capacity(), - VoteSize: accountsDb.VoteAcctCache.Size(), - VoteCap: accountsDb.VoteAcctCache.Capacity(), + SmallSize: accountsDb.SmallAcctCache.Size(), + SmallCap: accountsDb.SmallAcctCache.Capacity(), + MediumSize: accountsDb.MediumAcctCache.Size(), + MediumCap: accountsDb.MediumAcctCache.Capacity(), + HugeSize: accountsDb.HugeAcctCache.Size(), + HugeCap: accountsDb.HugeAcctCache.Capacity(), + StakeSize: accountsDb.StakeAcctCache.Size(), + StakeCap: accountsDb.StakeAcctCache.Capacity(), + VoteSize: accountsDb.VoteAcctCache.Size(), + VoteCap: accountsDb.VoteAcctCache.Capacity(), } } @@ -117,16 +127,20 @@ func recordCacheMiss(owner solana.PublicKey, dataLen uint64) { VoteCacheMisses.Add(1) } else if owner == addresses.StakeProgramAddr { StakeCacheMisses.Add(1) - } else if dataLen <= 256 { - SmallCacheMisses.Add(1) } else if dataLen <= 512 { - LargeCacheMiss257to512.Add(1) - } else if dataLen <= 4096 { - LargeCacheMiss513to4K.Add(1) + SmallCacheMisses.Add(1) } else if dataLen <= 65536 { - LargeCacheMiss4Kto64K.Add(1) + MediumCacheMisses.Add(1) } else { - LargeCacheMissHuge.Add(1) // >64KB + // Huge: >64KB - track total and granular breakdown + HugeCacheMisses.Add(1) + if dataLen <= 262144 { // 64KB-256KB + HugeMiss64Kto256K.Add(1) + } else if dataLen <= 1048576 { // 256KB-1MB + HugeMiss256Kto1M.Add(1) + } else { // >1MB + HugeMissOver1M.Add(1) + } } } @@ -190,7 +204,7 @@ func (accountsDb *AccountsDb) CloseDb() { // InitCaches initializes the LRU caches with the given sizes. // Pass 0 for any size to use a reasonable builtin value. 
-func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, smallSize, largeSize, programSize int) { +func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, smallSize, mediumSize, hugeSize, programSize int) { // Apply builtin values when config not set if voteSize <= 0 { voteSize = 5000 @@ -199,10 +213,13 @@ func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, smallSize, largeSi stakeSize = 2000 } if smallSize <= 0 { - smallSize = 500000 // 500k small accounts (~100MB) + smallSize = 500000 // 500k small accounts ≤512 bytes + } + if mediumSize <= 0 { + mediumSize = 20000 // 20k medium accounts 512-64KB } - if largeSize <= 0 { - largeSize = 10000 // 10k large accounts + if hugeSize <= 0 { + hugeSize = 500 // 500 huge accounts >64KB (mostly programs) } if programSize <= 0 { programSize = 5000 @@ -236,7 +253,16 @@ func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, smallSize, largeSi panic(err) } - accountsDb.LargeAcctCache, err = otter.MustBuilder[solana.PublicKey, *accounts.Account](largeSize). + accountsDb.MediumAcctCache, err = otter.MustBuilder[solana.PublicKey, *accounts.Account](mediumSize). + Cost(func(key solana.PublicKey, acct *accounts.Account) uint32 { + return 1 + }). + Build() + if err != nil { + panic(err) + } + + accountsDb.HugeAcctCache, err = otter.MustBuilder[solana.PublicKey, *accounts.Account](hugeSize). Cost(func(key solana.PublicKey, acct *accounts.Account) uint32 { return 1 }). @@ -254,8 +280,8 @@ func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, smallSize, largeSi panic(err) } - mlog.Log.Infof("AccountsDB caches initialized: vote=%d stake=%d small=%d large=%d program=%d", - voteSize, stakeSize, smallSize, largeSize, programSize) + mlog.Log.Infof("AccountsDB caches initialized: vote=%d stake=%d small=%d medium=%d huge=%d program=%d", + voteSize, stakeSize, smallSize, mediumSize, hugeSize, programSize) } type ProgramCacheEntry struct { @@ -289,7 +315,8 @@ func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account) { accountsDb.VoteAcctCache.Delete(acct.Key) accountsDb.StakeAcctCache.Delete(acct.Key) accountsDb.SmallAcctCache.Delete(acct.Key) - accountsDb.LargeAcctCache.Delete(acct.Key) + accountsDb.MediumAcctCache.Delete(acct.Key) + accountsDb.HugeAcctCache.Delete(acct.Key) if owner == addresses.VoteProgramAddr { accountsDb.VoteAcctCache.Set(acct.Key, acct) @@ -302,10 +329,12 @@ func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account) { } else { accountsDb.StakeAcctCache.Set(acct.Key, acct) } - } else if len(acct.Data) <= 256 { + } else if len(acct.Data) <= 512 { accountsDb.SmallAcctCache.Set(acct.Key, acct) + } else if len(acct.Data) <= 65536 { + accountsDb.MediumAcctCache.Set(acct.Key, acct) } else { - accountsDb.LargeAcctCache.Set(acct.Key, acct) + accountsDb.HugeAcctCache.Set(acct.Key, acct) } } @@ -328,9 +357,15 @@ func (accountsDb *AccountsDb) GetAccount(slot uint64, pubkey solana.PublicKey) ( return cachedAcct, nil } - cachedAcct, hasAcct = accountsDb.LargeAcctCache.Get(pubkey) + cachedAcct, hasAcct = accountsDb.MediumAcctCache.Get(pubkey) + if hasAcct { + MediumCacheHits.Add(1) + return cachedAcct, nil + } + + cachedAcct, hasAcct = accountsDb.HugeAcctCache.Get(pubkey) if hasAcct { - LargeCacheHits.Add(1) + HugeCacheHits.Add(1) return cachedAcct, nil } diff --git a/pkg/config/config.go b/pkg/config/config.go index 4644b2b3..9ee71096 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -48,11 +48,12 @@ type DebugConfig struct { // those are unbounded maps holding vote state and delegation info. 
// These LRU caches store full account data for fast reads during replay. type CacheConfig struct { - VoteAcctLRU int `toml:"vote_acct_lru" mapstructure:"vote_acct_lru"` // Vote account data (default: 5000) - StakeAcctLRU int `toml:"stake_acct_lru" mapstructure:"stake_acct_lru"` // Stake account data (default: 2000) - SmallAcctLRU int `toml:"small_acct_lru" mapstructure:"small_acct_lru"` // Small accounts ≤256 bytes (default: 500000) - LargeAcctLRU int `toml:"large_acct_lru" mapstructure:"large_acct_lru"` // Large accounts >256 bytes (default: 10000) - ProgramLRU int `toml:"program_lru" mapstructure:"program_lru"` // Compiled BPF programs (default: 5000) + VoteAcctLRU int `toml:"vote_acct_lru" mapstructure:"vote_acct_lru"` // Vote account data (default: 5000) + StakeAcctLRU int `toml:"stake_acct_lru" mapstructure:"stake_acct_lru"` // Stake account data (default: 2000) + SmallAcctLRU int `toml:"small_acct_lru" mapstructure:"small_acct_lru"` // Small accounts ≤512 bytes (default: 500000) + MediumAcctLRU int `toml:"medium_acct_lru" mapstructure:"medium_acct_lru"` // Medium accounts 512-64KB (default: 20000) + HugeAcctLRU int `toml:"huge_acct_lru" mapstructure:"huge_acct_lru"` // Huge accounts >64KB (default: 500) + ProgramLRU int `toml:"program_lru" mapstructure:"program_lru"` // Compiled BPF programs (default: 5000) } // DevelopmentConfig holds development/tuning configuration (matches Firedancer [development] section) diff --git a/pkg/replay/block.go b/pkg/replay/block.go index eed0401a..83222412 100644 --- a/pkg/replay/block.go +++ b/pkg/replay/block.go @@ -1692,22 +1692,22 @@ func ReplayBlocks( // Line 4: Cache hit/miss stats cs := accountsdb.GetAndResetCacheStats() - largeMissTotal := cs.LargeMiss257to512 + cs.LargeMiss513to4K + cs.LargeMiss4Kto64K + cs.LargeMissHuge - totalHits := cs.SmallHits + cs.LargeHits + cs.StakeHits + cs.VoteHits - totalMiss := cs.SmallMisses + largeMissTotal + cs.StakeMisses + cs.VoteMisses + totalHits := cs.SmallHits + cs.MediumHits + cs.HugeHits + cs.StakeHits + cs.VoteHits + totalMiss := cs.SmallMisses + cs.MediumMisses + cs.HugeMisses + cs.StakeMisses + cs.VoteMisses if totalHits+totalMiss > 0 { hitRate := float64(totalHits) / float64(totalHits+totalMiss) * 100 - // Show size breakdown: 257-512 (could expand small), 513-4K, 4K-64K, >64K (huge) - mlog.Log.InfofPrecise(" cache: %.1f%% hit | hits: small %d, large %d, stake %d, vote %d | miss: small %d, large %d [257-512:%d 512-4K:%d 4K-64K:%d >64K:%d], stake %d, vote %d", - hitRate, cs.SmallHits, cs.LargeHits, cs.StakeHits, cs.VoteHits, - cs.SmallMisses, largeMissTotal, cs.LargeMiss257to512, cs.LargeMiss513to4K, cs.LargeMiss4Kto64K, cs.LargeMissHuge, + // Show hits and misses per cache, with granular breakdown for huge (>64KB) + mlog.Log.InfofPrecise(" cache: %.1f%% hit | hits: small %d, medium %d, huge %d, stake %d, vote %d | miss: small %d, medium %d, huge %d [64K-256K:%d 256K-1M:%d >1M:%d], stake %d, vote %d", + hitRate, cs.SmallHits, cs.MediumHits, cs.HugeHits, cs.StakeHits, cs.VoteHits, + cs.SmallMisses, cs.MediumMisses, cs.HugeMisses, cs.HugeMiss64Kto256K, cs.HugeMiss256Kto1M, cs.HugeMissOver1M, cs.StakeMisses, cs.VoteMisses) // Cache fill stats cf := acctsDb.GetCacheFillStats() - mlog.Log.InfofPrecise(" cache fill: small %d/%d (%.0f%%), large %d/%d (%.0f%%), stake %d/%d, vote %d/%d", + mlog.Log.InfofPrecise(" cache fill: small %d/%d (%.0f%%), medium %d/%d (%.0f%%), huge %d/%d (%.0f%%), stake %d/%d, vote %d/%d", cf.SmallSize, cf.SmallCap, float64(cf.SmallSize)/float64(cf.SmallCap)*100, - cf.LargeSize, 
cf.LargeCap, float64(cf.LargeSize)/float64(cf.LargeCap)*100, + cf.MediumSize, cf.MediumCap, float64(cf.MediumSize)/float64(cf.MediumCap)*100, + cf.HugeSize, cf.HugeCap, float64(cf.HugeSize)/float64(cf.HugeCap)*100, cf.StakeSize, cf.StakeCap, cf.VoteSize, cf.VoteCap) } From f3e99cc4947c59e0df21c8d0eda679ce6fdc7026 Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 19:39:31 -0600 Subject: [PATCH 20/28] perf: add admit-on-second-hit filter for common account caches Reduce small cache default from 500k to 50k to lower GC pressure. Add optional admit-on-second-hit policy for common accounts (small/ medium/huge) - accounts must be seen twice within a configurable slot window before being cached. This filters one-shot reads that would pollute the cache and evict genuinely hot entries. New config: tuning.cache.seen_once_reset_slots (0=disabled, default) Also adds SeenOnceFiltered/SeenOnceAdmitted counters to track filter effectiveness. Co-Authored-By: Claude Opus 4.5 --- cmd/mithril/node/node.go | 2 + config.example.toml | 10 +++- pkg/accountsdb/accountsdb.go | 96 +++++++++++++++++++++++++++++++----- pkg/config/config.go | 13 ++--- 4 files changed, 100 insertions(+), 21 deletions(-) diff --git a/cmd/mithril/node/node.go b/cmd/mithril/node/node.go index 4c330e9f..2ea0d4e9 100644 --- a/cmd/mithril/node/node.go +++ b/cmd/mithril/node/node.go @@ -862,6 +862,7 @@ func runVerifyRange(c *cobra.Command, args []string) { config.GetInt("tuning.cache.medium_acct_lru"), config.GetInt("tuning.cache.huge_acct_lru"), config.GetInt("tuning.cache.program_lru"), + uint64(config.GetInt("tuning.cache.seen_once_reset_slots")), ) metricsWriter, metricsWriterCleanup, err := createBufWriter(metricsPath) @@ -1624,6 +1625,7 @@ postBootstrap: config.GetInt("tuning.cache.medium_acct_lru"), config.GetInt("tuning.cache.huge_acct_lru"), config.GetInt("tuning.cache.program_lru"), + uint64(config.GetInt("tuning.cache.seen_once_reset_slots")), ) metricsWriter, metricsWriterCleanup, err := createBufWriter(metricsPath) diff --git a/config.example.toml b/config.example.toml index fd725649..63677ae4 100644 --- a/config.example.toml +++ b/config.example.toml @@ -284,8 +284,7 @@ name = "mithril" stake_acct_lru = 2000 # Small account data cache - accounts ≤512 bytes (token accounts, etc.) - # Large size (500k) is cheap since small accounts are ~500 bytes each - small_acct_lru = 500000 + small_acct_lru = 50000 # Medium account data cache - accounts 512-64KB medium_acct_lru = 20000 @@ -296,6 +295,13 @@ name = "mithril" # Compiled BPF program cache - number of entries program_lru = 5000 + # Admit-on-second-hit filter for common accounts (small/medium/huge) + # Only caches accounts seen twice within this many slots, filtering one-shot reads. 
+ # 0 = disabled (cache everything immediately, like traditional LRU) + # >0 = enable filtering, reset tracking every N slots (~N×0.4 seconds) + # Recommended: 100 slots (~40 seconds) if enabled + seen_once_reset_slots = 0 + # ============================================================================ # [debug] - Debug Logging # ============================================================================ diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index 09273d11..99f9bd03 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -26,11 +26,17 @@ type AccountsDb struct { LargestFileId atomic.Uint64 VoteAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Vote accounts (frequently accessed) StakeAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Stake accounts (small 2k cache) - SmallAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Small accounts ≤512 bytes (500k entries) + SmallAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Small accounts ≤512 bytes (50k entries) MediumAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Medium accounts 512-64KB (20k entries) HugeAcctCache otter.Cache[solana.PublicKey, *accounts.Account] // Huge accounts >64KB (500 entries, mostly programs) ProgramCache otter.Cache[solana.PublicKey, *ProgramCacheEntry] InRewardsWindow bool // When true, only update existing stake cache entries (don't add new ones) + + // Admit-on-second-hit for common accounts (small/medium/huge) + // Only accounts seen twice within the reset window get cached, filtering one-shot reads. + CommonSeenOnce map[solana.PublicKey]struct{} // Keys seen once but not yet cached + CommonSeenOnceSlot uint64 // Slot when current window started + CommonSeenOnceResetSlots uint64 // Reset interval in slots (default: 100) } // silentLogger implements pebble.Logger but discards all messages. @@ -64,6 +70,10 @@ var ( HugeMiss64Kto256K atomic.Uint64 // 64KB-256KB HugeMiss256Kto1M atomic.Uint64 // 256KB-1MB HugeMissOver1M atomic.Uint64 // >1MB + + // Admit-on-second-hit filter stats + SeenOnceFiltered atomic.Uint64 // First hit, added to seen-once tracking + SeenOnceAdmitted atomic.Uint64 // Second hit, admitted to cache ) // CacheStats holds cache hit/miss counts for reporting @@ -75,6 +85,9 @@ type CacheStats struct { HugeMiss64Kto256K uint64 // 64KB-256KB HugeMiss256Kto1M uint64 // 256KB-1MB HugeMissOver1M uint64 // >1MB + // Admit-on-second-hit stats + SeenOnceFiltered uint64 // First hits (tracked but not cached) + SeenOnceAdmitted uint64 // Second hits (admitted to cache) } // GetAndResetCacheStats returns current cache hit/miss counts and resets them @@ -93,6 +106,8 @@ func GetAndResetCacheStats() CacheStats { HugeMiss64Kto256K: HugeMiss64Kto256K.Swap(0), HugeMiss256Kto1M: HugeMiss256Kto1M.Swap(0), HugeMissOver1M: HugeMissOver1M.Swap(0), + SeenOnceFiltered: SeenOnceFiltered.Swap(0), + SeenOnceAdmitted: SeenOnceAdmitted.Swap(0), } } @@ -204,7 +219,10 @@ func (accountsDb *AccountsDb) CloseDb() { // InitCaches initializes the LRU caches with the given sizes. // Pass 0 for any size to use a reasonable builtin value. 
-func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, smallSize, mediumSize, hugeSize, programSize int) { +// seenOnceResetSlots controls the admit-on-second-hit window for common accounts: +// - 0 = disabled (cache everything immediately, no filtering) +// - >0 = enable filtering, reset tracking every N slots +func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, smallSize, mediumSize, hugeSize, programSize int, seenOnceResetSlots uint64) { // Apply builtin values when config not set if voteSize <= 0 { voteSize = 5000 @@ -213,7 +231,7 @@ func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, smallSize, mediumS stakeSize = 2000 } if smallSize <= 0 { - smallSize = 500000 // 500k small accounts ≤512 bytes + smallSize = 50000 // 50k small accounts ≤512 bytes } if mediumSize <= 0 { mediumSize = 20000 // 20k medium accounts 512-64KB @@ -280,8 +298,19 @@ func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, smallSize, mediumS panic(err) } - mlog.Log.Infof("AccountsDB caches initialized: vote=%d stake=%d small=%d medium=%d huge=%d program=%d", - voteSize, stakeSize, smallSize, mediumSize, hugeSize, programSize) + // Initialize admit-on-second-hit tracking for common accounts + accountsDb.CommonSeenOnceResetSlots = seenOnceResetSlots + if seenOnceResetSlots > 0 { + accountsDb.CommonSeenOnce = make(map[solana.PublicKey]struct{}) + } + + if seenOnceResetSlots > 0 { + mlog.Log.Infof("AccountsDB caches initialized: vote=%d stake=%d small=%d medium=%d huge=%d program=%d seenOnceReset=%d", + voteSize, stakeSize, smallSize, mediumSize, hugeSize, programSize, seenOnceResetSlots) + } else { + mlog.Log.Infof("AccountsDB caches initialized: vote=%d stake=%d small=%d medium=%d huge=%d program=%d (seen-once filter disabled)", + voteSize, stakeSize, smallSize, mediumSize, hugeSize, programSize) + } } type ProgramCacheEntry struct { @@ -308,7 +337,11 @@ func (accountsDb *AccountsDb) RemoveProgramFromCache(pubkey solana.PublicKey) { // During rewards window (InRewardsWindow=true), stake accounts are only updated if // already cached - new entries are not added. This prevents cache thrash from the // ~1.25M one-shot stake account accesses while preserving hot entries. -func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account) { +// +// For common accounts (small/medium/huge), if CommonSeenOnceResetSlots > 0, uses +// admit-on-second-hit: only caches accounts seen twice within the reset window. +// This filters one-shot reads that would pollute the cache. +func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account, slot uint64) { owner := solana.PublicKeyFromBytes(acct.Owner[:]) // Always evict from all caches first to prevent stale entries @@ -329,12 +362,49 @@ func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account) { } else { accountsDb.StakeAcctCache.Set(acct.Key, acct) } - } else if len(acct.Data) <= 512 { - accountsDb.SmallAcctCache.Set(acct.Key, acct) - } else if len(acct.Data) <= 65536 { - accountsDb.MediumAcctCache.Set(acct.Key, acct) } else { - accountsDb.HugeAcctCache.Set(acct.Key, acct) + // Common accounts (small/medium/huge) - apply admit-on-second-hit if enabled + accountsDb.cacheCommonAccount(acct, slot) + } +} + +// cacheCommonAccount handles caching for non-vote, non-stake accounts. +// If seen-once filtering is enabled, only admits on second hit within the reset window. 
+func (accountsDb *AccountsDb) cacheCommonAccount(acct *accounts.Account, slot uint64) { + // If seen-once filtering is disabled, cache immediately + if accountsDb.CommonSeenOnceResetSlots == 0 { + if len(acct.Data) <= 512 { + accountsDb.SmallAcctCache.Set(acct.Key, acct) + } else if len(acct.Data) <= 65536 { + accountsDb.MediumAcctCache.Set(acct.Key, acct) + } else { + accountsDb.HugeAcctCache.Set(acct.Key, acct) + } + return + } + + // Check if it's time to reset the seen-once tracking window + if slot-accountsDb.CommonSeenOnceSlot >= accountsDb.CommonSeenOnceResetSlots { + accountsDb.CommonSeenOnce = make(map[solana.PublicKey]struct{}) + accountsDb.CommonSeenOnceSlot = slot + } + + // Admit-on-second-hit: only cache if seen before in this window + if _, seenBefore := accountsDb.CommonSeenOnce[acct.Key]; seenBefore { + // Second hit - admit to cache + SeenOnceAdmitted.Add(1) + delete(accountsDb.CommonSeenOnce, acct.Key) + if len(acct.Data) <= 512 { + accountsDb.SmallAcctCache.Set(acct.Key, acct) + } else if len(acct.Data) <= 65536 { + accountsDb.MediumAcctCache.Set(acct.Key, acct) + } else { + accountsDb.HugeAcctCache.Set(acct.Key, acct) + } + } else { + // First hit - just track it, don't cache yet + SeenOnceFiltered.Add(1) + accountsDb.CommonSeenOnce[acct.Key] = struct{}{} } } @@ -412,7 +482,7 @@ func (accountsDb *AccountsDb) GetAccount(slot uint64, pubkey solana.PublicKey) ( // Record cache miss by owner type and size bucket (for profiling) recordCacheMiss(solana.PublicKeyFromBytes(acct.Owner[:]), uint64(len(acct.Data))) - accountsDb.cacheAccount(acct) + accountsDb.cacheAccount(acct, slot) return acct, err } @@ -431,7 +501,7 @@ func (accountsDb *AccountsDb) StoreAccounts(accts []*accounts.Account, slot uint if acct == nil { continue } - accountsDb.cacheAccount(acct) + accountsDb.cacheAccount(acct, slot) } return nil diff --git a/pkg/config/config.go b/pkg/config/config.go index 9ee71096..a16f1c2b 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -48,12 +48,13 @@ type DebugConfig struct { // those are unbounded maps holding vote state and delegation info. // These LRU caches store full account data for fast reads during replay. 
type CacheConfig struct { - VoteAcctLRU int `toml:"vote_acct_lru" mapstructure:"vote_acct_lru"` // Vote account data (default: 5000) - StakeAcctLRU int `toml:"stake_acct_lru" mapstructure:"stake_acct_lru"` // Stake account data (default: 2000) - SmallAcctLRU int `toml:"small_acct_lru" mapstructure:"small_acct_lru"` // Small accounts ≤512 bytes (default: 500000) - MediumAcctLRU int `toml:"medium_acct_lru" mapstructure:"medium_acct_lru"` // Medium accounts 512-64KB (default: 20000) - HugeAcctLRU int `toml:"huge_acct_lru" mapstructure:"huge_acct_lru"` // Huge accounts >64KB (default: 500) - ProgramLRU int `toml:"program_lru" mapstructure:"program_lru"` // Compiled BPF programs (default: 5000) + VoteAcctLRU int `toml:"vote_acct_lru" mapstructure:"vote_acct_lru"` // Vote account data (default: 5000) + StakeAcctLRU int `toml:"stake_acct_lru" mapstructure:"stake_acct_lru"` // Stake account data (default: 2000) + SmallAcctLRU int `toml:"small_acct_lru" mapstructure:"small_acct_lru"` // Small accounts ≤512 bytes (default: 50000) + MediumAcctLRU int `toml:"medium_acct_lru" mapstructure:"medium_acct_lru"` // Medium accounts 512-64KB (default: 20000) + HugeAcctLRU int `toml:"huge_acct_lru" mapstructure:"huge_acct_lru"` // Huge accounts >64KB (default: 500) + ProgramLRU int `toml:"program_lru" mapstructure:"program_lru"` // Compiled BPF programs (default: 5000) + SeenOnceResetSlots int `toml:"seen_once_reset_slots" mapstructure:"seen_once_reset_slots"` // Admit-on-second-hit window (0=disabled, default: 0) } // DevelopmentConfig holds development/tuning configuration (matches Firedancer [development] section) From a42232d39e121324e72329bfba2a682878590c75 Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 21:15:23 -0600 Subject: [PATCH 21/28] fix: replace map-based seen-once filter with LRU, fix oscillation bug Fixes three issues with the admit-on-second-hit filter: 1. Missing mutex: CommonSeenOnce map was accessed concurrently without synchronization. Replaced with thread-safe otter LRU cache. 2. Unbounded growth: Map grew without limit within each slot window. LRU has fixed capacity and naturally evicts old entries. 3. Oscillation bug: Writes caused accounts to alternate between cached and not-cached states. Root cause was checking the seen-once map AFTER deleting from caches, so every write looked like a first hit. Fixed by checking wasCommonCached BEFORE deleting using GetEntryQuietly. 
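A simplified, single-threaded model of the corrected ordering in (3).
Plain maps stand in for the otter LRUs here; the real implementation
is in the diff below:

    package main

    // cache stands in for the size-tiered account caches; seenOnce
    // stands in for the admit-on-second-hit filter. No locking or
    // eviction is modelled.
    var (
        cache    = map[string][]byte{}
        seenOnce = map[string]struct{}{}
    )

    func cacheAccount(key string, data []byte) {
        // Probe BEFORE deleting, so rewriting an already-cached
        // account does not look like a first sighting.
        _, wasCached := cache[key]
        delete(cache, key) // always drop any stale copy first

        if wasCached {
            delete(seenOnce, key) // already hot: bypass the filter
            cache[key] = data
            return
        }
        if _, seen := seenOnce[key]; seen {
            delete(seenOnce, key) // second hit: admit
            cache[key] = data
            return
        }
        seenOnce[key] = struct{}{} // first hit: track only
    }

    func main() {
        cacheAccount("acct", []byte{1}) // tracked, not cached
        cacheAccount("acct", []byte{2}) // second sighting: cached
        cacheAccount("acct", []byte{3}) // hot rewrite: stays cached
    }
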
Config change: seen_once_reset_slots -> seen_once_filter_size - 0 = disabled (default, backward compatible) - >0 = LRU capacity (recommended: 50000) Co-Authored-By: Claude Opus 4.5 --- cmd/mithril/node/node.go | 4 +- config.example.toml | 13 ++-- pkg/accountsdb/accountsdb.go | 118 ++++++++++++++++++++--------------- pkg/config/config.go | 2 +- 4 files changed, 79 insertions(+), 58 deletions(-) diff --git a/cmd/mithril/node/node.go b/cmd/mithril/node/node.go index 2ea0d4e9..29903c58 100644 --- a/cmd/mithril/node/node.go +++ b/cmd/mithril/node/node.go @@ -862,7 +862,7 @@ func runVerifyRange(c *cobra.Command, args []string) { config.GetInt("tuning.cache.medium_acct_lru"), config.GetInt("tuning.cache.huge_acct_lru"), config.GetInt("tuning.cache.program_lru"), - uint64(config.GetInt("tuning.cache.seen_once_reset_slots")), + config.GetInt("tuning.cache.seen_once_filter_size"), ) metricsWriter, metricsWriterCleanup, err := createBufWriter(metricsPath) @@ -1625,7 +1625,7 @@ postBootstrap: config.GetInt("tuning.cache.medium_acct_lru"), config.GetInt("tuning.cache.huge_acct_lru"), config.GetInt("tuning.cache.program_lru"), - uint64(config.GetInt("tuning.cache.seen_once_reset_slots")), + config.GetInt("tuning.cache.seen_once_filter_size"), ) metricsWriter, metricsWriterCleanup, err := createBufWriter(metricsPath) diff --git a/config.example.toml b/config.example.toml index 63677ae4..c1a5c871 100644 --- a/config.example.toml +++ b/config.example.toml @@ -295,12 +295,15 @@ name = "mithril" # Compiled BPF program cache - number of entries program_lru = 5000 - # Admit-on-second-hit filter for common accounts (small/medium/huge) - # Only caches accounts seen twice within this many slots, filtering one-shot reads. + # Admit-on-second-hit LRU filter for common accounts (small/medium/huge) + # Only caches accounts seen twice within the filter window, filtering one-shot reads. # 0 = disabled (cache everything immediately, like traditional LRU) - # >0 = enable filtering, reset tracking every N slots (~N×0.4 seconds) - # Recommended: 100 slots (~40 seconds) if enabled - seen_once_reset_slots = 0 + # >0 = enable filtering with LRU of this capacity + # Recommended: 50000 (roughly 0.5-1x of small+medium+huge total) + # Monitor SeenOnceAdmitted/SeenOnceFiltered ratio to tune: + # <5-10% admitted = too strict (filter too small) + # >40-50% admitted = too lenient (filter too large or most accesses are hot) + seen_once_filter_size = 0 # ============================================================================ # [debug] - Debug Logging diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index 99f9bd03..0ed08ba6 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -32,11 +32,10 @@ type AccountsDb struct { ProgramCache otter.Cache[solana.PublicKey, *ProgramCacheEntry] InRewardsWindow bool // When true, only update existing stake cache entries (don't add new ones) - // Admit-on-second-hit for common accounts (small/medium/huge) - // Only accounts seen twice within the reset window get cached, filtering one-shot reads. - CommonSeenOnce map[solana.PublicKey]struct{} // Keys seen once but not yet cached - CommonSeenOnceSlot uint64 // Slot when current window started - CommonSeenOnceResetSlots uint64 // Reset interval in slots (default: 100) + // Admit-on-second-hit LRU filter for common accounts (small/medium/huge) + // Only accounts seen twice within the filter window get cached, filtering one-shot reads. 
+ // nil = disabled (cache everything immediately) + SeenOnceFilter *otter.Cache[solana.PublicKey, struct{}] } // silentLogger implements pebble.Logger but discards all messages. @@ -219,10 +218,10 @@ func (accountsDb *AccountsDb) CloseDb() { // InitCaches initializes the LRU caches with the given sizes. // Pass 0 for any size to use a reasonable builtin value. -// seenOnceResetSlots controls the admit-on-second-hit window for common accounts: +// seenOnceFilterSize controls the admit-on-second-hit LRU filter for common accounts: // - 0 = disabled (cache everything immediately, no filtering) -// - >0 = enable filtering, reset tracking every N slots -func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, smallSize, mediumSize, hugeSize, programSize int, seenOnceResetSlots uint64) { +// - >0 = enable filtering with LRU of this capacity +func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, smallSize, mediumSize, hugeSize, programSize int, seenOnceFilterSize int) { // Apply builtin values when config not set if voteSize <= 0 { voteSize = 5000 @@ -298,16 +297,23 @@ func (accountsDb *AccountsDb) InitCaches(voteSize, stakeSize, smallSize, mediumS panic(err) } - // Initialize admit-on-second-hit tracking for common accounts - accountsDb.CommonSeenOnceResetSlots = seenOnceResetSlots - if seenOnceResetSlots > 0 { - accountsDb.CommonSeenOnce = make(map[solana.PublicKey]struct{}) - } - - if seenOnceResetSlots > 0 { - mlog.Log.Infof("AccountsDB caches initialized: vote=%d stake=%d small=%d medium=%d huge=%d program=%d seenOnceReset=%d", - voteSize, stakeSize, smallSize, mediumSize, hugeSize, programSize, seenOnceResetSlots) + // Initialize admit-on-second-hit LRU filter for common accounts + if seenOnceFilterSize > 0 { + // Otter v1 requires capacity >= 10 + if seenOnceFilterSize < 10 { + seenOnceFilterSize = 10 + } + filter, err := otter.MustBuilder[solana.PublicKey, struct{}](seenOnceFilterSize). + Cost(func(key solana.PublicKey, val struct{}) uint32 { return 1 }). + Build() + if err != nil { + panic(err) + } + accountsDb.SeenOnceFilter = &filter + mlog.Log.Infof("AccountsDB caches initialized: vote=%d stake=%d small=%d medium=%d huge=%d program=%d seenOnceFilter=%d", + voteSize, stakeSize, smallSize, mediumSize, hugeSize, programSize, seenOnceFilterSize) } else { + // SeenOnceFilter stays nil (disabled) mlog.Log.Infof("AccountsDB caches initialized: vote=%d stake=%d small=%d medium=%d huge=%d program=%d (seen-once filter disabled)", voteSize, stakeSize, smallSize, mediumSize, hugeSize, programSize) } @@ -338,13 +344,27 @@ func (accountsDb *AccountsDb) RemoveProgramFromCache(pubkey solana.PublicKey) { // already cached - new entries are not added. This prevents cache thrash from the // ~1.25M one-shot stake account accesses while preserving hot entries. // -// For common accounts (small/medium/huge), if CommonSeenOnceResetSlots > 0, uses -// admit-on-second-hit: only caches accounts seen twice within the reset window. +// For common accounts (small/medium/huge), if SeenOnceFilter != nil, uses +// admit-on-second-hit: only caches accounts seen twice within the filter window. // This filters one-shot reads that would pollute the cache. 
func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account, slot uint64) { owner := solana.PublicKeyFromBytes(acct.Owner[:]) - // Always evict from all caches first to prevent stale entries + // OSCILLATION FIX: Check if already in a common cache BEFORE deleting + // This preserves "was hot" information so writes don't oscillate + wasCommonCached := false + if owner != addresses.VoteProgramAddr && owner != addresses.StakeProgramAddr { + // Use GetEntryQuietly to avoid inflating hit stats + _, wasCommonCached = accountsDb.SmallAcctCache.Extension().GetEntryQuietly(acct.Key) + if !wasCommonCached { + _, wasCommonCached = accountsDb.MediumAcctCache.Extension().GetEntryQuietly(acct.Key) + } + if !wasCommonCached { + _, wasCommonCached = accountsDb.HugeAcctCache.Extension().GetEntryQuietly(acct.Key) + } + } + + // Delete from all caches (prevents stale data if size tier changes) accountsDb.VoteAcctCache.Delete(acct.Key) accountsDb.StakeAcctCache.Delete(acct.Key) accountsDb.SmallAcctCache.Delete(acct.Key) @@ -363,48 +383,46 @@ func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account, slot uint64) accountsDb.StakeAcctCache.Set(acct.Key, acct) } } else { - // Common accounts (small/medium/huge) - apply admit-on-second-hit if enabled - accountsDb.cacheCommonAccount(acct, slot) + // Common accounts - pass wasCommonCached to bypass filter if already hot + accountsDb.cacheCommonAccount(acct, wasCommonCached) } } // cacheCommonAccount handles caching for non-vote, non-stake accounts. -// If seen-once filtering is enabled, only admits on second hit within the reset window. -func (accountsDb *AccountsDb) cacheCommonAccount(acct *accounts.Account, slot uint64) { - // If seen-once filtering is disabled, cache immediately - if accountsDb.CommonSeenOnceResetSlots == 0 { - if len(acct.Data) <= 512 { - accountsDb.SmallAcctCache.Set(acct.Key, acct) - } else if len(acct.Data) <= 65536 { - accountsDb.MediumAcctCache.Set(acct.Key, acct) - } else { - accountsDb.HugeAcctCache.Set(acct.Key, acct) +// If force=true or filter disabled, caches immediately. +// Otherwise applies admit-on-second-hit filter. 
+func (accountsDb *AccountsDb) cacheCommonAccount(acct *accounts.Account, force bool) { + // Direct cache if filter disabled or account was already cached (force=true) + if accountsDb.SeenOnceFilter == nil || force { + accountsDb.cacheCommonDirect(acct) + // Cleanup from filter if present (account is now hot) + if accountsDb.SeenOnceFilter != nil { + accountsDb.SeenOnceFilter.Delete(acct.Key) } return } - // Check if it's time to reset the seen-once tracking window - if slot-accountsDb.CommonSeenOnceSlot >= accountsDb.CommonSeenOnceResetSlots { - accountsDb.CommonSeenOnce = make(map[solana.PublicKey]struct{}) - accountsDb.CommonSeenOnceSlot = slot - } - - // Admit-on-second-hit: only cache if seen before in this window - if _, seenBefore := accountsDb.CommonSeenOnce[acct.Key]; seenBefore { + // Admit-on-second-hit: only cache if seen before + if _, seenBefore := accountsDb.SeenOnceFilter.Get(acct.Key); seenBefore { // Second hit - admit to cache SeenOnceAdmitted.Add(1) - delete(accountsDb.CommonSeenOnce, acct.Key) - if len(acct.Data) <= 512 { - accountsDb.SmallAcctCache.Set(acct.Key, acct) - } else if len(acct.Data) <= 65536 { - accountsDb.MediumAcctCache.Set(acct.Key, acct) - } else { - accountsDb.HugeAcctCache.Set(acct.Key, acct) - } + accountsDb.SeenOnceFilter.Delete(acct.Key) // Delete AFTER checking + accountsDb.cacheCommonDirect(acct) } else { - // First hit - just track it, don't cache yet + // First hit - track but don't cache SeenOnceFiltered.Add(1) - accountsDb.CommonSeenOnce[acct.Key] = struct{}{} + accountsDb.SeenOnceFilter.Set(acct.Key, struct{}{}) + } +} + +// cacheCommonDirect inserts into the appropriate size-tiered cache +func (accountsDb *AccountsDb) cacheCommonDirect(acct *accounts.Account) { + if len(acct.Data) <= 512 { + accountsDb.SmallAcctCache.Set(acct.Key, acct) + } else if len(acct.Data) <= 65536 { + accountsDb.MediumAcctCache.Set(acct.Key, acct) + } else { + accountsDb.HugeAcctCache.Set(acct.Key, acct) } } diff --git a/pkg/config/config.go b/pkg/config/config.go index a16f1c2b..1e00579e 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -54,7 +54,7 @@ type CacheConfig struct { MediumAcctLRU int `toml:"medium_acct_lru" mapstructure:"medium_acct_lru"` // Medium accounts 512-64KB (default: 20000) HugeAcctLRU int `toml:"huge_acct_lru" mapstructure:"huge_acct_lru"` // Huge accounts >64KB (default: 500) ProgramLRU int `toml:"program_lru" mapstructure:"program_lru"` // Compiled BPF programs (default: 5000) - SeenOnceResetSlots int `toml:"seen_once_reset_slots" mapstructure:"seen_once_reset_slots"` // Admit-on-second-hit window (0=disabled, default: 0) + SeenOnceFilterSize int `toml:"seen_once_filter_size" mapstructure:"seen_once_filter_size"` // Admit-on-second-hit LRU capacity (0=disabled, default: 0) } // DevelopmentConfig holds development/tuning configuration (matches Firedancer [development] section) From 4f42c5f0e01f20592ef5636c9edd793c00e2915f Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 21:21:22 -0600 Subject: [PATCH 22/28] fix: remove unused slot parameter from cacheAccount The slot parameter was used by the old map-based seen-once filter for window resets. With the LRU-based implementation, it's no longer needed. 
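For intuition, a toy fixed-capacity "seen once" set (not the otter
LRU used by the real filter) shows why capacity, rather than a slot
counter, now bounds the tracking window:

    package main

    import "fmt"

    // Once full, the oldest key is evicted, so stale first-hit
    // records age out on their own and no slot-driven reset (and
    // hence no slot argument) is required.
    type seenSet struct {
        order []string
        keys  map[string]struct{}
        cap   int
    }

    func newSeenSet(capacity int) *seenSet {
        return &seenSet{keys: map[string]struct{}{}, cap: capacity}
    }

    func (s *seenSet) add(k string) {
        if _, ok := s.keys[k]; ok {
            return
        }
        if len(s.order) == s.cap {
            oldest := s.order[0]
            s.order = s.order[1:]
            delete(s.keys, oldest)
        }
        s.order = append(s.order, k)
        s.keys[k] = struct{}{}
    }

    func main() {
        s := newSeenSet(2)
        s.add("a")
        s.add("b")
        s.add("c")               // "a" is evicted by capacity alone
        fmt.Println(len(s.keys)) // 2
    }
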
Co-Authored-By: Claude Opus 4.5 --- pkg/accountsdb/accountsdb.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index 0ed08ba6..7d281d4f 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -347,7 +347,7 @@ func (accountsDb *AccountsDb) RemoveProgramFromCache(pubkey solana.PublicKey) { // For common accounts (small/medium/huge), if SeenOnceFilter != nil, uses // admit-on-second-hit: only caches accounts seen twice within the filter window. // This filters one-shot reads that would pollute the cache. -func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account, slot uint64) { +func (accountsDb *AccountsDb) cacheAccount(acct *accounts.Account) { owner := solana.PublicKeyFromBytes(acct.Owner[:]) // OSCILLATION FIX: Check if already in a common cache BEFORE deleting @@ -500,7 +500,7 @@ func (accountsDb *AccountsDb) GetAccount(slot uint64, pubkey solana.PublicKey) ( // Record cache miss by owner type and size bucket (for profiling) recordCacheMiss(solana.PublicKeyFromBytes(acct.Owner[:]), uint64(len(acct.Data))) - accountsDb.cacheAccount(acct, slot) + accountsDb.cacheAccount(acct) return acct, err } @@ -519,7 +519,7 @@ func (accountsDb *AccountsDb) StoreAccounts(accts []*accounts.Account, slot uint if acct == nil { continue } - accountsDb.cacheAccount(acct, slot) + accountsDb.cacheAccount(acct) } return nil From 455c56297413b5fa39df3e7fa1d2bcf874683e8a Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 21:43:04 -0600 Subject: [PATCH 23/28] feat: add seen-once filter stats to 100-slot summary Shows admit rate and counts when filter is active. Co-Authored-By: Claude Opus 4.5 --- pkg/replay/block.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pkg/replay/block.go b/pkg/replay/block.go index 83222412..51b27fd9 100644 --- a/pkg/replay/block.go +++ b/pkg/replay/block.go @@ -1709,6 +1709,14 @@ func ReplayBlocks( cf.MediumSize, cf.MediumCap, float64(cf.MediumSize)/float64(cf.MediumCap)*100, cf.HugeSize, cf.HugeCap, float64(cf.HugeSize)/float64(cf.HugeCap)*100, cf.StakeSize, cf.StakeCap, cf.VoteSize, cf.VoteCap) + + // Seen-once filter stats (only show if filter is active) + if cs.SeenOnceFiltered > 0 || cs.SeenOnceAdmitted > 0 { + total := cs.SeenOnceFiltered + cs.SeenOnceAdmitted + admitRate := float64(cs.SeenOnceAdmitted) / float64(total) * 100 + mlog.Log.InfofPrecise(" seen-once filter: %.1f%% admitted (%d/%d) | filtered %d", + admitRate, cs.SeenOnceAdmitted, total, cs.SeenOnceFiltered) + } } // Line 5: RPC/fetch debugging info From a99dbe314084c0c3a27048f8e82b1f889c0a34ed Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Tue, 13 Jan 2026 21:49:29 -0600 Subject: [PATCH 24/28] feat: add memory stats to 100-slot summary Shows heap, total alloc, sys memory, GC count/pause, and goroutine count. 
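For reference, the same fields can be read in isolation with only the
standard library (this mirrors the snippet added to the summary code):

    package main

    import (
        "fmt"
        "runtime"
    )

    func main() {
        var m runtime.MemStats
        runtime.ReadMemStats(&m)
        fmt.Printf("heap %.1fMB | alloc %.1fMB | sys %.1fMB | gc %d (%.1fms total) | goroutines %d\n",
            float64(m.HeapAlloc)/1024/1024,  // live heap
            float64(m.TotalAlloc)/1024/1024, // cumulative allocations
            float64(m.Sys)/1024/1024,        // memory obtained from the OS
            m.NumGC,                         // completed GC cycles
            float64(m.PauseTotalNs)/1e6,     // total STW pause time
            runtime.NumGoroutine())
    }
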
Co-Authored-By: Claude Opus 4.5 --- pkg/replay/block.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pkg/replay/block.go b/pkg/replay/block.go index 51b27fd9..ee16bd8c 100644 --- a/pkg/replay/block.go +++ b/pkg/replay/block.go @@ -1719,6 +1719,17 @@ func ReplayBlocks( } } + // Memory stats for GC pressure monitoring + var m runtime.MemStats + runtime.ReadMemStats(&m) + mlog.Log.InfofPrecise(" mem: heap %.1fMB | alloc %.1fMB | sys %.1fMB | gc %d (%.1fms total) | goroutines %d", + float64(m.HeapAlloc)/1024/1024, + float64(m.TotalAlloc)/1024/1024, + float64(m.Sys)/1024/1024, + m.NumGC, + float64(m.PauseTotalNs)/1e6, + runtime.NumGoroutine()) + // Line 5: RPC/fetch debugging info if fetchStats.Attempts > 0 { retryRate := float64(fetchStats.Retries) / float64(fetchStats.Attempts) * 100 From 5439bec6674e0e8e5201bff46ad8f02c41f1c0ae Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Wed, 14 Jan 2026 09:44:47 -0600 Subject: [PATCH 25/28] Add delta memory stats to 100-slot summary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track ΔNumGC, ΔPauseTotalNs (as Δms), ΔTotalAlloc (as MB/s rate), and ΔHeapAlloc per 100-slot interval to show GC pressure trends rather than just cumulative values. Co-Authored-By: Claude Opus 4.5 --- pkg/replay/block.go | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/pkg/replay/block.go b/pkg/replay/block.go index ee16bd8c..c0feb462 100644 --- a/pkg/replay/block.go +++ b/pkg/replay/block.go @@ -1175,6 +1175,12 @@ func ReplayBlocks( var nonVoteTxCounts []uint64 // non-vote txns per block var justCrossedEpochBoundary bool + // Memory stats tracking for delta calculations + var lastMemStats runtime.MemStats + var lastSummaryTime time.Time + runtime.ReadMemStats(&lastMemStats) + lastSummaryTime = time.Now() + // Preallocate slices for 100 blocks const summaryInterval = 100 execTimes = make([]float64, 0, summaryInterval) @@ -1719,17 +1725,33 @@ func ReplayBlocks( } } - // Memory stats for GC pressure monitoring + // Memory stats for GC pressure monitoring (with deltas) var m runtime.MemStats runtime.ReadMemStats(&m) - mlog.Log.InfofPrecise(" mem: heap %.1fMB | alloc %.1fMB | sys %.1fMB | gc %d (%.1fms total) | goroutines %d", - float64(m.HeapAlloc)/1024/1024, - float64(m.TotalAlloc)/1024/1024, + elapsed := time.Since(lastSummaryTime).Seconds() + if elapsed < 0.001 { + elapsed = 0.001 // Avoid division by zero + } + + // Calculate deltas + deltaGC := m.NumGC - lastMemStats.NumGC + deltaPauseMs := float64(m.PauseTotalNs-lastMemStats.PauseTotalNs) / 1e6 + deltaAllocMB := float64(m.TotalAlloc-lastMemStats.TotalAlloc) / 1024 / 1024 + allocPerSec := deltaAllocMB / elapsed + deltaHeapMB := float64(m.HeapAlloc) - float64(lastMemStats.HeapAlloc) + deltaHeapMB = deltaHeapMB / 1024 / 1024 + + mlog.Log.InfofPrecise(" mem: heap %.1fMB (Δ%+.1fMB) | alloc %.1fMB/s | gc %d (Δ%d, Δ%.1fms) | sys %.1fMB | goroutines %d", + float64(m.HeapAlloc)/1024/1024, deltaHeapMB, + allocPerSec, + m.NumGC, deltaGC, deltaPauseMs, float64(m.Sys)/1024/1024, - m.NumGC, - float64(m.PauseTotalNs)/1e6, runtime.NumGoroutine()) + // Update tracking for next interval + lastMemStats = m + lastSummaryTime = time.Now() + // Line 5: RPC/fetch debugging info if fetchStats.Attempts > 0 { retryRate := float64(fetchStats.Retries) / float64(fetchStats.Attempts) * 100 From eb0ec9891df065e9536506248e26e05fa4a67b6f Mon Sep 17 00:00:00 2001 From: 7layermagik 
<7layermagik@users.noreply.github.com> Date: Wed, 14 Jan 2026 10:59:40 -0600 Subject: [PATCH 26/28] perf: add program cache hit/miss stats with size breakdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add ProgramCacheHits/ProgramCacheMisses counters in MaybeGetProgramFromCache - Add ProgramMissUnder1M/ProgramMissOver1M for size breakdown on misses - Add RecordProgramCacheMissSize() helper called from all loaders (BpfLoader2, BpfLoaderUpgradeable, LoaderV4) after loading program bytes - Add ProgramSize/ProgramCap to CacheFillStats - Update 100-slot summary logging to show program cache stats and fill level Output format: program cache: 98.5% hit (1234/1253) | miss by size: <1M:15 ≥1M:4 cache fill: ... program 3200/5000 (64%) Co-Authored-By: Claude Opus 4.5 --- pkg/accountsdb/accountsdb.go | 115 ++++++++++++++++++++++------------- pkg/replay/block.go | 13 +++- pkg/sealevel/bpf_loader.go | 2 + pkg/sealevel/loader_v4.go | 1 + 4 files changed, 88 insertions(+), 43 deletions(-) diff --git a/pkg/accountsdb/accountsdb.go b/pkg/accountsdb/accountsdb.go index 7d281d4f..711b878c 100644 --- a/pkg/accountsdb/accountsdb.go +++ b/pkg/accountsdb/accountsdb.go @@ -52,24 +52,30 @@ var ( // Cache hit/miss counters for profiling var ( // Cache hits per cache type - SmallCacheHits atomic.Uint64 // Small accounts ≤512 bytes - MediumCacheHits atomic.Uint64 // Medium accounts 512-64KB - HugeCacheHits atomic.Uint64 // Huge accounts >64KB - StakeCacheHits atomic.Uint64 - VoteCacheHits atomic.Uint64 + SmallCacheHits atomic.Uint64 // Small accounts ≤512 bytes + MediumCacheHits atomic.Uint64 // Medium accounts 512-64KB + HugeCacheHits atomic.Uint64 // Huge accounts >64KB + StakeCacheHits atomic.Uint64 + VoteCacheHits atomic.Uint64 + ProgramCacheHits atomic.Uint64 // Compiled BPF programs // Cache misses per cache type - SmallCacheMisses atomic.Uint64 // ≤512 bytes - MediumCacheMisses atomic.Uint64 // 512-64KB - HugeCacheMisses atomic.Uint64 // >64KB (total) - StakeCacheMisses atomic.Uint64 - VoteCacheMisses atomic.Uint64 + SmallCacheMisses atomic.Uint64 // ≤512 bytes + MediumCacheMisses atomic.Uint64 // 512-64KB + HugeCacheMisses atomic.Uint64 // >64KB (total) + StakeCacheMisses atomic.Uint64 + VoteCacheMisses atomic.Uint64 + ProgramCacheMisses atomic.Uint64 // Compiled BPF programs // Granular miss breakdown within huge range (>64KB) HugeMiss64Kto256K atomic.Uint64 // 64KB-256KB HugeMiss256Kto1M atomic.Uint64 // 256KB-1MB HugeMissOver1M atomic.Uint64 // >1MB + // Program cache miss size breakdown + ProgramMissUnder1M atomic.Uint64 // <1MB programs + ProgramMissOver1M atomic.Uint64 // ≥1MB programs + // Admit-on-second-hit filter stats SeenOnceFiltered atomic.Uint64 // First hit, added to seen-once tracking SeenOnceAdmitted atomic.Uint64 // Second hit, admitted to cache @@ -80,6 +86,10 @@ type CacheStats struct { SmallHits, MediumHits, HugeHits, StakeHits, VoteHits uint64 SmallMisses, MediumMisses, HugeMisses uint64 StakeMisses, VoteMisses uint64 + // Program cache stats + ProgramHits, ProgramMisses uint64 + ProgramMissUnder1M uint64 // <1MB programs + ProgramMissOver1M uint64 // ≥1MB programs // Granular breakdown within huge range HugeMiss64Kto256K uint64 // 64KB-256KB HugeMiss256Kto1M uint64 // 256KB-1MB @@ -92,46 +102,53 @@ type CacheStats struct { // GetAndResetCacheStats returns current cache hit/miss counts and resets them func GetAndResetCacheStats() CacheStats { return CacheStats{ - SmallHits: SmallCacheHits.Swap(0), - MediumHits: MediumCacheHits.Swap(0), - 
HugeHits: HugeCacheHits.Swap(0), - StakeHits: StakeCacheHits.Swap(0), - VoteHits: VoteCacheHits.Swap(0), - SmallMisses: SmallCacheMisses.Swap(0), - MediumMisses: MediumCacheMisses.Swap(0), - HugeMisses: HugeCacheMisses.Swap(0), - StakeMisses: StakeCacheMisses.Swap(0), - VoteMisses: VoteCacheMisses.Swap(0), - HugeMiss64Kto256K: HugeMiss64Kto256K.Swap(0), - HugeMiss256Kto1M: HugeMiss256Kto1M.Swap(0), - HugeMissOver1M: HugeMissOver1M.Swap(0), - SeenOnceFiltered: SeenOnceFiltered.Swap(0), - SeenOnceAdmitted: SeenOnceAdmitted.Swap(0), + SmallHits: SmallCacheHits.Swap(0), + MediumHits: MediumCacheHits.Swap(0), + HugeHits: HugeCacheHits.Swap(0), + StakeHits: StakeCacheHits.Swap(0), + VoteHits: VoteCacheHits.Swap(0), + ProgramHits: ProgramCacheHits.Swap(0), + SmallMisses: SmallCacheMisses.Swap(0), + MediumMisses: MediumCacheMisses.Swap(0), + HugeMisses: HugeCacheMisses.Swap(0), + StakeMisses: StakeCacheMisses.Swap(0), + VoteMisses: VoteCacheMisses.Swap(0), + ProgramMisses: ProgramCacheMisses.Swap(0), + ProgramMissUnder1M: ProgramMissUnder1M.Swap(0), + ProgramMissOver1M: ProgramMissOver1M.Swap(0), + HugeMiss64Kto256K: HugeMiss64Kto256K.Swap(0), + HugeMiss256Kto1M: HugeMiss256Kto1M.Swap(0), + HugeMissOver1M: HugeMissOver1M.Swap(0), + SeenOnceFiltered: SeenOnceFiltered.Swap(0), + SeenOnceAdmitted: SeenOnceAdmitted.Swap(0), } } // CacheFillStats holds current cache fill levels type CacheFillStats struct { - SmallSize, SmallCap int - MediumSize, MediumCap int - HugeSize, HugeCap int - StakeSize, StakeCap int - VoteSize, VoteCap int + SmallSize, SmallCap int + MediumSize, MediumCap int + HugeSize, HugeCap int + StakeSize, StakeCap int + VoteSize, VoteCap int + ProgramSize, ProgramCap int } // GetCacheFillStats returns current cache fill levels (size/capacity) func (accountsDb *AccountsDb) GetCacheFillStats() CacheFillStats { return CacheFillStats{ - SmallSize: accountsDb.SmallAcctCache.Size(), - SmallCap: accountsDb.SmallAcctCache.Capacity(), - MediumSize: accountsDb.MediumAcctCache.Size(), - MediumCap: accountsDb.MediumAcctCache.Capacity(), - HugeSize: accountsDb.HugeAcctCache.Size(), - HugeCap: accountsDb.HugeAcctCache.Capacity(), - StakeSize: accountsDb.StakeAcctCache.Size(), - StakeCap: accountsDb.StakeAcctCache.Capacity(), - VoteSize: accountsDb.VoteAcctCache.Size(), - VoteCap: accountsDb.VoteAcctCache.Capacity(), + SmallSize: accountsDb.SmallAcctCache.Size(), + SmallCap: accountsDb.SmallAcctCache.Capacity(), + MediumSize: accountsDb.MediumAcctCache.Size(), + MediumCap: accountsDb.MediumAcctCache.Capacity(), + HugeSize: accountsDb.HugeAcctCache.Size(), + HugeCap: accountsDb.HugeAcctCache.Capacity(), + StakeSize: accountsDb.StakeAcctCache.Size(), + StakeCap: accountsDb.StakeAcctCache.Capacity(), + VoteSize: accountsDb.VoteAcctCache.Size(), + VoteCap: accountsDb.VoteAcctCache.Capacity(), + ProgramSize: accountsDb.ProgramCache.Size(), + ProgramCap: accountsDb.ProgramCache.Capacity(), } } @@ -325,7 +342,23 @@ type ProgramCacheEntry struct { } func (accountsDb *AccountsDb) MaybeGetProgramFromCache(pubkey solana.PublicKey) (*ProgramCacheEntry, bool) { - return accountsDb.ProgramCache.Get(pubkey) + entry, found := accountsDb.ProgramCache.Get(pubkey) + if found { + ProgramCacheHits.Add(1) + } else { + ProgramCacheMisses.Add(1) + } + return entry, found +} + +// RecordProgramCacheMissSize records the size breakdown for a program cache miss. +// Call this after loading the program to track <1MB vs ≥1MB breakdown. 
+func RecordProgramCacheMissSize(programBytes uint64) { + if programBytes >= 1024*1024 { + ProgramMissOver1M.Add(1) + } else { + ProgramMissUnder1M.Add(1) + } } func (accountsDb *AccountsDb) AddProgramToCache(pubkey solana.PublicKey, programEntry *ProgramCacheEntry) { diff --git a/pkg/replay/block.go b/pkg/replay/block.go index c0feb462..ec14240c 100644 --- a/pkg/replay/block.go +++ b/pkg/replay/block.go @@ -1708,13 +1708,22 @@ func ReplayBlocks( cs.SmallMisses, cs.MediumMisses, cs.HugeMisses, cs.HugeMiss64Kto256K, cs.HugeMiss256Kto1M, cs.HugeMissOver1M, cs.StakeMisses, cs.VoteMisses) + // Program cache stats (compiled BPF programs) + if cs.ProgramHits+cs.ProgramMisses > 0 { + progHitRate := float64(cs.ProgramHits) / float64(cs.ProgramHits+cs.ProgramMisses) * 100 + mlog.Log.InfofPrecise(" program cache: %.1f%% hit (%d/%d) | miss by size: <1M:%d ≥1M:%d", + progHitRate, cs.ProgramHits, cs.ProgramHits+cs.ProgramMisses, + cs.ProgramMissUnder1M, cs.ProgramMissOver1M) + } + // Cache fill stats cf := acctsDb.GetCacheFillStats() - mlog.Log.InfofPrecise(" cache fill: small %d/%d (%.0f%%), medium %d/%d (%.0f%%), huge %d/%d (%.0f%%), stake %d/%d, vote %d/%d", + mlog.Log.InfofPrecise(" cache fill: small %d/%d (%.0f%%), medium %d/%d (%.0f%%), huge %d/%d (%.0f%%), stake %d/%d, vote %d/%d, program %d/%d (%.0f%%)", cf.SmallSize, cf.SmallCap, float64(cf.SmallSize)/float64(cf.SmallCap)*100, cf.MediumSize, cf.MediumCap, float64(cf.MediumSize)/float64(cf.MediumCap)*100, cf.HugeSize, cf.HugeCap, float64(cf.HugeSize)/float64(cf.HugeCap)*100, - cf.StakeSize, cf.StakeCap, cf.VoteSize, cf.VoteCap) + cf.StakeSize, cf.StakeCap, cf.VoteSize, cf.VoteCap, + cf.ProgramSize, cf.ProgramCap, float64(cf.ProgramSize)/float64(cf.ProgramCap)*100) // Seen-once filter stats (only show if filter is active) if cs.SeenOnceFiltered > 0 || cs.SeenOnceAdmitted > 0 { diff --git a/pkg/sealevel/bpf_loader.go b/pkg/sealevel/bpf_loader.go index 6cffc888..e04fa4e9 100644 --- a/pkg/sealevel/bpf_loader.go +++ b/pkg/sealevel/bpf_loader.go @@ -1169,6 +1169,7 @@ func BpfLoaderProgramExecute(execCtx *ExecutionCtx) error { programBytes = programAcct.Data() } programAcctKey = programAcct.Key() + accountsdb.RecordProgramCacheMissSize(uint64(len(programBytes))) } } else if programOwner == a.BpfLoaderUpgradeableAddr { var programAcctState *UpgradeableLoaderState @@ -1236,6 +1237,7 @@ func BpfLoaderProgramExecute(execCtx *ExecutionCtx) error { programAcctKey = programAcctState.Program.ProgramDataAddress programBytes = programDataAcct.Data[upgradeableLoaderSizeOfProgramDataMetaData:] metrics.GlobalBlockReplay.GetProgramDataUncachedMarshal.AddTimingSince(start) + accountsdb.RecordProgramCacheMissSize(uint64(len(programBytes))) } } else { return InstrErrUnsupportedProgramId diff --git a/pkg/sealevel/loader_v4.go b/pkg/sealevel/loader_v4.go index 5e029e46..6624a7f3 100644 --- a/pkg/sealevel/loader_v4.go +++ b/pkg/sealevel/loader_v4.go @@ -305,6 +305,7 @@ func LoaderV4Execute(execCtx *ExecutionCtx) error { } programBytes = programDataAcct.Data[loaderV4ProgramDataOffset:] + accountsdb.RecordProgramCacheMissSize(uint64(len(programBytes))) } syscallRegistry := sbpf.SyscallRegistry(func(u uint32) (sbpf.Syscall, bool) { From ed8c17ae545eee3eeb49100062c499f62eebcb1f Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Wed, 14 Jan 2026 11:49:41 -0600 Subject: [PATCH 27/28] fix: add net/http/pprof import to enable /debug/pprof/* handlers The pprof HTTP server was starting but missing the side-effect import that registers the 
standard pprof handlers (/debug/pprof/profile, /debug/pprof/heap, etc). Only custom handlers were available. Co-Authored-By: Claude Opus 4.5 --- cmd/mithril/node/pprof.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/mithril/node/pprof.go b/cmd/mithril/node/pprof.go index 75030dd4..001ec46e 100644 --- a/cmd/mithril/node/pprof.go +++ b/cmd/mithril/node/pprof.go @@ -3,6 +3,7 @@ package node import ( "fmt" "net/http" + _ "net/http/pprof" // registers /debug/pprof/* handlers "runtime" "strconv" "time" From 12377aab96a5cf6e0af240196243cba3ac84eacc Mon Sep 17 00:00:00 2001 From: 7layermagik <7layermagik@users.noreply.github.com> Date: Wed, 14 Jan 2026 12:00:03 -0600 Subject: [PATCH 28/28] Add pprof HTTP server support to mithril run command - Add --pprof-port flag to Run command (was only on verify-range) - Call startPprofHandlers() in runLive() when pprof port is configured - Fix config.example.toml to use [development] section to match config.go (was [tuning] which didn't match the Go struct mapstructure tags) Co-Authored-By: Claude Opus 4.5 --- cmd/mithril/node/node.go | 6 ++++++ config.example.toml | 12 ++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/cmd/mithril/node/node.go b/cmd/mithril/node/node.go index 29903c58..c8ad6cf5 100644 --- a/cmd/mithril/node/node.go +++ b/cmd/mithril/node/node.go @@ -193,6 +193,7 @@ func init() { Run.Flags().BoolVar(&sbpf.UsePool, "use-pool", true, "Disable to allocate fresh slices") // [tuning.pprof] section flags + Run.Flags().Int64Var(&pprofPort, "pprof-port", -1, "Port to serve HTTP pprof endpoint") Run.Flags().StringVar(&cpuprofPath, "cpu-profile-path", "", "Filename to write CPU profile") // [debug] section flags @@ -1083,6 +1084,11 @@ func runLive(c *cobra.Command, args []string) { // Now start the metrics server (after banner so errors don't appear first) statsd.StartMetricsServer() + // Start pprof HTTP server if configured + if pprofPort != -1 { + startPprofHandlers(int(pprofPort)) + } + // Determine if using Lightbringer based on block source // NOTE: Lightbringer mode is TEMPORARILY DISABLED. The background block downloader that // wrote Lightbringer blocks to disk was removed due to reliability issues (panics, race conditions). diff --git a/config.example.toml b/config.example.toml index c1a5c871..5d73152c 100644 --- a/config.example.toml +++ b/config.example.toml @@ -232,13 +232,13 @@ name = "mithril" port = 8899 # ============================================================================ -# [tuning] - Performance Tuning & Profiling +# [development] - Performance Tuning & Profiling # ============================================================================ # # Advanced settings for optimizing Mithril's performance. # The defaults work well for most deployments. -[tuning] +[development] # Zstd decoder concurrency (defaults to NumCPU) # zstd_decoder_concurrency = 16 @@ -254,8 +254,8 @@ name = "mithril" # Enable/disable pool allocator for slices use_pool = true - # [tuning.pprof] - CPU/Memory Profiling - [tuning.pprof] + # [development.pprof] - CPU/Memory Profiling + [development.pprof] # Port to serve HTTP pprof endpoint (-1 to disable) # Access at http://localhost:PORT/debug/pprof/ # port = 6060 @@ -263,7 +263,7 @@ name = "mithril" # Filename to write CPU profile (for offline analysis) # cpu_profile_path = "/tmp/cpuprof.pprof" - # [tuning.cache] - AccountsDB LRU Cache Sizes + # [development.cache] - AccountsDB LRU Cache Sizes # # These control the LRU caches for fast account data reads during replay. 
# Values are NUMBER OF ENTRIES, not bytes. @@ -275,7 +275,7 @@ name = "mithril" # # Larger caches = fewer disk reads, but more memory usage. # Memory per entry is ~200-1000 bytes depending on account data size. - [tuning.cache] + [development.cache] # Vote account data cache - number of entries (frequently accessed during replay) vote_acct_lru = 5000