2025-03-01 18:41:36 +01:00

216 lines
6.4 KiB
Go

// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0
package wal
import (
"sync/atomic"
"github.com/benbjohnson/immutable"
"github.com/hashicorp/raft-wal/types"
)
// state is an immutable snapshot of the state of the log. Modifications must be
// made by copying and modifying the copy. This is easy enough because segments
// is an immutable map so changing and re-assigning to the clone won't impact
// the original map, and tail is just a pointer that can be mutated in the
// shallow clone. Note that methods called on the tail segmentWriter may mutate
// it's state so must only be called while holding the WAL's writeLock.
type state struct {
// refCount tracks readers that are reading segments based on this metadata.
// It is accessed atomically nd must be 64 bit aligned (i.e. leave it at the
// start of the struct).
refCount int32
// finaliser is set at most once while WAL is holding the write lock in order
// to provide a func that must be called when all current readers are done
// with this state. It's used for deferring closing and deleting old segments
// until we can be sure no reads are still in progress on them.
finalizer atomic.Value // func()
nextSegmentID uint64
// nextBaseIndex is used to signal which baseIndex to use next if there are no
// segments or current tail.
nextBaseIndex uint64
segments *immutable.SortedMap[uint64, segmentState]
tail types.SegmentWriter
}
type segmentState struct {
types.SegmentInfo
// r is the SegmentReader for our in-memory state.
r types.SegmentReader
}
// Commit converts the in-memory state into a PersistentState.
func (s *state) Persistent() types.PersistentState {
segs := make([]types.SegmentInfo, 0, s.segments.Len())
it := s.segments.Iterator()
for !it.Done() {
_, s, _ := it.Next()
segs = append(segs, s.SegmentInfo)
}
return types.PersistentState{
NextSegmentID: s.nextSegmentID,
Segments: segs,
}
}
func (s *state) getLog(index uint64) (*types.PooledBuffer, error) {
// Check the tail writer first
if s.tail != nil {
raw, err := s.tail.GetLog(index)
if err != nil && err != ErrNotFound {
// Return actual errors since they might mask the fact that index really
// is in the tail but failed to read for some other reason.
return nil, err
}
if err == nil {
// No error means we found it and just need to decode.
return raw, nil
}
// Not in the tail segment, fall back to searching previous segments.
}
seg, err := s.findSegmentReader(index)
if err != nil {
return nil, err
}
return seg.GetLog(index)
}
// findSegmentReader searches the segment tree for the segment that contains the
// log at index idx. It may return the tail segment which may not in fact
// contain idx if idx is larger than the last written index. Typically this is
// called after already checking with the tail writer whether the log is in
// there which means the caller can be sure it's not going to return the tail
// segment.
func (s *state) findSegmentReader(idx uint64) (types.SegmentReader, error) {
if s.segments.Len() == 0 {
return nil, ErrNotFound
}
// Search for a segment with baseIndex.
it := s.segments.Iterator()
// The baseIndex we want is the first one lower or equal to idx. Seek gets us
// to the first result equal or greater so we are either at it (if equal) or
// on the one _after_ the one we need. We step back since that's most likely
it.Seek(idx)
// The first call to Next/Prev actually returns the node the iterator is
// currently on (which is probably the one after the one we want) but in some
// edge cases we might actually want this one. Rather than reversing back and
// coming forward again, just check both this and the one before it.
_, seg, ok := it.Prev()
if ok && seg.BaseIndex > idx {
_, seg, ok = it.Prev()
}
// We either have the right segment or it doesn't exist.
if ok && seg.MinIndex <= idx && (seg.MaxIndex == 0 || seg.MaxIndex >= idx) {
return seg.r, nil
}
return nil, ErrNotFound
}
func (s *state) getTailInfo() *segmentState {
it := s.segments.Iterator()
it.Last()
_, tail, ok := it.Next()
if !ok {
return nil
}
return &tail
}
func (s *state) append(entries []types.LogEntry) error {
return s.tail.Append(entries)
}
func (s *state) firstIndex() uint64 {
it := s.segments.Iterator()
_, seg, ok := it.Next()
if !ok {
return 0
}
if seg.SealTime.IsZero() {
// First segment is unsealed so is also the tail. Check it actually has at
// least one log in otherwise it doesn't matter what the BaseIndex/MinIndex
// are.
if s.tail.LastIndex() == 0 {
// No logs in the WAL
return 0
}
// At least one log exists, return the MinIndex
}
return seg.MinIndex
}
func (s *state) lastIndex() uint64 {
tailIdx := s.tail.LastIndex()
if tailIdx > 0 {
return tailIdx
}
// Current tail is empty. Check there are previous sealed segments.
it := s.segments.Iterator()
it.Last()
_, _, ok := it.Prev()
if !ok {
// No tail! shouldn't be possible but means no logs yet
return 0
}
// Go back to the segment before the tail
_, _, ok = it.Prev()
if !ok {
// No previous segment so the whole log is empty
return 0
}
// There was a previous segment so it's MaxIndex will be one less than the
// tail's BaseIndex.
tailSeg := s.getTailInfo()
if tailSeg == nil || tailSeg.BaseIndex == 0 {
return 0
}
return tailSeg.BaseIndex - 1
}
func (s *state) acquire() func() {
atomic.AddInt32(&s.refCount, 1)
return s.release
}
func (s *state) release() {
// decrement on release
new := atomic.AddInt32(&s.refCount, -1)
if new == 0 {
// Cleanup state associated with this version now all refs have gone. Since
// there are no more refs and we should not set a finalizer until this state
// is no longer the active state, we can be sure this will happen only one.
// Even still lets swap the fn to ensure we only call finalizer once ever!
// We can't swap actual nil as it's not the same type as func() so do a
// dance with a nilFn below.
var nilFn func()
fnRaw := s.finalizer.Swap(nilFn)
if fn, ok := fnRaw.(func()); ok && fn != nil {
fn()
}
}
}
// clone returns a new state which is a shallow copy of just the immutable parts
// of s. This is safer than a simple assignment copy because that "reads" the
// atomically modified state non-atomically. We never want to copy the refCount
// or finalizer anyway.
func (s *state) clone() state {
return state{
nextSegmentID: s.nextSegmentID,
segments: s.segments,
tail: s.tail,
}
}