Major overhaul
This commit is contained in:
parent 76e8d66875
commit 61c54f3d8f
@@ -1,22 +1,11 @@
package domain

import (
    "context"
    "encoding/json"
    "fmt"
    "log"
    "time"

    "go.uber.org/zap"
    "google.golang.org/grpc/codes"
    "google.golang.org/grpc/status"
    "libvirt.org/go/libvirt"

    "deevirt.fr/compute/pkg/api/proto"
    "deevirt.fr/compute/pkg/api/raft"
    "deevirt.fr/compute/pkg/config"
    "deevirt.fr/compute/pkg/scheduler"
    deevirt_schema "deevirt.fr/compute/pkg/schema/deevirt"
    "deevirt.fr/compute/pkg/raft"
)

type Domain struct {
@@ -26,8 +15,8 @@ type Domain struct {
    proto.UnimplementedDomainServer
}

func (d *Domain) connectNode(NodeId string) (*libvirt.Connect, error) {
    var jCluster deevirt_schema.NodeStore
/*func (d *Domain) connectNode(NodeId string) (*libvirt.Connect, error) {
    var jCluster schema.NodeStore
    cluster, _ := d.Store.Get("/etc/libvirt/cluster")
    json.Unmarshal(cluster, &jCluster)

@@ -51,7 +40,7 @@ func (d *Domain) connectDomain(ctx context.Context, domainID string) (string, *l
        DomainId: domainID,
    })

    var jCluster deevirt_schema.NodeStore
    var jCluster schema.NodeStore
    cluster, _ := d.Store.Get("/etc/libvirt/cluster")
    json.Unmarshal(cluster, &jCluster)

@@ -72,15 +61,15 @@ func (d *Domain) List(ctx context.Context, in *proto.DomainListAllRequest) (*pro
    }

    for domId, data := range domains {
        domData := deevirt_schema.DomainStore{}
        domData := schema.Domain{}
        json.Unmarshal(data, &domData)

        nodeData, _ := d.Store.Get(fmt.Sprintf("/etc/libvirt/%s/%s/%s", domData.Type, domData.NodeId, domId))
        domNodeData := deevirt_schema.DomainToNodeStore{}
        domNodeData := schema.DomainToNode{}
        json.Unmarshal(nodeData, &domNodeData)

        domainsListResponse = append(domainsListResponse, &proto.DomainListResponse{
            NodeId: domData.NodeId,
            //NodeId: domData.NodeId,
            DomainId: domId,
            Config: string(domData.Config),
            State: int64(domNodeData.State),
@@ -100,11 +89,11 @@ func (d *Domain) Get(ctx context.Context, req *proto.DomainListRequest) (*proto.
        return nil, status.Errorf(codes.Internal, "Error read a store %v", err)
    }

    domData := deevirt_schema.DomainStore{}
    domData := deevirt_schema.Domain{}
    json.Unmarshal(domain, &domData)

    nodeData, _ := d.Store.Get(fmt.Sprintf("/etc/libvirt/%s/%s/%s", domData.Type, domData.NodeId, req.DomainId))
    domNodeData := deevirt_schema.DomainToNodeStore{}
    domNodeData := deevirt_schema.DomainToNode{}
    json.Unmarshal(nodeData, &domNodeData)

    domainsListResponse = proto.DomainListResponse{
@@ -198,4 +187,4 @@ func (d *Domain) Migrate(in *proto.DomainMigrateRequest, stream proto.Domain_Mig
        time.Sleep(500 * time.Millisecond)
    }
}
}
}*/
@@ -3,15 +3,8 @@ package domain
import (
    "context"
    "encoding/json"
    "encoding/xml"
    "fmt"
    "log"
    "strings"

    "deevirt.fr/compute/pkg/amqp"
    "deevirt.fr/compute/pkg/api/proto"
    "deevirt.fr/compute/pkg/schema"
    deevirt_schema "deevirt.fr/compute/pkg/schema/deevirt"
    "libvirt.org/go/libvirt"
)

@@ -20,7 +13,7 @@ type EventsDetail map[string]string
func (d *Domain) domainEventLifecycle(nodeId string, domainId string, state int64, event *libvirt.DomainEventLifecycle) {
    d.Logger.Sugar().Infof("%s => %s: Evènement %v", nodeId, domainId, event)

    domStore := deevirt_schema.DomainStore{}
    /*domStore := schema.Domain{}
    domData, err := d.Store.Get(fmt.Sprintf("/etc/libvirt/domain/%s", domainId))
    if err != nil || len(domData) == 0 {
        d.Logger.Sugar().Errorf("Critique !!, la VM %s n'existe pas ou comporte une erreur importante !", domainId)
@@ -31,7 +24,7 @@ func (d *Domain) domainEventLifecycle(nodeId string, domainId string, state int6
    case libvirt.DOMAIN_EVENT_DEFINED:
        // Node change!
        oldNodeId := strings.Clone(domStore.NodeId)
        dom2node, _ := json.Marshal(deevirt_schema.DomainToNodeStore{
        dom2node, _ := json.Marshal(deevirt_schema.DomainToNode{
            State: int(state),
        })
        d.Store.Set(fmt.Sprintf("/etc/libvirt/domain/qemu/%s/%s", nodeId, domainId), dom2node)
@@ -65,7 +58,7 @@ func (d *Domain) domainEventLifecycle(nodeId string, domainId string, state int6

    // Update the state
    nodeData, _ := d.Store.Get(fmt.Sprintf("/etc/libvirt/qemu/%s/%s", nodeId, domainId))
    domNodeData := deevirt_schema.DomainToNodeStore{}
    domNodeData := deevirt_schema.DomainToNode{}
    json.Unmarshal(nodeData, &domNodeData)

    domNodeData.State = int(state)
@@ -92,7 +85,7 @@ func (d *Domain) domainEventLifecycle(nodeId string, domainId string, state int6
            "."+desc.Metadata.DeevirtInstance.DeevirtDatacenterID+
            "."+domainId,
        e)
    defer a.Close()
    defer a.Close()*/
}

func (d *Domain) Event(ctx context.Context, req *proto.DomainEventRequest) (*proto.DomainEventResponse, error) {
105 cmd/mgr/main.go
@@ -1,9 +1,108 @@
package main

import (
    "deevirt.fr/compute/pkg/api"
    "crypto/tls"
    "crypto/x509"
    "fmt"
    "log"
    "net"
    "os"
    "time"

    raft_hashicorp "github.com/hashicorp/raft"
    "go.uber.org/zap"
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials"
    "google.golang.org/grpc/reflection"

    "deevirt.fr/compute/cmd/mgr/domain"
    "deevirt.fr/compute/cmd/mgr/node"
    "deevirt.fr/compute/cmd/mgr/worker"

    pb "deevirt.fr/compute/pkg/api/proto"
    "deevirt.fr/compute/pkg/config"
    "deevirt.fr/compute/pkg/raft"
)

func main() {
    api.Server()
func createGRPCServer(conf *config.Config) *grpc.Server {
    if conf.Manager.TlsKey != "" {
        cert, err := tls.LoadX509KeyPair(conf.Manager.TlsCert, conf.Manager.TlsKey)
        if err != nil {
            log.Fatalf("Erreur chargement du certificat: %v", err)
        }

        // Load the CA (optional, used to verify clients)
        caCert, err := os.ReadFile(conf.Manager.TlsCert)
        if err != nil {
            log.Fatalf("Erreur chargement CA: %v", err)
        }
        certPool := x509.NewCertPool()
        certPool.AppendCertsFromPEM(caCert)

        // Create the TLS credentials
        creds := credentials.NewTLS(&tls.Config{
            Certificates: []tls.Certificate{cert},
            ClientCAs:    certPool,
            ClientAuth:   tls.RequireAndVerifyClientCert, // Mutual authentication (mTLS)
        })

        return grpc.NewServer(grpc.Creds(creds))
    }

    return grpc.NewServer()
}

func main() {
    logger, _ := zap.NewProduction()

    // Fetch the deevirt configuration
    conf, err := config.New()
    if err != nil {
        log.Fatalf("failed load configuration: %v", err)
    }

    sock, err := net.Listen("tcp", fmt.Sprintf(":%d", 4480))
    if err != nil {
        log.Fatalf("failed to listen: %v", err)
    }

    r := raft.New(conf)

    s, tm, err := r.Open()
    if err != nil {
        log.Fatalf("failed to start raft: %v", err)
    }

    // Observer to watch state changes
    stateCh := make(chan raft_hashicorp.Observation, 1) // Channel of raft.Observation values
    s.Raft.RegisterObserver(raft_hashicorp.NewObserver(stateCh, true, nil))

    nodes := &worker.RaftNode{
        Bootstrap: false,
        Store:     s,
        NodeID:    conf.NodeID,
        StateCh:   stateCh,
    }

    go nodes.WatchStateChanges()

    // Wait 5 seconds to give log replay time to complete
    time.Sleep(5 * time.Second)
    server := createGRPCServer(conf)
    pb.RegisterNodeServer(server, &node.Node{
        Config: conf,
        Store:  r,
    })
    pb.RegisterDomainServer(server, &domain.Domain{
        Config: conf,
        Store:  r,
        Logger: logger,
    })
    tm.Register(server)
    //leaderhealth.Setup(r, s, []string{"Example"})
    raft.Register(server, r.Raft)
    reflection.Register(server)
    if err := server.Serve(sock); err != nil {
        log.Fatalf("failed to serve: %v", err)
    }
}
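Since `createGRPCServer` sets `tls.RequireAndVerifyClientCert`, every client of the manager must present a certificate signed by the trusted CA. A minimal client-side sketch of what a dial could look like; the file paths and address are hypothetical, only the gRPC/TLS calls mirror the server code above:

```go
package main

import (
    "crypto/tls"
    "crypto/x509"
    "log"
    "os"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials"
)

func main() {
    // Client certificate signed by the same CA the server trusts (paths illustrative).
    cert, err := tls.LoadX509KeyPair("/etc/deevirt/client.crt", "/etc/deevirt/client.key")
    if err != nil {
        log.Fatalf("load client cert: %v", err)
    }
    caPEM, err := os.ReadFile("/etc/deevirt/ca.crt")
    if err != nil {
        log.Fatalf("load CA: %v", err)
    }
    pool := x509.NewCertPool()
    pool.AppendCertsFromPEM(caPEM)

    creds := credentials.NewTLS(&tls.Config{
        Certificates: []tls.Certificate{cert}, // presented for the mTLS handshake
        RootCAs:      pool,                    // verifies the server certificate
    })
    conn, err := grpc.Dial("manager.example:4480", grpc.WithTransportCredentials(creds))
    if err != nil {
        log.Fatalf("dial: %v", err)
    }
    defer conn.Close()
    // pb.NewDomainClient(conn) can now issue authenticated RPCs.
}
```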
@@ -2,13 +2,14 @@ package node

import (
    "encoding/json"
    "fmt"
    "io"
    "log"

    "deevirt.fr/compute/pkg/api/proto"
    "deevirt.fr/compute/pkg/api/raft"
    "deevirt.fr/compute/pkg/config"
    deevirt_schema "deevirt.fr/compute/pkg/schema/deevirt"
    "deevirt.fr/compute/pkg/raft"
    "deevirt.fr/compute/pkg/schema"
)

type Node struct {
@@ -29,9 +30,11 @@ func (n *Node) Alive(stream proto.Node_AliveServer) error {

        log.Printf("Received heartbeat: %v", req)

        cluster := deevirt_schema.NodeStore{}
        cluster := schema.NodeStore{}
        println("on reçit une demande")
        res, _ := n.Store.Get("/etc/libvirt/cluster")
        json.Unmarshal(res, &cluster)
        fmt.Printf("%v\n", res)
        cluster[n.Config.NodeID].LastUpdate = req.Timestamp

        d, _ := json.Marshal(cluster)
@@ -1,43 +1,47 @@
package raft
package worker

import (
    "encoding/base64"
    "encoding/json"
    "fmt"
    "log"
    "time"

    raft_hashicorp "github.com/hashicorp/raft"
    "google.golang.org/protobuf/types/known/timestamppb"
    "libvirt.org/go/libvirt"

    "deevirt.fr/compute/pkg/config"
    etcd_client "deevirt.fr/compute/pkg/etcd"
    deevirt_schema "deevirt.fr/compute/pkg/schema/deevirt"
    "deevirt.fr/compute/pkg/raft"
    "deevirt.fr/compute/pkg/schema"
    //"deevirt.fr/compute/pkg/scheduler"
)

type RaftNode struct {
    Bootstrap bool
    Raft      *raft_hashicorp.Raft
    Store     *Store
    Conf      *config.Config
    Store     *raft.Store
    NodeID    string
    StateCh   chan raft_hashicorp.Observation
}

func (n *RaftNode) init() {
    println("bootstrap :")
    nodes := make(deevirt_schema.NodeStore)
    nodes := make(schema.NodeStore)

    // Fetch the node IDs
    etcd, _ := etcd_client.New(n.Store.conf.EtcdURI)
    etcd, _ := etcd_client.New(n.Conf.EtcdURI)
    defer etcd.Close()

    for key, value := range etcd_client.GetNodes(etcd, n.Store.conf.ClusterID) {
    for key, value := range etcd_client.GetNodes(etcd, n.Conf.ClusterID) {
        var libvirt_uri string

        nodes[key] = &deevirt_schema.NodeStoreInfo{
        nodes[key] = &schema.NodeStoreInfo{
            IpManagement: value.IpManagement,
        }

        if n.Store.conf.LibvirtTLS {
        if n.Conf.LibvirtTLS {
            libvirt_uri = fmt.Sprintf("qemu+tls://%s/system", value.IpManagement)
        } else {
            libvirt_uri = fmt.Sprintf("qemu+tcp://%s/system", value.IpManagement)
@@ -49,28 +53,42 @@ func (n *RaftNode) init() {
        }
        defer c.Close()

        // Fetch the list of domains.
        getDomains, _ := c.ListAllDomains(libvirt.CONNECT_LIST_DOMAINS_PERSISTENT)
        for _, domain := range getDomains {
            conf, _ := domain.GetXMLDesc(libvirt.DOMAIN_XML_INACTIVE)
            uuid, _ := domain.GetUUIDString()
            state, _, _ := domain.GetState()

            dStore, _ := json.Marshal(deevirt_schema.DomainStore{
                NodeId: key,
                Type:   "qemu",
                Config: []byte(conf),
            })
            n.Store.Set(fmt.Sprintf("/etc/libvirt/domain/%s", uuid), dStore)
            // Record the configuration
            domainStore, _ := json.Marshal(schema.Domain{

            dDomainToNode, _ := json.Marshal(deevirt_schema.DomainToNodeStore{
                State: int(state),
                Type:   "qemu",
                Config: base64.StdEncoding.EncodeToString([]byte(conf)),
            })
            n.Store.Set(fmt.Sprintf("/etc/libvirt/qemu/%s/%s", key, uuid), dDomainToNode)
            n.Store.Set(fmt.Sprintf("/domain/%s", uuid), domainStore)

            // Record the node
            domainStateStore, _ := json.Marshal(schema.DomainNode{
                NodeId: key,
                State:  int(state),
            })
            n.Store.Set(fmt.Sprintf("/domain/%s/node", uuid), domainStateStore)

            // Associate with the node
            currentTime := time.Now()
            newTime := currentTime.Add(3600 * time.Second) // Add 3600 seconds to give the monitor time to synchronize

            DomainLibvirtStore, _ := json.Marshal(schema.DomainLock{
                LifeCycle: int(state),
                Expiry:    timestamppb.New(newTime),
            })
            n.Store.Set(fmt.Sprintf("/etc/libvirt/qemu/%s/%s", key, uuid), DomainLibvirtStore)
        }
    }

    jNodes, _ := json.Marshal(nodes)
    n.Store.Set("/etc/libvirt/cluster", jNodes)
    n.Store.Set("/cluster", jNodes)
}

// Function to watch and report state changes
@@ -80,33 +98,35 @@ func (n *RaftNode) WatchStateChanges() {
    for obs := range n.StateCh {
        switch evt := obs.Data.(type) {
        case raft_hashicorp.RaftState:
            log.Println("[ÉVÉNEMENT] Changement d'état Raft :", evt)

            if evt == raft_hashicorp.Leader {
            log.Println("[ÉVÉNEMENT] Changement d'état Raft :", evt)
            if n.Bootstrap {
                n.init()
            }

            // Wait one second before starting the worker
            time.Sleep(1 * time.Second)

            // Wait until the logs are synchronized!
            barrier := n.Store.Raft.Barrier(10 * time.Second)
            if err := barrier.Error(); err != nil {
                return
            }
            log.Println("Démarrage du worker !")
            worker.Start()
            } else {
            worker.Stop()
            }

            log.Println("[ÉVÉNEMENT] Changement d'état Raft :", evt)
        case raft_hashicorp.LeaderObservation:
            log.Println("[ÉVÉNEMENT] Le leader est", evt.LeaderID)
        case raft_hashicorp.PeerObservation:
            if n.Raft.State() == raft_hashicorp.Leader {
            if n.Store.Raft.State() == raft_hashicorp.Leader {
                peerID := evt.Peer.ID
                peerAddr := evt.Peer.Address

                log.Println("[NOUVEAU NŒUD] Détection de", peerID, "à", peerAddr)
                log.Println("[ACTION] Ajout automatique en tant que voter...")

                future := n.Raft.AddVoter(peerID, peerAddr, 0, 0)
                future := n.Store.Raft.AddVoter(peerID, peerAddr, 0, 0)
                if err := future.Error(); err != nil {
                    log.Println("[ERREUR] Impossible d'ajouter", peerID, ":", err)
                } else {
@@ -1,4 +1,4 @@
package raft
package worker

import (
    "context"
@@ -11,10 +11,11 @@ import (

    "go.uber.org/zap"

    "deevirt.fr/compute/pkg/api/libvirt"
    "deevirt.fr/compute/pkg/config"
    "deevirt.fr/compute/pkg/scheduler"
    deevirt_schema "deevirt.fr/compute/pkg/schema/deevirt"
    "deevirt.fr/compute/pkg/libvirt"
    scheduler "deevirt.fr/compute/pkg/metrics"
    "deevirt.fr/compute/pkg/raft"
    "deevirt.fr/compute/pkg/schema"
)

type Worker struct {
@@ -22,14 +23,14 @@ type Worker struct {
    cancel    context.CancelFunc
    cancelled bool

    store *Store
    store *raft.Store

    config *config.Config
    nodes  deevirt_schema.NodeStore
    nodes  schema.NodeStore
    log    *zap.SugaredLogger
}

func NewWorker(r *Store) (*Worker, error) {
func NewWorker(r *raft.Store) (*Worker, error) {
    config, _ := config.New()
    ctx, cancel := context.WithCancel(context.Background())

@@ -133,7 +134,7 @@ We periodically check libvirt reachability, independently of the prog
This check provides a second layer of control for HA.
*/
func (w *Worker) handleLibvirtControl() {
    var nodes deevirt_schema.NodeStore
    var nodes schema.NodeStore
    cluster, err := w.store.Get("/etc/libvirt/cluster")
    if err != nil {
        w.log.Errorf("Erreur lors de la récupération des données de cluster: %v", err)
@@ -149,7 +150,7 @@ func (w *Worker) handleLibvirtControl() {

    for _, conf := range nodes {
        // Create a libvirt connection
        c, err := libvirt.New(conf.IpManagement, w.store.conf.LibvirtTLS)
        c, err := libvirt.New(conf.IpManagement, w.config.LibvirtTLS)
        if err != nil {
            w.log.Warnf("Impossible de créer la connexion libvirt pour %s: %v", conf.IpManagement, err)
            //conf.Alive = false
@@ -17,13 +17,13 @@ import (
    "deevirt.fr/compute/pkg/api/libvirt"
    pb "deevirt.fr/compute/pkg/api/proto"
    "deevirt.fr/compute/pkg/config"
    deevirt_schema "deevirt.fr/compute/pkg/schema/deevirt"
    "deevirt.fr/compute/pkg/schema"
)

type qemu struct {
    clientVirt *go_libvirt.Connect
    config     *config.Config
    nodes      deevirt_schema.NodeStore
    nodes      schema.NodeStore
}

func NewQemu(c *go_libvirt.Connect) qemu {
@@ -47,13 +47,18 @@ func (q qemu) stonith(ctx context.Context) {
        log.Printf("L'accessibilité avec les manager est revenue, la procédure d'urgence est avortée.")
        return
    case <-time.After(10 * time.Second):
        // Check the other servers via libvirt; if one of them is reachable, we can assume a problem with the manager
        for _, domData := range q.nodes {
            _, err := libvirt.New(domData.IpManagement, q.config.LibvirtTLS)
            if err == nil {
                log.Printf("Au moins un noeud est joignable, la procédure d'urgence est avortée.")
                return
        if len(q.nodes) > 0 {
            // Check the other servers via libvirt; if one of them is reachable, we can assume a problem with the manager
            for _, domData := range q.nodes {
                _, err := libvirt.New(domData.IpManagement, q.config.LibvirtTLS)
                if err == nil {
                    log.Printf("Au moins un noeud est joignable, la procédure d'urgence est avortée.")
                    return
                }
            }
        } else {
            log.Printf("Le noeud est indépendant, on avorte")
            return
        }

        // Manager unreachable, and the other libvirt nodes as well
@@ -110,7 +115,7 @@ func (q qemu) heartbeat() {
            log.Println("🔌 Connexion fermée par le serveur")
            break
        } else {
            nodeStore := deevirt_schema.NodeStore{}
            nodeStore := schema.NodeStore{}
            json.Unmarshal(resp.Nodes, &nodeStore)
            q.nodes = nodeStore
        }
@@ -140,7 +145,7 @@ func (q qemu) heartbeat() {

    }

    time.Sleep(1 * time.Second)
    time.Sleep(100 * time.Millisecond)
    }

}
@@ -240,7 +240,7 @@ func CollectDomain(ch chan<- prometheus.Metric, stat libvirt.DomainStats, hostna
    if err != nil {
        return err
    }
    var desc schema.Domain
    var desc schema.DomainXML
    err = xml.Unmarshal([]byte(xmlDesc), &desc)
    if err != nil {
        return err
@@ -297,7 +297,7 @@ func CollectDomain(ch chan<- prometheus.Metric, stat libvirt.DomainStats, hostna
    return nil
}

func CollectDomainVCPU(ch chan<- prometheus.Metric, stat []libvirt.DomainStatsVcpu, hostname string, domainUUID string, desc schema.Domain) {
func CollectDomainVCPU(ch chan<- prometheus.Metric, stat []libvirt.DomainStatsVcpu, hostname string, domainUUID string, desc schema.DomainXML) {
    for idx, vcpu := range stat {
        ch <- prometheus.MustNewConstMetric(
            libvirtDomainVcpuState,
@@ -335,7 +335,7 @@ func CollectDomainVCPU(ch chan<- prometheus.Metric, stat []libvirt.DomainStatsVc
    }
}

func CollectDomainBalloon(ch chan<- prometheus.Metric, stat *libvirt.DomainStatsBalloon, hostname string, domainUUID string, desc schema.Domain) {
func CollectDomainBalloon(ch chan<- prometheus.Metric, stat *libvirt.DomainStatsBalloon, hostname string, domainUUID string, desc schema.DomainXML) {
    ch <- prometheus.MustNewConstMetric(
        libvirtDomainBalloonStatCurrentBytes,
        prometheus.GaugeValue,
@@ -416,7 +416,7 @@ func CollectDomainBalloon(ch chan<- prometheus.Metric, stat *libvirt.DomainStats

}

func CollectDomainBlock(ch chan<- prometheus.Metric, stat []libvirt.DomainStatsBlock, hostname string, domainUUID string, desc schema.Domain) {
func CollectDomainBlock(ch chan<- prometheus.Metric, stat []libvirt.DomainStatsBlock, hostname string, domainUUID string, desc schema.DomainXML) {
    for _, block := range stat {

        if block.RdBytesSet {
@@ -532,7 +532,7 @@ func CollectDomainBlock(ch chan<- prometheus.Metric, stat []libvirt.DomainStatsB
    }
}

func CollectDomainNet(ch chan<- prometheus.Metric, stat []libvirt.DomainStatsNet, hostname string, domainUUID string, desc schema.Domain) {
func CollectDomainNet(ch chan<- prometheus.Metric, stat []libvirt.DomainStatsNet, hostname string, domainUUID string, desc schema.DomainXML) {
    for _, iface := range stat {

        if iface.RxBytesSet {
6 go.mod
@@ -24,10 +24,14 @@ require (

require (
    github.com/armon/go-metrics v0.4.1 // indirect
    github.com/benbjohnson/immutable v0.4.0 // indirect
    github.com/beorn7/perks v1.0.1 // indirect
    github.com/boltdb/bolt v1.3.1 // indirect
    github.com/cespare/xxhash/v2 v2.3.0 // indirect
    github.com/coreos/etcd v3.3.27+incompatible // indirect
    github.com/coreos/go-semver v0.3.1 // indirect
    github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf // indirect
    github.com/coreos/pkg v0.0.0-20220810130054-c7d1c02cb6cf // indirect
    github.com/fatih/color v1.18.0 // indirect
    github.com/gogo/protobuf v1.3.2 // indirect
    github.com/golang/protobuf v1.5.4 // indirect
@@ -38,6 +42,7 @@ require (
    github.com/hashicorp/go-msgpack v1.1.5 // indirect
    github.com/hashicorp/go-msgpack/v2 v2.1.3 // indirect
    github.com/hashicorp/golang-lru v1.0.2 // indirect
    github.com/hashicorp/raft-wal v0.4.2 // indirect
    github.com/json-iterator/go v1.1.12 // indirect
    github.com/klauspost/compress v1.18.0 // indirect
    github.com/mattn/go-colorable v0.1.14 // indirect
@@ -51,6 +56,7 @@ require (
    go.etcd.io/etcd/api/v3 v3.5.18 // indirect
    go.etcd.io/etcd/client/pkg/v3 v3.5.18 // indirect
    go.uber.org/multierr v1.11.0 // indirect
    golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 // indirect
    golang.org/x/net v0.35.0 // indirect
    golang.org/x/oauth2 v0.27.0 // indirect
    golang.org/x/sys v0.30.0 // indirect
12 go.sum
@@ -7,6 +7,8 @@ github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRF
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA=
github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4=
github.com/benbjohnson/immutable v0.4.0 h1:CTqXbEerYso8YzVPxmWxh2gnoRQbbB9X1quUC8+vGZA=
github.com/benbjohnson/immutable v0.4.0/go.mod h1:iAr8OjJGLnLmVUr9MZ/rz4PWUy6Ouc2JLYuMArmvAJM=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
@@ -18,10 +20,16 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
github.com/coreos/etcd v3.3.27+incompatible h1:QIudLb9KeBsE5zyYxd1mjzRSkzLg9Wf9QlRwFgd6oTA=
github.com/coreos/etcd v3.3.27+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4=
github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec=
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU=
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/coreos/pkg v0.0.0-20220810130054-c7d1c02cb6cf h1:GOPo6vn/vTN+3IwZBvXX0y5doJfSC7My0cdzelyOCsQ=
github.com/coreos/pkg v0.0.0-20220810130054-c7d1c02cb6cf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -96,6 +104,8 @@ github.com/hashicorp/raft-boltdb v0.0.0-20230125174641-2a8082862702 h1:RLKEcCuKc
github.com/hashicorp/raft-boltdb v0.0.0-20230125174641-2a8082862702/go.mod h1:nTakvJ4XYq45UXtn0DbwR4aU9ZdjlnIenpbs6Cd+FM0=
github.com/hashicorp/raft-boltdb/v2 v2.3.1 h1:ackhdCNPKblmOhjEU9+4lHSJYFkJd6Jqyvj6eW9pwkc=
github.com/hashicorp/raft-boltdb/v2 v2.3.1/go.mod h1:n4S+g43dXF1tqDT+yzcXHhXM6y7MrlUd3TTwGRcUvQE=
github.com/hashicorp/raft-wal v0.4.2 h1:DV1jgqEumNfdNpOaZ9mL1Gu7Mz59epFtiE6CoqnHrlY=
github.com/hashicorp/raft-wal v0.4.2/go.mod h1:S92ainH+6fRuWk6BtZKJ8EgcGgNTKx48Hk5dhOOY1DM=
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
@@ -216,6 +226,8 @@ golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnf
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 h1:tnebWN09GYg9OLPss1KXj8txwZc6X6uMr6VFdcGNbHw=
golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -1,105 +0,0 @@
package raft

import (
    "encoding/json"
    "fmt"
    "io"

    "github.com/hashicorp/raft"
)

type Fsm Store

// Apply applies a Raft log entry to the key-value store.
func (f *Fsm) Apply(l *raft.Log) interface{} {
    /*if f.Raft.State() == raft.Leader {
        println("j'insert dans etcd !")
    }*/

    var c command
    if err := json.Unmarshal(l.Data, &c); err != nil {
        panic(fmt.Sprintf("failed to unmarshal command: %s", err.Error()))
    }

    switch c.Op {
    case "set":
        return f.applySet(c.Key, c.Value)
    case "delete":
        return f.applyDelete(c.Key)
    default:
        panic(fmt.Sprintf("unrecognized command op: %s", c.Op))
    }
}

// Snapshot returns a snapshot of the key-value store.
func (f *Fsm) Snapshot() (raft.FSMSnapshot, error) {
    f.mu.Lock()
    defer f.mu.Unlock()

    // Clone the map.
    o := make(map[string][]byte)
    for k, v := range f.m {
        o[k] = v
    }

    return &fsmSnapshot{store: o}, nil
}

// Restore stores the key-value store to a previous state.
func (f *Fsm) Restore(rc io.ReadCloser) error {
    o := make(map[string][]byte)

    if err := json.NewDecoder(rc).Decode(&o); err != nil {
        return err
    }

    // Set the state from the snapshot, no lock required according to
    // Hashicorp docs.
    f.m = o
    return nil
}

func (f *Fsm) applySet(key string, value []byte) interface{} {
    f.mu.Lock()
    defer f.mu.Unlock()
    f.m[key] = value
    return nil
}

func (f *Fsm) applyDelete(key string) interface{} {
    f.mu.Lock()
    defer f.mu.Unlock()
    delete(f.m, key)
    return nil
}

type fsmSnapshot struct {
    store map[string][]byte
}

func (f *fsmSnapshot) Persist(sink raft.SnapshotSink) error {
    err := func() error {
        // Encode data.
        b, err := json.Marshal(f.store)
        if err != nil {
            return err
        }

        // Write data to sink.
        if _, err := sink.Write(b); err != nil {
            return err
        }

        // Close the sink.
        return sink.Close()
    }()

    if err != nil {
        sink.Cancel()
    }

    return err
}

func (f *fsmSnapshot) Release() {
}
@@ -1,26 +0,0 @@
package raft

/*type NodeStore map[string]*NodeStoreInfo

type NodeStoreInfo struct {
    IpManagement string
    Alive        bool
    Scoring      int
}

type DomainStore struct {
    Config  string `json:"config"`
    State   int    `json:"state"`
    Migrate bool   `json:"Migrate"`
}

type SchemaDomain struct {
    Config string `json:"config"`
    State  int    `json:"state"`
}*/

// Metrics
type DomainUsage struct {
    DomID string
    Usage float64
}
@@ -1,90 +0,0 @@
package api

import (
    "crypto/tls"
    "crypto/x509"
    "fmt"
    "log"
    "net"
    "os"

    "go.uber.org/zap"
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials"
    "google.golang.org/grpc/reflection"

    "deevirt.fr/compute/pkg/api/domain"
    "deevirt.fr/compute/pkg/api/node"

    pb "deevirt.fr/compute/pkg/api/proto"
    "deevirt.fr/compute/pkg/api/raft"
    "deevirt.fr/compute/pkg/config"
)

func createGRPCServer(conf *config.Config) *grpc.Server {
    if conf.Manager.TlsKey != "" {
        cert, err := tls.LoadX509KeyPair(conf.Manager.TlsCert, conf.Manager.TlsKey)
        if err != nil {
            log.Fatalf("Erreur chargement du certificat: %v", err)
        }

        // Load the CA (optional, used to verify clients)
        caCert, err := os.ReadFile(conf.Manager.TlsCert)
        if err != nil {
            log.Fatalf("Erreur chargement CA: %v", err)
        }
        certPool := x509.NewCertPool()
        certPool.AppendCertsFromPEM(caCert)

        // Create the TLS credentials
        creds := credentials.NewTLS(&tls.Config{
            Certificates: []tls.Certificate{cert},
            ClientCAs:    certPool,
            ClientAuth:   tls.RequireAndVerifyClientCert, // Mutual authentication (mTLS)
        })

        return grpc.NewServer(grpc.Creds(creds))
    }

    return grpc.NewServer()
}

func Server() {
    logger, _ := zap.NewProduction()

    // Fetch the deevirt configuration
    conf, err := config.New()
    if err != nil {
        log.Fatalf("failed load configuration: %v", err)
    }

    sock, err := net.Listen("tcp", fmt.Sprintf(":%d", 4480))
    if err != nil {
        log.Fatalf("failed to listen: %v", err)
    }

    r := raft.New(conf)

    tm, err := r.Open()
    if err != nil {
        log.Fatalf("failed to start raft: %v", err)
    }

    s := createGRPCServer(conf)
    pb.RegisterNodeServer(s, &node.Node{
        Config: conf,
        Store:  r,
    })
    pb.RegisterDomainServer(s, &domain.Domain{
        Config: conf,
        Store:  r,
        Logger: logger,
    })
    tm.Register(s)
    //leaderhealth.Setup(r, s, []string{"Example"})
    raft.Register(s, r.Raft)
    reflection.Register(s)
    if err := s.Serve(sock); err != nil {
        log.Fatalf("failed to serve: %v", err)
    }
}
147 pkg/raft/fsm.go Normal file
@@ -0,0 +1,147 @@
package raft

import (
    "context"
    "encoding/json"
    "fmt"
    "io"
    "regexp"
    "time"

    "github.com/hashicorp/raft"
    clientv3 "go.etcd.io/etcd/client/v3"
)

type FSM struct {
    store  *Store
    client *clientv3.Client
}

func NewFSM(endpoints []string, store *Store) (*FSM, error) {
    // Connect to the etcd cluster
    client, err := clientv3.New(clientv3.Config{
        Endpoints:   endpoints,
        DialTimeout: 5 * time.Second,
    })
    if err != nil {
        return nil, err
    }
    return &FSM{
        store:  store,
        client: client,
    }, nil
}

// Apply applies a Raft log entry to the key-value store.
func (f *FSM) Apply(l *raft.Log) interface{} {
    switch l.Type {
    case raft.LogCommand:
        var c command
        if err := json.Unmarshal(l.Data, &c); err != nil {
            panic(fmt.Sprintf("failed to unmarshal command: %s", err.Error()))
        }

        switch c.Op {
        case "set":
            f.applySet(c.Key, c.Value)
        case "delete":
            f.applyDelete(c.Key)
        default:
            panic(fmt.Sprintf("unrecognized command op: %s", c.Op))
        }

        // Replicate to etcd if this is not a log replay and the node is the leader
        if l.Index > f.store.lastIndex && f.store.Raft.State() == raft.Leader {
            regex := regexp.MustCompile(`^/domain`)
            match := regex.MatchString(c.Key)

            ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
            if match {
                switch c.Op {
                case "set":
                    f.client.Put(ctx, fmt.Sprintf("/deevirt/cluster/%s%s", f.store.conf.ClusterID, c.Key), string(c.Value))
                case "delete":
                    f.client.Delete(ctx, fmt.Sprintf("/deevirt/cluster/%s%s", f.store.conf.ClusterID, c.Key))
                }
            }
            defer cancel()
        }
    default:
        println(l.Type.String())
    }

    return nil
}

// Snapshot returns a snapshot of the key-value store.
func (f *FSM) Snapshot() (raft.FSMSnapshot, error) {
    f.store.mu.Lock()
    defer f.store.mu.Unlock()

    // Clone the map.
    o := make(map[string][]byte)
    for k, v := range f.store.m {
        o[k] = v
    }

    return &fsmSnapshot{store: o}, nil
}

// Restore stores the key-value store to a previous state.
func (f *FSM) Restore(rc io.ReadCloser) error {
    o := make(map[string][]byte)

    if err := json.NewDecoder(rc).Decode(&o); err != nil {
        return err
    }

    // Set the state from the snapshot, no lock required according to
    // Hashicorp docs.
    f.store.m = o
    return nil
}

func (f *FSM) applySet(key string, value []byte) interface{} {
    f.store.mu.Lock()
    defer f.store.mu.Unlock()
    f.store.m[key] = value
    return nil
}

func (f *FSM) applyDelete(key string) interface{} {
    f.store.mu.Lock()
    defer f.store.mu.Unlock()
    delete(f.store.m, key)
    return nil
}

type fsmSnapshot struct {
    store map[string][]byte
}

func (f *fsmSnapshot) Persist(sink raft.SnapshotSink) error {
    err := func() error {
        // Encode data.
        b, err := json.Marshal(f.store)
        if err != nil {
            return err
        }

        // Write data to sink.
        if _, err := sink.Write(b); err != nil {
            return err
        }

        // Close the sink.
        return sink.Close()
    }()

    if err != nil {
        sink.Cancel()
    }

    return err
}

func (f *fsmSnapshot) Release() {
}
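The `command` type decoded in `Apply` is not part of this diff. From its JSON round-trip and the `c.Op` / `c.Key` / `c.Value` accesses, a plausible reconstruction looks like this (field names are an assumption):

```go
// Hypothetical reconstruction of the command payload consumed by FSM.Apply;
// the field set follows the c.Op / c.Key / c.Value accesses above.
type command struct {
    Op    string `json:"op"`    // "set" or "delete"
    Key   string `json:"key"`   // store key, e.g. "/domain/<uuid>"
    Value []byte `json:"value"` // JSON payload for "set", empty for "delete"
}
```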
@@ -13,9 +13,10 @@ import (
    "sync"
    "time"

    transport "deevirt.fr/compute/pkg/api/raft/transport"
    transport "deevirt.fr/compute/pkg/raft/transport"
    "github.com/hashicorp/raft"
    raftboltdb "github.com/hashicorp/raft-boltdb/v2"
    raftwal "github.com/hashicorp/raft-wal"
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials"

@@ -52,6 +53,8 @@ type Store struct {

    Raft *raft.Raft // The consensus mechanism

    lastIndex uint64

    logger *log.Logger
}

@@ -92,30 +95,47 @@ func New(conf *config.Config) *Store {
    }
}

func (s *Store) Open() (*transport.Manager, error) {
func (s *Store) Open() (*Store, *transport.Manager, error) {
    // Create the directory
    baseDir := filepath.Join("/var/lib/deevirt/mgr/", s.conf.NodeID)
    err := os.MkdirAll(baseDir, 0740)
    if err != nil {
        return nil, err
        return nil, nil, err
    }

    walDir := filepath.Join(baseDir, "/wal")
    err = os.MkdirAll(walDir, 0740)
    if err != nil {
        return nil, nil, err
    }

    c := raft.DefaultConfig()
    c.SnapshotInterval = 60 * time.Second
    c.SnapshotThreshold = 1000
    c.SnapshotThreshold = 500
    c.HeartbeatTimeout = 2 * time.Second
    c.ElectionTimeout = 3 * time.Second

    c.LocalID = raft.ServerID(s.conf.NodeID)

    ldb, err := raftboltdb.NewBoltStore(filepath.Join(baseDir, "logs.dat"))
    // Create a LogStore backed by Raft-WAL
    logStore, err := raftwal.Open(walDir)
    if err != nil {
        return nil, fmt.Errorf(`boltdb.NewBoltStore(%q): %v`, filepath.Join(baseDir, "logs.dat"), err)
        log.Fatalf("Erreur lors de la création du LogStore Raft-WAL : %v", err)
    }

    s.lastIndex, err = logStore.LastIndex()
    if err != nil {
        log.Fatalf("Erreur lors de la récupération de l'index de la dernière entrée: %v", err)
    }

    stableStore, err := raftboltdb.NewBoltStore(filepath.Join(baseDir, "logs.dat"))
    if err != nil {
        return nil, nil, fmt.Errorf(`boltdb.NewBoltStore(%q): %v`, filepath.Join(baseDir, "logs.dat"), err)
    }

    fss, err := raft.NewFileSnapshotStore(baseDir, 3, os.Stderr)
    if err != nil {
        return nil, fmt.Errorf(`raft.NewFileSnapshotStore(%q, ...): %v`, baseDir, err)
        return nil, nil, fmt.Errorf(`raft.NewFileSnapshotStore(%q, ...): %v`, baseDir, err)
    }

    dialOption := []grpc.DialOption{}
@@ -126,36 +146,27 @@ func (s *Store) Open() (*transport.Manager, error) {

    tm := transport.New(raft.ServerAddress(s.conf.AddressPrivate), dialOption)

    r, err := raft.NewRaft(c, (*Fsm)(s), ldb, ldb, fss, tm.Transport())
    fsm, err := NewFSM(strings.Split(s.conf.EtcdURI, ","), s)
    if err != nil {
        return nil, fmt.Errorf("raft.NewRaft: %v", err)
        log.Fatalf("%v", err)
    }

    r, err := raft.NewRaft(c, fsm, logStore, stableStore, fss, tm.Transport())
    if err != nil {
        return nil, nil, fmt.Errorf("raft.NewRaft: %v", err)
    }
    s.Raft = r

    // Observer to watch state changes
    stateCh := make(chan raft.Observation, 1) // Channel of raft.Observation values
    r.RegisterObserver(raft.NewObserver(stateCh, true, nil))

    node := &RaftNode{
        Bootstrap: false,
        Raft:      r,
        Store:     s,
        NodeID:    s.conf.NodeID,
        StateCh:   stateCh,
    }

    go node.WatchStateChanges()

    hasState, _ := checkIfStateExists(ldb)
    hasState, _ := checkIfStateExists(logStore)

    if strings.Split(s.conf.AddressPrivate, ":")[0] == s.conf.AddressPrivate && !hasState {
        println("Démarrage du bootstrap ! ")
        node.Bootstrap = true
        //node.Bootstrap = true

        // Fetch the node IDs
        etcd, err := etcd_client.New(s.conf.EtcdURI)
        if err != nil {
            return nil, err
            return nil, nil, err
        }
        defer etcd.Close()

@@ -178,11 +189,11 @@ func (s *Store) Open() (*transport.Manager, error) {
        }
        f := r.BootstrapCluster(cfg)
        if err := f.Error(); err != nil {
            return nil, fmt.Errorf("raft.Raft.BootstrapCluster: %v", err)
            return nil, nil, fmt.Errorf("raft.Raft.BootstrapCluster: %v", err)
        }
    }

    return tm, nil
    return s, tm, nil
}

type LsOptions struct {
@@ -192,6 +203,11 @@ type LsOptions struct {

// Returns the contents of the key
func (s *Store) Ls(key string, options LsOptions) (map[string][]byte, error) {
    barrier := s.Raft.Barrier(10 * time.Second)
    if err := barrier.Error(); err != nil {
        return nil, fmt.Errorf("barrier timeout: %v", err)
    }

    s.mu.Lock()
    defer s.mu.Unlock()

@@ -227,6 +243,11 @@ func (s *Store) Ls(key string, options LsOptions) (map[string][]byte, error) {

// Get returns the value for the given key.
func (s *Store) Get(key string) ([]byte, error) {
    barrier := s.Raft.Barrier(10 * time.Second)
    if err := barrier.Error(); err != nil {
        return nil, fmt.Errorf("barrier timeout: %v", err)
    }

    s.mu.Lock()
    defer s.mu.Unlock()
    return s.m[key], nil
@@ -272,7 +293,7 @@ func (s *Store) Delete(key string) error {
}

// Check whether Raft state already exists
func checkIfStateExists(logStore *raftboltdb.BoltStore) (bool, error) {
func checkIfStateExists(logStore *raftwal.WAL) (bool, error) {
    // Check the Raft logs
    firstIndex, err := logStore.FirstIndex()
    if err != nil {
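The `Barrier` call added at the top of `Ls` and `Get` flushes all pending log entries through `Apply` before the in-memory map is read, which is what makes a read-after-write sequence behave on the leader. A small usage sketch, assuming the `Set`/`Get` signatures shown in this diff; the key and value are made up:

```go
// Sketch only: read-after-write through the store. Set goes through the
// Raft log; the Barrier inside Get waits until that entry is applied,
// so on the leader the value written below is guaranteed to be visible.
func demoReadAfterWrite(s *raft.Store) error {
    s.Set("/domain/6f3c.../node", []byte(`{"nodeID":"node1","state":1}`))

    val, err := s.Get("/domain/6f3c.../node") // barriers before reading
    if err != nil {
        return fmt.Errorf("get: %w", err) // typically a barrier timeout
    }
    fmt.Printf("%s\n", val)
    return nil
}
```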
@@ -1,12 +0,0 @@
package deevirt_schema

// Schema in the store
type DomainStore struct {
    Type   string `json:"type"`
    NodeId string `json:"nodeID"`
    Config []byte `json:"config"`
}

type DomainToNodeStore struct {
    State int `json:"state"`
}
@@ -1,20 +1,59 @@
package schema

import "google.golang.org/protobuf/types/known/timestamppb"

/*
/domain/{domain_id}
*/
// Schema in the store
type Domain struct {
    Metadata Metadata `xml:"metadata"`
    Type   string `json:"type"`   // Qemu only for the moment
    Config string `json:"config"` // The libvirt XML configuration
}

type Metadata struct {
    DeevirtInstance Instance `xml:"instance"`
/*
/domain/{domain_id}/node
*/
type DomainNode struct {
    NodeId string `json:"nodeID"` // Owning NodeID
    State  int    `json:"state"`  // Its persistent state
}

type Instance struct {
/*
/domain/{domain_id}/agent
*/
type DomainAgent struct {
    // To be defined
}

/*
/etc/libvirt/{type}/{node_id}/{domain_id}
*/
type DomainLock struct {
    LifeCycle int                    `json:"lifeycle"` // Its actual state
    Expiry    *timestamppb.Timestamp `json:"expiry"`   // Expiry date of the lock
}

// Other
type DomainToNode struct {
    State int `json:"state"`
}

type DomainXML struct {
    Metadata MetadataXML `xml:"metadata"`
}

type MetadataXML struct {
    DeevirtInstance InstanceXML `xml:"instance"`
}

type InstanceXML struct {
    DeevirtCompanyID    string `xml:"company_id"`
    DeevirtDatacenterID string `xml:"datacenter_id"`
}

// JSON SCHEMA for AMQP
type DomainStateJSON struct {
type DomainStateAMQP struct {
    CompanyID    string `json:"company_id"`
    DatacenterID string `json:"datacenter_id"`
    DomainID     string `json:"domain_id"`
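Putting the path comments above together, the refactored store layout would hold payloads roughly like the following; the node ID, UUID, and config value are illustrative only, and state 1 corresponds to libvirt's "running":

```go
// Illustrative payloads for the documented key layout, using the types above.
func seedExample(store *raft.Store) {
    dom, _ := json.Marshal(schema.Domain{
        Type:   "qemu",
        Config: "PGRvbWFpbi8+", // base64-encoded libvirt XML, as written by RaftNode.init
    })
    store.Set("/domain/6f3c0d70-1111-2222-3333-444444444444", dom)

    node, _ := json.Marshal(schema.DomainNode{
        NodeId: "node1",
        State:  1,
    })
    store.Set("/domain/6f3c0d70-1111-2222-3333-444444444444/node", node)
}
```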
@@ -1,4 +1,4 @@
package deevirt_schema
package schema

import "google.golang.org/protobuf/types/known/timestamppb"
19 vendor/github.com/benbjohnson/immutable/LICENSE generated vendored Normal file
@@ -0,0 +1,19 @@
Copyright 2019 Ben Johnson

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
301
vendor/github.com/benbjohnson/immutable/README.md
generated
vendored
Normal file
301
vendor/github.com/benbjohnson/immutable/README.md
generated
vendored
Normal file
@ -0,0 +1,301 @@
|
||||
Immutable    
|
||||
=========
|
||||
|
||||
This repository contains *generic* immutable collection types for Go. It includes
|
||||
`List`, `Map`, and `SortedMap` implementations. Immutable collections can
|
||||
provide efficient, lock free sharing of data by requiring that edits to the
|
||||
collections return new collections.
|
||||
|
||||
The collection types in this library are meant to mimic Go built-in collections
|
||||
such as`slice` and `map`. The primary usage difference between Go collections
|
||||
and `immutable` collections is that `immutable` collections always return a new
|
||||
collection on mutation so you will need to save the new reference.
|
||||
|
||||
Immutable collections are not for every situation, however, as they can incur
|
||||
additional CPU and memory overhead. Please evaluate the cost/benefit for your
|
||||
particular project.
|
||||
|
||||
Special thanks to the [Immutable.js](https://immutable-js.github.io/immutable-js/)
|
||||
team as the `List` & `Map` implementations are loose ports from that project.
|
||||
|
||||
|
||||
## List
|
||||
|
||||
The `List` type represents a sorted, indexed collection of values and operates
|
||||
similarly to a Go slice. It supports efficient append, prepend, update, and
|
||||
slice operations.
|
||||
|
||||
|
||||
### Adding list elements
|
||||
|
||||
Elements can be added to the end of the list with the `Append()` method or added
|
||||
to the beginning of the list with the `Prepend()` method. Unlike Go slices,
|
||||
prepending is as efficient as appending.
|
||||
|
||||
```go
|
||||
// Create a list with 3 elements.
|
||||
l := immutable.NewList[string]()
|
||||
l = l.Append("foo")
|
||||
l = l.Append("bar")
|
||||
l = l.Prepend("baz")
|
||||
|
||||
fmt.Println(l.Len()) // 3
|
||||
fmt.Println(l.Get(0)) // "baz"
|
||||
fmt.Println(l.Get(1)) // "foo"
|
||||
fmt.Println(l.Get(2)) // "bar"
|
||||
```
|
||||
|
||||
Note that each change to the list results in a new list being created. These
|
||||
lists are all snapshots at that point in time and cannot be changed so they
|
||||
are safe to share between multiple goroutines.
|
||||
|
||||
### Updating list elements
|
||||
|
||||
You can also overwrite existing elements by using the `Set()` method. In the
|
||||
following example, we'll update the third element in our list and return the
|
||||
new list to a new variable. You can see that our old `l` variable retains a
|
||||
snapshot of the original value.
|
||||
|
||||
```go
|
||||
l := immutable.NewList[string]()
|
||||
l = l.Append("foo")
|
||||
l = l.Append("bar")
|
||||
newList := l.Set(2, "baz")
|
||||
|
||||
fmt.Println(l.Get(1)) // "bar"
|
||||
fmt.Println(newList.Get(1)) // "baz"
|
||||
```
|
||||
|
||||
### Deriving sublists
|
||||
|
||||
You can create a sublist by using the `Slice()` method. This method works with
|
||||
the same rules as subslicing a Go slice:
|
||||
|
||||
```go
|
||||
l = l.Slice(0, 2)
|
||||
|
||||
fmt.Println(l.Len()) // 2
|
||||
fmt.Println(l.Get(0)) // "baz"
|
||||
fmt.Println(l.Get(1)) // "foo"
|
||||
```
|
||||
|
||||
Please note that since `List` follows the same rules as slices, it will panic if
|
||||
you try to `Get()`, `Set()`, or `Slice()` with indexes that are outside of
|
||||
the range of the `List`.
|
||||
|
||||
|
||||
|
||||
### Iterating lists
|
||||
|
||||
Iterators provide a clean, simple way to iterate over the elements of the list
|
||||
in order. This is more efficient than simply calling `Get()` for each index.
|
||||
|
||||
Below is an example of iterating over all elements of our list from above:
|
||||
|
||||
```go
|
||||
itr := l.Iterator()
|
||||
for !itr.Done() {
|
||||
index, value, _ := itr.Next()
|
||||
fmt.Printf("Index %d equals %v\n", index, value)
|
||||
}
|
||||
|
||||
// Index 0 equals baz
|
||||
// Index 1 equals foo
|
||||
```
|
||||
|
||||
By default iterators start from index zero, however, the `Seek()` method can be
|
||||
used to jump to a given index.
|
||||
|
||||
|
||||
### Efficiently building lists
|
||||
|
||||
If you are building large lists, it is significantly more efficient to use the
|
||||
`ListBuilder`. It uses nearly the same API as `List` except that it updates
|
||||
a list in-place until you are ready to use it. This can improve bulk list
|
||||
building by 10x or more.
|
||||
|
||||
```go
|
||||
b := immutable.NewListBuilder[string]()
|
||||
b.Append("foo")
|
||||
b.Append("bar")
|
||||
b.Set(2, "baz")
|
||||
|
||||
l := b.List()
|
||||
fmt.Println(l.Get(0)) // "foo"
|
||||
fmt.Println(l.Get(1)) // "baz"
|
||||
```
|
||||
|
||||
Builders are invalid after the call to `List()`.
|
||||
|
||||
|
||||
## Map
|
||||
|
||||
The `Map` represents an associative array that maps unique keys to values. It
|
||||
is implemented to act similarly to the built-in Go `map` type. It is implemented
|
||||
as a [Hash-Array Mapped Trie](https://lampwww.epfl.ch/papers/idealhashtrees.pdf).
|
||||
|
||||
Maps require a `Hasher` to hash keys and check for equality. There are built-in
|
||||
hasher implementations for most primitive types such as `int`, `uint`, `string`,
|
||||
and `[]byte` keys. You may pass in a `nil` hasher to `NewMap()` if you are using
|
||||
one of these key types.
|
||||
|
||||
|
||||
### Setting map key/value pairs
|
||||
|
||||
You can add a key/value pair to the map by using the `Set()` method. It will
|
||||
add the key if it does not exist or it will overwrite the value for the key if
|
||||
it does exist.
|
||||
|
||||
Values may be fetched for a key using the `Get()` method. This method returns
|
||||
the value as well as a flag indicating if the key existed. The flag is useful
|
||||
to check if a `nil` value was set for a key versus a key did not exist.
|
||||
|
||||
```go
|
||||
m := immutable.NewMap[string,int](nil)
|
||||
m = m.Set("jane", 100)
|
||||
m = m.Set("susy", 200)
|
||||
m = m.Set("jane", 300) // overwrite
|
||||
|
||||
fmt.Println(m.Len()) // 2
|
||||
|
||||
v, ok := m.Get("jane")
|
||||
fmt.Println(v, ok) // 300 true
|
||||
|
||||
v, ok = m.Get("susy")
|
||||
fmt.Println(v, ok) // 200, true
|
||||
|
||||
v, ok = m.Get("john")
|
||||
fmt.Println(v, ok) // nil, false
|
||||
```
|
||||
|
||||
|
||||
### Removing map keys
|
||||
|
||||
Keys may be removed from the map by using the `Delete()` method. If the key does
|
||||
not exist then the original map is returned instead of a new one.
|
||||
|
||||
```go
|
||||
m := immutable.NewMap[string,int](nil)
|
||||
m = m.Set("jane", 100)
|
||||
m = m.Delete("jane")
|
||||
|
||||
fmt.Println(m.Len()) // 0
|
||||
|
||||
v, ok := m.Get("jane")
|
||||
fmt.Println(v, ok) // nil false
|
||||
```
|
||||
|
||||
|
||||
### Iterating maps
|
||||
|
||||
Maps are unsorted, however, iterators can be used to loop over all key/value
|
||||
pairs in the collection. Unlike Go maps, iterators are deterministic when
|
||||
iterating over key/value pairs.
|
||||
|
||||
```go
|
||||
m := immutable.NewMap[string,int](nil)
|
||||
m = m.Set("jane", 100)
|
||||
m = m.Set("susy", 200)
|
||||
|
||||
itr := m.Iterator()
|
||||
for !itr.Done() {
|
||||
k, v := itr.Next()
|
||||
fmt.Println(k, v)
|
||||
}
|
||||
|
||||
// susy 200
|
||||
// jane 100
|
||||
```
|
||||
|
||||
Note that you should not rely on two maps with the same key/value pairs to
|
||||
iterate in the same order. Ordering can be insertion order dependent when two
|
||||
keys generate the same hash.
|
||||
|
||||
|
||||
### Efficiently building maps
|
||||
|
||||
If you are executing multiple mutations on a map, it can be much more efficient
|
||||
to use the `MapBuilder`. It uses nearly the same API as `Map` except that it
|
||||
updates a map in-place until you are ready to use it.
|
||||
|
||||
```go
|
||||
b := immutable.NewMapBuilder[string,int](nil)
|
||||
b.Set("foo", 100)
|
||||
b.Set("bar", 200)
|
||||
b.Set("foo", 300)
|
||||
|
||||
m := b.Map()
|
||||
fmt.Println(m.Get("foo")) // "300"
|
||||
fmt.Println(m.Get("bar")) // "200"
|
||||
```
|
||||
|
||||
Builders are invalid after the call to `Map()`.
|
||||
|
||||
|
||||
### Implementing a custom Hasher
|
||||
|
||||
If you need to use a key type besides `int`, `uint`, `string`, or `[]byte` then
|
||||
you'll need to create a custom `Hasher` implementation and pass it to `NewMap()`
|
||||
on creation.
|
||||
|
||||
Hashers are fairly simple. They only need to generate hashes for a given key
|
||||
and check equality given two keys.
|
||||
|
||||
```go
|
||||
type Hasher[K constraints.Ordered] interface {
|
||||
Hash(key K) uint32
|
||||
Equal(a, b K) bool
|
||||
}
|
||||
```
|
||||
|
||||
Please see the internal `intHasher`, `uintHasher`, `stringHasher`, and
|
||||
`byteSliceHasher` for examples.
|
||||
|
||||
|
||||
## Sorted Map
|
||||
|
||||
The `SortedMap` represents an associative array that maps unique keys to values.
|
||||
Unlike the `Map`, however, keys can be iterated over in-order. It is implemented
|
||||
as a B+tree.
|
||||
|
||||
Sorted maps require a `Comparer` to sort keys and check for equality. There are
|
||||
built-in comparer implementations for `int`, `uint`, `string`, and `[]byte` keys.
|
||||
You may pass a `nil` comparer to `NewSortedMap()` if you are using one of these
|
||||
key types.
|
||||
|
||||
The API is identical to the `Map` implementation. The sorted map also has a
|
||||
companion `SortedMapBuilder` for more efficiently building maps.
|
||||
|
||||
|
||||
### Implementing a custom Comparer

If you need to use a key type besides `int`, `uint`, `string`, or `[]byte`
then you'll need to create a custom `Comparer` implementation and pass it to
`NewSortedMap()` on creation.

Comparers have only one method, `Compare()`. It works the same as the
`strings.Compare()` function: it returns `-1` if `a` is less than `b`, `1` if
`a` is greater than `b`, and `0` if `a` is equal to `b`.

```go
type Comparer[K constraints.Ordered] interface {
	Compare(a, b K) int
}
```

Please see the internal `intComparer`, `uintComparer`, `stringComparer`, and
`byteSliceComparer` for examples.
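
As a minimal sketch, a comparer for `float64` keys could look like the
following (`float64Comparer` is a hypothetical name, not part of the library):

```go
// float64Comparer is a hypothetical Comparer implementation for float64 keys.
type float64Comparer struct{}

// Compare returns -1, 0, or 1 according to the ordering of a and b.
func (c float64Comparer) Compare(a, b float64) int {
	switch {
	case a < b:
		return -1
	case a > b:
		return 1
	default:
		return 0
	}
}
```

A value of this type would be passed to `NewSortedMap()` on creation, e.g.
`immutable.NewSortedMap[float64,int](float64Comparer{})`.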


## Contributing

The goal of `immutable` is to provide a stable, reasonably performant, immutable
collections library for Go that has a simple, idiomatic API. As such, additional
features and minor performance improvements will generally not be accepted. If
you have a suggestion for a clearer API or substantial performance improvement,
_please_ open an issue first to discuss. All pull requests without a related
issue will be closed immediately.

Please submit issues relating to bugs & documentation improvements.

2400 vendor/github.com/benbjohnson/immutable/immutable.go generated vendored Normal file
File diff suppressed because it is too large

202 vendor/github.com/coreos/etcd/LICENSE generated vendored Normal file
@ -0,0 +1,202 @@

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
27 vendor/github.com/coreos/etcd/pkg/fileutil/dir_unix.go generated vendored Normal file
@ -0,0 +1,27 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build !windows

package fileutil

import "os"

const (
	// PrivateDirMode grants owner to make/remove files inside the directory.
	PrivateDirMode = 0700
)

// OpenDir opens a directory for syncing.
func OpenDir(path string) (*os.File, error) { return os.Open(path) }
51 vendor/github.com/coreos/etcd/pkg/fileutil/dir_windows.go generated vendored Normal file
@ -0,0 +1,51 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build windows

package fileutil

import (
	"os"
	"syscall"
)

const (
	// PrivateDirMode grants owner to make/remove files inside the directory.
	PrivateDirMode = 0777
)

// OpenDir opens a directory in windows with write access for syncing.
func OpenDir(path string) (*os.File, error) {
	fd, err := openDir(path)
	if err != nil {
		return nil, err
	}
	return os.NewFile(uintptr(fd), path), nil
}

func openDir(path string) (fd syscall.Handle, err error) {
	if len(path) == 0 {
		return syscall.InvalidHandle, syscall.ERROR_FILE_NOT_FOUND
	}
	pathp, err := syscall.UTF16PtrFromString(path)
	if err != nil {
		return syscall.InvalidHandle, err
	}
	access := uint32(syscall.GENERIC_READ | syscall.GENERIC_WRITE)
	sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE)
	createmode := uint32(syscall.OPEN_EXISTING)
	fl := uint32(syscall.FILE_FLAG_BACKUP_SEMANTICS)
	return syscall.CreateFile(pathp, access, sharemode, nil, createmode, fl, 0)
}
147 vendor/github.com/coreos/etcd/pkg/fileutil/fileutil.go generated vendored Normal file
@ -0,0 +1,147 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package fileutil implements utility functions related to files and paths.
package fileutil

import (
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"sort"

	"github.com/coreos/pkg/capnslog"
)

const (
	// PrivateFileMode grants owner to read/write a file.
	PrivateFileMode = 0600
)

var (
	plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "pkg/fileutil")
)

// IsDirWriteable checks if dir is writable by writing and removing a file
// to dir. It returns nil if dir is writable.
func IsDirWriteable(dir string) error {
	f := filepath.Join(dir, ".touch")
	if err := ioutil.WriteFile(f, []byte(""), PrivateFileMode); err != nil {
		return err
	}
	return os.Remove(f)
}

// ReadDir returns the filenames in the given directory in sorted order.
func ReadDir(dirpath string) ([]string, error) {
	dir, err := os.Open(dirpath)
	if err != nil {
		return nil, err
	}
	defer dir.Close()
	names, err := dir.Readdirnames(-1)
	if err != nil {
		return nil, err
	}
	sort.Strings(names)
	return names, nil
}

// TouchDirAll is similar to os.MkdirAll. It creates directories with 0700 permission if any directory
// does not exists. TouchDirAll also ensures the given directory is writable.
func TouchDirAll(dir string) error {
	// If path is already a directory, MkdirAll does nothing and returns nil, so,
	// first check if dir exist with an expected permission mode.
	if Exist(dir) {
		err := CheckDirPermission(dir, PrivateDirMode)
		if err != nil {
			plog.Warningf("check file permission: %v", err)
		}
	} else {
		err := os.MkdirAll(dir, PrivateDirMode)
		if err != nil {
			// if mkdirAll("a/text") and "text" is not
			// a directory, this will return syscall.ENOTDIR
			return err
		}
	}

	return IsDirWriteable(dir)
}

// CreateDirAll is similar to TouchDirAll but returns error
// if the deepest directory was not empty.
func CreateDirAll(dir string) error {
	err := TouchDirAll(dir)
	if err == nil {
		var ns []string
		ns, err = ReadDir(dir)
		if err != nil {
			return err
		}
		if len(ns) != 0 {
			err = fmt.Errorf("expected %q to be empty, got %q", dir, ns)
		}
	}
	return err
}

func Exist(name string) bool {
	_, err := os.Stat(name)
	return err == nil
}

// ZeroToEnd zeros a file starting from SEEK_CUR to its SEEK_END. May temporarily
// shorten the length of the file.
func ZeroToEnd(f *os.File) error {
	// TODO: support FALLOC_FL_ZERO_RANGE
	off, err := f.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}
	lenf, lerr := f.Seek(0, io.SeekEnd)
	if lerr != nil {
		return lerr
	}
	if err = f.Truncate(off); err != nil {
		return err
	}
	// make sure blocks remain allocated
	if err = Preallocate(f, lenf, true); err != nil {
		return err
	}
	_, err = f.Seek(off, io.SeekStart)
	return err
}

// CheckDirPermission checks permission on an existing dir.
// Returns error if dir is empty or exist with a different permission than specified.
func CheckDirPermission(dir string, perm os.FileMode) error {
	if !Exist(dir) {
		return fmt.Errorf("directory %q empty, cannot check permission.", dir)
	}
	//check the existing permission on the directory
	dirInfo, err := os.Stat(dir)
	if err != nil {
		return err
	}
	dirMode := dirInfo.Mode().Perm()
	if dirMode != perm {
		err = fmt.Errorf("directory %q exist, but the permission is %q. The recommended permission is %q to prevent possible unprivileged access to the data.", dir, dirInfo.Mode(), os.FileMode(PrivateDirMode))
		return err
	}
	return nil
}
26 vendor/github.com/coreos/etcd/pkg/fileutil/lock.go generated vendored Normal file
@ -0,0 +1,26 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fileutil

import (
	"errors"
	"os"
)

var (
	ErrLocked = errors.New("fileutil: file already locked")
)

type LockedFile struct{ *os.File }
49 vendor/github.com/coreos/etcd/pkg/fileutil/lock_flock.go generated vendored Normal file
@ -0,0 +1,49 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build !windows,!plan9,!solaris

package fileutil

import (
	"os"
	"syscall"
)

func flockTryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	f, err := os.OpenFile(path, flag, perm)
	if err != nil {
		return nil, err
	}
	if err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil {
		f.Close()
		if err == syscall.EWOULDBLOCK {
			err = ErrLocked
		}
		return nil, err
	}
	return &LockedFile{f}, nil
}

func flockLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	f, err := os.OpenFile(path, flag, perm)
	if err != nil {
		return nil, err
	}
	if err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX); err != nil {
		f.Close()
		return nil, err
	}
	return &LockedFile{f}, err
}
97 vendor/github.com/coreos/etcd/pkg/fileutil/lock_linux.go generated vendored Normal file
@ -0,0 +1,97 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build linux

package fileutil

import (
	"io"
	"os"
	"syscall"
)

// This used to call syscall.Flock() but that call fails with EBADF on NFS.
// An alternative is lockf() which works on NFS but that call lets a process lock
// the same file twice. Instead, use Linux's non-standard open file descriptor
// locks which will block if the process already holds the file lock.
//
// constants from /usr/include/bits/fcntl-linux.h
const (
	F_OFD_GETLK  = 36
	F_OFD_SETLK  = 37
	F_OFD_SETLKW = 38
)

var (
	wrlck = syscall.Flock_t{
		Type:   syscall.F_WRLCK,
		Whence: int16(io.SeekStart),
		Start:  0,
		Len:    0,
	}

	linuxTryLockFile = flockTryLockFile
	linuxLockFile    = flockLockFile
)

func init() {
	// use open file descriptor locks if the system supports it
	getlk := syscall.Flock_t{Type: syscall.F_RDLCK}
	if err := syscall.FcntlFlock(0, F_OFD_GETLK, &getlk); err == nil {
		linuxTryLockFile = ofdTryLockFile
		linuxLockFile = ofdLockFile
	}
}

func TryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	return linuxTryLockFile(path, flag, perm)
}

func ofdTryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	f, err := os.OpenFile(path, flag, perm)
	if err != nil {
		return nil, err
	}

	flock := wrlck
	if err = syscall.FcntlFlock(f.Fd(), F_OFD_SETLK, &flock); err != nil {
		f.Close()
		if err == syscall.EWOULDBLOCK {
			err = ErrLocked
		}
		return nil, err
	}
	return &LockedFile{f}, nil
}

func LockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	return linuxLockFile(path, flag, perm)
}

func ofdLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	f, err := os.OpenFile(path, flag, perm)
	if err != nil {
		return nil, err
	}

	flock := wrlck
	err = syscall.FcntlFlock(f.Fd(), F_OFD_SETLKW, &flock)

	if err != nil {
		f.Close()
		return nil, err
	}
	return &LockedFile{f}, err
}
45 vendor/github.com/coreos/etcd/pkg/fileutil/lock_plan9.go generated vendored Normal file
@ -0,0 +1,45 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fileutil

import (
	"os"
	"syscall"
	"time"
)

func TryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	if err := os.Chmod(path, syscall.DMEXCL|PrivateFileMode); err != nil {
		return nil, err
	}
	f, err := os.Open(path, flag, perm)
	if err != nil {
		return nil, ErrLocked
	}
	return &LockedFile{f}, nil
}

func LockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	if err := os.Chmod(path, syscall.DMEXCL|PrivateFileMode); err != nil {
		return nil, err
	}
	for {
		f, err := os.OpenFile(path, flag, perm)
		if err == nil {
			return &LockedFile{f}, nil
		}
		time.Sleep(10 * time.Millisecond)
	}
}
62 vendor/github.com/coreos/etcd/pkg/fileutil/lock_solaris.go generated vendored Normal file
@ -0,0 +1,62 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build solaris

package fileutil

import (
	"os"
	"syscall"
)

func TryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	var lock syscall.Flock_t
	lock.Start = 0
	lock.Len = 0
	lock.Pid = 0
	lock.Type = syscall.F_WRLCK
	lock.Whence = 0
	lock.Pid = 0
	f, err := os.OpenFile(path, flag, perm)
	if err != nil {
		return nil, err
	}
	if err := syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &lock); err != nil {
		f.Close()
		if err == syscall.EAGAIN {
			err = ErrLocked
		}
		return nil, err
	}
	return &LockedFile{f}, nil
}

func LockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	var lock syscall.Flock_t
	lock.Start = 0
	lock.Len = 0
	lock.Pid = 0
	lock.Type = syscall.F_WRLCK
	lock.Whence = 0
	f, err := os.OpenFile(path, flag, perm)
	if err != nil {
		return nil, err
	}
	if err = syscall.FcntlFlock(f.Fd(), syscall.F_SETLKW, &lock); err != nil {
		f.Close()
		return nil, err
	}
	return &LockedFile{f}, nil
}
29 vendor/github.com/coreos/etcd/pkg/fileutil/lock_unix.go generated vendored Normal file
@ -0,0 +1,29 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build !windows,!plan9,!solaris,!linux

package fileutil

import (
	"os"
)

func TryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	return flockTryLockFile(path, flag, perm)
}

func LockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	return flockLockFile(path, flag, perm)
}
125 vendor/github.com/coreos/etcd/pkg/fileutil/lock_windows.go generated vendored Normal file
@ -0,0 +1,125 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build windows

package fileutil

import (
	"errors"
	"fmt"
	"os"
	"syscall"
	"unsafe"
)

var (
	modkernel32    = syscall.NewLazyDLL("kernel32.dll")
	procLockFileEx = modkernel32.NewProc("LockFileEx")

	errLocked = errors.New("The process cannot access the file because another process has locked a portion of the file.")
)

const (
	// https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx
	LOCKFILE_EXCLUSIVE_LOCK   = 2
	LOCKFILE_FAIL_IMMEDIATELY = 1

	// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx
	errLockViolation syscall.Errno = 0x21
)

func TryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	f, err := open(path, flag, perm)
	if err != nil {
		return nil, err
	}
	if err := lockFile(syscall.Handle(f.Fd()), LOCKFILE_FAIL_IMMEDIATELY); err != nil {
		f.Close()
		return nil, err
	}
	return &LockedFile{f}, nil
}

func LockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) {
	f, err := open(path, flag, perm)
	if err != nil {
		return nil, err
	}
	if err := lockFile(syscall.Handle(f.Fd()), 0); err != nil {
		f.Close()
		return nil, err
	}
	return &LockedFile{f}, nil
}

func open(path string, flag int, perm os.FileMode) (*os.File, error) {
	if path == "" {
		return nil, fmt.Errorf("cannot open empty filename")
	}
	var access uint32
	switch flag {
	case syscall.O_RDONLY:
		access = syscall.GENERIC_READ
	case syscall.O_WRONLY:
		access = syscall.GENERIC_WRITE
	case syscall.O_RDWR:
		access = syscall.GENERIC_READ | syscall.GENERIC_WRITE
	case syscall.O_WRONLY | syscall.O_CREAT:
		access = syscall.GENERIC_ALL
	default:
		panic(fmt.Errorf("flag %v is not supported", flag))
	}
	fd, err := syscall.CreateFile(&(syscall.StringToUTF16(path)[0]),
		access,
		syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE|syscall.FILE_SHARE_DELETE,
		nil,
		syscall.OPEN_ALWAYS,
		syscall.FILE_ATTRIBUTE_NORMAL,
		0)
	if err != nil {
		return nil, err
	}
	return os.NewFile(uintptr(fd), path), nil
}

func lockFile(fd syscall.Handle, flags uint32) error {
	var flag uint32 = LOCKFILE_EXCLUSIVE_LOCK
	flag |= flags
	if fd == syscall.InvalidHandle {
		return nil
	}
	err := lockFileEx(fd, flag, 1, 0, &syscall.Overlapped{})
	if err == nil {
		return nil
	} else if err.Error() == errLocked.Error() {
		return ErrLocked
	} else if err != errLockViolation {
		return err
	}
	return nil
}

func lockFileEx(h syscall.Handle, flags, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
	var reserved uint32 = 0
	r1, _, e1 := syscall.Syscall6(procLockFileEx.Addr(), 6, uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)))
	if r1 == 0 {
		if e1 != 0 {
			err = error(e1)
		} else {
			err = syscall.EINVAL
		}
	}
	return err
}
54 vendor/github.com/coreos/etcd/pkg/fileutil/preallocate.go generated vendored Normal file
@ -0,0 +1,54 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fileutil

import (
	"io"
	"os"
)

// Preallocate tries to allocate the space for given
// file. This operation is only supported on linux by a
// few filesystems (btrfs, ext4, etc.).
// If the operation is unsupported, no error will be returned.
// Otherwise, the error encountered will be returned.
func Preallocate(f *os.File, sizeInBytes int64, extendFile bool) error {
	if sizeInBytes == 0 {
		// fallocate will return EINVAL if length is 0; skip
		return nil
	}
	if extendFile {
		return preallocExtend(f, sizeInBytes)
	}
	return preallocFixed(f, sizeInBytes)
}

func preallocExtendTrunc(f *os.File, sizeInBytes int64) error {
	curOff, err := f.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}
	size, err := f.Seek(sizeInBytes, io.SeekEnd)
	if err != nil {
		return err
	}
	if _, err = f.Seek(curOff, io.SeekStart); err != nil {
		return err
	}
	if sizeInBytes > size {
		return nil
	}
	return f.Truncate(sizeInBytes)
}
65 vendor/github.com/coreos/etcd/pkg/fileutil/preallocate_darwin.go generated vendored Normal file
@ -0,0 +1,65 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build darwin

package fileutil

import (
	"os"
	"syscall"
	"unsafe"
)

func preallocExtend(f *os.File, sizeInBytes int64) error {
	if err := preallocFixed(f, sizeInBytes); err != nil {
		return err
	}
	return preallocExtendTrunc(f, sizeInBytes)
}

func preallocFixed(f *os.File, sizeInBytes int64) error {
	// allocate all requested space or no space at all
	// TODO: allocate contiguous space on disk with F_ALLOCATECONTIG flag
	fstore := &syscall.Fstore_t{
		Flags:   syscall.F_ALLOCATEALL,
		Posmode: syscall.F_PEOFPOSMODE,
		Length:  sizeInBytes}
	p := unsafe.Pointer(fstore)
	_, _, errno := syscall.Syscall(syscall.SYS_FCNTL, f.Fd(), uintptr(syscall.F_PREALLOCATE), uintptr(p))
	if errno == 0 || errno == syscall.ENOTSUP {
		return nil
	}

	// wrong argument to fallocate syscall
	if errno == syscall.EINVAL {
		// filesystem "st_blocks" are allocated in the units of
		// "Allocation Block Size" (run "diskutil info /" command)
		var stat syscall.Stat_t
		syscall.Fstat(int(f.Fd()), &stat)

		// syscall.Statfs_t.Bsize is "optimal transfer block size"
		// and contains matching 4096 value when latest OS X kernel
		// supports 4,096 KB filesystem block size
		var statfs syscall.Statfs_t
		syscall.Fstatfs(int(f.Fd()), &statfs)
		blockSize := int64(statfs.Bsize)

		if stat.Blocks*blockSize >= sizeInBytes {
			// enough blocks are already allocated
			return nil
		}
	}
	return errno
}
49 vendor/github.com/coreos/etcd/pkg/fileutil/preallocate_unix.go generated vendored Normal file
@ -0,0 +1,49 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build linux

package fileutil

import (
	"os"
	"syscall"
)

func preallocExtend(f *os.File, sizeInBytes int64) error {
	// use mode = 0 to change size
	err := syscall.Fallocate(int(f.Fd()), 0, 0, sizeInBytes)
	if err != nil {
		errno, ok := err.(syscall.Errno)
		// not supported; fallback
		// fallocate EINTRs frequently in some environments; fallback
		if ok && (errno == syscall.ENOTSUP || errno == syscall.EINTR) {
			return preallocExtendTrunc(f, sizeInBytes)
		}
	}
	return err
}

func preallocFixed(f *os.File, sizeInBytes int64) error {
	// use mode = 1 to keep size; see FALLOC_FL_KEEP_SIZE
	err := syscall.Fallocate(int(f.Fd()), 1, 0, sizeInBytes)
	if err != nil {
		errno, ok := err.(syscall.Errno)
		// treat not supported as nil error
		if ok && errno == syscall.ENOTSUP {
			return nil
		}
	}
	return err
}
25 vendor/github.com/coreos/etcd/pkg/fileutil/preallocate_unsupported.go generated vendored Normal file
@ -0,0 +1,25 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build !linux,!darwin

package fileutil

import "os"

func preallocExtend(f *os.File, sizeInBytes int64) error {
	return preallocExtendTrunc(f, sizeInBytes)
}

func preallocFixed(f *os.File, sizeInBytes int64) error { return nil }
88 vendor/github.com/coreos/etcd/pkg/fileutil/purge.go generated vendored Normal file
@ -0,0 +1,88 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fileutil

import (
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"
)

func PurgeFile(dirname string, suffix string, max uint, interval time.Duration, stop <-chan struct{}) <-chan error {
	return purgeFile(dirname, suffix, max, interval, stop, nil, nil)
}

func PurgeFileWithDoneNotify(dirname string, suffix string, max uint, interval time.Duration, stop <-chan struct{}) (<-chan struct{}, <-chan error) {
	doneC := make(chan struct{})
	errC := purgeFile(dirname, suffix, max, interval, stop, nil, doneC)
	return doneC, errC
}

// purgeFile is the internal implementation for PurgeFile which can post purged files to purgec if non-nil.
// if donec is non-nil, the function closes it to notify its exit.
func purgeFile(dirname string, suffix string, max uint, interval time.Duration, stop <-chan struct{}, purgec chan<- string, donec chan<- struct{}) <-chan error {
	errC := make(chan error, 1)
	go func() {
		if donec != nil {
			defer close(donec)
		}
		for {
			fnames, err := ReadDir(dirname)
			if err != nil {
				errC <- err
				return
			}
			newfnames := make([]string, 0)
			for _, fname := range fnames {
				if strings.HasSuffix(fname, suffix) {
					newfnames = append(newfnames, fname)
				}
			}
			sort.Strings(newfnames)
			fnames = newfnames
			for len(newfnames) > int(max) {
				f := filepath.Join(dirname, newfnames[0])
				l, err := TryLockFile(f, os.O_WRONLY, PrivateFileMode)
				if err != nil {
					break
				}
				if err = os.Remove(f); err != nil {
					errC <- err
					return
				}
				if err = l.Close(); err != nil {
					plog.Errorf("error unlocking %s when purging file (%v)", l.Name(), err)
					errC <- err
					return
				}
				plog.Infof("purged file %s successfully", f)
				newfnames = newfnames[1:]
			}
			if purgec != nil {
				for i := 0; i < len(fnames)-len(newfnames); i++ {
					purgec <- fnames[i]
				}
			}
			select {
			case <-time.After(interval):
			case <-stop:
				return
			}
		}
	}()
	return errC
}
29 vendor/github.com/coreos/etcd/pkg/fileutil/sync.go generated vendored Normal file
@ -0,0 +1,29 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build !linux,!darwin

package fileutil

import "os"

// Fsync is a wrapper around file.Sync(). Special handling is needed on darwin platform.
func Fsync(f *os.File) error {
	return f.Sync()
}

// Fdatasync is a wrapper around file.Sync(). Special handling is needed on linux platform.
func Fdatasync(f *os.File) error {
	return f.Sync()
}
40 vendor/github.com/coreos/etcd/pkg/fileutil/sync_darwin.go generated vendored Normal file
@ -0,0 +1,40 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build darwin

package fileutil

import (
	"os"
	"syscall"
)

// Fsync on HFS/OSX flushes the data on to the physical drive but the drive
// may not write it to the persistent media for quite sometime and it may be
// written in out-of-order sequence. Using F_FULLFSYNC ensures that the
// physical drive's buffer will also get flushed to the media.
func Fsync(f *os.File) error {
	_, _, errno := syscall.Syscall(syscall.SYS_FCNTL, f.Fd(), uintptr(syscall.F_FULLFSYNC), uintptr(0))
	if errno == 0 {
		return nil
	}
	return errno
}

// Fdatasync on darwin platform invokes fcntl(F_FULLFSYNC) for actual persistence
// on physical drive media.
func Fdatasync(f *os.File) error {
	return Fsync(f)
}
34 vendor/github.com/coreos/etcd/pkg/fileutil/sync_linux.go generated vendored Normal file
@ -0,0 +1,34 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build linux

package fileutil

import (
	"os"
	"syscall"
)

// Fsync is a wrapper around file.Sync(). Special handling is needed on darwin platform.
func Fsync(f *os.File) error {
	return f.Sync()
}

// Fdatasync is similar to fsync(), but does not flush modified metadata
// unless that metadata is needed in order to allow a subsequent data retrieval
// to be correctly handled.
func Fdatasync(f *os.File) error {
	return syscall.Fdatasync(int(f.Fd()))
}
191 vendor/github.com/coreos/go-systemd/LICENSE generated vendored Normal file
@ -0,0 +1,191 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction, and
distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by the copyright
owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all other entities
that control, are controlled by, or are under common control with that entity.
For the purposes of this definition, "control" means (i) the power, direct or
indirect, to cause the direction or management of such entity, whether by
contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity exercising
permissions granted by this License.

"Source" form shall mean the preferred form for making modifications, including
but not limited to software source code, documentation source, and configuration
files.

"Object" form shall mean any form resulting from mechanical transformation or
translation of a Source form, including but not limited to compiled object code,
generated documentation, and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or Object form, made
available under the License, as indicated by a copyright notice that is included
in or attached to the work (an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object form, that
is based on (or derived from) the Work and for which the editorial revisions,
annotations, elaborations, or other modifications represent, as a whole, an
original work of authorship. For the purposes of this License, Derivative Works
shall not include works that remain separable from, or merely link (or bind by
name) to the interfaces of, the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including the original version
of the Work and any modifications or additions to that Work or Derivative Works
thereof, that is intentionally submitted to Licensor for inclusion in the Work
by the copyright owner or by an individual or Legal Entity authorized to submit
on behalf of the copyright owner. For the purposes of this definition,
"submitted" means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems, and
issue tracking systems that are managed by, or on behalf of, the Licensor for
the purpose of discussing and improving the Work, but excluding communication
that is conspicuously marked or otherwise designated in writing by the copyright
owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
of whom a Contribution has been received by Licensor and subsequently
incorporated within the Work.

2. Grant of Copyright License.

Subject to the terms and conditions of this License, each Contributor hereby
grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
irrevocable copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the Work and such
Derivative Works in Source or Object form.

3. Grant of Patent License.

Subject to the terms and conditions of this License, each Contributor hereby
grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
irrevocable (except as stated in this section) patent license to make, have
made, use, offer to sell, sell, import, and otherwise transfer the Work, where
such license applies only to those patent claims licensable by such Contributor
that are necessarily infringed by their Contribution(s) alone or by combination
of their Contribution(s) with the Work to which such Contribution(s) was
submitted. If You institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work or a
Contribution incorporated within the Work constitutes direct or contributory
patent infringement, then any patent licenses granted to You under this License
for that Work shall terminate as of the date such litigation is filed.

4. Redistribution.

You may reproduce and distribute copies of the Work or Derivative Works thereof
in any medium, with or without modifications, and in Source or Object form,
provided that You meet the following conditions:

You must give any other recipients of the Work or Derivative Works a copy of
this License; and
You must cause any modified files to carry prominent notices stating that You
changed the files; and
You must retain, in the Source form of any Derivative Works that You distribute,
all copyright, patent, trademark, and attribution notices from the Source form
of the Work, excluding those notices that do not pertain to any part of the
Derivative Works; and
If the Work includes a "NOTICE" text file as part of its distribution, then any
Derivative Works that You distribute must include a readable copy of the
attribution notices contained within such NOTICE file, excluding those notices
that do not pertain to any part of the Derivative Works, in at least one of the
following places: within a NOTICE text file distributed as part of the
Derivative Works; within the Source form or documentation, if provided along
with the Derivative Works; or, within a display generated by the Derivative
Works, if and wherever such third-party notices normally appear. The contents of
the NOTICE file are for informational purposes only and do not modify the
License. You may add Your own attribution notices within Derivative Works that
You distribute, alongside or as an addendum to the NOTICE text from the Work,
provided that such additional attribution notices cannot be construed as
modifying the License.
You may add Your own copyright statement to Your modifications and may provide
additional or different license terms and conditions for use, reproduction, or
distribution of Your modifications, or for any such Derivative Works as a whole,
provided Your use, reproduction, and distribution of the Work otherwise complies
with the conditions stated in this License.

5. Submission of Contributions.

Unless You explicitly state otherwise, any Contribution intentionally submitted
for inclusion in the Work by You to the Licensor shall be under the terms and
conditions of this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify the terms of
|
||||
any separate license agreement you may have executed with Licensor regarding
|
||||
such Contributions.
|
||||
|
||||
6. Trademarks.
|
||||
|
||||
This License does not grant permission to use the trade names, trademarks,
|
||||
service marks, or product names of the Licensor, except as required for
|
||||
reasonable and customary use in describing the origin of the Work and
|
||||
reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty.
|
||||
|
||||
Unless required by applicable law or agreed to in writing, Licensor provides the
|
||||
Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
|
||||
including, without limitation, any warranties or conditions of TITLE,
|
||||
NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
|
||||
solely responsible for determining the appropriateness of using or
|
||||
redistributing the Work and assume any risks associated with Your exercise of
|
||||
permissions under this License.
|
||||
|
||||
8. Limitation of Liability.
|
||||
|
||||
In no event and under no legal theory, whether in tort (including negligence),
|
||||
contract, or otherwise, unless required by applicable law (such as deliberate
|
||||
and grossly negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special, incidental,
|
||||
or consequential damages of any character arising as a result of this License or
|
||||
out of the use or inability to use the Work (including but not limited to
|
||||
damages for loss of goodwill, work stoppage, computer failure or malfunction, or
|
||||
any and all other commercial damages or losses), even if such Contributor has
|
||||
been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability.
|
||||
|
||||
While redistributing the Work or Derivative Works thereof, You may choose to
|
||||
offer, and charge a fee for, acceptance of support, warranty, indemnity, or
|
||||
other liability obligations and/or rights consistent with this License. However,
|
||||
in accepting such obligations, You may act only on Your own behalf and on Your
|
||||
sole responsibility, not on behalf of any other Contributor, and only if You
|
||||
agree to indemnify, defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason of your
|
||||
accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work
|
||||
|
||||
To apply the Apache License to your work, attach the following boilerplate
|
||||
notice, with the fields enclosed by brackets "[]" replaced with your own
|
||||
identifying information. (Don't include the brackets!) The text should be
|
||||
enclosed in the appropriate comment syntax for the file format. We also
|
||||
recommend that a file or class name and description of purpose be included on
|
||||
the same "printed page" as the copyright notice for easier identification within
|
||||
third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
225 vendor/github.com/coreos/go-systemd/journal/journal.go generated vendored Normal file
@ -0,0 +1,225 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package journal provides write bindings to the local systemd journal.
// It is implemented in pure Go and connects to the journal directly over its
// unix socket.
//
// To read from the journal, see the "sdjournal" package, which wraps the
// sd-journal C API.
//
// http://www.freedesktop.org/software/systemd/man/systemd-journald.service.html
package journal

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"net"
	"os"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"unsafe"
)

// Priority of a journal message
type Priority int

const (
	PriEmerg Priority = iota
	PriAlert
	PriCrit
	PriErr
	PriWarning
	PriNotice
	PriInfo
	PriDebug
)

var (
	// This can be overridden at build-time:
	// https://github.com/golang/go/wiki/GcToolchainTricks#including-build-information-in-the-executable
	journalSocket = "/run/systemd/journal/socket"

	// unixConnPtr atomically holds the local unconnected Unix-domain socket.
	// Concrete safe pointer type: *net.UnixConn
	unixConnPtr unsafe.Pointer
	// onceConn ensures that unixConnPtr is initialized exactly once.
	onceConn sync.Once
)

func init() {
	onceConn.Do(initConn)
}

// Enabled checks whether the local systemd journal is available for logging.
func Enabled() bool {
	onceConn.Do(initConn)

	if (*net.UnixConn)(atomic.LoadPointer(&unixConnPtr)) == nil {
		return false
	}

	conn, err := net.Dial("unixgram", journalSocket)
	if err != nil {
		return false
	}
	conn.Close()

	return true
}

// Send a message to the local systemd journal. vars is a map of journald
// fields to values. Fields must be composed of uppercase letters, numbers,
// and underscores, but must not start with an underscore. Within these
// restrictions, any arbitrary field name may be used. Some names have special
// significance: see the journalctl documentation
// (http://www.freedesktop.org/software/systemd/man/systemd.journal-fields.html)
// for more details. vars may be nil.
func Send(message string, priority Priority, vars map[string]string) error {
	conn := (*net.UnixConn)(atomic.LoadPointer(&unixConnPtr))
	if conn == nil {
		return errors.New("could not initialize socket to journald")
	}

	socketAddr := &net.UnixAddr{
		Name: journalSocket,
		Net:  "unixgram",
	}

	data := new(bytes.Buffer)
	appendVariable(data, "PRIORITY", strconv.Itoa(int(priority)))
	appendVariable(data, "MESSAGE", message)
	for k, v := range vars {
		appendVariable(data, k, v)
	}

	_, _, err := conn.WriteMsgUnix(data.Bytes(), nil, socketAddr)
	if err == nil {
		return nil
	}
	if !isSocketSpaceError(err) {
		return err
	}

	// Large log entry, send it via tempfile and ancillary-fd.
	file, err := tempFd()
	if err != nil {
		return err
	}
	defer file.Close()
	_, err = io.Copy(file, data)
	if err != nil {
		return err
	}
	rights := syscall.UnixRights(int(file.Fd()))
	_, _, err = conn.WriteMsgUnix([]byte{}, rights, socketAddr)
	if err != nil {
		return err
	}

	return nil
}

// Print prints a message to the local systemd journal using Send().
func Print(priority Priority, format string, a ...interface{}) error {
	return Send(fmt.Sprintf(format, a...), priority, nil)
}

func appendVariable(w io.Writer, name, value string) {
	if err := validVarName(name); err != nil {
		fmt.Fprintf(os.Stderr, "variable name %s contains invalid character, ignoring\n", name)
	}
	if strings.ContainsRune(value, '\n') {
		/* When the value contains a newline, we write:
		 * - the variable name, followed by a newline
		 * - the size (in 64bit little endian format)
		 * - the data, followed by a newline
		 */
		fmt.Fprintln(w, name)
		binary.Write(w, binary.LittleEndian, uint64(len(value)))
		fmt.Fprintln(w, value)
	} else {
		/* just write the variable and value all on one line */
		fmt.Fprintf(w, "%s=%s\n", name, value)
	}
}

// validVarName validates a variable name to make sure journald will accept it.
// The variable name must be in uppercase and consist only of characters,
// numbers and underscores, and may not begin with an underscore:
// https://www.freedesktop.org/software/systemd/man/sd_journal_print.html
func validVarName(name string) error {
	if name == "" {
		return errors.New("Empty variable name")
	} else if name[0] == '_' {
		return errors.New("Variable name begins with an underscore")
	}

	for _, c := range name {
		if !(('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || c == '_') {
			return errors.New("Variable name contains invalid characters")
		}
	}
	return nil
}

// isSocketSpaceError checks whether the error is signaling
// an "overlarge message" condition.
func isSocketSpaceError(err error) bool {
	opErr, ok := err.(*net.OpError)
	if !ok || opErr == nil {
		return false
	}

	sysErr, ok := opErr.Err.(*os.SyscallError)
	if !ok || sysErr == nil {
		return false
	}

	return sysErr.Err == syscall.EMSGSIZE || sysErr.Err == syscall.ENOBUFS
}

// tempFd creates a temporary, unlinked file under `/dev/shm`.
func tempFd() (*os.File, error) {
	file, err := ioutil.TempFile("/dev/shm/", "journal.XXXXX")
	if err != nil {
		return nil, err
	}
	err = syscall.Unlink(file.Name())
	if err != nil {
		return nil, err
	}
	return file, nil
}

// initConn initializes the global `unixConnPtr` socket.
// It is meant to be called exactly once, at program startup.
func initConn() {
	autobind, err := net.ResolveUnixAddr("unixgram", "")
	if err != nil {
		return
	}

	sock, err := net.ListenUnixgram("unixgram", autobind)
	if err != nil {
		return
	}

	atomic.StorePointer(&unixConnPtr, unsafe.Pointer(sock))
}
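For orientation only (this sketch is not part of the vendored file): the package above is driven through `Enabled` and `Send`. A minimal caller, assuming a Linux host with a reachable journald socket; the `DEEVIRT_COMPONENT` field name is invented for the example.

package main

import (
	"fmt"

	"github.com/coreos/go-systemd/journal"
)

func main() {
	if !journal.Enabled() {
		fmt.Println("journald not available; falling back to stderr")
		return
	}
	// Field names must be uppercase letters, digits, and underscores, and
	// must not start with an underscore (see validVarName above).
	err := journal.Send("compute node started", journal.PriInfo, map[string]string{
		"DEEVIRT_COMPONENT": "scheduler", // illustrative field, not defined by the package
	})
	if err != nil {
		fmt.Println("journal write failed:", err)
	}
}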
202 vendor/github.com/coreos/pkg/LICENSE generated vendored Normal file
@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright {yyyy} {name of copyright owner}

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
39 vendor/github.com/coreos/pkg/capnslog/README.md generated vendored Normal file
@ -0,0 +1,39 @@
# capnslog, the CoreOS logging package

There are far too many logging packages out there, with varying degrees of licenses, far too many features (colorization, all sorts of log frameworks) or are just a pain to use (lack of `Fatalln()`?).
capnslog provides a simple but consistent logging interface suitable for all kinds of projects.

### Design Principles

##### `package main` is the place where logging gets turned on and routed

A library should not touch log options, only generate log entries. Libraries are silent until main lets them speak.

##### All log options are runtime-configurable.

Still the job of `main` to expose these configurations. `main` may delegate this to, say, a configuration webhook, but does so explicitly.

##### There is one log object per package. It is registered under its repository and package name.

`main` activates logging for its repository and any dependency repositories it would also like to have output in its logstream. `main` also dictates at which level each subpackage logs.

##### There is *one* output stream, and it is an `io.Writer` composed with a formatter.

Splitting streams is probably not the job of your program, but rather, your log aggregation framework. If you must split output streams, again, `main` configures this and you can write a very simple two-output struct that satisfies io.Writer.

Fancy colorful formatting and JSON output are beyond the scope of a basic logging framework -- they're application/log-collector dependent. These are, at best, provided as options, but more likely, provided by your application.

##### Log objects are an interface

An object knows best how to print itself. Log objects can collect more interesting metadata if they wish, however, because text isn't going away anytime soon, they must all be marshalable to text. The simplest log object is a string, which returns itself. If you wish to do more fancy tricks for printing your log objects, see also JSON output -- introspect and write a formatter which can handle your advanced log interface. Making strings is the only thing guaranteed.

##### Log levels have specific meanings:

* Critical: Unrecoverable. Must fail.
* Error: Data has been lost, a request has failed for a bad reason, or a required resource has been lost
* Warning: (Hopefully) Temporary conditions that may cause errors, but may work fine. A replica disappearing (that may reconnect) is a warning.
* Notice: Normal, but important (uncommon) log information.
* Info: Normal, working log information, everything is fine, but helpful notices for auditing or common operations.
* Debug: Everything is still fine, but even common operations may be logged, and less helpful but more quantity of notices.
* Trace: Anything goes, from logging every function call as part of a common operation, to tracing execution of a query.
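To make these principles concrete (a sketch, not part of the vendored README): a hypothetical `main` that registers a package logger and turns logging on at startup, using only the capnslog API vendored in the files below (`NewPackageLogger`, `SetGlobalLogLevel`, `Infof`). The repo and package names are placeholders.

package main

import (
	"github.com/coreos/pkg/capnslog"
)

// One log object per package, registered under repository and package name.
var plog = capnslog.NewPackageLogger("github.com/example/myrepo", "main")

func main() {
	// main is where logging gets turned on and routed; until this point the
	// library packages stay silent.
	capnslog.SetGlobalLogLevel(capnslog.INFO)

	plog.Infof("service starting")
	plog.Debugf("not printed: DEBUG is below the global INFO level")
}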
157 vendor/github.com/coreos/pkg/capnslog/formatters.go generated vendored Normal file
@ -0,0 +1,157 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package capnslog

import (
	"bufio"
	"fmt"
	"io"
	"log"
	"runtime"
	"strings"
	"time"
)

type Formatter interface {
	Format(pkg string, level LogLevel, depth int, entries ...interface{})
	Flush()
}

func NewStringFormatter(w io.Writer) Formatter {
	return &StringFormatter{
		w: bufio.NewWriter(w),
	}
}

type StringFormatter struct {
	w *bufio.Writer
}

func (s *StringFormatter) Format(pkg string, l LogLevel, i int, entries ...interface{}) {
	now := time.Now().UTC()
	s.w.WriteString(now.Format(time.RFC3339))
	s.w.WriteByte(' ')
	writeEntries(s.w, pkg, l, i, entries...)
	s.Flush()
}

func writeEntries(w *bufio.Writer, pkg string, _ LogLevel, _ int, entries ...interface{}) {
	if pkg != "" {
		w.WriteString(pkg + ": ")
	}
	str := fmt.Sprint(entries...)
	endsInNL := strings.HasSuffix(str, "\n")
	w.WriteString(str)
	if !endsInNL {
		w.WriteString("\n")
	}
}

func (s *StringFormatter) Flush() {
	s.w.Flush()
}

func NewPrettyFormatter(w io.Writer, debug bool) Formatter {
	return &PrettyFormatter{
		w:     bufio.NewWriter(w),
		debug: debug,
	}
}

type PrettyFormatter struct {
	w     *bufio.Writer
	debug bool
}

func (c *PrettyFormatter) Format(pkg string, l LogLevel, depth int, entries ...interface{}) {
	now := time.Now()
	ts := now.Format("2006-01-02 15:04:05")
	c.w.WriteString(ts)
	ms := now.Nanosecond() / 1000
	c.w.WriteString(fmt.Sprintf(".%06d", ms))
	if c.debug {
		_, file, line, ok := runtime.Caller(depth) // It's always the same number of frames to the user's call.
		if !ok {
			file = "???"
			line = 1
		} else {
			slash := strings.LastIndex(file, "/")
			if slash >= 0 {
				file = file[slash+1:]
			}
		}
		if line < 0 {
			line = 0 // not a real line number
		}
		c.w.WriteString(fmt.Sprintf(" [%s:%d]", file, line))
	}
	c.w.WriteString(fmt.Sprint(" ", l.Char(), " | "))
	writeEntries(c.w, pkg, l, depth, entries...)
	c.Flush()
}

func (c *PrettyFormatter) Flush() {
	c.w.Flush()
}

// LogFormatter emulates the form of the traditional built-in logger.
type LogFormatter struct {
	logger *log.Logger
	prefix string
}

// NewLogFormatter is a helper to produce a new LogFormatter struct. It uses the
// golang log package to actually do the logging work so that logs look similar.
func NewLogFormatter(w io.Writer, prefix string, flag int) Formatter {
	return &LogFormatter{
		logger: log.New(w, "", flag), // don't use prefix here
		prefix: prefix,               // save it instead
	}
}

// Format builds a log message for the LogFormatter. The LogLevel is ignored.
func (lf *LogFormatter) Format(pkg string, _ LogLevel, _ int, entries ...interface{}) {
	str := fmt.Sprint(entries...)
	prefix := lf.prefix
	if pkg != "" {
		prefix = fmt.Sprintf("%s%s: ", prefix, pkg)
	}
	lf.logger.Output(5, fmt.Sprintf("%s%v", prefix, str)) // call depth is 5
}

// Flush is included so that the interface is complete, but is a no-op.
func (lf *LogFormatter) Flush() {
	// noop
}

// NilFormatter is a no-op log formatter that does nothing.
type NilFormatter struct {
}

// NewNilFormatter is a helper to produce a new NilFormatter struct. It logs no
// messages so that you can cause part of your logging to be silent.
func NewNilFormatter() Formatter {
	return &NilFormatter{}
}

// Format does nothing.
func (_ *NilFormatter) Format(_ string, _ LogLevel, _ int, _ ...interface{}) {
	// noop
}

// Flush is included so that the interface is complete, but is a no-op.
func (_ *NilFormatter) Flush() {
	// noop
}
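Because `Formatter` is a plain two-method interface, swapping in a custom sink is a small amount of code. A hedged sketch (not part of the vendored code): `kvFormatter` is an invented type that writes bare `LEVEL pkg: message` lines with no timestamp, installed via the `SetFormatter` function defined in logmap.go below.

package main

import (
	"bufio"
	"fmt"
	"os"

	"github.com/coreos/pkg/capnslog"
)

// kvFormatter is a hypothetical minimal formatter satisfying capnslog.Formatter.
type kvFormatter struct {
	w *bufio.Writer
}

func (f *kvFormatter) Format(pkg string, l capnslog.LogLevel, _ int, entries ...interface{}) {
	fmt.Fprintf(f.w, "%s %s: %s\n", l.Char(), pkg, fmt.Sprint(entries...))
	f.Flush()
}

func (f *kvFormatter) Flush() { f.w.Flush() }

func main() {
	capnslog.SetFormatter(&kvFormatter{w: bufio.NewWriter(os.Stderr)})
	capnslog.NewPackageLogger("github.com/example/myrepo", "demo").Info("custom formatter active")
}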
96 vendor/github.com/coreos/pkg/capnslog/glog_formatter.go generated vendored Normal file
@ -0,0 +1,96 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package capnslog

import (
	"bufio"
	"bytes"
	"io"
	"os"
	"runtime"
	"strconv"
	"strings"
	"time"
)

var pid = os.Getpid()

type GlogFormatter struct {
	StringFormatter
}

func NewGlogFormatter(w io.Writer) *GlogFormatter {
	g := &GlogFormatter{}
	g.w = bufio.NewWriter(w)
	return g
}

func (g GlogFormatter) Format(pkg string, level LogLevel, depth int, entries ...interface{}) {
	g.w.Write(GlogHeader(level, depth+1))
	g.StringFormatter.Format(pkg, level, depth+1, entries...)
}

func GlogHeader(level LogLevel, depth int) []byte {
	// Lmmdd hh:mm:ss.uuuuuu threadid file:line]
	now := time.Now().UTC()
	_, file, line, ok := runtime.Caller(depth) // It's always the same number of frames to the user's call.
	if !ok {
		file = "???"
		line = 1
	} else {
		slash := strings.LastIndex(file, "/")
		if slash >= 0 {
			file = file[slash+1:]
		}
	}
	if line < 0 {
		line = 0 // not a real line number
	}
	buf := &bytes.Buffer{}
	buf.Grow(30)
	_, month, day := now.Date()
	hour, minute, second := now.Clock()
	buf.WriteString(level.Char())
	twoDigits(buf, int(month))
	twoDigits(buf, day)
	buf.WriteByte(' ')
	twoDigits(buf, hour)
	buf.WriteByte(':')
	twoDigits(buf, minute)
	buf.WriteByte(':')
	twoDigits(buf, second)
	buf.WriteByte('.')
	buf.WriteString(strconv.Itoa(now.Nanosecond() / 1000))
	buf.WriteByte('Z')
	buf.WriteByte(' ')
	buf.WriteString(strconv.Itoa(pid))
	buf.WriteByte(' ')
	buf.WriteString(file)
	buf.WriteByte(':')
	buf.WriteString(strconv.Itoa(line))
	buf.WriteByte(']')
	buf.WriteByte(' ')
	return buf.Bytes()
}

const digits = "0123456789"

func twoDigits(b *bytes.Buffer, d int) {
	c2 := digits[d%10]
	d /= 10
	c1 := digits[d%10]
	b.WriteByte(c1)
	b.WriteByte(c2)
}
50 vendor/github.com/coreos/pkg/capnslog/init.go generated vendored Normal file
@ -0,0 +1,50 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//go:build !windows
// +build !windows

package capnslog

import (
	"io"
	"os"
	"syscall"
)

// Here's where the opinionation comes in. We need some sensible defaults,
// especially after taking over the log package. Your project (whatever it may
// be) may see things differently. That's okay; there should be no defaults in
// the main package that cannot be controlled or overridden programmatically,
// otherwise it's a bug. The way to override them is to create your own
// init_log.go file much like this one.

func init() {
	initHijack()

	// Go `log` package uses os.Stderr.
	SetFormatter(NewDefaultFormatter(os.Stderr))
	SetGlobalLogLevel(INFO)
}

func NewDefaultFormatter(out io.Writer) Formatter {
	if syscall.Getppid() == 1 {
		// We're running under init, which may be systemd.
		f, err := NewJournaldFormatter()
		if err == nil {
			return f
		}
	}
	return NewPrettyFormatter(out, false)
}
25 vendor/github.com/coreos/pkg/capnslog/init_windows.go generated vendored Normal file
@ -0,0 +1,25 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package capnslog

import "os"

func init() {
	initHijack()

	// Go `log` package uses os.Stderr.
	SetFormatter(NewPrettyFormatter(os.Stderr, false))
	SetGlobalLogLevel(INFO)
}
69 vendor/github.com/coreos/pkg/capnslog/journald_formatter.go generated vendored Normal file
@ -0,0 +1,69 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//go:build !windows
// +build !windows

package capnslog

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"

	"github.com/coreos/go-systemd/journal"
)

func NewJournaldFormatter() (Formatter, error) {
	if !journal.Enabled() {
		return nil, errors.New("No systemd detected")
	}
	return &journaldFormatter{}, nil
}

type journaldFormatter struct{}

func (j *journaldFormatter) Format(pkg string, l LogLevel, _ int, entries ...interface{}) {
	var pri journal.Priority
	switch l {
	case CRITICAL:
		pri = journal.PriCrit
	case ERROR:
		pri = journal.PriErr
	case WARNING:
		pri = journal.PriWarning
	case NOTICE:
		pri = journal.PriNotice
	case INFO:
		pri = journal.PriInfo
	case DEBUG:
		pri = journal.PriDebug
	case TRACE:
		pri = journal.PriDebug
	default:
		panic("Unhandled loglevel")
	}
	msg := fmt.Sprint(entries...)
	tags := map[string]string{
		"PACKAGE":           pkg,
		"SYSLOG_IDENTIFIER": filepath.Base(os.Args[0]),
	}
	err := journal.Send(msg, pri, tags)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}

func (j *journaldFormatter) Flush() {}
39 vendor/github.com/coreos/pkg/capnslog/log_hijack.go generated vendored Normal file
@ -0,0 +1,39 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package capnslog

import (
	"log"
)

func initHijack() {
	pkg := NewPackageLogger("log", "")
	w := packageWriter{pkg}
	log.SetFlags(0)
	log.SetPrefix("")
	log.SetOutput(w)
}

type packageWriter struct {
	pl *PackageLogger
}

func (p packageWriter) Write(b []byte) (int, error) {
	if p.pl.level < INFO {
		return 0, nil
	}
	p.pl.internalLog(calldepth+2, INFO, string(b))
	return len(b), nil
}
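The effect of `initHijack` (a sketch under the assumption that capnslog's `init` has run, as wired up in init.go above): code that logs through the standard library is transparently rerouted into the capnslog stream at INFO.

package main

import (
	"log"

	_ "github.com/coreos/pkg/capnslog" // importing is enough: init() hijacks the stdlib logger
)

func main() {
	// This line is emitted through the "log" PackageLogger registered by
	// initHijack, not through the stdlib's own writer.
	log.Println("routed through capnslog")
}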
245 vendor/github.com/coreos/pkg/capnslog/logmap.go generated vendored Normal file
@ -0,0 +1,245 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package capnslog

import (
	"errors"
	"strings"
	"sync"
)

// LogLevel is the set of all log levels.
type LogLevel int8

const (
	// CRITICAL is the lowest log level; only errors which will end the program will be propagated.
	CRITICAL LogLevel = iota - 1
	// ERROR is for errors that are not fatal but lead to troubling behavior.
	ERROR
	// WARNING is for errors which are not fatal and not errors, but are unusual. Often sourced from misconfigurations.
	WARNING
	// NOTICE is for normal but significant conditions.
	NOTICE
	// INFO is a log level for common, everyday log updates.
	INFO
	// DEBUG is the default hidden level for more verbose updates about internal processes.
	DEBUG
	// TRACE is for (potentially) call by call tracing of programs.
	TRACE
)

// Char returns a single-character representation of the log level.
func (l LogLevel) Char() string {
	switch l {
	case CRITICAL:
		return "C"
	case ERROR:
		return "E"
	case WARNING:
		return "W"
	case NOTICE:
		return "N"
	case INFO:
		return "I"
	case DEBUG:
		return "D"
	case TRACE:
		return "T"
	default:
		panic("Unhandled loglevel")
	}
}

// String returns a multi-character representation of the log level.
func (l LogLevel) String() string {
	switch l {
	case CRITICAL:
		return "CRITICAL"
	case ERROR:
		return "ERROR"
	case WARNING:
		return "WARNING"
	case NOTICE:
		return "NOTICE"
	case INFO:
		return "INFO"
	case DEBUG:
		return "DEBUG"
	case TRACE:
		return "TRACE"
	default:
		panic("Unhandled loglevel")
	}
}

// Update using the given string value. Fulfills the flag.Value interface.
func (l *LogLevel) Set(s string) error {
	value, err := ParseLevel(s)
	if err != nil {
		return err
	}

	*l = value
	return nil
}

// Returns an empty string, only here to fulfill the pflag.Value interface.
func (l *LogLevel) Type() string {
	return ""
}

// ParseLevel translates some potential loglevel strings into their corresponding levels.
func ParseLevel(s string) (LogLevel, error) {
	switch s {
	case "CRITICAL", "C":
		return CRITICAL, nil
	case "ERROR", "0", "E":
		return ERROR, nil
	case "WARNING", "1", "W":
		return WARNING, nil
	case "NOTICE", "2", "N":
		return NOTICE, nil
	case "INFO", "3", "I":
		return INFO, nil
	case "DEBUG", "4", "D":
		return DEBUG, nil
	case "TRACE", "5", "T":
		return TRACE, nil
	}
	return CRITICAL, errors.New("couldn't parse log level " + s)
}

type RepoLogger map[string]*PackageLogger

type loggerStruct struct {
	sync.Mutex
	repoMap   map[string]RepoLogger
	formatter Formatter
}

// logger is the global logger
var logger = new(loggerStruct)

// SetGlobalLogLevel sets the log level for all packages in all repositories
// registered with capnslog.
func SetGlobalLogLevel(l LogLevel) {
	logger.Lock()
	defer logger.Unlock()
	for _, r := range logger.repoMap {
		r.setRepoLogLevelInternal(l)
	}
}

// GetRepoLogger may return the handle to the repository's set of packages' loggers.
func GetRepoLogger(repo string) (RepoLogger, error) {
	logger.Lock()
	defer logger.Unlock()
	r, ok := logger.repoMap[repo]
	if !ok {
		return nil, errors.New("no packages registered for repo " + repo)
	}
	return r, nil
}

// MustRepoLogger returns the handle to the repository's packages' loggers.
func MustRepoLogger(repo string) RepoLogger {
	r, err := GetRepoLogger(repo)
	if err != nil {
		panic(err)
	}
	return r
}

// SetRepoLogLevel sets the log level for all packages in the repository.
func (r RepoLogger) SetRepoLogLevel(l LogLevel) {
	logger.Lock()
	defer logger.Unlock()
	r.setRepoLogLevelInternal(l)
}

func (r RepoLogger) setRepoLogLevelInternal(l LogLevel) {
	for _, v := range r {
		v.level = l
	}
}

// ParseLogLevelConfig parses a comma-separated string of "package=loglevel", in
// order, and returns a map of the results, for use in SetLogLevel.
func (r RepoLogger) ParseLogLevelConfig(conf string) (map[string]LogLevel, error) {
	setlist := strings.Split(conf, ",")
	out := make(map[string]LogLevel)
	for _, setstring := range setlist {
		setting := strings.Split(setstring, "=")
		if len(setting) != 2 {
			return nil, errors.New("oddly structured `pkg=level` option: " + setstring)
		}
		l, err := ParseLevel(setting[1])
		if err != nil {
			return nil, err
		}
		out[setting[0]] = l
	}
	return out, nil
}

// SetLogLevel takes a map of package names within a repository to their desired
// loglevel, and sets the levels appropriately. Unknown packages are ignored.
// "*" is a special package name that corresponds to all packages, and will be
// processed first.
func (r RepoLogger) SetLogLevel(m map[string]LogLevel) {
	logger.Lock()
	defer logger.Unlock()
	if l, ok := m["*"]; ok {
		r.setRepoLogLevelInternal(l)
	}
	for k, v := range m {
		l, ok := r[k]
		if !ok {
			continue
		}
		l.level = v
	}
}

// SetFormatter sets the formatting function for all logs.
func SetFormatter(f Formatter) {
	logger.Lock()
	defer logger.Unlock()
	logger.formatter = f
}

// NewPackageLogger creates a package logger object.
// This should be defined as a global var in your package, referencing your repo.
func NewPackageLogger(repo string, pkg string) (p *PackageLogger) {
	logger.Lock()
	defer logger.Unlock()
	if logger.repoMap == nil {
		logger.repoMap = make(map[string]RepoLogger)
	}
	r, rok := logger.repoMap[repo]
	if !rok {
		logger.repoMap[repo] = make(RepoLogger)
		r = logger.repoMap[repo]
	}
	p, pok := r[pkg]
	if !pok {
		r[pkg] = &PackageLogger{
			pkg:   pkg,
			level: INFO,
		}
		p = r[pkg]
	}
	return
}
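A short usage sketch for the level plumbing above (not part of the vendored file): parse a `pkg=level` string, e.g. from a command-line flag, and apply it to one repository. Names are placeholders; `MustRepoLogger` panics if the repo has no registered packages, so the package logger must exist first.

package main

import (
	"github.com/coreos/pkg/capnslog"
)

var plog = capnslog.NewPackageLogger("github.com/example/myrepo", "server")

func main() {
	repo := capnslog.MustRepoLogger("github.com/example/myrepo")

	// "*" applies first, then per-package overrides.
	cfg, err := repo.ParseLogLevelConfig("*=NOTICE,server=DEBUG")
	if err != nil {
		plog.Fatalf("bad log level config: %v", err)
	}
	repo.SetLogLevel(cfg)

	plog.Debugf("server package now logs at DEBUG")
}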
191 vendor/github.com/coreos/pkg/capnslog/pkg_logger.go generated vendored Normal file
@ -0,0 +1,191 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package capnslog

import (
	"fmt"
	"os"
)

type PackageLogger struct {
	pkg   string
	level LogLevel
}

const calldepth = 2

func (p *PackageLogger) internalLog(depth int, inLevel LogLevel, entries ...interface{}) {
	logger.Lock()
	defer logger.Unlock()
	if inLevel != CRITICAL && p.level < inLevel {
		return
	}
	if logger.formatter != nil {
		logger.formatter.Format(p.pkg, inLevel, depth+1, entries...)
	}
}

// SetLevel allows users to change the current logging level.
func (p *PackageLogger) SetLevel(l LogLevel) {
	logger.Lock()
	defer logger.Unlock()
	p.level = l
}

// LevelAt checks if the given log level will be outputted under current setting.
func (p *PackageLogger) LevelAt(l LogLevel) bool {
	logger.Lock()
	defer logger.Unlock()
	return p.level >= l
}

// Log a formatted string at any level between ERROR and TRACE
func (p *PackageLogger) Logf(l LogLevel, format string, args ...interface{}) {
	p.internalLog(calldepth, l, fmt.Sprintf(format, args...))
}

// Log a message at any level between ERROR and TRACE
func (p *PackageLogger) Log(l LogLevel, args ...interface{}) {
	p.internalLog(calldepth, l, fmt.Sprint(args...))
}

// log stdlib compatibility

func (p *PackageLogger) Println(args ...interface{}) {
	p.internalLog(calldepth, INFO, fmt.Sprintln(args...))
}

func (p *PackageLogger) Printf(format string, args ...interface{}) {
	p.Logf(INFO, format, args...)
}

func (p *PackageLogger) Print(args ...interface{}) {
	p.internalLog(calldepth, INFO, fmt.Sprint(args...))
}

// Panic and fatal

func (p *PackageLogger) Panicf(format string, args ...interface{}) {
	s := fmt.Sprintf(format, args...)
	p.internalLog(calldepth, CRITICAL, s)
	panic(s)
}

func (p *PackageLogger) Panic(args ...interface{}) {
	s := fmt.Sprint(args...)
	p.internalLog(calldepth, CRITICAL, s)
	panic(s)
}

func (p *PackageLogger) Panicln(args ...interface{}) {
	s := fmt.Sprintln(args...)
	p.internalLog(calldepth, CRITICAL, s)
	panic(s)
}

func (p *PackageLogger) Fatalf(format string, args ...interface{}) {
	p.Logf(CRITICAL, format, args...)
	os.Exit(1)
}

func (p *PackageLogger) Fatal(args ...interface{}) {
	s := fmt.Sprint(args...)
	p.internalLog(calldepth, CRITICAL, s)
	os.Exit(1)
}

func (p *PackageLogger) Fatalln(args ...interface{}) {
	s := fmt.Sprintln(args...)
	p.internalLog(calldepth, CRITICAL, s)
	os.Exit(1)
}

// Error Functions

func (p *PackageLogger) Errorf(format string, args ...interface{}) {
	p.Logf(ERROR, format, args...)
}

func (p *PackageLogger) Error(entries ...interface{}) {
	p.internalLog(calldepth, ERROR, entries...)
}

// Warning Functions

func (p *PackageLogger) Warningf(format string, args ...interface{}) {
	p.Logf(WARNING, format, args...)
}

func (p *PackageLogger) Warning(entries ...interface{}) {
	p.internalLog(calldepth, WARNING, entries...)
}

// Notice Functions

func (p *PackageLogger) Noticef(format string, args ...interface{}) {
	p.Logf(NOTICE, format, args...)
}

func (p *PackageLogger) Notice(entries ...interface{}) {
	p.internalLog(calldepth, NOTICE, entries...)
}

// Info Functions

func (p *PackageLogger) Infof(format string, args ...interface{}) {
	p.Logf(INFO, format, args...)
}

func (p *PackageLogger) Info(entries ...interface{}) {
	p.internalLog(calldepth, INFO, entries...)
}

// Debug Functions

func (p *PackageLogger) Debugf(format string, args ...interface{}) {
	if p.level < DEBUG {
		return
	}
	p.Logf(DEBUG, format, args...)
}

func (p *PackageLogger) Debug(entries ...interface{}) {
	if p.level < DEBUG {
		return
	}
	p.internalLog(calldepth, DEBUG, entries...)
}

// Trace Functions

func (p *PackageLogger) Tracef(format string, args ...interface{}) {
	if p.level < TRACE {
		return
	}
	p.Logf(TRACE, format, args...)
}

func (p *PackageLogger) Trace(entries ...interface{}) {
	if p.level < TRACE {
		return
	}
	p.internalLog(calldepth, TRACE, entries...)
}

func (p *PackageLogger) Flush() {
	logger.Lock()
	defer logger.Unlock()
	logger.formatter.Flush()
}
66 vendor/github.com/coreos/pkg/capnslog/syslog_formatter.go generated vendored Normal file
@ -0,0 +1,66 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//go:build !windows
// +build !windows

package capnslog

import (
	"fmt"
	"log/syslog"
)

func NewSyslogFormatter(w *syslog.Writer) Formatter {
	return &syslogFormatter{w}
}

func NewDefaultSyslogFormatter(tag string) (Formatter, error) {
	w, err := syslog.New(syslog.LOG_DEBUG, tag)
	if err != nil {
		return nil, err
	}
	return NewSyslogFormatter(w), nil
}

type syslogFormatter struct {
	w *syslog.Writer
}

func (s *syslogFormatter) Format(pkg string, l LogLevel, _ int, entries ...interface{}) {
	for _, entry := range entries {
		str := fmt.Sprint(entry)
		switch l {
		case CRITICAL:
			s.w.Crit(str)
		case ERROR:
			s.w.Err(str)
		case WARNING:
			s.w.Warning(str)
		case NOTICE:
			s.w.Notice(str)
		case INFO:
			s.w.Info(str)
		case DEBUG:
			s.w.Debug(str)
		case TRACE:
			s.w.Debug(str)
		default:
			panic("Unhandled loglevel")
		}
	}
}

func (s *syslogFormatter) Flush() {
}
375 vendor/github.com/hashicorp/raft-wal/LICENSE generated vendored Normal file
@ -0,0 +1,375 @@
Copyright (c) 2022 HashiCorp, Inc.

Mozilla Public License Version 2.0
==================================

1. Definitions
--------------

1.1. "Contributor"
    means each individual or legal entity that creates, contributes to
    the creation of, or owns Covered Software.

1.2. "Contributor Version"
    means the combination of the Contributions of others (if any) used
    by a Contributor and that particular Contributor's Contribution.

1.3. "Contribution"
    means Covered Software of a particular Contributor.

1.4. "Covered Software"
    means Source Code Form to which the initial Contributor has attached
    the notice in Exhibit A, the Executable Form of such Source Code
    Form, and Modifications of such Source Code Form, in each case
    including portions thereof.

1.5. "Incompatible With Secondary Licenses"
    means

    (a) that the initial Contributor has attached the notice described
        in Exhibit B to the Covered Software; or

    (b) that the Covered Software was made available under the terms of
        version 1.1 or earlier of the License, but not also under the
        terms of a Secondary License.

1.6. "Executable Form"
    means any form of the work other than Source Code Form.

1.7. "Larger Work"
    means a work that combines Covered Software with other material, in
    a separate file or files, that is not Covered Software.

1.8. "License"
    means this document.

1.9. "Licensable"
    means having the right to grant, to the maximum extent possible,
    whether at the time of the initial grant or subsequently, any and
    all of the rights conveyed by this License.

1.10. "Modifications"
    means any of the following:

    (a) any file in Source Code Form that results from an addition to,
        deletion from, or modification of the contents of Covered
        Software; or

    (b) any new file in Source Code Form that contains any Covered
        Software.

1.11. "Patent Claims" of a Contributor
    means any patent claim(s), including without limitation, method,
    process, and apparatus claims, in any patent Licensable by such
    Contributor that would be infringed, but for the grant of the
    License, by the making, using, selling, offering for sale, having
    made, import, or transfer of either its Contributions or its
    Contributor Version.

1.12. "Secondary License"
    means either the GNU General Public License, Version 2.0, the GNU
    Lesser General Public License, Version 2.1, the GNU Affero General
    Public License, Version 3.0, or any later versions of those
    licenses.

1.13. "Source Code Form"
    means the form of the work preferred for making modifications.

1.14. "You" (or "Your")
    means an individual or a legal entity exercising rights under this
    License. For legal entities, "You" includes any entity that
    controls, is controlled by, or is under common control with You. For
    purposes of this definition, "control" means (a) the power, direct
    or indirect, to cause the direction or management of such entity,
    whether by contract or otherwise, or (b) ownership of more than
    fifty percent (50%) of the outstanding shares or beneficial
    ownership of such entity.

2. License Grants and Conditions
--------------------------------

2.1. Grants

Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:

(a) under intellectual property rights (other than patent or trademark)
    Licensable by such Contributor to use, reproduce, make available,
    modify, display, perform, distribute, and otherwise exploit its
    Contributions, either on an unmodified basis, with Modifications, or
    as part of a Larger Work; and

(b) under Patent Claims of such Contributor to make, use, sell, offer
    for sale, have made, import, and otherwise transfer either its
    Contributions or its Contributor Version.

2.2. Effective Date

The licenses granted in Section 2.1 with respect to any Contribution
become effective for each Contribution on the date the Contributor first
distributes such Contribution.

2.3. Limitations on Grant Scope

The licenses granted in this Section 2 are the only rights granted under
this License. No additional rights or licenses will be implied from the
distribution or licensing of Covered Software under this License.
Notwithstanding Section 2.1(b) above, no patent license is granted by a
Contributor:

(a) for any code that a Contributor has removed from Covered Software;
    or

(b) for infringements caused by: (i) Your and any other third party's
    modifications of Covered Software, or (ii) the combination of its
    Contributions with other software (except as part of its Contributor
    Version); or

(c) under Patent Claims infringed by Covered Software in the absence of
    its Contributions.

This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
the notice requirements in Section 3.4).

2.4. Subsequent Licenses

No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this
License (see Section 10.2) or under the terms of a Secondary License (if
permitted under the terms of Section 3.3).

2.5. Representation

Each Contributor represents that the Contributor believes its
Contributions are its original creation(s) or it has sufficient rights
to grant the rights to its Contributions conveyed by this License.

2.6. Fair Use

This License is not intended to limit any rights You have under
applicable copyright doctrines of fair use, fair dealing, or other
equivalents.

2.7. Conditions

Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
in Section 2.1.

3. Responsibilities
-------------------

3.1. Distribution of Source Form

All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under
the terms of this License. You must inform recipients that the Source
Code Form of the Covered Software is governed by the terms of this
License, and how they can obtain a copy of this License. You may not
attempt to alter or restrict the recipients' rights in the Source Code
Form.

3.2. Distribution of Executable Form

If You distribute Covered Software in Executable Form then:

(a) such Covered Software must also be made available in Source Code
    Form, as described in Section 3.1, and You must inform recipients of
    the Executable Form how they can obtain a copy of such Source Code
    Form by reasonable means in a timely manner, at a charge no more
    than the cost of distribution to the recipient; and

(b) You may distribute such Executable Form under the terms of this
    License, or sublicense it under different terms, provided that the
    license for the Executable Form does not attempt to limit or alter
    the recipients' rights in the Source Code Form under this License.

3.3. Distribution of a Larger Work

You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for
the Covered Software. If the Larger Work is a combination of Covered
Software with a work governed by one or more Secondary Licenses, and the
Covered Software is not Incompatible With Secondary Licenses, this
License permits You to additionally distribute such Covered Software
under the terms of such Secondary License(s), so that the recipient of
the Larger Work may, at their option, further distribute the Covered
Software under the terms of either this License or such Secondary
License(s).

3.4. Notices

You may not remove or alter the substance of any license notices
(including copyright notices, patent notices, disclaimers of warranty,
or limitations of liability) contained within the Source Code Form of
the Covered Software, except that You may alter any license notices to
the extent required to remedy known factual inaccuracies.

3.5. Application of Additional Terms

You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on
behalf of any Contributor. You must make it absolutely clear that any
such warranty, support, indemnity, or liability obligation is offered by
You alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.

4. Inability to Comply Due to Statute or Regulation
---------------------------------------------------

If it is impossible for You to comply with any of the terms of this
License with respect to some or all of the Covered Software due to
statute, judicial order, or regulation then You must: (a) comply with
the terms of this License to the maximum extent possible; and (b)
describe the limitations and the code they affect. Such description must
be placed in a text file included with all distributions of the Covered
Software under this License. Except to the extent prohibited by statute
or regulation, such description must be sufficiently detailed for a
recipient of ordinary skill to be able to understand it.

5. Termination
--------------

5.1. The rights granted under this License will terminate automatically
if You fail to comply with any of its terms. However, if You become
compliant, then the rights granted under this License from a particular
Contributor are reinstated (a) provisionally, unless and until such
Contributor explicitly and finally terminates Your grants, and (b) on an
ongoing basis, if such Contributor fails to notify You of the
non-compliance by some reasonable means prior to 60 days after You have
come back into compliance. Moreover, Your grants from a particular
Contributor are reinstated on an ongoing basis if such Contributor
notifies You of the non-compliance by some reasonable means, this is the
first time You have received notice of non-compliance with this License
from such Contributor, and You become compliant prior to 30 days after
Your receipt of the notice.

5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions,
counter-claims, and cross-claims) alleging that a Contributor Version
directly or indirectly infringes any patent, then the rights granted to
You by any and all Contributors for the Covered Software under Section
2.1 of this License shall terminate.

5.3. In the event of termination under Sections 5.1 or 5.2 above, all
end user license agreements (excluding distributors and resellers) which
have been validly granted by You or Your distributors under this License
prior to termination shall survive termination.

************************************************************************
*                                                                      *
*  6. Disclaimer of Warranty                                           *
*  -------------------------                                           *
*                                                                      *
*  Covered Software is provided under this License on an "as is"      *
*  basis, without warranty of any kind, either expressed, implied, or *
*  statutory, including, without limitation, warranties that the      *
*  Covered Software is free of defects, merchantable, fit for a       *
*  particular purpose or non-infringing. The entire risk as to the    *
*  quality and performance of the Covered Software is with You.       *
*  Should any Covered Software prove defective in any respect, You    *
*  (not any Contributor) assume the cost of any necessary servicing,  *
*  repair, or correction. This disclaimer of warranty constitutes an  *
*  essential part of this License. No use of any Covered Software is  *
*  authorized under this License except under this disclaimer.        *
*                                                                      *
************************************************************************

************************************************************************
*                                                                      *
*  7. Limitation of Liability                                          *
*  --------------------------                                          *
*                                                                      *
*  Under no circumstances and under no legal theory, whether tort     *
*  (including negligence), contract, or otherwise, shall any          *
*  Contributor, or anyone who distributes Covered Software as         *
*  permitted above, be liable to You for any direct, indirect,        *
*  special, incidental, or consequential damages of any character     *
*  including, without limitation, damages for lost profits, loss of   *
*  goodwill, work stoppage, computer failure or malfunction, or any   *
*  and all other commercial damages or losses, even if such party     *
*  shall have been informed of the possibility of such damages. This  *
*  limitation of liability shall not apply to liability for death or  *
*  personal injury resulting from such party's negligence to the      *
*  extent applicable law prohibits such limitation. Some              *
*  jurisdictions do not allow the exclusion or limitation of          *
*  incidental or consequential damages, so this exclusion and         *
*  limitation may not apply to You.                                   *
*                                                                      *
************************************************************************

8. Litigation
-------------

Any litigation relating to this License may be brought only in the
courts of a jurisdiction where the defendant maintains its principal
place of business and such litigation shall be governed by laws of that
jurisdiction, without reference to its conflict-of-law provisions.
Nothing in this Section shall prevent a party's ability to bring
cross-claims or counter-claims.

9. Miscellaneous
----------------

This License represents the complete agreement concerning the subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the extent
necessary to make it enforceable. Any law or regulation which provides
that the language of a contract shall be construed against the drafter
shall not be used to construe this License against a Contributor.

10. Versions of the License
---------------------------

10.1. New Versions

Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.

10.2. Effect of New Versions

You may distribute the Covered Software under the terms of the version
of the License under which You originally received the Covered Software,
or under the terms of any subsequent version published by the license
steward.

10.3. Modified Versions

If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a
modified version of this License if you rename the license and remove
any references to the name of the license steward (except to note that
such modified license differs from this License).

10.4. Distributing Source Code Form that is Incompatible With Secondary
      Licenses

If You choose to distribute Source Code Form that is Incompatible With
Secondary Licenses under the terms of this version of the License, the
notice described in Exhibit B of this License must be attached.

Exhibit A - Source Code Form License Notice
-------------------------------------------

This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.

If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
file in a relevant directory) where a recipient would be likely to look
for such a notice.

You may add additional accurate notices of copyright ownership.

Exhibit B - "Incompatible With Secondary Licenses" Notice
---------------------------------------------------------

This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.
752 vendor/github.com/hashicorp/raft-wal/README.md generated vendored Normal file
@ -0,0 +1,752 @@
# Raft WAL

This library implements a Write-Ahead Log (WAL) suitable for use with
[`hashicorp/raft`](https://github.com/hashicorp/raft).

Specifically, the library provides an instance of raft's `LogStore` and
`StableStore` interfaces for storing both raft logs and the other small items
that require stable storage (like which term the node last voted in).

**This library is still considered experimental!**

It is complete and reasonably well tested so far but we plan to complete more
rigorous end-to-end testing and performance analysis within our products and
together with some of our users before we consider this safe for production.

The advantages of this library over `hashicorp/raft-boltdb`, which has been used
for many years in HashiCorp products, are:
 1. Efficient truncations that don't cause later appends to slow down due to
    free space tracking issues in BoltDB's btree.
 2. More efficient appends due to only one fsync per append vs two in BoltDB.
 3. A more efficient and suitable on-disk structure for a log vs a copy-on-write
    BTree.

We aim to provide roughly equivalent resiliency to crashes as respected storage
systems such as SQLite, LevelDB/RocksDB and etcd. BoltDB technically has a
stronger property due to its page-aligned model (no partial sector overwrites).
We initially [designed a WAL on the same principles](/01-WAL-pages.md),
however we felt that the additional complexity it adds wasn't justified given the
weaker assumptions that many other battle-tested systems above use.

Our design goals for crash recovery are:

 - Crashes at any point must not lose committed log entries or result in a
   corrupt file, even if in-flight sector writes are not atomic.
 - We _do_ assume [Powersafe Overwrites](#powersafe-overwrites-psow) where
   partial sectors can be appended to without corrupting existing data even in a
   power failure.
 - Latent errors (i.e. silent corruption in the FS or disk) _may_ be detected
   during a read, but we assume the file-system and disk are really responsible
   for this (i.e. we don't validate checksums on every record read). This is
   equivalent to SQLite, LMDB, BoltDB etc.

See the [system assumptions](#system-assumptions) and [crash
safety](#crash-safety) sections for more details.

## Limitations

Here are some notable (but we think acceptable) limitations of this design.

 * Segment files can't be larger than 4GiB. (Current default is 64MiB.)
 * Individual records can't be larger than 4GiB without changing the format.
   (Current limit is 64MiB.)
 * Appended log entries must have monotonically increasing `Index` fields with
   no gaps (though they may start at any index in an empty log).
 * Only head or tail truncations are supported. `DeleteRange` will error if the
   range is not a prefix or suffix of the log. `hashicorp/raft` never needs
   that.
 * No encryption or compression support.
   * Though we do provide a pluggable entry codec and internally treat each
     entry as opaque bytes, so it's possible to supply a custom codec that
     transforms entries in any way desired.
 * If the segment tail file is lost _after_ entries are committed to it due to
   manual intervention or a filesystem bug, the WAL can't distinguish that from a
   crash during rotation that left the file missing, since we don't update
   metadata on every append for performance reasons. In most other cases,
   missing data would be detected on recovery and fail the recovery to protect
   from silent data loss, but in this particular case that's not possible
   without significantly impacting steady-state performance by updating
   the last committed entry in the meta DB on every append. We assume this is
   reasonable since previous LogStore implementations would also "silently"
   lose data if the database files were removed too.

## Storage Format Overview

The WAL has two types of file: a meta store and one or more log segments.

### Meta Store

We need to provide a `StableStore` interface for small amounts of Raft data. We
also need to store some meta data about log segments to simplify managing them
in an atomic and crash-safe way.

Since this data is _generally_ small we could invent our own storage format with
some sort of double-buffering and limit ourselves to a single page of data etc.
But since performance is not critical for meta-data operations and the size is
extremely unlikely to get larger than a few KiB, we choose instead the pragmatic
approach of using BoltDB for our `wal-meta.db`.

The meta database contains two buckets: `stable`, containing key/values persisted
by Raft via the `StableStore` interface, and `wal-state`, which contains the
source-of-truth meta data about which segment files should be considered part of
the current log.

The `wal-state` bucket contains one record with all the state since it's only
loaded or persisted in one atomic batch and is small. The state is just a JSON
encoded object described by the following structs. JSON encoding is used as this
is not performance sensitive and it's simpler to work with and more human
readable.

```go
type PersistentState struct {
	NextSegmentID uint64
	Segments      []SegmentInfo
}
type SegmentInfo struct {
	ID         uint64
	BaseIndex  uint64
	MinIndex   uint64
	MaxIndex   uint64
	Codec      uint64
	IndexStart uint64
	CreateTime time.Time
	SealTime   time.Time
}
```

The last segment (with the highest baseIndex) is the "tail" and must be the only one where
`SealTime = 0` (i.e. it's unsealed). `IndexStart` and `MaxIndex` are also zero until
the segment is sealed.

Why use BoltDB when the main reason for this library is that the existing
BoltDB `LogStore` has performance issues?

Well, the major performance issue in `raft-boltdb` occurs when a large amount of
log data is written and then truncated: the overhead of tracking all the free
space in the file makes further appends slower.

Our use here is orders of magnitude lighter than storing all log data. As an
example, let's assume we allow 100GiB of logs to be kept around, which is at
least an order of magnitude larger than the largest currently known Consul user's
worst-case log size, and two orders of magnitude more than the largest Consul
deployment's steady state. Assuming fixed 64MiB segments, that would require
about 1600 segments, which encode to about 125 bytes in JSON each. Even at this
extreme, the meta DB only has to hold under 200KiB.

Even if a truncation occurs that reduces that all the way back to a single
segment, 200KiB is only a hundred or so pages (allowing for btree overhead) so
the free list will never be larger than a single 4KB page.

On top of that, we only pay the cost of a write to BoltDB for meta-data
transactions: rotating to a new segment, or truncating. The vast majority of
appends only need to append to a log segment.
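To make that concrete, here is a minimal sketch of loading that single state
record with [`bbolt`](https://github.com/etcd-io/bbolt), reusing the structs
above. This is illustrative, not the library's actual code; in particular the
`state` key name is an assumption.

```go
// loadState reads the single wal-state record out of wal-meta.db. The bucket
// name follows this README; the record key is a hypothetical name.
func loadState(db *bolt.DB) (*PersistentState, error) {
	var st PersistentState
	err := db.View(func(tx *bolt.Tx) error {
		b := tx.Bucket([]byte("wal-state"))
		if b == nil {
			return errors.New("wal-meta.db has no wal-state bucket")
		}
		raw := b.Get([]byte("state")) // hypothetical key holding the JSON blob
		if raw == nil {
			return errors.New("wal-state bucket has no state record")
		}
		return json.Unmarshal(raw, &st)
	})
	if err != nil {
		return nil, err
	}
	return &st, nil
}
```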
### Segment Files

Segment files are pre-allocated (if supported by the filesystem) on creation to
a fixed size. By default we use 64MiB segment files. This section defines the
encoding for those files. All integer types are encoded in little-endian order.

The file starts with a fixed-size header that is written once before the
first committed entries.

```
 0      1      2      3      4      5      6      7      8
+------+------+------+------+------+------+------+------+
| Magic                     | Reserved           | Vsn  |
+------+------+------+------+------+------+------+------+
| BaseIndex                                             |
+------+------+------+------+------+------+------+------+
| SegmentID                                             |
+------+------+------+------+------+------+------+------+
| Codec                                                 |
+------+------+------+------+------+------+------+------+
```

| Field       | Type      | Description |
| ----------- | --------- | ----------- |
| `Magic`     | `uint32`  | The randomly chosen value `0x58eb6b0d`. |
| `Reserved`  | `[3]byte` | Bytes reserved for future file flags. |
| `Vsn`       | `uint8`   | The version of the file, currently `0x0`. |
| `BaseIndex` | `uint64`  | The raft Index of the first entry that will be stored in this file. |
| `SegmentID` | `uint64`  | A unique identifier for this segment file. |
| `Codec`     | `uint64`  | The codec used to write the file. |

Each segment file is named `<BaseIndex>-<SegmentID>.wal`. `BaseIndex` is
formatted in decimal with leading zeros and a fixed width of 20 chars.
`SegmentID` is formatted in lower-case hex with zero padding to 16 chars wide.
This has the nice property that they sort lexicographically in the directory,
although we don't rely on that.
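As a sketch of the encoding (assumed helper code, not necessarily how the
library implements it), the filename and the 32-byte header above can be
produced like this:

```go
const (
	segmentMagic   = 0x58eb6b0d // the Magic value from the table above
	segmentVersion = 0x0        // current Vsn
)

// segmentFileName renders "<BaseIndex>-<SegmentID>.wal": BaseIndex as
// zero-padded decimal 20 chars wide, SegmentID as zero-padded hex 16 wide.
func segmentFileName(baseIndex, segmentID uint64) string {
	return fmt.Sprintf("%020d-%016x.wal", baseIndex, segmentID)
}

// encodeSegmentHeader packs the fixed file header in little-endian order.
func encodeSegmentHeader(baseIndex, segmentID, codec uint64) [32]byte {
	var h [32]byte
	binary.LittleEndian.PutUint32(h[0:4], segmentMagic)
	// h[4:7] is Reserved and stays zero; byte 7 is the version.
	h[7] = segmentVersion
	binary.LittleEndian.PutUint64(h[8:16], baseIndex)
	binary.LittleEndian.PutUint64(h[16:24], segmentID)
	binary.LittleEndian.PutUint64(h[24:32], codec)
	return h
}
```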
### Frames

Log entries are stored in consecutive frames after the header. As well as log
entry frames there are a few meta data frame types too. Each frame starts with
an 8-byte header.

```
 0      1      2      3      4      5      6      7      8
+------+------+------+------+------+------+------+------+
| Type | Reserved           | Length/CRC                |
+------+------+------+------+------+------+------+------+
```

| Field        | Type     | Description |
| ------------ | -------- | ----------- |
| `Type`       | `uint8`  | The frame type. See below. |
| `Length/CRC` | `uint32` | Depends on `Type`. See below. |

| Type      | Value | Description |
| --------- | ----- | ----------- |
| `Invalid` | `0x0` | The frame is invalid. We make the zero value invalid so we can detect unwritten frames cleanly. |
| `Entry`   | `0x1` | The frame contains an entire log entry. |
| `Index`   | `0x2` | The frame contains an index array, not actual log entries. |
| `Commit`  | `0x3` | The frame contains a CRC for all data written in a batch. |
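A sketch of packing that header (again illustrative, assuming the layout shown
above with the `uint32` in the last four bytes):

```go
const (
	frameInvalid uint8 = 0x0
	frameEntry   uint8 = 0x1
	frameIndex   uint8 = 0x2
	frameCommit  uint8 = 0x3
)

// encodeFrameHeader packs the 8-byte frame header: one Type byte, three
// reserved zero bytes, then Length (or CRC for commit frames) little-endian.
func encodeFrameHeader(typ uint8, lengthOrCRC uint32) [8]byte {
	var h [8]byte
	h[0] = typ
	binary.LittleEndian.PutUint32(h[4:8], lengthOrCRC)
	return h
}
```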
#### Index Frame

An index frame payload is an array of `uint32` file offsets for the
corresponding records. The first element of the array contains the file offset
of the frame containing the first entry in the segment, and so on.

`Length` is used to indicate the length in bytes of the array (i.e. the number of
entries in the segment is `Length/4`).

Index frames are written only when the segment is sealed, and a commit frame
follows to validate the final write.

#### Commit Frame

A Commit frame marks the last write before fsync is called. In order to detect
incomplete or torn writes on recovery, the commit frame stores a CRC of all the
bytes appended since the last fsync.

`CRC` is used to specify a CRC32 (Castagnoli) over all bytes written since the
last fsync. That is, since just after the last commit frame, or just after the
file header.

There may also be 4 bytes of padding to keep alignment. Later we could
use these too.
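Computing that checksum is straightforward with Go's standard library; a
sketch, assuming `batch` holds every raw byte appended since the last fsync
(padding included):

```go
var castagnoli = crc32.MakeTable(crc32.Castagnoli)

// commitCRC is the value stored in a commit frame's Length/CRC field: a
// CRC32 (Castagnoli) over all bytes written since the last fsync.
func commitCRC(batch []byte) uint32 {
	return crc32.Checksum(batch, castagnoli)
}
```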
#### Alignment

All frame headers are written with 8-byte alignment to ensure they remain in a
single disk sector. We don't entirely depend on atomic sector writes for
correctness, but it's a simple way to improve our chances of being able to read
through the file on a recovery with some sectors missing.

We add 0-7 implicit null bytes after each frame to ensure the next frame
header is aligned. This padding is _not_ represented in `Length` but it is
always present and deterministic: round `Length` up to the nearest
multiple of 8. It is always accounted for when reading, and CRCs are calculated
over the raw bytes written so they always include the (zero) padding bytes.

Despite alignment we still don't blindly trust that the headers we read are valid. A
CRC mismatch or invalid record format indicates torn writes in the last batch
written, and we always sanity-check the lengths we read before allocating
memory for them: entry lengths can't be bigger than `MaxEntrySize`, which
we default to 64MiB.
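The padding arithmetic is simply rounding up to the next multiple of 8, for
example:

```go
// paddedLen rounds a frame's Length up to the nearest multiple of 8, which
// is the number of bytes the frame's payload actually occupies on disk.
func paddedLen(length uint32) uint32 {
	return (length + 7) &^ 7
}

// padBytes is how many implicit zero bytes follow the frame payload.
func padBytes(length uint32) uint32 {
	return paddedLen(length) - length
}
```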
### Sealing

Once a segment file has grown larger than the configured soft-limit (64MiB
default), we "seal" it. This process involves:

 1. Write out the in-memory index of record offsets to an index frame.
 2. Write a commit frame to validate all bytes appended in this final append
    (which probably included one or more records that took the segment file over
    the limit).
 3. Return the final `IndexStart` to be stored in `wal-meta.db`.

Sealed files can have their indexes read directly on open from the `IndexStart` in
`wal-meta.db`, so records can be looked up in constant time.

## Log Lookup by Index

For an unsealed segment we first look up the offset in the in-memory index.

For a sealed segment we can discover the index frame location from the metadata
and then perform a read at the right location in the file to look up the record's
offset. Implementations may choose to cache or memory-map the index array, but we
will initially just read the specific entry we need each time and assume the OS
page cache will make that fast for frequently accessed index areas or in-order
traversals. We don't have to read the whole index, just the 4-byte entry we care
about, since we can work out its offset from `IndexStart`, the `BaseIndex` of the
segment, and the Index being searched for.
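The arithmetic for a sealed segment looks roughly like this (a sketch: it
assumes `IndexStart` points at the first element of the index array, so if it
actually points at the index frame's header the 8-byte header size would need
to be added):

```go
// indexEntryPos is the file offset of the 4-byte index entry that holds the
// frame offset for raft index idx in a sealed segment.
func indexEntryPos(indexStart, baseIndex, idx uint64) uint64 {
	return indexStart + 4*(idx-baseIndex)
}
```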
# Crash Safety

Crash safety must be maintained through three types of write operation: appending
a batch of entries, truncating the head (oldest) entries, and truncating
the newest entries.

## Appending Entries

We want to `fsync` only once for an append batch, however many entries were in
it. We assume [Powersafe Overwrites](#powersafe-overwrites-psow) or PSOW, a
weaker assumption than atomic sector writes in general. Thanks to PSOW, we
assume we can start appending at the tail of the file right after previously
committed entries even if the new entries will be written to the same sector as
the older entries, and that the system will never corrupt the already committed
part of the sector even if it is not atomic and arbitrarily garbles the part of
the sector we actually did write.

At the end of the batch we write a `Commit` frame containing the CRC over the
data written during the current batch.

In a crash one of the following states occurs:
 1. All sectors modified across all frames make it to disk (crash _after_ fsync).
 2. A torn write: one or more sectors, anywhere in the modified tail of the file,
    might not be persisted. We don't assume they are zero; they might be
    arbitrarily garbled (crash _before_ fsync).

We can check which one of these is true with the recovery procedure outlined
below. If we find the last batch _was_ torn, it must not have been acknowledged
to Raft yet (since `fsync` can't have returned) and so it is safe to assume that
the previous commit frame is the tail of the log we've actually acknowledged.

### Recovery

We cover recovering the segments generally below since we have to account for
truncations. All segments except the tail were fsynced during seal before the
new tail was added to the meta DB, so we can assume they all made it to disk
if a later tail was added.

On startup we just need to recover the tail log as follows (a sketch of the
scan follows this list):

 1. If the file doesn't exist, create it from Meta DB information. DONE.
 2. Open the file and validate that the header matches the filename. If not, delete it and go to 1.
 3. Read all records in the file in sequence, keeping track of the last two
    commit frames observed.
    1. If the file ends with a corrupt frame or a non-commit frame, discard
       anything after the last commit frame. We're DONE, because we wouldn't have
       written extra frames after a commit until fsync completed, so that commit
       must have been acknowledged.
    2. Else the file ends with a commit frame. Validate its checksum. If it is good, DONE.
    3. If the CRC is not good, then discard everything back to the previous commit frame. DONE.
 4. If we read an index frame in that process and the commit frame preceding it
    is the new tail, then mark the segment as sealed and return the seal info
    (the crash occurred after seal but before updating `wal-meta.db`).
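A compressed sketch of that scan, under the framing rules above. `readFrame`
and `batchCRC` are hypothetical helpers (parse one frame and its padded size at
an offset; recompute the CRC32-Castagnoli over a byte range), so treat this as
an outline of the logic rather than working library code:

```go
// recoverTail scans frames from the end of the file header, remembers the
// last commit frame whose CRC checks out, and truncates everything after it.
func recoverTail(f *os.File, headerSize int64) (int64, error) {
	pos := headerSize        // next frame header to parse
	batchStart := headerSize // first byte covered by the next commit CRC
	lastCommit := headerSize // end of the last validated commit frame
	for {
		fr, size, err := readFrame(f, pos) // hypothetical helper
		if err != nil {
			break // corrupt or truncated frame: fall back to lastCommit
		}
		if fr.Type == frameCommit {
			if batchCRC(f, batchStart, pos) != fr.CRC { // hypothetical helper
				break // torn batch: the previous commit is the real tail
			}
			lastCommit = pos + size
			batchStart = lastCommit
		}
		pos += size
	}
	// Nothing after lastCommit was ever acknowledged, so discard it.
	return lastCommit, f.Truncate(lastCommit)
}
```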
## Head Truncations

The most common form of truncation is a "head" truncation: removing the oldest
prefix of entries after a periodic snapshot has been made to reclaim space.

To be crash safe we can't rely on atomically updating or deleting multiple
segment files. The process looks like this (a sketch follows the list):

 1. In one transaction on the Meta DB:
    1. Update `meta.min_index` to be the new min.
    2. Delete any segments from the `segments` bucket that are sealed and where
       their highest index is less than the new min index.
    3. Commit the Txn. This is the commit point for crash recovery.
 2. Update the in-memory segment state to match (if not done already with a lock
    held).
 3. Delete any segment files we just removed from the meta DB.
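A sketch of step 1 as a single `bbolt` transaction. The bucket layout and key
encoding here are assumptions for illustration; the point is that the Bolt
commit is the crash-recovery commit point, and files are unlinked only
afterwards:

```go
// truncateHead commits the new minimum index and drops fully-covered sealed
// segments from the meta DB, returning the segments whose files the caller
// should now delete from disk.
func truncateHead(db *bolt.DB, newMin uint64, segs []SegmentInfo) ([]SegmentInfo, error) {
	var deleted []SegmentInfo
	err := db.Update(func(tx *bolt.Tx) error {
		var buf [8]byte
		binary.BigEndian.PutUint64(buf[:], newMin)
		if err := tx.Bucket([]byte("wal-state")).Put([]byte("min_index"), buf[:]); err != nil {
			return err
		}
		segBucket := tx.Bucket([]byte("segments")) // assumed per-segment bucket
		for _, s := range segs {
			if !s.SealTime.IsZero() && s.MaxIndex < newMin {
				var key [8]byte
				binary.BigEndian.PutUint64(key[:], s.ID)
				if err := segBucket.Delete(key[:]); err != nil {
					return err
				}
				deleted = append(deleted, s)
			}
		}
		return nil // bbolt commits here; this is the recovery commit point
	})
	return deleted, err
}
```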
### Recovery

The meta data update is crash safe thanks to BoltDB being the source of truth.

 1. Reload the meta state from the Meta DB.
 2. Walk the files in the dir.
 3. For each one:
    1. Check if that file is present in the Meta DB. If not, mark it for deletion.
    2. (Optionally) validate the file header, file size and final block trailer
       to ensure the file appears to be well-formed and contains the expected
       data.
 4. Delete the obsolete segments marked (could be done in a background thread).
## Tail Truncations

Raft occasionally needs to truncate entries from the tail of the log, i.e.
remove the _most recent_ N entries. This can occur when a follower has
replicated entries from an old leader that was partitioned with it, but later
discovers they conflict with entries committed by the new leader in a later
term. The bounds on how long a partitioned leader can continue to replicate to
a follower are generally pretty small (30 seconds or so), so it's unlikely that
the number of records to be truncated will ever be large compared to the size
of a segment file, but we have to account for needing to delete one or more
segment files from the tail, as well as truncate older entries out of the new
tail.

This follows roughly the same pattern as head truncation, although there is an
added complication. A naive implementation that used only the baseIndex as a
segment file name could in theory get into a tricky state where it's ambiguous
whether the tail segment is an old one that was logically truncated away but we
crashed before actually unlinking it, or a new replacement with committed data in it.

It's possible to solve this with complex transactional semantics, but we take
the simpler approach of just assigning every segment a unique identifier
separate from its baseIndex. So truncating the tail follows the same
procedure as the head above: segments we remove from the Meta DB can be
unambiguously deleted on recovery, because their IDs won't match even if later
segments end up with the same baseIndex.

Since these truncations are generally rare and disk space is generally not a
major bottleneck, we also choose not to try to actually re-use a segment file
that was previously written and sealed by truncating it etc. Instead we just
mark it as "sealed" in the Meta DB with a `MaxIndex` of the highest index
left after the truncation (which we check on reads) and start a new segment at
the next index.
## System Assumptions

There are no straight answers to any question about which guarantees can be
relied on across operating systems, file systems, raid controllers and
hardware devices. We state [our assumptions](#our-assumptions) followed by a
summary of the assumptions made by some other respectable sources for
comparison.

### Our Assumptions

We've tried to make the weakest assumptions we can while still keeping things
relatively simple and performant.

We assume:
 1. That while silent latent errors are possible, they are generally rare and
    there's not a whole lot we can do other than return a `Corrupt` error on
    read. In most cases the hardware or filesystem will detect and return an
    error on read anyway for latent corruption. Not doing so is regarded as a
    bug in the OS/filesystem/hardware. For this reason we don't go out of our
    way to checksum everything to protect against "bitrot". This is roughly
    equivalent to the assumptions in BoltDB, LMDB and SQLite.

    While we respect the work in [Protocol Aware Recovery for Consensus-based
    Storage](https://www.usenix.org/system/files/conference/fast18/fast18-alagappan.pdf)
    we choose not to implement a WAL format that allows identifying the index
    and term of "lost" records on read errors so they can be recovered from
    peers. This is mostly for the pragmatic reason that the Raft library this is
    designed to work with would need a major re-write to take advantage of that
    anyway. The proposed format in that paper also seems to make stronger
    assumptions about sector atomicity than we are comfortable with.
 2. That sector writes are _not_ atomic. (Equivalent to SQLite, weaker than
    almost everything else.)
 3. That writing a partial sector does _not_ corrupt any already stored data in
    that sector outside of the range being written
    ([PSOW](#powersafe-overwrites-psow)). (Equivalent to SQLite's defaults,
    RocksDB and Etcd.)
 4. That `fsync` as implemented in Go's standard library actually flushes all
    written sectors of the file to persistent media.
 5. That `fsync` on a parent dir is sufficient to ensure newly created files are
    not lost after a crash (assuming the file itself was written and `fsync`ed
    first).
 6. That appending to files may not be atomic, since the filesystem metadata
    about the size of the file may not be updated atomically with the data.
    Generally we pre-allocate files where possible without writing all zeros, but
    we do potentially extend them if the last batch doesn't fit into the
    allocated space or the filesystem doesn't support pre-allocation. Either way
    we don't rely on the filesystem's reported size and validate that the tail is
    coherent on recovery.
### Published Paper on Consensus Disk Recovery

In the paper on [Protocol Aware Recovery for Consensus-based
Storage](https://www.usenix.org/system/files/conference/fast18/fast18-alagappan.pdf)
the authors assume that corruptions of the log can happen due to either torn
writes (for multi-sector appends) or latent corruptions after commit. They
explain the need to detect which it was, because torn writes only lose
un-acknowledged records and so are safe to detect and truncate, while corruption
of previously committed records impacts the correctness of the protocol more
generally. Their whole paper seems to indicate that these post-commit
corruptions are a major problem that needs to be correctly handled (which may
well be true). On the flip side, their WAL format design writes a separate index
and log, and explicitly assumes that because the index entries are smaller than
a 512-byte sector size, those are safe from corruption during a write.

The core assumptions here are:
 1. Latent, silent corruption of committed data needs to be detected at the
    application layer with a checksum per record checked on every read.
 2. Sector writes are atomic.
 3. Sector writes have [powersafe overwrites](#powersafe-overwrites-psow).
### SQLite

The SQLite authors have a [detailed explanation of their system
assumptions](https://www.sqlite.org/atomiccommit.html) which impact the correctness
of atomic database commits.

> SQLite assumes that the detection and/or correction of bit errors caused by cosmic rays, thermal noise, quantum fluctuations, device driver bugs, or other mechanisms, is the responsibility of the underlying hardware and operating system. SQLite does not add any redundancy to the database file for the purpose of detecting corruption or I/O errors. SQLite assumes that the data it reads is exactly the same data that it previously wrote.

This is very different from the above paper's authors, whose main point
is predicated on how to recover from silent corruptions of the file caused by
hardware, firmware or filesystem errors on read.

Note that this is a pragmatic position rather than a naive one: the authors are
certainly aware that file-systems have bugs, that faulty raid controllers exist
and even that hardware anomalies like high-flying or poorly tracking disk heads
can happen, but choose _not_ to protect against that _at all_. See their
[briefing for linux kernel
developers](https://sqlite.org/lpc2019/doc/trunk/briefing.md) for more details
on the uncertainty they understand exists around these areas.

> SQLite has traditionally assumed that a sector write is not atomic.

These statements are on a page with this disclaimer:

> The information in this article applies only when SQLite is operating in "rollback mode", or in other words when SQLite is not using a write-ahead log.

The [WAL mode](https://sqlite.org/wal.html) docs are less explicit on assumptions
and how crash recovery is achieved, but we can infer some things from the [file
format](https://sqlite.org/fileformat2.html#walformat) and
[code](https://github.com/sqlite/sqlite/blob/master/src/wal.c) though.

> The WAL header is 32 bytes in size...

> Immediately following the wal-header are zero or more frames. Each frame consists of a 24-byte frame-header followed by a page-size bytes of page data.

So each dirty page is appended with a 24-byte header, making it _not_ sector
aligned even though pages must be a multiple of the sector size.

Commit frames are also appended in the same way (and fsync called if enabled as
an option). If fsync is enabled though (and POWERSAFE_OVERWRITE disabled),
SQLite will "pad" to the next sector boundary (or beyond) by repeating the last
frame until it's passed that boundary. For some reason, they take great care to
write up to the sector boundary, sync, then write the rest. I assume this is just
to avoid waiting to flush the redundant padding bytes past the end of the sector
they care about. Padding prevents the next append from potentially overwriting
the committed frame's sector.

But...

> By default, SQLite assumes that an operating system call to write a range of bytes will not damage or alter any bytes outside of that range even if a power loss or OS crash occurs during that write. We call this the "powersafe overwrite" property. Prior to version 3.7.9 (2011-11-01), SQLite did not assume powersafe overwrite. But with the standard sector size increasing from 512 to 4096 bytes on most disk drives, it has become necessary to assume powersafe overwrite in order to maintain historical performance levels and so powersafe overwrite is assumed by default in recent versions of SQLite.

> [assuming no power safe overwrite] In WAL mode, each transaction had to be padded out to the next 4096-byte boundary in the WAL file, rather than the next 512-byte boundary, resulting in thousands of extra bytes being written per transaction.

> SQLite never assumes that database page writes are atomic, regardless of the PSOW setting.(1) And hence SQLite is always able to automatically recover from torn pages induced by a crash. Enabling PSOW does not decrease SQLite's ability to recover from a torn page.

So they basically changed to make SSDs performant and now assume _by default_
that appending to a partial sector won't damage other data. The authors are
explicit that ["powersafe overwrite"](#powersafe-overwrites-psow) is a separate
property from atomicity and they still don't rely on sector atomicity. But they
do now assume powersafe overwrites by default.

To summarize, the SQLite authors assume:
 1. Latent, silent corruptions of committed data should be caught by the file
    system or hardware and so shouldn't need to be accounted for in application
    code.
 2. Sector writes are _not_ atomic, but...
 3. Partial sector overwrites can't corrupt committed data in the same sector (by
    default).
### Etcd WAL

The authors of etcd's WAL, similarly to the authors of the paper above, indicate
the need to distinguish between torn writes and silent corruptions.

They maintain a rolling checksum of all records which is used on recovery only,
which would imply they only care about torn writes, since per-record checksums
are not checked on subsequent reads from the file after recovery. But they have
specific code to distinguish between torn writes and "other" corruption during
recovery.

They are careful to pad every record with 0 to 7 bytes such that the length
prefix for the next record is always 8-byte aligned and so can't span more than
one sector.

But their method of detecting a torn write (rather than latent corruption)
relies on reading through every 512-byte aligned slice of the set of records
whose checksum has failed to match and seeing if there are any entirely zero
sectors.

This seems problematic in a purely logical way regardless of disk behavior: if a
legitimate record contains more than 1kb of zero bytes and happens to ever be
corrupted after writing, that record will be falsely detected as a torn write
because at least one sector will be entirely zero bytes. In practice this
doesn't matter much, because corruptions caused by anything other than torn
writes are likely very rare, but it does make me wonder why bother trying to tell
the difference.

The implied assumptions in their design are:
 1. Latent, silent corruption needs to be detected on recovery, but not on every
    read.
 2. Sector writes are atomic.
 3. Partial sector writes don't corrupt existing data.
 4. Torn writes (caused by multi-sector appends) always leave sectors all-zero.
### LMDB

Symas' Lightning Memory-mapped Database, or LMDB, is another well-used and
respected DB file format (along with its Go-native port BoltDB, used by Consul,
etcd and others).

LMDB writes exclusively in whole 4kb pages. LMDB has a copy-on-write design
which reuses free pages and commits transactions using a double-buffering
technique: writing the root alternately to the first and second pages of the
file. Individual pages do not have checksums and may be larger than the physical
sector size. Dirty pages are written out to new or unused pages and then
`fsync`ed before the transaction commits, so there is no reliance on atomic
sector writes for data pages (a crash might leave pages of a transaction
partially written, but they are not linked into the tree root yet so are ignored
on recovery).

The transaction commits only after the double-buffered meta page is written
out. LMDB relies on the fact that the actual content of the meta page is small
enough to fit in a single sector to avoid "torn writes" on the meta page. (See
[the author's
comments](https://ayende.com/blog/162856/reviewing-lightning-memory-mapped-database-library-transactions-commits)
on this blog.) Although sector writes are assumed to be atomic, there is no
reliance on partial sector writes due to the paged design.

The implied assumptions in this design are:
 1. Latent, silent corruptions of committed data should be caught by the file
    system or hardware and so shouldn't need to be accounted for in application
    code.
 2. Sector writes _are_ atomic.
 3. No assumptions about powersafe overwrite since all IO is in whole pages.
### BoltDB

BoltDB is a Go port of LMDB, so it inherits almost all of the same design
assumptions. One notable difference is that the author added a checksum to the
metadata page even though it still fits in a single sector. The author noted
in private correspondence that this was probably just a defensive measure
rather than a fix for a specific identified flaw in LMDB's design.

Initially this was _not_ used to revert to the alternate page on failure,
because it was still assumed that meta fit in a single sector and that those
writes were atomic. But [a report of Docker causing corruption on a
crash](https://github.com/boltdb/bolt/issues/548) seemed to indicate that the
atomic-sector-writes assumption _was not safe_ on its own, and so the checksum
was used to detect non-atomic writes even on the less-than-a-sector meta page.

BoltDB is also an important base case for our WAL since it has been the log
store used for many years within Consul and other HashiCorp products.

The implied assumptions in this design are:
1. Latent, silent corruptions of committed data should be caught by the file
   system or hardware and so shouldn't need to be accounted for in application
   code.
2. Sector writes are _not_ atomic.
3. No assumptions about Powersafe overwrite since all IO is in whole pages.

### RocksDB WAL

RocksDB is another well-respected storage library based on Google's LevelDB.
RocksDB's [WAL
Format](https://github.com/facebook/rocksdb/wiki/Write-Ahead-Log-File-Format)
uses blocks to allow skipping through files and over corrupt records (which
seems dangerous to me in general, but perhaps they assume only torn-write
corruptions are possible?).

Records are packed into 32KiB blocks until they don't fit. Records that are
larger use first/middle/last flags (which inspired this library) to consume
multiple blocks.
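
A rough sketch of that first/middle/last fragmentation logic (illustrative,
not RocksDB's actual code):

```go
package main

import "fmt"

const blockSize = 32 * 1024

type fragType int

const (
	full fragType = iota
	first
	middle
	last
)

// fragment splits a record into chunks that each fit in the space left in
// the current block, tagging them so a reader can reassemble the record.
func fragment(record []byte, blockRemaining int) []fragType {
	var frags []fragType
	for i := 0; len(record) > 0; i++ {
		n := blockRemaining
		if n > len(record) {
			n = len(record)
		}
		record = record[n:]
		switch {
		case i == 0 && len(record) == 0:
			frags = append(frags, full)
		case i == 0:
			frags = append(frags, first)
		case len(record) == 0:
			frags = append(frags, last)
		default:
			frags = append(frags, middle)
		}
		blockRemaining = blockSize // later fragments start a fresh block
	}
	return frags
}

func main() {
	// A 70KiB record with 10KiB left in the current block spans three blocks.
	fmt.Println(fragment(make([]byte, 70*1024), 10*1024)) // [1 2 3] = first, middle, last
}
```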

RocksDB's WAL uses pre-allocated files but also re-uses old files in a
circular buffer pattern, since they have tight control of how much WAL is
needed. This means they might be overwriting old records in place.

Each record independently gets a header with a checksum to detect corruption or
incomplete writes, but no attempt is made to avoid sector boundaries or partial
block writes - the current block is just appended to for each write.

Implied assumptions:
1. No latent corruptions? This isn't totally clear from the code or docs, but
   the docs indicate that a record with a mismatching checksum can simply be
   skipped over, which would seem to violate basic durability properties for a
   database if the record was already committed. That would imply that
   checksums only (correctly) detect torn writes, with latent corruption not
   accounted for.
2. Sector writes _are_ atomic.
3. Partial sector writes don't corrupt existing data.

### Are Sector Writes Atomic?

Russ Cox asked this on Twitter and tweeted a link to an [excellent Stack
Overflow
answer](https://stackoverflow.com/questions/2009063/are-disk-sector-writes-atomic)
about this by one of the authors of the NVME spec.

> TLDR; if you are in tight control of your whole stack from application all the way down to the physical disks (so you can control and qualify the whole lot) you can arrange to have what you need to make use of disk atomicity. If you're not in that situation or you're talking about the general case, you should not depend on sector writes being atomic.

Despite this, _most_ current best-of-breed database libraries (notably
excepting SQLite and potentially BoltDB), [many linux file
systems](https://lkml.org/lkml/2009/8/24/156), and all academic papers on disk
failure modes I've found so far _do_ assume that sector writes are atomic.

I assume that the authors of these file systems, databases and papers are not
unaware of the complexities described in the above link or the possibility of
non-atomic sector writes, but rather have chosen to put those outside of the
reasonable recoverable behavior of their systems. The actual chances of
encountering a non-atomic sector write in a typical, modern system appear to be
small enough that these authors consider it a reasonable assumption, even when
it's not a guarantee that can be 100% relied upon. (Although the Docker bug
linked above for [BoltDB](#boltdb) seems to indicate a real-world case of this
happening in a modern environment.)

### Powersafe Overwrites (PSOW)

A more subtle property, and a weaker assumption than full sector atomicity, is
what the [SQLite authors term "Powersafe
Overwrites"](https://www.sqlite.org/psow.html), abbreviated PSOW.

> By default, SQLite assumes that an operating system call to write a range of bytes will not damage or alter any bytes outside of that range even if a power loss or OS crash occurs during that write. We call this the "powersafe overwrite" property. Prior to version 3.7.9 (2011-11-01), SQLite did not assume powersafe overwrite. But with the standard sector size increasing from 512 to 4096 bytes on most disk drives, it has become necessary to assume powersafe overwrite in order to maintain historical performance levels and so powersafe overwrite is assumed by default in recent versions of SQLite.

Those who assume atomic sector writes _also_ assume this property, but the
reverse need not be true. SQLite's authors in the page above still assume
nothing about the atomicity of the actual data written to any sector, even
when POWERSAFE_OVERWRITE is enabled (which is now the default). They simply
assume that no _other_ data is harmed while performing a write that overlaps
other sectors, even if power fails.

It's our view that while there certainly can be cases where this assumption
doesn't hold, it's already weaker than the atomic sector write assumption that
most reliable storage software assumes today, and so it is safe for us to
assume as well.

### Are fsyncs reliable?

Even when you explicitly `fsync` a file after writing to it, some devices or
even whole operating systems (e.g. macOS) _don't actually flush to disk_, to
improve performance.

In our case, we assume that Go's `os.File.Sync()` method makes the best effort
it can on all modern OSes. It does now at least behave correctly on macOS
(since Go 1.12). But we can't do anything about a lying hardware device.
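
For reference, the basic write-then-sync pattern this assumption applies to:

```go
package main

import (
	"log"
	"os"
)

func main() {
	f, err := os.Create("example.dat")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	if _, err := f.Write([]byte("committed record")); err != nil {
		log.Fatal(err)
	}
	// Data is only durable (as far as the OS can promise) after Sync returns.
	// Since Go 1.12, Sync issues F_FULLFSYNC on macOS and fsync elsewhere;
	// whether the device honors it is out of our hands.
	if err := f.Sync(); err != nil {
		log.Fatal(err)
	}
}
```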

# Future Extensions

* **Auto-tuning segment size.** This format allows segments to be different
  sizes. We could start with a smaller segment size of, say, a single 1MiB
  block and then measure how long it takes to fill each segment. If segments
  fill quicker than some target rate we could double the allocated size of the
  next segment. This could mean a burst of writes makes the segments grow, and
  then when the writes slow down the log would take a long time to free disk
  space because the segments take so long to fill. That's arguably not a
  terrible problem, but we could also have it auto-tune segment size back down
  when the write rate drops. The only major benefit here would be to allow
  trivial usages like tests to not need a whole 64MiB of disk space just to
  record a handful of log entries - but those could also just manually
  configure a smaller segment size. A sketch of the doubling heuristic follows
  below.
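
A minimal sketch of that doubling heuristic (the names and the 10-second
target are illustrative assumptions, not part of the library):

```go
package main

import (
	"fmt"
	"time"
)

const (
	minSegmentSize = 1 << 20  // 1 MiB
	maxSegmentSize = 64 << 20 // 64 MiB
	targetFillTime = 10 * time.Second
)

// nextSegmentSize doubles the segment size when the last segment filled
// faster than the target, and halves it when it filled much slower.
func nextSegmentSize(current int, lastFill time.Duration) int {
	switch {
	case lastFill < targetFillTime && current < maxSegmentSize:
		return current * 2
	case lastFill > 4*targetFillTime && current > minSegmentSize:
		return current / 2
	default:
		return current
	}
}

func main() {
	fmt.Println(nextSegmentSize(minSegmentSize, 2*time.Second)) // 2097152: doubled
}
```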

# References

In no particular order.

**Files and Crash Recovery**
* [Files are hard](https://danluu.com/file-consistency/)
* [Files are fraught with peril](https://danluu.com/deconstruct-files/)
* [Ensuring data reaches disk](https://lwn.net/Articles/457667/)
* [Write Atomicity and NVME Device Design](https://www.bswd.com/FMS12/FMS12-Rudoff.pdf)
* [Durability: NVME Disks](https://www.evanjones.ca/durability-nvme.html)
* [Intel SSD Durability](https://www.evanjones.ca/intel-ssd-durability.html)
* [Are Disk Sector Writes Atomic?](https://stackoverflow.com/questions/2009063/are-disk-sector-writes-atomic/61832882#61832882)
* [Protocol Aware Recovery for Consensus-based Storage](https://www.usenix.org/system/files/conference/fast18/fast18-alagappan.pdf)
* [Atomic Commit in SQLite](https://www.sqlite.org/atomiccommit.html)
* ["Powersafe Overwrites" in SQLite](https://www.sqlite.org/psow.html)
* [An Analysis of Data Corruption in the Storage Stack](https://www.cs.toronto.edu/~bianca/papers/fast08.pdf)

**DB Design and Storage File layout**
* [BoltDB Implementation](https://github.com/boltdb/bolt)
* LMDB Design: [slides](https://www.snia.org/sites/default/files/SDC15_presentations/database/HowardChu_The_Lighting_Memory_Database.pdf), [talk](https://www.youtube.com/watch?v=tEa5sAh-kVk)
* [SQLite file layout](https://www.sqlite.org/fileformat.html)

**WAL implementations**
* [SQLite WAL Mode](https://sqlite.org/wal.html)
* [RocksDB WAL Format](https://github.com/facebook/rocksdb/wiki/Write-Ahead-Log-File-Format)
* [etcd implementation](https://github.com/etcd-io/etcd/tree/master/wal)


167 vendor/github.com/hashicorp/raft-wal/codec.go generated vendored Normal file
@ -0,0 +1,167 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package wal

import (
	"encoding/binary"
	"io"
	"time"

	"github.com/hashicorp/raft"
)

const (
	// FirstExternalCodecID is the lowest value an external codec may use to
	// identify their codec. Values lower than this are reserved for future
	// internal use.
	FirstExternalCodecID = 1 << 16

	// Codec* constants identify internally-defined codec identifiers.
	CodecBinaryV1 uint64 = iota
)

// Codec is the interface required for encoding/decoding log entries. Callers
// can pass a custom one to manage their own serialization, or to add additional
// layers like encryption or compression of records. Each codec must report a
// globally unique identifier from its ID method.
type Codec interface {
	// ID returns the globally unique identifier for this codec version. This is
	// encoded into segment file headers and must remain consistent over the life
	// of the log. Values up to FirstExternalCodecID are reserved and will error
	// if specified externally.
	ID() uint64

	// Encode the log into the io.Writer. We pass a writer to allow the caller to
	// manage buffer allocation and re-use.
	Encode(l *raft.Log, w io.Writer) error

	// Decode a log from the passed byte slice into the log entry pointed to. This
	// allows the caller to manage allocation and re-use of the bytes and log
	// entry. The resulting raft.Log MUST NOT reference data in the input byte
	// slice since the input byte slice may be returned to a pool and re-used.
	Decode([]byte, *raft.Log) error
}

// BinaryCodec is a Codec that encodes raft.Log with a simple binary format. We
// test that all fields are captured using reflection.
//
// For now we assume raft.Log is not likely to change too much. If it does we'll
// use a new Codec ID for the later version and have to support decoding either.
type BinaryCodec struct{}

// ID returns the globally unique identifier for this codec version. This is
// encoded into segment file headers and must remain consistent over the life
// of the log. Values up to FirstExternalCodecID are reserved and will error
// if specified externally.
func (c *BinaryCodec) ID() uint64 {
	return CodecBinaryV1
}

// Encode the log into the io.Writer. We pass a writer to allow the caller to
// manage buffer allocation and re-use.
func (c *BinaryCodec) Encode(l *raft.Log, w io.Writer) error {
	enc := encoder{w: w}
	enc.varint(l.Index)
	enc.varint(l.Term)
	enc.varint(uint64(l.Type))
	enc.bytes(l.Data)
	enc.bytes(l.Extensions)
	enc.time(l.AppendedAt)
	return enc.err
}

// Decode a log from the passed byte slice into the log entry pointed to. This
// allows the caller to manage allocation and re-use of the bytes and log
// entry.
func (c *BinaryCodec) Decode(bs []byte, l *raft.Log) error {
	dec := decoder{buf: bs}
	l.Index = dec.varint()
	l.Term = dec.varint()
	l.Type = raft.LogType(dec.varint())
	l.Data = dec.bytes()
	l.Extensions = dec.bytes()
	l.AppendedAt = dec.time()
	return dec.err
}

type encoder struct {
	w       io.Writer
	err     error
	scratch [10]byte
}

func (e *encoder) varint(v uint64) {
	if e.err != nil {
		return
	}

	// Varint encoding might use up to 10 bytes for a uint64
	n := binary.PutUvarint(e.scratch[:], v)
	_, e.err = e.w.Write(e.scratch[:n])
}

func (e *encoder) bytes(bs []byte) {
	// Put a length prefix
	e.varint(uint64(len(bs)))
	if e.err != nil {
		return
	}
	// Copy the bytes to the writer
	_, e.err = e.w.Write(bs)
}

func (e *encoder) time(t time.Time) {
	if e.err != nil {
		return
	}
	bs, err := t.MarshalBinary()
	if err != nil {
		e.err = err
		return
	}
	_, e.err = e.w.Write(bs)
}

type decoder struct {
	buf []byte
	err error
}

func (d *decoder) varint() uint64 {
	if d.err != nil {
		return 0
	}
	v, n := binary.Uvarint(d.buf)
	if n <= 0 {
		// n == 0 means the buffer was too short, n < 0 means overflow; either
		// way we can't continue decoding.
		d.err = io.ErrShortBuffer
		return 0
	}
	d.buf = d.buf[n:]
	return v
}

func (d *decoder) bytes() []byte {
	// Get length prefix
	n := d.varint()
	if d.err != nil {
		return nil
	}
	if n == 0 {
		return nil
	}
	if n > uint64(len(d.buf)) {
		d.err = io.ErrShortBuffer
		return nil
	}
	bs := make([]byte, n)
	copy(bs, d.buf[:n])
	d.buf = d.buf[n:]
	return bs
}

func (d *decoder) time() time.Time {
	var t time.Time
	if d.err != nil {
		return t
	}
	// Note that UnmarshalBinary doesn't tell us how many bytes it consumed, but
	// time is the last field encoded so we don't need to advance d.buf further.
	d.err = t.UnmarshalBinary(d.buf)
	return t
}
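
For reference, a small round-trip using the codec above (a usage sketch, not
part of the vendored file):

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/hashicorp/raft"
	wal "github.com/hashicorp/raft-wal"
)

func main() {
	var c wal.BinaryCodec

	in := raft.Log{Index: 7, Term: 2, Type: raft.LogCommand, Data: []byte("hello")}

	// Encode into a caller-managed buffer, as the Codec interface intends.
	var buf bytes.Buffer
	if err := c.Encode(&in, &buf); err != nil {
		panic(err)
	}

	var out raft.Log
	if err := c.Decode(buf.Bytes(), &out); err != nil {
		panic(err)
	}
	fmt.Println(out.Index, out.Term, string(out.Data)) // 7 2 hello
}
```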

40 vendor/github.com/hashicorp/raft-wal/fs/file.go generated vendored Normal file
@ -0,0 +1,40 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package fs

import (
	"os"
	"sync/atomic"

	"github.com/hashicorp/raft-wal/types"
)

var _ types.WritableFile = &File{}

// File wraps an os.File and implements types.WritableFile. It ensures that the
// first time Sync is called on the file, the parent directory is also fsynced
// so a crash won't cause the FS to forget the file is there.
//
// Postponing this lets us do the minimum necessary fsyncs while still ensuring
// all required fsyncs are done by the time we acknowledge committed data in
// the new file.
type File struct {
	new uint32 // atomically accessed, keep it aligned!
	dir string
	os.File
}

// Sync calls fsync on the underlying file. If this is the first call to Sync
// since creation it also fsyncs the parent dir.
func (f *File) Sync() error {
	// Sync the underlying file
	if err := f.File.Sync(); err != nil {
		return err
	}
	new := atomic.SwapUint32(&f.new, 1)
	if new == 0 {
		return syncDir(f.dir)
	}
	return nil
}

128 vendor/github.com/hashicorp/raft-wal/fs/fs.go generated vendored Normal file
@ -0,0 +1,128 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package fs

import (
	"fmt"
	"io/ioutil"
	"math"
	"os"
	"path/filepath"

	"github.com/coreos/etcd/pkg/fileutil"
	"github.com/hashicorp/raft-wal/types"
)

// FS implements the wal.VFS interface using Go's built-in OS filesystem (and a
// few helpers).
//
// TODO if we changed the interface to be Dir centric we could cache the open
// dir handle and save some time opening it on each Create in order to fsync.
type FS struct{}

func New() *FS {
	return &FS{}
}

// ListDir returns a list of all files in the specified dir in lexicographical
// order. If the dir doesn't exist, it must return an error. An empty array
// with a nil error is assumed to mean that the directory exists and was
// readable, but contains no files.
func (fs *FS) ListDir(dir string) ([]string, error) {
	files, err := ioutil.ReadDir(dir)
	if err != nil {
		return nil, err
	}
	// Append rather than index so that skipped directories don't leave empty
	// string entries in the result.
	names := make([]string, 0, len(files))
	for _, f := range files {
		if f.IsDir() {
			continue
		}
		names = append(names, f.Name())
	}
	return names, nil
}

// Create creates a new file with the given name. If a file with the same name
// already exists an error is returned. If a non-zero size is given,
// implementations should make a best effort to pre-allocate the file to be
// that size. The dir must already exist and be writable to the current
// process.
func (fs *FS) Create(dir string, name string, size uint64) (types.WritableFile, error) {
	f, err := os.OpenFile(filepath.Join(dir, name), os.O_CREATE|os.O_EXCL|os.O_RDWR, os.FileMode(0644))
	if err != nil {
		return nil, err
	}
	// We just created the file. Preallocate its size.
	if size > 0 {
		if size > math.MaxInt32 {
			return nil, fmt.Errorf("maximum file size is %d bytes", math.MaxInt32)
		}
		if err := fileutil.Preallocate(f, int64(size), true); err != nil {
			f.Close()
			return nil, err
		}
	}
	// We don't fsync here for performance reasons. Technically we need to fsync
	// the file itself to make sure it is really persisted to disk, and you always
	// need to fsync its parent dir after a creation because fsync doesn't ensure
	// the directory entry is persisted - a crash could make the file appear to be
	// missing as there is no directory entry.
	//
	// BUT, it doesn't actually matter if this file is crash safe, right up to the
	// point where we actually commit log data. Since we always fsync the file
	// when we commit logs, we don't need to again here. That does however leave
	// the parent dir fsync which must be done after the first fsync to a newly
	// created file to ensure it survives a crash.
	//
	// To handle that, we return a wrapped os.File that will fsync the parent dir
	// as well the first time Sync is called (and only the first time).
	fi := &File{
		new:  0,
		dir:  dir,
		File: *f,
	}
	return fi, nil
}

// Delete indicates the file is no longer required. Typically it should be
// deleted from the underlying system to free disk space.
func (fs *FS) Delete(dir string, name string) error {
	if err := os.Remove(filepath.Join(dir, name)); err != nil {
		return err
	}
	// Make sure parent directory metadata is fsynced too before we call this
	// "done".
	return syncDir(dir)
}

// OpenReader opens an existing file in read-only mode. If the file doesn't
// exist or permission is denied, an error is returned, otherwise no checks
// are made about the well-formedness of the file; it may be empty, the wrong
// size or corrupt in arbitrary ways.
func (fs *FS) OpenReader(dir string, name string) (types.ReadableFile, error) {
	return os.OpenFile(filepath.Join(dir, name), os.O_RDONLY, os.FileMode(0644))
}

// OpenWriter opens a file in read-write mode. If the file doesn't exist or
// permission is denied, an error is returned, otherwise no checks are made
// about the well-formedness of the file; it may be empty, the wrong size or
// corrupt in arbitrary ways.
func (fs *FS) OpenWriter(dir string, name string) (types.WritableFile, error) {
	return os.OpenFile(filepath.Join(dir, name), os.O_RDWR, os.FileMode(0644))
}

func syncDir(dir string) error {
	f, err := os.Open(dir)
	if err != nil {
		return err
	}
	err = f.Sync()
	closeErr := f.Close()
	if err != nil {
		return err
	}
	return closeErr
}

269 vendor/github.com/hashicorp/raft-wal/metadb/metadb.go generated vendored Normal file
@ -0,0 +1,269 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package metadb

import (
	"encoding/json"
	"errors"
	"fmt"
	"os"
	"path/filepath"

	"github.com/hashicorp/raft-wal/types"
	"go.etcd.io/bbolt"
)

const (
	// FileName is the default file name for the bolt db file.
	FileName = "wal-meta.db"

	// *Bucket are the names used for internal bolt buckets
	MetaBucket   = "wal-meta"
	StableBucket = "stable"

	// We just need one key for now so use the byte 'm' for meta arbitrarily.
	MetaKey = "m"
)

var (
	// ErrUnintialized is returned when any call is made before Load has opened
	// the DB file.
	ErrUnintialized = errors.New("uninitialized")
)

// BoltMetaDB implements types.MetaStore using BoltDB as a reliable persistent
// store. See the repo README for the reasons behind this design choice and its
// performance implications.
type BoltMetaDB struct {
	dir string
	db  *bbolt.DB
}

func (db *BoltMetaDB) ensureOpen(dir string) error {
	if db.dir != "" && db.dir != dir {
		return fmt.Errorf("can't load dir %s, already open in dir %s", dir, db.dir)
	}
	if db.db != nil {
		return nil
	}

	fileName := filepath.Join(dir, FileName)

	open := func() error {
		bb, err := bbolt.Open(fileName, 0644, nil)
		if err != nil {
			return fmt.Errorf("failed to open %s: %w", FileName, err)
		}
		db.db = bb
		db.dir = dir
		return nil
	}

	// BoltDB can get stuck in invalid states if we crash while it's initializing.
	// We can't distinguish those as safe to just wipe and start again because
	// we don't know for sure if it's failing due to bad init or later corruption
	// (which would lose data if we just wiped and started over). So to ensure
	// initial creation of the WAL is as crash-safe as possible we manually
	// implement an atomic init procedure:
	//  1. Check if the file exists already. If yes, skip init and just open it.
	//  2. Delete any existing DB file with the tmp name.
	//  3. Create a new empty BoltDB with the buckets, under the tmp name.
	//  4. Once that's committed, rename to the final name and fsync the parent dir.
	_, err := os.Stat(fileName)
	if err == nil {
		// File exists, just open it
		return open()
	}
	if !errors.Is(err, os.ErrNotExist) {
		// Unknown err, just return that
		return fmt.Errorf("failed to stat %s: %w", FileName, err)
	}

	// File doesn't exist, initialize a new DB in a crash-safe way
	if err := safeInitBoltDB(dir); err != nil {
		return fmt.Errorf("failed initializing meta DB: %w", err)
	}

	// All good, now open it!
	return open()
}

func safeInitBoltDB(dir string) error {
	tmpFileName := filepath.Join(dir, FileName+".tmp")

	// Delete any old attempts to init that were unsuccessful
	if err := os.RemoveAll(tmpFileName); err != nil {
		return err
	}

	// Open bolt DB at tmp file name
	bb, err := bbolt.Open(tmpFileName, 0644, nil)
	if err != nil {
		return err
	}

	// Only defer the rollback once we know Begin succeeded, otherwise tx would
	// be nil and the deferred call would panic.
	tx, err := bb.Begin(true)
	if err != nil {
		return err
	}
	defer tx.Rollback()

	_, err = tx.CreateBucket([]byte(MetaBucket))
	if err != nil {
		return err
	}
	_, err = tx.CreateBucket([]byte(StableBucket))
	if err != nil {
		return err
	}
	if err := tx.Commit(); err != nil {
		return err
	}
	// Close the file ready to rename into place and re-open. This probably isn't
	// necessary but it makes it easier to reason about this code path being
	// totally separate from the common case.
	if err := bb.Close(); err != nil {
		return err
	}

	// We created the DB OK. Now rename it to the final name.
	if err := os.Rename(tmpFileName, filepath.Join(dir, FileName)); err != nil {
		return err
	}

	// And fsync the parent dir to make sure the new file with its new name
	// is persisted!
	dirF, err := os.Open(dir)
	if err != nil {
		return err
	}
	err = dirF.Sync()
	closeErr := dirF.Close()
	if err != nil {
		return err
	}
	return closeErr
}

// Load loads the existing persisted state. If there is no existing state,
// implementations are expected to initialize new storage and return an
// empty state.
func (db *BoltMetaDB) Load(dir string) (types.PersistentState, error) {
	var state types.PersistentState

	if err := db.ensureOpen(dir); err != nil {
		return state, err
	}

	tx, err := db.db.Begin(false)
	if err != nil {
		return state, err
	}
	defer tx.Rollback()
	meta := tx.Bucket([]byte(MetaBucket))

	// We just need one key for now so use the byte 'm' for meta arbitrarily.
	raw := meta.Get([]byte(MetaKey))
	if raw == nil {
		// This is valid: it's an "empty" log that will be initialized by the WAL.
		return state, nil
	}

	if err := json.Unmarshal(raw, &state); err != nil {
		return state, fmt.Errorf("%w: failed to parse persisted state: %s", types.ErrCorrupt, err)
	}
	return state, nil
}

// CommitState must atomically replace all persisted metadata in the current
// store with the set provided. It must not return until the data is persisted
// durably and in a crash-safe way otherwise the guarantees of the WAL will be
// compromised. The WAL will only ever call this in a single thread at one
// time and it will never be called concurrently with Load, however it may be
// called concurrently with Get/SetStable operations.
func (db *BoltMetaDB) CommitState(state types.PersistentState) error {
	if db.db == nil {
		return ErrUnintialized
	}

	encoded, err := json.Marshal(state)
	if err != nil {
		return fmt.Errorf("failed to encode persisted state: %w", err)
	}

	tx, err := db.db.Begin(true)
	if err != nil {
		return err
	}
	defer tx.Rollback()
	meta := tx.Bucket([]byte(MetaBucket))

	if err := meta.Put([]byte(MetaKey), encoded); err != nil {
		return err
	}

	return tx.Commit()
}

// GetStable returns a value from stable store or nil if it doesn't exist. May
// be called concurrently by multiple threads.
func (db *BoltMetaDB) GetStable(key []byte) ([]byte, error) {
	if db.db == nil {
		return nil, ErrUnintialized
	}

	tx, err := db.db.Begin(false)
	if err != nil {
		return nil, err
	}
	defer tx.Rollback()
	stable := tx.Bucket([]byte(StableBucket))

	val := stable.Get(key)
	if val == nil {
		return nil, nil
	}

	// Need to copy the value since bolt only guarantees the slice is valid until
	// the end of the txn.
	ret := make([]byte, len(val))
	copy(ret, val)
	return ret, nil
}

// SetStable stores a value in the stable store. May be called concurrently
// with GetStable.
func (db *BoltMetaDB) SetStable(key []byte, value []byte) error {
	if db.db == nil {
		return ErrUnintialized
	}

	tx, err := db.db.Begin(true)
	if err != nil {
		return err
	}
	defer tx.Rollback()
	stable := tx.Bucket([]byte(StableBucket))

	if value == nil {
		err = stable.Delete(key)
	} else {
		err = stable.Put(key, value)
	}
	if err != nil {
		return err
	}

	return tx.Commit()
}

// Close implements io.Closer
func (db *BoltMetaDB) Close() error {
	if db.db == nil {
		return nil
	}
	err := db.db.Close()
	db.db = nil
	return err
}

78 vendor/github.com/hashicorp/raft-wal/metrics.go generated vendored Normal file
@ -0,0 +1,78 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package wal

import (
	"github.com/hashicorp/raft-wal/metrics"
)

var (
	// MetricDefinitions describe the metrics emitted by this library via the
	// provided metrics.Collector implementation. It's public so that these can be
	// registered during init with metrics clients that support pre-defining
	// metrics.
	MetricDefinitions = metrics.Definitions{
		Counters: []metrics.Descriptor{
			{
				Name: "log_entry_bytes_written",
				Desc: "log_entry_bytes_written counts the bytes of log entry after encoding" +
					" with Codec. Actual bytes written to disk might be slightly higher as it" +
					" includes headers and index entries.",
			},
			{
				Name: "log_entries_written",
				Desc: "log_entries_written counts the number of entries written.",
			},
			{
				Name: "log_appends",
				Desc: "log_appends counts the number of calls to StoreLog(s) i.e." +
					" number of batches of entries appended.",
			},
			{
				Name: "log_entry_bytes_read",
				Desc: "log_entry_bytes_read counts the bytes of log entry read from" +
					" segments before decoding. Actual bytes read from disk might be higher" +
					" as it includes headers and index entries and possible secondary reads" +
					" for large entries that don't fit in buffers.",
			},
			{
				Name: "log_entries_read",
				Desc: "log_entries_read counts the number of calls to get_log.",
			},
			{
				Name: "segment_rotations",
				Desc: "segment_rotations counts how many times we move to a new segment file.",
			},
			{
				Name: "head_truncations",
				Desc: "head_truncations counts how many log entries have been truncated" +
					" from the head - i.e. the oldest entries. By graphing the rate of" +
					" change over time you can see individual truncate calls as spikes.",
			},
			{
				Name: "tail_truncations",
				Desc: "tail_truncations counts how many log entries have been truncated" +
					" from the tail - i.e. the newest entries. By graphing the rate of" +
					" change over time you can see individual truncate calls as spikes.",
			},
			{
				Name: "stable_gets",
				Desc: "stable_gets counts how many calls are made to StableStore.Get or GetUint64.",
			},
			{
				Name: "stable_sets",
				Desc: "stable_sets counts how many calls are made to StableStore.Set or SetUint64.",
			},
		},
		Gauges: []metrics.Descriptor{
			{
				Name: "last_segment_age_seconds",
				Desc: "last_segment_age_seconds is a gauge that is set each time we" +
					" rotate a segment and describes the number of seconds between when" +
					" that segment file was first created and when it was sealed. This" +
					" gives a rough estimate of how quickly writes are filling the disk.",
			},
		},
	}
)

89 vendor/github.com/hashicorp/raft-wal/metrics/atomic_collector.go generated vendored Normal file
@ -0,0 +1,89 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package metrics

import "sync/atomic"

var (
	_ Collector = &AtomicCollector{}
)

// AtomicCollector is a simple Collector that atomically stores
// counters and gauges in memory.
type AtomicCollector struct {
	counters []uint64
	gauges   []uint64

	counterIndex, gaugeIndex map[string]int
}

// NewAtomicCollector creates a collector for the given set of Definitions.
func NewAtomicCollector(defs Definitions) *AtomicCollector {
	c := &AtomicCollector{
		counters:     make([]uint64, len(defs.Counters)),
		gauges:       make([]uint64, len(defs.Gauges)),
		counterIndex: make(map[string]int),
		gaugeIndex:   make(map[string]int),
	}
	for i, d := range defs.Counters {
		if _, ok := c.counterIndex[d.Name]; ok {
			panic("duplicate metrics named " + d.Name)
		}
		c.counterIndex[d.Name] = i
	}
	for i, d := range defs.Gauges {
		// Gauge names must not collide with counter names either.
		if _, ok := c.counterIndex[d.Name]; ok {
			panic("duplicate metrics named " + d.Name)
		}
		if _, ok := c.gaugeIndex[d.Name]; ok {
			panic("duplicate metrics named " + d.Name)
		}
		c.gaugeIndex[d.Name] = i
	}
	return c
}

// IncrementCounter records val occurrences of the named event. Names will
// follow prometheus conventions with lower_case_and_underscores. We don't
// need any additional labels currently.
func (c *AtomicCollector) IncrementCounter(name string, delta uint64) {
	id, ok := c.counterIndex[name]
	if !ok {
		panic("invalid metric name: " + name)
	}
	atomic.AddUint64(&c.counters[id], delta)
}

// SetGauge sets the value of the named gauge overriding any previous value.
func (c *AtomicCollector) SetGauge(name string, val uint64) {
	id, ok := c.gaugeIndex[name]
	if !ok {
		panic("invalid metric name: " + name)
	}
	atomic.StoreUint64(&c.gauges[id], val)
}

// Summary returns a summary of the metrics since startup. Each value is
// atomically loaded but the set is not atomic overall and may represent an
// inconsistent snapshot e.g. with some metrics reflecting the most recent
// operation while others don't.
func (c *AtomicCollector) Summary() Summary {
	s := Summary{
		Counters: make(map[string]uint64, len(c.counters)),
		Gauges:   make(map[string]uint64, len(c.gauges)),
	}
	for name, id := range c.counterIndex {
		s.Counters[name] = atomic.LoadUint64(&c.counters[id])
	}
	for name, id := range c.gaugeIndex {
		s.Gauges[name] = atomic.LoadUint64(&c.gauges[id])
	}
	return s
}

// Summary is a copy of the values recorded so far for each metric.
type Summary struct {
	Counters map[string]uint64
	Gauges   map[string]uint64
}
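
A usage sketch for the collector above, pairing it with the
`MetricDefinitions` from the `wal` package shown earlier:

```go
package main

import (
	"fmt"

	wal "github.com/hashicorp/raft-wal"
	"github.com/hashicorp/raft-wal/metrics"
)

func main() {
	// Collect the metrics this library defines into an in-memory collector.
	c := metrics.NewAtomicCollector(wal.MetricDefinitions)

	c.IncrementCounter("log_appends", 1)
	c.SetGauge("last_segment_age_seconds", 42)

	// Summary copies out the values recorded so far.
	s := c.Summary()
	fmt.Println(s.Counters["log_appends"], s.Gauges["last_segment_age_seconds"]) // 1 42
}
```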

85 vendor/github.com/hashicorp/raft-wal/metrics/gometrics_collector.go generated vendored Normal file
@ -0,0 +1,85 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

// # Metrics Configuration
//
// The raft-wal library is instrumented to be able to use different metrics collectors. There are currently two implemented within this package:
//   - atomic
//   - go-metrics
//
// # go-metrics Compatibility
//
// This library can emit metrics using either github.com/armon/go-metrics or github.com/hashicorp/go-metrics. Choosing between the libraries is controlled via build tags.
//
// Build Tags:
//   - armonmetrics - Using this tag will cause metrics to be routed to armon/go-metrics
//   - hashicorpmetrics - Using this tag will cause all metrics to be routed to hashicorp/go-metrics
//
// If no build tag is specified, the default behavior is to use armon/go-metrics.
//
// # Deprecating armon/go-metrics
//
// Emitting metrics to armon/go-metrics is officially deprecated. Usage of armon/go-metrics will remain the default until mid-2025 with opt-in support continuing to the end of 2025.
//
// Migration:
// To migrate an application currently using the older armon/go-metrics to instead use hashicorp/go-metrics, the following should be done.
//
//  1. Upgrade libraries using armon/go-metrics to consume hashicorp/go-metrics/compat instead. This should involve only changing import statements. All repositories within the hashicorp GitHub organization will be getting these updates in early 2025.
//
//  2. Update an application's library dependencies to those that have the compatibility layer configured.
//
//  3. Update the application to use hashicorp/go-metrics for configuring metrics export instead of armon/go-metrics
//
//     - Replace all application imports of github.com/armon/go-metrics with github.com/hashicorp/go-metrics
//
//     - Instrument your build system to build with the hashicorpmetrics tag.
//
// Eventually, once the default behavior changes to use hashicorp/go-metrics (mid-2025), you can drop the hashicorpmetrics build tag.
package metrics

import gometrics "github.com/hashicorp/go-metrics/compat"

// GoMetricsCollector implements a Collector that passes through observations to
// a go-metrics instance. The zero value works, writing metrics to the default
// global instance; however, to set a prefix or a static set of labels to add to
// each metric observed, or to use a non-global metrics instance, use
// NewGoMetricsCollector.
type GoMetricsCollector struct {
	gm     *gometrics.Metrics
	prefix []string
	labels []gometrics.Label
}

// NewGoMetricsCollector returns a GoMetricsCollector that will attach the
// specified name prefix and/or labels to each observation. If gm is nil the
// global metrics instance is used.
func NewGoMetricsCollector(prefix []string, labels []gometrics.Label, gm *gometrics.Metrics) *GoMetricsCollector {
	if gm == nil {
		gm = gometrics.Default()
	}
	return &GoMetricsCollector{
		gm:     gm,
		prefix: prefix,
		labels: labels,
	}
}

// IncrementCounter records val occurrences of the named event. Names will
// follow prometheus conventions with lower_case_and_underscores. We don't
// need any additional labels currently.
func (c *GoMetricsCollector) IncrementCounter(name string, delta uint64) {
	c.gm.IncrCounterWithLabels(c.name(name), float32(delta), c.labels)
}

// SetGauge sets the value of the named gauge overriding any previous value.
func (c *GoMetricsCollector) SetGauge(name string, val uint64) {
	c.gm.SetGaugeWithLabels(c.name(name), float32(val), c.labels)
}

// name returns the metric name as a slice. We don't want to risk modifying the
// backing array of the prefix slice since this might be called concurrently,
// so we always allocate a new slice.
func (c *GoMetricsCollector) name(name string) []string {
	var ss []string
	return append(append(ss, c.prefix...), name)
}

42 vendor/github.com/hashicorp/raft-wal/metrics/metrics.go generated vendored Normal file
@ -0,0 +1,42 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package metrics

// Collector provides a simple abstraction for counter type metrics that
// the WAL and log verifier can use without depending on a specific metrics
// collector implementation.
type Collector interface {
	// IncrementCounter records val occurrences of the named event. Names will
	// follow prometheus conventions with lower_case_and_underscores. We don't
	// need any additional labels currently.
	IncrementCounter(name string, delta uint64)

	// SetGauge sets the value of the named gauge overriding any previous value.
	SetGauge(name string, val uint64)
}

// Definitions provides a simple description of a set of scalar metrics.
type Definitions struct {
	Counters []Descriptor
	Gauges   []Descriptor
}

// Descriptor describes a specific metric.
type Descriptor struct {
	Name string
	Desc string
}

var _ Collector = &NoOpCollector{}

// NoOpCollector is a Collector that does nothing.
type NoOpCollector struct{}

// IncrementCounter records val occurrences of the named event. Names will
// follow prometheus conventions with lower_case_and_underscores. We don't
// need any additional labels currently.
func (c *NoOpCollector) IncrementCounter(name string, delta uint64) {}

// SetGauge sets the value of the named gauge overriding any previous value.
func (c *NoOpCollector) SetGauge(name string, val uint64) {}

92 vendor/github.com/hashicorp/raft-wal/options.go generated vendored Normal file
@ -0,0 +1,92 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package wal

import (
	"fmt"

	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/raft-wal/fs"
	"github.com/hashicorp/raft-wal/metadb"
	"github.com/hashicorp/raft-wal/metrics"
	"github.com/hashicorp/raft-wal/segment"
	"github.com/hashicorp/raft-wal/types"
)

// WithCodec is an option that allows a custom Codec to be provided to the WAL.
// If not used the default Codec is used.
func WithCodec(c Codec) walOpt {
	return func(w *WAL) {
		w.codec = c
	}
}

// WithMetaStore is an option that allows a custom MetaStore to be provided to
// the WAL. If not used the default MetaStore is used.
func WithMetaStore(db types.MetaStore) walOpt {
	return func(w *WAL) {
		w.metaDB = db
	}
}

// WithSegmentFiler is an option that allows a custom SegmentFiler (and hence
// Segment Reader/Writer implementation) to be provided to the WAL. If not used
// the default SegmentFiler is used.
func WithSegmentFiler(sf types.SegmentFiler) walOpt {
	return func(w *WAL) {
		w.sf = sf
	}
}

// WithLogger is an option that allows a custom logger to be used.
func WithLogger(logger hclog.Logger) walOpt {
	return func(w *WAL) {
		w.log = logger
	}
}

// WithSegmentSize is an option that allows a custom segmentSize to be set.
func WithSegmentSize(size int) walOpt {
	return func(w *WAL) {
		w.segmentSize = size
	}
}

// WithMetricsCollector is an option that allows a custom metrics.Collector to
// be provided to the WAL.
func WithMetricsCollector(c metrics.Collector) walOpt {
	return func(w *WAL) {
		w.metrics = c
	}
}

func (w *WAL) applyDefaultsAndValidate() error {
	// If an external codec has been provided, check it's not using a reserved ID.
	if w.codec != nil && w.codec.ID() < FirstExternalCodecID {
		return fmt.Errorf("codec is using a reserved ID (below %d)", FirstExternalCodecID)
	}

	// Defaults
	if w.log == nil {
		w.log = hclog.Default().Named("wal")
	}
	if w.codec == nil {
		w.codec = &BinaryCodec{}
	}
	if w.sf == nil {
		// These are not actually swappable via options right now but we override
		// them in tests. Only load the default implementations if they are not set.
		vfs := fs.New()
		w.sf = segment.NewFiler(w.dir, vfs)
	}
	if w.metrics == nil {
		w.metrics = &metrics.NoOpCollector{}
	}
	if w.metaDB == nil {
		w.metaDB = &metadb.BoltMetaDB{}
	}
	if w.segmentSize == 0 {
		w.segmentSize = DefaultSegmentSize
	}
	return nil
}
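
A usage sketch for these options; `Open` and `Close` are assumed here from the
library's public API, which is not shown in this diff:

```go
package main

import (
	"log"

	"github.com/hashicorp/go-hclog"
	wal "github.com/hashicorp/raft-wal"
)

func main() {
	// Open applies the options and then applyDefaultsAndValidate, as shown
	// above, filling in defaults for anything not overridden.
	w, err := wal.Open("/var/lib/app/wal",
		wal.WithSegmentSize(16*1024*1024), // 16MiB segments instead of the default
		wal.WithLogger(hclog.Default().Named("wal")),
	)
	if err != nil {
		log.Fatal(err)
	}
	defer w.Close()
}
```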

14 vendor/github.com/hashicorp/raft-wal/segment/crc.go generated vendored Normal file
@ -0,0 +1,14 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package segment

import (
	"hash/crc32"
)

var castagnoliTable *crc32.Table

func init() {
	castagnoliTable = crc32.MakeTable(crc32.Castagnoli)
}

295 vendor/github.com/hashicorp/raft-wal/segment/filer.go generated vendored Normal file
@ -0,0 +1,295 @@
// Copyright (c) HashiCorp, Inc
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
package segment
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/hashicorp/raft-wal/types"
|
||||
)
|
||||
|
||||
const (
|
||||
segmentFileSuffix = ".wal"
|
||||
segmentFileNamePattern = "%020d-%016x" + segmentFileSuffix
|
||||
)
|
||||
|
||||
// Filer implements the abstraction for managing a set of segment files in a
|
||||
// directory. It uses a VFS to abstract actual file system operations for easier
|
||||
// testing.
|
||||
type Filer struct {
|
||||
dir string
|
||||
vfs types.VFS
|
||||
bufPool sync.Pool
|
||||
}
|
||||
|
||||
// NewFiler creates a Filer ready for use.
|
||||
func NewFiler(dir string, vfs types.VFS) *Filer {
|
||||
f := &Filer{
|
||||
dir: dir,
|
||||
vfs: vfs,
|
||||
}
|
||||
f.bufPool.New = func() interface{} {
|
||||
return make([]byte, minBufSize)
|
||||
}
|
||||
return f
|
||||
}
|
||||
|
||||
// FileName returns the formatted file name expected for this segment.
|
||||
// SegmentFiler implementations could choose to ignore this but it's here to
|
||||
func FileName(i types.SegmentInfo) string {
|
||||
return fmt.Sprintf(segmentFileNamePattern, i.BaseIndex, i.ID)
|
||||
}
|
||||
|
||||
// Create adds a new segment with the given info and returns a writer or an
|
||||
// error.
|
||||
func (f *Filer) Create(info types.SegmentInfo) (types.SegmentWriter, error) {
|
||||
if info.BaseIndex == 0 {
|
||||
return nil, fmt.Errorf("BaseIndex must be greater than zero")
|
||||
}
|
||||
fname := FileName(info)
|
||||
|
||||
wf, err := f.vfs.Create(f.dir, fname, uint64(info.SizeLimit))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return createFile(info, wf, &f.bufPool)
|
||||
}
|
||||
|
||||
// RecoverTail is called on an unsealed segment when re-opening the WAL it will
|
||||
// attempt to recover from a possible crash. It will either return an error, or
|
||||
// return a valid segmentWriter that is ready for further appends. If the
|
||||
// expected tail segment doesn't exist it must return an error wrapping
|
||||
// os.ErrNotExist.
|
||||
func (f *Filer) RecoverTail(info types.SegmentInfo) (types.SegmentWriter, error) {
|
||||
fname := FileName(info)
|
||||
|
||||
wf, err := f.vfs.OpenWriter(f.dir, fname)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return recoverFile(info, wf, &f.bufPool)
|
||||
}
|
||||
|
||||
// Open an already sealed segment for reading. Open may validate the file's
|
||||
// header and return an error if it doesn't match the expected info.
|
||||
func (f *Filer) Open(info types.SegmentInfo) (types.SegmentReader, error) {
|
||||
fname := FileName(info)
|
||||
|
||||
rf, err := f.vfs.OpenReader(f.dir, fname)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Validate header here since openReader is re-used by writer where it's valid
|
||||
// for the file header not to be committed yet after a crash so we can't check
|
||||
// it there.
|
||||
var hdr [fileHeaderLen]byte
|
||||
|
||||
if _, err := rf.ReadAt(hdr[:], 0); err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
// Treat failure to read a header as corruption since a sealed file should
|
||||
// never not have a valid header. (I.e. even if crashes happen it should
|
||||
// be impossible to seal a segment with no header written so this
|
||||
// indicates that something truncated the file after the fact)
|
||||
return nil, fmt.Errorf("%w: failed to read header: %s", types.ErrCorrupt, err)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
gotInfo, err := readFileHeader(hdr[:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := validateFileHeader(*gotInfo, info); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return openReader(info, rf, &f.bufPool)
|
||||
}
|
||||
|
||||
// List returns the set of segment IDs currently stored. It's used by the WAL
|
||||
// on recovery to find any segment files that need to be deleted following a
|
||||
// unclean shutdown. The returned map is a map of ID -> BaseIndex. BaseIndex
|
||||
// is returned to allow subsequent Delete calls to be made.
|
||||
func (f *Filer) List() (map[uint64]uint64, error) {
|
||||
segs, _, err := f.listInternal()
|
||||
return segs, err
|
||||
}
|
||||
|
||||
func (f *Filer) listInternal() (map[uint64]uint64, []uint64, error) {
|
||||
files, err := f.vfs.ListDir(f.dir)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
segs := make(map[uint64]uint64)
|
||||
sorted := make([]uint64, 0)
|
||||
for _, file := range files {
|
||||
if !strings.HasSuffix(file, segmentFileSuffix) {
|
||||
continue
|
||||
}
|
||||
// Parse BaseIndex and ID from the file name
|
||||
var bIdx, id uint64
|
||||
n, err := fmt.Sscanf(file, segmentFileNamePattern, &bIdx, &id)
|
||||
if err != nil {
|
||||
return nil, nil, types.ErrCorrupt
|
||||
}
|
||||
if n != 2 {
|
||||
// Misnamed segment files with the right suffix indicates a bug or
|
||||
// tampering, we can't be sure what's happened to the data.
|
||||
return nil, nil, types.ErrCorrupt
|
||||
}
|
||||
segs[id] = bIdx
|
||||
sorted = append(sorted, id)
|
||||
}
|
||||
|
||||
return segs, sorted, nil
|
||||
}
|
||||
|
||||
// Delete removes the segment with given baseIndex and id if it exists. Note
|
||||
// that baseIndex is technically redundant since ID is unique on it's own. But
|
||||
// in practice we name files (or keys) with both so that they sort correctly.
|
||||
// This interface allows a simpler implementation where we can just delete
|
||||
// the file if it exists without having to scan the underlying storage for a.
|
||||
func (f *Filer) Delete(baseIndex uint64, ID uint64) error {
|
||||
fname := fmt.Sprintf(segmentFileNamePattern, baseIndex, ID)
|
||||
return f.vfs.Delete(f.dir, fname)
|
||||
}
|
||||
|
||||
// DumpSegment attempts to read the segment file specified by the baseIndex and
// ID. Its intended purpose is debugging the contents of segment files and,
// unlike the SegmentFiler interface, it doesn't assume the caller has access to
// the correct metadata. This allows dumping log segments in a WAL that is still
// being written to by another process. Without metadata we don't know if the
// file is sealed so always recover by reading through the whole file. If after
// or before are non-zero, they specify an exclusive lower or upper bound on which
// log entries should be emitted. No error checking is done on the read data. fn
// is called for each entry passing the raft info read from the file header (so
// that the caller knows which codec to use for example), the raft index of the
// entry and the raw bytes of the entry itself. The callback must return true to
// continue reading. The data slice is only valid for the lifetime of the call.
func (f *Filer) DumpSegment(baseIndex uint64, ID uint64, after, before uint64, fn func(info types.SegmentInfo, e types.LogEntry) (bool, error)) error {
	fname := fmt.Sprintf(segmentFileNamePattern, baseIndex, ID)

	rf, err := f.vfs.OpenReader(f.dir, fname)
	if err != nil {
		return err
	}

	buf := make([]byte, 64*1024)
	idx := baseIndex

	type frameInfo struct {
		Index  uint64
		Offset int64
		Len    uint32
	}
	var batch []frameInfo

	_, err = readThroughSegment(rf, func(info types.SegmentInfo, fh frameHeader, offset int64) (bool, error) {
		if fh.typ == FrameCommit {
			// All the previous entries have been committed. Read them and send up to
			// caller.
			for _, frame := range batch {
				// Check the header is reasonable
				if frame.Len > MaxEntrySize {
					return false, fmt.Errorf("failed to read entry idx=%d, frame header length (%d) is too big: %w",
						frame.Index, frame.Len, err)
				}

				if frame.Len > uint32(len(buf)) {
					buf = make([]byte, frame.Len)
				}

				n, err := rf.ReadAt(buf[:frame.Len], frame.Offset+frameHeaderLen)
				if err != nil {
					return false, err
				}
				if uint32(n) < frame.Len {
					return false, io.ErrUnexpectedEOF
				}

				ok, err := fn(info, types.LogEntry{Index: frame.Index, Data: buf[:n]})
				if !ok || err != nil {
					return ok, err
				}
			}
			// Reset batch
			batch = batch[:0]
			return true, nil
		}

		if fh.typ != FrameEntry {
			return true, nil
		}

		if idx <= after {
			// Not in the range we care about, skip reading the entry.
			idx++
			return true, nil
		}
		if before > 0 && idx >= before {
			// We're done
			return false, nil
		}

		batch = append(batch, frameInfo{idx, offset, fh.len})
		idx++
		return true, nil
	})

	return err
}

// DumpLogs attempts to read all log entries from segment files in the directory
// for debugging purposes. It does _not_ use the metadata and so may output log
// entries that are uncommitted or already truncated as far as the writing
// process is concerned. As such it should not be used for replication of data.
// It is useful though to debug the contents of the log even while the writing
// application is still running. After and before, if non-zero, specify exclusive
// bounds on the logs that should be returned which may allow the implementation
// to skip reading entire segment files that are not in the range.
func (f *Filer) DumpLogs(after, before uint64, fn func(info types.SegmentInfo, e types.LogEntry) (bool, error)) error {
	baseIndexes, segIDsSorted, err := f.listInternal()
	if err != nil {
		return err
	}

	for i, id := range segIDsSorted {
		baseIndex := baseIndexes[id]
		nextBaseIndex := uint64(0)
		if i+1 < len(segIDsSorted) {
			// This is not the last segment, peek at the base index of that one and
			// assume that this segment won't contain indexes that high.
			nextBaseIndex = baseIndexes[segIDsSorted[i+1]]
		}
		// See if this file contains any indexes in the range
		if after > 0 && nextBaseIndex > 0 && after >= nextBaseIndex {
			// This segment is all indexes before the lower bound we care about
			continue
		}
		if before > 0 && before <= baseIndex {
			// This segment is all indexes higher than the upper bound. We've output
			// every log in the range at this point (barring edge cases where we race
			// with a truncation which leaves multiple generations of segment files on
			// disk which we are going to ignore for now).
			return nil
		}

		// We probably care about at least some of the entries in this segment
		err := f.DumpSegment(baseIndex, id, after, before, fn)
		if err != nil {
			return err
		}
	}

	return nil
}
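A minimal sketch of how DumpLogs might be driven from an in-package debug helper. The Filer value `f` is assumed to be constructed elsewhere (its constructor isn't part of this hunk); the callback just prints what it finds.

func dumpAll(f *Filer) error {
	// after=0, before=0 means no bounds: emit every committed entry.
	return f.DumpLogs(0, 0, func(info types.SegmentInfo, e types.LogEntry) (bool, error) {
		fmt.Printf("segment=%d codec=%d idx=%d len=%d\n", info.ID, info.Codec, e.Index, len(e.Data))
		return true, nil // keep reading; e.Data is only valid during this call
	})
}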
252 vendor/github.com/hashicorp/raft-wal/segment/format.go generated vendored Normal file
@ -0,0 +1,252 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package segment

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"

	"github.com/hashicorp/raft-wal/types"
)

const (
	// MaxEntrySize is the largest we allow any single raft log entry to be. This
	// is larger than our raft implementation ever allows so seems safe to encode
	// statically for now. We could make this configurable. Its main purpose is
	// to limit allocation when reading entries back if their lengths are
	// corrupted.
	MaxEntrySize = 64 * 1024 * 1024 // 64 MiB

	// minBufSize is the size we allocate for read and write buffers. Setting it
	// larger wastes more memory but increases the chances that we'll read the
	// whole frame in a single shot and not need a second allocation and trip to
	// the disk.
	minBufSize = 64 * 1024

	fileHeaderLen = 32
	version       = 0
	magic         = 0x58eb6b0d

	// Note that this must remain a power of 2 to ensure aligning to this also
	// aligns to sector boundaries.
	frameHeaderLen = 8
)

const ( // Start iota from 0
	FrameInvalid uint8 = iota
	FrameEntry
	FrameIndex
	FrameCommit
)

var (
	// ErrTooBig indicates that the caller tried to write a logEntry with a
	// payload that's larger than we are prepared to support.
	ErrTooBig = errors.New("entries larger than 64MiB are not supported")
)

/*

File Header functions

0      1      2      3      4      5      6      7      8
+------+------+------+------+------+------+------+------+
| Magic                     | Reserved           | Vsn  |
+------+------+------+------+------+------+------+------+
| BaseIndex                                             |
+------+------+------+------+------+------+------+------+
| SegmentID                                             |
+------+------+------+------+------+------+------+------+
| Codec                                                 |
+------+------+------+------+------+------+------+------+

*/

// writeFileHeader writes a file header into buf for the given file metadata.
func writeFileHeader(buf []byte, info types.SegmentInfo) error {
	if len(buf) < fileHeaderLen {
		return io.ErrShortBuffer
	}

	binary.LittleEndian.PutUint32(buf[0:4], magic)
	// Explicitly zero Reserved bytes just in case
	buf[4] = 0
	buf[5] = 0
	buf[6] = 0
	buf[7] = version
	binary.LittleEndian.PutUint64(buf[8:16], info.BaseIndex)
	binary.LittleEndian.PutUint64(buf[16:24], info.ID)
	binary.LittleEndian.PutUint64(buf[24:32], info.Codec)
	return nil
}

// readFileHeader reads a file header from buf.
func readFileHeader(buf []byte) (*types.SegmentInfo, error) {
	if len(buf) < fileHeaderLen {
		return nil, io.ErrShortBuffer
	}

	var i types.SegmentInfo
	m := binary.LittleEndian.Uint64(buf[0:8])
	if m != magic {
		return nil, types.ErrCorrupt
	}
	if buf[7] != version {
		return nil, types.ErrCorrupt
	}
	i.BaseIndex = binary.LittleEndian.Uint64(buf[8:16])
	i.ID = binary.LittleEndian.Uint64(buf[16:24])
	i.Codec = binary.LittleEndian.Uint64(buf[24:32])
	return &i, nil
}

func validateFileHeader(got, expect types.SegmentInfo) error {
	if expect.ID != got.ID {
		return fmt.Errorf("%w: segment header ID %x doesn't match metadata %x",
			types.ErrCorrupt, got.ID, expect.ID)
	}
	if expect.BaseIndex != got.BaseIndex {
		return fmt.Errorf("%w: segment header BaseIndex %d doesn't match metadata %d",
			types.ErrCorrupt, got.BaseIndex, expect.BaseIndex)
	}
	if expect.Codec != got.Codec {
		return fmt.Errorf("%w: segment header Codec %d doesn't match metadata %d",
			types.ErrCorrupt, got.Codec, expect.Codec)
	}

	return nil
}
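A minimal sketch of the header round-trip the two functions above guarantee, written as it would appear in an in-package test (they are unexported, so it needs the `segment` package and a `testing` import). The field values are arbitrary; fields not stored in the header (timestamps, MinIndex, ...) are deliberately left out of the comparison.

func TestFileHeaderRoundTrip(t *testing.T) {
	want := types.SegmentInfo{BaseIndex: 1000, ID: 7, Codec: 1}
	var buf [fileHeaderLen]byte
	if err := writeFileHeader(buf[:], want); err != nil {
		t.Fatal(err)
	}
	got, err := readFileHeader(buf[:])
	if err != nil {
		t.Fatal(err)
	}
	// Only the three fields encoded in the 32-byte header survive the trip.
	if got.BaseIndex != want.BaseIndex || got.ID != want.ID || got.Codec != want.Codec {
		t.Fatalf("round trip mismatch: got %+v want %+v", *got, want)
	}
}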
/*
Frame Functions

0      1      2      3      4      5      6      7      8
+------+------+------+------+------+------+------+------+
| Type | Reserved           | Length/CRC                |
+------+------+------+------+------+------+------+------+
*/

type frameHeader struct {
	typ uint8
	len uint32
	crc uint32
}

func writeFrame(buf []byte, h frameHeader, payload []byte) error {
	if len(buf) < encodedFrameSize(int(h.len)) {
		return io.ErrShortBuffer
	}
	if err := writeFrameHeader(buf, h); err != nil {
		return err
	}
	copy(buf[frameHeaderLen:], payload[:h.len])
	// Explicitly write null bytes for padding
	padBytes := padLen(int(h.len))
	for i := 0; i < padBytes; i++ {
		buf[frameHeaderLen+int(h.len)+i] = 0x0
	}
	return nil
}

func writeFrameHeader(buf []byte, h frameHeader) error {
	if len(buf) < frameHeaderLen {
		return io.ErrShortBuffer
	}
	buf[0] = h.typ
	buf[1] = 0
	buf[2] = 0
	buf[3] = 0
	lOrCRC := h.len
	if h.typ == FrameCommit {
		lOrCRC = h.crc
	}
	binary.LittleEndian.PutUint32(buf[4:8], lOrCRC)
	return nil
}

var zeroHeader [frameHeaderLen]byte

func readFrameHeader(buf []byte) (frameHeader, error) {
	var h frameHeader
	if len(buf) < frameHeaderLen {
		return h, io.ErrShortBuffer
	}

	switch buf[0] {
	default:
		return h, fmt.Errorf("%w: corrupt frame header with unknown type %d", types.ErrCorrupt, buf[0])

	case FrameInvalid:
		// Check if the whole header is zero and return a zero frame as this could
		// just indicate we've read right off the end of the written data during
		// recovery.
		if bytes.Equal(buf[:frameHeaderLen], zeroHeader[:]) {
			return h, nil
		}
		return h, fmt.Errorf("%w: corrupt frame header with type 0 but non-zero other fields", types.ErrCorrupt)

	case FrameEntry, FrameIndex:
		h.typ = buf[0]
		h.len = binary.LittleEndian.Uint32(buf[4:8])

	case FrameCommit:
		h.typ = buf[0]
		h.crc = binary.LittleEndian.Uint32(buf[4:8])
	}
	return h, nil
}

// padLen returns how many bytes of padding should be added to a frame of length
// n to ensure it is a multiple of frameHeaderLen. We ensure frameHeaderLen is a
// power of two so that it's always a multiple of a typical sector size (e.g.
// 512 bytes) to reduce the risk that headers are torn by being written across
// sector boundaries. It will return an int in the range [0, 7].
func padLen(n int) int {
	// This looks a bit awful but it's just doing (n % 8) and subtracting that
	// from 8 to get the number of bytes extra needed to get up to the next 8-byte
	// boundary. The extra & 7 is to handle the case where n is a multiple of 8
	// already and so n%8 is 0 and 8-0 is 8. By &ing 8 (0b1000) with 7 (0b111) we
	// effectively wrap it back around to 0. This only works as long as
	// frameHeaderLen is a power of 2 but that's necessary per comment above.
	return (frameHeaderLen - (n % frameHeaderLen)) & (frameHeaderLen - 1)
}
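A standalone worked example of the bit trick above (the function body is copied verbatim; only the wrapper program is new), showing that padLen always rounds up to the next 8-byte boundary and wraps the already-aligned case back to 0:

package main

import "fmt"

const frameHeaderLen = 8

func padLen(n int) int {
	return (frameHeaderLen - (n % frameHeaderLen)) & (frameHeaderLen - 1)
}

func main() {
	for _, n := range []int{0, 1, 5, 8, 13, 16} {
		// Prints: 0, 7, 3, 0, 3, 0 — every n+padLen(n) is a multiple of 8.
		fmt.Printf("padLen(%d) = %d\n", n, padLen(n))
	}
}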
func encodedFrameSize(payloadLen int) int {
	return frameHeaderLen + payloadLen + padLen(payloadLen)
}

func indexFrameSize(numEntries int) int {
	// Index frames are completely unnecessary if the whole block is a
	// continuation with no new entries.
	if numEntries == 0 {
		return 0
	}
	return encodedFrameSize(numEntries * 4)
}

func writeIndexFrame(buf []byte, offsets []uint32) error {
	if len(buf) < indexFrameSize(len(offsets)) {
		return io.ErrShortBuffer
	}
	fh := frameHeader{
		typ: FrameIndex,
		len: uint32(len(offsets) * 4),
	}
	if err := writeFrameHeader(buf, fh); err != nil {
		return err
	}
	cursor := frameHeaderLen
	for _, o := range offsets {
		binary.LittleEndian.PutUint32(buf[cursor:], o)
		cursor += 4
	}
	if (len(offsets) % 2) == 1 {
		// Odd number of entries, zero pad to keep it 8-byte aligned
		binary.LittleEndian.PutUint32(buf[cursor:], 0)
	}
	return nil
}
160 vendor/github.com/hashicorp/raft-wal/segment/reader.go generated vendored Normal file
@ -0,0 +1,160 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package segment

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"sync"

	"github.com/hashicorp/raft-wal/types"
)

// Reader allows reading logs from a segment file.
type Reader struct {
	info types.SegmentInfo
	rf   types.ReadableFile

	bufPool *sync.Pool

	// tail optionally provides an interface to the writer state when this is an
	// unsealed segment so we can fetch from its in-memory index.
	tail tailWriter
}

type tailWriter interface {
	OffsetForFrame(idx uint64) (uint32, error)
}

func openReader(info types.SegmentInfo, rf types.ReadableFile, bufPool *sync.Pool) (*Reader, error) {
	r := &Reader{
		info:    info,
		rf:      rf,
		bufPool: bufPool,
	}

	return r, nil
}

// Close implements io.Closer
func (r *Reader) Close() error {
	return r.rf.Close()
}

// GetLog returns the raw log entry bytes associated with idx. If the log
// doesn't exist in this segment types.ErrNotFound must be returned.
func (r *Reader) GetLog(idx uint64) (*types.PooledBuffer, error) {
	offset, err := r.findFrameOffset(idx)
	if err != nil {
		return nil, err
	}

	_, payload, err := r.readFrame(offset)
	if err != nil {
		return nil, err
	}
	return payload, err
}

func (r *Reader) readFrame(offset uint32) (frameHeader, *types.PooledBuffer, error) {
	buf := r.makeBuffer()

	n, err := r.rf.ReadAt(buf.Bs, int64(offset))
	if errors.Is(err, io.EOF) && n >= frameHeaderLen {
		// We might have hit EOF just because our read buffer (at least 64KiB) might
		// be larger than the space left in the file (say if files are tiny or if we
		// are reading a frame near the end). So don't treat EOF as an error as
		// long as we have actually managed to read a frameHeader - we'll work out
		// if we got the whole thing or not below.
		err = nil

		// Re-slice buf.Bs so its len() reflects only what we actually managed to
		// read. Note this doesn't impact the buffer length when it's returned to
		// the pool which will still return the whole cap.
		buf.Bs = buf.Bs[:n]
	}
	if err != nil {
		return frameHeader{}, nil, err
	}
	fh, err := readFrameHeader(buf.Bs)
	if err != nil {
		return fh, nil, err
	}

	if (frameHeaderLen + int(fh.len)) <= len(buf.Bs) {
		// We already have all we need read, just return it sliced to just include
		// the payload.
		buf.Bs = buf.Bs[frameHeaderLen : frameHeaderLen+fh.len]
		return fh, buf, nil
	}
	// Need to read again, with a bigger buffer, return this one
	buf.Close()

	// Need to read more bytes, validate that len is a sensible number
	if fh.len > MaxEntrySize {
		return fh, nil, fmt.Errorf("%w: frame header indicates a record larger than MaxEntrySize (%d bytes)", types.ErrCorrupt, MaxEntrySize)
	}

	buf = &types.PooledBuffer{
		Bs: make([]byte, fh.len),
		// No closer, let outsized buffers be GCed in case they are massive and way
		// bigger than we need again. Could reconsider this if we find we need to
		// optimize for frequent > minBufSize reads.
	}
	if _, err := r.rf.ReadAt(buf.Bs, int64(offset+frameHeaderLen)); err != nil {
		return fh, nil, err
	}
	return fh, buf, nil
}

func (r *Reader) makeBuffer() *types.PooledBuffer {
	if r.bufPool == nil {
		return &types.PooledBuffer{Bs: make([]byte, minBufSize)}
	}
	buf := r.bufPool.Get().([]byte)
	return &types.PooledBuffer{
		Bs: buf,
		CloseFn: func() {
			// Note we always return the whole allocated buf regardless of what Bs
			// ended up being sliced to.
			r.bufPool.Put(buf)
		},
	}
}

func (r *Reader) findFrameOffset(idx uint64) (uint32, error) {
	if r.tail != nil {
		// This is not a sealed segment.
		return r.tail.OffsetForFrame(idx)
	}

	// Sealed segment, read from the on-disk index block.
	if r.info.IndexStart == 0 {
		return 0, fmt.Errorf("sealed segment has no index block")
	}

	if idx < r.info.MinIndex || (r.info.MaxIndex > 0 && idx > r.info.MaxIndex) {
		return 0, types.ErrNotFound
	}

	// IndexStart is the offset to the first entry in the index array. We need to
	// find the byte offset to the Nth entry
	entryOffset := (idx - r.info.BaseIndex)
	byteOffset := r.info.IndexStart + (entryOffset * 4)

	var bs [4]byte
	n, err := r.rf.ReadAt(bs[:], int64(byteOffset))
	if err == io.EOF && n == 4 {
		// We read all of it; it just happened to be at the end of the file. Ignore.
		err = nil
	}
	if err != nil {
		return 0, fmt.Errorf("failed to read segment index: %w", err)
	}
	offset := binary.LittleEndian.Uint32(bs[:])
	return offset, nil
}
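The PooledBuffer contract above implies a read pattern like this in-package sketch: look up an entry, use the bytes, and return the buffer to the pool exactly once. `r` is assumed to be a *Reader obtained elsewhere (openReader is unexported); `readEntry` itself is hypothetical.

func readEntry(r *Reader, idx uint64) ([]byte, error) {
	buf, err := r.GetLog(idx)
	if err != nil {
		return nil, err // types.ErrNotFound if idx isn't in this segment
	}
	defer buf.Close() // buf.Bs must not be touched after Close

	// Copy out because the slice is only valid until Close returns it to the pool.
	out := make([]byte, len(buf.Bs))
	copy(out, buf.Bs)
	return out, nil
}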
599 vendor/github.com/hashicorp/raft-wal/segment/writer.go generated vendored Normal file
@ -0,0 +1,599 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package segment

import (
	"fmt"
	"hash/crc32"
	"io"
	"sync"
	"sync/atomic"

	"github.com/hashicorp/raft-wal/types"
)

// Writer allows appending logs to a segment file as well as reading them back.
type Writer struct {
	// commitIdx is updated after an append batch is fully persisted to disk to
	// allow readers to read the new value. Note that readers must not read values
	// larger than this even if they are available in tailIndex as they are not
	// yet committed to disk!
	commitIdx uint64

	// offsets is the index offset. The first element corresponds to the
	// BaseIndex. It is accessed concurrently by readers and the single writer
	// without locks! This is race-free via the following invariants:
	//  - the slice here is never mutated, only copied, though it may still refer
	//    to the same backing array.
	//  - readers only ever read up to len(offsets) in the atomically accessed
	//    slice. Those elements of the backing array are immutable and will never
	//    be modified once they are accessible to readers.
	//  - readers and writers synchronize on atomic access to the slice
	//  - the serial writer will only append to the end which either mutates the
	//    shared backing array but at an index greater than the len any reader has
	//    seen, or a new backing array is allocated and the old one copied into it
	//    which also will never mutate the entries readers can already "see" via
	//    the old slice.
	offsets atomic.Value // []uint32

	// writer state is accessed only on the (serial) write path so doesn't need
	// synchronization.
	writer struct {
		// commitBuf stores the pending frames waiting to be flushed to the current
		// tail block.
		commitBuf []byte

		// crc is the rolling crc32 Castagnoli sum of all data written since the
		// last fsync.
		crc uint32

		// writeOffset is the absolute file offset up to which we've written data to
		// the file. The contents of commitBuf will be written at this offset when
		// it commits or we reach the end of the block, whichever happens first.
		writeOffset uint32

		// indexStart is set when the tail is sealed indicating the file offset at
		// which the index array was written.
		indexStart uint64
	}

	info types.SegmentInfo
	wf   types.WritableFile
	r    types.SegmentReader
}
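The invariants on `offsets` above are a general single-writer copy-on-write pattern. A stripped-down, runnable reduction of just that pattern follows; all of the names (cowIndex, read, appendOne) are illustrative and do not exist in raft-wal.

package main

import (
	"fmt"
	"sync/atomic"
)

type cowIndex struct {
	offsets atomic.Value // always holds a []uint32
}

func newCowIndex() *cowIndex {
	c := &cowIndex{}
	c.offsets.Store(make([]uint32, 0, 8))
	return c
}

// read may be called from any goroutine.
func (c *cowIndex) read(i int) (uint32, bool) {
	ofs := c.offsets.Load().([]uint32)
	if i >= len(ofs) {
		return 0, false // never look past the published length
	}
	return ofs[i], true
}

// appendOne must only be called from the single writer goroutine. The append
// either writes beyond every published len (so no reader can observe the
// mutation) or reallocates; the Store is what publishes the new element.
func (c *cowIndex) appendOne(v uint32) {
	ofs := c.offsets.Load().([]uint32)
	c.offsets.Store(append(ofs, v))
}

func main() {
	c := newCowIndex()
	c.appendOne(42)
	v, ok := c.read(0)
	fmt.Println(v, ok) // 42 true
}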
func createFile(info types.SegmentInfo, wf types.WritableFile, bufPool *sync.Pool) (*Writer, error) {
	r, err := openReader(info, wf, bufPool)
	if err != nil {
		return nil, err
	}
	w := &Writer{
		info: info,
		wf:   wf,
		r:    r,
	}
	r.tail = w
	if err := w.initEmpty(); err != nil {
		return nil, err
	}
	return w, nil
}

func recoverFile(info types.SegmentInfo, wf types.WritableFile, bufPool *sync.Pool) (*Writer, error) {
	r, err := openReader(info, wf, bufPool)
	if err != nil {
		return nil, err
	}
	w := &Writer{
		info: info,
		wf:   wf,
		r:    r,
	}
	r.tail = w

	if err := w.recoverTail(); err != nil {
		return nil, err
	}

	return w, nil
}

func (w *Writer) initEmpty() error {
	// Write header into write buffer to be written out with the first commit.
	w.writer.writeOffset = 0
	w.ensureBufCap(fileHeaderLen)
	w.writer.commitBuf = w.writer.commitBuf[:fileHeaderLen]

	if err := writeFileHeader(w.writer.commitBuf, w.info); err != nil {
		return err
	}

	w.writer.crc = crc32.Checksum(w.writer.commitBuf[:fileHeaderLen], castagnoliTable)

	// Initialize the index
	offsets := make([]uint32, 0, 32*1024)
	w.offsets.Store(offsets)
	return nil
}

func (w *Writer) recoverTail() error {
	// We need to track the last two commit frames
	type commitInfo struct {
		fh         frameHeader
		offset     int64
		crcStart   int64
		offsetsLen int
	}
	var prevCommit, finalCommit *commitInfo

	offsets := make([]uint32, 0, 32*1024)

	readInfo, err := readThroughSegment(w.wf, func(_ types.SegmentInfo, fh frameHeader, offset int64) (bool, error) {
		switch fh.typ {
		case FrameEntry:
			// Record the frame offset
			offsets = append(offsets, uint32(offset))

		case FrameIndex:
			// So this segment was sealed (or an attempt was made)! Keep track of this
			// indexStart in case it turns out the Seal actually committed completely.
			// We store the start of the actual array not the frame header.
			w.writer.indexStart = uint64(offset) + frameHeaderLen

		case FrameCommit:
			// The payload is not the length field in this case!
			prevCommit = finalCommit
			finalCommit = &commitInfo{
				fh:         fh,
				offset:     offset,
				crcStart:   0,            // First commit includes the file header
				offsetsLen: len(offsets), // Track how many entries were found up to this commit point.
			}
			if prevCommit != nil {
				finalCommit.crcStart = prevCommit.offset + frameHeaderLen
			}
		}
		return true, nil
	})
	if err != nil {
		return err
	}

	if finalCommit == nil {
		// There were no commit frames found at all. This segment file is
		// effectively empty. Init it that way ready for appending. This overwrites
		// the file header so it doesn't matter if it was valid or not.
		return w.initEmpty()
	}

	// Assume that the final commit is good for now and set the writer state
	w.writer.writeOffset = uint32(finalCommit.offset + frameHeaderLen)

	// Just store what we have for now to ensure the defer doesn't panic; we'll
	// probably update this below.
	w.offsets.Store(offsets)

	// Whichever path we take, fix up the commitIdx before we leave
	defer func() {
		ofs := w.getOffsets()
		if len(ofs) > 0 {
			// Non-atomic is OK because this file is not visible to any other threads
			// yet.
			w.commitIdx = w.info.BaseIndex + uint64(len(ofs)) - 1
		}
	}()

	if finalCommit.offsetsLen < len(offsets) {
		// Some entries were found after the last commit. Those must be a partial
		// write that was uncommitted so can be ignored. But the fact they were
		// written at all means that the last commit frame must have been completed
		// and acknowledged so we don't need to verify anything. Just truncate the
		// extra entries from the index and reset the write cursor to continue
		// appending after the last commit.
		offsets = offsets[:finalCommit.offsetsLen]
		w.offsets.Store(offsets)

		// Since at least one commit was found, the header better be valid!
		return validateFileHeader(*readInfo, w.info)
	}

	// The last frame was a commit frame! Let's check that all the data written in
	// that commit frame made it to disk.
	// Verify the length first
	bufLen := finalCommit.offset - finalCommit.crcStart
	// We know bufLen can't be bigger than the whole segment file because none of
	// the values above were read from the data, just from the offsets we moved
	// through.
	batchBuf := make([]byte, bufLen)

	if _, err := w.wf.ReadAt(batchBuf, finalCommit.crcStart); err != nil {
		return fmt.Errorf("failed to read last committed batch for CRC validation: %w", err)
	}

	gotCrc := crc32.Checksum(batchBuf, castagnoliTable)
	if gotCrc == finalCommit.fh.crc {
		// All is good. We already set up the state we need for the writer other
		// than offsets.
		w.offsets.Store(offsets)

		// Since at least one commit was found, the header better be valid!
		return validateFileHeader(*readInfo, w.info)
	}

	// The last commit was incomplete; rewind back to the previous one or the
	// start of the file.
	if prevCommit == nil {
		// Init will re-write the file header so it doesn't matter if it was
		// corrupt or not!
		return w.initEmpty()
	}

	w.writer.writeOffset = uint32(prevCommit.offset + frameHeaderLen)
	offsets = offsets[:prevCommit.offsetsLen]
	w.offsets.Store(offsets)

	// Since at least one commit was found, the header better be valid!
	return validateFileHeader(*readInfo, w.info)
}

// Close implements io.Closer
func (w *Writer) Close() error {
	return w.r.Close()
}

// GetLog implements types.SegmentReader
func (w *Writer) GetLog(idx uint64) (*types.PooledBuffer, error) {
	return w.r.GetLog(idx)
}

// Append adds one or more entries. It must not return until the entries are
// durably stored otherwise raft's guarantees will be compromised.
func (w *Writer) Append(entries []types.LogEntry) error {
	if len(entries) < 1 {
		return nil
	}

	if w.writer.indexStart > 0 {
		return types.ErrSealed
	}

	flushed := false

	// Save any state we may need to rollback.
	beforeBuf := w.writer.commitBuf
	beforeCRC := w.writer.crc
	beforeIndexStart := w.writer.indexStart
	beforeWriteOffset := w.writer.writeOffset
	beforeOffsets := w.offsets.Load()

	defer func() {
		if !flushed {
			// rollback writer state on error
			w.writer.commitBuf = beforeBuf
			w.writer.crc = beforeCRC
			w.writer.indexStart = beforeIndexStart
			w.writer.writeOffset = beforeWriteOffset
			w.offsets.Store(beforeOffsets)
		}
	}()

	// Iterate entries and append each one
	for _, e := range entries {
		if err := w.appendEntry(e); err != nil {
			return err
		}
	}

	ofs := w.getOffsets()
	// Work out if we need to seal before we commit and sync.
	if (w.writer.writeOffset + uint32(len(w.writer.commitBuf)+indexFrameSize(len(ofs)))) > w.info.SizeLimit {
		// Seal the segment! We seal it by writing an index frame before we commit.
		if err := w.appendIndex(); err != nil {
			return err
		}
	}

	// Write the commit frame
	if err := w.appendCommit(); err != nil {
		return err
	}

	flushed = true

	// Commit in-memory
	atomic.StoreUint64(&w.commitIdx, entries[len(entries)-1].Index)
	return nil
}

func (w *Writer) getOffsets() []uint32 {
	return w.offsets.Load().([]uint32)
}

// OffsetForFrame implements tailWriter and allows readers to look up entry
// frames in the tail's in-memory index.
func (w *Writer) OffsetForFrame(idx uint64) (uint32, error) {
	if idx < w.info.BaseIndex || idx < w.info.MinIndex || idx > w.LastIndex() {
		return 0, types.ErrNotFound
	}
	os := w.getOffsets()
	entryIndex := idx - w.info.BaseIndex
	// No bounds check on entryIndex since LastIndex must ensure it's in bounds.
	return os[entryIndex], nil
}

func (w *Writer) appendEntry(e types.LogEntry) error {
	offsets := w.getOffsets()

	// Check the invariant that this entry is the next one we expect otherwise our
	// index logic is incorrect and will result in panics on read.
	if e.Index != w.info.BaseIndex+uint64(len(offsets)) {
		return fmt.Errorf("non-monotonic append to segment with BaseIndex=%d. Entry index %d, expected %d",
			w.info.BaseIndex, e.Index, w.info.BaseIndex+uint64(len(offsets)))
	}

	fh := frameHeader{
		typ: FrameEntry,
		len: uint32(len(e.Data)),
	}
	bufOffset, err := w.appendFrame(fh, e.Data)
	if err != nil {
		return err
	}
	// Update the offsets index

	// Add the index entry. Note this is safe despite mutating the same backing
	// array as tail because it's beyond the limit current readers will access
	// until we do the atomic update below. Even if append re-allocates the
	// backing array, it will only read the indexes smaller than numEntries from
	// the old array to copy them into the new one and we are not mutating the
	// same memory locations. Old readers might still be looking at the old
	// array (lower than numEntries) through the current tail.offsets slice but
	// we are not touching that at least below numEntries.
	offsets = append(offsets, w.writer.writeOffset+uint32(bufOffset))

	// Now we can make it available to readers. Note that readers still
	// shouldn't read it until we actually commit to disk (and increment
	// commitIdx) but it's race free for them to do so now!
	w.offsets.Store(offsets)
	return nil
}

func (w *Writer) appendCommit() error {
	fh := frameHeader{
		typ: FrameCommit,
		crc: w.writer.crc,
	}
	if _, err := w.appendFrame(fh, nil); err != nil {
		return err
	}

	// Flush all writes to the file
	if err := w.sync(); err != nil {
		return err
	}

	// Finally, reset crc so that by the time we write the next trailer
	// we'll know where the append batch started.
	w.writer.crc = 0
	return nil
}
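The commit scheme above stores a rolling Castagnoli CRC of everything written since the previous commit frame, and recoverTail recomputes it over [crcStart, commitOffset) to decide whether the final batch survived intact. A standalone sketch of just that check follows; `castagnoliTable` is not shown in this diff, and is assumed to be crc32.MakeTable(crc32.Castagnoli) as is conventional.

package main

import (
	"fmt"
	"hash/crc32"
)

// Assumption: this matches the package-level table the writer uses.
var castagnoliTable = crc32.MakeTable(crc32.Castagnoli)

func main() {
	// Stand-in for the bytes covering [crcStart, commitOffset): for the first
	// commit this includes the 32-byte file header, then every frame appended
	// since the last commit.
	batch := []byte("file-header|entry-frame-1|entry-frame-2")

	// Writer side: the rolling sum is accumulated as frames are appended, then
	// stored in the commit frame's Length/CRC field before fsync.
	storedCRC := crc32.Checksum(batch, castagnoliTable)

	// Recovery side: re-read the same byte range and compare.
	if crc32.Checksum(batch, castagnoliTable) == storedCRC {
		fmt.Println("batch intact: keep this commit")
	} else {
		fmt.Println("torn write: rewind to the previous commit")
	}
}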
func (w *Writer) ensureBufCap(extraLen int) {
	needCap := len(w.writer.commitBuf) + extraLen
	if cap(w.writer.commitBuf) < needCap {
		newSize := minBufSize
		// Double buffer size until it's big enough to amortize cost
		for newSize < needCap {
			newSize = newSize * 2
		}
		newBuf := make([]byte, newSize)
		oldLen := len(w.writer.commitBuf)
		copy(newBuf, w.writer.commitBuf)
		w.writer.commitBuf = newBuf[:oldLen]
	}
}

func (w *Writer) appendIndex() error {
	// Append the index record before we commit (commit and flush happen later
	// generally)
	offsets := w.getOffsets()
	l := indexFrameSize(len(offsets))
	w.ensureBufCap(l)

	startOff := len(w.writer.commitBuf)

	if err := writeIndexFrame(w.writer.commitBuf[startOff:startOff+l], offsets); err != nil {
		return err
	}
	w.writer.commitBuf = w.writer.commitBuf[:startOff+l]

	// Update crc with those values
	w.writer.crc = crc32.Update(w.writer.crc, castagnoliTable, w.writer.commitBuf[startOff:startOff+l])

	// Record the file offset where the index starts (the actual index data so
	// after the frame header).
	w.writer.indexStart = uint64(w.writer.writeOffset) + uint64(startOff+frameHeaderLen)
	return nil
}

// appendFrame appends the given frame to the current block. The frame must fit
// already otherwise an error will be returned.
func (w *Writer) appendFrame(fh frameHeader, data []byte) (int, error) {
	// Encode frame header into current block buffer
	l := encodedFrameSize(len(data))
	w.ensureBufCap(l)

	bufOffset := len(w.writer.commitBuf)
	if err := writeFrame(w.writer.commitBuf[bufOffset:bufOffset+l], fh, data); err != nil {
		return 0, err
	}
	// Update len of commitBuf since we resliced it for the write
	w.writer.commitBuf = w.writer.commitBuf[:bufOffset+l]

	// Update the CRC
	w.writer.crc = crc32.Update(w.writer.crc, castagnoliTable, w.writer.commitBuf[bufOffset:bufOffset+l])
	return bufOffset, nil
}

func (w *Writer) flush() error {
	// Write to file
	n, err := w.wf.WriteAt(w.writer.commitBuf, int64(w.writer.writeOffset))
	if err == io.EOF && n == len(w.writer.commitBuf) {
		// Writer may return EOF even if it wrote all bytes if it wrote right up to
		// the end of the file. Ignore that case though.
		err = nil
	}
	if err != nil {
		return err
	}

	// Reset writer state ready for next writes
	w.writer.writeOffset += uint32(len(w.writer.commitBuf))
	w.writer.commitBuf = w.writer.commitBuf[:0]
	return nil
}

func (w *Writer) sync() error {
	// Write out current buffer to file
	if err := w.flush(); err != nil {
		return err
	}

	// Sync file
	if err := w.wf.Sync(); err != nil {
		return err
	}

	// Update commitIdx atomically
	offsets := w.getOffsets()
	commitIdx := uint64(0)
	if len(offsets) > 0 {
		// Probably not possible for it to be less, but just in case we ever flush
		// the file with only metadata written...
		commitIdx = uint64(w.info.BaseIndex) + uint64(len(offsets)) - 1
	}
	atomic.StoreUint64(&w.commitIdx, commitIdx)
	return nil
}

// Sealed returns whether the segment is sealed or not. If it is, it returns
// true and the file offset that its index array starts at, to be saved in
// metadata. WAL will call this after every append so it should be relatively
// cheap in the common case. This design allows the final Append to write out
// the index or any additional data needed at seal time in the same fsync.
func (w *Writer) Sealed() (bool, uint64, error) {
	if w.writer.indexStart == 0 {
		return false, 0, nil
	}
	return true, w.writer.indexStart, nil
}

// ForceSeal forces us to seal the segment by writing out an index block
// wherever we got to in the file. After calling this it is no longer valid to
// call Append on this file.
func (w *Writer) ForceSeal() (uint64, error) {
	if w.writer.indexStart > 0 {
		// Already sealed, this is a no-op.
		return w.writer.indexStart, nil
	}

	// Seal the segment! We seal it by writing an index frame before we commit.
	if err := w.appendIndex(); err != nil {
		return 0, err
	}

	// Write the commit frame
	if err := w.appendCommit(); err != nil {
		return 0, err
	}

	return w.writer.indexStart, nil
}

// LastIndex returns the most recently persisted index in the log. It must
// respond without blocking on append since it's needed frequently by read
// paths that may call it concurrently. Typically this will be loaded from an
// atomic int. If the segment is empty lastIndex should return zero.
func (w *Writer) LastIndex() uint64 {
	return atomic.LoadUint64(&w.commitIdx)
}

func readThroughSegment(r types.ReadableFile, fn func(info types.SegmentInfo, fh frameHeader, offset int64) (bool, error)) (*types.SegmentInfo, error) {
	// First read the file header. Note we wrote it as part of the first commit so
	// it may be missing or partially written and that's OK as long as there are
	// no other later commit frames!
	var fh [fileHeaderLen]byte
	_, err := r.ReadAt(fh[:], 0)
	// EOF is ok - the file might be empty if we crashed before committing
	// anything and preallocation isn't supported.
	if err != io.EOF && err != nil {
		return nil, err
	}

	readInfo, err := readFileHeader(fh[:])
	if err == types.ErrCorrupt {
		// Header is malformed or missing; don't error yet though, we'll detect it
		// later when we know if it's a problem or not.
		err = nil
	}
	if err != nil {
		return nil, err
	}
	// If the header wasn't detected as corrupt, it might still be, just in a way
	// that's valid since we've not verified it against the expected metadata yet.
	// We'll wait to see if the header was part of the last commit before deciding
	// if we should validate it for corruption or not though. For now just make
	// sure it's not nil so we don't have to handle nil checks everywhere.
	if readInfo == nil {
		// Zero info will fail validation against the actual metadata if it was
		// corrupt when it shouldn't be later. Just prevents a nil panic.
		readInfo = &types.SegmentInfo{}
	}

	// Read through the file from after the header until we hit zeros, EOF or
	// corrupt frames.
	offset := int64(fileHeaderLen)
	var buf [frameHeaderLen]byte

	for {
		n, err := r.ReadAt(buf[:], offset)
		if err == io.EOF {
			if n < frameHeaderLen {
				return readInfo, nil
			}
			// This is OK! The last frame in the file might be a commit frame so as
			// long as we have it all then we can ignore the EOF for this iteration.
			err = nil
		}
		if err != nil {
			return readInfo, fmt.Errorf("failed reading frame at offset=%d: %w", offset, err)
		}
		fh, err := readFrameHeader(buf[:frameHeaderLen])
		if err != nil {
			// This is not actually an error case. If we failed to decode it could be
			// because of a torn write (since we don't assume writes are atomic). We
			// assume that previously committed data is not silently corrupted by the
			// FS (see README for details). So this must be due to corruption that
			// happened due to non-atomic sector updates whilst committing the last
			// write batch.
			return readInfo, nil
		}
		if fh.typ == FrameInvalid {
			// This means we've hit zeros at the end of the file (or due to an
			// incomplete write, which we treat the same way).
			return readInfo, nil
		}

		// Call the callback
		shouldContinue, err := fn(*readInfo, fh, offset)
		if err != nil {
			return readInfo, err
		}
		if !shouldContinue {
			return readInfo, nil
		}

		// Skip to next frame
		offset += int64(encodedFrameSize(int(fh.len)))
	}
}
215 vendor/github.com/hashicorp/raft-wal/state.go generated vendored Normal file
@ -0,0 +1,215 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package wal

import (
	"sync/atomic"

	"github.com/benbjohnson/immutable"
	"github.com/hashicorp/raft-wal/types"
)

// state is an immutable snapshot of the state of the log. Modifications must be
// made by copying and modifying the copy. This is easy enough because segments
// is an immutable map so changing and re-assigning to the clone won't impact
// the original map, and tail is just a pointer that can be mutated in the
// shallow clone. Note that methods called on the tail segmentWriter may mutate
// its state so must only be called while holding the WAL's writeLock.
type state struct {
	// refCount tracks readers that are reading segments based on this metadata.
	// It is accessed atomically and must be 64 bit aligned (i.e. leave it at the
	// start of the struct).
	refCount int32
	// finalizer is set at most once while WAL is holding the write lock in order
	// to provide a func that must be called when all current readers are done
	// with this state. It's used for deferring closing and deleting old segments
	// until we can be sure no reads are still in progress on them.
	finalizer atomic.Value // func()

	nextSegmentID uint64

	// nextBaseIndex is used to signal which baseIndex to use next if there are no
	// segments or current tail.
	nextBaseIndex uint64
	segments      *immutable.SortedMap[uint64, segmentState]
	tail          types.SegmentWriter
}

type segmentState struct {
	types.SegmentInfo

	// r is the SegmentReader for our in-memory state.
	r types.SegmentReader
}

// Persistent converts the in-memory state into a PersistentState.
func (s *state) Persistent() types.PersistentState {
	segs := make([]types.SegmentInfo, 0, s.segments.Len())
	it := s.segments.Iterator()
	for !it.Done() {
		_, s, _ := it.Next()
		segs = append(segs, s.SegmentInfo)
	}
	return types.PersistentState{
		NextSegmentID: s.nextSegmentID,
		Segments:      segs,
	}
}

func (s *state) getLog(index uint64) (*types.PooledBuffer, error) {
	// Check the tail writer first
	if s.tail != nil {
		raw, err := s.tail.GetLog(index)
		if err != nil && err != ErrNotFound {
			// Return actual errors since they might mask the fact that index really
			// is in the tail but failed to read for some other reason.
			return nil, err
		}
		if err == nil {
			// No error means we found it and just need to decode.
			return raw, nil
		}
		// Not in the tail segment, fall back to searching previous segments.
	}

	seg, err := s.findSegmentReader(index)
	if err != nil {
		return nil, err
	}

	return seg.GetLog(index)
}

// findSegmentReader searches the segment tree for the segment that contains the
// log at index idx. It may return the tail segment which may not in fact
// contain idx if idx is larger than the last written index. Typically this is
// called after already checking with the tail writer whether the log is in
// there which means the caller can be sure it's not going to return the tail
// segment.
func (s *state) findSegmentReader(idx uint64) (types.SegmentReader, error) {
	if s.segments.Len() == 0 {
		return nil, ErrNotFound
	}

	// Search for a segment with baseIndex.
	it := s.segments.Iterator()

	// The baseIndex we want is the first one lower or equal to idx. Seek gets us
	// to the first result equal or greater so we are either at it (if equal) or
	// on the one _after_ the one we need. We step back since that's most likely.
	it.Seek(idx)
	// The first call to Next/Prev actually returns the node the iterator is
	// currently on (which is probably the one after the one we want) but in some
	// edge cases we might actually want this one. Rather than reversing back and
	// coming forward again, just check both this and the one before it.
	_, seg, ok := it.Prev()
	if ok && seg.BaseIndex > idx {
		_, seg, ok = it.Prev()
	}

	// We either have the right segment or it doesn't exist.
	if ok && seg.MinIndex <= idx && (seg.MaxIndex == 0 || seg.MaxIndex >= idx) {
		return seg.r, nil
	}

	return nil, ErrNotFound
}

func (s *state) getTailInfo() *segmentState {
	it := s.segments.Iterator()
	it.Last()
	_, tail, ok := it.Next()
	if !ok {
		return nil
	}
	return &tail
}

func (s *state) append(entries []types.LogEntry) error {
	return s.tail.Append(entries)
}

func (s *state) firstIndex() uint64 {
	it := s.segments.Iterator()
	_, seg, ok := it.Next()
	if !ok {
		return 0
	}
	if seg.SealTime.IsZero() {
		// First segment is unsealed so is also the tail. Check it actually has at
		// least one log in it, otherwise it doesn't matter what the
		// BaseIndex/MinIndex are.
		if s.tail.LastIndex() == 0 {
			// No logs in the WAL
			return 0
		}
		// At least one log exists, return the MinIndex
	}
	return seg.MinIndex
}

func (s *state) lastIndex() uint64 {
	tailIdx := s.tail.LastIndex()
	if tailIdx > 0 {
		return tailIdx
	}
	// Current tail is empty. Check whether there are previous sealed segments.
	it := s.segments.Iterator()
	it.Last()
	_, _, ok := it.Prev()
	if !ok {
		// No tail! Shouldn't be possible but means no logs yet
		return 0
	}
	// Go back to the segment before the tail
	_, _, ok = it.Prev()
	if !ok {
		// No previous segment so the whole log is empty
		return 0
	}

	// There was a previous segment so its MaxIndex will be one less than the
	// tail's BaseIndex.
	tailSeg := s.getTailInfo()
	if tailSeg == nil || tailSeg.BaseIndex == 0 {
		return 0
	}
	return tailSeg.BaseIndex - 1
}

func (s *state) acquire() func() {
	atomic.AddInt32(&s.refCount, 1)
	return s.release
}

func (s *state) release() {
	// decrement on release
	new := atomic.AddInt32(&s.refCount, -1)
	if new == 0 {
		// Clean up state associated with this version now all refs have gone.
		// Since there are no more refs and we should not set a finalizer until
		// this state is no longer the active state, we can be sure this will
		// happen only once. Even so, let's swap the fn to ensure we only call the
		// finalizer once ever! We can't swap actual nil as it's not the same type
		// as func() so do a dance with a nilFn below.
		var nilFn func()
		fnRaw := s.finalizer.Swap(nilFn)
		if fn, ok := fnRaw.(func()); ok && fn != nil {
			fn()
		}
	}
}

// clone returns a new state which is a shallow copy of just the immutable parts
// of s. This is safer than a simple assignment copy because that "reads" the
// atomically modified state non-atomically. We never want to copy the refCount
// or finalizer anyway.
func (s *state) clone() state {
	return state{
		nextSegmentID: s.nextSegmentID,
		segments:      s.segments,
		tail:          s.tail,
	}
}
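An in-package sketch of how a reader is expected to pin a state snapshot via the acquire/release pair above; `withState` is hypothetical, and `st` is assumed to be the *state currently published by the WAL.

func withState(st *state, fn func(*state) error) error {
	release := st.acquire() // bump refCount so the finalizer can't run yet
	defer release()         // the last release fires the finalizer, if one was set
	return fn(st)
}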
21 vendor/github.com/hashicorp/raft-wal/types/buffer.go generated vendored Normal file
@ -0,0 +1,21 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package types

// PooledBuffer is a wrapper that allows WAL to return read buffers to segment
// implementations when we're done decoding.
type PooledBuffer struct {
	Bs      []byte
	CloseFn func()
}

// Close implements io.Closer and returns the buffer to the pool. It should be
// called exactly once for each buffer when it's no longer needed. It's no
// longer safe to access Bs or any slice taken from it after the call.
func (b *PooledBuffer) Close() error {
	if b.CloseFn != nil {
		b.CloseFn()
	}
	return nil
}
41 vendor/github.com/hashicorp/raft-wal/types/meta.go generated vendored Normal file
@ -0,0 +1,41 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package types

import "io"

// MetaStore is the interface we need to a persistent, crash-safe backend. We
// implement it with BoltDB for real usage but the interface allows alternatives
// to be used, or tests to mock out FS access.
type MetaStore interface {
	// Load loads the existing persisted state. If there is no existing state
	// implementations are expected to create and initialize new storage and
	// return an empty state.
	Load(dir string) (PersistentState, error)

	// CommitState must atomically replace all persisted metadata in the current
	// store with the set provided. It must not return until the data is persisted
	// durably and in a crash-safe way otherwise the guarantees of the WAL will be
	// compromised. The WAL will only ever call this in a single thread at one
	// time and it will never be called concurrently with Load, however it may be
	// called concurrently with Get/SetStable operations.
	CommitState(PersistentState) error

	// GetStable returns a value from stable store or nil if it doesn't exist. May
	// be called concurrently by multiple threads.
	GetStable(key []byte) ([]byte, error)

	// SetStable stores a value in the stable store. May be called concurrently
	// with GetStable.
	SetStable(key, value []byte) error

	io.Closer
}

// PersistentState represents the WAL file metadata we need to store reliably to
// recover on restart.
type PersistentState struct {
	NextSegmentID uint64
	Segments      []SegmentInfo
}
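A minimal in-memory MetaStore sketch of the kind the interface comment has in mind for tests. It is not part of raft-wal and deliberately ignores durability; the compile-time assertion at the end checks it satisfies the interface.

package waltest

import (
	"sync"

	"github.com/hashicorp/raft-wal/types"
)

type memMetaStore struct {
	mu     sync.Mutex
	state  types.PersistentState
	stable map[string][]byte
}

func (m *memMetaStore) Load(dir string) (types.PersistentState, error) {
	m.mu.Lock()
	defer m.mu.Unlock()
	if m.stable == nil {
		m.stable = make(map[string][]byte)
	}
	return m.state, nil // empty state on first Load, as the contract requires
}

func (m *memMetaStore) CommitState(s types.PersistentState) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.state = s // "atomic" replacement; a real store must also be crash-safe
	return nil
}

func (m *memMetaStore) GetStable(key []byte) ([]byte, error) {
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.stable[string(key)], nil // nil when the key doesn't exist
}

func (m *memMetaStore) SetStable(key, value []byte) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	if m.stable == nil {
		m.stable = make(map[string][]byte)
	}
	m.stable[string(key)] = append([]byte(nil), value...)
	return nil
}

func (m *memMetaStore) Close() error { return nil }

var _ types.MetaStore = (*memMetaStore)(nil)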
150 vendor/github.com/hashicorp/raft-wal/types/segment.go generated vendored Normal file
@ -0,0 +1,150 @@
|
||||
// Copyright (c) HashiCorp, Inc
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
package types
|
||||
|
||||
import (
|
||||
"io"
|
||||
"time"
|
||||
)
|
||||
|
||||
// SegmentInfo is the metadata describing a single WAL segment.
|
||||
type SegmentInfo struct {
|
||||
// ID uniquely identifies this segment file
|
||||
ID uint64
|
||||
|
||||
// BaseIndex is the raft index of the first entry that will be written to the
|
||||
// segment.
|
||||
BaseIndex uint64
|
||||
|
||||
// MinIndex is the logical lowest index that still exists in the segment. It
|
||||
// may be greater than BaseIndex if a head truncation has "deleted" a prefix
|
||||
// of the segment.
|
||||
MinIndex uint64
|
||||
|
||||
// MaxIndex is the logical highest index that still exists in the segment. It
|
||||
// may be lower than the actual highest index if a tail truncation has
|
||||
// "deleted" a suffix of the segment. It is zero for unsealed segments and
|
||||
// only set one seal.
|
||||
MaxIndex uint64
|
||||
|
||||
// Codec identifies the codec used to encode log entries. Codec values 0 to
|
||||
// 16k (i.e. the lower 16 bits) are reserved for internal future usage. Custom
|
||||
// codecs must be registered with an identifier higher than this which the
|
||||
// caller is responsible for ensuring uniquely identifies the specific version
|
||||
// of their codec used in any given log. uint64 provides sufficient space that
|
||||
// a randomly generated identifier is almost certainly unique.
|
||||
Codec uint64
|
||||
|
||||
// IndexStart is the file offset where the index can be read from it's 0 for
|
||||
// tail segments and only set after a segment is sealed.
|
||||
IndexStart uint64
|
||||
|
||||
// CreateTime records when the segment was first created.
|
||||
CreateTime time.Time
|
||||
|
||||
// SealTime records when the segment was sealed. Zero indicates that it's not
|
||||
// sealed yet.
|
||||
SealTime time.Time
|
||||
|
||||
// SizeLimit is the soft limit for the segment's size. The segment file may be
|
||||
// pre-allocated to this size on filesystems that support it. It is a soft
|
||||
// limit in the sense that the final Append usually takes the segment file
|
||||
// past this size before it is considered full and sealed.
|
||||
SizeLimit uint32
|
||||
}
|
||||
|
||||
// SegmentFiler is the interface that provides access to segments to the WAL. It
|
||||
// encapsulated creating, and recovering segments and returning reader or writer
|
||||
// interfaces to interact with them. It's main purpose is to abstract the core
|
||||
// WAL logic both from the actual encoding layer of segment files. You can think
|
||||
// of it as a layer of abstraction above the VFS which abstracts actual file
|
||||
// system operations on files but knows nothing about the format. In tests for
|
||||
// example we can implement a SegmentFiler that is way simpler than the real
|
||||
// encoding/decoding layer on top of a VFS - even an in-memory VFS which makes
|
||||
// tests much simpler to write and run.
|
||||
type SegmentFiler interface {
|
||||
// Create adds a new segment with the given info and returns a writer or an
|
||||
// error.
|
||||
Create(info SegmentInfo) (SegmentWriter, error)
|
||||
|
||||
// RecoverTail is called on an unsealed segment when re-opening the WAL it
|
||||
// will attempt to recover from a possible crash. It will either return an
|
||||
// error, or return a valid segmentWriter that is ready for further appends.
|
||||
// If the expected tail segment doesn't exist it must return an error wrapping
|
||||
// os.ErrNotExist.
|
||||
RecoverTail(info SegmentInfo) (SegmentWriter, error)
|
||||
|
||||
    // Open an already sealed segment for reading. Open may validate the file's
    // header and return an error if it doesn't match the expected info.
    Open(info SegmentInfo) (SegmentReader, error)

    // List returns the set of segment IDs currently stored. It's used by the WAL
    // on recovery to find any segment files that need to be deleted following an
    // unclean shutdown. The returned map is a map of ID -> BaseIndex. BaseIndex
    // is returned to allow subsequent Delete calls to be made.
    List() (map[uint64]uint64, error)

    // Delete removes the segment with the given baseIndex and ID if it exists.
    // Note that baseIndex is technically redundant since ID is unique on its
    // own, but in practice we name files (or keys) with both so that they sort
    // correctly. This interface allows a simpler implementation where we can
    // just delete the file if it exists without having to scan the underlying
    // storage.
    Delete(baseIndex, ID uint64) error
}

// SegmentWriter manages appending logs to the tail segment of the WAL. It's an
// interface to make testing the core WAL simpler. Every SegmentWriter will have
// either `init` or `recover` called once before any other methods. When either
// returns it must either return an error or be ready to accept new writes and
// reads.
type SegmentWriter interface {
    io.Closer
    SegmentReader

    // Append adds one or more entries. It must not return until the entries are
    // durably stored, otherwise raft's guarantees will be compromised. Append
    // must not be called concurrently with any other call to Sealed, Append or
    // ForceSeal.
    Append(entries []LogEntry) error

    // Sealed returns whether the segment is sealed or not. If it is, it returns
    // true and the file offset that its index array starts at, to be saved in
    // metadata. The WAL will call this after every append so it should be
    // relatively cheap in the common case. This design allows the final Append
    // to write out the index or any additional data needed at seal time in the
    // same fsync. Sealed must not be called concurrently with any other call to
    // Sealed, Append or ForceSeal.
    Sealed() (bool, uint64, error)

    // ForceSeal causes the segment to become sealed by writing out an index
    // block. This is not used in the typical flow of append and rotation, but is
    // necessary during truncations where some suffix of the writer needs to be
    // truncated. Rather than manipulate what is on disk in a complex way, the
    // WAL will simply force seal it with whatever state it has already saved and
    // then open a new segment at the right offset for continued writing.
    // ForceSeal may be called on a segment that has already been sealed and
    // should just return the existing index offset in that case. (We don't
    // actually rely on that currently, but it's easier not to assume we'll
    // always call it at most once.) ForceSeal must not be called concurrently
    // with any other call to Sealed, Append or ForceSeal.
    ForceSeal() (uint64, error)

    // LastIndex returns the most recently persisted index in the log. It must
    // respond without blocking on Append since it's needed frequently by read
    // paths that may call it concurrently. Typically this will be loaded from an
    // atomic int. If the segment is empty, LastIndex should return zero.
    LastIndex() uint64
}

// SegmentReader wraps a ReadableFile to allow lookup of logs in an existing
// segment file. It's an interface to make testing the core WAL simpler. The
// first call will always be validate, which passes in the ReaderAt to be used
// for subsequent reads.
type SegmentReader interface {
    io.Closer

    // GetLog returns the raw log entry bytes associated with idx. If the log
    // doesn't exist in this segment, ErrNotFound must be returned.
    GetLog(idx uint64) (*PooledBuffer, error)
}
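
To make the contract concrete, here is a minimal sketch of the write path the WAL runs against a SegmentWriter. This is not code from the package; tail and onSealed are hypothetical names, and the function assumes it lives where SegmentWriter and LogEntry are in scope.

// Sketch of the append path: write entries, then check whether the tail
// sealed itself so a rotation can be scheduled.
func appendAndMaybeRotate(tail SegmentWriter, entries []LogEntry, onSealed func(indexStart uint64)) error {
    if err := tail.Append(entries); err != nil {
        return err // entries are only durable once Append returns nil
    }
    sealed, indexStart, err := tail.Sealed() // cheap; called after every append
    if err != nil {
        return err
    }
    if sealed {
        onSealed(indexStart) // the WAL records indexStart in metadata and rotates
    }
    return nil
}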

vendor/github.com/hashicorp/raft-wal/types/types.go (generated, vendored, new file: 27 lines)
@@ -0,0 +1,27 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package types

import (
    "errors"

    "github.com/hashicorp/raft"
)

var (
    // ErrNotFound is our own version of raft's not-found error. It's important
    // that it's exactly the same value, because the raft lib checks for
    // equality with its own as a crucial part of replication processing
    // (detecting the end of logs and that a snapshot is needed for a follower).
    ErrNotFound = raft.ErrLogNotFound
    ErrCorrupt  = errors.New("WAL is corrupt")
    ErrSealed   = errors.New("segment is sealed")
    ErrClosed   = errors.New("closed")
)

// LogEntry represents an entry that has already been encoded.
type LogEntry struct {
    Index uint64
    Data  []byte
}

vendor/github.com/hashicorp/raft-wal/types/vfs.go (generated, vendored, new file: 59 lines)
@@ -0,0 +1,59 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package types

import "io"

// VFS is the interface the WAL needs to interact with the file system. In
// production it would normally be implemented by RealFS, which interacts with
// the operating system FS using the standard Go os package. It's useful to
// allow testing both to run quicker (by being in memory only) and to make it
// easy to simulate all kinds of disk errors and failure modes without needing
// a more elaborate external test harness like ALICE.
type VFS interface {
    // ListDir returns a list of all files in the specified dir in
    // lexicographical order. If the dir doesn't exist, it must return an error.
    // An empty array with a nil error is assumed to mean that the directory
    // exists and was readable, but contains no files.
    ListDir(dir string) ([]string, error)

    // Create creates a new file with the given name. If a file with the same
    // name already exists an error is returned. If a non-zero size is given,
    // implementations should make a best effort to pre-allocate the file to be
    // that size. The dir must already exist and be writable to the current
    // process.
    Create(dir, name string, size uint64) (WritableFile, error)

    // Delete indicates the file is no longer required. Typically it should be
    // deleted from the underlying system to free disk space.
    Delete(dir, name string) error

    // OpenReader opens an existing file in read-only mode. If the file doesn't
    // exist or permission is denied, an error is returned; otherwise no checks
    // are made about the well-formedness of the file: it may be empty, the
    // wrong size or corrupt in arbitrary ways.
    OpenReader(dir, name string) (ReadableFile, error)

    // OpenWriter opens a file in read-write mode. If the file doesn't exist or
    // permission is denied, an error is returned; otherwise no checks are made
    // about the well-formedness of the file: it may be empty, the wrong size or
    // corrupt in arbitrary ways.
    OpenWriter(dir, name string) (WritableFile, error)
}

// WritableFile provides random read-write access to a file as well as the
// ability to fsync it to disk.
type WritableFile interface {
    io.WriterAt
    io.ReaderAt
    io.Closer

    Sync() error
}

// ReadableFile provides random read access to a file.
type ReadableFile interface {
    io.ReaderAt
    io.Closer
}
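
For tests, ReadableFile can be satisfied by an in-memory buffer. A minimal sketch follows; memFile and newMemFile are hypothetical helpers, not part of this package, and assume "bytes" is imported.

// memFile adapts a bytes.Reader (which already implements io.ReaderAt) to
// the ReadableFile interface; Close is a no-op for memory-backed data.
type memFile struct{ *bytes.Reader }

func (memFile) Close() error { return nil }

func newMemFile(b []byte) ReadableFile {
    return memFile{bytes.NewReader(b)}
}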

vendor/github.com/hashicorp/raft-wal/wal.go (generated, vendored, new file: 957 lines)
@@ -0,0 +1,957 @@
// Copyright (c) HashiCorp, Inc
// SPDX-License-Identifier: MPL-2.0

package wal

import (
    "bytes"
    "encoding/binary"
    "errors"
    "fmt"
    "io"
    "os"
    "sync"
    "sync/atomic"
    "time"

    "github.com/benbjohnson/immutable"

    "github.com/hashicorp/go-hclog"
    "github.com/hashicorp/raft"
    "github.com/hashicorp/raft-wal/metrics"
    "github.com/hashicorp/raft-wal/types"
)

var (
    // Compile-time checks that WAL implements the raft store interfaces.
    _ raft.LogStore          = &WAL{}
    _ raft.MonotonicLogStore = &WAL{}
    _ raft.StableStore       = &WAL{}

    ErrNotFound = types.ErrNotFound
    ErrCorrupt  = types.ErrCorrupt
    ErrSealed   = types.ErrSealed
    ErrClosed   = types.ErrClosed

    DefaultSegmentSize = 64 * 1024 * 1024
)

// WAL is a write-ahead log suitable for github.com/hashicorp/raft.
type WAL struct {
    closed uint32 // accessed atomically; kept first in the struct for 64-bit alignment.

    dir         string
    codec       Codec
    sf          types.SegmentFiler
    metaDB      types.MetaStore
    metrics     metrics.Collector
    log         hclog.Logger
    segmentSize int

    // s is the current state of the WAL files. It is an immutable snapshot that
    // can be accessed without a lock when reading. We only support a single
    // writer, so all methods that mutate either the WAL state or append to the
    // tail of the log must hold writeMu until they complete all changes.
    s atomic.Value // *state

    // writeMu must be held when modifying s or while appending to the tail.
    // Although we take care never to let readers block the writer, we still
    // only allow a single writer to be updating the meta state at once. The
    // mutex must be held from before s is loaded until all modifications to s
    // or appends to the tail are complete.
    writeMu sync.Mutex

    // These chans are used to hand off serial execution for segment rotation to
    // a background goroutine so that StoreLogs can return and allow the caller
    // to get on with other work while we mess with files. The next call to
    // StoreLogs needs to wait until the background work is done though, since
    // the current log is sealed.
    //
    // At the end of StoreLogs, if the segment was sealed, and still holding
    // writeMu, we make awaitRotate non-nil and then send the indexStart on
    // triggerRotate, which is 1-buffered. We then drop the lock and return to
    // the caller. The rotation goroutine reads from triggerRotate in a loop,
    // takes the write lock, performs the rotation, and then closes awaitRotate
    // and sets it to nil before releasing the lock. The next StoreLogs call
    // takes the lock and checks awaitRotate. If it is nil there is no rotation
    // going on so StoreLogs can proceed. If it is non-nil, it releases the
    // lock, waits on the close, and then re-acquires the lock and continues.
    triggerRotate chan uint64
    awaitRotate   chan struct{}
}

type walOpt func(*WAL)

// Open attempts to open the WAL stored in dir. If there are no existing WAL
// files a new WAL will be initialized there. The dir must already exist and be
// readable and writable to the current process. If existing files are found,
// recovery is attempted. If recovery is not possible an error is returned,
// otherwise the returned *WAL is in a state ready for use.
func Open(dir string, opts ...walOpt) (*WAL, error) {
    w := &WAL{
        dir:           dir,
        triggerRotate: make(chan uint64, 1),
    }
    // Apply options
    for _, opt := range opts {
        opt(w)
    }
    if err := w.applyDefaultsAndValidate(); err != nil {
        return nil, err
    }

    // Load or create the metaDB
    persisted, err := w.metaDB.Load(w.dir)
    if err != nil {
        return nil, err
    }

    newState := state{
        segments:      &immutable.SortedMap[uint64, segmentState]{},
        nextSegmentID: persisted.NextSegmentID,
    }

    // Get the set of all persisted segments so we can prune it down to just the
    // unused ones as we go.
    toDelete, err := w.sf.List()
    if err != nil {
        return nil, err
    }

    // Build the state
    recoveredTail := false
    for i, si := range persisted.Segments {

        // Verify we can decode the entries.
        // TODO: support multiple decoders to allow rotating the codec.
        if si.Codec != w.codec.ID() {
            return nil, fmt.Errorf("segment with BaseIndex=%d uses an unknown codec", si.BaseIndex)
        }

        // We want to keep this segment since it's still in the metaDB list!
        delete(toDelete, si.ID)

        if si.SealTime.IsZero() {
            // This is an unsealed segment. It _must_ be the last one. Safety check!
            if i < len(persisted.Segments)-1 {
                return nil, fmt.Errorf("unsealed segment is not at tail")
            }

            // Try to recover this segment
            sw, err := w.sf.RecoverTail(si)
            if errors.Is(err, os.ErrNotExist) {
                // Handle a missing file specially. This can happen if we
                // crashed right after persisting the metadata but before we
                // managed to persist the new file. In fact it could happen if
                // the whole machine loses power any time before the fsync of
                // the parent dir, since the FS could lose the dir entry for the
                // new file until that point. We do ensure we pass that point
                // before we return from Append for the first time in that new
                // file so that's safe, but we have to handle recovering from
                // that case here.
                sw, err = w.sf.Create(si)
            }
            if err != nil {
                return nil, err
            }
            // Set the tail and "reader" for this segment
            ss := segmentState{
                SegmentInfo: si,
                r:           sw,
            }
            newState.tail = sw
            newState.segments = newState.segments.Set(si.BaseIndex, ss)
            recoveredTail = true

            // We're done with this loop; break here to avoid nesting all the
            // rest of the logic!
            break
        }

        // This is a sealed segment

        // Open a segment reader
        sr, err := w.sf.Open(si)
        if err != nil {
            return nil, err
        }

        // Store the open reader to get logs from
        ss := segmentState{
            SegmentInfo: si,
            r:           sr,
        }
        newState.segments = newState.segments.Set(si.BaseIndex, ss)
    }

    if !recoveredTail {
        // There was no unsealed segment at the end. This can only really happen
        // when the log is empty with zero segments (either on creation or after
        // a truncation that removed all segments) since we otherwise never
        // allow the state to have a sealed tail segment. But this logic works
        // regardless!

        // Create a new segment. We use a baseIndex of 1 even though the first
        // append might be much higher - we'll allow that since we know we have
        // no records yet and so lastIndex will also be 0.
        si := w.newSegment(newState.nextSegmentID, 1)
        newState.nextSegmentID++
        ss := segmentState{
            SegmentInfo: si,
        }
        newState.segments = newState.segments.Set(si.BaseIndex, ss)

        // Persist the new meta to "commit" it even before we create the file so
        // we don't attempt to recreate files with duplicate IDs on a later
        // failure.
        if err := w.metaDB.CommitState(newState.Persistent()); err != nil {
            return nil, err
        }

        // Create the new segment file
        sw, err := w.sf.Create(si)
        if err != nil {
            return nil, err
        }
        newState.tail = sw
        // Update the segment in memory so we have a reader for the new segment.
        // We don't need to commit again as this isn't changing the persisted
        // metadata about the segment.
        ss.r = sw
        newState.segments = newState.segments.Set(si.BaseIndex, ss)
    }

    // Store the in-memory state (it was already persisted if we modified it
    // above). There are no readers yet since we are constructing a new WAL, so
    // we don't need to jump through the mutateState hoops yet!
    w.s.Store(&newState)

    // Delete any unused segment files left over after a crash.
    w.deleteSegments(toDelete)

    // Start the rotation routine
    go w.runRotate()

    return w, nil
}
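
For orientation, a minimal sketch of opening a WAL and handing it to github.com/hashicorp/raft. The directory path and myFSM are placeholders, imports and real error handling are elided, and the in-memory transport and discard snapshot store are just stand-ins to keep the example self-contained.

logStore, err := wal.Open("/var/lib/myapp/wal") // dir must already exist
if err != nil {
    log.Fatal(err)
}
defer logStore.Close()

cfg := raft.DefaultConfig()
cfg.LocalID = "node-1"
snaps := raft.NewDiscardSnapshotStore()
_, trans := raft.NewInmemTransport("node-1")

// The same *WAL value serves as both the LogStore and the StableStore.
r, err := raft.NewRaft(cfg, myFSM, logStore, logStore, snaps, trans)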

// stateTxn represents a transaction body that mutates the state under the
// write lock. s is already a shallow copy of the current state that may be
// mutated as needed. If a nil error is returned, s will be atomically set as
// the new state. If a non-nil finalizer func is returned it will be atomically
// attached to the old state after it's been replaced, but before the write lock
// is released. The finalizer will be called exactly once, when all current
// readers have released the old state. If the transaction func returns a
// non-nil postCommit, it is executed after the new state has been committed to
// the metaDB. It may mutate the state further (captured by closure) before the
// state is atomically committed in memory, but the update won't be persisted
// to disk in this transaction. This is used where we need sequencing between
// committing meta and creating and opening a new file: both need to happen in
// memory in one transaction, but the disk commit isn't at the end! If
// postCommit returns an error, the state is not updated in memory and the
// error is returned to the mutate caller.
type stateTxn func(s *state) (finalizer func(), postCommit func() error, err error)

func (w *WAL) loadState() *state {
    return w.s.Load().(*state)
}

// mutateStateLocked executes a stateTxn. writeMu MUST be held while calling
// this.
func (w *WAL) mutateStateLocked(tx stateTxn) error {
    s := w.loadState()
    s.acquire()
    defer s.release()

    newS := s.clone()
    fn, postCommit, err := tx(&newS)
    if err != nil {
        return err
    }

    // Commit updates to meta
    if err := w.metaDB.CommitState(newS.Persistent()); err != nil {
        return err
    }

    if postCommit != nil {
        if err := postCommit(); err != nil {
            return err
        }
    }

    w.s.Store(&newS)
    s.finalizer.Store(fn)
    return nil
}

// acquireState should be used by all readers to fetch the current state. The
// returned release func must be called when no further accesses to the state
// or the data within it will be performed, to free old files that may have
// been truncated concurrently.
func (w *WAL) acquireState() (*state, func()) {
    s := w.loadState()
    return s, s.acquire()
}

// newSegment creates a types.SegmentInfo with the passed ID and baseIndex,
// filling in the segment parameters based on the current WAL configuration.
func (w *WAL) newSegment(ID, baseIndex uint64) types.SegmentInfo {
    return types.SegmentInfo{
        ID:        ID,
        BaseIndex: baseIndex,
        MinIndex:  baseIndex,
        SizeLimit: uint32(w.segmentSize),

        // TODO: make these configurable
        Codec:      CodecBinaryV1,
        CreateTime: time.Now(),
    }
}

// FirstIndex returns the first index written, or 0 for no entries.
func (w *WAL) FirstIndex() (uint64, error) {
    if err := w.checkClosed(); err != nil {
        return 0, err
    }
    s, release := w.acquireState()
    defer release()
    return s.firstIndex(), nil
}

// LastIndex returns the last index written, or 0 for no entries.
func (w *WAL) LastIndex() (uint64, error) {
    if err := w.checkClosed(); err != nil {
        return 0, err
    }
    s, release := w.acquireState()
    defer release()
    return s.lastIndex(), nil
}

// GetLog gets a log entry at a given index.
func (w *WAL) GetLog(index uint64, log *raft.Log) error {
    if err := w.checkClosed(); err != nil {
        return err
    }
    s, release := w.acquireState()
    defer release()
    w.metrics.IncrementCounter("log_entries_read", 1)

    raw, err := s.getLog(index)
    if err != nil {
        return err
    }
    w.metrics.IncrementCounter("log_entry_bytes_read", uint64(len(raw.Bs)))
    defer raw.Close()

    // Decode the log
    return w.codec.Decode(raw.Bs, log)
}

// StoreLog stores a log entry.
func (w *WAL) StoreLog(log *raft.Log) error {
    return w.StoreLogs([]*raft.Log{log})
}

// StoreLogs stores multiple log entries.
func (w *WAL) StoreLogs(logs []*raft.Log) error {
    if err := w.checkClosed(); err != nil {
        return err
    }
    if len(logs) < 1 {
        return nil
    }

    w.writeMu.Lock()
    defer w.writeMu.Unlock()

    // Ensure a queued rotation has completed before us if we raced with it for
    // the write lock.
    w.awaitRotationLocked()

    s, release := w.acquireState()
    defer release()

    // Verify monotonicity since we assume it
    lastIdx := s.lastIndex()

    // Special case: if the log is currently empty and this is the first append,
    // we allow any starting index. We've already created an empty tail segment
    // though, and probably started at index 1. Rather than break the invariant
    // that BaseIndex is the same as the first index in the segment (which
    // causes lots of extra complexity lower down), we simply accept the
    // additional cost in this rare case of removing the current tail and
    // re-creating it with the correct BaseIndex for the first log we are about
    // to append. In practice this only happens on startup of a new server, or
    // after a user snapshot restore, which are both rare enough events that the
    // cost is not significant since the cost of creating other state or
    // restoring snapshots is larger anyway. We could theoretically defer
    // creating the segment at all until we know for sure, but that is more
    // complex internally since then everything has to handle the uninitialized
    // case where there is no tail yet with special cases.
    ti := s.getTailInfo()
    // Note we check index != ti.BaseIndex rather than index != 1 so that this
    // works even if we choose to initialize first segments to a BaseIndex other
    // than 1. For example it might be marginally more performant to choose to
    // initialize to the old MaxIndex + 1 after a truncate, since that is what
    // our raft library currently uses after a restore, so it would avoid this
    // case on the next append while still being generally safe.
    if lastIdx == 0 && logs[0].Index != ti.BaseIndex {
        if err := w.resetEmptyFirstSegmentBaseIndex(logs[0].Index); err != nil {
            return err
        }

        // Re-read the state now that we just changed it.
        s2, release2 := w.acquireState()
        defer release2()

        // Overwrite the state we read before so the code below uses the new state
        s = s2
    }

    // Encode logs
    nBytes := uint64(0)
    encoded := make([]types.LogEntry, len(logs))
    for i, l := range logs {
        if lastIdx > 0 && l.Index != (lastIdx+1) {
            return fmt.Errorf("non-monotonic log entries: tried to append index %d after %d", l.Index, lastIdx)
        }
        // Need a new buffer each time because Data is just a slice, so if we
        // re-used the buffer then all entries would end up pointing to the same
        // underlying data, which would contain only the final log value!
        var buf bytes.Buffer
        if err := w.codec.Encode(l, &buf); err != nil {
            return err
        }
        encoded[i].Data = buf.Bytes()
        encoded[i].Index = l.Index
        lastIdx = l.Index
        nBytes += uint64(len(encoded[i].Data))
    }
    if err := s.tail.Append(encoded); err != nil {
        return err
    }
    w.metrics.IncrementCounter("log_appends", 1)
    w.metrics.IncrementCounter("log_entries_written", uint64(len(encoded)))
    w.metrics.IncrementCounter("log_entry_bytes_written", nBytes)

    // Check if we need to roll logs
    sealed, indexStart, err := s.tail.Sealed()
    if err != nil {
        return err
    }
    if sealed {
        // Rotate asynchronously to allow the caller to do more work while we
        // mess with files.
        w.triggerRotateLocked(indexStart)
    }
    return nil
}
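
Normally the raft library drives StoreLogs, but the monotonicity contract is easy to see directly. A sketch reusing the logStore value from the Open example above:

entries := []*raft.Log{
    {Index: 1, Term: 1, Type: raft.LogCommand, Data: []byte("a")},
    {Index: 2, Term: 1, Type: raft.LogCommand, Data: []byte("b")},
}
if err := logStore.StoreLogs(entries); err != nil {
    log.Fatal(err) // e.g. a gap in indexes yields the non-monotonic error above
}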

func (w *WAL) awaitRotationLocked() {
    awaitCh := w.awaitRotate
    if awaitCh != nil {
        // We managed to race for writeMu with the background rotate operation,
        // which needs to complete first. Wait for it to complete.
        w.writeMu.Unlock()
        <-awaitCh
        w.writeMu.Lock()
    }
}

// DeleteRange deletes a range of log entries. The range is inclusive.
// Implements raft.LogStore. Note that we only support deleting ranges that are
// a suffix or prefix of the log.
func (w *WAL) DeleteRange(min uint64, max uint64) error {
    if err := w.checkClosed(); err != nil {
        return err
    }
    if min > max {
        // Empty inclusive range.
        return nil
    }

    w.writeMu.Lock()
    defer w.writeMu.Unlock()

    // Ensure a queued rotation has completed before us if we raced with it for
    // the write lock.
    w.awaitRotationLocked()

    s, release := w.acquireState()
    defer release()

    // Work out what type of truncation this is.
    first, last := s.firstIndex(), s.lastIndex()
    switch {
    // |min----max|
    //              |first====last|
    // or
    //                 |min----max|
    // |first====last|
    case max < first || min > last:
        // None of the range exists at all, so it's a no-op.
        return nil

    // |min----max|
    //       |first====last|
    // or
    // |min--------------max|
    //    |first====last|
    // or
    // |min--max|
    // |first====last|
    case min <= first: // max >= first is implied by the first case not matching
        // Note we allow head truncations where max > last, which effectively
        // removes the entire log.
        return w.truncateHeadLocked(max + 1)

    //          |min----max|
    // |first====last|
    // or
    // |min--------------max|
    //    |first====last|
    case max >= last: // min <= last is implied by the first case not matching
        return w.truncateTailLocked(min - 1)

    //     |min----max|
    // |first========last|
    default:
        // Everything else is neither a suffix nor a prefix, so unsupported.
        return fmt.Errorf("only suffix or prefix ranges may be deleted from log")
    }
}
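
The prefix/suffix restriction in practice, as a sketch, assuming logStore currently holds indexes 1 through 100:

_ = logStore.DeleteRange(1, 50)     // prefix truncation: log becomes 51..100
_ = logStore.DeleteRange(90, 100)   // suffix truncation: log becomes 51..89
err := logStore.DeleteRange(60, 70) // interior range: rejected with an error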

// Set implements raft.StableStore.
func (w *WAL) Set(key []byte, val []byte) error {
    if err := w.checkClosed(); err != nil {
        return err
    }
    w.metrics.IncrementCounter("stable_sets", 1)
    return w.metaDB.SetStable(key, val)
}

// Get implements raft.StableStore.
func (w *WAL) Get(key []byte) ([]byte, error) {
    if err := w.checkClosed(); err != nil {
        return nil, err
    }
    w.metrics.IncrementCounter("stable_gets", 1)
    return w.metaDB.GetStable(key)
}

// SetUint64 implements raft.StableStore. We assume the same key space as Set
// and Get, so the caller is responsible for ensuring they don't call both Set
// and SetUint64 for the same key.
func (w *WAL) SetUint64(key []byte, val uint64) error {
    var buf [8]byte
    binary.LittleEndian.PutUint64(buf[:], val)
    return w.Set(key, buf[:])
}

// GetUint64 implements raft.StableStore. We assume the same key space as Set
// and Get, and that the key was previously set with SetUint64; it returns an
// undefined value (possibly with a nil error) if not.
func (w *WAL) GetUint64(key []byte) (uint64, error) {
    raw, err := w.Get(key)
    if err != nil {
        return 0, err
    }
    if len(raw) == 0 {
        // Not set; return zero per the interface contract.
        return 0, nil
    }
    // At least a tiny bit of checking is possible
    if len(raw) != 8 {
        return 0, fmt.Errorf("GetUint64 called on a non-uint64 key")
    }
    return binary.LittleEndian.Uint64(raw), nil
}
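
The StableStore half is what raft uses for small persistent metadata such as the current term and vote. A round-trip sketch (the "CurrentTerm" key here just mirrors the key the raft library itself uses):

if err := logStore.SetUint64([]byte("CurrentTerm"), 7); err != nil {
    log.Fatal(err)
}
term, _ := logStore.GetUint64([]byte("CurrentTerm")) // term == 7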

func (w *WAL) triggerRotateLocked(indexStart uint64) {
    if atomic.LoadUint32(&w.closed) == 1 {
        return
    }
    w.awaitRotate = make(chan struct{})
    w.triggerRotate <- indexStart
}

func (w *WAL) runRotate() {
    for {
        indexStart := <-w.triggerRotate

        w.writeMu.Lock()

        // Either triggerRotate was closed by Close, or Close raced with a real
        // trigger; either way shut down without changing anything else. In the
        // second case the segment file is sealed but the metadata isn't updated
        // yet. We have to handle that case during recovery anyway, so it's
        // simpler not to try to complete the rotation here on an already-closed
        // WAL.
        closed := atomic.LoadUint32(&w.closed)
        if closed == 1 {
            w.writeMu.Unlock()
            return
        }

        err := w.rotateSegmentLocked(indexStart)
        if err != nil {
            // The only possible errors indicate bugs and could probably validly
            // be panics, but be conservative and just attempt to log them
            // instead!
            w.log.Error("rotate error", "err", err)
        }
        done := w.awaitRotate
        w.awaitRotate = nil
        w.writeMu.Unlock()
        // Now we are done, close the channel to unblock the waiting writer if
        // there is one.
        close(done)
    }
}

func (w *WAL) rotateSegmentLocked(indexStart uint64) error {
    txn := func(newState *state) (func(), func() error, error) {
        // Mark the current tail as sealed in segments
        tail := newState.getTailInfo()
        if tail == nil {
            // Can't happen
            return nil, nil, fmt.Errorf("no tail found during rotate")
        }

        // Note that tail is a copy since it's a value type. Even though this is
        // a pointer here, it's pointing to a copy on the heap that was made in
        // getTailInfo above, so we can mutate it safely and update the
        // immutable state with our version.
        tail.SealTime = time.Now()
        tail.MaxIndex = newState.tail.LastIndex()
        tail.IndexStart = indexStart
        w.metrics.SetGauge("last_segment_age_seconds", uint64(tail.SealTime.Sub(tail.CreateTime).Seconds()))

        // Update the old tail with the seal time etc.
        newState.segments = newState.segments.Set(tail.BaseIndex, *tail)

        post, err := w.createNextSegment(newState)
        return nil, post, err
    }
    w.metrics.IncrementCounter("segment_rotations", 1)
    return w.mutateStateLocked(txn)
}

// createNextSegment is passed a mutable copy of the new state, ready to have a
// new segment appended. newState must be a copy taken under the write lock
// (which is still held by the caller) and its segments map must contain all
// non-tail segments that should be in the log, all of which must be sealed at
// this point. The new segment's baseIndex will be the current last segment's
// MaxIndex + 1 (or 1 if there is no current tail segment). The func returned is
// to be executed post transaction commit to create the actual segment file.
func (w *WAL) createNextSegment(newState *state) (func() error, error) {
    // Find the existing sealed tail
    tail := newState.getTailInfo()

    // If there is no tail, the next baseIndex is 1 (or the requested next base
    // index).
    nextBaseIndex := uint64(1)
    if tail != nil {
        nextBaseIndex = tail.MaxIndex + 1
    } else if newState.nextBaseIndex > 0 {
        nextBaseIndex = newState.nextBaseIndex
    }

    // Create a new segment
    newTail := w.newSegment(newState.nextSegmentID, nextBaseIndex)
    newState.nextSegmentID++
    ss := segmentState{
        SegmentInfo: newTail,
    }
    newState.segments = newState.segments.Set(newTail.BaseIndex, ss)

    // We're ready to commit now! Return a postCommit that will actually create
    // the segment file once the meta is persisted. We don't do it in parallel
    // because we don't want to persist a file with an ID before that ID is
    // durably stored, in case the metaDB write doesn't happen.
    post := func() error {
        // Now create the new segment for writing.
        sw, err := w.sf.Create(newTail)
        if err != nil {
            return err
        }
        newState.tail = sw

        // Also cache the reader/log getter, which is also the writer. We don't
        // bother reopening read-only since we assume we have exclusive access
        // anyway and only use this read-only interface once the segment is
        // sealed.
        ss.r = newState.tail

        // We need to re-insert it since newTail is a copy, not a reference.
        newState.segments = newState.segments.Set(newTail.BaseIndex, ss)
        return nil
    }
    return post, nil
}

// resetEmptyFirstSegmentBaseIndex is used to change the baseIndex of the tail
// segment file if it's empty. This is needed when the first log written has a
// different index to the base index that was assumed when the tail was created
// (e.g. on startup). It will return an error if the log is not currently empty.
func (w *WAL) resetEmptyFirstSegmentBaseIndex(newBaseIndex uint64) error {
    txn := stateTxn(func(newState *state) (func(), func() error, error) {
        if newState.lastIndex() > 0 {
            return nil, nil, fmt.Errorf("can't reset BaseIndex on segment, log is not empty")
        }

        fin := func() {}

        tailSeg := newState.getTailInfo()
        if tailSeg != nil {
            // There is an existing tail. Check if it needs to be replaced.
            if tailSeg.BaseIndex == newBaseIndex {
                // It's fine as it is; no-op.
                return nil, nil, nil
            }
            // It needs to be removed
            newState.segments = newState.segments.Delete(tailSeg.BaseIndex)
            newState.tail = nil
            fin = func() {
                w.closeSegments([]io.Closer{tailSeg.r})
                w.deleteSegments(map[uint64]uint64{tailSeg.ID: tailSeg.BaseIndex})
            }
        }

        // Ensure the newly created tail has the right base index
        newState.nextBaseIndex = newBaseIndex

        // Create the new segment
        post, err := w.createNextSegment(newState)
        if err != nil {
            return nil, nil, err
        }

        return fin, post, nil
    })

    return w.mutateStateLocked(txn)
}

func (w *WAL) truncateHeadLocked(newMin uint64) error {
    txn := stateTxn(func(newState *state) (func(), func() error, error) {
        oldLastIndex := newState.lastIndex()

        // Iterate the segments to find any that are entirely deleted.
        toDelete := make(map[uint64]uint64)
        toClose := make([]io.Closer, 0, 1)
        it := newState.segments.Iterator()
        var head *segmentState
        nTruncated := uint64(0)
        for !it.Done() {
            _, seg, _ := it.Next()

            maxIdx := seg.MaxIndex
            // If the segment is the tail (unsealed) or a sealed segment that
            // contains this new min, then we've found the new head.
            if seg.SealTime.IsZero() {
                maxIdx = newState.lastIndex()
                // This is the tail; check if it actually has any content to keep
                if maxIdx >= newMin {
                    head = &seg
                    break
                }
            } else if seg.MaxIndex >= newMin {
                head = &seg
                break
            }

            toDelete[seg.ID] = seg.BaseIndex
            toClose = append(toClose, seg.r)
            newState.segments = newState.segments.Delete(seg.BaseIndex)
            nTruncated += (maxIdx - seg.MinIndex + 1) // +1 because MaxIndex is inclusive
        }

        // There may not be any segments (left), but if there are, update the
        // new head's MinIndex.
        var postCommit func() error
        if head != nil {
            // Account for the entries dropped from within the new head segment.
            nTruncated += (newMin - head.MinIndex)
            head.MinIndex = newMin
            newState.segments = newState.segments.Set(head.BaseIndex, *head)
        } else {
            // If there is no head any more, then there is no tail either! We
            // should create a new blank one ready for use when we next append,
            // like we do during initialization. As an optimization, we create
            // it with a BaseIndex of the old MaxIndex + 1 since this is what
            // our raft library uses as the next log index after a restore, so
            // this avoids recreating the files a second time on the next
            // append.
            newState.nextBaseIndex = oldLastIndex + 1
            pc, err := w.createNextSegment(newState)
            if err != nil {
                return nil, nil, err
            }
            postCommit = pc
        }
        w.metrics.IncrementCounter("head_truncations", nTruncated)

        // Return a finalizer that will be called when all readers are done with
        // the segments in the current state, to close and delete old segments.
        fin := func() {
            w.closeSegments(toClose)
            w.deleteSegments(toDelete)
        }
        return fin, postCommit, nil
    })

    return w.mutateStateLocked(txn)
}

func (w *WAL) truncateTailLocked(newMax uint64) error {
    txn := stateTxn(func(newState *state) (func(), func() error, error) {
        // Reverse-iterate the segments to find any that are entirely deleted.
        toDelete := make(map[uint64]uint64)
        toClose := make([]io.Closer, 0, 1)
        it := newState.segments.Iterator()
        it.Last()

        nTruncated := uint64(0)
        for !it.Done() {
            _, seg, _ := it.Prev()

            if seg.BaseIndex <= newMax {
                // We're done
                break
            }

            maxIdx := seg.MaxIndex
            if seg.SealTime.IsZero() {
                maxIdx = newState.lastIndex()
            }

            toDelete[seg.ID] = seg.BaseIndex
            toClose = append(toClose, seg.r)
            newState.segments = newState.segments.Delete(seg.BaseIndex)
            nTruncated += (maxIdx - seg.MinIndex + 1) // +1 because MaxIndex is inclusive
        }

        tail := newState.getTailInfo()
        if tail != nil {
            maxIdx := tail.MaxIndex

            // Check that the tail is sealed (it won't be if we didn't need to
            // remove the actual partial tail above).
            if tail.SealTime.IsZero() {
                // Actually seal it (i.e. force it to write out an index block
                // wherever it got to).
                indexStart, err := newState.tail.ForceSeal()
                if err != nil {
                    return nil, nil, err
                }
                tail.IndexStart = indexStart
                tail.SealTime = time.Now()
                maxIdx = newState.lastIndex()
            }
            // Update the MaxIndex
            nTruncated += (maxIdx - newMax)
            tail.MaxIndex = newMax

            // And update the tail in the new state
            newState.segments = newState.segments.Set(tail.BaseIndex, *tail)
        }

        // Create the new tail segment
        pc, err := w.createNextSegment(newState)
        if err != nil {
            return nil, nil, err
        }
        w.metrics.IncrementCounter("tail_truncations", nTruncated)

        // Return a finalizer that will be called when all readers are done with
        // the segments in the current state, to close and delete old segments.
        fin := func() {
            w.closeSegments(toClose)
            w.deleteSegments(toDelete)
        }
        return fin, pc, nil
    })

    return w.mutateStateLocked(txn)
}

func (w *WAL) deleteSegments(toDelete map[uint64]uint64) {
    for ID, baseIndex := range toDelete {
        if err := w.sf.Delete(baseIndex, ID); err != nil {
            // This is not fatal. We can continue; the old files might just need
            // manual cleanup somehow.
            w.log.Error("failed to delete old segment", "baseIndex", baseIndex, "id", ID, "err", err)
        }
    }
}

func (w *WAL) closeSegments(toClose []io.Closer) {
    for _, c := range toClose {
        if c != nil {
            if err := c.Close(); err != nil {
                // Shouldn't happen!
                w.log.Error("error closing old segment file", "err", err)
            }
        }
    }
}

func (w *WAL) checkClosed() error {
    closed := atomic.LoadUint32(&w.closed)
    if closed != 0 {
        return ErrClosed
    }
    return nil
}

// Close closes all open files related to the WAL. The WAL is in an invalid
// state and should not be used again after this is called. It is safe (though
// a no-op) to call it multiple times, and concurrent reads and writes will
// either complete safely or get ErrClosed returned, depending on sequencing.
// Generally reads and writes should be stopped before calling this to avoid
// propagating errors to users during shutdown, but it's safe from a data-race
// perspective.
func (w *WAL) Close() error {
    if old := atomic.SwapUint32(&w.closed, 1); old != 0 {
        // Only close once
        return nil
    }

    // Wait for writes
    w.writeMu.Lock()
    defer w.writeMu.Unlock()

    // It doesn't matter if there is a rotation scheduled because runRotate will
    // exit when it sees we are closed anyway.
    w.awaitRotate = nil
    // Wake and terminate the runRotate goroutine
    close(w.triggerRotate)

    // Replace the state with a nil state
    s := w.loadState()
    s.acquire()
    defer s.release()

    w.s.Store(&state{})

    // The old state might still be in use by readers, so attach closers for all
    // open segment files.
    toClose := make([]io.Closer, 0, s.segments.Len())
    it := s.segments.Iterator()
    for !it.Done() {
        _, seg, _ := it.Next()
        if seg.r != nil {
            toClose = append(toClose, seg.r)
        }
    }
    // Store a finalizer to run once all readers are done. There can't be an
    // existing finalizer since this was the active state, read under a write
    // lock, and finalizers are only set on states that have been replaced under
    // that same lock.
    s.finalizer.Store(func() {
        w.closeSegments(toClose)
    })

    return w.metaDB.Close()
}

// IsMonotonic implements raft.MonotonicLogStore and informs the raft library
// that this store will only allow consecutive log indexes with no gaps.
func (w *WAL) IsMonotonic() bool {
    return true
}

vendor/golang.org/x/exp/LICENSE (generated, vendored, new file: 27 lines)
@@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

   * Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
   * Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

vendor/golang.org/x/exp/constraints/constraints.go (generated, vendored, new file: 50 lines)
@@ -0,0 +1,50 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package constraints defines a set of useful constraints to be used
// with type parameters.
package constraints

// Signed is a constraint that permits any signed integer type.
// If future releases of Go add new predeclared signed integer types,
// this constraint will be modified to include them.
type Signed interface {
    ~int | ~int8 | ~int16 | ~int32 | ~int64
}

// Unsigned is a constraint that permits any unsigned integer type.
// If future releases of Go add new predeclared unsigned integer types,
// this constraint will be modified to include them.
type Unsigned interface {
    ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr
}

// Integer is a constraint that permits any integer type.
// If future releases of Go add new predeclared integer types,
// this constraint will be modified to include them.
type Integer interface {
    Signed | Unsigned
}

// Float is a constraint that permits any floating-point type.
// If future releases of Go add new predeclared floating-point types,
// this constraint will be modified to include them.
type Float interface {
    ~float32 | ~float64
}

// Complex is a constraint that permits any complex numeric type.
// If future releases of Go add new predeclared complex numeric types,
// this constraint will be modified to include them.
type Complex interface {
    ~complex64 | ~complex128
}

// Ordered is a constraint that permits any ordered type: any type
// that supports the operators < <= >= >.
// If future releases of Go add new ordered types,
// this constraint will be modified to include them.
type Ordered interface {
    Integer | Float | ~string
}
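
A typical use of these constraints is a generic helper. An illustrative sketch, not part of the package:

package main

import "golang.org/x/exp/constraints"

// Max works for any Ordered type: integers, floats and strings.
func Max[T constraints.Ordered](a, b T) T {
    if a > b {
        return a
    }
    return b
}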

vendor/modules.txt (vendored: 23 lines changed)
@@ -1,6 +1,9 @@
# github.com/armon/go-metrics v0.4.1
## explicit; go 1.12
github.com/armon/go-metrics
# github.com/benbjohnson/immutable v0.4.0
## explicit; go 1.18
github.com/benbjohnson/immutable
# github.com/beorn7/perks v1.0.1
## explicit; go 1.11
github.com/beorn7/perks/quantile
@@ -10,13 +13,22 @@ github.com/boltdb/bolt
# github.com/cespare/xxhash/v2 v2.3.0
## explicit; go 1.11
github.com/cespare/xxhash/v2
# github.com/coreos/etcd v3.3.27+incompatible
## explicit
github.com/coreos/etcd/pkg/fileutil
# github.com/coreos/go-semver v0.3.1
## explicit; go 1.8
github.com/coreos/go-semver/semver
# github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf
## explicit
github.com/coreos/go-systemd/journal
# github.com/coreos/go-systemd/v22 v22.5.0
## explicit; go 1.12
github.com/coreos/go-systemd/v22/daemon
github.com/coreos/go-systemd/v22/journal
# github.com/coreos/pkg v0.0.0-20220810130054-c7d1c02cb6cf
## explicit
github.com/coreos/pkg/capnslog
# github.com/denisbrodbeck/machineid v1.0.1
## explicit
github.com/denisbrodbeck/machineid
@@ -61,6 +73,14 @@ github.com/hashicorp/raft
# github.com/hashicorp/raft-boltdb/v2 v2.3.1
## explicit; go 1.20
github.com/hashicorp/raft-boltdb/v2
# github.com/hashicorp/raft-wal v0.4.2
## explicit; go 1.18
github.com/hashicorp/raft-wal
github.com/hashicorp/raft-wal/fs
github.com/hashicorp/raft-wal/metadb
github.com/hashicorp/raft-wal/metrics
github.com/hashicorp/raft-wal/segment
github.com/hashicorp/raft-wal/types
# github.com/json-iterator/go v1.1.12
## explicit; go 1.12
github.com/json-iterator/go
@@ -156,6 +176,9 @@ go.uber.org/zap/internal/pool
go.uber.org/zap/internal/stacktrace
go.uber.org/zap/zapcore
go.uber.org/zap/zapgrpc
# golang.org/x/exp v0.0.0-20220827204233-334a2380cb91
## explicit; go 1.18
golang.org/x/exp/constraints
# golang.org/x/net v0.35.0
## explicit; go 1.18
golang.org/x/net/http/httpguts