// Package events relays libvirt/QEMU domain lifecycle events to the
// deevirt manager and runs the node heartbeat and emergency-stop
// (stonith) logic for the local hypervisor.
package events
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"strings"
|
|
"time"
|
|
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/grpc/credentials/insecure"
|
|
"google.golang.org/protobuf/types/known/timestamppb"
|
|
go_libvirt "libvirt.org/go/libvirt"
|
|
|
|
"deevirt.fr/compute/pkg/config"
|
|
"deevirt.fr/compute/pkg/libvirt"
|
|
pb "deevirt.fr/compute/pkg/proto"
|
|
"deevirt.fr/compute/pkg/schema"
|
|
)
|
|
|
|
// qemu handles libvirt/QEMU events for the local hypervisor and keeps
// the cluster topology required by the emergency (stonith) procedure.
type qemu struct {
	clientVirt *go_libvirt.Connect // connection to the local libvirt daemon
	config     *config.Config      // node configuration (node ID, TLS flag, manager peers)
	nodes      schema.Node         // last topology pushed by the manager; read by stonith
}
|
|
|
|
func NewQemu(c *go_libvirt.Connect) qemu {
|
|
config, _ := config.New()
|
|
|
|
return qemu{
|
|
clientVirt: c,
|
|
config: config,
|
|
}
|
|
}
|
|
|
|
/*
|
|
Si l'hote est isolé, il faut impérativement arrêter les VMs.
|
|
*/
|
|
func (q qemu) stonith(ctx context.Context) {
|
|
log.Printf("Perte de la communication avec les manager, procédure d'urgence enclenché.")
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
// Le service est de retour, on annule le stonith
|
|
log.Printf("L'accessibilité avec les manager est revenue, la procédure d'urgence est avortée.")
|
|
return
|
|
case <-time.After(10 * time.Second):
|
|
if len(q.nodes) > 0 {
|
|
// On controle l'accessibilité des autres serveurs via libvirt, si un serveur est accessible, on peut supposer un problème avec le manager
|
|
for _, domData := range q.nodes {
|
|
_, err := libvirt.New(domData.IpManagement, q.config.LibvirtTLS)
|
|
if err == nil {
|
|
log.Printf("Au moins un noeud est joignable, la procédure d'urgence est avortée.")
|
|
return
|
|
}
|
|
}
|
|
} else {
|
|
log.Printf("Le noeud est indépendant, on avorte")
|
|
return
|
|
}
|
|
|
|
// Manager inaccessible et autres noeuds libvirt aussi
|
|
log.Printf("Urgence ! Arrêt de toutes les VMs en cours d'exécution")
|
|
doms, _ := q.clientVirt.ListAllDomains(go_libvirt.CONNECT_LIST_DOMAINS_ACTIVE | go_libvirt.CONNECT_LIST_DOMAINS_PAUSED)
|
|
for _, dom := range doms {
|
|
dom.Destroy()
|
|
}
|
|
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
On se connecte au manager afin d'envoyer une pulsation de disponibilité toutes les 1 secondes.
|
|
*/
|
|
func (q qemu) heartbeat() {
|
|
var destroyCtx context.Context
|
|
var destroyCancel context.CancelFunc
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
var retryPolicy = `{
|
|
"methodConfig": [{
|
|
"retryPolicy": {
|
|
"MaxAttempts": 40,
|
|
"InitialBackoff": "1s",
|
|
"MaxBackoff": "10s",
|
|
"BackoffMultiplier": 1.0,
|
|
"retryableStatusCodes": ["UNAVAILABLE", "DEADLINE_EXCEEDED"]
|
|
},
|
|
"waitForReady": true
|
|
}],
|
|
"loadBalancingConfig": [{
|
|
"round_robin": {}
|
|
}]
|
|
}`
|
|
|
|
conn, err := grpc.NewClient(strings.Join(q.config.Manager.Peers, ","), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithDefaultServiceConfig(retryPolicy))
|
|
if err != nil {
|
|
fmt.Printf("%v", err)
|
|
return
|
|
}
|
|
defer conn.Close()
|
|
|
|
for {
|
|
client := pb.NewNodeClient(conn)
|
|
stream, err := client.Alive(ctx)
|
|
if err == nil {
|
|
go func() {
|
|
for {
|
|
resp, err := stream.Recv()
|
|
if err == io.EOF || err != nil {
|
|
log.Println("🔌 Connexion fermée par le serveur")
|
|
break
|
|
} else {
|
|
nodeStore := schema.Node{}
|
|
json.Unmarshal(resp.Nodes, &nodeStore)
|
|
q.nodes = nodeStore
|
|
}
|
|
}
|
|
}()
|
|
|
|
for {
|
|
req := &pb.NodeAliveRequest{
|
|
NodeId: q.config.NodeID,
|
|
Timestamp: timestamppb.New(time.Now()),
|
|
}
|
|
|
|
if err := stream.Send(req); err != nil {
|
|
if destroyCancel == nil {
|
|
destroyCtx, destroyCancel = context.WithTimeout(ctx, 10*time.Second)
|
|
go q.stonith(destroyCtx)
|
|
}
|
|
break
|
|
}
|
|
|
|
if destroyCancel != nil {
|
|
destroyCancel()
|
|
destroyCancel = nil
|
|
}
|
|
|
|
time.Sleep(10 * time.Second)
|
|
}
|
|
|
|
}
|
|
|
|
time.Sleep(1 * time.Second)
|
|
}
|
|
|
|
}
|
|
|
|
func (q qemu) lifecycle(c *go_libvirt.Connect, d *go_libvirt.Domain, event *go_libvirt.DomainEventLifecycle) {
|
|
if event.Event == go_libvirt.DOMAIN_EVENT_UNDEFINED {
|
|
// On ne s'intéresse pas à la suppression d'une configuration. Le manager est le seul point d'entré de valide !
|
|
return
|
|
}
|
|
|
|
fmt.Printf("%v\n", event)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
|
|
domID, err := d.GetUUIDString()
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
domState, _, err := d.GetState()
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
res, err := json.Marshal(event)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
conn, err := grpc.NewClient(strings.Join(q.config.Manager.Peers, ","), grpc.WithTransportCredentials(insecure.NewCredentials()))
|
|
if err != nil {
|
|
return
|
|
}
|
|
defer conn.Close()
|
|
|
|
client := pb.NewDomainClient(conn)
|
|
client.Event(ctx, &pb.DomainEventRequest{
|
|
NodeId: q.config.NodeID,
|
|
DomainId: domID,
|
|
Type: "DomainEventLifecycle",
|
|
State: int64(domState),
|
|
Event: res,
|
|
})
|
|
}
|
|
|
|
func (q qemu) Events() {
|
|
q.clientVirt.DomainEventLifecycleRegister(nil, q.lifecycle)
|
|
}
|