compute/pkg/api/raft/scheduler.go

145 lines
2.9 KiB
Go

package raft
import (
"context"
"encoding/json"
"fmt"
"log"
"time"
prom_api "github.com/prometheus/client_golang/api"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
"go.uber.org/zap"
"deevirt.fr/compute/pkg/config"
)
// Scheduler runs a periodic background check (see Start) that queries
// Prometheus alerts for the configured cluster and inspects the domains
// stored for nodes that are in alert (see Alerts).
type Scheduler struct {
// ctx is watched by the loop started in Start; once it is done the loop exits.
ctx context.Context
// cancel cancels ctx; it is called by Stop.
cancel context.CancelFunc
// cancelled is set to true by Stop once cancel has been called.
cancelled bool
// store is used by Alerts to list the entries under /etc/libvirt/qemu/<node_id>.
store *Store
// config provides the ClusterID used to filter the Prometheus queries.
config *config.Config
// log is the zap logger used to report errors.
log *zap.Logger
}
// NewScheduler builds a Scheduler bound to the given store.
//
// It loads the configuration through config.New, creates a production zap
// logger and prepares the cancellable context that the Start loop watches.
// An error is returned when the configuration cannot be loaded or the logger
// cannot be created; the returned Scheduler is nil in that case.
func NewScheduler(r *Store) (*Scheduler, error) {
	cfg, err := config.New()
	if err != nil {
		return nil, fmt.Errorf("loading configuration: %w", err)
	}

	logger, err := zap.NewProduction()
	if err != nil {
		return nil, fmt.Errorf("creating logger: %w", err)
	}

	// Created last so that no live context is leaked on the error paths above.
	ctx, cancel := context.WithCancel(context.Background())

	// The context has just been created, so it is not cancelled yet.
	s := &Scheduler{
		ctx:       ctx,
		cancel:    cancel,
		cancelled: false,
		store:     r,
		config:    cfg,
		log:       logger,
	}
	return s, nil
}
// api builds a Prometheus v1 API client for the hard-coded Prometheus address.
//
// When the client cannot be created, the failure is logged and a non-nil
// error is returned together with a nil API, so callers can rely on the
// usual "err != nil" check before using the result.
func (w *Scheduler) api() (v1.API, error) {
	client, err := prom_api.NewClient(prom_api.Config{
		Address: "http://172.16.9.161:9090",
	})
	if err != nil {
		w.log.Error("Prometheus HS", zap.Error(err))
		return nil, fmt.Errorf("creating prometheus client: %w", err)
	}
	return v1.NewAPI(client), nil
}
// Start launches the background goroutine that runs the periodic check.
//
// Each round calls Alerts and then waits one minute before the next round.
// The goroutine exits as soon as the scheduler's context is done, including
// while it is waiting between two rounds.
func (w *Scheduler) Start() {
	go func() {
		// Synchronise the state of the hosts.
		for {
			// Do not start a new round if the scheduler was already stopped.
			select {
			case <-w.ctx.Done():
				fmt.Println("🛑 Worker arrêté !")
				return
			default:
			}

			fmt.Println("🔄 Controle périodique en cours...")
			w.Alerts()
			// HA restart is currently disabled:
			/*for _, t := range w.checkHA() {
				w.restartDomain(t)
			}*/

			// Wait for the next round, but stay responsive to cancellation
			// instead of sleeping blindly for the whole interval.
			timer := time.NewTimer(1 * time.Minute)
			select {
			case <-w.ctx.Done():
				timer.Stop()
				fmt.Println("🛑 Worker arrêté !")
				return
			case <-timer.C:
			}
		}
	}()
}
// Stop cancels the scheduler's context, which is the signal the loop started
// by Start watches in order to exit. Calling Stop several times is harmless.
func (w *Scheduler) Stop() {
	// A context.CancelFunc does nothing after its first call, so it is called
	// unconditionally: guarding it with the cancelled flag would turn Stop
	// into a no-op whenever that flag is already true.
	w.cancel()
	w.cancelled = true
}
// Alerts fetches the cluster's alerts from Prometheus and inspects the
// domains of the nodes that are in alert.
//
// It first queries ALERTS_FOR_STATE for the cluster-level alerts
// (type "deevirt_default"); if any of them has severity "critical", every
// action is abandoned. It then queries the node-level alerts
// (type "deevirt_node_default") and, for each one, lists the store entries
// under /etc/libvirt/qemu/<node_id>, decodes each entry as a DomainStore and
// prints its name and state. No domain is actually restarted here.
//
// Failures (client creation, query, listing, decoding) are logged and the
// affected step is skipped; they never terminate the process.
func (w *Scheduler) Alerts() {
	api, err := w.api()
	if err != nil || api == nil {
		return
	}

	// Derived from the scheduler context so that stopping the scheduler also
	// aborts an in-flight query; both queries share the same 10s budget.
	ctx, cancel := context.WithTimeout(w.ctx, 10*time.Second)
	defer cancel()

	// fetch runs the ALERTS_FOR_STATE query for one alert type and returns
	// the resulting vector (nil when the result is not a vector).
	fetch := func(alertType string) (model.Vector, error) {
		query := fmt.Sprintf("ALERTS_FOR_STATE{cluster_id=\"%s\", type=\"%s\"}\n", w.config.ClusterID, alertType)
		res, _, err := api.Query(ctx, query, time.Now())
		if err != nil {
			return nil, err
		}
		vec, _ := res.(model.Vector)
		return vec, nil
	}

	// Check the state of the cluster as a whole.
	clusterAlerts, err := fetch("deevirt_default")
	if err != nil {
		w.log.Error("Erreur lors de la récupération des alertes filtrées", zap.Error(err))
		return
	}
	for _, alert := range clusterAlerts {
		if alert.Metric["severity"] == "critical" {
			// In a critical situation, abandon every action.
			return
		}
	}

	nodeAlerts, err := fetch("deevirt_node_default")
	if err != nil {
		w.log.Error("Erreur lors de la récupération des alertes filtrées", zap.Error(err))
		return
	}
	for _, alert := range nodeAlerts {
		nodeID := alert.Metric["node_id"]
		println(nodeID)

		domains, err := w.store.Ls(fmt.Sprintf("/etc/libvirt/qemu/%s", nodeID), LsOptions{
			Recursive: false,
			Data:      true,
		})
		if err != nil {
			// Keep going: whatever was returned (possibly nothing) is still processed.
			w.log.Error("listing domains of node in alert", zap.String("node_id", string(nodeID)), zap.Error(err))
		}

		for name, raw := range domains {
			var dom DomainStore
			if err := json.Unmarshal(raw, &dom); err != nil {
				w.log.Error("decoding domain", zap.Any("domain", name), zap.Error(err))
				continue
			}
			fmt.Printf("On relance la VM %s\n", name)
			fmt.Printf("%v\n", dom.State)
		}

		log.Printf("%v\n", alert)
	}
}