package raft

import (
	"context"
	"encoding/json"
	"fmt"
	"time"

	prom_api "github.com/prometheus/client_golang/api"
	v1 "github.com/prometheus/client_golang/api/prometheus/v1"
	"github.com/prometheus/common/model"
	"go.uber.org/zap"

	"deevirt.fr/compute/pkg/config"
)

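// Scheduler periodically polls Prometheus for cluster and node alerts
// and reacts to them (e.g. by restarting the domains of a failed node).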
type Scheduler struct {
	ctx       context.Context
	cancel    context.CancelFunc
	cancelled bool

	store *Store

	config *config.Config
	log    *zap.Logger
}

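// NewScheduler builds a Scheduler bound to the given store.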
func NewScheduler(r *Store) (*Scheduler, error) {
	cfg, err := config.New()
	if err != nil {
		return nil, fmt.Errorf("loading config: %w", err)
	}

	logger, err := zap.NewProduction()
	if err != nil {
		return nil, fmt.Errorf("creating logger: %w", err)
	}

	ctx, cancel := context.WithCancel(context.Background())

	s := &Scheduler{
		ctx:       ctx,
		cancel:    cancel,
		cancelled: false, // not yet stopped; Stop flips this after cancelling

		store: r,

		config: cfg,
		log:    logger,
	}

	return s, nil
}

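// api builds a Prometheus query client.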
func (w *Scheduler) api() (v1.API, error) {
	// TODO: the Prometheus address is hardcoded and should come from the configuration.
	client, err := prom_api.NewClient(prom_api.Config{
		Address: "http://172.16.9.161:9090",
	})
	if err != nil {
		w.log.Error("failed to create Prometheus client", zap.Error(err))
		return nil, err
	}

	return v1.NewAPI(client), nil
}

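// Start runs the periodic checks in a background goroutine until the
// scheduler context is cancelled.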
func (w *Scheduler) Start() {
	go func() {
		// Synchronize the state of the hosts.

		for {
			select {
			case <-w.ctx.Done():
				fmt.Println("🛑 Worker stopped!")
				return
			default:
				fmt.Println("🔄 Running periodic check...")
				w.Alerts()
				/*for _, t := range w.checkHA() {
					w.restartDomain(t)
				}*/

				time.Sleep(1 * time.Minute)
			}
		}
	}()
}

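// Stop cancels the scheduler context and marks the scheduler as stopped.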
func (w *Scheduler) Stop() {
	if !w.cancelled {
		w.cancel()
		w.cancelled = true
	}
}

// Alerts queries Prometheus for the firing alerts of the cluster and
// reacts to the node-level ones.
func (w *Scheduler) Alerts() {
	api, err := w.api()
	if err != nil {
		return
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Check the state of the cluster as a whole.
	query := fmt.Sprintf("ALERTS_FOR_STATE{cluster_id=\"%s\", type=\"deevirt_default\"}", w.config.ClusterID)
	alerts, _, err := api.Query(ctx, query, time.Now())
	if err != nil {
		w.log.Error("failed to query filtered alerts", zap.Error(err))
		return
	}

	if alerts.Type() == model.ValVector {
		for _, alert := range alerts.(model.Vector) {
			if alert.Metric["severity"] == "critical" {
				// In a critical situation, abort all actions.
				return
			}
		}
	}

	// Check the per-node alerts.
	query = fmt.Sprintf("ALERTS_FOR_STATE{cluster_id=\"%s\", type=\"deevirt_node_default\"}", w.config.ClusterID)
	alerts, _, err = api.Query(ctx, query, time.Now())
	if err != nil {
		w.log.Error("failed to query filtered alerts", zap.Error(err))
		return
	}

	if alerts.Type() == model.ValVector {
		for _, alert := range alerts.(model.Vector) {
			nodeID := string(alert.Metric["node_id"])

			t, _ := w.store.Ls(fmt.Sprintf("/etc/libvirt/qemu/%s", nodeID), LsOptions{
				Recursive: false,
				Data:      true,
			})

			for k, v := range t {
				var n DomainStore
				if err := json.Unmarshal(v, &n); err != nil {
					w.log.Error("failed to decode domain state", zap.String("domain", k), zap.Error(err))
					continue
				}

				fmt.Printf("Restarting VM %s\n", k)
				fmt.Printf("%v\n", n.State)
			}

			w.log.Debug("node alert", zap.Any("alert", alert))
		}
	}
}
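
// Illustrative usage sketch (assumed wiring; the real call sites live in the
// package's server setup):
//
//	s, _ := NewScheduler(store)
//	s.Start()
//	defer s.Stop()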