// compute/pkg/api/raft/scheduler.go

package raft

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"time"

	prom_api "github.com/prometheus/client_golang/api"
	v1 "github.com/prometheus/client_golang/api/prometheus/v1"
	"github.com/prometheus/common/model"
	"go.uber.org/zap"

	"deevirt.fr/compute/pkg/config"
)

// Scheduler runs a periodic background control loop (see Start) that
// inspects Prometheus alerts for the cluster (see Alerts).
type Scheduler struct {
	// store is the Store handed to NewScheduler; Alerts lists domain entries
	// through it.
	store *Store

	// config is the configuration obtained from config.New; Alerts reads the
	// cluster identifier from it.
	config *config.Config

	// log is the zap logger used to report errors.
	log *zap.Logger

	// ctx is watched by the loop started in Start; cancel is its cancel
	// function and is invoked by Stop.
	ctx    context.Context
	cancel context.CancelFunc

	// cancelled is the flag Stop consults before calling cancel and sets
	// afterwards.
	cancelled bool
}

// NewScheduler builds a Scheduler bound to the store r.
//
// It loads the configuration with config.New, creates a production zap
// logger, and prepares the cancellable context that the loop started by Start
// watches and that Stop cancels.
//
// An error is returned when the configuration cannot be loaded or the logger
// cannot be created; in that case the returned *Scheduler is nil.
func NewScheduler(r *Store) (*Scheduler, error) {
	cfg, err := config.New()
	if err != nil {
		return nil, fmt.Errorf("loading configuration: %w", err)
	}

	logger, err := zap.NewProduction()
	if err != nil {
		return nil, fmt.Errorf("creating logger: %w", err)
	}

	// Created last so that no cancel function is leaked on the error paths.
	ctx, cancel := context.WithCancel(context.Background())

	return &Scheduler{
		ctx:    ctx,
		cancel: cancel,

		// Stop only calls cancel while this flag is false, so it must not
		// start out as true or the scheduler could never be stopped.
		cancelled: false,

		store: r,

		config: cfg,
		log:    logger,
	}, nil
}

// api creates a Prometheus HTTP client for the hard-coded server address and
// returns the v1 query API built on top of it.
//
// When the client cannot be created the failure is logged and a non-nil error
// is returned together with a nil API, so callers that check the error never
// end up calling methods on a nil interface.
func (w *Scheduler) api() (v1.API, error) {
	client, err := prom_api.NewClient(prom_api.Config{
		Address: "http://172.16.9.161:9090",
	})
	if err != nil {
		// Logged here because the caller only checks the error and returns.
		w.log.Error("Prometheus HS", zap.Error(err))
		return nil, fmt.Errorf("creating prometheus client: %w", err)
	}

	return v1.NewAPI(client), nil
}

// Start launches the periodic control loop in a new goroutine and returns
// immediately.
//
// Each pass calls Alerts and then waits one minute. The loop ends as soon as
// the scheduler context is cancelled, including while it is waiting between
// two passes.
func (w *Scheduler) Start() {
	const interval = 1 * time.Minute

	go func() {
		// Host state is synchronised from here.

		for {
			// Do not begin a new pass once the context has been cancelled.
			select {
			case <-w.ctx.Done():
				fmt.Println("🛑 Worker arrêté !")
				return
			default:
			}

			fmt.Println("🔄 Controle périodique en cours...")
			w.Alerts()
			// HA restart is currently disabled:
			//   for _, t := range w.checkHA() {
			//       w.restartDomain(t)
			//   }

			// Wait on a timer rather than time.Sleep so that cancellation
			// interrupts the pause instead of being noticed a minute later.
			pause := time.NewTimer(interval)
			select {
			case <-w.ctx.Done():
				pause.Stop()
				fmt.Println("🛑 Worker arrêté !")
				return
			case <-pause.C:
			}
		}
	}()
}

// Stop cancels the scheduler context and records that it did so by setting
// the cancelled flag. It does nothing when the flag is already set.
func (w *Scheduler) Stop() {
	if w.cancelled {
		return
	}

	w.cancel()
	w.cancelled = true
}

// Alerts fetches the Prometheus alerts raised for this cluster and prints the
// domains stored for every alerting node.
//
// It first queries the cluster-level alerts (type "deevirt_default"); if any
// of them has the severity "critical" the pass is abandoned. Otherwise it
// queries the node-level alerts (type "deevirt_node_default") and, for each
// of them, lists the entries stored under /etc/libvirt/qemu/<node_id> and
// prints each entry's key and decoded state.
//
// Both queries share a 10 second timeout derived from the scheduler context,
// so cancelling that context also interrupts a query in flight. Failures are
// logged and end the current pass (or skip the current entry); they never
// terminate the process, because this method runs from a background loop.
func (w *Scheduler) Alerts() {
	promAPI, err := w.api()
	if err != nil || promAPI == nil {
		return
	}

	ctx, cancel := context.WithTimeout(w.ctx, 10*time.Second)
	defer cancel()

	// Check the state of the cluster as a whole first.
	clusterAlerts, err := w.queryAlertVector(ctx, promAPI, "deevirt_default")
	if err != nil {
		log.Printf("Erreur lors de la récupération des alertes filtrées: %v", err)
		return
	}
	for _, alert := range clusterAlerts {
		if alert.Metric["severity"] == "critical" {
			// In a critical situation every action is abandoned.
			return
		}
	}

	nodeAlerts, err := w.queryAlertVector(ctx, promAPI, "deevirt_node_default")
	if err != nil {
		log.Printf("Erreur lors de la récupération des alertes filtrées: %v", err)
		return
	}

	for _, alert := range nodeAlerts {
		nodeID := alert.Metric["node_id"]
		println(nodeID)

		entries, err := w.store.Ls(fmt.Sprintf("/etc/libvirt/qemu/%s", nodeID), LsOptions{
			Recursive: false,
			Data:      true,
		})
		if err != nil {
			// Whatever entries were returned alongside the error are still
			// processed below.
			log.Printf("listing domains of node %s: %v", nodeID, err)
		}

		for key, raw := range entries {
			var domain DomainStore
			if err := json.Unmarshal(raw, &domain); err != nil {
				log.Printf("decoding domain %s: %v", key, err)
				continue
			}

			fmt.Printf("On relance la VM %s\n", key)

			fmt.Printf("%v\n", domain.State)
		}

		log.Printf("%v\n", alert)
	}
}

// queryAlertVector runs an instant ALERTS_FOR_STATE query restricted to this
// cluster and to the given alert type. It returns the resulting vector, or a
// nil vector when the result is not a vector.
func (w *Scheduler) queryAlertVector(ctx context.Context, promAPI v1.API, alertType string) (model.Vector, error) {
	query := fmt.Sprintf("ALERTS_FOR_STATE{cluster_id=\"%s\", type=\"%s\"}\n", w.config.ClusterID, alertType)

	result, _, err := promAPI.Query(ctx, query, time.Now())
	if err != nil {
		return nil, err
	}

	vector, ok := result.(model.Vector)
	if !ok {
		return nil, nil
	}
	return vector, nil
}