Compare commits
3 Commits
2e55ef0306
...
2a62c9ff87
Author | SHA1 | Date | |
---|---|---|---|
2a62c9ff87 | |||
d67f11d6cb | |||
fb47a1afae |
240
cmd/compute_qemu/events/events.go
Normal file
240
cmd/compute_qemu/events/events.go
Normal file
@ -0,0 +1,240 @@
|
||||
package events
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"deevirt.fr/compute/pkg/amqp"
|
||||
"deevirt.fr/compute/pkg/config"
|
||||
"deevirt.fr/compute/pkg/schema"
|
||||
clientv3 "go.etcd.io/etcd/client/v3"
|
||||
"libvirt.org/go/libvirt"
|
||||
)
|
||||
|
||||
func AgentLifecycle(c *libvirt.Connect, d *libvirt.Domain, event *libvirt.DomainEventAgentLifecycle) {
|
||||
println(event.State)
|
||||
println(event.Reason)
|
||||
|
||||
}
|
||||
|
||||
func Graphics(c *libvirt.Connect, d *libvirt.Domain, event *libvirt.DomainEventGraphics) {
|
||||
println(event.String())
|
||||
}
|
||||
|
||||
func JobCompleted(c *libvirt.Connect, d *libvirt.Domain, e *libvirt.DomainEventJobCompleted) {
|
||||
println(e.Info.DataRemaining)
|
||||
}
|
||||
|
||||
func MigrationIteration(c *libvirt.Connect, d *libvirt.Domain, e *libvirt.DomainEventMigrationIteration) {
|
||||
println(e.Iteration)
|
||||
}
|
||||
|
||||
func Lifecyle(c *libvirt.Connect, d *libvirt.Domain, e *libvirt.DomainEventLifecycle) {
|
||||
var detail, event string
|
||||
config, _ := config.NewConfig()
|
||||
domainID, _ := d.GetUUIDString()
|
||||
|
||||
etcd, err := clientv3.New(clientv3.Config{
|
||||
Endpoints: strings.Split(config.EtcdURI, ","),
|
||||
DialTimeout: 5 * time.Second,
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatalf("Error connexion to etcd: %v", err)
|
||||
}
|
||||
defer etcd.Close()
|
||||
|
||||
switch e.Event {
|
||||
case libvirt.DOMAIN_EVENT_DEFINED:
|
||||
event = "defined"
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
etcd.Put(ctx, "/cluster/"+config.ClusterID+"/host/"+config.NodeID+"/qemu/"+domainID, "")
|
||||
cancel()
|
||||
|
||||
switch libvirt.DomainEventDefinedDetailType(e.Detail) {
|
||||
case libvirt.DOMAIN_EVENT_DEFINED_ADDED:
|
||||
detail = "added"
|
||||
case libvirt.DOMAIN_EVENT_DEFINED_UPDATED:
|
||||
detail = "updated"
|
||||
case libvirt.DOMAIN_EVENT_DEFINED_RENAMED:
|
||||
detail = "renamed"
|
||||
case libvirt.DOMAIN_EVENT_DEFINED_FROM_SNAPSHOT:
|
||||
detail = "snapshot"
|
||||
default:
|
||||
detail = "unknown"
|
||||
}
|
||||
|
||||
case libvirt.DOMAIN_EVENT_UNDEFINED:
|
||||
event = "undefined"
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
etcd.Delete(ctx, "/cluster/"+config.ClusterID+"/host/"+config.NodeID+"/qemu/"+domainID)
|
||||
cancel()
|
||||
|
||||
switch libvirt.DomainEventUndefinedDetailType(e.Detail) {
|
||||
case libvirt.DOMAIN_EVENT_UNDEFINED_REMOVED:
|
||||
detail = "removed"
|
||||
case libvirt.DOMAIN_EVENT_UNDEFINED_RENAMED:
|
||||
detail = "renamed"
|
||||
default:
|
||||
detail = "unknown"
|
||||
}
|
||||
|
||||
case libvirt.DOMAIN_EVENT_STARTED:
|
||||
event = "started"
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
etcd.Put(ctx, "/cluster/"+config.ClusterID+"/domain/"+domainID+"/state", "2")
|
||||
cancel()
|
||||
|
||||
switch libvirt.DomainEventStartedDetailType(e.Detail) {
|
||||
case libvirt.DOMAIN_EVENT_STARTED_BOOTED:
|
||||
detail = "booted"
|
||||
case libvirt.DOMAIN_EVENT_STARTED_MIGRATED:
|
||||
detail = "migrated"
|
||||
case libvirt.DOMAIN_EVENT_STARTED_RESTORED:
|
||||
detail = "restored"
|
||||
case libvirt.DOMAIN_EVENT_STARTED_FROM_SNAPSHOT:
|
||||
detail = "snapshot"
|
||||
case libvirt.DOMAIN_EVENT_STARTED_WAKEUP:
|
||||
detail = "wakeup"
|
||||
default:
|
||||
detail = "unknown"
|
||||
}
|
||||
|
||||
case libvirt.DOMAIN_EVENT_SUSPENDED:
|
||||
event = "suspended"
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
etcd.Put(ctx, "/cluster/"+config.ClusterID+"/domain/"+domainID+"/state", "3")
|
||||
cancel()
|
||||
|
||||
switch libvirt.DomainEventSuspendedDetailType(e.Detail) {
|
||||
case libvirt.DOMAIN_EVENT_SUSPENDED_PAUSED:
|
||||
detail = "paused"
|
||||
case libvirt.DOMAIN_EVENT_SUSPENDED_MIGRATED:
|
||||
detail = "migrated"
|
||||
case libvirt.DOMAIN_EVENT_SUSPENDED_IOERROR:
|
||||
detail = "I/O error"
|
||||
case libvirt.DOMAIN_EVENT_SUSPENDED_WATCHDOG:
|
||||
detail = "watchdog"
|
||||
case libvirt.DOMAIN_EVENT_SUSPENDED_RESTORED:
|
||||
detail = "restored"
|
||||
case libvirt.DOMAIN_EVENT_SUSPENDED_FROM_SNAPSHOT:
|
||||
detail = "snapshot"
|
||||
case libvirt.DOMAIN_EVENT_SUSPENDED_API_ERROR:
|
||||
detail = "api error"
|
||||
case libvirt.DOMAIN_EVENT_SUSPENDED_POSTCOPY:
|
||||
detail = "postcopy"
|
||||
case libvirt.DOMAIN_EVENT_SUSPENDED_POSTCOPY_FAILED:
|
||||
detail = "postcopy failed"
|
||||
default:
|
||||
detail = "unknown"
|
||||
}
|
||||
|
||||
case libvirt.DOMAIN_EVENT_RESUMED:
|
||||
event = "resumed"
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
etcd.Put(ctx, "/cluster/"+config.ClusterID+"/domain/"+domainID+"/state", "4")
|
||||
cancel()
|
||||
|
||||
switch libvirt.DomainEventResumedDetailType(e.Detail) {
|
||||
case libvirt.DOMAIN_EVENT_RESUMED_UNPAUSED:
|
||||
detail = "unpaused"
|
||||
case libvirt.DOMAIN_EVENT_RESUMED_MIGRATED:
|
||||
detail = "migrated"
|
||||
case libvirt.DOMAIN_EVENT_RESUMED_FROM_SNAPSHOT:
|
||||
detail = "snapshot"
|
||||
case libvirt.DOMAIN_EVENT_RESUMED_POSTCOPY:
|
||||
detail = "postcopy"
|
||||
case libvirt.DOMAIN_EVENT_RESUMED_POSTCOPY_FAILED:
|
||||
detail = "postcopy failed"
|
||||
default:
|
||||
detail = "unknown"
|
||||
}
|
||||
|
||||
case libvirt.DOMAIN_EVENT_STOPPED:
|
||||
event = "stopped"
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
etcd.Put(ctx, "/cluster/"+config.ClusterID+"/domain/"+domainID+"/state", "5")
|
||||
cancel()
|
||||
|
||||
switch libvirt.DomainEventStoppedDetailType(e.Detail) {
|
||||
case libvirt.DOMAIN_EVENT_STOPPED_SHUTDOWN:
|
||||
detail = "shutdown"
|
||||
case libvirt.DOMAIN_EVENT_STOPPED_DESTROYED:
|
||||
detail = "destroyed"
|
||||
case libvirt.DOMAIN_EVENT_STOPPED_CRASHED:
|
||||
detail = "crashed"
|
||||
case libvirt.DOMAIN_EVENT_STOPPED_MIGRATED:
|
||||
detail = "migrated"
|
||||
case libvirt.DOMAIN_EVENT_STOPPED_SAVED:
|
||||
detail = "saved"
|
||||
case libvirt.DOMAIN_EVENT_STOPPED_FAILED:
|
||||
detail = "failed"
|
||||
case libvirt.DOMAIN_EVENT_STOPPED_FROM_SNAPSHOT:
|
||||
detail = "snapshot"
|
||||
default:
|
||||
detail = "unknown"
|
||||
}
|
||||
|
||||
case libvirt.DOMAIN_EVENT_SHUTDOWN:
|
||||
event = "shutdown"
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
etcd.Put(ctx, "/cluster/"+config.ClusterID+"/domain/"+domainID+"/state", "6")
|
||||
cancel()
|
||||
|
||||
switch libvirt.DomainEventShutdownDetailType(e.Detail) {
|
||||
case libvirt.DOMAIN_EVENT_SHUTDOWN_FINISHED:
|
||||
detail = "finished"
|
||||
case libvirt.DOMAIN_EVENT_SHUTDOWN_GUEST:
|
||||
detail = "guest"
|
||||
case libvirt.DOMAIN_EVENT_SHUTDOWN_HOST:
|
||||
detail = "host"
|
||||
default:
|
||||
detail = "unknown"
|
||||
}
|
||||
|
||||
default:
|
||||
event = "unknown"
|
||||
}
|
||||
|
||||
// Send event for all clients
|
||||
if e.Event != libvirt.DOMAIN_EVENT_DEFINED|libvirt.DOMAIN_EVENT_UNDEFINED {
|
||||
xmlDesc, err := d.GetXMLDesc(0)
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
var desc schema.Domain
|
||||
err = xml.Unmarshal([]byte(xmlDesc), &desc)
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
state, _ := json.Marshal(&schema.DomainStateJSON{
|
||||
CompanyID: desc.Metadata.DeevirtInstance.DeevirtCompanyID,
|
||||
DatacenterID: desc.Metadata.DeevirtInstance.DeevirtDatacenterID,
|
||||
DomainID: domainID,
|
||||
State: int64(e.Event),
|
||||
})
|
||||
|
||||
a, _ := amqp.NewAMQP()
|
||||
a.Publisher("vmcenter",
|
||||
"events."+desc.Metadata.DeevirtInstance.DeevirtCompanyID+
|
||||
"."+desc.Metadata.DeevirtInstance.DeevirtDatacenterID+
|
||||
"."+domainID,
|
||||
state)
|
||||
a.Close()
|
||||
}
|
||||
|
||||
fmt.Printf("Domain event=%q detail=%q\n", event, detail)
|
||||
}
|
||||
|
||||
func Reboot(c *libvirt.Connect, d *libvirt.Domain) {
|
||||
|
||||
}
|
||||
|
||||
func Watchdog(c *libvirt.Connect, d *libvirt.Domain, event *libvirt.DomainEventWatchdog) {
|
||||
println(event.String())
|
||||
}
|
97
pkg/scheduler/ha.go
Normal file
97
pkg/scheduler/ha.go
Normal file
@ -0,0 +1,97 @@
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
|
||||
"github.com/prometheus/common/model"
|
||||
clientv3 "go.etcd.io/etcd/client/v3"
|
||||
)
|
||||
|
||||
// nodeDown pairs a node with the domains registered under it in etcd.
type nodeDown struct {
	// node_id is the value of the node's "node_id" Prometheus label.
	node_id string
	// domains holds the domain IDs found under the node's qemu/ key prefix.
	domains []string
}
|
||||
|
||||
func (w *Scheduler) checkHA() []nodeDown {
|
||||
s := []nodeDown{}
|
||||
|
||||
etcd, err := clientv3.New(clientv3.Config{
|
||||
Endpoints: strings.Split(w.config.EtcdURI, ","),
|
||||
DialTimeout: 5 * time.Second,
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatalf("Error connexion to etcd: %v", err)
|
||||
}
|
||||
defer etcd.Close()
|
||||
|
||||
r := v1.Range{
|
||||
Start: time.Now().Add(-time.Minute),
|
||||
End: time.Now(),
|
||||
Step: 2 * time.Minute,
|
||||
}
|
||||
|
||||
api, err := w.api()
|
||||
if err != nil {
|
||||
return s
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
println("up{cluster_id=" + w.config.ClusterID + "}")
|
||||
|
||||
result, warnings, err := api.QueryRange(ctx, "up{cluster_id='"+w.config.ClusterID+"'}", r, v1.WithTimeout(5*time.Second))
|
||||
if err != nil {
|
||||
fmt.Printf("Error querying Prometheus: %v\n", err)
|
||||
}
|
||||
if len(warnings) > 0 {
|
||||
fmt.Printf("Warnings: %v\n", warnings)
|
||||
}
|
||||
|
||||
matrix, _ := result.(model.Matrix)
|
||||
|
||||
for _, stream := range matrix {
|
||||
node_id := ""
|
||||
domains := []string{}
|
||||
|
||||
for key, value := range stream.Metric {
|
||||
if key == "node_id" {
|
||||
//test.instance = string(value)
|
||||
node_id = string(value)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
state := int(stream.Values[0].Value)
|
||||
|
||||
if state == 1 {
|
||||
re := regexp.MustCompile(`qemu/(?P<domainID>[a-zA-Z0-9-]+)`)
|
||||
|
||||
resp, _ := etcd.Get(ctx, "/cluster/"+w.config.ClusterID+"/host/"+node_id+"/qemu/", clientv3.WithPrefix(), clientv3.WithKeysOnly())
|
||||
for _, kv := range resp.Kvs {
|
||||
matches := re.FindStringSubmatch(string(kv.Key))
|
||||
if matches != nil {
|
||||
index := re.SubexpIndex("domainID")
|
||||
domains = append(domains, matches[index])
|
||||
}
|
||||
}
|
||||
|
||||
s = append(s, nodeDown{
|
||||
node_id: node_id,
|
||||
domains: domains,
|
||||
})
|
||||
}
|
||||
|
||||
/*for _, pair := range stream.Values {
|
||||
println(pair.Value.String())
|
||||
}*/
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
155
pkg/scheduler/scheduler.go
Normal file
155
pkg/scheduler/scheduler.go
Normal file
@ -0,0 +1,155 @@
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"deevirt.fr/compute/pkg/config"
|
||||
prom_api "github.com/prometheus/client_golang/api"
|
||||
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
|
||||
"github.com/prometheus/common/model"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
type Scheduler struct {
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
cancelled bool
|
||||
|
||||
config *config.Config
|
||||
log *zap.Logger
|
||||
}
|
||||
|
||||
func New() (*Scheduler, error) {
|
||||
config, _ := config.NewConfig()
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
logger, _ := zap.NewProduction()
|
||||
|
||||
s := &Scheduler{
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
cancelled: true,
|
||||
|
||||
config: config,
|
||||
log: logger,
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func (w *Scheduler) api() (v1.API, error) {
|
||||
client, err := prom_api.NewClient(prom_api.Config{
|
||||
Address: "http://172.16.9.161:9090",
|
||||
})
|
||||
if err != nil {
|
||||
w.log.Error("Prometheus HS")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return v1.NewAPI(client), nil
|
||||
}
|
||||
|
||||
// scoringNode holds a CPU figure and a memory figure.
// NOTE(review): presumably the per-node load used to pick a restart target;
// nothing in this file reads it yet.
type scoringNode struct {
	cpu    float64
	memory float64
}
|
||||
|
||||
type scoring struct {
|
||||
domain map[string]scoringNode
|
||||
}
|
||||
|
||||
func (w *Scheduler) restartDomain(domain nodeDown) {
|
||||
api, err := w.api()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
query := fmt.Sprintf(`
|
||||
100 * (
|
||||
(sum(rate(libvirt_node_cpu_time_seconds_total{cluster_id='%s'}[5m])) by (node_id) / sum(libvirt_node_cpu_threads{cluster_id='%s'}) by (node_id)
|
||||
+
|
||||
sum(libvirt_node_memory_usage_bytes{cluster_id='%s'}) by (node_id) / sum(libvirt_node_memory_total_bytes{cluster_id='%s'}) by (node_id))
|
||||
/ 2
|
||||
)`, w.config.ClusterID, w.config.ClusterID, w.config.ClusterID, w.config.ClusterID)
|
||||
|
||||
cpu, _, _ := api.Query(ctx, query, time.Now(), v1.WithTimeout(5*time.Second))
|
||||
|
||||
matrix, _ := cpu.(model.Vector)
|
||||
for _, stream := range matrix {
|
||||
println(stream.Value.String())
|
||||
}
|
||||
|
||||
/*cpu, _, _ := api.Query(ctx, "rate(libvirt_node_cpu_time_seconds_total{cluster_id='"+w.config.ClusterID+"'}[5m]) * 100", time.Now(), v1.WithTimeout(5*time.Second))
|
||||
|
||||
score.
|
||||
|
||||
matrix, _ := cpu.(model.Vector)
|
||||
for _, stream := range matrix {
|
||||
total := 100
|
||||
|
||||
for key, value := range stream.Metric {
|
||||
if key == "threads" {
|
||||
threads, _ := strconv.Atoi(string(value))
|
||||
|
||||
total = threads * 100
|
||||
|
||||
println(total)
|
||||
}
|
||||
|
||||
//fmt.Printf("%s => %s\n", key, value)
|
||||
}
|
||||
|
||||
usage := float64(stream.Value)
|
||||
|
||||
p := usage / float64(total) * 100
|
||||
|
||||
fmt.Printf("%.2f%%\n", p)
|
||||
|
||||
//println(stream.Value.String())
|
||||
}
|
||||
|
||||
memory_usage, _, _ := api.Query(ctx,
|
||||
"(libvirt_node_memory_usage_bytes{cluster_id='"+w.config.ClusterID+"'}/1024e2)",
|
||||
time.Now(),
|
||||
v1.WithTimeout(5*time.Second))
|
||||
|
||||
memory_total, _, _ := api.Query(ctx,
|
||||
"(libvirt_node_memory_usage_bytes{cluster_id='"+w.config.ClusterID+"'}/1024e2)",
|
||||
time.Now(),
|
||||
v1.WithTimeout(5*time.Second))*/
|
||||
|
||||
//fmt.Printf("==>%v\n", cpu)
|
||||
//fmt.Printf("%v\n", memory)
|
||||
|
||||
}
|
||||
|
||||
func (w *Scheduler) Start() {
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-w.ctx.Done():
|
||||
fmt.Println("🛑 Worker arrêté !")
|
||||
return
|
||||
default:
|
||||
fmt.Println("🔄 Controle périodique en cours...")
|
||||
for _, t := range w.checkHA() {
|
||||
w.restartDomain(t)
|
||||
}
|
||||
|
||||
time.Sleep(1 * time.Minute)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (w *Scheduler) Stop() {
|
||||
if !w.cancelled {
|
||||
w.cancel()
|
||||
w.cancelled = true
|
||||
}
|
||||
}
|
4
pkg/schema/cluster.go
Normal file
4
pkg/schema/cluster.go
Normal file
@ -0,0 +1,4 @@
|
||||
package schema
|
||||
|
||||
// Cluster is a placeholder type; it has no fields yet.
type Cluster struct{}
|
Loading…
x
Reference in New Issue
Block a user