computegardener

package
v0.1.5 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 10, 2025 License: Apache-2.0 Imports: 22 Imported by: 0

README

Compute Gardener Scheduler

The documentation for this package can be found in the root README.

The root README is the main project documentation, which includes:

  • Features
  • Configuration
  • Pod Annotations
  • Metrics
  • Architecture
  • Development Guide
  • And more

Documentation

Index

Constants

View Source
// Identifiers for the plugin and for the scheduler it runs in.
const (
	// Name is the name of the plugin used in Registry and configurations.
	Name = "ComputeGardenerScheduler"
	// SchedulerName is the name used in pod specs to request this scheduler.
	// NOTE(review): presumably matched against the pod's spec.schedulerName
	// field — confirm against the plugin's PreFilter/Filter logic.
	SchedulerName = "compute-gardener-scheduler"
)

Variables

View Source
// Metrics defined by the compute-gardener scheduler plugin.
//
// Every metric in this group uses the schedulerSubsystem subsystem (declared
// elsewhere in the package) and is marked metrics.ALPHA. Registration of
// these collectors is not part of this block.
var (
	// CarbonIntensityGauge is a gauge vector reporting the current carbon
	// intensity in gCO2eq/kWh.
	// Labels: region.
	CarbonIntensityGauge = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "carbon_intensity",
			Help:           "Current carbon intensity (gCO2eq/kWh) for a given region",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"region"},
	)

	// PodSchedulingLatency is a histogram vector of the latency, in seconds,
	// of pod scheduling attempts. It uses 15 exponential buckets starting at
	// 0.001 with factor 2.
	// Labels: result.
	PodSchedulingLatency = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "pod_scheduling_duration_seconds",
			Help:           "Latency for scheduling attempts in the compute-gardener scheduler",
			Buckets:        metrics.ExponentialBuckets(0.001, 2, 15),
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"result"},
	)

	// SchedulingAttempts is a counter vector of the total number of
	// scheduling attempts, broken down by outcome.
	// Labels: result.
	SchedulingAttempts = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "scheduling_attempt_total",
			Help:           "Number of attempts to schedule pods by result",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"result"},
	)

	// NodeCPUUsage is a gauge vector of CPU usage, in cores, on nodes. Per
	// the Help text the measurement points are "baseline (bind)" and
	// "current".
	// NOTE(review): presumably the phase label selects between those points;
	// the label values are not visible here — confirm at the call sites.
	// Labels: node, pod, phase.
	NodeCPUUsage = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "node_cpu_usage_cores",
			Help:           "CPU usage in cores on nodes at baseline (bind) and current",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"node", "pod", "phase"},
	)

	// NodeMemoryUsage is a gauge vector of memory usage, in bytes, on nodes.
	// Labels: node, pod, phase.
	NodeMemoryUsage = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "node_memory_usage_bytes",
			Help:           "Memory usage in bytes on nodes",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"node", "pod", "phase"},
	)

	// NodeGPUPower is a gauge vector of GPU power consumption, in watts, on
	// nodes.
	// Labels: node, pod, phase.
	NodeGPUPower = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "node_gpu_power_watts",
			Help:           "GPU power consumption in watts on nodes",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"node", "pod", "phase"},
	)

	// NodePowerEstimate is a gauge vector of estimated node power
	// consumption, in watts, derived from node resource usage.
	// Labels: node, pod, phase.
	NodePowerEstimate = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "node_power_estimate_watts",
			Help:           "Estimated power consumption in watts based on node resource usage",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"node", "pod", "phase"},
	)

	// MetricsSamplesStored is a gauge vector of the number of pod metrics
	// samples currently held in the cache, per pod.
	// Labels: pod, namespace.
	MetricsSamplesStored = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "metrics_samples_stored",
			Help:           "Number of pod metrics samples currently stored in cache",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"pod", "namespace"},
	)

	// MetricsCacheSize is a single (unlabeled) gauge of the number of pods
	// being tracked in the metrics cache.
	MetricsCacheSize = metrics.NewGauge(
		&metrics.GaugeOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "metrics_cache_size",
			Help:           "Number of pods being tracked in metrics cache",
			StabilityLevel: metrics.ALPHA,
		},
	)

	// JobEnergyUsage is a histogram vector of estimated energy usage, in
	// kWh, for completed jobs. It uses 15 exponential buckets starting at
	// 0.001 with factor 2.
	// Labels: pod, namespace.
	JobEnergyUsage = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "job_energy_usage_kwh",
			Help:           "Estimated energy usage in kWh for completed jobs",
			Buckets:        metrics.ExponentialBuckets(0.001, 2, 15),
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"pod", "namespace"},
	)

	// SchedulingEfficiencyMetrics is a gauge vector of scheduling efficiency
	// values comparing the initial versus the actual scheduling time.
	// NOTE(review): the set of values used for the "metric" label (e.g.
	// carbon vs. cost improvement) is not visible here — confirm at the
	// call sites.
	// Labels: metric, pod.
	SchedulingEfficiencyMetrics = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "scheduling_efficiency",
			Help:           "Scheduling efficiency metrics comparing initial vs actual scheduling time",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"metric", "pod"},
	)

	// EstimatedSavings is a counter vector accumulating estimated savings
	// (carbon and cost) attributed to compute-gardener scheduling.
	// NOTE(review): unlike the other counters in this group, the metric name
	// has no "_total" suffix; renaming would break existing dashboards, so
	// it is left as is.
	// Labels: type, unit.
	EstimatedSavings = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "estimated_savings",
			Help:           "Estimated savings from compute-gardener scheduling",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"type", "unit"},
	)

	// ElectricityRateGauge is a gauge vector reporting the current
	// electricity rate in $/kWh.
	// Labels: location, period.
	ElectricityRateGauge = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "electricity_rate",
			Help:           "Current electricity rate ($/kWh) for a given location",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"location", "period"},
	)

	// PriceBasedDelays is a counter vector of scheduling delays caused by
	// electricity price thresholds.
	// Labels: period.
	PriceBasedDelays = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "price_delay_total",
			Help:           "Number of scheduling delays due to electricity price thresholds",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"period"},
	)

	// CarbonBasedDelays is a counter vector of scheduling delays caused by
	// carbon intensity thresholds.
	// Labels: region.
	CarbonBasedDelays = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "carbon_delay_total",
			Help:           "Number of scheduling delays due to carbon intensity thresholds",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"region"},
	)

	// JobCarbonEmissions is a histogram vector of estimated carbon
	// emissions, in gCO2eq, for completed jobs. It uses 15 exponential
	// buckets starting at 0.001 with factor 2.
	// Labels: pod, namespace.
	JobCarbonEmissions = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "job_carbon_emissions_grams",
			Help:           "Estimated carbon emissions in gCO2eq for completed jobs",
			Buckets:        metrics.ExponentialBuckets(0.001, 2, 15),
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"pod", "namespace"},
	)

	// NodePUE is a gauge vector of PUE (Power Usage Effectiveness) values
	// for nodes, i.e. the ratio of total facility energy to IT equipment
	// energy.
	// Labels: node.
	NodePUE = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "node_pue",
			Help:           "Power Usage Effectiveness for nodes (ratio of total facility energy to IT equipment energy)",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"node"},
	)

	// PowerFilteredNodes is a counter vector of nodes filtered out because
	// of power or efficiency constraints.
	// Labels: reason.
	PowerFilteredNodes = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "power_filtered_nodes_total",
			Help:           "Number of nodes filtered due to power or efficiency constraints",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"reason"},
	)

	// NodeEfficiency is a gauge vector of a calculated efficiency value for
	// nodes; higher values are better.
	// Labels: node.
	NodeEfficiency = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "node_efficiency",
			Help:           "Efficiency metric for nodes (higher is better)",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"node"},
	)

	// EnergyBudgetTracking is a gauge vector of the percentage of the energy
	// budget used by workloads.
	// Labels: pod, namespace.
	EnergyBudgetTracking = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "energy_budget_usage_percent",
			Help:           "Percentage of energy budget used by workloads",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"pod", "namespace"},
	)

	// EnergyBudgetExceeded is a counter vector of workloads that exceeded
	// their energy budget.
	// Labels: namespace, owner_kind, action.
	EnergyBudgetExceeded = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      schedulerSubsystem,
			Name:           "energy_budget_exceeded_total",
			Help:           "Number of workloads that exceeded their energy budget",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"namespace", "owner_kind", "action"},
	)
)

Functions

func New

New initializes a new plugin and returns it

Types

type ComputeGardenerScheduler

// ComputeGardenerScheduler is a scheduler plugin that implements carbon and
// price-aware scheduling. Its documented methods are Close, Filter, Name,
// PreFilter and PreFilterExtensions.
type ComputeGardenerScheduler struct {
	// contains filtered or unexported fields
}

ComputeGardenerScheduler is a scheduler plugin that implements carbon and price-aware scheduling

func (*ComputeGardenerScheduler) Close

func (cs *ComputeGardenerScheduler) Close() error

Close cleans up resources

func (*ComputeGardenerScheduler) Filter

Filter implements the Filter interface

func (*ComputeGardenerScheduler) Name

func (cs *ComputeGardenerScheduler) Name() string

Name returns the name of the plugin

func (*ComputeGardenerScheduler) PreFilter

PreFilter implements the PreFilter interface

func (*ComputeGardenerScheduler) PreFilterExtensions

func (cs *ComputeGardenerScheduler) PreFilterExtensions() framework.PreFilterExtensions

PreFilterExtensions returns nil as this plugin does not need extensions

Directories

Path Synopsis
tou

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL
JackTT - Gopher 🇻🇳