Documentation
¶
Index ¶
- Constants
- func ExtractEssentialJobMetadata(jobRun *executorapi.JobRunLease) (*job.RunMeta, error)
- type ClusterAllocationService
- type ClusterAllocator
- type IssueHandler
- type JobLeaseRequester
- type JobLeaseService
- func (jobLeaseService *JobLeaseService) RenewJobLeases(jobs []*job.RunningJob) ([]*job.RunningJob, error)
- func (jobLeaseService *JobLeaseService) ReportDone(jobIds []string) error
- func (jobLeaseService *JobLeaseService) RequestJobLeases(availableResource *armadaresource.ComputeResources, nodes []api.NodeInfo, ...) ([]*api.Job, error)
- func (jobLeaseService *JobLeaseService) ReturnLease(pod *v1.Pod, reason string, jobRunAttempted bool) error
- type JobManager
- type JobRequester
- type LeaseRequest
- type LeaseRequester
- type LeaseResponse
- type LeaseService
- type LegacyClusterAllocationService
- type ResourceCleanupService
Constants ¶
View Source
const (
UnableToSchedule podIssueType = iota
StuckStartingUp
StuckTerminating
ExternallyDeleted
)
Variables ¶
This section is empty.
Functions ¶
func ExtractEssentialJobMetadata ¶ added in v0.3.57
func ExtractEssentialJobMetadata(jobRun *executorapi.JobRunLease) (*job.RunMeta, error)
Types ¶
type ClusterAllocationService ¶
type ClusterAllocationService struct {
// contains filtered or unexported fields
}
func NewClusterAllocationService ¶
func NewClusterAllocationService(
clusterId executorContext.ClusterIdentity,
eventReporter reporter.EventReporter,
jobRunStateManager job.RunStateStore,
submitter job.Submitter,
etcdHealthMonitor healthmonitor.EtcdLimitHealthMonitor,
) *ClusterAllocationService
func (*ClusterAllocationService) AllocateSpareClusterCapacity ¶
func (allocationService *ClusterAllocationService) AllocateSpareClusterCapacity()
type ClusterAllocator ¶ added in v0.3.47
type ClusterAllocator interface {
AllocateSpareClusterCapacity()
}
type IssueHandler ¶ added in v0.3.77
type IssueHandler struct {
// contains filtered or unexported fields
}
func NewIssueHandler ¶ added in v0.3.77
func NewIssueHandler(
jobRunState job.RunStateStore,
clusterContext executorContext.ClusterContext,
eventReporter reporter.EventReporter,
stateChecksConfig configuration.StateChecksConfiguration,
pendingPodChecker podchecks.PodChecker,
stuckTerminatingPodExpiry time.Duration,
) *IssueHandler
func (*IssueHandler) HandlePodIssues ¶ added in v0.3.77
func (p *IssueHandler) HandlePodIssues()
type JobLeaseRequester ¶ added in v0.3.47
type JobLeaseRequester struct {
// contains filtered or unexported fields
}
func NewJobLeaseRequester ¶ added in v0.3.47
func NewJobLeaseRequester(
executorApiClient executorapi.ExecutorApiClient,
clusterIdentity clusterContext.ClusterIdentity,
minimumJobSize armadaresource.ComputeResources,
) *JobLeaseRequester
func (*JobLeaseRequester) LeaseJobRuns ¶ added in v0.3.47
func (requester *JobLeaseRequester) LeaseJobRuns(ctx context.Context, request *LeaseRequest) (*LeaseResponse, error)
type JobLeaseService ¶
type JobLeaseService struct {
// contains filtered or unexported fields
}
func NewJobLeaseService ¶
func NewJobLeaseService(
clusterContext context2.ClusterContext,
queueClient api.AggregatedQueueClient,
minimumJobSize armadaresource.ComputeResources,
avoidNodeLabelsOnRetry []string,
jobLeaseRequestTimeout time.Duration,
) *JobLeaseService
func (*JobLeaseService) RenewJobLeases ¶
func (jobLeaseService *JobLeaseService) RenewJobLeases(jobs []*job.RunningJob) ([]*job.RunningJob, error)
func (*JobLeaseService) ReportDone ¶
func (jobLeaseService *JobLeaseService) ReportDone(jobIds []string) error
func (*JobLeaseService) RequestJobLeases ¶
func (jobLeaseService *JobLeaseService) RequestJobLeases(
availableResource *armadaresource.ComputeResources,
nodes []api.NodeInfo,
leasedResourceByQueue map[string]armadaresource.ComputeResources,
leasedResourceByQueueAndPriority map[string]map[int32]armadaresource.ComputeResources,
) ([]*api.Job, error)
func (*JobLeaseService) ReturnLease ¶
func (jobLeaseService *JobLeaseService) ReturnLease(pod *v1.Pod, reason string, jobRunAttempted bool) error
type JobManager ¶
type JobManager struct {
// contains filtered or unexported fields
}
func NewJobManager ¶
func NewJobManager(
clusterIdentity context2.ClusterIdentity,
jobContext job.JobContext,
eventReporter reporter.EventReporter,
jobLeaseService LeaseService,
) *JobManager
func (*JobManager) ManageJobLeases ¶
func (m *JobManager) ManageJobLeases()
type JobRequester ¶ added in v0.3.57
type JobRequester struct {
// contains filtered or unexported fields
}
func NewJobRequester ¶ added in v0.3.57
func NewJobRequester(
clusterId executorContext.ClusterIdentity,
eventReporter reporter.EventReporter,
leaseRequester LeaseRequester,
jobRunStateStore job.RunStateStore,
utilisationService utilisation.UtilisationService,
podDefaults *configuration.PodDefaults,
) *JobRequester
func (*JobRequester) RequestJobsRuns ¶ added in v0.3.57
func (r *JobRequester) RequestJobsRuns()
type LeaseRequest ¶ added in v0.3.57
type LeaseRequest struct {
AvailableResource armadaresource.ComputeResources
Nodes []*api.NodeInfo
UnassignedJobRunIds []armadaevents.Uuid
}
type LeaseRequester ¶ added in v0.3.47
type LeaseRequester interface {
LeaseJobRuns(ctx context.Context, request *LeaseRequest) (*LeaseResponse, error)
}
type LeaseResponse ¶ added in v0.3.57
type LeaseResponse struct {
LeasedRuns []*executorapi.JobRunLease
RunIdsToCancel []*armadaevents.Uuid
RunIdsToPreempt []*armadaevents.Uuid
}
type LeaseService ¶
type LeaseService interface {
ReturnLease(pod *v1.Pod, reason string, jobRunAttempted bool) error
RequestJobLeases(
availableResource *armadaresource.ComputeResources,
nodes []api.NodeInfo,
leasedResourceByQueue map[string]armadaresource.ComputeResources,
leasedResourceByQueueAndPriority map[string]map[int32]armadaresource.ComputeResources,
) ([]*api.Job, error)
RenewJobLeases(jobs []*job.RunningJob) ([]*job.RunningJob, error)
ReportDone(jobIds []string) error
}
type LegacyClusterAllocationService ¶ added in v0.3.47
type LegacyClusterAllocationService struct {
// contains filtered or unexported fields
}
func NewLegacyClusterAllocationService ¶ added in v0.3.47
func NewLegacyClusterAllocationService(
clusterContext executorContext.ClusterContext,
eventReporter reporter.EventReporter,
leaseService LeaseService,
utilisationService utilisation.UtilisationService,
submitter job.Submitter,
etcdHealthMonitor healthmonitor.EtcdLimitHealthMonitor,
) *LegacyClusterAllocationService
func (*LegacyClusterAllocationService) AllocateSpareClusterCapacity ¶ added in v0.3.47
func (allocationService *LegacyClusterAllocationService) AllocateSpareClusterCapacity()
type ResourceCleanupService ¶
type ResourceCleanupService struct {
// contains filtered or unexported fields
}
func NewResourceCleanupService ¶
func NewResourceCleanupService(
clusterContext clusterContext.ClusterContext,
kubernetesConfiguration configuration.KubernetesConfiguration,
) *ResourceCleanupService
func (*ResourceCleanupService) CleanupResources ¶
func (r *ResourceCleanupService) CleanupResources()
CleanupResources
- This function finds and deletes old resources. It does this in two ways:
- - By deleting all expired terminated pods
- - By deleting non-expired terminated pods when the MaxTerminatedPods limit is exceeded
Source Files
¶
Click to show internal directories.
Click to hide internal directories.