util

package
v10.29.3+incompatible Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 29, 2025 License: Apache-2.0, Apache-2.0 Imports: 17 Imported by: 0

Documentation

Overview

Package util is using for the total variable.

Package util is using for the total variable.

Package util is using for the total variable.

Package util is using for the total variable.

Package util is using for the total variable.

Index

Constants

View Source
// NPU resource names, card-name prefixes, device types, and job-kind values.
const (
	// ChipKind is the chip kind string "910".
	// NOTE(review): the earlier comment here was identical to HwPreName's
	// ("the prefix of npu resource") — confirm the intended meaning.
	ChipKind = "910"
	// HwPreName is the prefix of npu resource names.
	HwPreName = "huawei.com/"
	// NPU910CardName is used to identify the 910 npu resource.
	NPU910CardName = "huawei.com/Ascend910"
	// NPU910CardNamePre is the card name prefix used for getting the card number.
	NPU910CardNamePre = "Ascend910-"
	// NPU310PCardName is used to identify the 310P npu resource.
	NPU310PCardName = "huawei.com/Ascend310P"
	// NPU310CardName is used to identify the 310 npu resource.
	NPU310CardName = "huawei.com/Ascend310"
	// NPU310CardNamePre is the card name prefix used for getting the card number.
	NPU310CardNamePre = "Ascend310-"
	// NPU310PCardNamePre is the card name prefix used for getting the card number.
	NPU310PCardNamePre = "Ascend310P-"
	// AscendNPUPodRealUse is for the cards an NPU pod really uses.
	AscendNPUPodRealUse = "huawei.com/AscendReal"
	// AscendNPUCore is for the NPU core num, like 56; records the chip name that the scheduler assigns to the pod.
	// NOTE(review): the second sentence may belong to another constant — confirm.
	AscendNPUCore = "huawei.com/npu-core"
	// Ascend910bName is used to identify the Ascend910b npu resource.
	Ascend910bName = "huawei.com/Ascend910b"

	// Ascend310P is the device type 310P.
	Ascend310P = "Ascend310P"
	// Ascend310 is the device type 310.
	Ascend310 = "Ascend310"
	// Ascend910 is the device type 910.
	Ascend910 = "Ascend910"
	// Pod910DeviceKey is the pod annotation key used to generate the 910 hccl rank table.
	Pod910DeviceKey = "ascend.kubectl.kubernetes.io/ascend-910-configuration"
	// JobKind910Value is the 910 value in ring-controller.atlas.
	JobKind910Value = "ascend-910"
	// JobKind310Value is the 310 value in ring-controller.atlas.
	JobKind310Value = "ascend-310"
	// JobKind310PValue is the 310P ring controller name.
	JobKind310PValue = "ascend-310P"
	// JobKind910BValue is the 910B ring controller name.
	JobKind910BValue = "ascend-910b"
	// Module910bx16AcceleratorType is the accelerator type for module mode.
	Module910bx16AcceleratorType = "module-910b-16"
	// Module910bx8AcceleratorType is the accelerator type for module mode.
	Module910bx8AcceleratorType = "module-910b-8"
	// Accelerator310Key is the accelerator key of the old infer card.
	Accelerator310Key = "npu-310-strategy"
)
View Source
// Task status values, numbered consecutively from 0 by iota.
const (
	// TaskStatusInit is task status 0.
	TaskStatusInit = iota
	// TaskStatusAllocate is task status 1.
	TaskStatusAllocate
	// TaskStatusWrBack is task status 2.
	TaskStatusWrBack
	// TaskStatusRunning is task status 3.
	TaskStatusRunning
	// TaskStatusFailed is task status 4.
	TaskStatusFailed
)

for task status

View Source
// Log levels, NPU index and core/cpu numbers, and the keys, values and
// names used in labels, annotations and configmaps.
const (
	// LogErrorLev for log error.
	LogErrorLev = 1
	// LogWarningLev for log warning.
	LogWarningLev = 2
	// LogInfoLev for log information.
	LogInfoLev = 3
	// LogDebugLev for log debug.
	LogDebugLev = 4
	// ErrorInt is the value (-1) returned when an error occurs while getting an int.
	ErrorInt = -1
	// NPUIndex2 the 2 index.
	NPUIndex2 = 2
	// NPUIndex3 the 3 index.
	NPUIndex3 = 3
	// NPUIndex8 the 8 index.
	NPUIndex8 = 8
	// NPUIndex16 the 16 index.
	NPUIndex16 = 16
	// NPUIndex7 the 7 index.
	NPUIndex7 = 7
	// NPUIndex4 the 4 index.
	NPUIndex4 = 4
	// NPUIndex5 the 5 index.
	NPUIndex5 = 5
	// NPUIndex6 the 6 index.
	NPUIndex6 = 6
	// NPUIndex1 the 1 index.
	NPUIndex1 = 1
	// NPUIndex0 the 0 index.
	NPUIndex0 = 0
	// NPUIndex9 the 9 index.
	NPUIndex9 = 9
	// NPUIndex10 the 10 index.
	NPUIndex10 = 10
	// NPUIndex11 the 11 index.
	NPUIndex11 = 11
	// NPUIndex12 the 12 index.
	NPUIndex12 = 12
	// NPUIndex13 the 13 index.
	NPUIndex13 = 13
	// NPUIndex14 the 14 index.
	NPUIndex14 = 14
	// NPUIndex15 the 15 index.
	NPUIndex15 = 15
	// CoreNum32 32 core 910
	CoreNum32 = 32
	// CoreNum3 3 core 910
	CoreNum3 = 3
	// CoreNum5 5 core 910
	CoreNum5 = 5
	// CoreNum10 10 core 910
	CoreNum10 = 10
	// CoreNum6 6 core 910
	CoreNum6 = 6
	// CoreNum12 12 core 910
	CoreNum12 = 12
	// CoreNum30 30 core 910
	CoreNum30 = 30
	// CoreNum20 20 core 910
	CoreNum20 = 20
	// CoreNum25 25 core 910
	CoreNum25 = 25
	// CoreNum24 24 core 910
	CoreNum24 = 24
	// CpuNum14 14 cpu 910
	CpuNum14 = 14
	// CpuNum6 6 cpu 910
	CpuNum6 = 6
	// MapInitNum for map init length.
	MapInitNum = 3
	// Base10 for const 10.
	Base10 = 10
	// BitSize64 for const 64
	BitSize64 = 64
	// MaxSliceNum max slice number
	MaxSliceNum = 128
	// NPUHexKilo for const 1000, used by the volcano framework.
	NPUHexKilo = 1000
	// Accelerator for custom tag.
	Accelerator = "accelerator"
	// CMInitParamKey is the init param key in the scheduler configmap.
	CMInitParamKey = "init-params"
	// AcceleratorType for selector.
	AcceleratorType = "accelerator-type"
	// ModuleA3x16AcceleratorType for module mode.
	ModuleA3x16AcceleratorType = "module-a3-16"
	// ModuleAcceleratorType for module mode.
	ModuleAcceleratorType = "module"
	// ServerType is the server type key; its value takes forms like Ascend310P-10-dual/Ascend910-32...
	ServerType = "servertype"
	// ServerTypeDual dual card
	ServerTypeDual = "dual"

	// SegmentEnable for VNPU segment enable flag. Default is "false".
	SegmentEnable = "presetVirtualDevice"

	// UseClusterInfoManager for using the cluster info manager, default is true
	UseClusterInfoManager = "useClusterInfoManager"
	// ForceEnqueue for forcing enqueue when the npu num in the cluster meets the job requirement, default is true
	ForceEnqueue = "forceEnqueue"
	// SelfMaintainAvailCard for volcano self-maintaining the available cards, default is true
	SelfMaintainAvailCard = "self-maintain-available-card"

	// SubHealthyStrategyLabel is the sub-healthy handling strategy label. Default is grace exit.
	SubHealthyStrategyLabel = "subHealthyStrategy"
	// SubHealthyIgnore ignore sub-healthy
	SubHealthyIgnore = "ignore"
	// SubHealthyGraceExit don't use sub-healthy node and grace exit
	SubHealthyGraceExit = "graceExit"
	// SubHealthyForceExit don't use sub-healthy node and force exit
	SubHealthyForceExit = "forceExit"
	// SubHealthyHotSwitch strategy name of hot switch
	SubHealthyHotSwitch = "hotSwitch"
	// DevInfoNameSpace is the namespace device-plugin is installed in.
	DevInfoNameSpace = "kube-system"
	// MindXDlNameSpace is the mindx dl namespace.
	MindXDlNameSpace = "mindx-dl"
	// DevInfoPreName is the device info name prefix; a full name looks like "mindx-dl-deviceinfo-ubuntu".
	DevInfoPreName = "mindx-dl-deviceinfo-"
	// NodeDCmInfoNamePrefix is for noded to report node healthy state
	NodeDCmInfoNamePrefix = "mindx-dl-nodeinfo-"
	// SwitchCmInfoNamePrefix is the prefix for switch fault configmap
	SwitchCmInfoNamePrefix = "mindx-dl-switchinfo-"
	// NodeHealthyStatusKey is the key of node healthy status from configmap data of noded and clusterD
	NodeHealthyStatusKey = "NodeHealthyStatus"
	// NodeSubHealthy means there is some fault on the node which is reported by nodeD, but it will not immediately
	// make the node unhealthy; this status prevents new tasks from being scheduled on this node, and rescheduling
	// will not consider this node
	NodeSubHealthy = "SubHealthy"
	// NodeUnHealthy is the node unhealthy status reported by nodeD configmap, switch info CM and clusterD,
	// in this case the pod will be rescheduled
	NodeUnHealthy = "UnHealthy"
	// NodeHealthyByNodeD is the node healthy status reported by nodeD configmap
	NodeHealthyByNodeD = "Healthy"
	// NodeDEnableKey indicates if the label has been set
	NodeDEnableKey = "nodeDEnable"
	// NodeDEnableOnValue the value of NodeDEnableKey, which means nodeD has been enabled
	NodeDEnableOnValue = "on"

	// PreSeparateFaultCode is the PreSeparate fault code.
	PreSeparateFaultCode = "PreSeparate"

	// SwitchNodeHealtyStatuskey is the same as for noded: healthy, subhealthy and unhealthy statuses are
	// reported by the switch info
	SwitchNodeHealtyStatuskey = "NodeStatus"

	// DevInfoCMKey mindx-dl-deviceinfo configmap key
	DevInfoCMKey = "DeviceInfoCfg"
	// NodeInfoCMKey node info configmap key
	NodeInfoCMKey = "NodeInfo"
	// SwitchInfoCmKey is the key of switch info configmap
	SwitchInfoCmKey = "SwitchInfoCfg"
	// RePropertyCacheName rescheduling keyword in init env.cache
	RePropertyCacheName = "re-scheduling"
	// CmCheckCode is the check code key.
	CmCheckCode = "checkCode"
	// JobRecovery keywords for retain
	JobRecovery = "job-recovery"

	// DeleteOperator informer delete operator
	DeleteOperator = "delete"
	// AddOperator informer add operator
	AddOperator = "add"
	// UpdateOperator informer update operator
	UpdateOperator = "update"

	// CmConsumer marks who uses these configmaps.
	CmConsumer = "mx-consumer-volcano"
	// NormalCmConsumer marks the normal consumer that uses these configmaps.
	NormalCmConsumer = "mx-consumer-cim"
	// CmConsumerValue is the consumer value; only "true" is used.
	CmConsumerValue = "true"
	// ClusterDeviceInfo the name of cluster device info configmap
	ClusterDeviceInfo = "cluster-info-device-"
	// ClusterNodeInfo the name of cluster node info configmap
	ClusterNodeInfo = "cluster-info-node-cm"
	// ClusterSwitchInfo the name of cluster switch info configmap
	ClusterSwitchInfo = "cluster-info-switch-"

	// PodPredicateTime is set on the pod for use by device-plugin.
	PodPredicateTime = "predicate-time"
	// NodeNotMeetTopologyWarning is the warning that a node does not satisfy the schedulable topology.
	NodeNotMeetTopologyWarning = "the npus on this node don't satisfy the schedulable topology"
	// ArgumentError argument nil error.
	ArgumentError = "invalid argument"
	// RankIdNotExistError rank id does not exist
	RankIdNotExistError = "rank id does not exist"
	// JobKindKey is the key defining the job kind: ascend-310P, ascend-910
	JobKindKey = "ring-controller.atlas"
	// DistributedJobKey flag for distributed job
	DistributedJobKey = "distributed-job"
	// DistributedJobValue indicates a distributed job
	DistributedJobValue = "true"
	// StandaloneJobValue indicates a standalone job
	StandaloneJobValue = "false"

	// SuperPodAnnoKey annotation key of super pod
	SuperPodAnnoKey = "sp-block"
	// DistributedInferKey distributed infer
	DistributedInferKey = "distributed"
	// DistributedInferLabel true or false
	DistributedInferLabel = "true"
	// OperatorNameLabelKey pod label key for acjob operator name
	OperatorNameLabelKey = "training.kubeflow.org/operator-name"
)
View Source
// Affinity score values, numbered consecutively from 0 by iota.
const (
	// AffScore0 value 0 for scored.
	AffScore0 = iota
	// AffScore1 value 1 for scored.
	AffScore1
	// AffScore2 value 2 for scored.
	AffScore2
	// AffScore3 value 3 for scored.
	AffScore3
	// AffScore4 value 4 for scored.
	AffScore4
	// AffScore5 value 5 for scored.
	AffScore5
	// AffScore6 value 6 for scored.
	AffScore6
	// AffScore7 value 7 for scored.
	AffScore7
	// AffScore8 value 8 for scored.
	AffScore8
	// AffScore15 value 15 for scored.
	// NOTE(review): as declared here it continues the iota sequence, so its
	// actual value is 9, not 15 — confirm whether "= 15" was intended.
	AffScore15
)
View Source
// Job enqueue results, PodGroup statuses, and rescheduling / tor-affinity tags.
const (
	// JobNotEnqueue job enqueue failed
	JobNotEnqueue = -1
	// JobEnqueue job enqueue success
	JobEnqueue = 1
	// JobEnqueueSkip skips the judgement of ascend-volcano-plugin in the job enqueue phase
	JobEnqueueSkip = 0
	// PodGroupInqueue the pg Inqueue status
	PodGroupInqueue = "Inqueue"
	// PodGroupPending the pg Pending status
	PodGroupPending = "Pending"
	// PodGroupRunning the pg Running status
	PodGroupRunning = "Running"
	// PodGroupUnknown the pg Unknown status
	PodGroupUnknown = "Unknown"
	// PodGroupUnschedulableType the pg Unschedulable Condition
	PodGroupUnschedulableType = "Unschedulable"
	// EnableFunc is the value that enables a function.
	EnableFunc = "on"
	// SinglePodTag the tag of single pod rescheduling
	SinglePodTag = "pod-rescheduling"
	// ProcessRecoverEnable the tag of process rescheduling
	ProcessRecoverEnable = "process-recover-enable"
	// BaseDeviceInfoKey base device info key
	BaseDeviceInfoKey = "baseDeviceInfos"
	// TorAffinityKey the key of tor affinity
	TorAffinityKey = "tor-affinity"
	// LargeModelTag the value of large model
	LargeModelTag = "large-model-schema"
	// NormalSchema the value of normal tor affinity
	NormalSchema = "normal-schema"

	// NullTag is the value that means tor affinity is not used.
	NullTag = "null"
	// DevSplitNum device split number
	DevSplitNum = 2
)
View Source
// Fault strategy and fault type names.
const (
	// SeparateFaultStrategy is the Separate fault strategy for a task.
	SeparateFaultStrategy = "Separate"
	// SubHealthFaultStrategy is the SubHealth fault strategy for a task.
	SubHealthFaultStrategy = "SubHealth"
	// RelationFault is the fault type of a relation fault.
	RelationFault = "RelationFaultSeparate"
)
View Source
// Voting results for whether a job may be pipelined.
const (
	// Permit indicates the vote permits the job to be pipelined.
	Permit = 1
	// Abstain indicates the vote abstains on whether the job is pipelined.
	Abstain = 0
	// Reject indicates the vote rejects the job being pipelined.
	Reject = -1
)
View Source
// Check item names and length/count limits for pods.
const (
	// Namespace is the check item name for the pod namespace.
	Namespace = "namespace"
	// PodName is the check item name for the pod name.
	PodName = "podName"
	// PodNameMaxLength pod name max length
	PodNameMaxLength = 253
	// PodNameSpaceMaxLength pod namespace max length
	PodNameSpaceMaxLength = 63
	// PodAnnotationMaxLength pod annotation max data length, 1MB (1024 * 1024 bytes)
	PodAnnotationMaxLength = 1024 * 1024
	// MaxDevicesNum max device num
	MaxDevicesNum = 100
)
View Source
// Job type label key and its app type values.
const (
	// AppTypeLabelKey job type label key
	AppTypeLabelKey = "app"
	// ControllerAppType controller app type
	ControllerAppType = "mindie-ms-controller"
	// CoordinatorAppType coordinator app type
	CoordinatorAppType = "mindie-ms-coordinator"
)
View Source
// Hot-switch keys and values.
const (
	// NeedVolcanoOpeKey is the key marking that an operation by volcano is needed.
	// NOTE(review): meaning taken from the name and the "for volcano" comment — confirm.
	NeedVolcanoOpeKey = "needVolcanoOpe"
	// OpeTypeDelete is the delete operation type.
	OpeTypeDelete = "delete"
)

HotSwitch

View Source
const (
	// Rank0 default time of pod deleted
	// NOTE(review): the value is the string "0" and the name suggests rank 0;
	// the comment above may be stale — confirm.
	Rank0 = "0"
)

Variables

This section is empty.

Functions

func ChangeIntArrToStr

func ChangeIntArrToStr(top []int, npuCardPreName string) string

ChangeIntArrToStr converts []int to string. Like [0,1] -> "Ascend910-0,Ascend910-1".

func ChangeNodesToNodeMaps

func ChangeNodesToNodeMaps(nodes []*api.NodeInfo) map[string]*api.NodeInfo

ChangeNodesToNodeMaps change nodes slice into node maps

func ChangeTopToIntArray

func ChangeTopToIntArray(topStr string, npuCardPreName string) []int

ChangeTopToIntArray Change npu card ids from string to int array.

func CheckPodNameOrSpace

func CheckPodNameOrSpace(checkItem, podParam string, maxLength int) error

CheckPodNameOrSpace check pod name or pod namespace

func CheckStrInSlice

func CheckStrInSlice(str string, slice []string) bool

CheckStrInSlice return whether str in string slice

func ConvertErrSliceToError

func ConvertErrSliceToError(reErrors []error) error

ConvertErrSliceToError convert []error to one error.

func GetActivePodUsedDevFromNode

func GetActivePodUsedDevFromNode(nodeInfo *api.NodeInfo, devType string) []string

GetActivePodUsedDevFromNode get active pod used device from node

func GetAvailableDevInfo

func GetAvailableDevInfo(devList map[string]string) (string, []string)

GetAvailableDevInfo get available device info from device list

func GetDeviceType

func GetDeviceType(devList map[string]string) string

GetDeviceType get device type from dev list

func GetNodeDevListFromAnno

func GetNodeDevListFromAnno(nodeInfo *api.NodeInfo) ([]string, error)

GetNodeDevListFromAnno get node device list from annotation

func GetNpuNameFromJobRequire

func GetNpuNameFromJobRequire(npuName string) string

GetNpuNameFromJobRequire gets the npuName; if the job's required name is npu-core, it returns huawei.com/Ascend310P.

func GetRecoveringDevInfo

func GetRecoveringDevInfo(devList map[string]string) (string, []string)

GetRecoveringDevInfo get recovering device info from device list

func GetTaskInfoByNameFromSSN

func GetTaskInfoByNameFromSSN(ssn *framework.Session, taskName, taskNamespace string) (*api.TaskInfo, error)

GetTaskInfoByNameFromSSN get corresponding api.TaskInfo object by given taskName

func GetUnhealthyDevInfo

func GetUnhealthyDevInfo(devList map[string]string) (string, []string)

GetUnhealthyDevInfo get unhealthy device info from device list

func GetVTaskUseTemplate

func GetVTaskUseTemplate(taskInf *api.TaskInfo) (string, error)

GetVTaskUseTemplate the format is : 0-vir04-3c_ndvpp,0-vir0

func IsMapHasNPUResource

func IsMapHasNPUResource(resMap map[v1.ResourceName]float64, npuName string) bool

IsMapHasNPUResource Determines whether a target string exists in the map.

func IsNPUTask

func IsNPUTask(nT *api.TaskInfo) bool

IsNPUTask judges whether the task is an NPU task or not.

func IsNodeReady

func IsNodeReady(node *v1.Node) bool

IsNodeReady returns the node ready status

func IsSliceContain

func IsSliceContain(keyword interface{}, targetSlice interface{}) bool

IsSliceContain judges whether keyword in targetSlice

func IsStrategyInSubHealthyStrategs

func IsStrategyInSubHealthyStrategs(subHealthyStrategy string) bool

IsStrategyInSubHealthyStrategs judges whether the subHealthyStrategy is in subHealthyStrategs or not.

func MakeDataHash

func MakeDataHash(data interface{}) string

MakeDataHash check code for configmap

func Max

func Max(x, y int) int

Max return the bigger one

func Min

func Min(x, y int) int

Min return the smaller one

func PtrInit

func PtrInit[T any](v T) *T

PtrInit return base type ptr

func ReferenceNameOfJob

func ReferenceNameOfJob(job *api.JobInfo) string

ReferenceNameOfJob get name of job

func RemoveCommonElement

func RemoveCommonElement(s1, s2 []int) []int

RemoveCommonElement remove common element from s1

func RemoveSliceDuplicateElement

func RemoveSliceDuplicateElement(languages []string) []string

RemoveSliceDuplicateElement remove duplicate element in slice

func SafePrint

func SafePrint(args ...interface{}) string

SafePrint safe print error

func SortByNumericValue

func SortByNumericValue(s []string)

SortByNumericValue sort string

func UuidOfJob

func UuidOfJob(job *api.JobInfo) types.UID

UuidOfJob get uid of job

Types

type ComJob

// ComJob holds the attributes every vcJob has: its job ID, reference name,
// namespace and status, plus its annotation, selector and label maps.
type ComJob struct {
	Name          api.JobID
	ReferenceName string
	NameSpace     string
	Status        string
	Annotation    map[string]string
	Selector      map[string]string
	Label         map[string]string
}

ComJob all vcJob has.

type Device

// Device identifies one device of an Instance. It is serialized to JSON with
// the keys device_id and device_ip, plus super_device_id when that field is
// non-empty.
type Device struct {
	DeviceID      string `json:"device_id"` // device id
	DeviceIP      string `json:"device_ip"` // device ip
	SuperDeviceID string `json:"super_device_id,omitempty"`
}

Device id for Instance

type Instance

// Instance is for annotation. It is serialized to JSON with the keys
// pod_name, server_id, super_pod_id and devices.
type Instance struct {
	PodName    string   `json:"pod_name"`  // pod name
	ServerID   string   `json:"server_id"` // server id
	SuperPodId int32    `json:"super_pod_id"`
	Devices    []Device `json:"devices"` // devices
}

Instance is for annotation

type NPUJob

// NPUJob holds the attributes that only NPU vcJobs have: the job's tasks,
// task counts, requested NPU name and number, and sub-healthy strategy.
type NPUJob struct {
	// Tasks is keyed by the task ID, not by the task name.
	Tasks              map[api.TaskID]NPUTask
	NPUTaskNum         int
	SchedulingTaskNum  int
	ReqNPUName         string
	ReqNPUNum          int
	SpBlockNPUNum      int
	SubHealthyStrategy string
}

NPUJob only npu vcJob have.

func (*NPUJob) GetSchedulingTaskNum

func (nJob *NPUJob) GetSchedulingTaskNum() int

GetSchedulingTaskNum get the num of scheduling task

func (*NPUJob) IsNPUJob

func (nJob *NPUJob) IsNPUJob() bool

IsNPUJob determines whether the job is an NPU job. Dynamic segmentation: huawei.com/npu-core. Static segmentation: huawei.com/Ascend910-Y. No segmentation: huawei.com/Ascend910.

func (*NPUJob) IsVJob

func (nJob *NPUJob) IsVJob() bool

IsVJob determines whether the job is an NPU virtual job. Dynamic segmentation: huawei.com/npu-core. Static segmentation: huawei.com/Ascend910-Y. No segmentation: huawei.com/Ascend910.

type NPUTask

// NPUTask holds what an NPU task needs. It embeds *VTask, so VTask's fields
// are promoted onto NPUTask; accessing them panics while the embedded
// pointer is nil.
type NPUTask struct {
	Name       string
	NameSpace  string
	ReqNPUName string
	ReqNPUNum  int
	Annotation map[string]string
	Label      map[string]string
	NodeName   string
	PodStatus  v1.PodPhase
	Index      int
	*VTask
}

NPUTask for npu task need.

func (*NPUTask) DeleteRealPodByTask

func (asTask *NPUTask) DeleteRealPodByTask(ssn *framework.Session, waitTime int64) error

DeleteRealPodByTask generally used by force deletion

func (*NPUTask) EvictJobByTask

func (asTask *NPUTask) EvictJobByTask(ssn *framework.Session, reason string, taskName string) error

EvictJobByTask generally used by grace deletion

func (*NPUTask) ForceDeletePodByTaskInf

func (asTask *NPUTask) ForceDeletePodByTaskInf(ssn *framework.Session, reason string, nodeName string) error

ForceDeletePodByTaskInf Force delete pod by taskInf.

func (*NPUTask) InitVTask

func (asTask *NPUTask) InitVTask(taskInf *api.TaskInfo) error

InitVTask init vNPU task.

func (*NPUTask) IsNPUTask

func (asTask *NPUTask) IsNPUTask() bool

IsNPUTask determines whether the task is an NPU task. Dynamic segmentation: huawei.com/npu-core. Static segmentation: huawei.com/Ascend910-Y. No segmentation: huawei.com/Ascend910.

func (*NPUTask) IsTaskInItsNode

func (asTask *NPUTask) IsTaskInItsNode(ssn *framework.Session, nodeName string) bool

IsTaskInItsNode check if task is on the node

func (*NPUTask) IsVNPUTask

func (asTask *NPUTask) IsVNPUTask() bool

IsVNPUTask determines whether the task is an NPU virtual task. Dynamic segmentation: huawei.com/npu-core. No segmentation: huawei.com/Ascend910.

func (*NPUTask) UpdatePodPendingReason

func (asTask *NPUTask) UpdatePodPendingReason(taskInfo *api.TaskInfo, reasonTmp string) error

UpdatePodPendingReason update pod pending reason.

type NpuBaseInfo

// NpuBaseInfo is the npu base info: an IP string and a super device ID.
type NpuBaseInfo struct {
	IP            string
	SuperDeviceID uint32
}

NpuBaseInfo npu base info

type SchedulerJobAttr

// SchedulerJobAttr is a vcJob's attributes: the embedded ComJob (by value)
// combined with the embedded *NPUJob (by pointer). Accessing fields promoted
// from NPUJob panics while that pointer is nil.
type SchedulerJobAttr struct {
	ComJob
	*NPUJob
}

SchedulerJobAttr vcJob's attribute.

func (SchedulerJobAttr) GetPluginNameByReq

func (sJob SchedulerJobAttr) GetPluginNameByReq() string

GetPluginNameByReq get plugin name by job request resource name

func (*SchedulerJobAttr) IsJobHasTorAffinityLabel

func (sJob *SchedulerJobAttr) IsJobHasTorAffinityLabel() bool

IsJobHasTorAffinityLabel check job has tor affinity label

func (SchedulerJobAttr) IsLargeModelJob

func (sJob SchedulerJobAttr) IsLargeModelJob() bool

IsLargeModelJob job is large model job

func (*SchedulerJobAttr) IsTorAffinityJob

func (sJob *SchedulerJobAttr) IsTorAffinityJob() bool

IsTorAffinityJob check job is tor affinity job

type TaskAllocated

// TaskAllocated is the task allocated struct: the node plus the card and
// physics names.
type TaskAllocated struct {
	// NodeName is a node name, like ubuntu.
	NodeName string
	// CardName holds elements like 1.
	CardName []int
	// PhysicsName holds elements like Ascend310P-2c-100-1.
	PhysicsName []string
}

TaskAllocated Task allocated struct.

type VResource

// VResource is the set of resource dimensions: Aicore and Aicpu as ints and
// DVPP as a string.
type VResource struct {
	Aicore int
	Aicpu  int
	DVPP   string
}

VResource resource dimensions

func (*VResource) Add

func (vResource *VResource) Add(resource VResource)

Add add resource

func (VResource) BeGreater

func (vResource VResource) BeGreater(resource VResource) bool

BeGreater judges whether the resource is greater than or equal to the given resource.

func (*VResource) Sub

func (vResource *VResource) Sub(resource VResource)

Sub sub resource

type VTask

// VTask is the virtual NPU task struct: a status and the allocation.
type VTask struct {
	// Status is the task status; presumably one of the TaskStatus* constants
	// (TaskStatusInit, ...) — confirm.
	Status    int
	Allocated TaskAllocated
}

VTask virtual NPU task struct.

type VTemplate

// VTemplate is for vNode resource: the chip kind, AI core and AI CPU
// numbers, and the DVPP enable setting.
type VTemplate struct {
	// ChipKind is the chip kind: Ascend910/Ascend310P.
	ChipKind   string
	AICore     int
	AICPU      int
	DVPPEnable string
}

VTemplate for vNode resource

Source Files

  • constants.go
  • job.go
  • task.go
  • type.go
  • util.go

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL