StarPU Handbook
Scheduling Context Hypervisor - Building a new resizing policy

Data Structures

struct  sc_hypervisor_policy
 
struct  sc_hypervisor_policy_config
 
struct  sc_hypervisor_wrapper
 
struct  sc_hypervisor_resize_ack
 
struct  sc_hypervisor_policy_task_pool
 

Macros

#define STARPU_HYPERVISOR_TAG
 

Functions

void sc_hypervisor_post_resize_request (unsigned sched_ctx, int task_tag)
 
unsigned sc_hypervisor_get_size_req (unsigned **sched_ctxs, int *nsched_ctxs, int **workers, int *nworkers)
 
void sc_hypervisor_save_size_req (unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 
void sc_hypervisor_free_size_req (void)
 
unsigned sc_hypervisor_can_resize (unsigned sched_ctx)
 
struct sc_hypervisor_policy_config * sc_hypervisor_get_config (unsigned sched_ctx)
 
void sc_hypervisor_set_config (unsigned sched_ctx, void *config)
 
unsigned * sc_hypervisor_get_sched_ctxs ()
 
int sc_hypervisor_get_nsched_ctxs ()
 
struct sc_hypervisor_wrapper * sc_hypervisor_get_wrapper (unsigned sched_ctx)
 
double sc_hypervisor_get_elapsed_flops_per_sched_ctx (struct sc_hypervisor_wrapper *sc_w)
 

Detailed Description


Data Structure Documentation

◆ sc_hypervisor_policy

struct sc_hypervisor_policy

This structure contains all the methods that implement a hypervisor resizing policy.

Data Fields

const char * name
 
unsigned custom
 
void(* size_ctxs )(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 
void(* resize_ctxs )(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 
void(* handle_idle_cycle )(unsigned sched_ctx, int worker)
 
void(* handle_pushed_task )(unsigned sched_ctx, int worker)
 
void(* handle_poped_task )(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
 
void(* handle_idle_end )(unsigned sched_ctx, int worker)
 
void(* handle_post_exec_hook )(unsigned sched_ctx, int task_tag)
 
void(* handle_submitted_job )(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size)
 
void(* end_ctx )(unsigned sched_ctx)
 

Field Documentation

◆ name

sc_hypervisor_policy::name

Indicates the name of the policy. If there is no custom policy, the policy corresponding to this name will be used by the hypervisor

◆ custom

sc_hypervisor_policy::custom

Indicates whether the policy is custom or not

◆ size_ctxs

sc_hypervisor_policy::size_ctxs

Distribute workers to contexts even at the beginning of the program

◆ resize_ctxs

sc_hypervisor_policy::resize_ctxs

Require explicit resizing

◆ handle_idle_cycle

sc_hypervisor_policy::handle_idle_cycle

It is called whenever the indicated worker executes another idle cycle in sched_ctx

◆ handle_pushed_task

sc_hypervisor_policy::handle_pushed_task

It is called whenever a task is pushed on the worker’s queue corresponding to the context sched_ctx

◆ handle_poped_task

sc_hypervisor_policy::handle_poped_task

It is called whenever a task is popped from the worker’s queue corresponding to the context sched_ctx

The hypervisor takes a decision when another task was popped from this worker in this ctx

◆ handle_idle_end

sc_hypervisor_policy::handle_idle_end

It is called whenever a task is executed on the indicated worker and context after a long period of idle time

◆ handle_post_exec_hook

sc_hypervisor_policy::handle_post_exec_hook

It is called whenever a tag task has just been executed. The table of resize requests is provided as well as the tag

◆ handle_submitted_job

sc_hypervisor_policy::handle_submitted_job

The hypervisor takes a decision when a job was submitted in this ctx

◆ end_ctx

sc_hypervisor_policy::end_ctx

The hypervisor takes a decision when a certain ctx was deleted

◆ sc_hypervisor_policy_config

struct sc_hypervisor_policy_config

This structure contains all the configuration information of a context. This per-context information can be used to construct new resize strategies.

Data Fields
int min_nworkers

Indicates the minimum number of workers needed by the context

int max_nworkers

Indicates the maximum number of workers needed by the context

int granularity

Indicates the workers granularity of the context

int priority[STARPU_NMAXWORKERS]

Indicates the priority of each worker in the context

double max_idle[STARPU_NMAXWORKERS]

Indicates the maximum idle time accepted before a resize is triggered

double min_working[STARPU_NMAXWORKERS]

Indicates that underneath this limit the priority of the worker is reduced

int fixed_workers[STARPU_NMAXWORKERS]

Indicates which workers can be moved and which ones are fixed

double new_workers_max_idle

Indicates the maximum idle time accepted before a resize is triggered for the workers that just arrived in the new context

double ispeed_w_sample[STARPU_NMAXWORKERS]

Indicates the sample used to compute the instant speed per worker

double ispeed_ctx_sample

Indicates the sample used to compute the instant speed per ctxs

double time_sample

todo

◆ sc_hypervisor_wrapper

struct sc_hypervisor_wrapper

This structure is a wrapper of the contexts available in StarPU and contains all information about a context obtained by incrementing the performance counters.

Data Fields
unsigned sched_ctx

The context wrapped

struct sc_hypervisor_policy_config * config

The corresponding resize configuration

double start_time_w[STARPU_NMAXWORKERS]
double current_idle_time[STARPU_NMAXWORKERS]

The idle time counter of each worker of the context

double idle_time[STARPU_NMAXWORKERS]

The time the workers were idle from the last resize

double idle_start_time[STARPU_NMAXWORKERS]

The moment when the workers started being idle

double exec_time[STARPU_NMAXWORKERS]
double exec_start_time[STARPU_NMAXWORKERS]
int worker_to_be_removed[STARPU_NMAXWORKERS]

The list of workers that will leave this context (lazy resizing process)

int pushed_tasks[STARPU_NMAXWORKERS]

The number of pushed tasks of each worker of the context

int poped_tasks[STARPU_NMAXWORKERS]

The number of popped tasks of each worker of the context

double total_flops

The total number of flops to execute by the context

double total_elapsed_flops[STARPU_NMAXWORKERS]

The number of flops executed by each worker of the context

double elapsed_flops[STARPU_NMAXWORKERS]

The number of flops executed by each worker of the context from last resize

size_t elapsed_data[STARPU_NMAXWORKERS]

The quantity of data (in bytes) used to execute tasks on each worker in this ctx

int elapsed_tasks[STARPU_NMAXWORKERS]

The number of tasks executed on each worker in this ctx

double ref_speed[2]

The average speed of the workers (per type of worker) when they belonged to this context: index 0 is CUDA, index 1 is CPU

double submitted_flops

The number of flops submitted to this ctx

double remaining_flops

The number of flops that still have to be executed by the workers in the context

double start_time

The time when the context started executing

double real_start_time

The first time a task was pushed to this context

double hyp_react_start_time
struct sc_hypervisor_resize_ack resize_ack

The structure confirming the last resize finished and a new one can be done

starpu_pthread_mutex_t mutex

The mutex needed to synchronize the acknowledgment of the workers into the receiver context

unsigned total_flops_available

A boolean indicating if the hypervisor can use the flops corresponding to the entire execution of the context

unsigned to_be_sized
unsigned compute_idle[STARPU_NMAXWORKERS]
unsigned compute_partial_idle[STARPU_NMAXWORKERS]
unsigned consider_max

◆ sc_hypervisor_resize_ack

struct sc_hypervisor_resize_ack

This structure checks whether the workers moved to another context are actually taken into account in that context.

Data Fields
int receiver_sched_ctx

The context receiving the new workers

int * moved_workers

The workers moved to the receiver context

int nmoved_workers

The number of workers moved

int * acked_workers

If the value corresponding to a worker is 1, the worker is taken into account in the new context; if it is 0, it is not yet taken into account

◆ sc_hypervisor_policy_task_pool

struct sc_hypervisor_policy_task_pool

task wrapper linked list

Data Fields
struct starpu_codelet * cl

Which codelet has been executed

uint32_t footprint

Task footprint key

unsigned sched_ctx_id

Context the task belongs to

unsigned long n

Number of tasks of this kind

size_t data_size

The quantity of data (in bytes) needed by the task to execute

struct sc_hypervisor_policy_task_pool * next

Other task kinds

Macro Definition Documentation

◆ STARPU_HYPERVISOR_TAG

#define STARPU_HYPERVISOR_TAG

todo

Function Documentation

◆ sc_hypervisor_post_resize_request()

void sc_hypervisor_post_resize_request ( unsigned  sched_ctx,
int  task_tag 
)

Requests resizing of the context sched_ctx whenever a task tagged with the id task_tag finishes executing

◆ sc_hypervisor_get_size_req()

unsigned sc_hypervisor_get_size_req ( unsigned **  sched_ctxs,
int *  nsched_ctxs,
int **  workers,
int *  nworkers 
)

Check whether there are pending resize requests

◆ sc_hypervisor_save_size_req()

void sc_hypervisor_save_size_req ( unsigned *  sched_ctxs,
int  nsched_ctxs,
int *  workers,
int  nworkers 
)

Save a resize request

◆ sc_hypervisor_free_size_req()

void sc_hypervisor_free_size_req ( void  )

Clear the list of pending resize requests

◆ sc_hypervisor_can_resize()

unsigned sc_hypervisor_can_resize ( unsigned  sched_ctx)

Check whether a context can be resized

◆ sc_hypervisor_get_config()

struct sc_hypervisor_policy_config * sc_hypervisor_get_config ( unsigned  sched_ctx)

Returns the configuration structure of a context

◆ sc_hypervisor_set_config()

void sc_hypervisor_set_config ( unsigned  sched_ctx,
void *  config 
)

Set a given configuration for a context

◆ sc_hypervisor_get_sched_ctxs()

unsigned * sc_hypervisor_get_sched_ctxs ( )

Gets the contexts managed by the hypervisor

◆ sc_hypervisor_get_nsched_ctxs()

int sc_hypervisor_get_nsched_ctxs ( )

Gets the number of contexts managed by the hypervisor

◆ sc_hypervisor_get_wrapper()

struct sc_hypervisor_wrapper * sc_hypervisor_get_wrapper ( unsigned  sched_ctx)

Returns the wrapper corresponding to the context sched_ctx

◆ sc_hypervisor_get_elapsed_flops_per_sched_ctx()

double sc_hypervisor_get_elapsed_flops_per_sched_ctx ( struct sc_hypervisor_wrapper *  sc_w)

Returns the flops of a context elapsed from the last resize