rau.tasks

rau.tasks.common.add_prepare_data_args(parser)
rau.tasks.common.validate_prepare_data_args(parser, args)
rau.tasks.common.get_token_types(tokens, unk_string)
rau.tasks.common.get_token_types_in_file(path, unk_string)
rau.tasks.common.prepare_file(vocab, pair)
rau.tasks.common.load_prepared_data_file(path)
Return type:

list[Tensor]

rau.tasks.common.pad_sequences(sequences, device, pad, bos=None, eos=None, return_lengths=False)
Return type:

Tensor | tuple[Tensor, Tensor]
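
Example (illustrative; the padding semantics are not spelled out above, so this assumes sequences is a list of 1-D integer tensors, pad is the index used to fill shorter sequences, and bos/eos are optional indices added to every sequence):

    import torch
    from rau.tasks.common import pad_sequences

    # Hypothetical token-index sequences of unequal length.
    sequences = [torch.tensor([4, 7, 2]), torch.tensor([9, 1])]

    # With return_lengths=True the result is assumed to be a pair of
    # (padded batch tensor, original lengths).
    padded, lengths = pad_sequences(
        sequences,
        device=torch.device('cpu'),
        pad=0,
        eos=3,
        return_lengths=True,
    )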

rau.tasks.common.add_training_loop_arguments(parser, max_tokens_per_batch_help)
Return type:

None

class rau.tasks.common.TrainingLoop

Bases: Generic[Example, PreparedBatch, VocabularyContainer]

TrainingLoop(max_epochs: int, random_shuffling_seed: int, max_tokens_per_batch: int, optimizer: Literal['SGD', 'Adam', 'AdamW'], initial_learning_rate: float, weight_decay: float | None, label_smoothing_factor: float | None, gradient_clipping_threshold: float | None, early_stopping_patience: int, learning_rate_schedule_type: Literal['reduce-on-plateau', 'linear-with-warmup'], learning_rate_patience: int | None, learning_rate_decay_factor: float | None, learning_rate_warmup_examples: int | None, examples_per_checkpoint: int, every_n_examples: list[tuple[int, str]])

__init__(max_epochs, random_shuffling_seed, max_tokens_per_batch, optimizer, initial_learning_rate, weight_decay, label_smoothing_factor, gradient_clipping_threshold, early_stopping_patience, learning_rate_schedule_type, learning_rate_patience, learning_rate_decay_factor, learning_rate_warmup_examples, examples_per_checkpoint, every_n_examples)
static check_args(parser, args)
Return type:

None

evaluate(model, model_interface, batches)
Return type:

dict[str, float]

evaluate_batch(model, model_interface, prepared_batch)
Return type:

dict[str, tuple[float, float]]

generate_batches(examples, max_tokens)

Given the full list of examples in a dataset and a maximum number of tokens per batch, group those examples into minibatches.

Return type:

Iterable[list[Example]]
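
Example (illustrative only; this is not the library's implementation, it merely sketches the contract of grouping examples under a token budget, assuming each example exposes some notion of its length):

    def toy_generate_batches(examples, max_tokens, size=len):
        # Greedily fill a batch until adding the next example would
        # exceed the token budget, then start a new batch.
        batch, used = [], 0
        for example in examples:
            n = size(example)
            if batch and used + n > max_tokens:
                yield batch
                batch, used = [], 0
            batch.append(example)
            used += n
        if batch:
            yield batch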

get_linear_with_warmup_lr_scheduler(optimizer)
Return type:

LinearWithWarmupLRScheduler

get_loss(model, model_interface, prepared_batch)

Return a differentiable tensor representing the loss function to be optimized.

Return type:

tuple[Tensor, float] | dict[str, tuple[Tensor, float] | tuple[Tensor, float, float]]
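
Example (a minimal sketch of an override; none of this is documented above, so it assumes the prepared batch is an (inputs, targets) pair, that the model maps inputs to logits, and that the float paired with the loss is a normalization count such as the number of target symbols):

    import torch
    from rau.tasks.common import TrainingLoop

    class MyTrainingLoop(TrainingLoop):
        def get_loss(self, model, model_interface, prepared_batch):
            # Hypothetical batch layout and model call.
            inputs, targets = prepared_batch
            logits = model(inputs)
            loss = torch.nn.functional.cross_entropy(
                logits.reshape(-1, logits.size(-1)),
                targets.reshape(-1),
                reduction='sum',
            )
            # Assumed: the accompanying float is a normalization count.
            return loss, float(targets.numel())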

get_lr_scheduler(optimizer)
Return type:

tuple[LRScheduler | None, PerUpdateLRScheduler | None]

get_optimizer(model)
Return type:

Optimizer

get_prepared_batch_and_loss(saver, model_interface, batch)
Return type:

tuple[PreparedBatch, Tensor, float, float, dict[str, tuple[float, float]]]

get_prepared_batch_info(prepared_batch)
Return type:

dict[str, Any]

get_reduce_on_plateau_lr_scheduler(optimizer)
Return type:

ReduceLROnPlateau

classmethod get_state(parser, args, saver, device)
Return type:

TrainingLoopState | None

get_validation_metric_mode()

Return whether the validation metric is supposed to go up (max) or down (min).

Return type:

Literal['min', 'max']

get_validation_metric_name()

Return the name of the validation set metric used for early stopping and learning rate scheduling.

Return type:

str
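
Example (a hypothetical subclass whose validation metric is cross-entropy per token, where lower is better; the metric name presumably matches a key in the scores returned by evaluate()):

    from rau.tasks.common import TrainingLoop

    class MyTrainingLoop(TrainingLoop):
        def get_validation_metric_name(self):
            # Hypothetical metric name.
            return 'cross_entropy_per_token'

        def get_validation_metric_mode(self):
            # Lower cross-entropy is better, so early stopping and the
            # learning rate schedule should watch for a minimum.
            return 'min'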

handle_out_of_cuda_memory(vocabulary, batch, info, device, console_logger, event_logger)
Return type:

None

initial_state(saver, device)
Return type:

TrainingLoopState

log_failed_batch(vocabulary, batch, info, console_logger, event_logger)
Return type:

dict[str, Any]

run(state, saver, model_interface, training_data, validation_data, vocabulary, console_logger, event_logger, show_progress, time_limit=None, fail_after_examples=None)

NOTE: After training runs to completion, the model’s parameters will be those of the last epoch, not necessarily the best epoch. However, the saved model will be the best one.

Return type:

None

run_parameter_update(saver, model_interface, optimizer, batch)
Return type:

tuple[float, float, dict[str, tuple[float, float]]]

save_config(saver)
Return type:

None

max_epochs: int
random_shuffling_seed: int
max_tokens_per_batch: int
optimizer: Literal['SGD', 'Adam', 'AdamW']
initial_learning_rate: float
weight_decay: float | None
label_smoothing_factor: float | None
gradient_clipping_threshold: float | None
early_stopping_patience: int
learning_rate_schedule_type: Literal['reduce-on-plateau', 'linear-with-warmup']
learning_rate_patience: int | None
learning_rate_decay_factor: float | None
learning_rate_warmup_examples: int | None
examples_per_checkpoint: int
every_n_examples: list[tuple[int, str]]
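
Example (placeholder values only; TrainingLoop is generic over Example, PreparedBatch, and VocabularyContainer and is presumably meant to be subclassed, in which case the same arguments would be passed to the concrete subclass):

    from rau.tasks.common import TrainingLoop

    loop = TrainingLoop(
        max_epochs=10,
        random_shuffling_seed=123,
        max_tokens_per_batch=2048,
        optimizer='Adam',
        initial_learning_rate=1e-3,
        weight_decay=None,
        label_smoothing_factor=0.1,
        gradient_clipping_threshold=5.0,
        early_stopping_patience=4,
        learning_rate_schedule_type='reduce-on-plateau',
        learning_rate_patience=2,
        learning_rate_decay_factor=0.5,
        learning_rate_warmup_examples=None,
        examples_per_checkpoint=10_000,
        every_n_examples=[],
    )
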
rau.tasks.common.get_random_generator_and_seed(random_seed)
rau.tasks.common.get_random_seed(random_seed)
rau.tasks.common.evaluate(model, model_interface, batches, evaluate_batch)
Return type:

dict[str, float]

exception rau.tasks.common.OutOfCUDAMemoryError

Bases: RuntimeError

OutOfCUDAMemoryError(info: dict[str, typing.Any])

__init__(info)
info: dict[str, Any]
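
Example (illustrative; the keys actually placed in info by the library are not shown above, so the dict below is purely hypothetical):

    from rau.tasks.common import OutOfCUDAMemoryError

    try:
        # Illustrative only: signal a failed batch via the exception's
        # info dict.
        raise OutOfCUDAMemoryError(info={'batch_size': 256, 'max_length': 512})
    except OutOfCUDAMemoryError as error:
        print('ran out of CUDA memory:', error.info)
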
class rau.tasks.common.MicroAveragedScoreAccumulator

Bases: object

__init__()
get_value()
Return type:

float

update(numerator, denominator)
Return type:

None
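
Example (hedged; the class is documented only by its signatures, so this assumes it keeps running sums and that get_value() returns the ratio of total numerator to total denominator, i.e. a micro-average):

    from rau.tasks.common import MicroAveragedScoreAccumulator

    accumulator = MicroAveragedScoreAccumulator()
    # e.g. summed loss and token count for each batch.
    accumulator.update(12.0, 40)
    accumulator.update(3.0, 10)
    # Assumed to be (12.0 + 3.0) / (40 + 10) = 0.3.
    print(accumulator.get_value())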

class rau.tasks.common.DictScoreAccumulator

Bases: object

__init__()
get_value()
Return type:

dict[str, float]

update(scores)
Return type:

None
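
Example (hedged; this assumes each call to update() takes a dict mapping metric names to (numerator, denominator) pairs, mirroring the return type of evaluate_batch(), and that get_value() micro-averages each metric):

    from rau.tasks.common import DictScoreAccumulator

    accumulator = DictScoreAccumulator()
    accumulator.update({'cross_entropy': (12.0, 40), 'accuracy': (30.0, 40)})
    accumulator.update({'cross_entropy': (3.0, 10), 'accuracy': (9.0, 10)})
    # Assumed result: {'cross_entropy': 0.3, 'accuracy': 0.78}
    print(accumulator.get_value())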