Neural machine translation has come into wide use in recent years. Let's learn how it works by running it ourselves. In Part 2, we look at how to customize "JoeyNMT" for individual use cases and how to build it into a Discord chat bot.
　I'm Ota. I work on the development of "YarakuZen" at a company called Yaraku while enrolled in the doctoral program at Heidelberg University. YarakuZen is a translation platform that brings AI translation, post-editing of translations, document sharing, and ordering from translation companies together in one place.
　In Part 1, we introduced an overview of the machine translation framework "JoeyNMT", how to install it, and how to train a model. This time, we walk through concrete use cases and show how to customize JoeyNMT.
　Compared with other frameworks, JoeyNMT is a minimalist implementation with roughly one ninth to one tenth the lines of code and one quarter to one fifth the number of files (Note 1), keeping only a carefully selected set of core modules. Even so, it achieves scores on par with the state of the art (SOTA) on many benchmarks. Another advantage is that when you follow a stack trace while debugging, the flat directory structure makes it easy to locate the source of an error without getting lost.
Note 1: Comparison with OpenNMT-py and XNMT. See "Joey NMT: A Minimalist NMT Toolkit for Novices" for details.
　Now, let's look at how to customize JoeyNMT for each use case.
　By default, JoeyNMT supports two subword tokenizers, "subword-nmt" and "sentencepiece". So what should you do if you want to use a different tokenizer?
　Tokenizers are defined in "joeynmt/tokenizers.py". As an example, let's add "fastBPE".
　fastBPE is a C++ implementation of subword-nmt. We will inherit from the "SubwordNMTTokenizer" class.
class FastBPETokenizer(SubwordNMTTokenizer):
    def __init__(self, ...):
        try:
            # import the fastBPE library
            import fastBPE
        except ImportError as e:
            logger.error(e)
            raise ImportError from e
        super().__init__(level, lowercase, normalize, [...], **kwargs)
        assert self.level == "bpe"
        # read the codes_path option
        self.codes: Path = Path(kwargs["codes_path"])
        assert self.codes.is_file(), f"codes file {self.codes} not found."
        # fastBPE object
        self.bpe = fastBPE.fastBPE(self.codes)

    def __call__(self, raw_input: str, is_train: bool = False) -> List[str]:
        # apply fastBPE
        tokenized = self.bpe.apply([raw_input])
        tokenized = tokenized[0].strip().split()
        # check that the sequence length falls within the specified range
        if is_train and self._filter_by_length(len(tokenized)):
            return None
        return tokenized
　Now we can tokenize with fastBPE. To make it selectable in the config file with 'tokenizer_type: "fastbpe"', we have "_build_tokenizer()" call "FastBPETokenizer".
def _build_tokenizer(cfg: Dict) -> BasicTokenizer:
    [...]
    if tokenizer_type == "sentencepiece":
        [...]
    elif tokenizer_type == "subword-nmt":
        [...]
    elif tokenizer_type == "fastbpe":
        assert "codes_path" in tokenizer_cfg
        tokenizer = FastBPETokenizer(
            level=cfg["level"],
            lowercase=cfg.get("lowercase", False),
            normalize=cfg.get("normalize", False),
            max_length=cfg.get("max_length", -1),
            min_length=cfg.get("min_length", -1),
            **tokenizer_cfg,
        )
　fastBPE needs a codes file, so we check that "codes_path" is specified in the config file, and if it is, we return a "FastBPETokenizer" object.
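　As a concrete illustration, the data section of the config file might then look like the following. This is only a sketch: the nesting under data/src follows JoeyNMT's usual config layout, and the language code and codes file path are just placeholders.

data:
    src:
        lang: "en"
        level: "bpe"
        lowercase: False
        tokenizer_type: "fastbpe"
        tokenizer_cfg:
            codes_path: "bpe.codes"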
　The tokenizer's "__call__()" is invoked when an instance is fetched from the dataset. For example, in "PlaintextDataset" it is called inside "get_item()".
def get_item(self, idx: int, lang: str, is_train: bool = None):
    [...]
    item = self.tokenizer[lang](line, is_train=is_train)
    return item
　In other words, the tokenizer is called every time "__getitem__()" is invoked in the "for batch in data_iterator:" iteration during training. This design is what makes BPE dropout possible. If the tokenizer you add requires heavy computation or always returns the same result, however, consider tokenizing in "pre_process()", which is called when the data is loaded (the preprocessing with "MosesTokenizer" in "BaseTokenizer" is a useful reference); a rough sketch follows below.
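　For example, a deterministic variant could look roughly like this. The class name is hypothetical and the exact pre_process() signature may differ between JoeyNMT versions, so treat this as a sketch rather than drop-in code.

class PrecomputedFastBPETokenizer(FastBPETokenizer):
    def pre_process(self, raw_input: str) -> str:
        # normalize as usual, then apply fastBPE once when the data is read in
        raw_input = super().pre_process(raw_input)
        return self.bpe.apply([raw_input])[0].strip()

    def __call__(self, raw_input: str, is_train: bool = False) -> List[str]:
        # the line is already BPE-segmented, so only split on whitespace here
        tokenized = raw_input.split()
        if is_train and self._filter_by_length(len(tokenized)):
            return None
        return tokenized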
　In addition to "ReduceLROnPlateau", "StepLR", and "ExponentialLR" from "torch.optim.lr_scheduler", JoeyNMT implements the "noam scheduler" often used with Transformers. What should you do if you want to use a different learning rate scheduler?
　Learning rate schedulers are defined in "joeynmt/builders.py". As an example, let's implement an Inverse Square Root scheduler.
class BaseScheduler:
    def step(self, step):
        """Update the learning rate"""
        self._step = step + 1
        rate = self._compute_rate()
        for p in self.optimizer.param_groups:
            p["lr"] = rate
        self._rate = rate

    def _compute_rate(self):
        raise NotImplementedError
　The "BaseScheduler" class implements the mechanism that passes the learning rate for the current step to the optimizer's parameter groups. We override "_compute_rate()", which computes the learning rate.
　The Inverse Square Root scheduler decays the learning rate in inverse proportion to the square root of the step number. During the warmup period, the learning rate increases linearly, and the coefficient (decay_rate) is chosen so that the schedule reaches the given learning rate exactly at the end of warmup.
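　Written as a formula, with the variable names used in the implementation below:

rate(step) = peak_rate * step / warmup                 (step < warmup)
rate(step) = decay_rate / sqrt(step)
           = peak_rate * sqrt(warmup / step)           (step >= warmup)

where decay_rate = peak_rate * sqrt(warmup), so both branches meet at peak_rate when step equals warmup.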
class WarmupInverseSquareRootScheduler(BaseScheduler):
    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        peak_rate: float = 1.0e-3,
        warmup: int = 10000,
        min_rate: float = 1.0e-5,
    ):
        super().__init__(optimizer)
        self.warmup = warmup
        self.min_rate = min_rate
        self.peak_rate = peak_rate
        self.decay_rate = peak_rate * (warmup ** 0.5)

    def _compute_rate(self):
        if self._step < self.warmup:
            # increase linearly during warmup
            rate = self._step * self.peak_rate / self.warmup
        else:
            # decay in proportion to the inverse square root of the step number
            rate = self.decay_rate * (self._step ** -0.5)
        return max(rate, self.min_rate)
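　To get a feel for the resulting schedule before wiring it into the config, here is a minimal sketch. It assumes only that BaseScheduler.__init__ stores the optimizer (as its step() method above requires); the toy model and Adam optimizer are placeholders.

import torch

model = torch.nn.Linear(8, 8)
optimizer = torch.optim.Adam(model.parameters(), lr=1.0e-3)
scheduler = WarmupInverseSquareRootScheduler(
    optimizer=optimizer, peak_rate=1.0e-3, warmup=10000, min_rate=1.0e-5
)
for step in (0, 4999, 9999, 39999):
    scheduler.step(step)
    print(step + 1, optimizer.param_groups[0]["lr"])
# clamped to min_rate in the very first steps, then about 5e-4 at step 5000,
# 1e-3 at step 10000 (end of warmup), and decaying like 1/sqrt(step) afterwards,
# e.g. about 5e-4 at step 40000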
　Next, we modify "build_scheduler()" so that the Inverse Square Root scheduler we implemented can be selected from the config file.
def build_scheduler():
    [...]
    if scheduler_name == "plateau":
        [...]
    elif scheduler_name == "decaying":
        [...]
    elif scheduler_name == "exponential":
        [...]
    elif scheduler_name == "noam":
        [...]
    elif scheduler_name == "warmupinversesquareroot":
        scheduler = WarmupInverseSquareRootScheduler(
            optimizer=optimizer,
            peak_rate=config.get("learning_rate", 1.0e-3),
            min_rate=config.get("learning_rate_min", 1.0e-5),
            warmup=config.get("learning_rate_warmup", 10000),
        )
        scheduler_step_at = "step"
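　The corresponding entries in the training section of the config file might then look like this. The learning_rate, learning_rate_min, and learning_rate_warmup keys are the ones read above; the key that selects the scheduler and the section layout follow JoeyNMT's usual conventions and may need adjusting.

training:
    scheduling: "warmupinversesquareroot"
    learning_rate: 0.001
    learning_rate_min: 0.00001
    learning_rate_warmup: 10000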
　So that training can be resumed from where it was interrupted, the training-state variables are saved to a checkpoint. The variables that need to be saved differ from scheduler to scheduler, so each scheduler has to specify which variables to store.
　For the Inverse Square Root scheduler, in addition to the step count and the learning rate at that step, which are saved by default, we also save "warmup", "decay_rate", "peak_rate", and "min_rate".
class WarmupInverseSquareRootScheduler(BaseScheduler):
    [...]
    def state_dict(self):
        super().state_dict()
        self._state_dict["warmup"] = self.warmup
        self._state_dict["peak_rate"] = self.peak_rate
        self._state_dict["decay_rate"] = self.decay_rate
        self._state_dict["min_rate"] = self.min_rate
        return self._state_dict

    def load_state_dict(self, state_dict):
        super().load_state_dict(state_dict)
        self.warmup = state_dict["warmup"]
        self.decay_rate = state_dict["decay_rate"]
        self.peak_rate = state_dict["peak_rate"]
        self.min_rate = state_dict["min_rate"]
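　As a quick sanity check, the state can be round-tripped through a file like this. JoeyNMT does the equivalent as part of its checkpointing when resuming training; the file name and dictionary key here are only illustrative.

import torch

torch.save({"scheduler_state": scheduler.state_dict()}, "latest.ckpt")

# ... later, when resuming training ...
resumed = WarmupInverseSquareRootScheduler(optimizer=optimizer)
resumed.load_state_dict(torch.load("latest.ckpt")["scheduler_state"])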
　In machine translation, cross entropy is used as the loss function in most cases, and it is the default in JoeyNMT as well. What should you do if you want to customize the loss function?
　Loss functions are defined in "joeynmt/loss.py". As an example, let's get a head start and implement here the loss called "CTC Loss", which we will need for the speech translation planned for Part 3. We create a new class, "XentCTCLoss", that inherits from the existing "XentLoss" class and calls the CTC Loss implemented in PyTorch.
　To compute the CTC Loss, a blank must be treated as a special token, and a token ID for this blank has to be specified. We could define a new blank token, but here we repurpose the existing BOS token "<s>".
class XentCTCLoss(XentLoss):
    def __init__(self, pad_index: int, bos_index: int, smoothing: float = 0.0,
                 zero_infinity: bool = True, ctc_weight: float = 0.3):
        super().__init__(pad_index=pad_index, smoothing=smoothing)
        self.bos_index = bos_index
        self.ctc_weight = ctc_weight
        self.ctc = nn.CTCLoss(blank=bos_index, reduction='sum', zero_infinity=zero_infinity)
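　For instance, the loss could be created with the special-token indices of the target vocabulary roughly like this. This is a sketch: the vocabulary attribute names and the place where the loss is attached to the model are assumptions, so adapt it to wherever JoeyNMT's model builder creates the loss.

loss_function = XentCTCLoss(
    pad_index=trg_vocab.pad_index,  # ignored by the cross-entropy term
    bos_index=trg_vocab.bos_index,  # reused as the CTC blank token
    smoothing=0.1,
    ctc_weight=0.3,
)
model.loss_function = loss_function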
　"XentCTCLoss" returns the weighted sum of the existing cross entropy and the CTC loss.
class XentCTCLoss(XentLoss):
    def forward(self, log_probs, **kwargs) -> Tuple[Tensor, Tensor, Tensor]:
        # check that the information needed to compute the CTC Loss is in kwargs
        assert "trg" in kwargs
        assert "trg_length" in kwargs
        assert "src_mask" in kwargs
        assert "ctc_log_probs" in kwargs

        # reshape so that the cross entropy can be computed
        log_probs_flat, targets_flat = self._reshape(log_probs, kwargs["trg"])

        # compute the cross-entropy loss
        xent_loss = self.criterion(log_probs_flat, targets_flat)

        # compute the CTC loss
        ctc_loss = self.ctc(
            kwargs["ctc_log_probs"].transpose(0, 1).contiguous(),  # (seq_length, batch_size, vocab_size)
            targets=kwargs["trg"],
            input_lengths=kwargs["src_mask"].squeeze(1).sum(dim=1),
            target_lengths=kwargs["trg_length"],
        )

        # compute the weighted sum of cross entropy and CTC
        total_loss = (1.0 - self.ctc_weight) * xent_loss + self.ctc_weight * ctc_loss
        assert total_loss.item() >= 0.0, "loss has to be non-negative."
        return total_loss, xent_loss, ctc_loss
　The loss function is called in the model's "forward()". We modify the relevant part of "joeynmt/model.py" so that "XentCTCLoss" is called.
class Model(nn.Module):
    def forward(self, return_type: str = None, **kwargs):
        [...]
        # in addition to the usual decoder output, get the output of the layer added for CTC
        out, ctc_out = self._encode_decode(**kwargs)

        # compute log_softmax (the probability of each token) over the decoder output
        log_probs = F.log_softmax(out, dim=-1)

        # compute the loss for the batch
        if isinstance(self.loss_function, XentCTCLoss):
            # compute log_softmax over the output of the CTC layer as well
            kwargs["ctc_log_probs"] = F.log_softmax(ctc_out, dim=-1)
            # call forward() of XentCTCLoss
            total_loss, nll_loss, ctc_loss = self.loss_function(log_probs, **kwargs)
        [...]
　Only "total_loss", the weighted sum, is used for backpropagation, but "nll_loss" and "ctc_loss" are returned as well so that the learning curve of each loss can be plotted.
　We also add a layer for the CTC Loss computation to the decoder (joeynmt/decoders.py).
class TransformerDecoder(Decoder):
    def __init__(self, ...):
        [...]
        self.ctc_output_layer = nn.Linear(encoder_output_size, vocab_size, bias=False)

    def forward(self, ...):
        [...]
        out = self.output_layer(x)
        ctc_output = self.ctc_output_layer(encoder_output)
        return out, x, att, None, ctc_output


class Model(nn.Module):
    def _encode_decode(self, ...):
        [...]
        out, x, att, _, ctc_out = self._decode(...)
        return out, ctc_out
　A problem often seen in machine translation output is repetition. For example, with the pre-trained English-Japanese model we distribute, we observed output like the following on the wmt20 test set.
Input: "He begged me, "grandma, let me stay, don't do this to me, don't send me back,"" Hernandez said.
Output: the same short Japanese phrase repeated about twenty times.
Copyright © ITmedia, Inc. All Rights Reserved.