-
Notifications
You must be signed in to change notification settings - Fork 265
Description
Hi, I want to ask what this error means:
Context prompt >>> def add():\n """1 add 2"""\n
Traceback (most recent call last):
File "generate.py", line 74, in <module>
main()
File "generate.py", line 59, in main
generate_samples_interactive(
File "/gpt-neox/megatron/text_generation_utils.py", line 779, in generate_samples_interactive
generated_text = neox_args.tokenizer.detokenize(generated_tokens)
File "/gpt-neox/megatron/tokenizer/tokenizer.py", line 162, in detokenize
return self.tokenizer.decode(token_ids)
File "/gpt-neox/megatron/tokenizer/gpt2_tokenization.py", line 279, in decode
text = ''.join([self.decoder[token] for token in tokens])
File "/gpt-neox/megatron/tokenizer/gpt2_tokenization.py", line 279, in <listcomp>
text = ''.join([self.decoder[token] for token in tokens])
KeyError: 50286
Killing subprocess 2480
Traceback (most recent call last):
File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.8/dist-packages/deepspeed/launcher/launch.py", line 179, in <module>
main()
File "/usr/local/lib/python3.8/dist-packages/deepspeed/launcher/launch.py", line 169, in main
sigkill_handler(signal.SIGTERM, None) # not coming back
File "/usr/local/lib/python3.8/dist-packages/deepspeed/launcher/launch.py", line 147, in sigkill_handler
raise subprocess.CalledProcessError(returncode=last_return_code, cmd=cmd)
subprocess.CalledProcessError: Command '['/usr/bin/python', '-u', 'generate.py', '--local_rank=0', '--deepspeed_config', '{"train_batch_size": 32, "train_micro_batch_size_per_gpu": 8, "gradient_accumulation_steps": 4, "optimizer": {"type": "adam", "params": {"lr": 0.00016, "betas": [0.9, 0.999], "eps": 1e-08}}, "fp16": {"fp16": true, "enabled": true, "loss_scale": 0, "initial_scale_power": 16, "loss_scale_window": 1000, "hysteresis": 2, "min_loss_scale": 1}, "gradient_clipping": 1.0, "zero_optimization": {"stage": 1, "allgather_partitions": true, "allgather_bucket_size": 500000000, "overlap_comm": true, "reduce_scatter": true, "reduce_bucket_size": 500000000, "contiguous_gradients": true, "cpu_offload": false}, "wall_clock_breakdown": true, "zero_allow_untested_optimizer": true}', '--megatron_config', '{"train_batch_size": 32, "train_micro_batch_size_per_gpu": 8, "gradient_accumulation_steps": 4, "optimizer": {"type": "adam", "params": {"lr": 0.00016, "betas": [0.9, 0.999], "eps": 1e-08}}, "fp16": {"fp16": true, "enabled": true, "loss_scale": 0, "initial_scale_power": 16, "loss_scale_window": 1000, "hysteresis": 2, "min_loss_scale": 1}, "gradient_clipping": 1.0, "zero_optimization": {"stage": 1, "allgather_partitions": true, "allgather_bucket_size": 500000000, "overlap_comm": true, "reduce_scatter": true, "reduce_bucket_size": 500000000, "contiguous_gradients": true, "cpu_offload": false}, "wall_clock_breakdown": true, "zero_allow_untested_optimizer": true, "precision": "fp16", "num_layers": 32, "hidden_size": 2560, "num_attention_heads": 32, "seq_length": 2048, "max_position_embeddings": 2048, "pos_emb": "rotary", "no_weight_tying": true, "attention_config": ["global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", 
"global"], "sparsity_config": {}, "scaled_upper_triang_masked_softmax_fusion": true, "bias_gelu_fusion": true, "lr_decay_style": "cosine", "lr_decay_iters": 160000, "zero_stage": 1, "zero_reduce_scatter": true, "zero_contiguous_gradients": true, "zero_reduce_bucket_size": 500000000, "zero_allgather_bucket_size": 500000000, "lr": 0.00016, "data_path": "data/code/code_text_document", "data_impl": "mmap", "save": "checkpoints", "config_files": {"text_generation.yml": "# Parameters used for text generation\n# Make sure load is specified somewhere else\n{\n # Text gen type: input-file, unconditional or interactive\n \"text-gen-type\": \"interactive\",\n \n # Params for all\n \"maximum_tokens\": 256,\n \"temperature\": 0.5,\n \"top_p\": 0.0,\n \"top_k\": 0,\n \"recompute\": false,\n \n # unconditional: samples\n \"num-samples\": 10,\n\n # input/output file\n \"sample-input-file\": \"sample_input.txt\",\n \"sample-output-file\": \"sample_output.txt\",\n}", "local_setup.yml": "# Suggested data paths when using GPT-NeoX locally\n{\n \"data-path\": \"data/code/code_text_document\",\n \n # or for weighted datasets: \n # \"train-data-paths\": [\"data/enron/enron_text_document\", \"data/enron/enron_text_document\"],\n # \"test-data-paths\": [\"data/enron/enron_text_document\", \"data/enron/enron_text_document\"],\n # \"valid-data-paths\": [\"data/enron/enron_text_document\", \"data/enron/enron_text_document\"],\n # \"train-data-weights\": [1., 2.],\n # \"test-data-weights\": [2., 1.],\n # \"valid-data-weights\": [0.5, 0.4],\n\n # If weight_by_num_documents is True, Builds dataset weights from a multinomial distribution over groups of data according to the number of documents in each group. 
\n # WARNING: setting this to True will override any user provided weights\n # \"weight_by_num_documents\": false,\n # \"weighted_sampler_alpha\": 0.3,\n\n \"vocab-file\": \"data/code-vocab.json\",\n \"merge-file\": \"data/code-merges.txt\",\n\n \"save\": \"checkpoints\",\n \"load\": \"checkpoints\",\n \"checkpoint_validation_with_forward_pass\": False,\n \n \"tensorboard-dir\": \"tensorboard\",\n \"log-dir\": \"logs\",\n \"use_wandb\": True,\n \"wandb_host\": \"https://api.wandb.ai\",\n \"wandb_project\": \"neox\"\n}", "2-7B.yml": "# GPT-2 pretraining setup\n{\n # parallelism settings ( you will want to change these based on your cluster setup, ideally scheduling pipeline stages\n # across the node boundaries )\n \"pipe-parallel-size\": 1,\n \"model-parallel-size\": 1,\n\n # model settings\n \"num-layers\": 32,\n \"hidden-size\": 2560,\n \"num-attention-heads\": 32,\n \"seq-length\": 2048,\n \"max-position-embeddings\": 2048,\n \"norm\": \"layernorm\",\n \"pos-emb\": \"rotary\",\n \"no-weight-tying\": true,\n\n # these should provide some speedup but takes a while to build, set to true if desired\n \"scaled-upper-triang-masked-softmax-fusion\": true,\n \"bias-gelu-fusion\": true,\n\n # optimizer settings\n \"zero_allow_untested_optimizer\": true,\n \"optimizer\": {\n \"type\": \"adam\",\n \"params\": {\n \"lr\": 0.00016,\n \"betas\": [0.9, 0.999],\n \"eps\": 1.0e-8,\n }\n },\n \"zero_optimization\": {\n \"stage\": 1,\n \"allgather_partitions\": True,\n \"allgather_bucket_size\": 500000000,\n \"overlap_comm\": True,\n \"reduce_scatter\": True,\n \"reduce_bucket_size\": 500000000,\n \"contiguous_gradients\": True,\n \"cpu_offload\": False\n },\n\n # batch / data settings\n \"train_micro_batch_size_per_gpu\": 8,\n \"gradient_accumulation_steps\": 4,\n \"data-impl\": \"mmap\",\n \"split\": \"989,10,1\",\n\n # activation checkpointing\n \"checkpoint-activations\": true,\n \"checkpoint-num-layers\": 1,\n \"partition-activations\": true,\n \"synchronize-each-layer\": 
true,\n\n # regularization\n \"gradient_clipping\": 1.0,\n \"weight-decay\": 0,\n \"hidden-dropout\": 0,\n \"attention-dropout\": 0,\n\n # precision settings\n \"fp16\": { \n \"fp16\": true,\n \"enabled\": true,\n \"loss_scale\": 0,\n \"initial_scale_power\": 16,\n \"loss_scale_window\": 1000,\n \"hysteresis\": 2,\n \"min_loss_scale\": 1\n },\n\n # misc. training settings\n \"train-iters\": 160000,\n \"lr-decay-iters\": 160000,\n \"distributed-backend\": \"nccl\",\n \"lr-decay-style\": \"cosine\",\n \"warmup\": 0.01,\n \"save-interval\": 1000,\n \"eval-interval\": 1000,\n \"eval-iters\": 10,\n\n # logging\n \"log-interval\": 100,\n \"steps_per_print\": 10,\n \"keep-last-n-checkpoints\": 1,\n \"wall_clock_breakdown\": true,\n}\n"}, "load": "checkpoints", "save_interval": 1000, "batch_size": 8, "train_iters": 160000, "eval_iters": 10, "keep_last_n_checkpoints": 1, "split": "989,10,1", "vocab_file": "data/code-vocab.json", "merge_file": "data/code-merges.txt", "attention_dropout": 0, "hidden_dropout": 0, "weight_decay": 0, "checkpoint_activations": true, "synchronize_each_layer": true, "partition_activations": true, "gas": 4, "clip_grad": 1.0, "dynamic_loss_scale": true, "pipe_parallel_size": 1, "is_pipe_parallel": true, "use_wandb": true, "wandb_group": "EU8n85c7eggg2GKkfu64XK_db101lzj", "log_dir": "logs", "tensorboard_dir": "tensorboard", "log_interval": 100, "text_gen_type": "interactive", "temperature": 0.5, "maximum_tokens": 256, "sample_input_file": "sample_input.txt", "sample_output_file": "sample_output.txt", "num_samples": 10, "user_script": "generate.py", "global_num_gpus": 1}']' returned non-zero exit status 1.