You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/1
Enabling DeepSpeed BF16.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Traceback (most recent call last):
File "./train.py", line 388, in <module>
trainer.fit(model, data_loader)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 608, in fit
call._call_and_handle_interrupt(
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 36, in _call_and_handle_interrupt
return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 88, in launch
return function(*args, **kwargs)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 650, in _fit_impl
self._run(model, ckpt_path=self.ckpt_path)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1093, in _run
self.strategy.setup(self)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/strategies/deepspeed.py", line 345, in setup
self.init_deepspeed()
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/strategies/deepspeed.py", line 456, in init_deepspeed
self._initialize_deepspeed_train(model)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/strategies/deepspeed.py", line 489, in _initialize_deepspeed_train
optimizer, lr_scheduler, _ = self._init_optimizers()
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/strategies/deepspeed.py", line 462, in _init_optimizers
optimizers, lr_schedulers, optimizer_frequencies = _init_optimizers_and_lr_schedulers(self.lightning_module)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/core/optimizer.py", line 180, in _init_optimizers_and_lr_schedulers
optim_conf = model.trainer._call_lightning_module_hook("configure_optimizers", pl_module=model)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1356, in _call_lightning_module_hook
output = fn(*args, **kwargs)
File "/home/chenhao/project/RWKV-LM-LoRA/RWKV-v4neo/src/model.py", line 518, in configure_optimizers
return FusedAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adam_w_mode=False, weight_decay=0, amsgrad=False)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/deepspeed/ops/adam/fused_adam.py", line 72, in __init__
fused_adam_cuda = FusedAdamBuilder().load()
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 471, in load
return self.jit_load(verbose)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 486, in jit_load
assert_no_cuda_mismatch()
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 88, in assert_no_cuda_mismatch
cuda_major, cuda_minor = installed_cuda_version()
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 42, in installed_cuda_version
output = subprocess.check_output([cuda_home + "/bin/nvcc",
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/subprocess.py", line 411, in check_output
return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/subprocess.py", line 489, in run
with Popen(*popenargs, **kwargs) as process:
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/subprocess.py", line 854, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/subprocess.py", line 1637, in _execute_child
self.pid = _posixsubprocess.fork_exec(
OSError: [Errno 12] Cannot allocate memory
请问这个是什么原因?如何解决?
The text was updated successfully, but these errors were encountered:
initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/1
Enabling DeepSpeed BF16.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Traceback (most recent call last):
File "./train.py", line 388, in <module>
trainer.fit(model, data_loader)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 608, in fit
call._call_and_handle_interrupt(
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 36, in _call_and_handle_interrupt
return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py", line 88, in launch
return function(*args, **kwargs)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 650, in _fit_impl
self._run(model, ckpt_path=self.ckpt_path)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1093, in _run
self.strategy.setup(self)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/strategies/deepspeed.py", line 345, in setup
self.init_deepspeed()
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/strategies/deepspeed.py", line 456, in init_deepspeed
self._initialize_deepspeed_train(model)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/strategies/deepspeed.py", line 489, in _initialize_deepspeed_train
optimizer, lr_scheduler, _ = self._init_optimizers()
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/strategies/deepspeed.py", line 462, in _init_optimizers
optimizers, lr_schedulers, optimizer_frequencies = _init_optimizers_and_lr_schedulers(self.lightning_module)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/core/optimizer.py", line 180, in _init_optimizers_and_lr_schedulers
optim_conf = model.trainer._call_lightning_module_hook("configure_optimizers", pl_module=model)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1356, in _call_lightning_module_hook
output = fn(*args, **kwargs)
File "/home/chenhao/project/RWKV-LM-LoRA/RWKV-v4neo/src/model.py", line 518, in configure_optimizers
return FusedAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adam_w_mode=False, weight_decay=0, amsgrad=False)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/deepspeed/ops/adam/fused_adam.py", line 72, in __init__
fused_adam_cuda = FusedAdamBuilder().load()
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 471, in load
return self.jit_load(verbose)
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 486, in jit_load
assert_no_cuda_mismatch()
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 88, in assert_no_cuda_mismatch
cuda_major, cuda_minor = installed_cuda_version()
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 42, in installed_cuda_version
output = subprocess.check_output([cuda_home + "/bin/nvcc",
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/subprocess.py", line 411, in check_output
return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/subprocess.py", line 489, in run
with Popen(*popenargs, **kwargs) as process:
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/subprocess.py", line 854, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "/home/chenhao/anaconda3/envs/RWKV/lib/python3.8/subprocess.py", line 1637, in _execute_child
self.pid = _posixsubprocess.fork_exec(
OSError: [Errno 12] Cannot allocate memory
请问这个是什么原因?如何解决?
The text was updated successfully, but these errors were encountered: