I ran into the following error while training on my own dataset.
The iteration at which the error occurs differs from run to run.
` File "/fast/jupyter/BoxTeacher/projects/BoxTeacher/train_net_my.py", line 354, in <module>
launch(
File "/home/mdxuser/anaconda3/envs/Boxteacher/lib/python3.10/site-packages/detectron2/engine/launch.py", line 69, in launch
mp.start_processes(
File "/home/mdxuser/anaconda3/envs/Boxteacher/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 197, in start_processes
while not context.join():
File "/home/mdxuser/anaconda3/envs/Boxteacher/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 3 terminated with the following error:
Traceback (most recent call last):
File "/home/mdxuser/anaconda3/envs/Boxteacher/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/home/mdxuser/anaconda3/envs/Boxteacher/lib/python3.10/site-packages/detectron2/engine/launch.py", line 123, in _distributed_worker
main_func(*args)
File "/fast/jupyter/BoxTeacher/projects/BoxTeacher/train_net_my.py", line 348, in main
return trainer.train()
File "/fast/jupyter/BoxTeacher/projects/BoxTeacher/train_net_my.py", line 122, in train
self.train_loop(self.start_iter, self.max_iter)
File "/fast/jupyter/BoxTeacher/projects/BoxTeacher/train_net_my.py", line 111, in train_loop
self.run_step()
File "/home/mdxuser/anaconda3/envs/Boxteacher/lib/python3.10/site-packages/detectron2/engine/defaults.py", line 494, in run_step
self._trainer.run_step()
File "/home/mdxuser/anaconda3/envs/Boxteacher/lib/python3.10/site-packages/detectron2/engine/train_loop.py", line 310, in run_step
loss_dict = self.model(data)
File "/home/mdxuser/anaconda3/envs/Boxteacher/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/mdxuser/anaconda3/envs/Boxteacher/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1156, in forward
output = self._run_ddp_forward(*inputs, **kwargs)
File "/home/mdxuser/anaconda3/envs/Boxteacher/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1110, in _run_ddp_forward
return module_to_run(*inputs[0], **kwargs[0]) # type: ignore[index]
File "/home/mdxuser/anaconda3/envs/Boxteacher/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/fast/jupyter/BoxTeacher/projects/BoxTeacher/boxteacher/boxteacher.py", line 101, in forward
return self.student(batched_inputs)
File "/home/mdxuser/anaconda3/envs/Boxteacher/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/fast/jupyter/BoxTeacher/projects/BoxTeacher/boxteacher/condinst.py", line 592, in forward
self.add_bitmasks_from_boxes(
File "/fast/jupyter/BoxTeacher/projects/BoxTeacher/boxteacher/condinst.py", line 860, in add_bitmasks_from_boxes
per_im_gt_inst.boxinst_image_color_similarity = torch.cat(
[images_color_similarity for _ in range(len(per_im_gt_inst))], dim=0 )
RuntimeError: torch.cat(): expected a non-empty list of Tensors`
I've confirmed that the error occurs because `len(per_im_gt_inst)` is 0, so the list passed to `torch.cat` is empty.
Do you have any idea what might cause `per_im_gt_inst` to be empty? Thank you!