from contextualized_topic_models.models.ctm import CTM
from contextualized_topic_models.utils.data_preparation import TextHandler
from contextualized_topic_models.utils.data_preparation import bert_embeddings_from_file
from contextualized_topic_models.datasets.dataset import CTMDataset
handler = TextHandler("documents.txt")
handler.prepare() # create vocabulary and training data
generate BERT data
training_bert = bert_embeddings_from_file("documents.txt", "distiluse-base-multilingual-cased")
training_dataset = CTMDataset(handler.bow, training_bert, handler.idx2token)
ctm = CTM(input_size=len(handler.vocab), bert_input_size=512, inference_type="combined", n_components=50)
ctm.fit(training_dataset) # run the model
output:::::
Settings:
N Components: 50
Topic Prior Mean: 0.0
Topic Prior Variance: 0.98
Model Type: prodLDA
Hidden Sizes: (100, 100)
Activation: softplus
Dropout: 0.2
Learn Priors: True
Learning Rate: 0.002
Momentum: 0.99
Reduce On Plateau: False
Save Dir: None
Traceback (most recent call last):
Traceback (most recent call last):
File "test3.py", line 22, in
File "", line 1, in
File "C:\Users\Jay\py36\lib\multiprocessing\spawn.py", line 105, in spawn_main
ctm.fit(training_dataset)
exitcode = _main(fd) File "C:\Users\Jay\py36\neo\lib\site-packages\contextualized_topic_models\models\ctm.py", line 225, in fit
File "C:\Users\Jay\py36\lib\multiprocessing\spawn.py", line 114, in _main
sp, train_loss = self._train_epoch(train_loader)
prepare(preparation_data) File "C:\Users\Jay\py36\neo\lib\site-packages\contextualized_topic_models\models\ctm.py", line 151, in _train_epoch
File "C:\Users\Jay\py36\lib\multiprocessing\spawn.py", line 225, in prepare
for batch_samples in loader:
_fixup_main_from_path(data['init_main_from_path']) File "C:\Users\Jay\py36\neo\lib\site-packages\torch\utils\data\dataloader.py", line 291, in iter
File "C:\Users\Jay\py36\lib\multiprocessing\spawn.py", line 277, in _fixup_main_from_path
return _MultiProcessingDataLoaderIter(self)
run_name="mp_main") File "C:\Users\Jay\py36\neo\lib\site-packages\torch\utils\data\dataloader.py", line 737, in init
File "C:\Users\Jay\py36\lib\runpy.py", line 263, in run_path
w.start()
File "C:\Users\Jay\py36\lib\multiprocessing\process.py", line 105, in start
pkg_name=pkg_name, script_name=fname)
self._popen = self._Popen(self) File "C:\Users\Jay\py36\lib\runpy.py", line 96, in _run_module_code
File "C:\Users\Jay\py36\lib\multiprocessing\context.py", line 223, in _Popen
mod_name, mod_spec, pkg_name, script_name)
return _default_context.get_context().Process._Popen(process_obj) File "C:\Users\Jay\py36\lib\runpy.py", line 85, in _run_code
File "C:\Users\Jay\py36\lib\multiprocessing\context.py", line 322, in _Popen
exec(code, run_globals)
return Popen(process_obj) File "C:\Users\Jay\Desktop\try_backend\test3.py", line 22, in
File "C:\Users\Jay\py36\lib\multiprocessing\popen_spawn_win32.py", line 65, in __init__
ctm.fit(training_dataset)
reduction.dump(process_obj, to_child) File "C:\Users\Jay\py36\neo\lib\site-packages\contextualized_topic_models\models\ctm.py", line 225, in fit
File "C:\Users\Jay\py36\lib\multiprocessing\reduction.py", line 60, in dump
sp, train_loss = self._train_epoch(train_loader)
ForkingPickler(file, protocol).dump(obj) File "C:\Users\Jay\py36\neo\lib\site-packages\contextualized_topic_models\models\ctm.py", line 151, in _train_epoch
BrokenPipeErrorfor batch_samples in loader::
[Errno 32] Broken pipe File "C:\Users\Jay\py36\neo\lib\site-packages\torch\utils\data\dataloader.py", line 291, in iter
return _MultiProcessingDataLoaderIter(self)
File "C:\Users\Jay\py36\neo\lib\site-packages\torch\utils\data\dataloader.py", line 737, in init
w.start()
File "C:\Users\Jay\py36\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "C:\Users\Jay\py36\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\Jay\py36\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "C:\Users\Jay\py36\lib\multiprocessing\popen_spawn_win32.py", line 33, in init
prep_data = spawn.get_preparation_data(process_obj._name)
File "C:\Users\Jay\py36\lib\multiprocessing\spawn.py", line 143, in get_preparation_data
_check_not_importing_main()
File "C:\Users\Jay\py36\lib\multiprocessing\spawn.py", line 136, in _check_not_importing_main
is not going to be frozen to produce an executable.''')
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
The thing you people mentioned in the github itself is not running.