When I start training, I get the following log output. The loss seems very high: for the bidirectional model, the loss starts in the range of 5-6k for me, but here it begins in the range of 100k.
pretrain epoch 1, step 0, cost:ctc 370.774999040268, cost:output/output_prob 9.212651966398369, error:ctc 17.81853248248808, error:decision 0.0, error:output/output_prob 0.9999999811407178, loss 98416.805, max_size:classes 12, max_size:data 437, mem_usage:GPU:0 772.1MB, num_seqs 45, 7.013 sec/step, elapsed 0:00:20, exp. remaining 0:00:31, complete 38.81%
pretrain epoch 1, step 1, cost:ctc 284.98298864587923, cost:output/output_prob 9.212257950929825, error:ctc 14.188172029796988, error:decision 0.0, error:output/output_prob 0.9999999990686774, loss 109440.63, max_size:classes 15, max_size:data 361, mem_usage:GPU:0 4.5GB, num_seqs 45, 1.343 sec/step, elapsed 0:00:21, exp. remaining 0:00:30, complete 41.12%
pretrain epoch 1, step 2, cost:ctc 290.689638091957, cost:output/output_prob 9.212171886229044, error:ctc 13.878237992990762, error:decision 0.0, error:output/output_prob 0.9974093013443053, loss 115762.1, max_size:classes 11, max_size:data 457, mem_usage:GPU:0 4.5GB, num_seqs 43, 1.492 sec/step, elapsed 0:00:22, exp. remaining 0:00:28, complete 44.16%
pretrain epoch 1, step 3, cost:ctc 293.1260531155767, cost:output/output_prob 9.212529359078815, error:ctc 14.327272824011743, error:decision 0.0, error:output/output_prob 1.0000000067520887, loss 83143.11, max_size:classes 13, max_size:data 697, mem_usage:GPU:0 4.5GB, num_seqs 28, 1.295 sec/step, elapsed 0:00:24, exp. remaining 0:00:26, complete 47.32%
pretrain epoch 1, step 4, cost:ctc 260.3412321177311, cost:output/output_prob 9.211921857411653, error:ctc 12.447256781160831, error:decision 0.0, error:output/output_prob 0.9999999515712261, loss 127768.2, max_size:classes 17, max_size:data 429, mem_usage:GPU:0 4.6GB, num_seqs 43, 1.548 sec/step, elapsed 0:00:25, exp. remaining 0:00:25, complete 50.61%
pretrain epoch 1, step 5, cost:ctc 318.70524216529157, cost:output/output_prob 9.211640645493048, error:ctc 15.151261280290782, error:decision 0.0, error:output/output_prob 1.0000000512227416, loss 78044.21, max_size:classes 14, max_size:data 669, mem_usage:GPU:0 5.1GB, num_seqs 21, 1.246 sec/step, elapsed 0:00:26, exp. remaining 0:00:22, complete 54.14%
pretrain epoch 1, step 6, cost:ctc 299.3549390775952, cost:output/output_prob 9.211832857916477, error:ctc 14.26940580131486, error:decision 0.0, error:output/output_prob 0.9999999585561454, loss 67576.125, max_size:classes 14, max_size:data 1096, mem_usage:GPU:0 5.1GB, num_seqs 18, 1.654 sec/step, elapsed 0:00:28, exp. remaining 0:00:21, complete 56.69%
pretrain epoch 1, step 7, cost:ctc 293.0398462318408, cost:output/output_prob 9.210876611768981, error:ctc 13.748554605990648, error:decision 0.0, error:output/output_prob 0.9942196309566498, loss 104578.75, max_size:classes 15, max_size:data 702, mem_usage:GPU:0 5.1GB, num_seqs 28, 1.451 sec/step, elapsed 0:00:30, exp. remaining 0:00:19, complete 60.10%
pretrain epoch 1, step 8, cost:ctc 254.12312227828806, cost:output/output_prob 9.211448004022031, error:ctc 12.09428558475338, error:decision 0.0, error:output/output_prob 0.9999999892897904, loss 92167.1, max_size:classes 18, max_size:data 663, mem_usage:GPU:0 5.1GB, num_seqs 28, 1.229 sec/step, elapsed 0:00:31, exp. remaining 0:00:18, complete 62.41%
pretrain epoch 1, step 9, cost:ctc 316.1771712676091, cost:output/output_prob 9.21173871878409, error:ctc 14.632000694982708, error:decision 0.0, error:output/output_prob 1.0000000474974513, loss 81347.23, max_size:classes 19, max_size:data 691, mem_usage:GPU:0 5.3GB, num_seqs 19, 1.290 sec/step, elapsed 0:00:32, exp. remaining 0:00:17, complete 65.57%
pretrain epoch 1, step 10, cost:ctc 278.35383669496514, cost:output/output_prob 9.21164610571941, error:ctc 11.460377529263496, error:decision 0.0, error:output/output_prob 1.0000000149011612, loss 76204.85, max_size:classes 19, max_size:data 1015, mem_usage:GPU:0 5.3GB, num_seqs 19, 1.624 sec/step, elapsed 0:00:34, exp. remaining 0:00:15, complete 68.25%
pretrain epoch 1, step 11, cost:ctc 283.5369720542076, cost:output/output_prob 9.211048180620764, error:ctc 12.168604605831206, error:decision 0.0, error:output/output_prob 0.9825581358745694, loss 100705.32, max_size:classes 17, max_size:data 796, mem_usage:GPU:0 5.5GB, num_seqs 25, 1.489 sec/step, elapsed 0:00:35, exp. remaining 0:00:14, complete 70.56%
pretrain epoch 1, step 12, cost:ctc 294.0424982657896, cost:output/output_prob 9.211014433250512, error:ctc 11.176151985302567, error:decision 0.0, error:output/output_prob 0.9972899928689002, loss 111900.55, max_size:classes 18, max_size:data 744, mem_usage:GPU:0 5.5GB, num_seqs 26, 1.467 sec/step, elapsed 0:00:37, exp. remaining 0:00:13, complete 73.84%
pretrain epoch 1, step 13, cost:ctc 264.9904200157962, cost:output/output_prob 9.211146379502793, error:ctc 10.400467950617895, error:decision 0.0, error:output/output_prob 0.997658038046211, loss 117084.07, max_size:classes 19, max_size:data 739, mem_usage:GPU:0 5.5GB, num_seqs 27, 1.461 sec/step, elapsed 0:00:38, exp. remaining 0:00:12, complete 76.28%
pretrain epoch 1, step 14, cost:ctc 279.6960028297326, cost:output/output_prob 9.21092234193111, error:ctc 10.493362792767584, error:decision 0.0, error:output/output_prob 0.9845132706686854, loss 130585.93, max_size:classes 20, max_size:data 683, mem_usage:GPU:0 5.7GB, num_seqs 29, 1.521 sec/step, elapsed 0:00:40, exp. remaining 0:00:11, complete 78.10%
pretrain epoch 1, step 15, cost:ctc 327.895143393187, cost:output/output_prob 9.21084384552745, error:ctc 11.577777936821803, error:decision 0.0, error:output/output_prob 0.9968254105187953, loss 106188.38, max_size:classes 18, max_size:data 915, mem_usage:GPU:0 5.8GB, num_seqs 21, 1.584 sec/step, elapsed 0:00:41, exp. remaining 0:00:10, complete 80.66%
pretrain epoch 1, step 16, cost:ctc 287.5264747359033, cost:output/output_prob 9.21073743537363, error:ctc 10.535791894420981, error:decision 0.0, error:output/output_prob 1.000000013038516, loss 136795.86, max_size:classes 20, max_size:data 706, mem_usage:GPU:0 5.8GB, num_seqs 28, 1.509 sec/step, elapsed 0:00:43, exp. remaining 0:00:09, complete 82.60%
pretrain epoch 1, step 17, cost:ctc 285.89704467331467, cost:output/output_prob 9.210682287022337, error:ctc 8.432024161331356, error:decision 0.0, error:output/output_prob 0.9909365549683571, loss 97680.66, max_size:classes 20, max_size:data 1021, mem_usage:GPU:0 5.8GB, num_seqs 19, 1.595 sec/step, elapsed 0:00:44, exp. remaining 0:00:08, complete 84.31%
pretrain epoch 1, step 18, cost:ctc 290.6208765563606, cost:output/output_prob 9.210362345630415, error:ctc 9.50226285494864, error:decision 0.0, error:output/output_prob 0.9773756079375744, loss 132525.4, max_size:classes 21, max_size:data 764, mem_usage:GPU:0 5.8GB, num_seqs 26, 1.869 sec/step, elapsed 0:00:46, exp. remaining 0:00:07, complete 86.74%
pretrain epoch 1, step 19, cost:ctc 289.32279153186573, cost:output/output_prob 9.210678139116453, error:ctc 9.320099179632962, error:decision 0.0, error:output/output_prob 0.9950372127350421, loss 120308.99, max_size:classes 24, max_size:data 881, mem_usage:GPU:0 6.0GB, num_seqs 22, 1.619 sec/step, elapsed 0:00:48, exp. remaining 0:00:05, complete 89.05%
pretrain epoch 1, step 20, cost:ctc 280.0556132506572, cost:output/output_prob 9.21039583990057, error:ctc 10.322946036234498, error:decision 0.0, error:output/output_prob 0.968838513828814, loss 102110.91, max_size:classes 23, max_size:data 1038, mem_usage:GPU:0 6.3GB, num_seqs 19, 1.835 sec/step, elapsed 0:00:50, exp. remaining 0:00:04, complete 91.36%
pretrain epoch 1, step 21, cost:ctc 254.93665059748307, cost:output/output_prob 9.209757156185105, error:ctc 7.915057765785604, error:decision 0.0, error:output/output_prob 0.9787644603056832, loss 136827.84, max_size:classes 23, max_size:data 677, mem_usage:GPU:0 6.3GB, num_seqs 27, 1.672 sec/step, elapsed 0:00:51, exp. remaining 0:00:03, complete 93.31%
pretrain epoch 1 'dev' eval, step 31, cost:ctc 277.11975923537466, cost:output/output_prob 9.208977020054363, error:ctc 7.104166878387332, error:decision 0.0, error:output/output_prob 0.9791666958481073, loss 109950.234, max_size:classes 42, max_size:data 1952, mem_usage:GPU:0 7.0GB, num_seqs 10, 2.828 sec/step, elapsed 0:01:55, exp. remaining 0:08:21, complete 18.73%
pretrain epoch 1 'dev' eval, step 32, cost:ctc 273.59442138671875, cost:output/output_prob 9.209274291992188, error:ctc 7.88671875, error:decision 0.0, error:output/output_prob 0.970703125, loss 144795.5, max_size:classes 40, max_size:data 1409, mem_usage:GPU:0 7.0GB, num_seqs 14, 2.666 sec/step, elapsed 0:01:58, exp. remaining 0:08:16, complete 19.23%
pretrain epoch 1 'dev' eval, step 33, cost:ctc 256.75234398911925, cost:output/output_prob 9.208727096917642, error:ctc 7.4961538531351835, error:decision 0.0, error:output/output_prob 0.9711538470583037, loss 138299.75, max_size:classes 41, max_size:data 1299, mem_usage:GPU:0 7.0GB, num_seqs 14, 2.445 sec/step, elapsed 0:02:00, exp. remaining 0:08:10, complete 19.77%
pretrain epoch 1 'dev' eval, step 34, cost:ctc 285.8596498768602, cost:output/output_prob 9.208559764520942, error:ctc 8.386117246001959, error:decision 0.0, error:output/output_prob 0.9761388413608074, loss 136026.44, max_size:classes 42, max_size:data 1447, mem_usage:GPU:0 7.0GB, num_seqs 13, 2.358 sec/step, elapsed 0:02:03, exp. remaining 0:08:03, complete 20.30%
pretrain epoch 1 'dev' eval, step 35, cost:ctc 269.1395396027401, cost:output/output_prob 9.209745830636166, error:ctc 8.055670401314273, error:decision 0.0, error:output/output_prob 0.9731959123164415, loss 134999.4, max_size:classes 39, max_size:data 1395, mem_usage:GPU:0 7.0GB, num_seqs 14, 2.831 sec/step, elapsed 0:02:05, exp. remaining 0:07:59, complete 20.80%
pretrain epoch 1 'dev' eval, step 36, cost:ctc 259.18438174932635, cost:output/output_prob 9.208568567526925, error:ctc 8.137362669571303, error:decision 0.0, error:output/output_prob 0.9725274763768539, loss 146542.55, max_size:classes 37, max_size:data 1191, mem_usage:GPU:0 7.0GB, num_seqs 16, 2.836 sec/step, elapsed 0:02:08, exp. remaining 0:07:53, complete 21.40%
pretrain epoch 1 'dev' eval, step 37, cost:ctc 252.84458955970877, cost:output/output_prob 9.208074668781308, error:ctc 7.913344890112057, error:decision 0.0, error:output/output_prob 0.9722703642910346, loss 151204.39, max_size:classes 38, max_size:data 1139, mem_usage:GPU:0 7.0GB, num_seqs 17, 2.572 sec/step, elapsed 0:02:11, exp. remaining 0:07:47, complete 21.93%
pretrain epoch 1 'dev' eval, step 38, cost:ctc 250.15737703586183, cost:output/output_prob 9.20930364439414, error:ctc 7.438461545389146, error:decision 0.0, error:output/output_prob 0.9673076932085678, loss 134870.67, max_size:classes 40, max_size:data 1285, mem_usage:GPU:0 7.0GB, num_seqs 15, 2.609 sec/step, elapsed 0:02:13, exp. remaining 0:07:43, complete 22.43%
pretrain epoch 1 'dev' eval, step 39, cost:ctc 271.8697553540187, cost:output/output_prob 9.209092128607153, error:ctc 8.836169932968915, error:decision 0.0, error:output/output_prob 0.9744680542498827, loss 132107.06, max_size:classes 38, max_size:data 1355, mem_usage:GPU:0 7.0GB, num_seqs 14, 2.146 sec/step, elapsed 0:02:16, exp. remaining 0:07:37, complete 22.93%
pretrain epoch 1 'dev' eval, step 40, cost:ctc 261.506115385273, cost:output/output_prob 9.209192646439988, error:ctc 7.9118939489126205, error:decision 0.0, error:output/output_prob 0.9691629558801651, loss 122904.76, max_size:classes 41, max_size:data 1448, mem_usage:GPU:0 7.0GB, num_seqs 13, 2.436 sec/step, elapsed 0:02:18, exp. remaining 0:07:31, complete 23.50%
pretrain epoch 1 'dev' eval, step 41, cost:ctc 274.07519103557206, cost:output/output_prob 9.209490461988025, error:ctc 8.77160467277281, error:decision 0.0, error:output/output_prob 0.9711933862417936, loss 137676.36, max_size:classes 39, max_size:data 1281, mem_usage:GPU:0 7.0GB, num_seqs 15, 2.441 sec/step, elapsed 0:02:21, exp. remaining 0:07:25, complete 24.03%
pretrain epoch 1 'dev' eval, step 42, cost:ctc 252.70003694258233, cost:output/output_prob 9.20855387423012, error:ctc 7.676635749056005, error:decision 0.0, error:output/output_prob 0.9700934876454995, loss 140121.1, max_size:classes 37, max_size:data 1178, mem_usage:GPU:0 7.0GB, num_seqs 16, 2.736 sec/step, elapsed 0:02:23, exp. remaining 0:07:20, complete 24.60%
pretrain epoch 1 'dev' eval, step 43, cost:ctc 270.67411690674635, cost:output/output_prob 9.20859987210747, error:ctc 7.644913419382648, error:decision 0.0, error:output/output_prob 0.9673704151064159, loss 145818.9, max_size:classes 36, max_size:data 1207, mem_usage:GPU:0 7.0GB, num_seqs 16, 2.465 sec/step, elapsed 0:02:26, exp. remaining 0:07:14, complete 25.17%
pretrain epoch 1 'dev' eval, step 44, cost:ctc 280.80680964180283, cost:output/output_prob 9.209749808459833, error:ctc 8.78251619799994, error:decision 0.0, error:output/output_prob 0.9701492765452713, loss 136017.77, max_size:classes 35, max_size:data 1272, mem_usage:GPU:0 7.0GB, num_seqs 15, 2.668 sec/step, elapsed 0:02:28, exp. remaining 0:07:10, complete 25.70%
pretrain epoch 1 'dev' eval, step 45, cost:ctc 254.6034903952077, cost:output/output_prob 9.20842834551786, error:ctc 7.482638944638893, error:decision 0.0, error:output/output_prob 0.96875000721775, loss 151955.66, max_size:classes 38, max_size:data 1031, mem_usage:GPU:0 7.0GB, num_seqs 18, 2.503 sec/step, elapsed 0:02:31, exp. remaining 0:07:05, complete 26.23%
pretrain epoch 1 'dev' eval, step 46, cost:ctc 274.09750769345555, cost:output/output_prob 9.209082607156233, error:ctc 8.13664611428976, error:decision 0.0, error:output/output_prob 0.9710145108401775, loss 136837.08, max_size:classes 38, max_size:data 1201, mem_usage:GPU:0 7.0GB, num_seqs 16, 2.307 sec/step, elapsed 0:02:33, exp. remaining 0:06:58, complete 26.87%
pretrain epoch 1 'dev' eval, step 47, cost:ctc 290.25258018526074, cost:output/output_prob 9.209130957663774, error:ctc 8.91721111512743, error:decision 0.0, error:output/output_prob 0.9716775366105139, loss 137452.92, max_size:classes 34, max_size:data 1332, mem_usage:GPU:0 7.0GB, num_seqs 15, 2.507 sec/step, elapsed 0:02:36, exp. remaining 0:06:53, complete 27.43%
pretrain epoch 1 'dev' eval, step 48, cost:ctc 258.8656995520141, cost:output/output_prob 9.2095162026435, error:ctc 7.991323314607143, error:decision 0.0, error:output/output_prob 0.9739696439355612, loss 123582.67, max_size:classes 37, max_size:data 1326, mem_usage:GPU:0 7.0GB, num_seqs 15, 2.818 sec/step, elapsed 0:02:39, exp. remaining 0:06:47, complete 28.07%
pretrain epoch 1 'dev' eval, step 49, cost:ctc 272.2239783266559, cost:output/output_prob 9.208625928783931, error:ctc 8.412573975510895, error:decision 0.0, error:output/output_prob 0.966601213440299, loss 143249.19, max_size:classes 34, max_size:data 1123, mem_usage:GPU:0 7.0GB, num_seqs 17, 2.734 sec/step, elapsed 0:02:41, exp. remaining 0:06:41, complete 28.70%
pretrain epoch 1 'dev' eval, step 50, cost:ctc 258.8279444774871, cost:output/output_prob 9.209226809130655, error:ctc 8.533468355191872, error:decision 0.0, error:output/output_prob 0.977687603328377, loss 132142.33, max_size:classes 35, max_size:data 1229, mem_usage:GPU:0 7.0GB, num_seqs 16, 3.092 sec/step, elapsed 0:02:44, exp. remaining 0:06:38, complete 29.27%
pretrain epoch 1 'dev' eval, step 51, cost:ctc 257.45671048380245, cost:output/output_prob 9.208775578838413, error:ctc 8.383104426320642, error:decision 0.0, error:output/output_prob 0.9685658500529826, loss 135732.73, max_size:classes 34, max_size:data 1152, mem_usage:GPU:0 7.0GB, num_seqs 17, 2.869 sec/step, elapsed 0:02:47, exp. remaining 0:06:31, complete 29.97%
pretrain epoch 1 'dev' eval, step 52, cost:ctc 257.5612085307803, cost:output/output_prob 9.209717567658572, error:ctc 8.740594332106411, error:decision 0.0, error:output/output_prob 0.9702970599755645, loss 134719.31, max_size:classes 39, max_size:data 1120, mem_usage:GPU:0 7.0GB, num_seqs 17, 2.591 sec/step, elapsed 0:02:50, exp. remaining 0:06:27, complete 30.53%
pretrain epoch 1 'dev' eval, step 53, cost:ctc 288.9668045128128, cost:output/output_prob 9.208448120739831, error:ctc 8.214442108757794, error:decision 0.0, error:output/output_prob 0.9628008864820004, loss 136266.1, max_size:classes 34, max_size:data 1224, mem_usage:GPU:0 7.0GB, num_seqs 16, 2.542 sec/step, elapsed 0:02:52, exp. remaining 0:06:22, complete 31.13%
pretrain epoch 1 'dev' eval, step 54, cost:ctc 266.2692113663579, cost:output/output_prob 9.209002644777911, error:ctc 8.604803629685193, error:decision 0.0, error:output/output_prob 0.9628821113146842, loss 126169.02, max_size:classes 33, max_size:data 1180, mem_usage:GPU:0 7.0GB, num_seqs 16, 2.219 sec/step, elapsed 0:02:55, exp. remaining 0:06:16, complete 31.77%
pretrain epoch 1 'dev' eval, step 55, cost:ctc 266.5517305701014, cost:output/output_prob 9.208483576998958, error:ctc 7.970425241626799, error:decision 0.0, error:output/output_prob 0.9667282934533432, loss 149186.28, max_size:classes 32, max_size:data 1015, mem_usage:GPU:0 7.0GB, num_seqs 19, 2.677 sec/step, elapsed 0:02:57, exp. remaining 0:06:10, complete 32.40%