{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9990982867448152,
  "eval_steps": 500,
  "global_step": 554,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0018034265103697023,
      "grad_norm": 6.674532137525891,
      "learning_rate": 0.0,
      "loss": 1.2076,
      "step": 1
    },
    {
      "epoch": 0.0036068530207394047,
      "grad_norm": 6.154259029326631,
      "learning_rate": 2.2522522522522524e-08,
      "loss": 1.1491,
      "step": 2
    },
    {
      "epoch": 0.005410279531109108,
      "grad_norm": 6.771653034612783,
      "learning_rate": 4.504504504504505e-08,
      "loss": 1.0534,
      "step": 3
    },
    {
      "epoch": 0.007213706041478809,
      "grad_norm": 6.67369914164215,
      "learning_rate": 6.756756756756757e-08,
      "loss": 1.1268,
      "step": 4
    },
    {
      "epoch": 0.009017132551848512,
      "grad_norm": 6.948175490186477,
      "learning_rate": 9.00900900900901e-08,
      "loss": 1.0109,
      "step": 5
    },
    {
      "epoch": 0.010820559062218215,
      "grad_norm": 6.285358425270715,
      "learning_rate": 1.1261261261261262e-07,
      "loss": 1.1015,
      "step": 6
    },
    {
      "epoch": 0.012623985572587917,
      "grad_norm": 7.480900019360095,
      "learning_rate": 1.3513513513513515e-07,
      "loss": 1.1338,
      "step": 7
    },
    {
      "epoch": 0.014427412082957619,
      "grad_norm": 5.985730068048292,
      "learning_rate": 1.5765765765765766e-07,
      "loss": 1.0889,
      "step": 8
    },
    {
      "epoch": 0.016230838593327322,
      "grad_norm": 6.74360726738279,
      "learning_rate": 1.801801801801802e-07,
      "loss": 1.0402,
      "step": 9
    },
    {
      "epoch": 0.018034265103697024,
      "grad_norm": 6.603771834682011,
      "learning_rate": 2.0270270270270273e-07,
      "loss": 1.1394,
      "step": 10
    },
    {
      "epoch": 0.019837691614066726,
      "grad_norm": 6.070271548824436,
      "learning_rate": 2.2522522522522524e-07,
      "loss": 1.1439,
      "step": 11
    },
    {
      "epoch": 0.02164111812443643,
      "grad_norm": 6.814219599011481,
      "learning_rate": 2.477477477477478e-07,
      "loss": 1.0881,
      "step": 12
    },
    {
      "epoch": 0.023444544634806132,
      "grad_norm": 6.428167151788823,
      "learning_rate": 2.702702702702703e-07,
      "loss": 1.1163,
      "step": 13
    },
    {
      "epoch": 0.025247971145175834,
      "grad_norm": 7.047010957583219,
      "learning_rate": 2.927927927927928e-07,
      "loss": 1.0923,
      "step": 14
    },
    {
      "epoch": 0.027051397655545536,
      "grad_norm": 5.53060795190606,
      "learning_rate": 3.153153153153153e-07,
      "loss": 1.0118,
      "step": 15
    },
    {
      "epoch": 0.028854824165915238,
      "grad_norm": 6.682099184154056,
      "learning_rate": 3.378378378378379e-07,
      "loss": 1.1213,
      "step": 16
    },
    {
      "epoch": 0.030658250676284943,
      "grad_norm": 5.6679773328714615,
      "learning_rate": 3.603603603603604e-07,
      "loss": 1.2125,
      "step": 17
    },
    {
      "epoch": 0.032461677186654644,
      "grad_norm": 5.547383052372404,
      "learning_rate": 3.828828828828829e-07,
      "loss": 1.0804,
      "step": 18
    },
    {
      "epoch": 0.034265103697024346,
      "grad_norm": 7.200179452941777,
      "learning_rate": 4.0540540540540546e-07,
      "loss": 1.0663,
      "step": 19
    },
    {
      "epoch": 0.03606853020739405,
      "grad_norm": 5.325648740758234,
      "learning_rate": 4.27927927927928e-07,
      "loss": 1.2159,
      "step": 20
    },
    {
      "epoch": 0.03787195671776375,
      "grad_norm": 5.0110630414633475,
      "learning_rate": 4.504504504504505e-07,
      "loss": 0.9838,
      "step": 21
    },
    {
      "epoch": 0.03967538322813345,
      "grad_norm": 5.657518684511818,
      "learning_rate": 4.7297297297297305e-07,
      "loss": 1.0797,
      "step": 22
    },
    {
      "epoch": 0.04147880973850315,
      "grad_norm": 4.5143785306650575,
      "learning_rate": 4.954954954954956e-07,
      "loss": 1.1578,
      "step": 23
    },
    {
      "epoch": 0.04328223624887286,
      "grad_norm": 5.287373714805066,
      "learning_rate": 5.180180180180181e-07,
      "loss": 1.1259,
      "step": 24
    },
    {
      "epoch": 0.04508566275924256,
      "grad_norm": 4.812185404720589,
      "learning_rate": 5.405405405405406e-07,
      "loss": 1.0263,
      "step": 25
    },
    {
      "epoch": 0.046889089269612265,
      "grad_norm": 4.813260658431154,
      "learning_rate": 5.630630630630631e-07,
      "loss": 1.0294,
      "step": 26
    },
    {
      "epoch": 0.04869251577998197,
      "grad_norm": 5.38161770029296,
      "learning_rate": 5.855855855855856e-07,
      "loss": 1.0928,
      "step": 27
    },
    {
      "epoch": 0.05049594229035167,
      "grad_norm": 4.1250087598910365,
      "learning_rate": 6.081081081081082e-07,
      "loss": 1.0323,
      "step": 28
    },
    {
      "epoch": 0.05229936880072137,
      "grad_norm": 3.4592927519497496,
      "learning_rate": 6.306306306306306e-07,
      "loss": 1.0056,
      "step": 29
    },
    {
      "epoch": 0.05410279531109107,
      "grad_norm": 3.904048770191826,
      "learning_rate": 6.531531531531532e-07,
      "loss": 1.0514,
      "step": 30
    },
    {
      "epoch": 0.05590622182146077,
      "grad_norm": 3.648999951860501,
      "learning_rate": 6.756756756756758e-07,
      "loss": 1.0958,
      "step": 31
    },
    {
      "epoch": 0.057709648331830475,
      "grad_norm": 3.466281275771792,
      "learning_rate": 6.981981981981982e-07,
      "loss": 1.0257,
      "step": 32
    },
    {
      "epoch": 0.059513074842200184,
      "grad_norm": 3.392213870901326,
      "learning_rate": 7.207207207207208e-07,
      "loss": 0.9459,
      "step": 33
    },
    {
      "epoch": 0.061316501352569885,
      "grad_norm": 3.5029298346782385,
      "learning_rate": 7.432432432432434e-07,
      "loss": 0.9415,
      "step": 34
    },
    {
      "epoch": 0.06311992786293959,
      "grad_norm": 3.0634308438792632,
      "learning_rate": 7.657657657657658e-07,
      "loss": 1.0153,
      "step": 35
    },
    {
      "epoch": 0.06492335437330929,
      "grad_norm": 2.9484149128045,
      "learning_rate": 7.882882882882883e-07,
      "loss": 0.9878,
      "step": 36
    },
    {
      "epoch": 0.06672678088367899,
      "grad_norm": 3.0610426789398195,
      "learning_rate": 8.108108108108109e-07,
      "loss": 0.9123,
      "step": 37
    },
    {
      "epoch": 0.06853020739404869,
      "grad_norm": 3.32199769361744,
      "learning_rate": 8.333333333333333e-07,
      "loss": 1.0099,
      "step": 38
    },
    {
      "epoch": 0.0703336339044184,
      "grad_norm": 3.0709465427851046,
      "learning_rate": 8.55855855855856e-07,
      "loss": 1.0385,
      "step": 39
    },
    {
      "epoch": 0.0721370604147881,
      "grad_norm": 3.0428201478943575,
      "learning_rate": 8.783783783783785e-07,
      "loss": 0.9887,
      "step": 40
    },
    {
      "epoch": 0.0739404869251578,
      "grad_norm": 3.11038611613558,
      "learning_rate": 9.00900900900901e-07,
      "loss": 0.8805,
      "step": 41
    },
    {
      "epoch": 0.0757439134355275,
      "grad_norm": 3.5117708849754283,
      "learning_rate": 9.234234234234235e-07,
      "loss": 0.9708,
      "step": 42
    },
    {
      "epoch": 0.0775473399458972,
      "grad_norm": 3.436408499708477,
      "learning_rate": 9.459459459459461e-07,
      "loss": 0.991,
      "step": 43
    },
    {
      "epoch": 0.0793507664562669,
      "grad_norm": 2.707066762216591,
      "learning_rate": 9.684684684684686e-07,
      "loss": 0.8664,
      "step": 44
    },
    {
      "epoch": 0.0811541929666366,
      "grad_norm": 2.9154636312948647,
      "learning_rate": 9.909909909909911e-07,
      "loss": 0.9008,
      "step": 45
    },
    {
      "epoch": 0.0829576194770063,
      "grad_norm": 2.9028667627025726,
      "learning_rate": 1.0135135135135136e-06,
      "loss": 1.0705,
      "step": 46
    },
    {
      "epoch": 0.08476104598737602,
      "grad_norm": 2.6634992062941736,
      "learning_rate": 1.0360360360360361e-06,
      "loss": 0.891,
      "step": 47
    },
    {
      "epoch": 0.08656447249774572,
      "grad_norm": 2.738023098685531,
      "learning_rate": 1.0585585585585587e-06,
      "loss": 0.9246,
      "step": 48
    },
    {
      "epoch": 0.08836789900811542,
      "grad_norm": 2.5938725151636435,
      "learning_rate": 1.0810810810810812e-06,
      "loss": 0.9308,
      "step": 49
    },
    {
      "epoch": 0.09017132551848513,
      "grad_norm": 2.732422906982916,
      "learning_rate": 1.1036036036036037e-06,
      "loss": 1.0283,
      "step": 50
    },
    {
      "epoch": 0.09197475202885483,
      "grad_norm": 2.5138095481285814,
      "learning_rate": 1.1261261261261262e-06,
      "loss": 1.0285,
      "step": 51
    },
    {
      "epoch": 0.09377817853922453,
      "grad_norm": 2.5550555806196265,
      "learning_rate": 1.148648648648649e-06,
      "loss": 0.9065,
      "step": 52
    },
    {
      "epoch": 0.09558160504959423,
      "grad_norm": 2.3645335521201702,
      "learning_rate": 1.1711711711711712e-06,
      "loss": 0.8516,
      "step": 53
    },
    {
      "epoch": 0.09738503155996393,
      "grad_norm": 2.409700298550962,
      "learning_rate": 1.1936936936936937e-06,
      "loss": 0.8294,
      "step": 54
    },
    {
      "epoch": 0.09918845807033363,
      "grad_norm": 2.3183367981378145,
      "learning_rate": 1.2162162162162164e-06,
      "loss": 0.9365,
      "step": 55
    },
    {
      "epoch": 0.10099188458070334,
      "grad_norm": 2.1828402934512776,
      "learning_rate": 1.2387387387387387e-06,
      "loss": 0.8918,
      "step": 56
    },
    {
      "epoch": 0.10279531109107304,
      "grad_norm": 2.3691895978895094,
      "learning_rate": 1.2612612612612613e-06,
      "loss": 0.9768,
      "step": 57
    },
    {
      "epoch": 0.10459873760144274,
      "grad_norm": 2.3204193779879208,
      "learning_rate": 1.2837837837837838e-06,
      "loss": 0.7925,
      "step": 58
    },
    {
      "epoch": 0.10640216411181244,
      "grad_norm": 2.334168434235552,
      "learning_rate": 1.3063063063063065e-06,
      "loss": 0.855,
      "step": 59
    },
    {
      "epoch": 0.10820559062218214,
      "grad_norm": 2.256611178722444,
      "learning_rate": 1.328828828828829e-06,
      "loss": 0.8408,
      "step": 60
    },
    {
      "epoch": 0.11000901713255185,
      "grad_norm": 2.4778146158109924,
      "learning_rate": 1.3513513513513515e-06,
      "loss": 0.8964,
      "step": 61
    },
    {
      "epoch": 0.11181244364292155,
      "grad_norm": 2.4357880480005756,
      "learning_rate": 1.373873873873874e-06,
      "loss": 0.9621,
      "step": 62
    },
    {
      "epoch": 0.11361587015329125,
      "grad_norm": 2.2150394871764294,
      "learning_rate": 1.3963963963963963e-06,
      "loss": 0.8501,
      "step": 63
    },
    {
      "epoch": 0.11541929666366095,
      "grad_norm": 2.118545319018784,
      "learning_rate": 1.418918918918919e-06,
      "loss": 0.9133,
      "step": 64
    },
    {
      "epoch": 0.11722272317403065,
      "grad_norm": 2.1649234330413587,
      "learning_rate": 1.4414414414414416e-06,
      "loss": 0.7883,
      "step": 65
    },
    {
      "epoch": 0.11902614968440037,
      "grad_norm": 2.260747898334313,
      "learning_rate": 1.463963963963964e-06,
      "loss": 1.0857,
      "step": 66
    },
    {
      "epoch": 0.12082957619477007,
      "grad_norm": 2.3422569755909257,
      "learning_rate": 1.4864864864864868e-06,
      "loss": 0.9595,
      "step": 67
    },
    {
      "epoch": 0.12263300270513977,
      "grad_norm": 2.1879879443879067,
      "learning_rate": 1.5090090090090093e-06,
      "loss": 0.9373,
      "step": 68
    },
    {
      "epoch": 0.12443642921550947,
      "grad_norm": 2.1212636698318565,
      "learning_rate": 1.5315315315315316e-06,
      "loss": 0.8465,
      "step": 69
    },
    {
      "epoch": 0.12623985572587917,
      "grad_norm": 2.093523831224821,
      "learning_rate": 1.5540540540540541e-06,
      "loss": 0.8851,
      "step": 70
    },
    {
      "epoch": 0.12804328223624886,
      "grad_norm": 2.117245792873491,
      "learning_rate": 1.5765765765765766e-06,
      "loss": 0.8836,
      "step": 71
    },
    {
      "epoch": 0.12984670874661858,
      "grad_norm": 2.297950888317582,
      "learning_rate": 1.5990990990990993e-06,
      "loss": 0.8671,
      "step": 72
    },
    {
      "epoch": 0.13165013525698827,
      "grad_norm": 2.136681162174477,
      "learning_rate": 1.6216216216216219e-06,
      "loss": 0.9114,
      "step": 73
    },
    {
      "epoch": 0.13345356176735798,
      "grad_norm": 2.377418938286004,
      "learning_rate": 1.6441441441441444e-06,
      "loss": 0.9153,
      "step": 74
    },
    {
      "epoch": 0.13525698827772767,
      "grad_norm": 2.14684216322763,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.895,
      "step": 75
    },
    {
      "epoch": 0.13706041478809738,
      "grad_norm": 2.0578144463585395,
      "learning_rate": 1.6891891891891894e-06,
      "loss": 0.7646,
      "step": 76
    },
    {
      "epoch": 0.1388638412984671,
      "grad_norm": 2.1370659943028256,
      "learning_rate": 1.711711711711712e-06,
      "loss": 0.963,
      "step": 77
    },
    {
      "epoch": 0.1406672678088368,
      "grad_norm": 2.1407789023578805,
      "learning_rate": 1.7342342342342344e-06,
      "loss": 0.8181,
      "step": 78
    },
    {
      "epoch": 0.1424706943192065,
      "grad_norm": 2.224908436029519,
      "learning_rate": 1.756756756756757e-06,
      "loss": 0.7726,
      "step": 79
    },
    {
      "epoch": 0.1442741208295762,
      "grad_norm": 2.4321949851329627,
      "learning_rate": 1.7792792792792792e-06,
      "loss": 0.857,
      "step": 80
    },
    {
      "epoch": 0.1460775473399459,
      "grad_norm": 2.144226568602669,
      "learning_rate": 1.801801801801802e-06,
      "loss": 0.8378,
      "step": 81
    },
    {
      "epoch": 0.1478809738503156,
      "grad_norm": 1.9826711249103168,
      "learning_rate": 1.8243243243243245e-06,
      "loss": 0.7902,
      "step": 82
    },
    {
      "epoch": 0.1496844003606853,
      "grad_norm": 2.291008678375686,
      "learning_rate": 1.846846846846847e-06,
      "loss": 0.8824,
      "step": 83
    },
    {
      "epoch": 0.151487826871055,
      "grad_norm": 2.145067975437641,
      "learning_rate": 1.8693693693693697e-06,
      "loss": 0.7856,
      "step": 84
    },
    {
      "epoch": 0.1532912533814247,
      "grad_norm": 2.123969288662637,
      "learning_rate": 1.8918918918918922e-06,
      "loss": 0.7462,
      "step": 85
    },
    {
      "epoch": 0.1550946798917944,
      "grad_norm": 2.2295818146317776,
      "learning_rate": 1.9144144144144145e-06,
      "loss": 0.8886,
      "step": 86
    },
    {
      "epoch": 0.15689810640216412,
      "grad_norm": 2.140597774817277,
      "learning_rate": 1.9369369369369372e-06,
      "loss": 0.8319,
      "step": 87
    },
    {
      "epoch": 0.1587015329125338,
      "grad_norm": 2.344451235287885,
      "learning_rate": 1.9594594594594595e-06,
      "loss": 0.8575,
      "step": 88
    },
    {
      "epoch": 0.16050495942290352,
      "grad_norm": 1.961958906755019,
      "learning_rate": 1.9819819819819822e-06,
      "loss": 0.8305,
      "step": 89
    },
    {
      "epoch": 0.1623083859332732,
      "grad_norm": 2.082591039262878,
      "learning_rate": 2.0045045045045045e-06,
      "loss": 0.8032,
      "step": 90
    },
    {
      "epoch": 0.16411181244364292,
      "grad_norm": 1.8951491866286936,
      "learning_rate": 2.0270270270270273e-06,
      "loss": 0.9459,
      "step": 91
    },
    {
      "epoch": 0.1659152389540126,
      "grad_norm": 2.1428859812782344,
      "learning_rate": 2.0495495495495496e-06,
      "loss": 0.8839,
      "step": 92
    },
    {
      "epoch": 0.16771866546438233,
      "grad_norm": 2.2152288593264173,
      "learning_rate": 2.0720720720720723e-06,
      "loss": 0.8265,
      "step": 93
    },
    {
      "epoch": 0.16952209197475204,
      "grad_norm": 2.0213826178716254,
      "learning_rate": 2.0945945945945946e-06,
      "loss": 0.8098,
      "step": 94
    },
    {
      "epoch": 0.17132551848512173,
      "grad_norm": 2.0901306331246374,
      "learning_rate": 2.1171171171171173e-06,
      "loss": 0.8728,
      "step": 95
    },
    {
      "epoch": 0.17312894499549145,
      "grad_norm": 2.164683159815703,
      "learning_rate": 2.13963963963964e-06,
      "loss": 0.7473,
      "step": 96
    },
    {
      "epoch": 0.17493237150586113,
      "grad_norm": 2.128063710011363,
      "learning_rate": 2.1621621621621623e-06,
      "loss": 0.7631,
      "step": 97
    },
    {
      "epoch": 0.17673579801623085,
      "grad_norm": 2.2572109322776446,
      "learning_rate": 2.1846846846846846e-06,
      "loss": 0.933,
      "step": 98
    },
    {
      "epoch": 0.17853922452660054,
      "grad_norm": 2.1363963838074325,
      "learning_rate": 2.2072072072072073e-06,
      "loss": 0.7977,
      "step": 99
    },
    {
      "epoch": 0.18034265103697025,
      "grad_norm": 2.146510752101339,
      "learning_rate": 2.22972972972973e-06,
      "loss": 0.8328,
      "step": 100
    },
    {
      "epoch": 0.18214607754733994,
      "grad_norm": 2.128787213407692,
      "learning_rate": 2.2522522522522524e-06,
      "loss": 0.769,
      "step": 101
    },
    {
      "epoch": 0.18394950405770966,
      "grad_norm": 2.1474901480116384,
      "learning_rate": 2.274774774774775e-06,
      "loss": 0.8078,
      "step": 102
    },
    {
      "epoch": 0.18575293056807934,
      "grad_norm": 2.2077628269004306,
      "learning_rate": 2.297297297297298e-06,
      "loss": 0.869,
      "step": 103
    },
    {
      "epoch": 0.18755635707844906,
      "grad_norm": 2.1596373889839353,
      "learning_rate": 2.31981981981982e-06,
      "loss": 0.9066,
      "step": 104
    },
    {
      "epoch": 0.18935978358881875,
      "grad_norm": 2.227258779710617,
      "learning_rate": 2.3423423423423424e-06,
      "loss": 0.8736,
      "step": 105
    },
    {
      "epoch": 0.19116321009918846,
      "grad_norm": 2.0265448039731604,
      "learning_rate": 2.364864864864865e-06,
      "loss": 0.8269,
      "step": 106
    },
    {
      "epoch": 0.19296663660955815,
      "grad_norm": 2.090824078885953,
      "learning_rate": 2.3873873873873874e-06,
      "loss": 0.8771,
      "step": 107
    },
    {
      "epoch": 0.19477006311992787,
      "grad_norm": 1.9761919651706363,
      "learning_rate": 2.40990990990991e-06,
      "loss": 0.8337,
      "step": 108
    },
    {
      "epoch": 0.19657348963029755,
      "grad_norm": 2.1515967867262455,
      "learning_rate": 2.432432432432433e-06,
      "loss": 0.8635,
      "step": 109
    },
    {
      "epoch": 0.19837691614066727,
      "grad_norm": 2.0366179273737943,
      "learning_rate": 2.454954954954955e-06,
      "loss": 0.7951,
      "step": 110
    },
    {
      "epoch": 0.20018034265103696,
      "grad_norm": 2.2337095952708568,
      "learning_rate": 2.4774774774774775e-06,
      "loss": 0.8296,
      "step": 111
    },
    {
      "epoch": 0.20198376916140667,
      "grad_norm": 2.314845611994883,
      "learning_rate": 2.5e-06,
      "loss": 0.8512,
      "step": 112
    },
    {
      "epoch": 0.2037871956717764,
      "grad_norm": 2.1397074134865623,
      "learning_rate": 2.5225225225225225e-06,
      "loss": 0.7915,
      "step": 113
    },
    {
      "epoch": 0.20559062218214608,
      "grad_norm": 2.2454332127701644,
      "learning_rate": 2.5450450450450452e-06,
      "loss": 0.7976,
      "step": 114
    },
    {
      "epoch": 0.2073940486925158,
      "grad_norm": 2.184763023914372,
      "learning_rate": 2.5675675675675675e-06,
      "loss": 0.9853,
      "step": 115
    },
    {
      "epoch": 0.20919747520288548,
      "grad_norm": 2.1965096069781653,
      "learning_rate": 2.5900900900900907e-06,
      "loss": 0.8754,
      "step": 116
    },
    {
      "epoch": 0.2110009017132552,
      "grad_norm": 2.1197328540143405,
      "learning_rate": 2.612612612612613e-06,
      "loss": 0.873,
      "step": 117
    },
    {
      "epoch": 0.21280432822362488,
      "grad_norm": 2.1479255682477656,
      "learning_rate": 2.6351351351351353e-06,
      "loss": 0.811,
      "step": 118
    },
    {
      "epoch": 0.2146077547339946,
      "grad_norm": 1.907529342641289,
      "learning_rate": 2.657657657657658e-06,
      "loss": 0.8094,
      "step": 119
    },
    {
      "epoch": 0.2164111812443643,
      "grad_norm": 2.0692795464883162,
      "learning_rate": 2.6801801801801803e-06,
      "loss": 0.7645,
      "step": 120
    },
    {
      "epoch": 0.218214607754734,
      "grad_norm": 2.525383768634248,
      "learning_rate": 2.702702702702703e-06,
      "loss": 0.7909,
      "step": 121
    },
    {
      "epoch": 0.2200180342651037,
      "grad_norm": 2.193135508499143,
      "learning_rate": 2.7252252252252253e-06,
      "loss": 0.8109,
      "step": 122
    },
    {
      "epoch": 0.2218214607754734,
      "grad_norm": 2.2119521513341263,
      "learning_rate": 2.747747747747748e-06,
      "loss": 0.864,
      "step": 123
    },
    {
      "epoch": 0.2236248872858431,
      "grad_norm": 2.2411527155988966,
      "learning_rate": 2.7702702702702703e-06,
      "loss": 0.7952,
      "step": 124
    },
    {
      "epoch": 0.2254283137962128,
      "grad_norm": 2.0883350487153693,
      "learning_rate": 2.7927927927927926e-06,
      "loss": 1.0036,
      "step": 125
    },
    {
      "epoch": 0.2272317403065825,
      "grad_norm": 2.109829568194192,
      "learning_rate": 2.8153153153153158e-06,
      "loss": 0.7232,
      "step": 126
    },
    {
      "epoch": 0.2290351668169522,
      "grad_norm": 1.993689884083202,
      "learning_rate": 2.837837837837838e-06,
      "loss": 0.7923,
      "step": 127
    },
    {
      "epoch": 0.2308385933273219,
      "grad_norm": 1.8907292753085065,
      "learning_rate": 2.860360360360361e-06,
      "loss": 0.7996,
      "step": 128
    },
    {
      "epoch": 0.23264201983769162,
      "grad_norm": 2.0881747024356367,
      "learning_rate": 2.882882882882883e-06,
      "loss": 0.8358,
      "step": 129
    },
    {
      "epoch": 0.2344454463480613,
      "grad_norm": 2.2364232617934583,
      "learning_rate": 2.9054054054054054e-06,
      "loss": 0.855,
      "step": 130
    },
    {
      "epoch": 0.23624887285843102,
      "grad_norm": 2.1751800704208017,
      "learning_rate": 2.927927927927928e-06,
      "loss": 0.988,
      "step": 131
    },
    {
      "epoch": 0.23805229936880073,
      "grad_norm": 2.5339818982016244,
      "learning_rate": 2.9504504504504504e-06,
      "loss": 0.9803,
      "step": 132
    },
    {
      "epoch": 0.23985572587917042,
      "grad_norm": 2.1208235838666276,
      "learning_rate": 2.9729729729729736e-06,
      "loss": 0.8555,
      "step": 133
    },
    {
      "epoch": 0.24165915238954014,
      "grad_norm": 2.2141319147659404,
      "learning_rate": 2.995495495495496e-06,
      "loss": 0.8157,
      "step": 134
    },
    {
      "epoch": 0.24346257889990983,
      "grad_norm": 2.2533639584780403,
      "learning_rate": 3.0180180180180186e-06,
      "loss": 0.7266,
      "step": 135
    },
    {
      "epoch": 0.24526600541027954,
      "grad_norm": 1.9934582570878943,
      "learning_rate": 3.040540540540541e-06,
      "loss": 0.6843,
      "step": 136
    },
    {
      "epoch": 0.24706943192064923,
      "grad_norm": 2.3507505242464286,
      "learning_rate": 3.063063063063063e-06,
      "loss": 0.7358,
      "step": 137
    },
    {
      "epoch": 0.24887285843101895,
      "grad_norm": 2.333362017875557,
      "learning_rate": 3.085585585585586e-06,
      "loss": 0.7389,
      "step": 138
    },
    {
      "epoch": 0.25067628494138866,
      "grad_norm": 2.2614223566969707,
      "learning_rate": 3.1081081081081082e-06,
      "loss": 0.7867,
      "step": 139
    },
    {
      "epoch": 0.25247971145175835,
      "grad_norm": 1.9874595550084149,
      "learning_rate": 3.130630630630631e-06,
      "loss": 0.8633,
      "step": 140
    },
    {
      "epoch": 0.25428313796212804,
      "grad_norm": 2.078601890935306,
      "learning_rate": 3.1531531531531532e-06,
      "loss": 0.7232,
      "step": 141
    },
    {
      "epoch": 0.2560865644724977,
      "grad_norm": 2.059190081358862,
      "learning_rate": 3.1756756756756755e-06,
      "loss": 0.716,
      "step": 142
    },
    {
      "epoch": 0.25788999098286747,
      "grad_norm": 2.2762951186816474,
      "learning_rate": 3.1981981981981987e-06,
      "loss": 0.8357,
      "step": 143
    },
    {
      "epoch": 0.25969341749323716,
      "grad_norm": 2.2293473455945882,
      "learning_rate": 3.220720720720721e-06,
      "loss": 0.9155,
      "step": 144
    },
    {
      "epoch": 0.26149684400360684,
      "grad_norm": 2.138416887954227,
      "learning_rate": 3.2432432432432437e-06,
      "loss": 0.8325,
      "step": 145
    },
    {
      "epoch": 0.26330027051397653,
      "grad_norm": 2.124046946880288,
      "learning_rate": 3.265765765765766e-06,
      "loss": 0.8651,
      "step": 146
    },
    {
      "epoch": 0.2651036970243463,
      "grad_norm": 2.1097037863696015,
      "learning_rate": 3.2882882882882887e-06,
      "loss": 0.748,
      "step": 147
    },
    {
      "epoch": 0.26690712353471596,
      "grad_norm": 1.9680662328568495,
      "learning_rate": 3.310810810810811e-06,
      "loss": 0.7221,
      "step": 148
    },
    {
      "epoch": 0.26871055004508565,
      "grad_norm": 2.046832628017909,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.76,
      "step": 149
    },
    {
      "epoch": 0.27051397655545534,
      "grad_norm": 2.318638863913297,
      "learning_rate": 3.3558558558558565e-06,
      "loss": 0.8807,
      "step": 150
    },
    {
      "epoch": 0.2723174030658251,
      "grad_norm": 2.012178712794308,
      "learning_rate": 3.3783783783783788e-06,
      "loss": 0.8326,
      "step": 151
    },
    {
      "epoch": 0.27412082957619477,
      "grad_norm": 2.066509449866673,
      "learning_rate": 3.4009009009009015e-06,
      "loss": 0.8075,
      "step": 152
    },
    {
      "epoch": 0.27592425608656446,
      "grad_norm": 2.1828393174475735,
      "learning_rate": 3.423423423423424e-06,
      "loss": 0.8072,
      "step": 153
    },
    {
      "epoch": 0.2777276825969342,
      "grad_norm": 2.3767082165962368,
      "learning_rate": 3.445945945945946e-06,
      "loss": 0.97,
      "step": 154
    },
    {
      "epoch": 0.2795311091073039,
      "grad_norm": 2.1245505290732853,
      "learning_rate": 3.468468468468469e-06,
      "loss": 0.8154,
      "step": 155
    },
    {
      "epoch": 0.2813345356176736,
      "grad_norm": 2.0827910518003523,
      "learning_rate": 3.490990990990991e-06,
      "loss": 0.7921,
      "step": 156
    },
    {
      "epoch": 0.28313796212804326,
      "grad_norm": 2.0364316538096863,
      "learning_rate": 3.513513513513514e-06,
      "loss": 0.9461,
      "step": 157
    },
    {
      "epoch": 0.284941388638413,
      "grad_norm": 2.3511021810109383,
      "learning_rate": 3.536036036036036e-06,
      "loss": 0.7942,
      "step": 158
    },
    {
      "epoch": 0.2867448151487827,
      "grad_norm": 1.9833910918431235,
      "learning_rate": 3.5585585585585584e-06,
      "loss": 0.8312,
      "step": 159
    },
    {
      "epoch": 0.2885482416591524,
      "grad_norm": 1.9342863774694277,
      "learning_rate": 3.5810810810810816e-06,
      "loss": 0.8015,
      "step": 160
    },
    {
      "epoch": 0.29035166816952207,
      "grad_norm": 2.033383216145857,
      "learning_rate": 3.603603603603604e-06,
      "loss": 0.7695,
      "step": 161
    },
    {
      "epoch": 0.2921550946798918,
      "grad_norm": 2.374348511862132,
      "learning_rate": 3.6261261261261266e-06,
      "loss": 0.762,
      "step": 162
    },
    {
      "epoch": 0.2939585211902615,
      "grad_norm": 2.114360094597133,
      "learning_rate": 3.648648648648649e-06,
      "loss": 0.8444,
      "step": 163
    },
    {
      "epoch": 0.2957619477006312,
      "grad_norm": 1.9931929796238907,
      "learning_rate": 3.6711711711711716e-06,
      "loss": 0.7882,
      "step": 164
    },
    {
      "epoch": 0.2975653742110009,
      "grad_norm": 2.0730938718533145,
      "learning_rate": 3.693693693693694e-06,
      "loss": 0.7745,
      "step": 165
    },
    {
      "epoch": 0.2993688007213706,
      "grad_norm": 1.8554364231513298,
      "learning_rate": 3.7162162162162162e-06,
      "loss": 0.7253,
      "step": 166
    },
    {
      "epoch": 0.3011722272317403,
      "grad_norm": 2.149623516434781,
      "learning_rate": 3.7387387387387394e-06,
      "loss": 0.8954,
      "step": 167
    },
    {
      "epoch": 0.30297565374211,
      "grad_norm": 2.4856316208076503,
      "learning_rate": 3.7612612612612612e-06,
      "loss": 0.8402,
      "step": 168
    },
    {
      "epoch": 0.3047790802524797,
      "grad_norm": 2.1406112105466035,
      "learning_rate": 3.7837837837837844e-06,
      "loss": 0.8636,
      "step": 169
    },
    {
      "epoch": 0.3065825067628494,
      "grad_norm": 2.2289790923203205,
      "learning_rate": 3.8063063063063067e-06,
      "loss": 0.7428,
      "step": 170
    },
    {
      "epoch": 0.3083859332732191,
      "grad_norm": 2.004209812667466,
      "learning_rate": 3.828828828828829e-06,
      "loss": 0.7797,
      "step": 171
    },
    {
      "epoch": 0.3101893597835888,
      "grad_norm": 2.006314497006802,
      "learning_rate": 3.851351351351352e-06,
      "loss": 0.7593,
      "step": 172
    },
    {
      "epoch": 0.31199278629395855,
      "grad_norm": 2.282382563008822,
      "learning_rate": 3.8738738738738744e-06,
      "loss": 0.9733,
      "step": 173
    },
    {
      "epoch": 0.31379621280432823,
      "grad_norm": 2.0355833568890946,
      "learning_rate": 3.896396396396397e-06,
      "loss": 0.8561,
      "step": 174
    },
    {
      "epoch": 0.3155996393146979,
      "grad_norm": 2.259718701083019,
      "learning_rate": 3.918918918918919e-06,
      "loss": 0.797,
      "step": 175
    },
    {
      "epoch": 0.3174030658250676,
      "grad_norm": 2.1729996844233455,
      "learning_rate": 3.941441441441442e-06,
      "loss": 0.7527,
      "step": 176
    },
    {
      "epoch": 0.31920649233543735,
      "grad_norm": 2.60117835410255,
      "learning_rate": 3.9639639639639645e-06,
      "loss": 1.0225,
      "step": 177
    },
    {
      "epoch": 0.32100991884580704,
      "grad_norm": 2.2528379596704604,
      "learning_rate": 3.986486486486487e-06,
      "loss": 0.7965,
      "step": 178
    },
    {
      "epoch": 0.32281334535617673,
      "grad_norm": 2.3132904967648082,
      "learning_rate": 4.009009009009009e-06,
      "loss": 0.8112,
      "step": 179
    },
    {
      "epoch": 0.3246167718665464,
      "grad_norm": 2.5263030575643564,
      "learning_rate": 4.031531531531531e-06,
      "loss": 0.8432,
      "step": 180
    },
    {
      "epoch": 0.32642019837691616,
      "grad_norm": 2.2940008917196817,
      "learning_rate": 4.0540540540540545e-06,
      "loss": 0.7679,
      "step": 181
    },
    {
      "epoch": 0.32822362488728585,
      "grad_norm": 2.1976286649954355,
      "learning_rate": 4.076576576576577e-06,
      "loss": 0.853,
      "step": 182
    },
    {
      "epoch": 0.33002705139765554,
      "grad_norm": 2.287412594205084,
      "learning_rate": 4.099099099099099e-06,
      "loss": 0.8528,
      "step": 183
    },
    {
      "epoch": 0.3318304779080252,
      "grad_norm": 2.265413975048022,
      "learning_rate": 4.121621621621622e-06,
      "loss": 0.8891,
      "step": 184
    },
    {
      "epoch": 0.33363390441839497,
      "grad_norm": 2.1347188948409626,
      "learning_rate": 4.1441441441441446e-06,
      "loss": 0.7172,
      "step": 185
    },
    {
      "epoch": 0.33543733092876465,
      "grad_norm": 1.9036460590607482,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.8139,
      "step": 186
    },
    {
      "epoch": 0.33724075743913434,
      "grad_norm": 2.1982101741220723,
      "learning_rate": 4.189189189189189e-06,
      "loss": 0.7872,
      "step": 187
    },
    {
      "epoch": 0.3390441839495041,
      "grad_norm": 1.9974084871264948,
      "learning_rate": 4.2117117117117115e-06,
      "loss": 0.7211,
      "step": 188
    },
    {
      "epoch": 0.3408476104598738,
      "grad_norm": 2.343692275926038,
      "learning_rate": 4.234234234234235e-06,
      "loss": 0.8724,
      "step": 189
    },
    {
      "epoch": 0.34265103697024346,
      "grad_norm": 2.4031986369572103,
      "learning_rate": 4.256756756756757e-06,
      "loss": 0.8742,
      "step": 190
    },
    {
      "epoch": 0.34445446348061315,
      "grad_norm": 2.077375945110708,
      "learning_rate": 4.27927927927928e-06,
      "loss": 0.7802,
      "step": 191
    },
    {
      "epoch": 0.3462578899909829,
      "grad_norm": 2.027445526646734,
      "learning_rate": 4.301801801801802e-06,
      "loss": 0.8748,
      "step": 192
    },
    {
      "epoch": 0.3480613165013526,
      "grad_norm": 2.4821342969963522,
      "learning_rate": 4.324324324324325e-06,
      "loss": 0.7775,
      "step": 193
    },
    {
      "epoch": 0.34986474301172227,
      "grad_norm": 2.356163875058872,
      "learning_rate": 4.346846846846847e-06,
      "loss": 0.7257,
      "step": 194
    },
    {
      "epoch": 0.35166816952209196,
      "grad_norm": 2.295664379900927,
      "learning_rate": 4.369369369369369e-06,
      "loss": 0.7341,
      "step": 195
    },
    {
      "epoch": 0.3534715960324617,
      "grad_norm": 2.3623157091199882,
      "learning_rate": 4.391891891891892e-06,
      "loss": 0.8198,
      "step": 196
    },
    {
      "epoch": 0.3552750225428314,
      "grad_norm": 2.186111001087259,
      "learning_rate": 4.414414414414415e-06,
      "loss": 0.8505,
      "step": 197
    },
    {
      "epoch": 0.3570784490532011,
      "grad_norm": 2.2466120322229504,
      "learning_rate": 4.436936936936938e-06,
      "loss": 0.8352,
      "step": 198
    },
    {
      "epoch": 0.35888187556357076,
      "grad_norm": 2.2300706402504837,
      "learning_rate": 4.45945945945946e-06,
      "loss": 0.9199,
      "step": 199
    },
    {
      "epoch": 0.3606853020739405,
      "grad_norm": 2.175470606319692,
      "learning_rate": 4.4819819819819824e-06,
      "loss": 0.6704,
      "step": 200
    },
    {
      "epoch": 0.3624887285843102,
      "grad_norm": 2.0680624661556397,
      "learning_rate": 4.504504504504505e-06,
      "loss": 0.8349,
      "step": 201
    },
    {
      "epoch": 0.3642921550946799,
      "grad_norm": 2.1728800893786895,
      "learning_rate": 4.527027027027027e-06,
      "loss": 0.8035,
      "step": 202
    },
    {
      "epoch": 0.36609558160504957,
      "grad_norm": 2.360603677367448,
      "learning_rate": 4.54954954954955e-06,
      "loss": 0.7997,
      "step": 203
    },
    {
      "epoch": 0.3678990081154193,
      "grad_norm": 2.0528022407082425,
      "learning_rate": 4.5720720720720725e-06,
      "loss": 0.7377,
      "step": 204
    },
    {
      "epoch": 0.369702434625789,
      "grad_norm": 2.145107444388918,
      "learning_rate": 4.594594594594596e-06,
      "loss": 0.7246,
      "step": 205
    },
    {
      "epoch": 0.3715058611361587,
      "grad_norm": 2.142567114305303,
      "learning_rate": 4.617117117117118e-06,
      "loss": 0.767,
      "step": 206
    },
    {
      "epoch": 0.37330928764652843,
      "grad_norm": 2.250353037529415,
      "learning_rate": 4.63963963963964e-06,
      "loss": 0.744,
      "step": 207
    },
    {
      "epoch": 0.3751127141568981,
      "grad_norm": 2.4107500279982577,
      "learning_rate": 4.6621621621621625e-06,
      "loss": 0.9702,
      "step": 208
    },
    {
      "epoch": 0.3769161406672678,
      "grad_norm": 1.83721607411279,
      "learning_rate": 4.684684684684685e-06,
      "loss": 0.7841,
      "step": 209
    },
    {
      "epoch": 0.3787195671776375,
      "grad_norm": 2.187844750445605,
      "learning_rate": 4.707207207207208e-06,
      "loss": 0.7828,
      "step": 210
    },
    {
      "epoch": 0.38052299368800724,
      "grad_norm": 2.6155119945345913,
      "learning_rate": 4.72972972972973e-06,
      "loss": 0.7754,
      "step": 211
    },
    {
      "epoch": 0.3823264201983769,
      "grad_norm": 2.087136361991544,
      "learning_rate": 4.7522522522522526e-06,
      "loss": 0.7961,
      "step": 212
    },
    {
      "epoch": 0.3841298467087466,
      "grad_norm": 2.052469543045352,
      "learning_rate": 4.774774774774775e-06,
      "loss": 0.74,
      "step": 213
    },
    {
      "epoch": 0.3859332732191163,
      "grad_norm": 2.326611456516733,
      "learning_rate": 4.797297297297297e-06,
      "loss": 0.8593,
      "step": 214
    },
    {
      "epoch": 0.38773669972948605,
      "grad_norm": 2.161826276327704,
      "learning_rate": 4.81981981981982e-06,
      "loss": 0.8495,
      "step": 215
    },
    {
      "epoch": 0.38954012623985573,
      "grad_norm": 2.2046675857827416,
      "learning_rate": 4.842342342342343e-06,
      "loss": 0.7909,
      "step": 216
    },
    {
      "epoch": 0.3913435527502254,
      "grad_norm": 1.9987211123805613,
      "learning_rate": 4.864864864864866e-06,
      "loss": 0.7073,
      "step": 217
    },
    {
      "epoch": 0.3931469792605951,
      "grad_norm": 2.060842447295872,
      "learning_rate": 4.887387387387388e-06,
      "loss": 0.7356,
      "step": 218
    },
    {
      "epoch": 0.39495040577096485,
      "grad_norm": 2.167011002864499,
      "learning_rate": 4.90990990990991e-06,
      "loss": 0.9376,
      "step": 219
    },
    {
      "epoch": 0.39675383228133454,
      "grad_norm": 2.120387815326822,
      "learning_rate": 4.932432432432433e-06,
      "loss": 0.923,
      "step": 220
    },
    {
      "epoch": 0.3985572587917042,
      "grad_norm": 2.0360713757641675,
      "learning_rate": 4.954954954954955e-06,
      "loss": 0.7894,
      "step": 221
    },
    {
      "epoch": 0.4003606853020739,
      "grad_norm": 1.9555130956875506,
      "learning_rate": 4.977477477477478e-06,
      "loss": 0.7949,
      "step": 222
    },
    {
      "epoch": 0.40216411181244366,
      "grad_norm": 2.1707142856979553,
      "learning_rate": 5e-06,
      "loss": 0.8106,
      "step": 223
    },
    {
      "epoch": 0.40396753832281335,
      "grad_norm": 2.0454935807171566,
      "learning_rate": 5.022522522522523e-06,
      "loss": 0.7753,
      "step": 224
    },
    {
      "epoch": 0.40577096483318303,
      "grad_norm": 1.9818753837111296,
      "learning_rate": 5.045045045045045e-06,
      "loss": 0.7425,
      "step": 225
    },
    {
      "epoch": 0.4075743913435528,
      "grad_norm": 2.089730216802721,
      "learning_rate": 5.067567567567568e-06,
      "loss": 0.7953,
      "step": 226
    },
    {
      "epoch": 0.40937781785392247,
      "grad_norm": 2.1221432507716593,
      "learning_rate": 5.0900900900900905e-06,
      "loss": 0.7228,
      "step": 227
    },
    {
      "epoch": 0.41118124436429215,
      "grad_norm": 2.302405327887679,
      "learning_rate": 5.112612612612613e-06,
      "loss": 0.8949,
      "step": 228
    },
    {
      "epoch": 0.41298467087466184,
      "grad_norm": 2.1247546433849203,
      "learning_rate": 5.135135135135135e-06,
      "loss": 0.7007,
      "step": 229
    },
    {
      "epoch": 0.4147880973850316,
      "grad_norm": 2.043647163631905,
      "learning_rate": 5.157657657657657e-06,
      "loss": 0.7338,
      "step": 230
    },
    {
      "epoch": 0.4165915238954013,
      "grad_norm": 2.270245087265323,
      "learning_rate": 5.180180180180181e-06,
      "loss": 0.8341,
      "step": 231
    },
    {
      "epoch": 0.41839495040577096,
      "grad_norm": 2.183816726602172,
      "learning_rate": 5.202702702702704e-06,
      "loss": 0.8531,
      "step": 232
    },
    {
      "epoch": 0.42019837691614065,
      "grad_norm": 2.1547309877335903,
      "learning_rate": 5.225225225225226e-06,
      "loss": 0.7347,
      "step": 233
    },
    {
      "epoch": 0.4220018034265104,
      "grad_norm": 2.206709828896525,
      "learning_rate": 5.247747747747748e-06,
      "loss": 0.8665,
      "step": 234
    },
    {
      "epoch": 0.4238052299368801,
      "grad_norm": 2.0559406646994987,
      "learning_rate": 5.2702702702702705e-06,
      "loss": 0.6986,
      "step": 235
    },
    {
      "epoch": 0.42560865644724977,
      "grad_norm": 2.018292507433674,
      "learning_rate": 5.292792792792794e-06,
      "loss": 0.6952,
      "step": 236
    },
    {
      "epoch": 0.42741208295761945,
      "grad_norm": 2.0090659177061805,
      "learning_rate": 5.315315315315316e-06,
      "loss": 0.8291,
      "step": 237
    },
    {
      "epoch": 0.4292155094679892,
      "grad_norm": 2.301997115792778,
      "learning_rate": 5.337837837837838e-06,
      "loss": 0.7145,
      "step": 238
    },
    {
      "epoch": 0.4310189359783589,
      "grad_norm": 2.067738383833435,
      "learning_rate": 5.360360360360361e-06,
      "loss": 0.7995,
      "step": 239
    },
    {
      "epoch": 0.4328223624887286,
      "grad_norm": 2.2635903833648245,
      "learning_rate": 5.382882882882884e-06,
      "loss": 0.7224,
      "step": 240
    },
    {
      "epoch": 0.43462578899909826,
      "grad_norm": 2.275286557637392,
      "learning_rate": 5.405405405405406e-06,
      "loss": 0.8183,
      "step": 241
    },
    {
      "epoch": 0.436429215509468,
      "grad_norm": 2.1179111948903513,
      "learning_rate": 5.427927927927928e-06,
      "loss": 0.772,
      "step": 242
    },
    {
      "epoch": 0.4382326420198377,
      "grad_norm": 2.164539734222491,
      "learning_rate": 5.450450450450451e-06,
      "loss": 0.7319,
      "step": 243
    },
    {
      "epoch": 0.4400360685302074,
      "grad_norm": 2.1650273688319515,
      "learning_rate": 5.472972972972973e-06,
      "loss": 0.8215,
      "step": 244
    },
    {
      "epoch": 0.4418394950405771,
      "grad_norm": 2.075382076411821,
      "learning_rate": 5.495495495495496e-06,
      "loss": 0.7398,
      "step": 245
    },
    {
      "epoch": 0.4436429215509468,
      "grad_norm": 1.8310224669393116,
      "learning_rate": 5.518018018018018e-06,
      "loss": 0.7462,
      "step": 246
    },
    {
      "epoch": 0.4454463480613165,
      "grad_norm": 2.1020377635825955,
      "learning_rate": 5.540540540540541e-06,
      "loss": 0.7952,
      "step": 247
    },
    {
      "epoch": 0.4472497745716862,
      "grad_norm": 2.20596373228597,
      "learning_rate": 5.563063063063063e-06,
      "loss": 0.7155,
      "step": 248
    },
    {
      "epoch": 0.44905320108205593,
      "grad_norm": 2.157295752855383,
      "learning_rate": 5.585585585585585e-06,
      "loss": 0.7448,
      "step": 249
    },
    {
      "epoch": 0.4508566275924256,
      "grad_norm": 2.0756794403814767,
      "learning_rate": 5.608108108108109e-06,
      "loss": 0.9216,
      "step": 250
    },
    {
      "epoch": 0.4526600541027953,
      "grad_norm": 2.3192109805255123,
      "learning_rate": 5.6306306306306316e-06,
      "loss": 0.7884,
      "step": 251
    },
    {
      "epoch": 0.454463480613165,
      "grad_norm": 2.0220751392261467,
      "learning_rate": 5.653153153153154e-06,
      "loss": 0.7508,
      "step": 252
    },
    {
      "epoch": 0.45626690712353474,
      "grad_norm": 2.13660207924998,
      "learning_rate": 5.675675675675676e-06,
      "loss": 0.7559,
      "step": 253
    },
    {
      "epoch": 0.4580703336339044,
      "grad_norm": 2.114868597507177,
      "learning_rate": 5.6981981981981985e-06,
      "loss": 0.8145,
      "step": 254
    },
    {
      "epoch": 0.4598737601442741,
      "grad_norm": 2.1123935906108313,
      "learning_rate": 5.720720720720722e-06,
      "loss": 0.8049,
      "step": 255
    },
    {
      "epoch": 0.4616771866546438,
      "grad_norm": 2.4676890144062957,
      "learning_rate": 5.743243243243244e-06,
      "loss": 0.7957,
      "step": 256
    },
    {
      "epoch": 0.46348061316501354,
      "grad_norm": 2.168073489314107,
      "learning_rate": 5.765765765765766e-06,
      "loss": 0.7666,
      "step": 257
    },
    {
      "epoch": 0.46528403967538323,
      "grad_norm": 2.072398238803128,
      "learning_rate": 5.7882882882882885e-06,
      "loss": 0.8192,
      "step": 258
    },
    {
      "epoch": 0.4670874661857529,
      "grad_norm": 2.563194398373102,
      "learning_rate": 5.810810810810811e-06,
      "loss": 0.8409,
      "step": 259
    },
    {
      "epoch": 0.4688908926961226,
      "grad_norm": 2.304094514669754,
      "learning_rate": 5.833333333333334e-06,
      "loss": 0.8541,
      "step": 260
    },
    {
      "epoch": 0.47069431920649235,
      "grad_norm": 2.198147090270184,
      "learning_rate": 5.855855855855856e-06,
      "loss": 0.6985,
      "step": 261
    },
    {
      "epoch": 0.47249774571686204,
      "grad_norm": 2.1753059174557112,
      "learning_rate": 5.8783783783783786e-06,
      "loss": 0.8182,
      "step": 262
    },
    {
      "epoch": 0.4743011722272317,
      "grad_norm": 2.2589974392486356,
      "learning_rate": 5.900900900900901e-06,
      "loss": 0.755,
      "step": 263
    },
    {
      "epoch": 0.47610459873760147,
      "grad_norm": 2.1007335416554507,
      "learning_rate": 5.923423423423423e-06,
      "loss": 0.748,
      "step": 264
    },
    {
      "epoch": 0.47790802524797116,
      "grad_norm": 1.9793246514216147,
      "learning_rate": 5.945945945945947e-06,
      "loss": 0.6935,
      "step": 265
    },
    {
      "epoch": 0.47971145175834085,
      "grad_norm": 2.310796934006057,
      "learning_rate": 5.9684684684684694e-06,
      "loss": 0.7839,
      "step": 266
    },
    {
      "epoch": 0.48151487826871053,
      "grad_norm": 2.1733242596190374,
      "learning_rate": 5.990990990990992e-06,
      "loss": 0.7748,
      "step": 267
    },
    {
      "epoch": 0.4833183047790803,
      "grad_norm": 2.134235761560956,
      "learning_rate": 6.013513513513514e-06,
      "loss": 0.7545,
      "step": 268
    },
    {
      "epoch": 0.48512173128944996,
      "grad_norm": 2.279296657289438,
      "learning_rate": 6.036036036036037e-06,
      "loss": 0.7399,
      "step": 269
    },
    {
      "epoch": 0.48692515779981965,
      "grad_norm": 2.2785713614109566,
      "learning_rate": 6.0585585585585595e-06,
      "loss": 0.9031,
      "step": 270
    },
    {
      "epoch": 0.48872858431018934,
      "grad_norm": 2.17376130509456,
      "learning_rate": 6.081081081081082e-06,
      "loss": 0.8876,
      "step": 271
    },
    {
      "epoch": 0.4905320108205591,
      "grad_norm": 2.2837346570989694,
      "learning_rate": 6.103603603603604e-06,
      "loss": 0.852,
      "step": 272
    },
    {
      "epoch": 0.49233543733092877,
      "grad_norm": 2.308367261822732,
      "learning_rate": 6.126126126126126e-06,
      "loss": 0.7471,
      "step": 273
    },
    {
      "epoch": 0.49413886384129846,
      "grad_norm": 2.353472342894518,
      "learning_rate": 6.1486486486486495e-06,
      "loss": 0.865,
      "step": 274
    },
    {
      "epoch": 0.49594229035166815,
      "grad_norm": 2.2188631595778077,
      "learning_rate": 6.171171171171172e-06,
      "loss": 0.8253,
      "step": 275
    },
    {
      "epoch": 0.4977457168620379,
      "grad_norm": 2.4928969764456212,
      "learning_rate": 6.193693693693694e-06,
      "loss": 0.9809,
      "step": 276
    },
    {
      "epoch": 0.4995491433724076,
      "grad_norm": 2.429996582097567,
      "learning_rate": 6.2162162162162164e-06,
      "loss": 0.9529,
      "step": 277
    },
    {
      "epoch": 0.5013525698827773,
      "grad_norm": 2.156174833500389,
      "learning_rate": 6.238738738738739e-06,
      "loss": 0.7549,
      "step": 278
    },
    {
      "epoch": 0.503155996393147,
      "grad_norm": 2.1145480790559916,
      "learning_rate": 6.261261261261262e-06,
      "loss": 0.7325,
      "step": 279
    },
    {
      "epoch": 0.5049594229035167,
      "grad_norm": 2.3827039996906887,
      "learning_rate": 6.283783783783784e-06,
      "loss": 0.8234,
      "step": 280
    },
    {
      "epoch": 0.5067628494138864,
      "grad_norm": 2.2520674713452635,
      "learning_rate": 6.3063063063063065e-06,
      "loss": 0.805,
      "step": 281
    },
    {
      "epoch": 0.5085662759242561,
      "grad_norm": 2.2751328483189344,
      "learning_rate": 6.328828828828829e-06,
      "loss": 0.7916,
      "step": 282
    },
    {
      "epoch": 0.5103697024346258,
      "grad_norm": 2.105893153039127,
      "learning_rate": 6.351351351351351e-06,
      "loss": 0.7339,
      "step": 283
    },
    {
      "epoch": 0.5121731289449954,
      "grad_norm": 2.3088480635629853,
      "learning_rate": 6.373873873873875e-06,
      "loss": 0.7908,
      "step": 284
    },
    {
      "epoch": 0.5139765554553652,
      "grad_norm": 2.2019643640954567,
      "learning_rate": 6.396396396396397e-06,
      "loss": 0.8165,
      "step": 285
    },
    {
      "epoch": 0.5157799819657349,
      "grad_norm": 2.2224375489982195,
      "learning_rate": 6.41891891891892e-06,
      "loss": 0.7884,
      "step": 286
    },
    {
      "epoch": 0.5175834084761046,
      "grad_norm": 2.2123927819948257,
      "learning_rate": 6.441441441441442e-06,
      "loss": 0.8261,
      "step": 287
    },
    {
      "epoch": 0.5193868349864743,
      "grad_norm": 2.2449799654503093,
      "learning_rate": 6.463963963963964e-06,
      "loss": 0.8287,
      "step": 288
    },
    {
      "epoch": 0.521190261496844,
      "grad_norm": 2.244952248633715,
      "learning_rate": 6.486486486486487e-06,
      "loss": 0.823,
      "step": 289
    },
    {
      "epoch": 0.5229936880072137,
      "grad_norm": 2.146102311067904,
      "learning_rate": 6.50900900900901e-06,
      "loss": 0.8201,
      "step": 290
    },
    {
      "epoch": 0.5247971145175834,
      "grad_norm": 2.2107121939036642,
      "learning_rate": 6.531531531531532e-06,
      "loss": 0.7452,
      "step": 291
    },
    {
      "epoch": 0.5266005410279531,
      "grad_norm": 2.274570701724603,
      "learning_rate": 6.554054054054054e-06,
      "loss": 0.7995,
      "step": 292
    },
    {
      "epoch": 0.5284039675383229,
      "grad_norm": 2.3901970457801323,
      "learning_rate": 6.5765765765765775e-06,
      "loss": 0.8293,
      "step": 293
    },
    {
      "epoch": 0.5302073940486925,
      "grad_norm": 2.2046674887614617,
      "learning_rate": 6.5990990990991e-06,
      "loss": 0.7711,
      "step": 294
    },
    {
      "epoch": 0.5320108205590622,
      "grad_norm": 2.181130141644271,
      "learning_rate": 6.621621621621622e-06,
      "loss": 0.7467,
      "step": 295
    },
    {
      "epoch": 0.5338142470694319,
      "grad_norm": 2.1545307052885434,
      "learning_rate": 6.644144144144144e-06,
      "loss": 0.7591,
      "step": 296
    },
    {
      "epoch": 0.5356176735798016,
      "grad_norm": 2.000955616731471,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.7405,
      "step": 297
    },
    {
      "epoch": 0.5374211000901713,
      "grad_norm": 2.270342762369627,
      "learning_rate": 6.689189189189191e-06,
      "loss": 0.8734,
      "step": 298
    },
    {
      "epoch": 0.539224526600541,
      "grad_norm": 1.9914609602909024,
      "learning_rate": 6.711711711711713e-06,
      "loss": 0.9029,
      "step": 299
    },
    {
      "epoch": 0.5410279531109107,
      "grad_norm": 2.1862011960901238,
      "learning_rate": 6.734234234234235e-06,
      "loss": 0.7847,
      "step": 300
    },
    {
      "epoch": 0.5428313796212805,
      "grad_norm": 2.162455670849857,
      "learning_rate": 6.7567567567567575e-06,
      "loss": 0.796,
      "step": 301
    },
    {
      "epoch": 0.5446348061316502,
      "grad_norm": 2.190782292923182,
      "learning_rate": 6.77927927927928e-06,
      "loss": 0.8361,
      "step": 302
    },
    {
      "epoch": 0.5464382326420198,
      "grad_norm": 2.343114673195786,
      "learning_rate": 6.801801801801803e-06,
      "loss": 0.9578,
      "step": 303
    },
    {
      "epoch": 0.5482416591523895,
      "grad_norm": 2.137122549596483,
      "learning_rate": 6.824324324324325e-06,
      "loss": 0.8094,
      "step": 304
    },
    {
      "epoch": 0.5500450856627592,
      "grad_norm": 2.106947969785909,
      "learning_rate": 6.846846846846848e-06,
      "loss": 0.7836,
      "step": 305
    },
    {
      "epoch": 0.5518485121731289,
      "grad_norm": 2.146424998051745,
      "learning_rate": 6.86936936936937e-06,
      "loss": 0.7997,
      "step": 306
    },
    {
      "epoch": 0.5536519386834986,
      "grad_norm": 1.8042726808487144,
      "learning_rate": 6.891891891891892e-06,
      "loss": 0.7629,
      "step": 307
    },
    {
      "epoch": 0.5554553651938684,
      "grad_norm": 2.142256130584483,
      "learning_rate": 6.914414414414415e-06,
      "loss": 0.8131,
      "step": 308
    },
    {
      "epoch": 0.5572587917042381,
      "grad_norm": 2.0191516225293116,
      "learning_rate": 6.936936936936938e-06,
      "loss": 0.8088,
      "step": 309
    },
    {
      "epoch": 0.5590622182146078,
      "grad_norm": 2.2871792098661015,
      "learning_rate": 6.95945945945946e-06,
      "loss": 0.7785,
      "step": 310
    },
    {
      "epoch": 0.5608656447249775,
      "grad_norm": 2.2023691318993905,
      "learning_rate": 6.981981981981982e-06,
      "loss": 0.828,
      "step": 311
    },
    {
      "epoch": 0.5626690712353472,
      "grad_norm": 2.234138281725447,
      "learning_rate": 7.0045045045045045e-06,
      "loss": 0.7213,
      "step": 312
    },
    {
      "epoch": 0.5644724977457168,
      "grad_norm": 2.2818031085235932,
      "learning_rate": 7.027027027027028e-06,
      "loss": 0.6795,
      "step": 313
    },
    {
      "epoch": 0.5662759242560865,
      "grad_norm": 2.0842673839335846,
      "learning_rate": 7.04954954954955e-06,
      "loss": 0.8452,
      "step": 314
    },
    {
      "epoch": 0.5680793507664562,
      "grad_norm": 2.002642371369536,
      "learning_rate": 7.072072072072072e-06,
      "loss": 0.89,
      "step": 315
    },
    {
      "epoch": 0.569882777276826,
      "grad_norm": 2.083265325972135,
      "learning_rate": 7.0945945945945946e-06,
      "loss": 0.7551,
      "step": 316
    },
    {
      "epoch": 0.5716862037871957,
      "grad_norm": 2.0768528412350586,
      "learning_rate": 7.117117117117117e-06,
      "loss": 0.7289,
      "step": 317
    },
    {
      "epoch": 0.5734896302975654,
      "grad_norm": 1.9695169006653106,
      "learning_rate": 7.139639639639641e-06,
      "loss": 0.7184,
      "step": 318
    },
    {
      "epoch": 0.5752930568079351,
      "grad_norm": 2.084828562576803,
      "learning_rate": 7.162162162162163e-06,
      "loss": 0.7489,
      "step": 319
    },
    {
      "epoch": 0.5770964833183048,
      "grad_norm": 2.209350364597537,
      "learning_rate": 7.1846846846846855e-06,
      "loss": 0.7564,
      "step": 320
    },
    {
      "epoch": 0.5788999098286745,
      "grad_norm": 2.2105476735413054,
      "learning_rate": 7.207207207207208e-06,
      "loss": 0.84,
      "step": 321
    },
    {
      "epoch": 0.5807033363390441,
      "grad_norm": 2.5857780263589616,
      "learning_rate": 7.229729729729731e-06,
      "loss": 0.8624,
      "step": 322
    },
    {
      "epoch": 0.5825067628494139,
      "grad_norm": 2.19851358126889,
      "learning_rate": 7.252252252252253e-06,
      "loss": 0.6745,
      "step": 323
    },
    {
      "epoch": 0.5843101893597836,
      "grad_norm": 2.144886414168463,
      "learning_rate": 7.2747747747747755e-06,
      "loss": 0.8314,
      "step": 324
    },
    {
      "epoch": 0.5861136158701533,
      "grad_norm": 2.11225650931259,
      "learning_rate": 7.297297297297298e-06,
      "loss": 0.9043,
      "step": 325
    },
    {
      "epoch": 0.587917042380523,
      "grad_norm": 2.301105964642942,
      "learning_rate": 7.31981981981982e-06,
      "loss": 0.7197,
      "step": 326
    },
    {
      "epoch": 0.5897204688908927,
      "grad_norm": 2.3975525696500806,
      "learning_rate": 7.342342342342343e-06,
      "loss": 0.9116,
      "step": 327
    },
    {
      "epoch": 0.5915238954012624,
      "grad_norm": 2.1452666571693255,
      "learning_rate": 7.3648648648648655e-06,
      "loss": 0.7906,
      "step": 328
    },
    {
      "epoch": 0.5933273219116321,
      "grad_norm": 2.4409882639138134,
      "learning_rate": 7.387387387387388e-06,
      "loss": 0.8716,
      "step": 329
    },
    {
      "epoch": 0.5951307484220018,
      "grad_norm": 2.171908727845542,
      "learning_rate": 7.40990990990991e-06,
      "loss": 0.7304,
      "step": 330
    },
    {
      "epoch": 0.5969341749323716,
      "grad_norm": 2.247976351955023,
      "learning_rate": 7.4324324324324324e-06,
      "loss": 0.7739,
      "step": 331
    },
    {
      "epoch": 0.5987376014427412,
      "grad_norm": 2.238977362911319,
      "learning_rate": 7.4549549549549564e-06,
      "loss": 0.6977,
      "step": 332
    },
    {
      "epoch": 0.6005410279531109,
      "grad_norm": 1.9416451363003897,
      "learning_rate": 7.477477477477479e-06,
      "loss": 0.7653,
      "step": 333
    },
    {
      "epoch": 0.6023444544634806,
      "grad_norm": 2.184729395722401,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.7434,
      "step": 334
    },
    {
      "epoch": 0.6041478809738503,
      "grad_norm": 2.1317078889032173,
      "learning_rate": 7.5225225225225225e-06,
      "loss": 0.7047,
      "step": 335
    },
    {
      "epoch": 0.60595130748422,
      "grad_norm": 1.9907910766284589,
      "learning_rate": 7.545045045045045e-06,
      "loss": 0.7499,
      "step": 336
    },
    {
      "epoch": 0.6077547339945897,
      "grad_norm": 2.1264240696103487,
      "learning_rate": 7.567567567567569e-06,
      "loss": 0.8601,
      "step": 337
    },
    {
      "epoch": 0.6095581605049594,
      "grad_norm": 2.2046257254141035,
      "learning_rate": 7.590090090090091e-06,
      "loss": 0.8269,
      "step": 338
    },
    {
      "epoch": 0.6113615870153292,
      "grad_norm": 2.1215302945356695,
      "learning_rate": 7.612612612612613e-06,
      "loss": 0.7938,
      "step": 339
    },
    {
      "epoch": 0.6131650135256989,
      "grad_norm": 2.229799150273438,
      "learning_rate": 7.635135135135135e-06,
      "loss": 0.7993,
      "step": 340
    },
    {
      "epoch": 0.6149684400360685,
      "grad_norm": 2.129796061875063,
| "learning_rate": 7.657657657657658e-06, | |
| "loss": 0.8045, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.6167718665464382, | |
| "grad_norm": 2.0716978158297685, | |
| "learning_rate": 7.680180180180181e-06, | |
| "loss": 0.7925, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.6185752930568079, | |
| "grad_norm": 2.130760036578581, | |
| "learning_rate": 7.702702702702704e-06, | |
| "loss": 0.8987, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.6203787195671776, | |
| "grad_norm": 1.9658518773305242, | |
| "learning_rate": 7.725225225225226e-06, | |
| "loss": 0.7385, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.6221821460775473, | |
| "grad_norm": 2.2057377247232557, | |
| "learning_rate": 7.747747747747749e-06, | |
| "loss": 0.8462, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.6239855725879171, | |
| "grad_norm": 1.998255103995078, | |
| "learning_rate": 7.77027027027027e-06, | |
| "loss": 0.726, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.6257889990982868, | |
| "grad_norm": 2.1024774999508384, | |
| "learning_rate": 7.792792792792793e-06, | |
| "loss": 0.7351, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.6275924256086565, | |
| "grad_norm": 2.045648302941062, | |
| "learning_rate": 7.815315315315317e-06, | |
| "loss": 0.7605, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.6293958521190262, | |
| "grad_norm": 2.2083257683921373, | |
| "learning_rate": 7.837837837837838e-06, | |
| "loss": 0.7318, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.6311992786293958, | |
| "grad_norm": 2.187495516104006, | |
| "learning_rate": 7.860360360360361e-06, | |
| "loss": 0.8159, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6330027051397655, | |
| "grad_norm": 2.0804434740408007, | |
| "learning_rate": 7.882882882882884e-06, | |
| "loss": 0.7396, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.6348061316501352, | |
| "grad_norm": 2.037209239741434, | |
| "learning_rate": 7.905405405405406e-06, | |
| "loss": 0.7953, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.6366095581605049, | |
| "grad_norm": 2.0556472196259055, | |
| "learning_rate": 7.927927927927929e-06, | |
| "loss": 0.6783, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.6384129846708747, | |
| "grad_norm": 2.0562270484236898, | |
| "learning_rate": 7.95045045045045e-06, | |
| "loss": 0.7568, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.6402164111812444, | |
| "grad_norm": 1.9938898178619702, | |
| "learning_rate": 7.972972972972974e-06, | |
| "loss": 0.8204, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.6420198376916141, | |
| "grad_norm": 2.072139406380031, | |
| "learning_rate": 7.995495495495497e-06, | |
| "loss": 0.8035, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.6438232642019838, | |
| "grad_norm": 2.0697820853812674, | |
| "learning_rate": 8.018018018018018e-06, | |
| "loss": 0.7801, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.6456266907123535, | |
| "grad_norm": 2.0412202940720623, | |
| "learning_rate": 8.040540540540541e-06, | |
| "loss": 0.7014, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.6474301172227231, | |
| "grad_norm": 1.9405663633560892, | |
| "learning_rate": 8.063063063063063e-06, | |
| "loss": 0.6928, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.6492335437330928, | |
| "grad_norm": 2.1239135328030234, | |
| "learning_rate": 8.085585585585586e-06, | |
| "loss": 0.887, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6510369702434626, | |
| "grad_norm": 2.436623349264573, | |
| "learning_rate": 8.108108108108109e-06, | |
| "loss": 0.8074, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.6528403967538323, | |
| "grad_norm": 2.0833195573627195, | |
| "learning_rate": 8.130630630630632e-06, | |
| "loss": 0.7037, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.654643823264202, | |
| "grad_norm": 1.9625571637730252, | |
| "learning_rate": 8.153153153153154e-06, | |
| "loss": 0.7199, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.6564472497745717, | |
| "grad_norm": 1.980886318284568, | |
| "learning_rate": 8.175675675675677e-06, | |
| "loss": 0.749, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.6582506762849414, | |
| "grad_norm": 2.3686023326598593, | |
| "learning_rate": 8.198198198198198e-06, | |
| "loss": 1.0184, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.6600541027953111, | |
| "grad_norm": 2.2671248043012264, | |
| "learning_rate": 8.220720720720721e-06, | |
| "loss": 0.7957, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.6618575293056808, | |
| "grad_norm": 2.2527130831079027, | |
| "learning_rate": 8.243243243243245e-06, | |
| "loss": 0.8575, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.6636609558160504, | |
| "grad_norm": 2.5533424372075446, | |
| "learning_rate": 8.265765765765766e-06, | |
| "loss": 0.7706, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.6654643823264202, | |
| "grad_norm": 2.467984541574478, | |
| "learning_rate": 8.288288288288289e-06, | |
| "loss": 0.7647, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.6672678088367899, | |
| "grad_norm": 2.183449489939818, | |
| "learning_rate": 8.31081081081081e-06, | |
| "loss": 0.8173, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6690712353471596, | |
| "grad_norm": 2.2867069876523582, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.8269, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.6708746618575293, | |
| "grad_norm": 2.2573156909093957, | |
| "learning_rate": 8.355855855855857e-06, | |
| "loss": 0.7816, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.672678088367899, | |
| "grad_norm": 1.9766214188381033, | |
| "learning_rate": 8.378378378378378e-06, | |
| "loss": 0.7515, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.6744815148782687, | |
| "grad_norm": 2.1750484801074057, | |
| "learning_rate": 8.400900900900901e-06, | |
| "loss": 0.8656, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.6762849413886384, | |
| "grad_norm": 2.230627699222089, | |
| "learning_rate": 8.423423423423423e-06, | |
| "loss": 0.773, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.6780883678990082, | |
| "grad_norm": 2.0314632652565763, | |
| "learning_rate": 8.445945945945948e-06, | |
| "loss": 0.7375, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.6798917944093779, | |
| "grad_norm": 2.005821931578866, | |
| "learning_rate": 8.46846846846847e-06, | |
| "loss": 0.7262, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.6816952209197475, | |
| "grad_norm": 2.1743917104398647, | |
| "learning_rate": 8.490990990990992e-06, | |
| "loss": 0.7824, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.6834986474301172, | |
| "grad_norm": 2.0955934020895066, | |
| "learning_rate": 8.513513513513514e-06, | |
| "loss": 0.785, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.6853020739404869, | |
| "grad_norm": 2.000365871785507, | |
| "learning_rate": 8.536036036036037e-06, | |
| "loss": 0.648, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6871055004508566, | |
| "grad_norm": 2.1478172637074744, | |
| "learning_rate": 8.55855855855856e-06, | |
| "loss": 0.8075, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.6889089269612263, | |
| "grad_norm": 2.134460577230095, | |
| "learning_rate": 8.581081081081082e-06, | |
| "loss": 0.9026, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.690712353471596, | |
| "grad_norm": 2.14542331689987, | |
| "learning_rate": 8.603603603603605e-06, | |
| "loss": 0.8901, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.6925157799819658, | |
| "grad_norm": 2.135300301139234, | |
| "learning_rate": 8.626126126126126e-06, | |
| "loss": 0.7259, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.6943192064923355, | |
| "grad_norm": 2.474623212671607, | |
| "learning_rate": 8.64864864864865e-06, | |
| "loss": 0.8629, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.6961226330027052, | |
| "grad_norm": 1.975908066289463, | |
| "learning_rate": 8.671171171171172e-06, | |
| "loss": 0.7249, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.6979260595130748, | |
| "grad_norm": 2.1653693128183016, | |
| "learning_rate": 8.693693693693694e-06, | |
| "loss": 0.8081, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.6997294860234445, | |
| "grad_norm": 1.8567902438166204, | |
| "learning_rate": 8.716216216216217e-06, | |
| "loss": 0.7579, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.7015329125338142, | |
| "grad_norm": 2.2215481111685484, | |
| "learning_rate": 8.738738738738739e-06, | |
| "loss": 0.9716, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.7033363390441839, | |
| "grad_norm": 2.3046170296242, | |
| "learning_rate": 8.761261261261262e-06, | |
| "loss": 0.7795, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7051397655545536, | |
| "grad_norm": 2.131248198058394, | |
| "learning_rate": 8.783783783783785e-06, | |
| "loss": 0.9155, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.7069431920649234, | |
| "grad_norm": 1.924396723021384, | |
| "learning_rate": 8.806306306306306e-06, | |
| "loss": 0.7556, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.7087466185752931, | |
| "grad_norm": 2.1117533927836996, | |
| "learning_rate": 8.82882882882883e-06, | |
| "loss": 0.8406, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.7105500450856628, | |
| "grad_norm": 2.075709429966764, | |
| "learning_rate": 8.851351351351351e-06, | |
| "loss": 0.745, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.7123534715960325, | |
| "grad_norm": 2.16985925608763, | |
| "learning_rate": 8.873873873873876e-06, | |
| "loss": 0.6691, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.7141568981064021, | |
| "grad_norm": 2.0797926880074846, | |
| "learning_rate": 8.896396396396397e-06, | |
| "loss": 0.8238, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.7159603246167718, | |
| "grad_norm": 2.0186248223482997, | |
| "learning_rate": 8.91891891891892e-06, | |
| "loss": 0.7898, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.7177637511271415, | |
| "grad_norm": 2.185076250626117, | |
| "learning_rate": 8.941441441441442e-06, | |
| "loss": 0.7142, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.7195671776375113, | |
| "grad_norm": 2.1357619098512384, | |
| "learning_rate": 8.963963963963965e-06, | |
| "loss": 0.723, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.721370604147881, | |
| "grad_norm": 2.117174241205152, | |
| "learning_rate": 8.986486486486488e-06, | |
| "loss": 0.7863, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7231740306582507, | |
| "grad_norm": 2.0651532522605214, | |
| "learning_rate": 9.00900900900901e-06, | |
| "loss": 0.7213, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.7249774571686204, | |
| "grad_norm": 1.9757749532276578, | |
| "learning_rate": 9.031531531531533e-06, | |
| "loss": 0.7742, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.7267808836789901, | |
| "grad_norm": 2.3393951402241755, | |
| "learning_rate": 9.054054054054054e-06, | |
| "loss": 0.7721, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.7285843101893598, | |
| "grad_norm": 2.2574331805115064, | |
| "learning_rate": 9.076576576576577e-06, | |
| "loss": 0.9634, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.7303877366997295, | |
| "grad_norm": 2.0915118092689524, | |
| "learning_rate": 9.0990990990991e-06, | |
| "loss": 0.8077, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.7321911632100991, | |
| "grad_norm": 2.1653381189020524, | |
| "learning_rate": 9.121621621621622e-06, | |
| "loss": 0.7777, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.7339945897204689, | |
| "grad_norm": 2.1328560039152458, | |
| "learning_rate": 9.144144144144145e-06, | |
| "loss": 0.7882, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.7357980162308386, | |
| "grad_norm": 2.201274651740219, | |
| "learning_rate": 9.166666666666666e-06, | |
| "loss": 0.7608, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.7376014427412083, | |
| "grad_norm": 2.2359271481989587, | |
| "learning_rate": 9.189189189189191e-06, | |
| "loss": 0.8347, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.739404869251578, | |
| "grad_norm": 2.1161842611073034, | |
| "learning_rate": 9.211711711711713e-06, | |
| "loss": 0.7557, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7412082957619477, | |
| "grad_norm": 2.028791176838769, | |
| "learning_rate": 9.234234234234236e-06, | |
| "loss": 0.7183, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.7430117222723174, | |
| "grad_norm": 2.0656955597804503, | |
| "learning_rate": 9.256756756756757e-06, | |
| "loss": 0.7699, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.7448151487826871, | |
| "grad_norm": 2.087727535028477, | |
| "learning_rate": 9.27927927927928e-06, | |
| "loss": 0.7679, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.7466185752930569, | |
| "grad_norm": 2.49764840147548, | |
| "learning_rate": 9.301801801801804e-06, | |
| "loss": 0.8287, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.7484220018034266, | |
| "grad_norm": 1.9741285413389515, | |
| "learning_rate": 9.324324324324325e-06, | |
| "loss": 0.8161, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.7502254283137962, | |
| "grad_norm": 1.9406848284047182, | |
| "learning_rate": 9.346846846846848e-06, | |
| "loss": 0.768, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.7520288548241659, | |
| "grad_norm": 2.244408508249851, | |
| "learning_rate": 9.36936936936937e-06, | |
| "loss": 0.8611, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.7538322813345356, | |
| "grad_norm": 2.158384016489991, | |
| "learning_rate": 9.391891891891893e-06, | |
| "loss": 0.8361, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.7556357078449053, | |
| "grad_norm": 2.066053768199076, | |
| "learning_rate": 9.414414414414416e-06, | |
| "loss": 0.819, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.757439134355275, | |
| "grad_norm": 2.1350022569990603, | |
| "learning_rate": 9.436936936936937e-06, | |
| "loss": 0.7075, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7592425608656447, | |
| "grad_norm": 2.0051316619920745, | |
| "learning_rate": 9.45945945945946e-06, | |
| "loss": 0.8319, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.7610459873760145, | |
| "grad_norm": 2.225160013360467, | |
| "learning_rate": 9.481981981981982e-06, | |
| "loss": 0.7308, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.7628494138863842, | |
| "grad_norm": 2.276993744188313, | |
| "learning_rate": 9.504504504504505e-06, | |
| "loss": 0.8014, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.7646528403967539, | |
| "grad_norm": 1.8858762886928577, | |
| "learning_rate": 9.527027027027028e-06, | |
| "loss": 0.7923, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.7664562669071235, | |
| "grad_norm": 2.0252801103636195, | |
| "learning_rate": 9.54954954954955e-06, | |
| "loss": 0.76, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.7682596934174932, | |
| "grad_norm": 2.0418148296691503, | |
| "learning_rate": 9.572072072072073e-06, | |
| "loss": 0.7714, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.7700631199278629, | |
| "grad_norm": 2.133301976541229, | |
| "learning_rate": 9.594594594594594e-06, | |
| "loss": 0.918, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.7718665464382326, | |
| "grad_norm": 1.8904070256803192, | |
| "learning_rate": 9.617117117117117e-06, | |
| "loss": 0.8019, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.7736699729486023, | |
| "grad_norm": 2.1802687710567445, | |
| "learning_rate": 9.63963963963964e-06, | |
| "loss": 0.7124, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.7754733994589721, | |
| "grad_norm": 2.0041551174293883, | |
| "learning_rate": 9.662162162162164e-06, | |
| "loss": 0.7317, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7772768259693418, | |
| "grad_norm": 2.1685058642707085, | |
| "learning_rate": 9.684684684684685e-06, | |
| "loss": 0.8182, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.7790802524797115, | |
| "grad_norm": 2.0821300887019394, | |
| "learning_rate": 9.707207207207208e-06, | |
| "loss": 0.7483, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.7808836789900812, | |
| "grad_norm": 2.017254787966865, | |
| "learning_rate": 9.729729729729732e-06, | |
| "loss": 0.7439, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.7826871055004508, | |
| "grad_norm": 2.0938808603633965, | |
| "learning_rate": 9.752252252252253e-06, | |
| "loss": 0.727, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.7844905320108205, | |
| "grad_norm": 2.0745407772671784, | |
| "learning_rate": 9.774774774774776e-06, | |
| "loss": 0.8423, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.7862939585211902, | |
| "grad_norm": 2.080200509416444, | |
| "learning_rate": 9.797297297297298e-06, | |
| "loss": 0.7642, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.78809738503156, | |
| "grad_norm": 2.0994306655505537, | |
| "learning_rate": 9.81981981981982e-06, | |
| "loss": 0.839, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.7899008115419297, | |
| "grad_norm": 2.2979389003423423, | |
| "learning_rate": 9.842342342342344e-06, | |
| "loss": 1.0683, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.7917042380522994, | |
| "grad_norm": 2.0022713778993046, | |
| "learning_rate": 9.864864864864865e-06, | |
| "loss": 0.8482, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.7935076645626691, | |
| "grad_norm": 2.079766390913082, | |
| "learning_rate": 9.887387387387388e-06, | |
| "loss": 0.8196, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.7953110910730388, | |
| "grad_norm": 2.2693808506736555, | |
| "learning_rate": 9.90990990990991e-06, | |
| "loss": 0.8566, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.7971145175834085, | |
| "grad_norm": 2.0568829176639767, | |
| "learning_rate": 9.932432432432433e-06, | |
| "loss": 0.6902, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.7989179440937781, | |
| "grad_norm": 2.0605295508114687, | |
| "learning_rate": 9.954954954954956e-06, | |
| "loss": 0.7233, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.8007213706041478, | |
| "grad_norm": 2.0444950860289532, | |
| "learning_rate": 9.97747747747748e-06, | |
| "loss": 0.7836, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.8025247971145176, | |
| "grad_norm": 2.110829091240123, | |
| "learning_rate": 1e-05, | |
| "loss": 0.8386, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.8043282236248873, | |
| "grad_norm": 1.9884942336111662, | |
| "learning_rate": 9.999998454785508e-06, | |
| "loss": 0.7082, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.806131650135257, | |
| "grad_norm": 2.259740074055523, | |
| "learning_rate": 9.999993819142988e-06, | |
| "loss": 0.7289, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.8079350766456267, | |
| "grad_norm": 1.9397862708682205, | |
| "learning_rate": 9.999986093075303e-06, | |
| "loss": 0.8564, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.8097385031559964, | |
| "grad_norm": 2.024093634732536, | |
| "learning_rate": 9.99997527658723e-06, | |
| "loss": 0.8771, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.8115419296663661, | |
| "grad_norm": 2.1014829667276866, | |
| "learning_rate": 9.999961369685454e-06, | |
| "loss": 0.7321, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8133453561767358, | |
| "grad_norm": 2.048448841813617, | |
| "learning_rate": 9.999944372378571e-06, | |
| "loss": 0.7546, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.8151487826871056, | |
| "grad_norm": 2.1737934515944346, | |
| "learning_rate": 9.999924284677087e-06, | |
| "loss": 0.7508, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.8169522091974752, | |
| "grad_norm": 2.1001584800928037, | |
| "learning_rate": 9.999901106593418e-06, | |
| "loss": 0.7644, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.8187556357078449, | |
| "grad_norm": 1.8767527278907021, | |
| "learning_rate": 9.999874838141888e-06, | |
| "loss": 0.7667, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.8205590622182146, | |
| "grad_norm": 2.04063723684597, | |
| "learning_rate": 9.999845479338735e-06, | |
| "loss": 0.8819, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.8223624887285843, | |
| "grad_norm": 2.0555425814497728, | |
| "learning_rate": 9.999813030202106e-06, | |
| "loss": 0.7877, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.824165915238954, | |
| "grad_norm": 1.967379340855907, | |
| "learning_rate": 9.999777490752056e-06, | |
| "loss": 0.8801, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.8259693417493237, | |
| "grad_norm": 2.2507392839674556, | |
| "learning_rate": 9.99973886101055e-06, | |
| "loss": 0.7568, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.8277727682596934, | |
| "grad_norm": 2.5630927142344935, | |
| "learning_rate": 9.99969714100147e-06, | |
| "loss": 0.9138, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.8295761947700632, | |
| "grad_norm": 2.173332163298101, | |
| "learning_rate": 9.999652330750595e-06, | |
| "loss": 0.8281, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.8313796212804329, | |
| "grad_norm": 1.916637925944768, | |
| "learning_rate": 9.999604430285628e-06, | |
| "loss": 0.7754, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.8331830477908025, | |
| "grad_norm": 2.1152249416855384, | |
| "learning_rate": 9.999553439636171e-06, | |
| "loss": 0.8997, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.8349864743011722, | |
| "grad_norm": 2.0689074493196955, | |
| "learning_rate": 9.999499358833745e-06, | |
| "loss": 0.7964, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.8367899008115419, | |
| "grad_norm": 2.0154413915192064, | |
| "learning_rate": 9.999442187911774e-06, | |
| "loss": 0.7699, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.8385933273219116, | |
| "grad_norm": 1.9151923147532852, | |
| "learning_rate": 9.999381926905592e-06, | |
| "loss": 0.6932, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.8403967538322813, | |
| "grad_norm": 2.1665705435782336, | |
| "learning_rate": 9.999318575852451e-06, | |
| "loss": 1.0093, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.842200180342651, | |
| "grad_norm": 1.8047363755961323, | |
| "learning_rate": 9.999252134791504e-06, | |
| "loss": 0.6659, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.8440036068530208, | |
| "grad_norm": 2.151615247814168, | |
| "learning_rate": 9.999182603763816e-06, | |
| "loss": 0.7546, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.8458070333633905, | |
| "grad_norm": 2.4423751661740503, | |
| "learning_rate": 9.999109982812368e-06, | |
| "loss": 0.9198, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.8476104598737602, | |
| "grad_norm": 1.9717826223860573, | |
| "learning_rate": 9.99903427198204e-06, | |
| "loss": 0.7544, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8494138863841298, | |
| "grad_norm": 2.0312411419769005, | |
| "learning_rate": 9.99895547131963e-06, | |
| "loss": 0.8107, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.8512173128944995, | |
| "grad_norm": 2.157429067167941, | |
| "learning_rate": 9.998873580873848e-06, | |
| "loss": 0.6818, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.8530207394048692, | |
| "grad_norm": 2.165363885628237, | |
| "learning_rate": 9.998788600695304e-06, | |
| "loss": 0.7382, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.8548241659152389, | |
| "grad_norm": 2.147934133999578, | |
| "learning_rate": 9.998700530836525e-06, | |
| "loss": 0.8056, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.8566275924256087, | |
| "grad_norm": 1.9152522216267172, | |
| "learning_rate": 9.998609371351944e-06, | |
| "loss": 0.8791, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.8584310189359784, | |
| "grad_norm": 2.0560094856153976, | |
| "learning_rate": 9.998515122297909e-06, | |
| "loss": 0.8172, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.8602344454463481, | |
| "grad_norm": 2.123184949091883, | |
| "learning_rate": 9.99841778373267e-06, | |
| "loss": 0.8243, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.8620378719567178, | |
| "grad_norm": 2.178911188445628, | |
| "learning_rate": 9.998317355716393e-06, | |
| "loss": 0.9132, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.8638412984670875, | |
| "grad_norm": 2.2802080985811766, | |
| "learning_rate": 9.99821383831115e-06, | |
| "loss": 0.8641, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.8656447249774571, | |
| "grad_norm": 2.0900308677818287, | |
| "learning_rate": 9.998107231580925e-06, | |
| "loss": 0.7905, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.8674481514878268, | |
| "grad_norm": 1.9731645490377003, | |
| "learning_rate": 9.99799753559161e-06, | |
| "loss": 0.639, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.8692515779981965, | |
| "grad_norm": 2.253348194223949, | |
| "learning_rate": 9.997884750411004e-06, | |
| "loss": 0.7037, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.8710550045085663, | |
| "grad_norm": 2.052696164388112, | |
| "learning_rate": 9.99776887610882e-06, | |
| "loss": 0.7247, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.872858431018936, | |
| "grad_norm": 2.0018489890660196, | |
| "learning_rate": 9.997649912756678e-06, | |
| "loss": 0.8574, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.8746618575293057, | |
| "grad_norm": 2.168583333160758, | |
| "learning_rate": 9.997527860428108e-06, | |
| "loss": 0.7786, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.8764652840396754, | |
| "grad_norm": 1.943218814932057, | |
| "learning_rate": 9.99740271919855e-06, | |
| "loss": 0.8582, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.8782687105500451, | |
| "grad_norm": 2.3671546883192964, | |
| "learning_rate": 9.997274489145348e-06, | |
| "loss": 0.8454, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.8800721370604148, | |
| "grad_norm": 1.970853464509309, | |
| "learning_rate": 9.997143170347762e-06, | |
| "loss": 0.8135, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.8818755635707844, | |
| "grad_norm": 2.0194288814353505, | |
| "learning_rate": 9.997008762886957e-06, | |
| "loss": 0.8322, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.8836789900811542, | |
| "grad_norm": 1.7905144760024025, | |
| "learning_rate": 9.99687126684601e-06, | |
| "loss": 0.6747, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.8854824165915239, | |
| "grad_norm": 1.8598765372123462, | |
| "learning_rate": 9.996730682309905e-06, | |
| "loss": 0.7077, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.8872858431018936, | |
| "grad_norm": 2.0349723629280194, | |
| "learning_rate": 9.996587009365534e-06, | |
| "loss": 1.0192, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.8890892696122633, | |
| "grad_norm": 1.8670659449601439, | |
| "learning_rate": 9.9964402481017e-06, | |
| "loss": 0.7877, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.890892696122633, | |
| "grad_norm": 1.9920058723596443, | |
| "learning_rate": 9.996290398609115e-06, | |
| "loss": 0.7732, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.8926961226330027, | |
| "grad_norm": 2.115518704658833, | |
| "learning_rate": 9.996137460980397e-06, | |
| "loss": 0.9214, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.8944995491433724, | |
| "grad_norm": 1.8785646679648142, | |
| "learning_rate": 9.995981435310078e-06, | |
| "loss": 0.7817, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.8963029756537421, | |
| "grad_norm": 2.055980396750251, | |
| "learning_rate": 9.99582232169459e-06, | |
| "loss": 0.7329, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.8981064021641119, | |
| "grad_norm": 2.3245341827701016, | |
| "learning_rate": 9.995660120232282e-06, | |
| "loss": 0.7507, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.8999098286744815, | |
| "grad_norm": 1.9959708222467396, | |
| "learning_rate": 9.99549483102341e-06, | |
| "loss": 0.8384, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.9017132551848512, | |
| "grad_norm": 1.8680585762600073, | |
| "learning_rate": 9.995326454170132e-06, | |
| "loss": 0.7024, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9035166816952209, | |
| "grad_norm": 2.0117030514545378, | |
| "learning_rate": 9.995154989776523e-06, | |
| "loss": 0.7802, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.9053201082055906, | |
| "grad_norm": 2.137679667033616, | |
| "learning_rate": 9.994980437948563e-06, | |
| "loss": 0.8063, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.9071235347159603, | |
| "grad_norm": 2.1841726174477225, | |
| "learning_rate": 9.994802798794138e-06, | |
| "loss": 0.8739, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.90892696122633, | |
| "grad_norm": 2.0156304452286316, | |
| "learning_rate": 9.994622072423046e-06, | |
| "loss": 0.8506, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.9107303877366997, | |
| "grad_norm": 1.9906410530703365, | |
| "learning_rate": 9.99443825894699e-06, | |
| "loss": 0.756, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.9125338142470695, | |
| "grad_norm": 2.112608845026693, | |
| "learning_rate": 9.994251358479583e-06, | |
| "loss": 0.8051, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.9143372407574392, | |
| "grad_norm": 2.043964300441665, | |
| "learning_rate": 9.994061371136347e-06, | |
| "loss": 0.7568, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.9161406672678089, | |
| "grad_norm": 1.938078803529045, | |
| "learning_rate": 9.993868297034709e-06, | |
| "loss": 0.6958, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.9179440937781785, | |
| "grad_norm": 2.136141218098241, | |
| "learning_rate": 9.993672136294004e-06, | |
| "loss": 0.8964, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.9197475202885482, | |
| "grad_norm": 2.159727291798854, | |
| "learning_rate": 9.993472889035478e-06, | |
| "loss": 0.7743, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9215509467989179, | |
| "grad_norm": 2.067924171823381, | |
| "learning_rate": 9.993270555382283e-06, | |
| "loss": 0.7229, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.9233543733092876, | |
| "grad_norm": 1.9675288285614405, | |
| "learning_rate": 9.99306513545948e-06, | |
| "loss": 0.7454, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.9251577998196574, | |
| "grad_norm": 2.022409969863871, | |
| "learning_rate": 9.99285662939403e-06, | |
| "loss": 0.8741, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.9269612263300271, | |
| "grad_norm": 2.1801414103496084, | |
| "learning_rate": 9.992645037314815e-06, | |
| "loss": 0.9204, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.9287646528403968, | |
| "grad_norm": 2.243366786461632, | |
| "learning_rate": 9.992430359352613e-06, | |
| "loss": 0.7942, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.9305680793507665, | |
| "grad_norm": 2.035046137492824, | |
| "learning_rate": 9.992212595640115e-06, | |
| "loss": 0.7946, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.9323715058611362, | |
| "grad_norm": 1.9398861696717984, | |
| "learning_rate": 9.991991746311916e-06, | |
| "loss": 0.8198, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.9341749323715058, | |
| "grad_norm": 2.069297097067726, | |
| "learning_rate": 9.991767811504522e-06, | |
| "loss": 0.7359, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.9359783588818755, | |
| "grad_norm": 2.0501351425908516, | |
| "learning_rate": 9.991540791356342e-06, | |
| "loss": 0.7781, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.9377817853922452, | |
| "grad_norm": 2.0883364441479713, | |
| "learning_rate": 9.991310686007694e-06, | |
| "loss": 0.7445, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.939585211902615, | |
| "grad_norm": 2.000915313932848, | |
| "learning_rate": 9.991077495600806e-06, | |
| "loss": 0.877, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.9413886384129847, | |
| "grad_norm": 2.0041196859921637, | |
| "learning_rate": 9.990841220279805e-06, | |
| "loss": 0.7847, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.9431920649233544, | |
| "grad_norm": 2.0817088464090614, | |
| "learning_rate": 9.990601860190732e-06, | |
| "loss": 0.769, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.9449954914337241, | |
| "grad_norm": 1.8694308179676222, | |
| "learning_rate": 9.990359415481532e-06, | |
| "loss": 0.7341, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.9467989179440938, | |
| "grad_norm": 1.929784513064164, | |
| "learning_rate": 9.990113886302057e-06, | |
| "loss": 0.9216, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.9486023444544635, | |
| "grad_norm": 2.0430562116596698, | |
| "learning_rate": 9.989865272804064e-06, | |
| "loss": 0.9328, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.9504057709648331, | |
| "grad_norm": 2.0821273642223366, | |
| "learning_rate": 9.989613575141216e-06, | |
| "loss": 0.7013, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.9522091974752029, | |
| "grad_norm": 2.107240374281522, | |
| "learning_rate": 9.989358793469089e-06, | |
| "loss": 0.8061, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.9540126239855726, | |
| "grad_norm": 1.9040690842756598, | |
| "learning_rate": 9.989100927945155e-06, | |
| "loss": 0.6969, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.9558160504959423, | |
| "grad_norm": 1.9025952752616262, | |
| "learning_rate": 9.988839978728798e-06, | |
| "loss": 0.8238, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.957619477006312, | |
| "grad_norm": 2.26685993097902, | |
| "learning_rate": 9.988575945981308e-06, | |
| "loss": 0.72, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.9594229035166817, | |
| "grad_norm": 1.9202409462283874, | |
| "learning_rate": 9.98830882986588e-06, | |
| "loss": 0.6961, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.9612263300270514, | |
| "grad_norm": 1.9580497289053733, | |
| "learning_rate": 9.988038630547613e-06, | |
| "loss": 0.7772, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.9630297565374211, | |
| "grad_norm": 2.1460745134447143, | |
| "learning_rate": 9.987765348193517e-06, | |
| "loss": 0.7882, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.9648331830477908, | |
| "grad_norm": 2.232600000911527, | |
| "learning_rate": 9.9874889829725e-06, | |
| "loss": 0.7665, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.9666366095581606, | |
| "grad_norm": 2.169430275754146, | |
| "learning_rate": 9.98720953505538e-06, | |
| "loss": 0.8654, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.9684400360685302, | |
| "grad_norm": 2.1645017061078096, | |
| "learning_rate": 9.986927004614881e-06, | |
| "loss": 0.7641, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.9702434625788999, | |
| "grad_norm": 1.964406868016901, | |
| "learning_rate": 9.986641391825633e-06, | |
| "loss": 0.74, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.9720468890892696, | |
| "grad_norm": 2.024095725628653, | |
| "learning_rate": 9.986352696864165e-06, | |
| "loss": 0.7718, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.9738503155996393, | |
| "grad_norm": 2.256779549272981, | |
| "learning_rate": 9.986060919908917e-06, | |
| "loss": 0.8262, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.975653742110009, | |
| "grad_norm": 1.9660904201916358, | |
| "learning_rate": 9.985766061140233e-06, | |
| "loss": 0.6275, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.9774571686203787, | |
| "grad_norm": 2.1280476336617378, | |
| "learning_rate": 9.985468120740361e-06, | |
| "loss": 0.768, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.9792605951307484, | |
| "grad_norm": 1.938265598688848, | |
| "learning_rate": 9.985167098893452e-06, | |
| "loss": 0.9998, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.9810640216411182, | |
| "grad_norm": 2.1379975513910487, | |
| "learning_rate": 9.984862995785564e-06, | |
| "loss": 0.8202, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.9828674481514879, | |
| "grad_norm": 2.0638541854893884, | |
| "learning_rate": 9.984555811604662e-06, | |
| "loss": 0.7729, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.9846708746618575, | |
| "grad_norm": 2.34875633165973, | |
| "learning_rate": 9.984245546540606e-06, | |
| "loss": 0.8073, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.9864743011722272, | |
| "grad_norm": 2.1131918995078056, | |
| "learning_rate": 9.983932200785173e-06, | |
| "loss": 0.7262, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.9882777276825969, | |
| "grad_norm": 1.995224926283804, | |
| "learning_rate": 9.983615774532031e-06, | |
| "loss": 0.8007, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.9900811541929666, | |
| "grad_norm": 1.8849007313400998, | |
| "learning_rate": 9.983296267976766e-06, | |
| "loss": 0.6879, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.9918845807033363, | |
| "grad_norm": 2.051745716721103, | |
| "learning_rate": 9.982973681316854e-06, | |
| "loss": 0.7265, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.9936880072137061, | |
| "grad_norm": 2.0528171651989693, | |
| "learning_rate": 9.982648014751685e-06, | |
| "loss": 0.7505, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.9954914337240758, | |
| "grad_norm": 2.2911439260287336, | |
| "learning_rate": 9.982319268482547e-06, | |
| "loss": 0.8454, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.9972948602344455, | |
| "grad_norm": 2.2047269517513257, | |
| "learning_rate": 9.981987442712634e-06, | |
| "loss": 0.8355, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.9990982867448152, | |
| "grad_norm": 1.7988349615615926, | |
| "learning_rate": 9.981652537647041e-06, | |
| "loss": 0.6762, | |
| "step": 554 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 4440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 277, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 260908552552448.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
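
A minimal sketch for inspecting a log like the one above, assuming it is saved as a standard `trainer_state.json` (the field names match the Hugging Face `Trainer` checkpoint state); the file path below is hypothetical and should point at the actual checkpoint directory. It uses only the standard library and summarizes the logged loss and learning-rate schedule:

```python
import json

# Hypothetical path; adjust to wherever the checkpoint was saved.
with open("checkpoint-554/trainer_state.json") as f:
    state = json.load(f)

# Assumes every entry logs "loss", as in the train-only history shown here;
# a state file that also contains eval entries would log "eval_loss" instead.
history = state["log_history"]
steps = [e["step"] for e in history]
losses = [e["loss"] for e in history]
lrs = [e["learning_rate"] for e in history]

# Basic summary of the run so far.
print(f"steps logged : {steps[0]}..{steps[-1]} of {state['max_steps']}")
print(f"epoch        : {state['epoch']:.4f} of {state['num_train_epochs']}")
print(f"loss         : first={losses[0]:.4f} last={losses[-1]:.4f} min={min(losses):.4f}")

# The peak learning rate marks the end of the warmup phase.
peak_step = steps[lrs.index(max(lrs))]
print(f"peak lr      : {max(lrs):.2e} at step {peak_step}")
```

On the values logged above, this recovers the schedule boundary directly: the learning rate climbs linearly by about 2.25e-08 per step, reaches exactly 1e-05 at step 445, and decays from there, so `peak_step` prints 445 without any plotting dependencies.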