This is an experimental model (hxa07c architecture).
| Tasks    | Version | Filter | n-shot | Metric |   | Value  |   | Stderr |
|----------|--------:|--------|-------:|--------|---|-------:|---|-------:|
| ruler_vt |       1 | none   |      0 | 16384  | ↑ | 0.2896 | ± |    N/A |
|          |         | none   |      0 | 4096   | ↑ | 0.9220 | ± |    N/A |
|          |         | none   |      0 | 8192   | ↑ | 0.6184 | ± |    N/A |

| Tasks         | Version | Filter | n-shot | Metric |   | Value |   | Stderr |
|---------------|--------:|--------|-------:|--------|---|------:|---|-------:|
| niah_single_1 |       1 | none   |      0 | 16384  | ↑ | 0.988 | ± |    N/A |
|               |         | none   |      0 | 2048   |   | 0.996 | ± | 0.0028 |
|               |         | none   |      0 | 32768  | ↑ | 0.884 | ± |    N/A |
|               |         | none   |      0 | 4096   | ↑ | 1.000 | ± |    N/A |
|               |         | none   |      0 | 65536  | ↑ | 0.000 | ± |    N/A |
|               |         | none   |      0 | 8192   | ↑ | 1.000 | ± |    N/A |

| Tasks | Version | Filter           | n-shot | Metric      |   | Value  |   | Stderr |
|-------|--------:|------------------|-------:|-------------|---|-------:|---|-------:|
| gsm8k |       3 | flexible-extract |      5 | exact_match | ↑ | 0.8681 | ± | 0.0093 |
|       |         | strict-match     |      5 | exact_match | ↑ | 0.8605 | ± | 0.0095 |

| Groups            | Version | Filter | n-shot | Metric |   | Value  |   | Stderr |
|-------------------|--------:|--------|-------:|--------|---|-------:|---|-------:|
| mmlu              |       2 | none   |        | acc    | ↑ | 0.7717 | ± | 0.0033 |
| - humanities      |       2 | none   |        | acc    | ↑ | 0.6678 | ± | 0.0064 |
| - other           |       2 | none   |        | acc    | ↑ | 0.8175 | ± | 0.0066 |
| - social sciences |       2 | none   |        | acc    | ↑ | 0.8713 | ± | 0.0059 |
| - stem            |       2 | none   |        | acc    | ↑ | 0.7843 | ± | 0.0071 |