README.md
69.6 KB · 2822 lines · markdown Raw
1 ---
2 library_name: sentence-transformers
3 pipeline_tag: sentence-similarity
4 tags:
5 - feature-extraction
6 - sentence-similarity
7 - mteb
8 - transformers
9 - transformers.js
10 model-index:
11 - name: epoch_0_model
12 results:
13 - task:
14 type: Classification
15 dataset:
16 type: mteb/amazon_counterfactual
17 name: MTEB AmazonCounterfactualClassification (en)
18 config: en
19 split: test
20 revision: e8379541af4e31359cca9fbcf4b00f2671dba205
21 metrics:
22 - type: accuracy
23 value: 76.8507462686567
24 - type: ap
25 value: 40.592189159090495
26 - type: f1
27 value: 71.01634655512476
28 - task:
29 type: Classification
30 dataset:
31 type: mteb/amazon_polarity
32 name: MTEB AmazonPolarityClassification
33 config: default
34 split: test
35 revision: e2d317d38cd51312af73b3d32a06d1a08b442046
36 metrics:
37 - type: accuracy
38 value: 91.51892500000001
39 - type: ap
40 value: 88.50346762975335
41 - type: f1
42 value: 91.50342077459624
43 - task:
44 type: Classification
45 dataset:
46 type: mteb/amazon_reviews_multi
47 name: MTEB AmazonReviewsClassification (en)
48 config: en
49 split: test
50 revision: 1399c76144fd37290681b995c656ef9b2e06e26d
51 metrics:
52 - type: accuracy
53 value: 47.364
54 - type: f1
55 value: 46.72708080922794
56 - task:
57 type: Retrieval
58 dataset:
59 type: arguana
60 name: MTEB ArguAna
61 config: default
62 split: test
63 revision: None
64 metrics:
65 - type: map_at_1
66 value: 25.178
67 - type: map_at_10
68 value: 40.244
69 - type: map_at_100
70 value: 41.321999999999996
71 - type: map_at_1000
72 value: 41.331
73 - type: map_at_3
74 value: 35.016999999999996
75 - type: map_at_5
76 value: 37.99
77 - type: mrr_at_1
78 value: 25.605
79 - type: mrr_at_10
80 value: 40.422000000000004
81 - type: mrr_at_100
82 value: 41.507
83 - type: mrr_at_1000
84 value: 41.516
85 - type: mrr_at_3
86 value: 35.23
87 - type: mrr_at_5
88 value: 38.15
89 - type: ndcg_at_1
90 value: 25.178
91 - type: ndcg_at_10
92 value: 49.258
93 - type: ndcg_at_100
94 value: 53.776
95 - type: ndcg_at_1000
96 value: 53.995000000000005
97 - type: ndcg_at_3
98 value: 38.429
99 - type: ndcg_at_5
100 value: 43.803
101 - type: precision_at_1
102 value: 25.178
103 - type: precision_at_10
104 value: 7.831
105 - type: precision_at_100
106 value: 0.979
107 - type: precision_at_1000
108 value: 0.1
109 - type: precision_at_3
110 value: 16.121
111 - type: precision_at_5
112 value: 12.29
113 - type: recall_at_1
114 value: 25.178
115 - type: recall_at_10
116 value: 78.307
117 - type: recall_at_100
118 value: 97.866
119 - type: recall_at_1000
120 value: 99.57300000000001
121 - type: recall_at_3
122 value: 48.364000000000004
123 - type: recall_at_5
124 value: 61.451
125 - task:
126 type: Clustering
127 dataset:
128 type: mteb/arxiv-clustering-p2p
129 name: MTEB ArxivClusteringP2P
130 config: default
131 split: test
132 revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
133 metrics:
134 - type: v_measure
135 value: 45.93034494751465
136 - task:
137 type: Clustering
138 dataset:
139 type: mteb/arxiv-clustering-s2s
140 name: MTEB ArxivClusteringS2S
141 config: default
142 split: test
143 revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
144 metrics:
145 - type: v_measure
146 value: 36.64579480054327
147 - task:
148 type: Reranking
149 dataset:
150 type: mteb/askubuntudupquestions-reranking
151 name: MTEB AskUbuntuDupQuestions
152 config: default
153 split: test
154 revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
155 metrics:
156 - type: map
157 value: 60.601310529222054
158 - type: mrr
159 value: 75.04484896451656
160 - task:
161 type: STS
162 dataset:
163 type: mteb/biosses-sts
164 name: MTEB BIOSSES
165 config: default
166 split: test
167 revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
168 metrics:
169 - type: cos_sim_pearson
170 value: 88.57797718095814
171 - type: cos_sim_spearman
172 value: 86.47064499110101
173 - type: euclidean_pearson
174 value: 87.4559602783142
175 - type: euclidean_spearman
176 value: 86.47064499110101
177 - type: manhattan_pearson
178 value: 87.7232764230245
179 - type: manhattan_spearman
180 value: 86.91222131777742
181 - task:
182 type: Classification
183 dataset:
184 type: mteb/banking77
185 name: MTEB Banking77Classification
186 config: default
187 split: test
188 revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
189 metrics:
190 - type: accuracy
191 value: 84.5422077922078
192 - type: f1
193 value: 84.47657456950589
194 - task:
195 type: Clustering
196 dataset:
197 type: mteb/biorxiv-clustering-p2p
198 name: MTEB BiorxivClusteringP2P
199 config: default
200 split: test
201 revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
202 metrics:
203 - type: v_measure
204 value: 38.48953561974464
205 - task:
206 type: Clustering
207 dataset:
208 type: mteb/biorxiv-clustering-s2s
209 name: MTEB BiorxivClusteringS2S
210 config: default
211 split: test
212 revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
213 metrics:
214 - type: v_measure
215 value: 32.75995857510105
216 - task:
217 type: Retrieval
218 dataset:
219 type: BeIR/cqadupstack
220 name: MTEB CQADupstackAndroidRetrieval
221 config: default
222 split: test
223 revision: None
224 metrics:
225 - type: map_at_1
226 value: 30.008000000000003
227 - type: map_at_10
228 value: 39.51
229 - type: map_at_100
230 value: 40.841
231 - type: map_at_1000
232 value: 40.973
233 - type: map_at_3
234 value: 36.248999999999995
235 - type: map_at_5
236 value: 38.096999999999994
237 - type: mrr_at_1
238 value: 36.481
239 - type: mrr_at_10
240 value: 44.818000000000005
241 - type: mrr_at_100
242 value: 45.64
243 - type: mrr_at_1000
244 value: 45.687
245 - type: mrr_at_3
246 value: 42.036
247 - type: mrr_at_5
248 value: 43.782
249 - type: ndcg_at_1
250 value: 36.481
251 - type: ndcg_at_10
252 value: 45.152
253 - type: ndcg_at_100
254 value: 50.449
255 - type: ndcg_at_1000
256 value: 52.76499999999999
257 - type: ndcg_at_3
258 value: 40.161
259 - type: ndcg_at_5
260 value: 42.577999999999996
261 - type: precision_at_1
262 value: 36.481
263 - type: precision_at_10
264 value: 8.369
265 - type: precision_at_100
266 value: 1.373
267 - type: precision_at_1000
268 value: 0.186
269 - type: precision_at_3
270 value: 18.693
271 - type: precision_at_5
272 value: 13.533999999999999
273 - type: recall_at_1
274 value: 30.008000000000003
275 - type: recall_at_10
276 value: 56.108999999999995
277 - type: recall_at_100
278 value: 78.55499999999999
279 - type: recall_at_1000
280 value: 93.659
281 - type: recall_at_3
282 value: 41.754999999999995
283 - type: recall_at_5
284 value: 48.296
285 - task:
286 type: Retrieval
287 dataset:
288 type: BeIR/cqadupstack
289 name: MTEB CQADupstackEnglishRetrieval
290 config: default
291 split: test
292 revision: None
293 metrics:
294 - type: map_at_1
295 value: 30.262
296 - type: map_at_10
297 value: 40.139
298 - type: map_at_100
299 value: 41.394
300 - type: map_at_1000
301 value: 41.526
302 - type: map_at_3
303 value: 37.155
304 - type: map_at_5
305 value: 38.785
306 - type: mrr_at_1
307 value: 38.153
308 - type: mrr_at_10
309 value: 46.369
310 - type: mrr_at_100
311 value: 47.072
312 - type: mrr_at_1000
313 value: 47.111999999999995
314 - type: mrr_at_3
315 value: 44.268
316 - type: mrr_at_5
317 value: 45.389
318 - type: ndcg_at_1
319 value: 38.153
320 - type: ndcg_at_10
321 value: 45.925
322 - type: ndcg_at_100
323 value: 50.394000000000005
324 - type: ndcg_at_1000
325 value: 52.37500000000001
326 - type: ndcg_at_3
327 value: 41.754000000000005
328 - type: ndcg_at_5
329 value: 43.574
330 - type: precision_at_1
331 value: 38.153
332 - type: precision_at_10
333 value: 8.796
334 - type: precision_at_100
335 value: 1.432
336 - type: precision_at_1000
337 value: 0.189
338 - type: precision_at_3
339 value: 20.318
340 - type: precision_at_5
341 value: 14.395
342 - type: recall_at_1
343 value: 30.262
344 - type: recall_at_10
345 value: 55.72200000000001
346 - type: recall_at_100
347 value: 74.97500000000001
348 - type: recall_at_1000
349 value: 87.342
350 - type: recall_at_3
351 value: 43.129
352 - type: recall_at_5
353 value: 48.336
354 - task:
355 type: Retrieval
356 dataset:
357 type: BeIR/cqadupstack
358 name: MTEB CQADupstackGamingRetrieval
359 config: default
360 split: test
361 revision: None
362 metrics:
363 - type: map_at_1
364 value: 39.951
365 - type: map_at_10
366 value: 51.248000000000005
367 - type: map_at_100
368 value: 52.188
369 - type: map_at_1000
370 value: 52.247
371 - type: map_at_3
372 value: 48.211
373 - type: map_at_5
374 value: 49.797000000000004
375 - type: mrr_at_1
376 value: 45.329
377 - type: mrr_at_10
378 value: 54.749
379 - type: mrr_at_100
380 value: 55.367999999999995
381 - type: mrr_at_1000
382 value: 55.400000000000006
383 - type: mrr_at_3
384 value: 52.382
385 - type: mrr_at_5
386 value: 53.649
387 - type: ndcg_at_1
388 value: 45.329
389 - type: ndcg_at_10
390 value: 56.847
391 - type: ndcg_at_100
392 value: 60.738
393 - type: ndcg_at_1000
394 value: 61.976
395 - type: ndcg_at_3
396 value: 51.59
397 - type: ndcg_at_5
398 value: 53.915
399 - type: precision_at_1
400 value: 45.329
401 - type: precision_at_10
402 value: 8.959
403 - type: precision_at_100
404 value: 1.187
405 - type: precision_at_1000
406 value: 0.134
407 - type: precision_at_3
408 value: 22.612
409 - type: precision_at_5
410 value: 15.273
411 - type: recall_at_1
412 value: 39.951
413 - type: recall_at_10
414 value: 70.053
415 - type: recall_at_100
416 value: 86.996
417 - type: recall_at_1000
418 value: 95.707
419 - type: recall_at_3
420 value: 56.032000000000004
421 - type: recall_at_5
422 value: 61.629999999999995
423 - task:
424 type: Retrieval
425 dataset:
426 type: BeIR/cqadupstack
427 name: MTEB CQADupstackGisRetrieval
428 config: default
429 split: test
430 revision: None
431 metrics:
432 - type: map_at_1
433 value: 25.566
434 - type: map_at_10
435 value: 33.207
436 - type: map_at_100
437 value: 34.166000000000004
438 - type: map_at_1000
439 value: 34.245
440 - type: map_at_3
441 value: 30.94
442 - type: map_at_5
443 value: 32.01
444 - type: mrr_at_1
445 value: 27.345000000000002
446 - type: mrr_at_10
447 value: 35.193000000000005
448 - type: mrr_at_100
449 value: 35.965
450 - type: mrr_at_1000
451 value: 36.028999999999996
452 - type: mrr_at_3
453 value: 32.806000000000004
454 - type: mrr_at_5
455 value: 34.021
456 - type: ndcg_at_1
457 value: 27.345000000000002
458 - type: ndcg_at_10
459 value: 37.891999999999996
460 - type: ndcg_at_100
461 value: 42.664
462 - type: ndcg_at_1000
463 value: 44.757000000000005
464 - type: ndcg_at_3
465 value: 33.123000000000005
466 - type: ndcg_at_5
467 value: 35.035
468 - type: precision_at_1
469 value: 27.345000000000002
470 - type: precision_at_10
471 value: 5.763
472 - type: precision_at_100
473 value: 0.859
474 - type: precision_at_1000
475 value: 0.108
476 - type: precision_at_3
477 value: 13.71
478 - type: precision_at_5
479 value: 9.401
480 - type: recall_at_1
481 value: 25.566
482 - type: recall_at_10
483 value: 50.563
484 - type: recall_at_100
485 value: 72.86399999999999
486 - type: recall_at_1000
487 value: 88.68599999999999
488 - type: recall_at_3
489 value: 37.43
490 - type: recall_at_5
491 value: 41.894999999999996
492 - task:
493 type: Retrieval
494 dataset:
495 type: BeIR/cqadupstack
496 name: MTEB CQADupstackMathematicaRetrieval
497 config: default
498 split: test
499 revision: None
500 metrics:
501 - type: map_at_1
502 value: 16.663
503 - type: map_at_10
504 value: 23.552
505 - type: map_at_100
506 value: 24.538
507 - type: map_at_1000
508 value: 24.661
509 - type: map_at_3
510 value: 21.085
511 - type: map_at_5
512 value: 22.391
513 - type: mrr_at_1
514 value: 20.025000000000002
515 - type: mrr_at_10
516 value: 27.643
517 - type: mrr_at_100
518 value: 28.499999999999996
519 - type: mrr_at_1000
520 value: 28.582
521 - type: mrr_at_3
522 value: 25.083
523 - type: mrr_at_5
524 value: 26.544
525 - type: ndcg_at_1
526 value: 20.025000000000002
527 - type: ndcg_at_10
528 value: 28.272000000000002
529 - type: ndcg_at_100
530 value: 33.353
531 - type: ndcg_at_1000
532 value: 36.454
533 - type: ndcg_at_3
534 value: 23.579
535 - type: ndcg_at_5
536 value: 25.685000000000002
537 - type: precision_at_1
538 value: 20.025000000000002
539 - type: precision_at_10
540 value: 5.187
541 - type: precision_at_100
542 value: 0.897
543 - type: precision_at_1000
544 value: 0.13
545 - type: precision_at_3
546 value: 10.987
547 - type: precision_at_5
548 value: 8.06
549 - type: recall_at_1
550 value: 16.663
551 - type: recall_at_10
552 value: 38.808
553 - type: recall_at_100
554 value: 61.305
555 - type: recall_at_1000
556 value: 83.571
557 - type: recall_at_3
558 value: 25.907999999999998
559 - type: recall_at_5
560 value: 31.214
561 - task:
562 type: Retrieval
563 dataset:
564 type: BeIR/cqadupstack
565 name: MTEB CQADupstackPhysicsRetrieval
566 config: default
567 split: test
568 revision: None
569 metrics:
570 - type: map_at_1
571 value: 27.695999999999998
572 - type: map_at_10
573 value: 37.018
574 - type: map_at_100
575 value: 38.263000000000005
576 - type: map_at_1000
577 value: 38.371
578 - type: map_at_3
579 value: 34.226
580 - type: map_at_5
581 value: 35.809999999999995
582 - type: mrr_at_1
583 value: 32.916000000000004
584 - type: mrr_at_10
585 value: 42.067
586 - type: mrr_at_100
587 value: 42.925000000000004
588 - type: mrr_at_1000
589 value: 42.978
590 - type: mrr_at_3
591 value: 39.637
592 - type: mrr_at_5
593 value: 41.134
594 - type: ndcg_at_1
595 value: 32.916000000000004
596 - type: ndcg_at_10
597 value: 42.539
598 - type: ndcg_at_100
599 value: 47.873
600 - type: ndcg_at_1000
601 value: 50.08200000000001
602 - type: ndcg_at_3
603 value: 37.852999999999994
604 - type: ndcg_at_5
605 value: 40.201
606 - type: precision_at_1
607 value: 32.916000000000004
608 - type: precision_at_10
609 value: 7.5840000000000005
610 - type: precision_at_100
611 value: 1.199
612 - type: precision_at_1000
613 value: 0.155
614 - type: precision_at_3
615 value: 17.485
616 - type: precision_at_5
617 value: 12.512
618 - type: recall_at_1
619 value: 27.695999999999998
620 - type: recall_at_10
621 value: 53.638
622 - type: recall_at_100
623 value: 76.116
624 - type: recall_at_1000
625 value: 91.069
626 - type: recall_at_3
627 value: 41.13
628 - type: recall_at_5
629 value: 46.872
630 - task:
631 type: Retrieval
632 dataset:
633 type: BeIR/cqadupstack
634 name: MTEB CQADupstackProgrammersRetrieval
635 config: default
636 split: test
637 revision: None
638 metrics:
639 - type: map_at_1
640 value: 24.108
641 - type: map_at_10
642 value: 33.372
643 - type: map_at_100
644 value: 34.656
645 - type: map_at_1000
646 value: 34.768
647 - type: map_at_3
648 value: 30.830999999999996
649 - type: map_at_5
650 value: 32.204
651 - type: mrr_at_1
652 value: 29.110000000000003
653 - type: mrr_at_10
654 value: 37.979
655 - type: mrr_at_100
656 value: 38.933
657 - type: mrr_at_1000
658 value: 38.988
659 - type: mrr_at_3
660 value: 35.731
661 - type: mrr_at_5
662 value: 36.963
663 - type: ndcg_at_1
664 value: 29.110000000000003
665 - type: ndcg_at_10
666 value: 38.635000000000005
667 - type: ndcg_at_100
668 value: 44.324999999999996
669 - type: ndcg_at_1000
670 value: 46.747
671 - type: ndcg_at_3
672 value: 34.37
673 - type: ndcg_at_5
674 value: 36.228
675 - type: precision_at_1
676 value: 29.110000000000003
677 - type: precision_at_10
678 value: 6.963
679 - type: precision_at_100
680 value: 1.146
681 - type: precision_at_1000
682 value: 0.152
683 - type: precision_at_3
684 value: 16.400000000000002
685 - type: precision_at_5
686 value: 11.552999999999999
687 - type: recall_at_1
688 value: 24.108
689 - type: recall_at_10
690 value: 49.597
691 - type: recall_at_100
692 value: 73.88900000000001
693 - type: recall_at_1000
694 value: 90.62400000000001
695 - type: recall_at_3
696 value: 37.662
697 - type: recall_at_5
698 value: 42.565
699 - task:
700 type: Retrieval
701 dataset:
702 type: BeIR/cqadupstack
703 name: MTEB CQADupstackRetrieval
704 config: default
705 split: test
706 revision: None
707 metrics:
708 - type: map_at_1
709 value: 25.00791666666667
710 - type: map_at_10
711 value: 33.287749999999996
712 - type: map_at_100
713 value: 34.41141666666667
714 - type: map_at_1000
715 value: 34.52583333333333
716 - type: map_at_3
717 value: 30.734416666666668
718 - type: map_at_5
719 value: 32.137166666666666
720 - type: mrr_at_1
721 value: 29.305666666666664
722 - type: mrr_at_10
723 value: 37.22966666666666
724 - type: mrr_at_100
725 value: 38.066583333333334
726 - type: mrr_at_1000
727 value: 38.12616666666667
728 - type: mrr_at_3
729 value: 34.92275
730 - type: mrr_at_5
731 value: 36.23333333333334
732 - type: ndcg_at_1
733 value: 29.305666666666664
734 - type: ndcg_at_10
735 value: 38.25533333333333
736 - type: ndcg_at_100
737 value: 43.25266666666666
738 - type: ndcg_at_1000
739 value: 45.63583333333334
740 - type: ndcg_at_3
741 value: 33.777166666666666
742 - type: ndcg_at_5
743 value: 35.85
744 - type: precision_at_1
745 value: 29.305666666666664
746 - type: precision_at_10
747 value: 6.596416666666667
748 - type: precision_at_100
749 value: 1.0784166666666668
750 - type: precision_at_1000
751 value: 0.14666666666666664
752 - type: precision_at_3
753 value: 15.31075
754 - type: precision_at_5
755 value: 10.830916666666667
756 - type: recall_at_1
757 value: 25.00791666666667
758 - type: recall_at_10
759 value: 49.10933333333333
760 - type: recall_at_100
761 value: 71.09216666666667
762 - type: recall_at_1000
763 value: 87.77725000000001
764 - type: recall_at_3
765 value: 36.660916666666665
766 - type: recall_at_5
767 value: 41.94149999999999
768 - task:
769 type: Retrieval
770 dataset:
771 type: BeIR/cqadupstack
772 name: MTEB CQADupstackStatsRetrieval
773 config: default
774 split: test
775 revision: None
776 metrics:
777 - type: map_at_1
778 value: 23.521
779 - type: map_at_10
780 value: 30.043
781 - type: map_at_100
782 value: 30.936000000000003
783 - type: map_at_1000
784 value: 31.022
785 - type: map_at_3
786 value: 27.926000000000002
787 - type: map_at_5
788 value: 29.076999999999998
789 - type: mrr_at_1
790 value: 26.227
791 - type: mrr_at_10
792 value: 32.822
793 - type: mrr_at_100
794 value: 33.61
795 - type: mrr_at_1000
796 value: 33.672000000000004
797 - type: mrr_at_3
798 value: 30.776999999999997
799 - type: mrr_at_5
800 value: 31.866
801 - type: ndcg_at_1
802 value: 26.227
803 - type: ndcg_at_10
804 value: 34.041
805 - type: ndcg_at_100
806 value: 38.394
807 - type: ndcg_at_1000
808 value: 40.732
809 - type: ndcg_at_3
810 value: 30.037999999999997
811 - type: ndcg_at_5
812 value: 31.845000000000002
813 - type: precision_at_1
814 value: 26.227
815 - type: precision_at_10
816 value: 5.244999999999999
817 - type: precision_at_100
818 value: 0.808
819 - type: precision_at_1000
820 value: 0.107
821 - type: precision_at_3
822 value: 12.679000000000002
823 - type: precision_at_5
824 value: 8.773
825 - type: recall_at_1
826 value: 23.521
827 - type: recall_at_10
828 value: 43.633
829 - type: recall_at_100
830 value: 63.126000000000005
831 - type: recall_at_1000
832 value: 80.765
833 - type: recall_at_3
834 value: 32.614
835 - type: recall_at_5
836 value: 37.15
837 - task:
838 type: Retrieval
839 dataset:
840 type: BeIR/cqadupstack
841 name: MTEB CQADupstackTexRetrieval
842 config: default
843 split: test
844 revision: None
845 metrics:
846 - type: map_at_1
847 value: 16.236
848 - type: map_at_10
849 value: 22.898
850 - type: map_at_100
851 value: 23.878
852 - type: map_at_1000
853 value: 24.009
854 - type: map_at_3
855 value: 20.87
856 - type: map_at_5
857 value: 22.025
858 - type: mrr_at_1
859 value: 19.339000000000002
860 - type: mrr_at_10
861 value: 26.382
862 - type: mrr_at_100
863 value: 27.245
864 - type: mrr_at_1000
865 value: 27.33
866 - type: mrr_at_3
867 value: 24.386
868 - type: mrr_at_5
869 value: 25.496000000000002
870 - type: ndcg_at_1
871 value: 19.339000000000002
872 - type: ndcg_at_10
873 value: 27.139999999999997
874 - type: ndcg_at_100
875 value: 31.944
876 - type: ndcg_at_1000
877 value: 35.077999999999996
878 - type: ndcg_at_3
879 value: 23.424
880 - type: ndcg_at_5
881 value: 25.188
882 - type: precision_at_1
883 value: 19.339000000000002
884 - type: precision_at_10
885 value: 4.8309999999999995
886 - type: precision_at_100
887 value: 0.845
888 - type: precision_at_1000
889 value: 0.128
890 - type: precision_at_3
891 value: 10.874
892 - type: precision_at_5
893 value: 7.825
894 - type: recall_at_1
895 value: 16.236
896 - type: recall_at_10
897 value: 36.513
898 - type: recall_at_100
899 value: 57.999
900 - type: recall_at_1000
901 value: 80.512
902 - type: recall_at_3
903 value: 26.179999999999996
904 - type: recall_at_5
905 value: 30.712
906 - task:
907 type: Retrieval
908 dataset:
909 type: BeIR/cqadupstack
910 name: MTEB CQADupstackUnixRetrieval
911 config: default
912 split: test
913 revision: None
914 metrics:
915 - type: map_at_1
916 value: 24.11
917 - type: map_at_10
918 value: 31.566
919 - type: map_at_100
920 value: 32.647
921 - type: map_at_1000
922 value: 32.753
923 - type: map_at_3
924 value: 29.24
925 - type: map_at_5
926 value: 30.564999999999998
927 - type: mrr_at_1
928 value: 28.265
929 - type: mrr_at_10
930 value: 35.504000000000005
931 - type: mrr_at_100
932 value: 36.436
933 - type: mrr_at_1000
934 value: 36.503
935 - type: mrr_at_3
936 value: 33.349000000000004
937 - type: mrr_at_5
938 value: 34.622
939 - type: ndcg_at_1
940 value: 28.265
941 - type: ndcg_at_10
942 value: 36.192
943 - type: ndcg_at_100
944 value: 41.388000000000005
945 - type: ndcg_at_1000
946 value: 43.948
947 - type: ndcg_at_3
948 value: 31.959
949 - type: ndcg_at_5
950 value: 33.998
951 - type: precision_at_1
952 value: 28.265
953 - type: precision_at_10
954 value: 5.989
955 - type: precision_at_100
956 value: 0.9650000000000001
957 - type: precision_at_1000
958 value: 0.13
959 - type: precision_at_3
960 value: 14.335
961 - type: precision_at_5
962 value: 10.112
963 - type: recall_at_1
964 value: 24.11
965 - type: recall_at_10
966 value: 46.418
967 - type: recall_at_100
968 value: 69.314
969 - type: recall_at_1000
970 value: 87.397
971 - type: recall_at_3
972 value: 34.724
973 - type: recall_at_5
974 value: 39.925
975 - task:
976 type: Retrieval
977 dataset:
978 type: BeIR/cqadupstack
979 name: MTEB CQADupstackWebmastersRetrieval
980 config: default
981 split: test
982 revision: None
983 metrics:
984 - type: map_at_1
985 value: 22.091
986 - type: map_at_10
987 value: 29.948999999999998
988 - type: map_at_100
989 value: 31.502000000000002
990 - type: map_at_1000
991 value: 31.713
992 - type: map_at_3
993 value: 27.464
994 - type: map_at_5
995 value: 28.968
996 - type: mrr_at_1
997 value: 26.482
998 - type: mrr_at_10
999 value: 34.009
1000 - type: mrr_at_100
1001 value: 35.081
1002 - type: mrr_at_1000
1003 value: 35.138000000000005
1004 - type: mrr_at_3
1005 value: 31.785000000000004
1006 - type: mrr_at_5
1007 value: 33.178999999999995
1008 - type: ndcg_at_1
1009 value: 26.482
1010 - type: ndcg_at_10
1011 value: 35.008
1012 - type: ndcg_at_100
1013 value: 41.272999999999996
1014 - type: ndcg_at_1000
1015 value: 43.972
1016 - type: ndcg_at_3
1017 value: 30.804
1018 - type: ndcg_at_5
1019 value: 33.046
1020 - type: precision_at_1
1021 value: 26.482
1022 - type: precision_at_10
1023 value: 6.462
1024 - type: precision_at_100
1025 value: 1.431
1026 - type: precision_at_1000
1027 value: 0.22899999999999998
1028 - type: precision_at_3
1029 value: 14.360999999999999
1030 - type: precision_at_5
1031 value: 10.474
1032 - type: recall_at_1
1033 value: 22.091
1034 - type: recall_at_10
1035 value: 45.125
1036 - type: recall_at_100
1037 value: 72.313
1038 - type: recall_at_1000
1039 value: 89.503
1040 - type: recall_at_3
1041 value: 33.158
1042 - type: recall_at_5
1043 value: 39.086999999999996
1044 - task:
1045 type: Retrieval
1046 dataset:
1047 type: BeIR/cqadupstack
1048 name: MTEB CQADupstackWordpressRetrieval
1049 config: default
1050 split: test
1051 revision: None
1052 metrics:
1053 - type: map_at_1
1054 value: 19.883
1055 - type: map_at_10
1056 value: 26.951000000000004
1057 - type: map_at_100
1058 value: 27.927999999999997
1059 - type: map_at_1000
1060 value: 28.022000000000002
1061 - type: map_at_3
1062 value: 24.616
1063 - type: map_at_5
1064 value: 25.917
1065 - type: mrr_at_1
1066 value: 21.996
1067 - type: mrr_at_10
1068 value: 29.221000000000004
1069 - type: mrr_at_100
1070 value: 30.024
1071 - type: mrr_at_1000
1072 value: 30.095
1073 - type: mrr_at_3
1074 value: 26.833000000000002
1075 - type: mrr_at_5
1076 value: 28.155
1077 - type: ndcg_at_1
1078 value: 21.996
1079 - type: ndcg_at_10
1080 value: 31.421
1081 - type: ndcg_at_100
1082 value: 36.237
1083 - type: ndcg_at_1000
1084 value: 38.744
1085 - type: ndcg_at_3
1086 value: 26.671
1087 - type: ndcg_at_5
1088 value: 28.907
1089 - type: precision_at_1
1090 value: 21.996
1091 - type: precision_at_10
1092 value: 5.009
1093 - type: precision_at_100
1094 value: 0.799
1095 - type: precision_at_1000
1096 value: 0.11199999999999999
1097 - type: precision_at_3
1098 value: 11.275
1099 - type: precision_at_5
1100 value: 8.059
1101 - type: recall_at_1
1102 value: 19.883
1103 - type: recall_at_10
1104 value: 43.132999999999996
1105 - type: recall_at_100
1106 value: 65.654
1107 - type: recall_at_1000
1108 value: 84.492
1109 - type: recall_at_3
1110 value: 30.209000000000003
1111 - type: recall_at_5
1112 value: 35.616
1113 - task:
1114 type: Retrieval
1115 dataset:
1116 type: climate-fever
1117 name: MTEB ClimateFEVER
1118 config: default
1119 split: test
1120 revision: None
1121 metrics:
1122 - type: map_at_1
1123 value: 17.756
1124 - type: map_at_10
1125 value: 30.378
1126 - type: map_at_100
1127 value: 32.537
1128 - type: map_at_1000
1129 value: 32.717
1130 - type: map_at_3
1131 value: 25.599
1132 - type: map_at_5
1133 value: 28.372999999999998
1134 - type: mrr_at_1
1135 value: 41.303
1136 - type: mrr_at_10
1137 value: 53.483999999999995
1138 - type: mrr_at_100
1139 value: 54.106
1140 - type: mrr_at_1000
1141 value: 54.127
1142 - type: mrr_at_3
1143 value: 50.315
1144 - type: mrr_at_5
1145 value: 52.396
1146 - type: ndcg_at_1
1147 value: 41.303
1148 - type: ndcg_at_10
1149 value: 40.503
1150 - type: ndcg_at_100
1151 value: 47.821000000000005
1152 - type: ndcg_at_1000
1153 value: 50.788
1154 - type: ndcg_at_3
1155 value: 34.364
1156 - type: ndcg_at_5
1157 value: 36.818
1158 - type: precision_at_1
1159 value: 41.303
1160 - type: precision_at_10
1161 value: 12.463000000000001
1162 - type: precision_at_100
1163 value: 2.037
1164 - type: precision_at_1000
1165 value: 0.26
1166 - type: precision_at_3
1167 value: 25.798
1168 - type: precision_at_5
1169 value: 19.896
1170 - type: recall_at_1
1171 value: 17.756
1172 - type: recall_at_10
1173 value: 46.102
1174 - type: recall_at_100
1175 value: 70.819
1176 - type: recall_at_1000
1177 value: 87.21799999999999
1178 - type: recall_at_3
1179 value: 30.646
1180 - type: recall_at_5
1181 value: 38.022
1182 - task:
1183 type: Retrieval
1184 dataset:
1185 type: dbpedia-entity
1186 name: MTEB DBPedia
1187 config: default
1188 split: test
1189 revision: None
1190 metrics:
1191 - type: map_at_1
1192 value: 9.033
1193 - type: map_at_10
1194 value: 20.584
1195 - type: map_at_100
1196 value: 29.518
1197 - type: map_at_1000
1198 value: 31.186000000000003
1199 - type: map_at_3
1200 value: 14.468
1201 - type: map_at_5
1202 value: 17.177
1203 - type: mrr_at_1
1204 value: 69.75
1205 - type: mrr_at_10
1206 value: 77.025
1207 - type: mrr_at_100
1208 value: 77.36699999999999
1209 - type: mrr_at_1000
1210 value: 77.373
1211 - type: mrr_at_3
1212 value: 75.583
1213 - type: mrr_at_5
1214 value: 76.396
1215 - type: ndcg_at_1
1216 value: 58.5
1217 - type: ndcg_at_10
1218 value: 45.033
1219 - type: ndcg_at_100
1220 value: 49.071
1221 - type: ndcg_at_1000
1222 value: 56.056
1223 - type: ndcg_at_3
1224 value: 49.936
1225 - type: ndcg_at_5
1226 value: 47.471999999999994
1227 - type: precision_at_1
1228 value: 69.75
1229 - type: precision_at_10
1230 value: 35.775
1231 - type: precision_at_100
1232 value: 11.594999999999999
1233 - type: precision_at_1000
1234 value: 2.062
1235 - type: precision_at_3
1236 value: 52.5
1237 - type: precision_at_5
1238 value: 45.300000000000004
1239 - type: recall_at_1
1240 value: 9.033
1241 - type: recall_at_10
1242 value: 26.596999999999998
1243 - type: recall_at_100
1244 value: 54.607000000000006
1245 - type: recall_at_1000
1246 value: 76.961
1247 - type: recall_at_3
1248 value: 15.754999999999999
1249 - type: recall_at_5
1250 value: 20.033
1251 - task:
1252 type: Classification
1253 dataset:
1254 type: mteb/emotion
1255 name: MTEB EmotionClassification
1256 config: default
1257 split: test
1258 revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
1259 metrics:
1260 - type: accuracy
1261 value: 48.345000000000006
1262 - type: f1
1263 value: 43.4514918068706
1264 - task:
1265 type: Retrieval
1266 dataset:
1267 type: fever
1268 name: MTEB FEVER
1269 config: default
1270 split: test
1271 revision: None
1272 metrics:
1273 - type: map_at_1
1274 value: 71.29100000000001
1275 - type: map_at_10
1276 value: 81.059
1277 - type: map_at_100
1278 value: 81.341
1279 - type: map_at_1000
1280 value: 81.355
1281 - type: map_at_3
1282 value: 79.74799999999999
1283 - type: map_at_5
1284 value: 80.612
1285 - type: mrr_at_1
1286 value: 76.40299999999999
1287 - type: mrr_at_10
1288 value: 84.615
1289 - type: mrr_at_100
1290 value: 84.745
1291 - type: mrr_at_1000
1292 value: 84.748
1293 - type: mrr_at_3
1294 value: 83.776
1295 - type: mrr_at_5
1296 value: 84.343
1297 - type: ndcg_at_1
1298 value: 76.40299999999999
1299 - type: ndcg_at_10
1300 value: 84.981
1301 - type: ndcg_at_100
1302 value: 86.00999999999999
1303 - type: ndcg_at_1000
1304 value: 86.252
1305 - type: ndcg_at_3
1306 value: 82.97
1307 - type: ndcg_at_5
1308 value: 84.152
1309 - type: precision_at_1
1310 value: 76.40299999999999
1311 - type: precision_at_10
1312 value: 10.446
1313 - type: precision_at_100
1314 value: 1.1199999999999999
1315 - type: precision_at_1000
1316 value: 0.116
1317 - type: precision_at_3
1318 value: 32.147999999999996
1319 - type: precision_at_5
1320 value: 20.135
1321 - type: recall_at_1
1322 value: 71.29100000000001
1323 - type: recall_at_10
1324 value: 93.232
1325 - type: recall_at_100
1326 value: 97.363
1327 - type: recall_at_1000
1328 value: 98.905
1329 - type: recall_at_3
1330 value: 87.893
1331 - type: recall_at_5
1332 value: 90.804
1333 - task:
1334 type: Retrieval
1335 dataset:
1336 type: fiqa
1337 name: MTEB FiQA2018
1338 config: default
1339 split: test
1340 revision: None
1341 metrics:
1342 - type: map_at_1
1343 value: 18.667
1344 - type: map_at_10
1345 value: 30.853
1346 - type: map_at_100
1347 value: 32.494
1348 - type: map_at_1000
1349 value: 32.677
1350 - type: map_at_3
1351 value: 26.91
1352 - type: map_at_5
1353 value: 29.099000000000004
1354 - type: mrr_at_1
1355 value: 37.191
1356 - type: mrr_at_10
1357 value: 46.171
1358 - type: mrr_at_100
1359 value: 47.056
1360 - type: mrr_at_1000
1361 value: 47.099000000000004
1362 - type: mrr_at_3
1363 value: 44.059
1364 - type: mrr_at_5
1365 value: 45.147
1366 - type: ndcg_at_1
1367 value: 37.191
1368 - type: ndcg_at_10
1369 value: 38.437
1370 - type: ndcg_at_100
1371 value: 44.62
1372 - type: ndcg_at_1000
1373 value: 47.795
1374 - type: ndcg_at_3
1375 value: 35.003
1376 - type: ndcg_at_5
1377 value: 36.006
1378 - type: precision_at_1
1379 value: 37.191
1380 - type: precision_at_10
1381 value: 10.586
1382 - type: precision_at_100
1383 value: 1.688
1384 - type: precision_at_1000
1385 value: 0.22699999999999998
1386 - type: precision_at_3
1387 value: 23.302
1388 - type: precision_at_5
1389 value: 17.006
1390 - type: recall_at_1
1391 value: 18.667
1392 - type: recall_at_10
1393 value: 45.367000000000004
1394 - type: recall_at_100
1395 value: 68.207
1396 - type: recall_at_1000
1397 value: 87.072
1398 - type: recall_at_3
1399 value: 32.129000000000005
1400 - type: recall_at_5
1401 value: 37.719
1402 - task:
1403 type: Retrieval
1404 dataset:
1405 type: hotpotqa
1406 name: MTEB HotpotQA
1407 config: default
1408 split: test
1409 revision: None
1410 metrics:
1411 - type: map_at_1
1412 value: 39.494
1413 - type: map_at_10
1414 value: 66.223
1415 - type: map_at_100
1416 value: 67.062
1417 - type: map_at_1000
1418 value: 67.11500000000001
1419 - type: map_at_3
1420 value: 62.867
1421 - type: map_at_5
1422 value: 64.994
1423 - type: mrr_at_1
1424 value: 78.987
1425 - type: mrr_at_10
1426 value: 84.585
1427 - type: mrr_at_100
1428 value: 84.773
1429 - type: mrr_at_1000
1430 value: 84.77900000000001
1431 - type: mrr_at_3
1432 value: 83.592
1433 - type: mrr_at_5
1434 value: 84.235
1435 - type: ndcg_at_1
1436 value: 78.987
1437 - type: ndcg_at_10
1438 value: 73.64
1439 - type: ndcg_at_100
1440 value: 76.519
1441 - type: ndcg_at_1000
1442 value: 77.51
1443 - type: ndcg_at_3
1444 value: 68.893
1445 - type: ndcg_at_5
1446 value: 71.585
1447 - type: precision_at_1
1448 value: 78.987
1449 - type: precision_at_10
1450 value: 15.529000000000002
1451 - type: precision_at_100
1452 value: 1.7770000000000001
1453 - type: precision_at_1000
1454 value: 0.191
1455 - type: precision_at_3
1456 value: 44.808
1457 - type: precision_at_5
1458 value: 29.006999999999998
1459 - type: recall_at_1
1460 value: 39.494
1461 - type: recall_at_10
1462 value: 77.643
1463 - type: recall_at_100
1464 value: 88.825
1465 - type: recall_at_1000
1466 value: 95.321
1467 - type: recall_at_3
1468 value: 67.211
1469 - type: recall_at_5
1470 value: 72.519
1471 - task:
1472 type: Classification
1473 dataset:
1474 type: mteb/imdb
1475 name: MTEB ImdbClassification
1476 config: default
1477 split: test
1478 revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
1479 metrics:
1480 - type: accuracy
1481 value: 85.55959999999999
1482 - type: ap
1483 value: 80.7246500384617
1484 - type: f1
1485 value: 85.52336485065454
1486 - task:
1487 type: Retrieval
1488 dataset:
1489 type: msmarco
1490 name: MTEB MSMARCO
1491 config: default
1492 split: dev
1493 revision: None
1494 metrics:
1495 - type: map_at_1
1496 value: 23.631
1497 - type: map_at_10
1498 value: 36.264
1499 - type: map_at_100
1500 value: 37.428
1501 - type: map_at_1000
1502 value: 37.472
1503 - type: map_at_3
1504 value: 32.537
1505 - type: map_at_5
1506 value: 34.746
1507 - type: mrr_at_1
1508 value: 24.312
1509 - type: mrr_at_10
1510 value: 36.858000000000004
1511 - type: mrr_at_100
1512 value: 37.966
1513 - type: mrr_at_1000
1514 value: 38.004
1515 - type: mrr_at_3
1516 value: 33.188
1517 - type: mrr_at_5
1518 value: 35.367
1519 - type: ndcg_at_1
1520 value: 24.312
1521 - type: ndcg_at_10
1522 value: 43.126999999999995
1523 - type: ndcg_at_100
1524 value: 48.642
1525 - type: ndcg_at_1000
1526 value: 49.741
1527 - type: ndcg_at_3
1528 value: 35.589
1529 - type: ndcg_at_5
1530 value: 39.515
1531 - type: precision_at_1
1532 value: 24.312
1533 - type: precision_at_10
1534 value: 6.699
1535 - type: precision_at_100
1536 value: 0.9450000000000001
1537 - type: precision_at_1000
1538 value: 0.104
1539 - type: precision_at_3
1540 value: 15.153
1541 - type: precision_at_5
1542 value: 11.065999999999999
1543 - type: recall_at_1
1544 value: 23.631
1545 - type: recall_at_10
1546 value: 64.145
1547 - type: recall_at_100
1548 value: 89.41
1549 - type: recall_at_1000
1550 value: 97.83500000000001
1551 - type: recall_at_3
1552 value: 43.769000000000005
1553 - type: recall_at_5
1554 value: 53.169
1555 - task:
1556 type: Classification
1557 dataset:
1558 type: mteb/mtop_domain
1559 name: MTEB MTOPDomainClassification (en)
1560 config: en
1561 split: test
1562 revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
1563 metrics:
1564 - type: accuracy
1565 value: 93.4108527131783
1566 - type: f1
1567 value: 93.1415880261038
1568 - task:
1569 type: Classification
1570 dataset:
1571 type: mteb/mtop_intent
1572 name: MTEB MTOPIntentClassification (en)
1573 config: en
1574 split: test
1575 revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
1576 metrics:
1577 - type: accuracy
1578 value: 77.24806201550388
1579 - type: f1
1580 value: 60.531916308197175
1581 - task:
1582 type: Classification
1583 dataset:
1584 type: mteb/amazon_massive_intent
1585 name: MTEB MassiveIntentClassification (en)
1586 config: en
1587 split: test
1588 revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
1589 metrics:
1590 - type: accuracy
1591 value: 73.71553463349024
1592 - type: f1
1593 value: 71.70753174900791
1594 - task:
1595 type: Classification
1596 dataset:
1597 type: mteb/amazon_massive_scenario
1598 name: MTEB MassiveScenarioClassification (en)
1599 config: en
1600 split: test
1601 revision: 7d571f92784cd94a019292a1f45445077d0ef634
1602 metrics:
1603 - type: accuracy
1604 value: 77.79757901815736
1605 - type: f1
1606 value: 77.83719850433258
1607 - task:
1608 type: Clustering
1609 dataset:
1610 type: mteb/medrxiv-clustering-p2p
1611 name: MTEB MedrxivClusteringP2P
1612 config: default
1613 split: test
1614 revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
1615 metrics:
1616 - type: v_measure
1617 value: 33.74193296622113
1618 - task:
1619 type: Clustering
1620 dataset:
1621 type: mteb/medrxiv-clustering-s2s
1622 name: MTEB MedrxivClusteringS2S
1623 config: default
1624 split: test
1625 revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
1626 metrics:
1627 - type: v_measure
1628 value: 30.64257594108566
1629 - task:
1630 type: Reranking
1631 dataset:
1632 type: mteb/mind_small
1633 name: MTEB MindSmallReranking
1634 config: default
1635 split: test
1636 revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
1637 metrics:
1638 - type: map
1639 value: 30.811018518883625
1640 - type: mrr
1641 value: 31.910376577445003
1642 - task:
1643 type: Retrieval
1644 dataset:
1645 type: nfcorpus
1646 name: MTEB NFCorpus
1647 config: default
1648 split: test
1649 revision: None
1650 metrics:
1651 - type: map_at_1
1652 value: 5.409
1653 - type: map_at_10
1654 value: 13.093
1655 - type: map_at_100
1656 value: 16.256999999999998
1657 - type: map_at_1000
1658 value: 17.617
1659 - type: map_at_3
1660 value: 9.555
1661 - type: map_at_5
1662 value: 11.428
1663 - type: mrr_at_1
1664 value: 45.201
1665 - type: mrr_at_10
1666 value: 54.179
1667 - type: mrr_at_100
1668 value: 54.812000000000005
1669 - type: mrr_at_1000
1670 value: 54.840999999999994
1671 - type: mrr_at_3
1672 value: 51.909000000000006
1673 - type: mrr_at_5
1674 value: 53.519000000000005
1675 - type: ndcg_at_1
1676 value: 43.189
1677 - type: ndcg_at_10
1678 value: 35.028
1679 - type: ndcg_at_100
1680 value: 31.226
1681 - type: ndcg_at_1000
1682 value: 39.678000000000004
1683 - type: ndcg_at_3
1684 value: 40.596
1685 - type: ndcg_at_5
1686 value: 38.75
1687 - type: precision_at_1
1688 value: 44.582
1689 - type: precision_at_10
1690 value: 25.974999999999998
1691 - type: precision_at_100
1692 value: 7.793
1693 - type: precision_at_1000
1694 value: 2.036
1695 - type: precision_at_3
1696 value: 38.493
1697 - type: precision_at_5
1698 value: 33.994
1699 - type: recall_at_1
1700 value: 5.409
1701 - type: recall_at_10
1702 value: 16.875999999999998
1703 - type: recall_at_100
1704 value: 30.316
1705 - type: recall_at_1000
1706 value: 60.891
1707 - type: recall_at_3
1708 value: 10.688
1709 - type: recall_at_5
1710 value: 13.832
1711 - task:
1712 type: Retrieval
1713 dataset:
1714 type: nq
1715 name: MTEB NQ
1716 config: default
1717 split: test
1718 revision: None
1719 metrics:
1720 - type: map_at_1
1721 value: 36.375
1722 - type: map_at_10
1723 value: 51.991
1724 - type: map_at_100
1725 value: 52.91400000000001
1726 - type: map_at_1000
1727 value: 52.93600000000001
1728 - type: map_at_3
1729 value: 48.014
1730 - type: map_at_5
1731 value: 50.381
1732 - type: mrr_at_1
1733 value: 40.759
1734 - type: mrr_at_10
1735 value: 54.617000000000004
1736 - type: mrr_at_100
1737 value: 55.301
1738 - type: mrr_at_1000
1739 value: 55.315000000000005
1740 - type: mrr_at_3
1741 value: 51.516
1742 - type: mrr_at_5
1743 value: 53.435
1744 - type: ndcg_at_1
1745 value: 40.759
1746 - type: ndcg_at_10
1747 value: 59.384
1748 - type: ndcg_at_100
1749 value: 63.157
1750 - type: ndcg_at_1000
1751 value: 63.654999999999994
1752 - type: ndcg_at_3
1753 value: 52.114000000000004
1754 - type: ndcg_at_5
1755 value: 55.986000000000004
1756 - type: precision_at_1
1757 value: 40.759
1758 - type: precision_at_10
1759 value: 9.411999999999999
1760 - type: precision_at_100
1761 value: 1.153
1762 - type: precision_at_1000
1763 value: 0.12
1764 - type: precision_at_3
1765 value: 23.329
1766 - type: precision_at_5
1767 value: 16.256999999999998
1768 - type: recall_at_1
1769 value: 36.375
1770 - type: recall_at_10
1771 value: 79.053
1772 - type: recall_at_100
1773 value: 95.167
1774 - type: recall_at_1000
1775 value: 98.82
1776 - type: recall_at_3
1777 value: 60.475
1778 - type: recall_at_5
1779 value: 69.327
1780 - task:
1781 type: Retrieval
1782 dataset:
1783 type: quora
1784 name: MTEB QuoraRetrieval
1785 config: default
1786 split: test
1787 revision: None
1788 metrics:
1789 - type: map_at_1
1790 value: 70.256
1791 - type: map_at_10
1792 value: 83.8
1793 - type: map_at_100
1794 value: 84.425
1795 - type: map_at_1000
1796 value: 84.444
1797 - type: map_at_3
1798 value: 80.906
1799 - type: map_at_5
1800 value: 82.717
1801 - type: mrr_at_1
1802 value: 80.97999999999999
1803 - type: mrr_at_10
1804 value: 87.161
1805 - type: mrr_at_100
1806 value: 87.262
1807 - type: mrr_at_1000
1808 value: 87.263
1809 - type: mrr_at_3
1810 value: 86.175
1811 - type: mrr_at_5
1812 value: 86.848
1813 - type: ndcg_at_1
1814 value: 80.97999999999999
1815 - type: ndcg_at_10
1816 value: 87.697
1817 - type: ndcg_at_100
1818 value: 88.959
1819 - type: ndcg_at_1000
1820 value: 89.09899999999999
1821 - type: ndcg_at_3
1822 value: 84.83800000000001
1823 - type: ndcg_at_5
1824 value: 86.401
1825 - type: precision_at_1
1826 value: 80.97999999999999
1827 - type: precision_at_10
1828 value: 13.261000000000001
1829 - type: precision_at_100
1830 value: 1.5150000000000001
1831 - type: precision_at_1000
1832 value: 0.156
1833 - type: precision_at_3
1834 value: 37.01
1835 - type: precision_at_5
1836 value: 24.298000000000002
1837 - type: recall_at_1
1838 value: 70.256
1839 - type: recall_at_10
1840 value: 94.935
1841 - type: recall_at_100
1842 value: 99.274
1843 - type: recall_at_1000
1844 value: 99.928
1845 - type: recall_at_3
1846 value: 86.602
1847 - type: recall_at_5
1848 value: 91.133
1849 - task:
1850 type: Clustering
1851 dataset:
1852 type: mteb/reddit-clustering
1853 name: MTEB RedditClustering
1854 config: default
1855 split: test
1856 revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
1857 metrics:
1858 - type: v_measure
1859 value: 56.322692497613104
1860 - task:
1861 type: Clustering
1862 dataset:
1863 type: mteb/reddit-clustering-p2p
1864 name: MTEB RedditClusteringP2P
1865 config: default
1866 split: test
1867 revision: 282350215ef01743dc01b456c7f5241fa8937f16
1868 metrics:
1869 - type: v_measure
1870 value: 61.895813503775074
1871 - task:
1872 type: Retrieval
1873 dataset:
1874 type: scidocs
1875 name: MTEB SCIDOCS
1876 config: default
1877 split: test
1878 revision: None
1879 metrics:
1880 - type: map_at_1
1881 value: 4.338
1882 - type: map_at_10
1883 value: 10.767
1884 - type: map_at_100
1885 value: 12.537999999999998
1886 - type: map_at_1000
1887 value: 12.803999999999998
1888 - type: map_at_3
1889 value: 7.788
1890 - type: map_at_5
1891 value: 9.302000000000001
1892 - type: mrr_at_1
1893 value: 21.4
1894 - type: mrr_at_10
1895 value: 31.637999999999998
1896 - type: mrr_at_100
1897 value: 32.688
1898 - type: mrr_at_1000
1899 value: 32.756
1900 - type: mrr_at_3
1901 value: 28.433000000000003
1902 - type: mrr_at_5
1903 value: 30.178
1904 - type: ndcg_at_1
1905 value: 21.4
1906 - type: ndcg_at_10
1907 value: 18.293
1908 - type: ndcg_at_100
1909 value: 25.274
1910 - type: ndcg_at_1000
1911 value: 30.284
1912 - type: ndcg_at_3
1913 value: 17.391000000000002
1914 - type: ndcg_at_5
1915 value: 15.146999999999998
1916 - type: precision_at_1
1917 value: 21.4
1918 - type: precision_at_10
1919 value: 9.48
1920 - type: precision_at_100
1921 value: 1.949
1922 - type: precision_at_1000
1923 value: 0.316
1924 - type: precision_at_3
1925 value: 16.167
1926 - type: precision_at_5
1927 value: 13.22
1928 - type: recall_at_1
1929 value: 4.338
1930 - type: recall_at_10
1931 value: 19.213
1932 - type: recall_at_100
1933 value: 39.562999999999995
1934 - type: recall_at_1000
1935 value: 64.08
1936 - type: recall_at_3
1937 value: 9.828000000000001
1938 - type: recall_at_5
1939 value: 13.383000000000001
1940 - task:
1941 type: STS
1942 dataset:
1943 type: mteb/sickr-sts
1944 name: MTEB SICK-R
1945 config: default
1946 split: test
1947 revision: a6ea5a8cab320b040a23452cc28066d9beae2cee
1948 metrics:
1949 - type: cos_sim_pearson
1950 value: 82.42568163642142
1951 - type: cos_sim_spearman
1952 value: 78.5797159641342
1953 - type: euclidean_pearson
1954 value: 80.22151260811604
1955 - type: euclidean_spearman
1956 value: 78.5797151953878
1957 - type: manhattan_pearson
1958 value: 80.21224215864788
1959 - type: manhattan_spearman
1960 value: 78.55641478381344
1961 - task:
1962 type: STS
1963 dataset:
1964 type: mteb/sts12-sts
1965 name: MTEB STS12
1966 config: default
1967 split: test
1968 revision: a0d554a64d88156834ff5ae9920b964011b16384
1969 metrics:
1970 - type: cos_sim_pearson
1971 value: 85.44020710812569
1972 - type: cos_sim_spearman
1973 value: 78.91631735081286
1974 - type: euclidean_pearson
1975 value: 81.64188964182102
1976 - type: euclidean_spearman
1977 value: 78.91633286881678
1978 - type: manhattan_pearson
1979 value: 81.69294748512496
1980 - type: manhattan_spearman
1981 value: 78.93438558002656
1982 - task:
1983 type: STS
1984 dataset:
1985 type: mteb/sts13-sts
1986 name: MTEB STS13
1987 config: default
1988 split: test
1989 revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
1990 metrics:
1991 - type: cos_sim_pearson
1992 value: 84.27165426412311
1993 - type: cos_sim_spearman
1994 value: 85.40429140249618
1995 - type: euclidean_pearson
1996 value: 84.7509580724893
1997 - type: euclidean_spearman
1998 value: 85.40429140249618
1999 - type: manhattan_pearson
2000 value: 84.76488289321308
2001 - type: manhattan_spearman
2002 value: 85.4256793698708
2003 - task:
2004 type: STS
2005 dataset:
2006 type: mteb/sts14-sts
2007 name: MTEB STS14
2008 config: default
2009 split: test
2010 revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
2011 metrics:
2012 - type: cos_sim_pearson
2013 value: 83.138851760732
2014 - type: cos_sim_spearman
2015 value: 81.64101363896586
2016 - type: euclidean_pearson
2017 value: 82.55165038934942
2018 - type: euclidean_spearman
2019 value: 81.64105257080502
2020 - type: manhattan_pearson
2021 value: 82.52802949883335
2022 - type: manhattan_spearman
2023 value: 81.61255430718158
2024 - task:
2025 type: STS
2026 dataset:
2027 type: mteb/sts15-sts
2028 name: MTEB STS15
2029 config: default
2030 split: test
2031 revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
2032 metrics:
2033 - type: cos_sim_pearson
2034 value: 86.0654695484029
2035 - type: cos_sim_spearman
2036 value: 87.20408521902229
2037 - type: euclidean_pearson
2038 value: 86.8110651362115
2039 - type: euclidean_spearman
2040 value: 87.20408521902229
2041 - type: manhattan_pearson
2042 value: 86.77984656478691
2043 - type: manhattan_spearman
2044 value: 87.1719947099227
2045 - task:
2046 type: STS
2047 dataset:
2048 type: mteb/sts16-sts
2049 name: MTEB STS16
2050 config: default
2051 split: test
2052 revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
2053 metrics:
2054 - type: cos_sim_pearson
2055 value: 83.77823915496512
2056 - type: cos_sim_spearman
2057 value: 85.43566325729779
2058 - type: euclidean_pearson
2059 value: 84.5396956658821
2060 - type: euclidean_spearman
2061 value: 85.43566325729779
2062 - type: manhattan_pearson
2063 value: 84.5665398848169
2064 - type: manhattan_spearman
2065 value: 85.44375870303232
2066 - task:
2067 type: STS
2068 dataset:
2069 type: mteb/sts17-crosslingual-sts
2070 name: MTEB STS17 (en-en)
2071 config: en-en
2072 split: test
2073 revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
2074 metrics:
2075 - type: cos_sim_pearson
2076 value: 87.20030208471798
2077 - type: cos_sim_spearman
2078 value: 87.20485505076539
2079 - type: euclidean_pearson
2080 value: 88.10588324368722
2081 - type: euclidean_spearman
2082 value: 87.20485505076539
2083 - type: manhattan_pearson
2084 value: 87.92324770415183
2085 - type: manhattan_spearman
2086 value: 87.0571314561877
2087 - task:
2088 type: STS
2089 dataset:
2090 type: mteb/sts22-crosslingual-sts
2091 name: MTEB STS22 (en)
2092 config: en
2093 split: test
2094 revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
2095 metrics:
2096 - type: cos_sim_pearson
2097 value: 63.06093161604453
2098 - type: cos_sim_spearman
2099 value: 64.2163140357722
2100 - type: euclidean_pearson
2101 value: 65.27589680994006
2102 - type: euclidean_spearman
2103 value: 64.2163140357722
2104 - type: manhattan_pearson
2105 value: 65.45904383711101
2106 - type: manhattan_spearman
2107 value: 64.55404716679305
2108 - task:
2109 type: STS
2110 dataset:
2111 type: mteb/stsbenchmark-sts
2112 name: MTEB STSBenchmark
2113 config: default
2114 split: test
2115 revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
2116 metrics:
2117 - type: cos_sim_pearson
2118 value: 84.32976164578706
2119 - type: cos_sim_spearman
2120 value: 85.54302197678368
2121 - type: euclidean_pearson
2122 value: 85.26307149193056
2123 - type: euclidean_spearman
2124 value: 85.54302197678368
2125 - type: manhattan_pearson
2126 value: 85.26647282029371
2127 - type: manhattan_spearman
2128 value: 85.5316135265568
2129 - task:
2130 type: Reranking
2131 dataset:
2132 type: mteb/scidocs-reranking
2133 name: MTEB SciDocsRR
2134 config: default
2135 split: test
2136 revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
2137 metrics:
2138 - type: map
2139 value: 81.44675968318754
2140 - type: mrr
2141 value: 94.92741826075158
2142 - task:
2143 type: Retrieval
2144 dataset:
2145 type: scifact
2146 name: MTEB SciFact
2147 config: default
2148 split: test
2149 revision: None
2150 metrics:
2151 - type: map_at_1
2152 value: 56.34400000000001
2153 - type: map_at_10
2154 value: 65.927
2155 - type: map_at_100
2156 value: 66.431
2157 - type: map_at_1000
2158 value: 66.461
2159 - type: map_at_3
2160 value: 63.529
2161 - type: map_at_5
2162 value: 64.818
2163 - type: mrr_at_1
2164 value: 59.333000000000006
2165 - type: mrr_at_10
2166 value: 67.54599999999999
2167 - type: mrr_at_100
2168 value: 67.892
2169 - type: mrr_at_1000
2170 value: 67.917
2171 - type: mrr_at_3
2172 value: 65.778
2173 - type: mrr_at_5
2174 value: 66.794
2175 - type: ndcg_at_1
2176 value: 59.333000000000006
2177 - type: ndcg_at_10
2178 value: 70.5
2179 - type: ndcg_at_100
2180 value: 72.688
2181 - type: ndcg_at_1000
2182 value: 73.483
2183 - type: ndcg_at_3
2184 value: 66.338
2185 - type: ndcg_at_5
2186 value: 68.265
2187 - type: precision_at_1
2188 value: 59.333000000000006
2189 - type: precision_at_10
2190 value: 9.3
2191 - type: precision_at_100
2192 value: 1.053
2193 - type: precision_at_1000
2194 value: 0.11199999999999999
2195 - type: precision_at_3
2196 value: 25.889
2197 - type: precision_at_5
2198 value: 16.866999999999997
2199 - type: recall_at_1
2200 value: 56.34400000000001
2201 - type: recall_at_10
2202 value: 82.789
2203 - type: recall_at_100
2204 value: 92.767
2205 - type: recall_at_1000
2206 value: 99
2207 - type: recall_at_3
2208 value: 71.64399999999999
2209 - type: recall_at_5
2210 value: 76.322
2211 - task:
2212 type: PairClassification
2213 dataset:
2214 type: mteb/sprintduplicatequestions-pairclassification
2215 name: MTEB SprintDuplicateQuestions
2216 config: default
2217 split: test
2218 revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
2219 metrics:
2220 - type: cos_sim_accuracy
2221 value: 99.75742574257426
2222 - type: cos_sim_ap
2223 value: 93.52081548447406
2224 - type: cos_sim_f1
2225 value: 87.33850129198966
2226 - type: cos_sim_precision
2227 value: 90.37433155080214
2228 - type: cos_sim_recall
2229 value: 84.5
2230 - type: dot_accuracy
2231 value: 99.75742574257426
2232 - type: dot_ap
2233 value: 93.52081548447406
2234 - type: dot_f1
2235 value: 87.33850129198966
2236 - type: dot_precision
2237 value: 90.37433155080214
2238 - type: dot_recall
2239 value: 84.5
2240 - type: euclidean_accuracy
2241 value: 99.75742574257426
2242 - type: euclidean_ap
2243 value: 93.52081548447406
2244 - type: euclidean_f1
2245 value: 87.33850129198966
2246 - type: euclidean_precision
2247 value: 90.37433155080214
2248 - type: euclidean_recall
2249 value: 84.5
2250 - type: manhattan_accuracy
2251 value: 99.75841584158415
2252 - type: manhattan_ap
2253 value: 93.4975678585854
2254 - type: manhattan_f1
2255 value: 87.26708074534162
2256 - type: manhattan_precision
2257 value: 90.45064377682404
2258 - type: manhattan_recall
2259 value: 84.3
2260 - type: max_accuracy
2261 value: 99.75841584158415
2262 - type: max_ap
2263 value: 93.52081548447406
2264 - type: max_f1
2265 value: 87.33850129198966
2266 - task:
2267 type: Clustering
2268 dataset:
2269 type: mteb/stackexchange-clustering
2270 name: MTEB StackExchangeClustering
2271 config: default
2272 split: test
2273 revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
2274 metrics:
2275 - type: v_measure
2276 value: 64.31437036686651
2277 - task:
2278 type: Clustering
2279 dataset:
2280 type: mteb/stackexchange-clustering-p2p
2281 name: MTEB StackExchangeClusteringP2P
2282 config: default
2283 split: test
2284 revision: 815ca46b2622cec33ccafc3735d572c266efdb44
2285 metrics:
2286 - type: v_measure
2287 value: 33.25569319007206
2288 - task:
2289 type: Reranking
2290 dataset:
2291 type: mteb/stackoverflowdupquestions-reranking
2292 name: MTEB StackOverflowDupQuestions
2293 config: default
2294 split: test
2295 revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
2296 metrics:
2297 - type: map
2298 value: 49.90474939720706
2299 - type: mrr
2300 value: 50.568115503777264
2301 - task:
2302 type: Summarization
2303 dataset:
2304 type: mteb/summeval
2305 name: MTEB SummEval
2306 config: default
2307 split: test
2308 revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
2309 metrics:
2310 - type: cos_sim_pearson
2311 value: 29.866828641244712
2312 - type: cos_sim_spearman
2313 value: 30.077555055873866
2314 - type: dot_pearson
2315 value: 29.866832988572266
2316 - type: dot_spearman
2317 value: 30.077555055873866
2318 - task:
2319 type: Retrieval
2320 dataset:
2321 type: trec-covid
2322 name: MTEB TRECCOVID
2323 config: default
2324 split: test
2325 revision: None
2326 metrics:
2327 - type: map_at_1
2328 value: 0.232
2329 - type: map_at_10
2330 value: 2.094
2331 - type: map_at_100
2332 value: 11.971
2333 - type: map_at_1000
2334 value: 28.158
2335 - type: map_at_3
2336 value: 0.688
2337 - type: map_at_5
2338 value: 1.114
2339 - type: mrr_at_1
2340 value: 88
2341 - type: mrr_at_10
2342 value: 93.4
2343 - type: mrr_at_100
2344 value: 93.4
2345 - type: mrr_at_1000
2346 value: 93.4
2347 - type: mrr_at_3
2348 value: 93
2349 - type: mrr_at_5
2350 value: 93.4
2351 - type: ndcg_at_1
2352 value: 84
2353 - type: ndcg_at_10
2354 value: 79.923
2355 - type: ndcg_at_100
2356 value: 61.17
2357 - type: ndcg_at_1000
2358 value: 53.03
2359 - type: ndcg_at_3
2360 value: 84.592
2361 - type: ndcg_at_5
2362 value: 82.821
2363 - type: precision_at_1
2364 value: 88
2365 - type: precision_at_10
2366 value: 85
2367 - type: precision_at_100
2368 value: 63.019999999999996
2369 - type: precision_at_1000
2370 value: 23.554
2371 - type: precision_at_3
2372 value: 89.333
2373 - type: precision_at_5
2374 value: 87.2
2375 - type: recall_at_1
2376 value: 0.232
2377 - type: recall_at_10
2378 value: 2.255
2379 - type: recall_at_100
2380 value: 14.823
2381 - type: recall_at_1000
2382 value: 49.456
2383 - type: recall_at_3
2384 value: 0.718
2385 - type: recall_at_5
2386 value: 1.175
2387 - task:
2388 type: Retrieval
2389 dataset:
2390 type: webis-touche2020
2391 name: MTEB Touche2020
2392 config: default
2393 split: test
2394 revision: None
2395 metrics:
2396 - type: map_at_1
2397 value: 2.547
2398 - type: map_at_10
2399 value: 11.375
2400 - type: map_at_100
2401 value: 18.194
2402 - type: map_at_1000
2403 value: 19.749
2404 - type: map_at_3
2405 value: 5.825
2406 - type: map_at_5
2407 value: 8.581
2408 - type: mrr_at_1
2409 value: 32.653
2410 - type: mrr_at_10
2411 value: 51.32
2412 - type: mrr_at_100
2413 value: 51.747
2414 - type: mrr_at_1000
2415 value: 51.747
2416 - type: mrr_at_3
2417 value: 47.278999999999996
2418 - type: mrr_at_5
2419 value: 48.605
2420 - type: ndcg_at_1
2421 value: 29.592000000000002
2422 - type: ndcg_at_10
2423 value: 28.151
2424 - type: ndcg_at_100
2425 value: 39.438
2426 - type: ndcg_at_1000
2427 value: 50.769
2428 - type: ndcg_at_3
2429 value: 30.758999999999997
2430 - type: ndcg_at_5
2431 value: 30.366
2432 - type: precision_at_1
2433 value: 32.653
2434 - type: precision_at_10
2435 value: 25.714
2436 - type: precision_at_100
2437 value: 8.041
2438 - type: precision_at_1000
2439 value: 1.555
2440 - type: precision_at_3
2441 value: 33.333
2442 - type: precision_at_5
2443 value: 31.837
2444 - type: recall_at_1
2445 value: 2.547
2446 - type: recall_at_10
2447 value: 18.19
2448 - type: recall_at_100
2449 value: 49.538
2450 - type: recall_at_1000
2451 value: 83.86
2452 - type: recall_at_3
2453 value: 7.329
2454 - type: recall_at_5
2455 value: 11.532
2456 - task:
2457 type: Classification
2458 dataset:
2459 type: mteb/toxic_conversations_50k
2460 name: MTEB ToxicConversationsClassification
2461 config: default
2462 split: test
2463 revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c
2464 metrics:
2465 - type: accuracy
2466 value: 71.4952
2467 - type: ap
2468 value: 14.793362635531409
2469 - type: f1
2470 value: 55.204635551516915
2471 - task:
2472 type: Classification
2473 dataset:
2474 type: mteb/tweet_sentiment_extraction
2475 name: MTEB TweetSentimentExtractionClassification
2476 config: default
2477 split: test
2478 revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
2479 metrics:
2480 - type: accuracy
2481 value: 61.5365025466893
2482 - type: f1
2483 value: 61.81742556334845
2484 - task:
2485 type: Clustering
2486 dataset:
2487 type: mteb/twentynewsgroups-clustering
2488 name: MTEB TwentyNewsgroupsClustering
2489 config: default
2490 split: test
2491 revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
2492 metrics:
2493 - type: v_measure
2494 value: 49.05531070301185
2495 - task:
2496 type: PairClassification
2497 dataset:
2498 type: mteb/twittersemeval2015-pairclassification
2499 name: MTEB TwitterSemEval2015
2500 config: default
2501 split: test
2502 revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
2503 metrics:
2504 - type: cos_sim_accuracy
2505 value: 86.51725576682364
2506 - type: cos_sim_ap
2507 value: 75.2292304265163
2508 - type: cos_sim_f1
2509 value: 69.54022988505749
2510 - type: cos_sim_precision
2511 value: 63.65629110039457
2512 - type: cos_sim_recall
2513 value: 76.62269129287598
2514 - type: dot_accuracy
2515 value: 86.51725576682364
2516 - type: dot_ap
2517 value: 75.22922386081054
2518 - type: dot_f1
2519 value: 69.54022988505749
2520 - type: dot_precision
2521 value: 63.65629110039457
2522 - type: dot_recall
2523 value: 76.62269129287598
2524 - type: euclidean_accuracy
2525 value: 86.51725576682364
2526 - type: euclidean_ap
2527 value: 75.22925730473472
2528 - type: euclidean_f1
2529 value: 69.54022988505749
2530 - type: euclidean_precision
2531 value: 63.65629110039457
2532 - type: euclidean_recall
2533 value: 76.62269129287598
2534 - type: manhattan_accuracy
2535 value: 86.52321630804077
2536 - type: manhattan_ap
2537 value: 75.20608115037336
2538 - type: manhattan_f1
2539 value: 69.60000000000001
2540 - type: manhattan_precision
2541 value: 64.37219730941705
2542 - type: manhattan_recall
2543 value: 75.75197889182058
2544 - type: max_accuracy
2545 value: 86.52321630804077
2546 - type: max_ap
2547 value: 75.22925730473472
2548 - type: max_f1
2549 value: 69.60000000000001
2550 - task:
2551 type: PairClassification
2552 dataset:
2553 type: mteb/twitterurlcorpus-pairclassification
2554 name: MTEB TwitterURLCorpus
2555 config: default
2556 split: test
2557 revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
2558 metrics:
2559 - type: cos_sim_accuracy
2560 value: 89.34877944657896
2561 - type: cos_sim_ap
2562 value: 86.71257569277373
2563 - type: cos_sim_f1
2564 value: 79.10386355986088
2565 - type: cos_sim_precision
2566 value: 76.91468470434214
2567 - type: cos_sim_recall
2568 value: 81.4213119802895
2569 - type: dot_accuracy
2570 value: 89.34877944657896
2571 - type: dot_ap
2572 value: 86.71257133133368
2573 - type: dot_f1
2574 value: 79.10386355986088
2575 - type: dot_precision
2576 value: 76.91468470434214
2577 - type: dot_recall
2578 value: 81.4213119802895
2579 - type: euclidean_accuracy
2580 value: 89.34877944657896
2581 - type: euclidean_ap
2582 value: 86.71257651501476
2583 - type: euclidean_f1
2584 value: 79.10386355986088
2585 - type: euclidean_precision
2586 value: 76.91468470434214
2587 - type: euclidean_recall
2588 value: 81.4213119802895
2589 - type: manhattan_accuracy
2590 value: 89.35848177901967
2591 - type: manhattan_ap
2592 value: 86.69330615469126
2593 - type: manhattan_f1
2594 value: 79.13867741453949
2595 - type: manhattan_precision
2596 value: 76.78881807647741
2597 - type: manhattan_recall
2598 value: 81.63689559593472
2599 - type: max_accuracy
2600 value: 89.35848177901967
2601 - type: max_ap
2602 value: 86.71257651501476
2603 - type: max_f1
2604 value: 79.13867741453949
2605 license: apache-2.0
2606 language:
2607 - en
2608 new_version: nomic-ai/nomic-embed-text-v1.5
2609 ---
2610
2611
2612 # nomic-embed-text-v1: A Reproducible Long Context (8192) Text Embedder
2613
2614 [Blog](https://www.nomic.ai/blog/posts/nomic-embed-text-v1) | [Technical Report](https://arxiv.org/abs/2402.01613) | [AWS SageMaker](https://aws.amazon.com/marketplace/seller-profile?id=seller-tpqidcj54zawi) | [Atlas Embedding and Unstructured Data Analytics Platform](https://atlas.nomic.ai)
2615
2616 `nomic-embed-text-v1` is an 8192 context length text encoder that surpasses OpenAI text-embedding-ada-002 and text-embedding-3-small performance on both short and long context tasks.
2617
2618 # Performance Benchmarks
2619
2620 | Name | SeqLen | MTEB | LoCo | Jina Long Context | Open Weights | Open Training Code | Open Data |
2621 | :-------------------------------:| :----- | :-------- | :------: | :---------------: | :-----------: | :----------------: | :---------- |
2622 | nomic-embed-text-v1 | 8192 | **62.39** |**85.53** | 54.16 | ✅ | ✅ | ✅ |
2623 | jina-embeddings-v2-base-en | 8192 | 60.39 | 85.45 | 51.90 | ✅ | ❌ | ❌ |
2624 | text-embedding-3-small | 8191 | 62.26 | 82.40 | **58.20** | ❌ | ❌ | ❌ |
2625 | text-embedding-ada-002 | 8191 | 60.99 | 52.7 | 55.25 | ❌ | ❌ | ❌ |
2626
2627
2628 **Exciting Update!**: `nomic-embed-text-v1` is now multimodal! [nomic-embed-vision-v1](https://huggingface.co/nomic-ai/nomic-embed-vision-v1) is aligned to the embedding space of `nomic-embed-text-v1`, meaning any text embedding is multimodal!
2629
2630 ## Usage
2631
2632 **Important**: the text prompt *must* include a *task instruction prefix*, instructing the model which task is being performed.
2633
2634 For example, if you are implementing a RAG application, you embed your documents as `search_document: <text here>` and embed your user queries as `search_query: <text here>`.
2635
2636 **Notice**: As of transformers v5.5.0 and sentence transformers v5.3.0, `trust_remote_code=True` is no longer necessary. For now, this applies only to the text-only model series.
2637
2638 ## Task instruction prefixes
2639
2640 ### `search_document`
2641
2642 #### Purpose: embed texts as documents from a dataset
2643
2644 This prefix is used for embedding texts as documents, for example as documents for a RAG index.
2645
2646 ```python
2647 from sentence_transformers import SentenceTransformer
2648
2649 model = SentenceTransformer("nomic-ai/nomic-embed-text-v1")
2650 sentences = ['search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten']
2651 embeddings = model.encode(sentences)
2652 print(embeddings)
2653 ```
2654
2655 ### `search_query`
2656
2657 #### Purpose: embed texts as questions to answer
2658
2659 This prefix is used for embedding texts as questions that documents from a dataset could resolve, for example as queries to be answered by a RAG application.
2660
2661 ```python
2662 from sentence_transformers import SentenceTransformer
2663
2664 model = SentenceTransformer("nomic-ai/nomic-embed-text-v1")
2665 sentences = ['search_query: Who is Laurens van Der Maaten?']
2666 embeddings = model.encode(sentences)
2667 print(embeddings)
2668 ```
2669
2670 ### `clustering`
2671
2672 #### Purpose: embed texts to group them into clusters
2673
2674 This prefix is used for embedding texts in order to group them into clusters, discover common topics, or remove semantic duplicates.
2675
2676 ```python
2677 from sentence_transformers import SentenceTransformer
2678
2679 model = SentenceTransformer("nomic-ai/nomic-embed-text-v1")
2680 sentences = ['clustering: the quick brown fox']
2681 embeddings = model.encode(sentences)
2682 print(embeddings)
2683 ```
2684
2685 ### `classification`
2686
2687 #### Purpose: embed texts to classify them
2688
2689 This prefix is used for embedding texts into vectors that will be used as features for a classification model.
2690
2691 ```python
2692 from sentence_transformers import SentenceTransformer
2693
2694 model = SentenceTransformer("nomic-ai/nomic-embed-text-v1")
2695 sentences = ['classification: the quick brown fox']
2696 embeddings = model.encode(sentences)
2697 print(embeddings)
2698 ```
2699
2700 ### Sentence Transformers
2701 ```python
2702 from sentence_transformers import SentenceTransformer
2703
2704 model = SentenceTransformer("nomic-ai/nomic-embed-text-v1")
2705 sentences = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?']
2706 embeddings = model.encode(sentences)
2707 print(embeddings)
2708 ```
2709
2710 ### Transformers
2711
2712 ```python
2713 import torch
2714 import torch.nn.functional as F
2715 from transformers import AutoTokenizer, AutoModel
2716
2717 def mean_pooling(model_output, attention_mask):
2718 token_embeddings = model_output[0]
2719 input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
2720 return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
2721
2722 sentences = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?']
2723
2724 tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
2725 model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1')
2726 model.eval()
2727
2728 encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
2729
2730 with torch.no_grad():
2731 model_output = model(**encoded_input)
2732
2733 embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
2734 embeddings = F.normalize(embeddings, p=2, dim=1)
2735 print(embeddings)
2736 ```
2737
2738 The model natively supports scaling of the sequence length past 2048 tokens. To do so, apply the following changes:
2739
2740 ```diff
2741 - tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
2742 + tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', model_max_length=8192)
2743
2744
2745 - model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1')
2746 + rope_parameters = {"rope_theta": 1000.0, "rope_type": "dynamic", "factor": 2.0}
2747 + model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1', rope_parameters=rope_parameters)
2748 ```
2749
2750 ### Transformers.js
2751
2752 ```js
2753 import { pipeline } from '@xenova/transformers';
2754
2755 // Create a feature extraction pipeline
2756 const extractor = await pipeline('feature-extraction', 'nomic-ai/nomic-embed-text-v1', {
2757 quantized: false, // Comment out this line to use the quantized version
2758 });
2759
2760 // Compute sentence embeddings
2761 const texts = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?'];
2762 const embeddings = await extractor(texts, { pooling: 'mean', normalize: true });
2763 console.log(embeddings);
2764 ```
2765
2766 ## Nomic API
2767
2768 The easiest way to get started with Nomic Embed is through the Nomic Embedding API.
2769
2770 Generating embeddings with the `nomic` Python client is as easy as
2771
2772 ```python
2773 from nomic import embed
2774
2775 output = embed.text(
2776 texts=['Nomic Embedding API', '#keepAIOpen'],
2777 model='nomic-embed-text-v1',
2778 task_type='search_document'
2779 )
2780
2781 print(output)
2782 ```
2783
2784 For more information, see the [API reference](https://docs.nomic.ai/reference/endpoints/nomic-embed-text).
2785
2786
2787 ## Training
2788 Click the Nomic Atlas map below to visualize a 5M sample of our contrastive pretraining data!
2789
2790 [![image/webp](https://cdn-uploads.huggingface.co/production/uploads/607997c83a565c15675055b3/pjhJhuNyRfPagRd_c_iUz.webp)](https://atlas.nomic.ai/map/nomic-text-embed-v1-5m-sample)
2791
2792 We train our embedder using a multi-stage training pipeline. Starting from a long-context [BERT model](https://huggingface.co/nomic-ai/nomic-bert-2048),
2793 the first unsupervised contrastive stage trains on a dataset generated from weakly related text pairs, such as question-answer pairs from forums like StackExchange and Quora, title-body pairs from Amazon reviews, and summarizations from news articles.
2794
2795 In the second finetuning stage, higher quality labeled datasets such as search queries and answers from web searches are leveraged. Data curation and hard-example mining is crucial in this stage.
2796
2797 For more details, see the Nomic Embed [Technical Report](https://static.nomic.ai/reports/2024_Nomic_Embed_Text_Technical_Report.pdf) and corresponding [blog post](https://blog.nomic.ai/posts/nomic-embed-text-v1).
2798
2799 The training data used to train the models is released in its entirety. For more details, see the `contrastors` [repository](https://github.com/nomic-ai/contrastors).
2800
2801
2802 # Join the Nomic Community
2803
2804 - Nomic: [https://nomic.ai](https://nomic.ai)
2805 - Discord: [https://discord.gg/myY5YDR8z8](https://discord.gg/myY5YDR8z8)
2806 - Twitter: [https://twitter.com/nomic_ai](https://twitter.com/nomic_ai)
2807
2808
2809 # Citation
2810
2811 If you find the model, dataset, or training code useful, please cite our work:
2812
2813 ```bibtex
2814 @misc{nussbaum2024nomic,
2815 title={Nomic Embed: Training a Reproducible Long Context Text Embedder},
2816 author={Zach Nussbaum and John X. Morris and Brandon Duderstadt and Andriy Mulyar},
2817 year={2024},
2818 eprint={2402.01613},
2819 archivePrefix={arXiv},
2820 primaryClass={cs.CL}
2821 }
2822 ```