data_config.json
38.3 KB · 1452 lines · json Raw
1 [
2 {
3 "name": "stackexchange_title_body/skeptics.stackexchange.com.jsonl.gz",
4 "lines": 10009,
5 "weight": 1
6 },
7 {
8 "name": "stackexchange_TitleBody_Answer/islam.stackexchange.com.jsonl.gz",
9 "lines": 10052,
10 "weight": 1
11 },
12 {
13 "name": "stackexchange_Title_Answer/islam.stackexchange.com.jsonl.gz",
14 "lines": 10052,
15 "weight": 1
16 },
17 {
18 "name": "stackexchange_TitleBody_Answer/anime.stackexchange.com.jsonl.gz",
19 "lines": 10131,
20 "weight": 1
21 },
22 {
23 "name": "stackexchange_Title_Answer/anime.stackexchange.com.jsonl.gz",
24 "lines": 10131,
25 "weight": 1
26 },
27 {
28 "name": "stackexchange_title_body/writers.stackexchange.com.jsonl.gz",
29 "lines": 10157,
30 "weight": 1
31 },
32 {
33 "name": "stackexchange_title_body/astronomy.stackexchange.com.jsonl.gz",
34 "lines": 10462,
35 "weight": 1
36 },
37 {
38 "name": "stackexchange_title_body/vi.stackexchange.com.jsonl.gz",
39 "lines": 10551,
40 "weight": 1
41 },
42 {
43 "name": "stackexchange_TitleBody_Answer/french.stackexchange.com.jsonl.gz",
44 "lines": 10578,
45 "weight": 1
46 },
47 {
48 "name": "stackexchange_Title_Answer/french.stackexchange.com.jsonl.gz",
49 "lines": 10578,
50 "weight": 1
51 },
52 {
53 "name": "stackexchange_title_body/cstheory.stackexchange.com.jsonl.gz",
54 "lines": 10642,
55 "weight": 1
56 },
57 {
58 "name": "stackexchange_TitleBody_Answer/civicrm.stackexchange.com.jsonl.gz",
59 "lines": 10648,
60 "weight": 1
61 },
62 {
63 "name": "stackexchange_Title_Answer/civicrm.stackexchange.com.jsonl.gz",
64 "lines": 10648,
65 "weight": 1
66 },
67 {
68 "name": "stackexchange_TitleBody_Answer/expressionengine.stackexchange.com.jsonl.gz",
69 "lines": 10742,
70 "weight": 1
71 },
72 {
73 "name": "stackexchange_Title_Answer/expressionengine.stackexchange.com.jsonl.gz",
74 "lines": 10742,
75 "weight": 1
76 },
77 {
78 "name": "stackexchange_title_body/engineering.stackexchange.com.jsonl.gz",
79 "lines": 10753,
80 "weight": 1
81 },
82 {
83 "name": "stackexchange_TitleBody_Answer/history.stackexchange.com.jsonl.gz",
84 "lines": 10766,
85 "weight": 1
86 },
87 {
88 "name": "stackexchange_Title_Answer/history.stackexchange.com.jsonl.gz",
89 "lines": 10766,
90 "weight": 1
91 },
92 {
93 "name": "stackexchange_title_body/french.stackexchange.com.jsonl.gz",
94 "lines": 10794,
95 "weight": 1
96 },
97 {
98 "name": "stackexchange_TitleBody_Answer/politics.stackexchange.com.jsonl.gz",
99 "lines": 11047,
100 "weight": 1
101 },
102 {
103 "name": "stackexchange_Title_Answer/politics.stackexchange.com.jsonl.gz",
104 "lines": 11047,
105 "weight": 1
106 },
107 {
108 "name": "stackexchange_title_body/economics.stackexchange.com.jsonl.gz",
109 "lines": 11115,
110 "weight": 1
111 },
112 {
113 "name": "stackexchange_TitleBody_Answer/craftcms.stackexchange.com.jsonl.gz",
114 "lines": 11236,
115 "weight": 1
116 },
117 {
118 "name": "stackexchange_Title_Answer/craftcms.stackexchange.com.jsonl.gz",
119 "lines": 11236,
120 "weight": 1
121 },
122 {
123 "name": "stackexchange_title_body/anime.stackexchange.com.jsonl.gz",
124 "lines": 11444,
125 "weight": 1
126 },
127 {
128 "name": "stackexchange_TitleBody_Answer/christianity.stackexchange.com.jsonl.gz",
129 "lines": 11498,
130 "weight": 1
131 },
132 {
133 "name": "stackexchange_Title_Answer/christianity.stackexchange.com.jsonl.gz",
134 "lines": 11498,
135 "weight": 1
136 },
137 {
138 "name": "stackexchange_TitleBody_Answer/softwarerecs.stackexchange.com.jsonl.gz",
139 "lines": 11761,
140 "weight": 1
141 },
142 {
143 "name": "stackexchange_Title_Answer/softwarerecs.stackexchange.com.jsonl.gz",
144 "lines": 11761,
145 "weight": 1
146 },
147 {
148 "name": "stackexchange_TitleBody_Answer/boardgames.stackexchange.com.jsonl.gz",
149 "lines": 11805,
150 "weight": 1
151 },
152 {
153 "name": "stackexchange_Title_Answer/boardgames.stackexchange.com.jsonl.gz",
154 "lines": 11805,
155 "weight": 1
156 },
157 {
158 "name": "stackexchange_title_body/islam.stackexchange.com.jsonl.gz",
159 "lines": 11853,
160 "weight": 1
161 },
162 {
163 "name": "stackexchange_title_body/expressionengine.stackexchange.com.jsonl.gz",
164 "lines": 11866,
165 "weight": 1
166 },
167 {
168 "name": "stackexchange_title_body/politics.stackexchange.com.jsonl.gz",
169 "lines": 11894,
170 "weight": 1
171 },
172 {
173 "name": "stackexchange_title_body/history.stackexchange.com.jsonl.gz",
174 "lines": 12021,
175 "weight": 1
176 },
177 {
178 "name": "stackexchange_title_body/christianity.stackexchange.com.jsonl.gz",
179 "lines": 12108,
180 "weight": 1
181 },
182 {
183 "name": "stackexchange_title_body/boardgames.stackexchange.com.jsonl.gz",
184 "lines": 12149,
185 "weight": 1
186 },
187 {
188 "name": "flickr30k_captions.jsonl.gz",
189 "lines": 317695,
190 "weight": 1
191 },
192 {
193 "name": "coco_captions.jsonl.gz",
194 "lines": 828395,
195 "weight": 1
196 },
197 {
198 "name": "codesearchnet.jsonl.gz",
199 "lines": 1151414,
200 "weight": 1
201 },
202 {
203 "name": "stackexchange_title_body/civicrm.stackexchange.com.jsonl.gz",
204 "lines": 12543,
205 "weight": 2
206 },
207 {
208 "name": "stackexchange_title_body/craftcms.stackexchange.com.jsonl.gz",
209 "lines": 12574,
210 "weight": 2
211 },
212 {
213 "name": "stackexchange_TitleBody_Answer/networkengineering.stackexchange.com.jsonl.gz",
214 "lines": 12590,
215 "weight": 2
216 },
217 {
218 "name": "stackexchange_Title_Answer/networkengineering.stackexchange.com.jsonl.gz",
219 "lines": 12590,
220 "weight": 2
221 },
222 {
223 "name": "stackexchange_TitleBody_Answer/space.stackexchange.com.jsonl.gz",
224 "lines": 12893,
225 "weight": 2
226 },
227 {
228 "name": "stackexchange_Title_Answer/space.stackexchange.com.jsonl.gz",
229 "lines": 12893,
230 "weight": 2
231 },
232 {
233 "name": "stackexchange_TitleBody_Answer/quant.stackexchange.com.jsonl.gz",
234 "lines": 12933,
235 "weight": 2
236 },
237 {
238 "name": "stackexchange_Title_Answer/quant.stackexchange.com.jsonl.gz",
239 "lines": 12933,
240 "weight": 2
241 },
242 {
243 "name": "stackexchange_TitleBody_Answer/philosophy.stackexchange.com.jsonl.gz",
244 "lines": 13114,
245 "weight": 2
246 },
247 {
248 "name": "stackexchange_Title_Answer/philosophy.stackexchange.com.jsonl.gz",
249 "lines": 13114,
250 "weight": 2
251 },
252 {
253 "name": "stackexchange_TitleBody_Answer/gardening.stackexchange.com.jsonl.gz",
254 "lines": 13246,
255 "weight": 2
256 },
257 {
258 "name": "stackexchange_Title_Answer/gardening.stackexchange.com.jsonl.gz",
259 "lines": 13246,
260 "weight": 2
261 },
262 {
263 "name": "stackexchange_title_body/hinduism.stackexchange.com.jsonl.gz",
264 "lines": 13450,
265 "weight": 2
266 },
267 {
268 "name": "stackexchange_title_body/networkengineering.stackexchange.com.jsonl.gz",
269 "lines": 13454,
270 "weight": 2
271 },
272 {
273 "name": "stackexchange_TitleBody_Answer/german.stackexchange.com.jsonl.gz",
274 "lines": 13733,
275 "weight": 2
276 },
277 {
278 "name": "stackexchange_Title_Answer/german.stackexchange.com.jsonl.gz",
279 "lines": 13733,
280 "weight": 2
281 },
282 {
283 "name": "stackexchange_title_body/german.stackexchange.com.jsonl.gz",
284 "lines": 13950,
285 "weight": 2
286 },
287 {
288 "name": "stackexchange_title_body/philosophy.stackexchange.com.jsonl.gz",
289 "lines": 14829,
290 "weight": 2
291 },
292 {
293 "name": "stackexchange_title_body/gardening.stackexchange.com.jsonl.gz",
294 "lines": 15136,
295 "weight": 2
296 },
297 {
298 "name": "stackexchange_title_body/space.stackexchange.com.jsonl.gz",
299 "lines": 15142,
300 "weight": 2
301 },
302 {
303 "name": "stackexchange_TitleBody_Answer/bicycles.stackexchange.com.jsonl.gz",
304 "lines": 15708,
305 "weight": 2
306 },
307 {
308 "name": "stackexchange_Title_Answer/bicycles.stackexchange.com.jsonl.gz",
309 "lines": 15708,
310 "weight": 2
311 },
312 {
313 "name": "stackexchange_TitleBody_Answer/law.stackexchange.com.jsonl.gz",
314 "lines": 16133,
315 "weight": 2
316 },
317 {
318 "name": "stackexchange_Title_Answer/law.stackexchange.com.jsonl.gz",
319 "lines": 16133,
320 "weight": 2
321 },
322 {
323 "name": "stackexchange_TitleBody_Answer/arduino.stackexchange.com.jsonl.gz",
324 "lines": 16281,
325 "weight": 2
326 },
327 {
328 "name": "stackexchange_Title_Answer/arduino.stackexchange.com.jsonl.gz",
329 "lines": 16281,
330 "weight": 2
331 },
332 {
333 "name": "stackexchange_title_body/bicycles.stackexchange.com.jsonl.gz",
334 "lines": 16353,
335 "weight": 2
336 },
337 {
338 "name": "stackexchange_TitleBody_Answer/emacs.stackexchange.com.jsonl.gz",
339 "lines": 16830,
340 "weight": 2
341 },
342 {
343 "name": "stackexchange_Title_Answer/emacs.stackexchange.com.jsonl.gz",
344 "lines": 16830,
345 "weight": 2
346 },
347 {
348 "name": "stackexchange_title_body/quant.stackexchange.com.jsonl.gz",
349 "lines": 17261,
350 "weight": 2
351 },
352 {
353 "name": "stackexchange_TitleBody_Answer/dsp.stackexchange.com.jsonl.gz",
354 "lines": 17430,
355 "weight": 2
356 },
357 {
358 "name": "stackexchange_Title_Answer/dsp.stackexchange.com.jsonl.gz",
359 "lines": 17430,
360 "weight": 2
361 },
362 {
363 "name": "stackexchange_TitleBody_Answer/puzzling.stackexchange.com.jsonl.gz",
364 "lines": 17448,
365 "weight": 2
366 },
367 {
368 "name": "stackexchange_Title_Answer/puzzling.stackexchange.com.jsonl.gz",
369 "lines": 17448,
370 "weight": 2
371 },
372 {
373 "name": "stackexchange_title_body/puzzling.stackexchange.com.jsonl.gz",
374 "lines": 17851,
375 "weight": 2
376 },
377 {
378 "name": "stackexchange_title_body/law.stackexchange.com.jsonl.gz",
379 "lines": 17941,
380 "weight": 2
381 },
382 {
383 "name": "stackexchange_TitleBody_Answer/movies.stackexchange.com.jsonl.gz",
384 "lines": 18243,
385 "weight": 2
386 },
387 {
388 "name": "stackexchange_Title_Answer/movies.stackexchange.com.jsonl.gz",
389 "lines": 18243,
390 "weight": 2
391 },
392 {
393 "name": "stackexchange_TitleBody_Answer/mechanics.stackexchange.com.jsonl.gz",
394 "lines": 18613,
395 "weight": 2
396 },
397 {
398 "name": "stackexchange_Title_Answer/mechanics.stackexchange.com.jsonl.gz",
399 "lines": 18613,
400 "weight": 2
401 },
402 {
403 "name": "stackexchange_TitleBody_Answer/aviation.stackexchange.com.jsonl.gz",
404 "lines": 18755,
405 "weight": 2
406 },
407 {
408 "name": "stackexchange_Title_Answer/aviation.stackexchange.com.jsonl.gz",
409 "lines": 18755,
410 "weight": 2
411 },
412 {
413 "name": "stackexchange_TitleBody_Answer/biology.stackexchange.com.jsonl.gz",
414 "lines": 19277,
415 "weight": 2
416 },
417 {
418 "name": "stackexchange_Title_Answer/biology.stackexchange.com.jsonl.gz",
419 "lines": 19277,
420 "weight": 2
421 },
422 {
423 "name": "stackexchange_TitleBody_Answer/crypto.stackexchange.com.jsonl.gz",
424 "lines": 19404,
425 "weight": 2
426 },
427 {
428 "name": "stackexchange_Title_Answer/crypto.stackexchange.com.jsonl.gz",
429 "lines": 19404,
430 "weight": 2
431 },
432 {
433 "name": "stackexchange_title_body/arduino.stackexchange.com.jsonl.gz",
434 "lines": 19553,
435 "weight": 2
436 },
437 {
438 "name": "stackexchange_TitleBody_Answer/music.stackexchange.com.jsonl.gz",
439 "lines": 19936,
440 "weight": 2
441 },
442 {
443 "name": "stackexchange_Title_Answer/music.stackexchange.com.jsonl.gz",
444 "lines": 19936,
445 "weight": 2
446 },
447 {
448 "name": "stackexchange_title_body/aviation.stackexchange.com.jsonl.gz",
449 "lines": 20139,
450 "weight": 2
451 },
452 {
453 "name": "stackexchange_title_body/softwarerecs.stackexchange.com.jsonl.gz",
454 "lines": 20142,
455 "weight": 2
456 },
457 {
458 "name": "stackexchange_title_body/movies.stackexchange.com.jsonl.gz",
459 "lines": 20181,
460 "weight": 2
461 },
462 {
463 "name": "stackexchange_TitleBody_Answer/datascience.stackexchange.com.jsonl.gz",
464 "lines": 20503,
465 "weight": 2
466 },
467 {
468 "name": "stackexchange_Title_Answer/datascience.stackexchange.com.jsonl.gz",
469 "lines": 20503,
470 "weight": 2
471 },
472 {
473 "name": "stackexchange_title_body/music.stackexchange.com.jsonl.gz",
474 "lines": 20636,
475 "weight": 2
476 },
477 {
478 "name": "stackexchange_TitleBody_Answer/japanese.stackexchange.com.jsonl.gz",
479 "lines": 20948,
480 "weight": 2
481 },
482 {
483 "name": "stackexchange_Title_Answer/japanese.stackexchange.com.jsonl.gz",
484 "lines": 20948,
485 "weight": 2
486 },
487 {
488 "name": "stackexchange_title_body/emacs.stackexchange.com.jsonl.gz",
489 "lines": 21055,
490 "weight": 2
491 },
492 {
493 "name": "stackexchange_title_body/dsp.stackexchange.com.jsonl.gz",
494 "lines": 21252,
495 "weight": 2
496 },
497 {
498 "name": "stackexchange_title_body/japanese.stackexchange.com.jsonl.gz",
499 "lines": 22056,
500 "weight": 2
501 },
502 {
503 "name": "stackexchange_TitleBody_Answer/bitcoin.stackexchange.com.jsonl.gz",
504 "lines": 22474,
505 "weight": 2
506 },
507 {
508 "name": "stackexchange_Title_Answer/bitcoin.stackexchange.com.jsonl.gz",
509 "lines": 22474,
510 "weight": 2
511 },
512 {
513 "name": "stackexchange_TitleBody_Answer/cooking.stackexchange.com.jsonl.gz",
514 "lines": 22641,
515 "weight": 2
516 },
517 {
518 "name": "stackexchange_Title_Answer/cooking.stackexchange.com.jsonl.gz",
519 "lines": 22641,
520 "weight": 2
521 },
522 {
523 "name": "stackexchange_title_body/mechanics.stackexchange.com.jsonl.gz",
524 "lines": 22868,
525 "weight": 2
526 },
527 {
528 "name": "stackexchange_TitleBody_Answer/photo.stackexchange.com.jsonl.gz",
529 "lines": 23204,
530 "weight": 2
531 },
532 {
533 "name": "stackexchange_Title_Answer/photo.stackexchange.com.jsonl.gz",
534 "lines": 23204,
535 "weight": 2
536 },
537 {
538 "name": "stackexchange_title_body/crypto.stackexchange.com.jsonl.gz",
539 "lines": 23231,
540 "weight": 2
541 },
542 {
543 "name": "stackexchange_title_body/cooking.stackexchange.com.jsonl.gz",
544 "lines": 23705,
545 "weight": 2
546 },
547 {
548 "name": "stackexchange_title_body/photo.stackexchange.com.jsonl.gz",
549 "lines": 23753,
550 "weight": 2
551 },
552 {
553 "name": "stackexchange_TitleBody_Answer/workplace.stackexchange.com.jsonl.gz",
554 "lines": 24012,
555 "weight": 2
556 },
557 {
558 "name": "stackexchange_Title_Answer/workplace.stackexchange.com.jsonl.gz",
559 "lines": 24012,
560 "weight": 2
561 },
562 {
563 "name": "stackexchange_TitleBody_Answer/meta.stackoverflow.com.jsonl.gz",
564 "lines": 24044,
565 "weight": 2
566 },
567 {
568 "name": "stackexchange_Title_Answer/meta.stackoverflow.com.jsonl.gz",
569 "lines": 24044,
570 "weight": 2
571 },
572 {
573 "name": "stackexchange_TitleBody_Answer/raspberrypi.stackexchange.com.jsonl.gz",
574 "lines": 24143,
575 "weight": 2
576 },
577 {
578 "name": "stackexchange_Title_Answer/raspberrypi.stackexchange.com.jsonl.gz",
579 "lines": 24143,
580 "weight": 2
581 },
582 {
583 "name": "stackexchange_title_body/workplace.stackexchange.com.jsonl.gz",
584 "lines": 24189,
585 "weight": 2
586 },
587 {
588 "name": "stackexchange_title_body/biology.stackexchange.com.jsonl.gz",
589 "lines": 24447,
590 "weight": 3
591 },
592 {
593 "name": "stackexchange_TitleBody_Answer/webapps.stackexchange.com.jsonl.gz",
594 "lines": 24867,
595 "weight": 3
596 },
597 {
598 "name": "stackexchange_Title_Answer/webapps.stackexchange.com.jsonl.gz",
599 "lines": 24867,
600 "weight": 3
601 },
602 {
603 "name": "stackexchange_title_body/bitcoin.stackexchange.com.jsonl.gz",
604 "lines": 25374,
605 "weight": 3
606 },
607 {
608 "name": "stackexchange_TitleBody_Answer/judaism.stackexchange.com.jsonl.gz",
609 "lines": 26085,
610 "weight": 3
611 },
612 {
613 "name": "stackexchange_Title_Answer/judaism.stackexchange.com.jsonl.gz",
614 "lines": 26085,
615 "weight": 3
616 },
617 {
618 "name": "stackexchange_TitleBody_Answer/ethereum.stackexchange.com.jsonl.gz",
619 "lines": 26124,
620 "weight": 3
621 },
622 {
623 "name": "stackexchange_Title_Answer/ethereum.stackexchange.com.jsonl.gz",
624 "lines": 26124,
625 "weight": 3
626 },
627 {
628 "name": "stackexchange_TitleBody_Answer/worldbuilding.stackexchange.com.jsonl.gz",
629 "lines": 26210,
630 "weight": 3
631 },
632 {
633 "name": "stackexchange_Title_Answer/worldbuilding.stackexchange.com.jsonl.gz",
634 "lines": 26210,
635 "weight": 3
636 },
637 {
638 "name": "stackexchange_title_body/worldbuilding.stackexchange.com.jsonl.gz",
639 "lines": 26763,
640 "weight": 3
641 },
642 {
643 "name": "stackexchange_TitleBody_Answer/chemistry.stackexchange.com.jsonl.gz",
644 "lines": 27061,
645 "weight": 3
646 },
647 {
648 "name": "stackexchange_Title_Answer/chemistry.stackexchange.com.jsonl.gz",
649 "lines": 27061,
650 "weight": 3
651 },
652 {
653 "name": "stackexchange_title_body/datascience.stackexchange.com.jsonl.gz",
654 "lines": 27397,
655 "weight": 3
656 },
657 {
658 "name": "stackexchange_TitleBody_Answer/graphicdesign.stackexchange.com.jsonl.gz",
659 "lines": 28083,
660 "weight": 3
661 },
662 {
663 "name": "stackexchange_Title_Answer/graphicdesign.stackexchange.com.jsonl.gz",
664 "lines": 28083,
665 "weight": 3
666 },
667 {
668 "name": "stackexchange_TitleBody_Answer/ux.stackexchange.com.jsonl.gz",
669 "lines": 28901,
670 "weight": 3
671 },
672 {
673 "name": "stackexchange_Title_Answer/ux.stackexchange.com.jsonl.gz",
674 "lines": 28901,
675 "weight": 3
676 },
677 {
678 "name": "stackexchange_title_body/ux.stackexchange.com.jsonl.gz",
679 "lines": 29403,
680 "weight": 3
681 },
682 {
683 "name": "stackexchange_TitleBody_Answer/money.stackexchange.com.jsonl.gz",
684 "lines": 29404,
685 "weight": 3
686 },
687 {
688 "name": "stackexchange_Title_Answer/money.stackexchange.com.jsonl.gz",
689 "lines": 29404,
690 "weight": 3
691 },
692 {
693 "name": "stackexchange_title_body/webapps.stackexchange.com.jsonl.gz",
694 "lines": 29697,
695 "weight": 3
696 },
697 {
698 "name": "stackexchange_TitleBody_Answer/cs.stackexchange.com.jsonl.gz",
699 "lines": 30010,
700 "weight": 3
701 },
702 {
703 "name": "stackexchange_Title_Answer/cs.stackexchange.com.jsonl.gz",
704 "lines": 30010,
705 "weight": 3
706 },
707 {
708 "name": "stackexchange_title_body/graphicdesign.stackexchange.com.jsonl.gz",
709 "lines": 30233,
710 "weight": 3
711 },
712 {
713 "name": "stackexchange_TitleBody_Answer/webmasters.stackexchange.com.jsonl.gz",
714 "lines": 30370,
715 "weight": 3
716 },
717 {
718 "name": "stackexchange_Title_Answer/webmasters.stackexchange.com.jsonl.gz",
719 "lines": 30370,
720 "weight": 3
721 },
722 {
723 "name": "stackexchange_title_body/raspberrypi.stackexchange.com.jsonl.gz",
724 "lines": 30625,
725 "weight": 3
726 },
727 {
728 "name": "stackexchange_title_body/money.stackexchange.com.jsonl.gz",
729 "lines": 32021,
730 "weight": 3
731 },
732 {
733 "name": "stackexchange_title_body/judaism.stackexchange.com.jsonl.gz",
734 "lines": 32028,
735 "weight": 3
736 },
737 {
738 "name": "stackexchange_TitleBody_Answer/academia.stackexchange.com.jsonl.gz",
739 "lines": 32137,
740 "weight": 3
741 },
742 {
743 "name": "stackexchange_Title_Answer/academia.stackexchange.com.jsonl.gz",
744 "lines": 32137,
745 "weight": 3
746 },
747 {
748 "name": "stackexchange_title_body/ethereum.stackexchange.com.jsonl.gz",
749 "lines": 32760,
750 "weight": 3
751 },
752 {
753 "name": "stackexchange_title_body/academia.stackexchange.com.jsonl.gz",
754 "lines": 34331,
755 "weight": 3
756 },
757 {
758 "name": "stackexchange_title_body/chemistry.stackexchange.com.jsonl.gz",
759 "lines": 34506,
760 "weight": 3
761 },
762 {
763 "name": "stackexchange_title_body/webmasters.stackexchange.com.jsonl.gz",
764 "lines": 34559,
765 "weight": 3
766 },
767 {
768 "name": "stackexchange_title_body/meta.stackoverflow.com.jsonl.gz",
769 "lines": 36456,
770 "weight": 3
771 },
772 {
773 "name": "stackexchange_TitleBody_Answer/travel.stackexchange.com.jsonl.gz",
774 "lines": 36533,
775 "weight": 4
776 },
777 {
778 "name": "stackexchange_Title_Answer/travel.stackexchange.com.jsonl.gz",
779 "lines": 36533,
780 "weight": 4
781 },
782 {
783 "name": "stackexchange_TitleBody_Answer/android.stackexchange.com.jsonl.gz",
784 "lines": 38077,
785 "weight": 4
786 },
787 {
788 "name": "stackexchange_Title_Answer/android.stackexchange.com.jsonl.gz",
789 "lines": 38077,
790 "weight": 4
791 },
792 {
793 "name": "stackexchange_title_body/cs.stackexchange.com.jsonl.gz",
794 "lines": 38314,
795 "weight": 4
796 },
797 {
798 "name": "stackexchange_TitleBody_Answer/gamedev.stackexchange.com.jsonl.gz",
799 "lines": 40154,
800 "weight": 4
801 },
802 {
803 "name": "stackexchange_Title_Answer/gamedev.stackexchange.com.jsonl.gz",
804 "lines": 40154,
805 "weight": 4
806 },
807 {
808 "name": "stackexchange_TitleBody_Answer/rpg.stackexchange.com.jsonl.gz",
809 "lines": 40435,
810 "weight": 4
811 },
812 {
813 "name": "stackexchange_Title_Answer/rpg.stackexchange.com.jsonl.gz",
814 "lines": 40435,
815 "weight": 4
816 },
817 {
818 "name": "stackexchange_title_body/travel.stackexchange.com.jsonl.gz",
819 "lines": 41227,
820 "weight": 4
821 },
822 {
823 "name": "stackexchange_TitleBody_Answer/codereview.stackexchange.com.jsonl.gz",
824 "lines": 41748,
825 "weight": 4
826 },
827 {
828 "name": "stackexchange_Title_Answer/codereview.stackexchange.com.jsonl.gz",
829 "lines": 41748,
830 "weight": 4
831 },
832 {
833 "name": "stackexchange_title_body/rpg.stackexchange.com.jsonl.gz",
834 "lines": 42303,
835 "weight": 4
836 },
837 {
838 "name": "stackexchange_title_body/codereview.stackexchange.com.jsonl.gz",
839 "lines": 45765,
840 "weight": 4
841 },
842 {
843 "name": "stackexchange_title_body/gamedev.stackexchange.com.jsonl.gz",
844 "lines": 46485,
845 "weight": 4
846 },
847 {
848 "name": "stackexchange_TitleBody_Answer/softwareengineering.stackexchange.com.jsonl.gz",
849 "lines": 51326,
850 "weight": 5
851 },
852 {
853 "name": "stackexchange_Title_Answer/softwareengineering.stackexchange.com.jsonl.gz",
854 "lines": 51326,
855 "weight": 5
856 },
857 {
858 "name": "stackexchange_TitleBody_Answer/security.stackexchange.com.jsonl.gz",
859 "lines": 51355,
860 "weight": 5
861 },
862 {
863 "name": "stackexchange_Title_Answer/security.stackexchange.com.jsonl.gz",
864 "lines": 51355,
865 "weight": 5
866 },
867 {
868 "name": "stackexchange_title_body/android.stackexchange.com.jsonl.gz",
869 "lines": 51608,
870 "weight": 5
871 },
872 {
873 "name": "stackexchange_TitleBody_Answer/diy.stackexchange.com.jsonl.gz",
874 "lines": 52896,
875 "weight": 5
876 },
877 {
878 "name": "stackexchange_Title_Answer/diy.stackexchange.com.jsonl.gz",
879 "lines": 52896,
880 "weight": 5
881 },
882 {
883 "name": "stackexchange_title_body/softwareengineering.stackexchange.com.jsonl.gz",
884 "lines": 53942,
885 "weight": 5
886 },
887 {
888 "name": "stackexchange_TitleBody_Answer/blender.stackexchange.com.jsonl.gz",
889 "lines": 54153,
890 "weight": 5
891 },
892 {
893 "name": "stackexchange_Title_Answer/blender.stackexchange.com.jsonl.gz",
894 "lines": 54153,
895 "weight": 5
896 },
897 {
898 "name": "stackexchange_TitleBody_Answer/scifi.stackexchange.com.jsonl.gz",
899 "lines": 54805,
900 "weight": 5
901 },
902 {
903 "name": "stackexchange_Title_Answer/scifi.stackexchange.com.jsonl.gz",
904 "lines": 54805,
905 "weight": 5
906 },
907 {
908 "name": "stackexchange_title_body/security.stackexchange.com.jsonl.gz",
909 "lines": 58000,
910 "weight": 5
911 },
912 {
913 "name": "stackexchange_TitleBody_Answer/mathematica.stackexchange.com.jsonl.gz",
914 "lines": 59895,
915 "weight": 5
916 },
917 {
918 "name": "stackexchange_Title_Answer/mathematica.stackexchange.com.jsonl.gz",
919 "lines": 59895,
920 "weight": 5
921 },
922 {
923 "name": "stackexchange_title_body/diy.stackexchange.com.jsonl.gz",
924 "lines": 60083,
925 "weight": 5
926 },
927 {
928 "name": "stackexchange_TitleBody_Answer/meta.stackexchange.com.jsonl.gz",
929 "lines": 60744,
930 "weight": 5
931 },
932 {
933 "name": "stackexchange_Title_Answer/meta.stackexchange.com.jsonl.gz",
934 "lines": 60744,
935 "weight": 5
936 },
937 {
938 "name": "stackexchange_title_body/scifi.stackexchange.com.jsonl.gz",
939 "lines": 61528,
940 "weight": 6
941 },
942 {
943 "name": "stackexchange_TitleBody_Answer/drupal.stackexchange.com.jsonl.gz",
944 "lines": 67817,
945 "weight": 6
946 },
947 {
948 "name": "stackexchange_Title_Answer/drupal.stackexchange.com.jsonl.gz",
949 "lines": 67817,
950 "weight": 6
951 },
952 {
953 "name": "stackexchange_TitleBody_Answer/dba.stackexchange.com.jsonl.gz",
954 "lines": 71449,
955 "weight": 6
956 },
957 {
958 "name": "stackexchange_Title_Answer/dba.stackexchange.com.jsonl.gz",
959 "lines": 71449,
960 "weight": 6
961 },
962 {
963 "name": "stackexchange_title_body/mathematica.stackexchange.com.jsonl.gz",
964 "lines": 73131,
965 "weight": 7
966 },
967 {
968 "name": "stackexchange_TitleBody_Answer/ell.stackexchange.com.jsonl.gz",
969 "lines": 77892,
970 "weight": 7
971 },
972 {
973 "name": "stackexchange_Title_Answer/ell.stackexchange.com.jsonl.gz",
974 "lines": 77892,
975 "weight": 7
976 },
977 {
978 "name": "stackexchange_TitleBody_Answer/magento.stackexchange.com.jsonl.gz",
979 "lines": 79241,
980 "weight": 7
981 },
982 {
983 "name": "stackexchange_Title_Answer/magento.stackexchange.com.jsonl.gz",
984 "lines": 79241,
985 "weight": 7
986 },
987 {
988 "name": "stackexchange_title_body/drupal.stackexchange.com.jsonl.gz",
989 "lines": 79717,
990 "weight": 7
991 },
992 {
993 "name": "stackexchange_TitleBody_Answer/sharepoint.stackexchange.com.jsonl.gz",
994 "lines": 80420,
995 "weight": 7
996 },
997 {
998 "name": "stackexchange_Title_Answer/sharepoint.stackexchange.com.jsonl.gz",
999 "lines": 80420,
1000 "weight": 7
1001 },
1002 {
1003 "name": "stackexchange_title_body/blender.stackexchange.com.jsonl.gz",
1004 "lines": 80766,
1005 "weight": 7
1006 },
1007 {
1008 "name": "stackexchange_title_body/dba.stackexchange.com.jsonl.gz",
1009 "lines": 81871,
1010 "weight": 7
1011 },
1012 {
1013 "name": "stackexchange_TitleBody_Answer/gaming.stackexchange.com.jsonl.gz",
1014 "lines": 82887,
1015 "weight": 7
1016 },
1017 {
1018 "name": "stackexchange_Title_Answer/gaming.stackexchange.com.jsonl.gz",
1019 "lines": 82887,
1020 "weight": 7
1021 },
1022 {
1023 "name": "stackexchange_title_body/ell.stackexchange.com.jsonl.gz",
1024 "lines": 83271,
1025 "weight": 7
1026 },
1027 {
1028 "name": "stackexchange_title_body/meta.stackexchange.com.jsonl.gz",
1029 "lines": 83510,
1030 "weight": 7
1031 },
1032 {
1033 "name": "stackexchange_TitleBody_Answer/wordpress.stackexchange.com.jsonl.gz",
1034 "lines": 83621,
1035 "weight": 7
1036 },
1037 {
1038 "name": "stackexchange_Title_Answer/wordpress.stackexchange.com.jsonl.gz",
1039 "lines": 83621,
1040 "weight": 7
1041 },
1042 {
1043 "name": "stackexchange_TitleBody_Answer/mathoverflow.net.jsonl.gz",
1044 "lines": 85289,
1045 "weight": 8
1046 },
1047 {
1048 "name": "stackexchange_Title_Answer/mathoverflow.net.jsonl.gz",
1049 "lines": 85289,
1050 "weight": 8
1051 },
1052 {
1053 "name": "stackexchange_TitleBody_Answer/salesforce.stackexchange.com.jsonl.gz",
1054 "lines": 87272,
1055 "weight": 8
1056 },
1057 {
1058 "name": "stackexchange_Title_Answer/salesforce.stackexchange.com.jsonl.gz",
1059 "lines": 87272,
1060 "weight": 8
1061 },
1062 {
1063 "name": "stackexchange_title_body/gaming.stackexchange.com.jsonl.gz",
1064 "lines": 88912,
1065 "weight": 8
1066 },
1067 {
1068 "name": "stackexchange_TitleBody_Answer/apple.stackexchange.com.jsonl.gz",
1069 "lines": 92487,
1070 "weight": 8
1071 },
1072 {
1073 "name": "stackexchange_Title_Answer/apple.stackexchange.com.jsonl.gz",
1074 "lines": 92487,
1075 "weight": 8
1076 },
1077 {
1078 "name": "stackexchange_title_body/sharepoint.stackexchange.com.jsonl.gz",
1079 "lines": 94011,
1080 "weight": 8
1081 },
1082 {
1083 "name": "stackexchange_title_body/magento.stackexchange.com.jsonl.gz",
1084 "lines": 99991,
1085 "weight": 9
1086 },
1087 {
1088 "name": "stackexchange_TitleBody_Answer/gis.stackexchange.com.jsonl.gz",
1089 "lines": 100254,
1090 "weight": 9
1091 },
1092 {
1093 "name": "stackexchange_Title_Answer/gis.stackexchange.com.jsonl.gz",
1094 "lines": 100254,
1095 "weight": 9
1096 },
1097 {
1098 "name": "stackexchange_title_body/wordpress.stackexchange.com.jsonl.gz",
1099 "lines": 100474,
1100 "weight": 9
1101 },
1102 {
1103 "name": "stackexchange_TitleBody_Answer/english.stackexchange.com.jsonl.gz",
1104 "lines": 100640,
1105 "weight": 9
1106 },
1107 {
1108 "name": "stackexchange_Title_Answer/english.stackexchange.com.jsonl.gz",
1109 "lines": 100640,
1110 "weight": 9
1111 },
1112 {
1113 "name": "stackexchange_title_body/salesforce.stackexchange.com.jsonl.gz",
1114 "lines": 105260,
1115 "weight": 9
1116 },
1117 {
1118 "name": "stackexchange_title_body/english.stackexchange.com.jsonl.gz",
1119 "lines": 109522,
1120 "weight": 10
1121 },
1122 {
1123 "name": "stackexchange_title_body/apple.stackexchange.com.jsonl.gz",
1124 "lines": 110622,
1125 "weight": 10
1126 },
1127 {
1128 "name": "stackexchange_TitleBody_Answer/stats.stackexchange.com.jsonl.gz",
1129 "lines": 115679,
1130 "weight": 10
1131 },
1132 {
1133 "name": "stackexchange_Title_Answer/stats.stackexchange.com.jsonl.gz",
1134 "lines": 115679,
1135 "weight": 10
1136 },
1137 {
1138 "name": "stackexchange_title_body/mathoverflow.net.jsonl.gz",
1139 "lines": 120851,
1140 "weight": 10
1141 },
1142 {
1143 "name": "stackexchange_TitleBody_Answer/electronics.stackexchange.com.jsonl.gz",
1144 "lines": 129494,
1145 "weight": 11
1146 },
1147 {
1148 "name": "stackexchange_Title_Answer/electronics.stackexchange.com.jsonl.gz",
1149 "lines": 129494,
1150 "weight": 11
1151 },
1152 {
1153 "name": "stackexchange_title_body/gis.stackexchange.com.jsonl.gz",
1154 "lines": 131000,
1155 "weight": 11
1156 },
1157 {
1158 "name": "stackexchange_TitleBody_Answer/physics.stackexchange.com.jsonl.gz",
1159 "lines": 141230,
1160 "weight": 12
1161 },
1162 {
1163 "name": "stackexchange_Title_Answer/physics.stackexchange.com.jsonl.gz",
1164 "lines": 141230,
1165 "weight": 12
1166 },
1167 {
1168 "name": "stackexchange_title_body/electronics.stackexchange.com.jsonl.gz",
1169 "lines": 143582,
1170 "weight": 12
1171 },
1172 {
1173 "name": "stackexchange_TitleBody_Answer/unix.stackexchange.com.jsonl.gz",
1174 "lines": 155414,
1175 "weight": 13
1176 },
1177 {
1178 "name": "stackexchange_Title_Answer/unix.stackexchange.com.jsonl.gz",
1179 "lines": 155414,
1180 "weight": 13
1181 },
1182 {
1183 "name": "stackexchange_TitleBody_Answer/tex.stackexchange.com.jsonl.gz",
1184 "lines": 171628,
1185 "weight": 15
1186 },
1187 {
1188 "name": "stackexchange_Title_Answer/tex.stackexchange.com.jsonl.gz",
1189 "lines": 171628,
1190 "weight": 15
1191 },
1192 {
1193 "name": "stackexchange_title_body/physics.stackexchange.com.jsonl.gz",
1194 "lines": 173307,
1195 "weight": 15
1196 },
1197 {
1198 "name": "stackexchange_title_body/stats.stackexchange.com.jsonl.gz",
1199 "lines": 173466,
1200 "weight": 15
1201 },
1202 {
1203 "name": "stackexchange_title_body/unix.stackexchange.com.jsonl.gz",
1204 "lines": 185997,
1205 "weight": 16
1206 },
1207 {
1208 "name": "stackexchange_title_body/tex.stackexchange.com.jsonl.gz",
1209 "lines": 202954,
1210 "weight": 17
1211 },
1212 {
1213 "name": "TriviaQA_pairs.jsonl.gz",
1214 "lines": 73346,
1215 "weight": 19
1216 },
1217 {
1218 "name": "stackexchange_TitleBody_Answer/serverfault.com.jsonl.gz",
1219 "lines": 238507,
1220 "weight": 20
1221 },
1222 {
1223 "name": "stackexchange_Title_Answer/serverfault.com.jsonl.gz",
1224 "lines": 238507,
1225 "weight": 20
1226 },
1227 {
1228 "name": "stackexchange_duplicate_questions_title-body_title-body.jsonl.gz",
1229 "lines": 250460,
1230 "weight": 21
1231 },
1232 {
1233 "name": "stackexchange_duplicate_questions_body_body.jsonl.gz",
1234 "lines": 250519,
1235 "weight": 21
1236 },
1237 {
1238 "name": "squad_pairs.jsonl.gz",
1239 "lines": 87599,
1240 "weight": 22
1241 },
1242 {
1243 "name": "stackexchange_TitleBody_Answer/askubuntu.com.jsonl.gz",
1244 "lines": 267135,
1245 "weight": 22
1246 },
1247 {
1248 "name": "stackexchange_Title_Answer/askubuntu.com.jsonl.gz",
1249 "lines": 267135,
1250 "weight": 22
1251 },
1252 {
1253 "name": "stackexchange_title_body/serverfault.com.jsonl.gz",
1254 "lines": 270904,
1255 "weight": 23
1256 },
1257 {
1258 "name": "NQ-train_pairs.jsonl.gz",
1259 "lines": 100231,
1260 "weight": 25
1261 },
1262 {
1263 "name": "SimpleWiki.jsonl.gz",
1264 "lines": 102225,
1265 "weight": 26
1266 },
1267 {
1268 "name": "quora_duplicates_triplets.jsonl.gz",
1269 "lines": 103663,
1270 "weight": 26
1271 },
1272 {
1273 "name": "stackexchange_duplicate_questions_title_title.jsonl.gz",
1274 "lines": 304525,
1275 "weight": 26
1276 },
1277 {
1278 "name": "altlex.jsonl.gz",
1279 "lines": 112696,
1280 "weight": 28
1281 },
1282 {
1283 "name": "stackexchange_title_body/askubuntu.com.jsonl.gz",
1284 "lines": 347925,
1285 "weight": 29
1286 },
1287 {
1288 "name": "stackexchange_TitleBody_Answer/superuser.com.jsonl.gz",
1289 "lines": 352610,
1290 "weight": 30
1291 },
1292 {
1293 "name": "stackexchange_Title_Answer/superuser.com.jsonl.gz",
1294 "lines": 352610,
1295 "weight": 30
1296 },
1297 {
1298 "name": "wikihow.jsonl.gz",
1299 "lines": 128542,
1300 "weight": 32
1301 },
1302 {
1303 "name": "stackexchange_title_body/superuser.com.jsonl.gz",
1304 "lines": 435463,
1305 "weight": 36
1306 },
1307 {
1308 "name": "stackexchange_title_body/small_stackexchanges.jsonl.gz",
1309 "lines": 448146,
1310 "weight": 37
1311 },
1312 {
1313 "name": "stackexchange_TitleBody_Answer/small_stackexchanges.jsonl.gz",
1314 "lines": 460256,
1315 "weight": 38
1316 },
1317 {
1318 "name": "stackexchange_Title_Answer/small_stackexchanges.jsonl.gz",
1319 "lines": 460256,
1320 "weight": 38
1321 },
1322 {
1323 "name": "sentence-compression.jsonl.gz",
1324 "lines": 180000,
1325 "weight": 45
1326 },
1327 {
1328 "name": "AllNLI.jsonl.gz",
1329 "lines": 277230,
1330 "weight": 69
1331 },
1332 {
1333 "name": "eli5_question_answer.jsonl.gz",
1334 "lines": 325475,
1335 "weight": 81
1336 },
1337 {
1338 "name": "reddit/reddit_2015.jsonl.gz",
1339 "lines": 135108166,
1340 "weight": 82
1341 },
1342 {
1343 "name": "reddit/reddit_2016.jsonl.gz",
1344 "lines": 159164386,
1345 "weight": 82
1346 },
1347 {
1348 "name": "reddit/reddit_2017.jsonl.gz",
1349 "lines": 191485219,
1350 "weight": 82
1351 },
1352 {
1353 "name": "reddit/reddit_2018.jsonl.gz",
1354 "lines": 240726659,
1355 "weight": 82
1356 },
1357 {
1358 "name": "stackexchange_TitleBody_Answer/math.stackexchange.com.jsonl.gz",
1359 "lines": 1100953,
1360 "weight": 83
1361 },
1362 {
1363 "name": "stackexchange_Title_Answer/math.stackexchange.com.jsonl.gz",
1364 "lines": 1100953,
1365 "weight": 83
1366 },
1367 {
1368 "name": "stackexchange_title_body/math.stackexchange.com.jsonl.gz",
1369 "lines": 1338443,
1370 "weight": 83
1371 },
1372 {
1373 "name": "stackexchange_TitleBody_Answer/stackoverflow.com-Posts.jsonl.gz",
1374 "lines": 15768211,
1375 "weight": 83
1376 },
1377 {
1378 "name": "stackexchange_Title_Answer/stackoverflow.com-Posts.jsonl.gz",
1379 "lines": 15768211,
1380 "weight": 83
1381 },
1382 {
1383 "name": "stackexchange_title_body/stackoverflow.com-Posts.jsonl.gz",
1384 "lines": 18562443,
1385 "weight": 83
1386 },
1387 {
1388 "name": "specter_train_triples.jsonl.gz",
1389 "lines": 684100,
1390 "weight": 84
1391 },
1392 {
1393 "name": "S2ORC_title_abstract.jsonl.gz",
1394 "lines": 41769185,
1395 "weight": 123
1396 },
1397 {
1398 "name": "S2ORC_citation_pairs.jsonl.gz",
1399 "lines": 52603982,
1400 "weight": 123
1401 },
1402 {
1403 "name": "PAQ_pairs.jsonl.gz",
1404 "lines": 64371441,
1405 "weight": 123
1406 },
1407 {
1408 "name": "WikiAnswers_pairs.jsonl.gz",
1409 "lines": 77427422,
1410 "weight": 123
1411 },
1412 {
1413 "name": "S2ORC_citation_pairs_abstract.jsonl.gz",
1414 "lines": 116288806,
1415 "weight": 123
1416 },
1417 {
1418 "name": "searchQA_question_top5_snippets_merged.jsonl.gz",
1419 "lines": 582261,
1420 "weight": 144
1421 },
1422 {
1423 "name": "yahoo_answers_title_question.jsonl.gz",
1424 "lines": 659896,
1425 "weight": 163
1426 },
1427 {
1428 "name": "yahoo_answers_question_answer.jsonl.gz",
1429 "lines": 681164,
1430 "weight": 169
1431 },
1432 {
1433 "name": "yahoo_answers_title_answer.jsonl.gz",
1434 "lines": 1198260,
1435 "weight": 247
1436 },
1437 {
1438 "name": "amazon-qa-train-pairs.jsonl.gz",
1439 "lines": 2448839,
1440 "weight": 247
1441 },
1442 {
1443 "name": "gooaq_pairs.jsonl.gz",
1444 "lines": 3012496,
1445 "weight": 247
1446 },
1447 {
1448 "name": "msmarco-query_passage_negative.jsonl.gz",
1449 "lines": 9144553,
1450 "weight": 247
1451 }
1452 ]