tokenizer_config.json
20.3 KB · 945 lines · json Raw
1 {
2 "added_tokens_decoder": {
3 "0": {
4 "content": "|||IP_ADDRESS|||",
5 "lstrip": false,
6 "normalized": true,
7 "rstrip": false,
8 "single_word": false,
9 "special": false
10 },
11 "1": {
12 "content": "<|padding|>",
13 "lstrip": false,
14 "normalized": false,
15 "rstrip": false,
16 "single_word": false,
17 "special": true
18 },
19 "50254": {
20 "content": " ",
21 "lstrip": false,
22 "normalized": true,
23 "rstrip": false,
24 "single_word": false,
25 "special": false
26 },
27 "50255": {
28 "content": " ",
29 "lstrip": false,
30 "normalized": true,
31 "rstrip": false,
32 "single_word": false,
33 "special": false
34 },
35 "50256": {
36 "content": " ",
37 "lstrip": false,
38 "normalized": true,
39 "rstrip": false,
40 "single_word": false,
41 "special": false
42 },
43 "50257": {
44 "content": " ",
45 "lstrip": false,
46 "normalized": true,
47 "rstrip": false,
48 "single_word": false,
49 "special": false
50 },
51 "50258": {
52 "content": " ",
53 "lstrip": false,
54 "normalized": true,
55 "rstrip": false,
56 "single_word": false,
57 "special": false
58 },
59 "50259": {
60 "content": " ",
61 "lstrip": false,
62 "normalized": true,
63 "rstrip": false,
64 "single_word": false,
65 "special": false
66 },
67 "50260": {
68 "content": " ",
69 "lstrip": false,
70 "normalized": true,
71 "rstrip": false,
72 "single_word": false,
73 "special": false
74 },
75 "50261": {
76 "content": " ",
77 "lstrip": false,
78 "normalized": true,
79 "rstrip": false,
80 "single_word": false,
81 "special": false
82 },
83 "50262": {
84 "content": " ",
85 "lstrip": false,
86 "normalized": true,
87 "rstrip": false,
88 "single_word": false,
89 "special": false
90 },
91 "50263": {
92 "content": " ",
93 "lstrip": false,
94 "normalized": true,
95 "rstrip": false,
96 "single_word": false,
97 "special": false
98 },
99 "50264": {
100 "content": " ",
101 "lstrip": false,
102 "normalized": true,
103 "rstrip": false,
104 "single_word": false,
105 "special": false
106 },
107 "50265": {
108 "content": " ",
109 "lstrip": false,
110 "normalized": true,
111 "rstrip": false,
112 "single_word": false,
113 "special": false
114 },
115 "50266": {
116 "content": " ",
117 "lstrip": false,
118 "normalized": true,
119 "rstrip": false,
120 "single_word": false,
121 "special": false
122 },
123 "50267": {
124 "content": " ",
125 "lstrip": false,
126 "normalized": true,
127 "rstrip": false,
128 "single_word": false,
129 "special": false
130 },
131 "50268": {
132 "content": " ",
133 "lstrip": false,
134 "normalized": true,
135 "rstrip": false,
136 "single_word": false,
137 "special": false
138 },
139 "50269": {
140 "content": " ",
141 "lstrip": false,
142 "normalized": true,
143 "rstrip": false,
144 "single_word": false,
145 "special": false
146 },
147 "50270": {
148 "content": " ",
149 "lstrip": false,
150 "normalized": true,
151 "rstrip": false,
152 "single_word": false,
153 "special": false
154 },
155 "50271": {
156 "content": " ",
157 "lstrip": false,
158 "normalized": true,
159 "rstrip": false,
160 "single_word": false,
161 "special": false
162 },
163 "50272": {
164 "content": " ",
165 "lstrip": false,
166 "normalized": true,
167 "rstrip": false,
168 "single_word": false,
169 "special": false
170 },
171 "50273": {
172 "content": " ",
173 "lstrip": false,
174 "normalized": true,
175 "rstrip": false,
176 "single_word": false,
177 "special": false
178 },
179 "50274": {
180 "content": " ",
181 "lstrip": false,
182 "normalized": true,
183 "rstrip": false,
184 "single_word": false,
185 "special": false
186 },
187 "50275": {
188 "content": " ",
189 "lstrip": false,
190 "normalized": true,
191 "rstrip": false,
192 "single_word": false,
193 "special": false
194 },
195 "50276": {
196 "content": " ",
197 "lstrip": false,
198 "normalized": true,
199 "rstrip": false,
200 "single_word": false,
201 "special": false
202 },
203 "50277": {
204 "content": "|||EMAIL_ADDRESS|||",
205 "lstrip": false,
206 "normalized": true,
207 "rstrip": false,
208 "single_word": false,
209 "special": false
210 },
211 "50278": {
212 "content": "|||PHONE_NUMBER|||",
213 "lstrip": false,
214 "normalized": true,
215 "rstrip": false,
216 "single_word": false,
217 "special": false
218 },
219 "50279": {
220 "content": "<|endoftext|>",
221 "lstrip": false,
222 "normalized": false,
223 "rstrip": false,
224 "single_word": false,
225 "special": true
226 },
227 "50280": {
228 "content": "[UNK]",
229 "lstrip": false,
230 "normalized": false,
231 "rstrip": false,
232 "single_word": false,
233 "special": true
234 },
235 "50281": {
236 "content": "[CLS]",
237 "lstrip": false,
238 "normalized": false,
239 "rstrip": false,
240 "single_word": false,
241 "special": true
242 },
243 "50282": {
244 "content": "[SEP]",
245 "lstrip": false,
246 "normalized": false,
247 "rstrip": false,
248 "single_word": false,
249 "special": true
250 },
251 "50283": {
252 "content": "[PAD]",
253 "lstrip": false,
254 "normalized": false,
255 "rstrip": false,
256 "single_word": false,
257 "special": true
258 },
259 "50284": {
260 "content": "[MASK]",
261 "lstrip": true,
262 "normalized": false,
263 "rstrip": false,
264 "single_word": false,
265 "special": true
266 },
267 "50285": {
268 "content": "[unused0]",
269 "lstrip": false,
270 "normalized": true,
271 "rstrip": false,
272 "single_word": false,
273 "special": false
274 },
275 "50286": {
276 "content": "[unused1]",
277 "lstrip": false,
278 "normalized": true,
279 "rstrip": false,
280 "single_word": false,
281 "special": false
282 },
283 "50287": {
284 "content": "[unused2]",
285 "lstrip": false,
286 "normalized": true,
287 "rstrip": false,
288 "single_word": false,
289 "special": false
290 },
291 "50288": {
292 "content": "[unused3]",
293 "lstrip": false,
294 "normalized": true,
295 "rstrip": false,
296 "single_word": false,
297 "special": false
298 },
299 "50289": {
300 "content": "[unused4]",
301 "lstrip": false,
302 "normalized": true,
303 "rstrip": false,
304 "single_word": false,
305 "special": false
306 },
307 "50290": {
308 "content": "[unused5]",
309 "lstrip": false,
310 "normalized": true,
311 "rstrip": false,
312 "single_word": false,
313 "special": false
314 },
315 "50291": {
316 "content": "[unused6]",
317 "lstrip": false,
318 "normalized": true,
319 "rstrip": false,
320 "single_word": false,
321 "special": false
322 },
323 "50292": {
324 "content": "[unused7]",
325 "lstrip": false,
326 "normalized": true,
327 "rstrip": false,
328 "single_word": false,
329 "special": false
330 },
331 "50293": {
332 "content": "[unused8]",
333 "lstrip": false,
334 "normalized": true,
335 "rstrip": false,
336 "single_word": false,
337 "special": false
338 },
339 "50294": {
340 "content": "[unused9]",
341 "lstrip": false,
342 "normalized": true,
343 "rstrip": false,
344 "single_word": false,
345 "special": false
346 },
347 "50295": {
348 "content": "[unused10]",
349 "lstrip": false,
350 "normalized": true,
351 "rstrip": false,
352 "single_word": false,
353 "special": false
354 },
355 "50296": {
356 "content": "[unused11]",
357 "lstrip": false,
358 "normalized": true,
359 "rstrip": false,
360 "single_word": false,
361 "special": false
362 },
363 "50297": {
364 "content": "[unused12]",
365 "lstrip": false,
366 "normalized": true,
367 "rstrip": false,
368 "single_word": false,
369 "special": false
370 },
371 "50298": {
372 "content": "[unused13]",
373 "lstrip": false,
374 "normalized": true,
375 "rstrip": false,
376 "single_word": false,
377 "special": false
378 },
379 "50299": {
380 "content": "[unused14]",
381 "lstrip": false,
382 "normalized": true,
383 "rstrip": false,
384 "single_word": false,
385 "special": false
386 },
387 "50300": {
388 "content": "[unused15]",
389 "lstrip": false,
390 "normalized": true,
391 "rstrip": false,
392 "single_word": false,
393 "special": false
394 },
395 "50301": {
396 "content": "[unused16]",
397 "lstrip": false,
398 "normalized": true,
399 "rstrip": false,
400 "single_word": false,
401 "special": false
402 },
403 "50302": {
404 "content": "[unused17]",
405 "lstrip": false,
406 "normalized": true,
407 "rstrip": false,
408 "single_word": false,
409 "special": false
410 },
411 "50303": {
412 "content": "[unused18]",
413 "lstrip": false,
414 "normalized": true,
415 "rstrip": false,
416 "single_word": false,
417 "special": false
418 },
419 "50304": {
420 "content": "[unused19]",
421 "lstrip": false,
422 "normalized": true,
423 "rstrip": false,
424 "single_word": false,
425 "special": false
426 },
427 "50305": {
428 "content": "[unused20]",
429 "lstrip": false,
430 "normalized": true,
431 "rstrip": false,
432 "single_word": false,
433 "special": false
434 },
435 "50306": {
436 "content": "[unused21]",
437 "lstrip": false,
438 "normalized": true,
439 "rstrip": false,
440 "single_word": false,
441 "special": false
442 },
443 "50307": {
444 "content": "[unused22]",
445 "lstrip": false,
446 "normalized": true,
447 "rstrip": false,
448 "single_word": false,
449 "special": false
450 },
451 "50308": {
452 "content": "[unused23]",
453 "lstrip": false,
454 "normalized": true,
455 "rstrip": false,
456 "single_word": false,
457 "special": false
458 },
459 "50309": {
460 "content": "[unused24]",
461 "lstrip": false,
462 "normalized": true,
463 "rstrip": false,
464 "single_word": false,
465 "special": false
466 },
467 "50310": {
468 "content": "[unused25]",
469 "lstrip": false,
470 "normalized": true,
471 "rstrip": false,
472 "single_word": false,
473 "special": false
474 },
475 "50311": {
476 "content": "[unused26]",
477 "lstrip": false,
478 "normalized": true,
479 "rstrip": false,
480 "single_word": false,
481 "special": false
482 },
483 "50312": {
484 "content": "[unused27]",
485 "lstrip": false,
486 "normalized": true,
487 "rstrip": false,
488 "single_word": false,
489 "special": false
490 },
491 "50313": {
492 "content": "[unused28]",
493 "lstrip": false,
494 "normalized": true,
495 "rstrip": false,
496 "single_word": false,
497 "special": false
498 },
499 "50314": {
500 "content": "[unused29]",
501 "lstrip": false,
502 "normalized": true,
503 "rstrip": false,
504 "single_word": false,
505 "special": false
506 },
507 "50315": {
508 "content": "[unused30]",
509 "lstrip": false,
510 "normalized": true,
511 "rstrip": false,
512 "single_word": false,
513 "special": false
514 },
515 "50316": {
516 "content": "[unused31]",
517 "lstrip": false,
518 "normalized": true,
519 "rstrip": false,
520 "single_word": false,
521 "special": false
522 },
523 "50317": {
524 "content": "[unused32]",
525 "lstrip": false,
526 "normalized": true,
527 "rstrip": false,
528 "single_word": false,
529 "special": false
530 },
531 "50318": {
532 "content": "[unused33]",
533 "lstrip": false,
534 "normalized": true,
535 "rstrip": false,
536 "single_word": false,
537 "special": false
538 },
539 "50319": {
540 "content": "[unused34]",
541 "lstrip": false,
542 "normalized": true,
543 "rstrip": false,
544 "single_word": false,
545 "special": false
546 },
547 "50320": {
548 "content": "[unused35]",
549 "lstrip": false,
550 "normalized": true,
551 "rstrip": false,
552 "single_word": false,
553 "special": false
554 },
555 "50321": {
556 "content": "[unused36]",
557 "lstrip": false,
558 "normalized": true,
559 "rstrip": false,
560 "single_word": false,
561 "special": false
562 },
563 "50322": {
564 "content": "[unused37]",
565 "lstrip": false,
566 "normalized": true,
567 "rstrip": false,
568 "single_word": false,
569 "special": false
570 },
571 "50323": {
572 "content": "[unused38]",
573 "lstrip": false,
574 "normalized": true,
575 "rstrip": false,
576 "single_word": false,
577 "special": false
578 },
579 "50324": {
580 "content": "[unused39]",
581 "lstrip": false,
582 "normalized": true,
583 "rstrip": false,
584 "single_word": false,
585 "special": false
586 },
587 "50325": {
588 "content": "[unused40]",
589 "lstrip": false,
590 "normalized": true,
591 "rstrip": false,
592 "single_word": false,
593 "special": false
594 },
595 "50326": {
596 "content": "[unused41]",
597 "lstrip": false,
598 "normalized": true,
599 "rstrip": false,
600 "single_word": false,
601 "special": false
602 },
603 "50327": {
604 "content": "[unused42]",
605 "lstrip": false,
606 "normalized": true,
607 "rstrip": false,
608 "single_word": false,
609 "special": false
610 },
611 "50328": {
612 "content": "[unused43]",
613 "lstrip": false,
614 "normalized": true,
615 "rstrip": false,
616 "single_word": false,
617 "special": false
618 },
619 "50329": {
620 "content": "[unused44]",
621 "lstrip": false,
622 "normalized": true,
623 "rstrip": false,
624 "single_word": false,
625 "special": false
626 },
627 "50330": {
628 "content": "[unused45]",
629 "lstrip": false,
630 "normalized": true,
631 "rstrip": false,
632 "single_word": false,
633 "special": false
634 },
635 "50331": {
636 "content": "[unused46]",
637 "lstrip": false,
638 "normalized": true,
639 "rstrip": false,
640 "single_word": false,
641 "special": false
642 },
643 "50332": {
644 "content": "[unused47]",
645 "lstrip": false,
646 "normalized": true,
647 "rstrip": false,
648 "single_word": false,
649 "special": false
650 },
651 "50333": {
652 "content": "[unused48]",
653 "lstrip": false,
654 "normalized": true,
655 "rstrip": false,
656 "single_word": false,
657 "special": false
658 },
659 "50334": {
660 "content": "[unused49]",
661 "lstrip": false,
662 "normalized": true,
663 "rstrip": false,
664 "single_word": false,
665 "special": false
666 },
667 "50335": {
668 "content": "[unused50]",
669 "lstrip": false,
670 "normalized": true,
671 "rstrip": false,
672 "single_word": false,
673 "special": false
674 },
675 "50336": {
676 "content": "[unused51]",
677 "lstrip": false,
678 "normalized": true,
679 "rstrip": false,
680 "single_word": false,
681 "special": false
682 },
683 "50337": {
684 "content": "[unused52]",
685 "lstrip": false,
686 "normalized": true,
687 "rstrip": false,
688 "single_word": false,
689 "special": false
690 },
691 "50338": {
692 "content": "[unused53]",
693 "lstrip": false,
694 "normalized": true,
695 "rstrip": false,
696 "single_word": false,
697 "special": false
698 },
699 "50339": {
700 "content": "[unused54]",
701 "lstrip": false,
702 "normalized": true,
703 "rstrip": false,
704 "single_word": false,
705 "special": false
706 },
707 "50340": {
708 "content": "[unused55]",
709 "lstrip": false,
710 "normalized": true,
711 "rstrip": false,
712 "single_word": false,
713 "special": false
714 },
715 "50341": {
716 "content": "[unused56]",
717 "lstrip": false,
718 "normalized": true,
719 "rstrip": false,
720 "single_word": false,
721 "special": false
722 },
723 "50342": {
724 "content": "[unused57]",
725 "lstrip": false,
726 "normalized": true,
727 "rstrip": false,
728 "single_word": false,
729 "special": false
730 },
731 "50343": {
732 "content": "[unused58]",
733 "lstrip": false,
734 "normalized": true,
735 "rstrip": false,
736 "single_word": false,
737 "special": false
738 },
739 "50344": {
740 "content": "[unused59]",
741 "lstrip": false,
742 "normalized": true,
743 "rstrip": false,
744 "single_word": false,
745 "special": false
746 },
747 "50345": {
748 "content": "[unused60]",
749 "lstrip": false,
750 "normalized": true,
751 "rstrip": false,
752 "single_word": false,
753 "special": false
754 },
755 "50346": {
756 "content": "[unused61]",
757 "lstrip": false,
758 "normalized": true,
759 "rstrip": false,
760 "single_word": false,
761 "special": false
762 },
763 "50347": {
764 "content": "[unused62]",
765 "lstrip": false,
766 "normalized": true,
767 "rstrip": false,
768 "single_word": false,
769 "special": false
770 },
771 "50348": {
772 "content": "[unused63]",
773 "lstrip": false,
774 "normalized": true,
775 "rstrip": false,
776 "single_word": false,
777 "special": false
778 },
779 "50349": {
780 "content": "[unused64]",
781 "lstrip": false,
782 "normalized": true,
783 "rstrip": false,
784 "single_word": false,
785 "special": false
786 },
787 "50350": {
788 "content": "[unused65]",
789 "lstrip": false,
790 "normalized": true,
791 "rstrip": false,
792 "single_word": false,
793 "special": false
794 },
795 "50351": {
796 "content": "[unused66]",
797 "lstrip": false,
798 "normalized": true,
799 "rstrip": false,
800 "single_word": false,
801 "special": false
802 },
803 "50352": {
804 "content": "[unused67]",
805 "lstrip": false,
806 "normalized": true,
807 "rstrip": false,
808 "single_word": false,
809 "special": false
810 },
811 "50353": {
812 "content": "[unused68]",
813 "lstrip": false,
814 "normalized": true,
815 "rstrip": false,
816 "single_word": false,
817 "special": false
818 },
819 "50354": {
820 "content": "[unused69]",
821 "lstrip": false,
822 "normalized": true,
823 "rstrip": false,
824 "single_word": false,
825 "special": false
826 },
827 "50355": {
828 "content": "[unused70]",
829 "lstrip": false,
830 "normalized": true,
831 "rstrip": false,
832 "single_word": false,
833 "special": false
834 },
835 "50356": {
836 "content": "[unused71]",
837 "lstrip": false,
838 "normalized": true,
839 "rstrip": false,
840 "single_word": false,
841 "special": false
842 },
843 "50357": {
844 "content": "[unused72]",
845 "lstrip": false,
846 "normalized": true,
847 "rstrip": false,
848 "single_word": false,
849 "special": false
850 },
851 "50358": {
852 "content": "[unused73]",
853 "lstrip": false,
854 "normalized": true,
855 "rstrip": false,
856 "single_word": false,
857 "special": false
858 },
859 "50359": {
860 "content": "[unused74]",
861 "lstrip": false,
862 "normalized": true,
863 "rstrip": false,
864 "single_word": false,
865 "special": false
866 },
867 "50360": {
868 "content": "[unused75]",
869 "lstrip": false,
870 "normalized": true,
871 "rstrip": false,
872 "single_word": false,
873 "special": false
874 },
875 "50361": {
876 "content": "[unused76]",
877 "lstrip": false,
878 "normalized": true,
879 "rstrip": false,
880 "single_word": false,
881 "special": false
882 },
883 "50362": {
884 "content": "[unused77]",
885 "lstrip": false,
886 "normalized": true,
887 "rstrip": false,
888 "single_word": false,
889 "special": false
890 },
891 "50363": {
892 "content": "[unused78]",
893 "lstrip": false,
894 "normalized": true,
895 "rstrip": false,
896 "single_word": false,
897 "special": false
898 },
899 "50364": {
900 "content": "[unused79]",
901 "lstrip": false,
902 "normalized": true,
903 "rstrip": false,
904 "single_word": false,
905 "special": false
906 },
907 "50365": {
908 "content": "[unused80]",
909 "lstrip": false,
910 "normalized": true,
911 "rstrip": false,
912 "single_word": false,
913 "special": false
914 },
915 "50366": {
916 "content": "[unused81]",
917 "lstrip": false,
918 "normalized": true,
919 "rstrip": false,
920 "single_word": false,
921 "special": false
922 },
923 "50367": {
924 "content": "[unused82]",
925 "lstrip": false,
926 "normalized": true,
927 "rstrip": false,
928 "single_word": false,
929 "special": false
930 }
931 },
932 "clean_up_tokenization_spaces": true,
933 "cls_token": "[CLS]",
934 "mask_token": "[MASK]",
935 "model_max_length": 8192,
936 "pad_token": "[PAD]",
937 "sep_token": "[SEP]",
938 "tokenizer_class": "PreTrainedTokenizerFast",
939 "model_input_names": [
940 "input_ids",
941 "attention_mask"
942 ],
943 "unk_token": "[UNK]"
944 }
945