diff --git a/modules/charset/ambiguous.go b/modules/charset/ambiguous.go new file mode 100644 index 000000000000..9dab3b0951bf --- /dev/null +++ b/modules/charset/ambiguous.go @@ -0,0 +1,54 @@ +// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import ( + "sort" + "strings" + "unicode" + + "code.gitea.io/gitea/modules/translation" +) + +// AmbiguousTablesForLocale provides the table of ambiguous characters for this locale. +func AmbiguousTablesForLocale(locale translation.Locale) []*AmbiguousTable { + key := locale.Language() + var table *AmbiguousTable + var ok bool + for len(key) > 0 { + if table, ok = AmbiguousCharacters[key]; ok { + break + } + idx := strings.LastIndexAny(key, "-_") + if idx < 0 { + key = "" + } else { + key = key[:idx] + } + } + if table == nil { + table = AmbiguousCharacters["_default"] + } + + return []*AmbiguousTable{ + table, + AmbiguousCharacters["_common"], + } +} + +func isAmbiguous(r rune, confusableTo *rune, tables ...*AmbiguousTable) bool { + for _, table := range tables { + if !unicode.Is(table.RangeTable, r) { + continue + } + i := sort.Search(len(table.Confusable), func(i int) bool { + return table.Confusable[i] >= r + }) + (*confusableTo) = table.With[i] + return true + } + return false +} diff --git a/modules/charset/ambiguous/ambiguous.json b/modules/charset/ambiguous/ambiguous.json new file mode 100644 index 000000000000..d0f69f6ae2b1 --- /dev/null +++ b/modules/charset/ambiguous/ambiguous.json @@ -0,0 +1 @@ +"{\"_common\":[8232,32,8233,32,5760,32,8192,32,8193,32,8194,32,8195,32,8196,32,8197,32,8198,32,8200,32,8201,32,8202,32,8287,32,8199,32,8239,32,2042,95,65101,95,65102,95,65103,95,8208,45,8209,45,8210,45,65112,45,1748,45,8259,45,727,45,8722,45,10134,45,11450,45,1549,44,1643,44,8218,44,184,44,42233,44,894,59,2307,58,2691,58,1417,58,1795,58,1796,58,5868,58,65072,58,6147,58,6153,58,8282,58,1475,58,760,58,42889,58,8758,58,720,58,42237,58,451,33,11601,33,660,63,577,63,2429,63,5038,63,42731,63,119149,46,8228,46,1793,46,1794,46,42510,46,68176,46,1632,46,1776,46,42232,46,1373,96,65287,96,8219,96,8242,96,1370,96,1523,96,8175,96,65344,96,900,96,8189,96,8125,96,8127,96,8190,96,697,96,884,96,712,96,714,96,715,96,756,96,699,96,701,96,700,96,702,96,42892,96,1497,96,2036,96,2037,96,5194,96,5836,96,94033,96,94034,96,65339,91,10088,40,10098,40,12308,40,64830,40,65341,93,10089,41,10099,41,12309,41,64831,41,10100,123,119060,123,10101,125,65342,94,8270,42,1645,42,8727,42,66335,42,5941,47,8257,47,8725,47,8260,47,9585,47,10187,47,10744,47,119354,47,12755,47,12339,47,11462,47,20031,47,12035,47,65340,92,65128,92,8726,92,10189,92,10741,92,10745,92,119311,92,119355,92,12756,92,20022,92,12034,92,42872,38,708,94,710,94,5869,43,10133,43,66203,43,8249,60,10094,60,706,60,119350,60,5176,60,5810,60,5120,61,11840,61,12448,61,42239,61,8250,62,10095,62,707,62,119351,62,5171,62,94015,62,8275,126,732,126,8128,126,8764,126,65372,124,65293,45,120784,50,120794,50,120804,50,120814,50,120824,50,130034,50,42842,50,423,50,1000,50,42564,50,5311,50,42735,50,119302,51,120785,51,120795,51,120805,51,120815,51,120825,51,130035,51,42923,51,540,51,439,51,42858,51,11468,51,1248,51,94011,51,71882,51,120786,52,120796,52,120806,52,120816,52,120826,52,130036,52,5070,52,71855,52,120787,53,120797,53,120807,53,120817,53,120827,53,130037,53,444,53,71867,53,120788,54,120798,54,120808,54,120818,54,120828,54,130038,54,11474,54,5102,54,71893,54,119314,55,120789,55,120799,55,120809,55,120819,55,120829,55,130039,55,66770,55,71878,55,2819,56,2538,56,2666,56,125131,56,120790,56,120800,56,120810,56,120820,56,120830,56,130040,56,547,56,546,56,66330,56,2663,57,2920,57,2541,57,3437,57,120791,57,120801,57,120811,57,120821,57,120831,57,130041,57,42862,57,11466,57,71884,57,71852,57,71894,57,9082,97,65345,97,119834,97,119886,97,119938,97,119990,97,120042,97,120094,97,120146,97,120198,97,120250,97,120302,97,120354,97,120406,97,120458,97,593,97,945,97,120514,97,120572,97,120630,97,120688,97,120746,97,65313,65,119808,65,119860,65,119912,65,119964,65,120016,65,120068,65,120120,65,120172,65,120224,65,120276,65,120328,65,120380,65,120432,65,913,65,120488,65,120546,65,120604,65,120662,65,120720,65,5034,65,5573,65,42222,65,94016,65,66208,65,119835,98,119887,98,119939,98,119991,98,120043,98,120095,98,120147,98,120199,98,120251,98,120303,98,120355,98,120407,98,120459,98,388,98,5071,98,5234,98,5551,98,65314,66,8492,66,119809,66,119861,66,119913,66,120017,66,120069,66,120121,66,120173,66,120225,66,120277,66,120329,66,120381,66,120433,66,42932,66,914,66,120489,66,120547,66,120605,66,120663,66,120721,66,5108,66,5623,66,42192,66,66178,66,66209,66,66305,66,65347,99,8573,99,119836,99,119888,99,119940,99,119992,99,120044,99,120096,99,120148,99,120200,99,120252,99,120304,99,120356,99,120408,99,120460,99,7428,99,1010,99,11429,99,43951,99,66621,99,128844,67,71922,67,71913,67,65315,67,8557,67,8450,67,8493,67,119810,67,119862,67,119914,67,119966,67,120018,67,120174,67,120226,67,120278,67,120330,67,120382,67,120434,67,1017,67,11428,67,5087,67,42202,67,66210,67,66306,67,66581,67,66844,67,8574,100,8518,100,119837,100,119889,100,119941,100,119993,100,120045,100,120097,100,120149,100,120201,100,120253,100,120305,100,120357,100,120409,100,120461,100,1281,100,5095,100,5231,100,42194,100,8558,68,8517,68,119811,68,119863,68,119915,68,119967,68,120019,68,120071,68,120123,68,120175,68,120227,68,120279,68,120331,68,120383,68,120435,68,5024,68,5598,68,5610,68,42195,68,8494,101,65349,101,8495,101,8519,101,119838,101,119890,101,119942,101,120046,101,120098,101,120150,101,120202,101,120254,101,120306,101,120358,101,120410,101,120462,101,43826,101,1213,101,8959,69,65317,69,8496,69,119812,69,119864,69,119916,69,120020,69,120072,69,120124,69,120176,69,120228,69,120280,69,120332,69,120384,69,120436,69,917,69,120492,69,120550,69,120608,69,120666,69,120724,69,11577,69,5036,69,42224,69,71846,69,71854,69,66182,69,119839,102,119891,102,119943,102,119995,102,120047,102,120099,102,120151,102,120203,102,120255,102,120307,102,120359,102,120411,102,120463,102,43829,102,42905,102,383,102,7837,102,1412,102,119315,70,8497,70,119813,70,119865,70,119917,70,120021,70,120073,70,120125,70,120177,70,120229,70,120281,70,120333,70,120385,70,120437,70,42904,70,988,70,120778,70,5556,70,42205,70,71874,70,71842,70,66183,70,66213,70,66853,70,65351,103,8458,103,119840,103,119892,103,119944,103,120048,103,120100,103,120152,103,120204,103,120256,103,120308,103,120360,103,120412,103,120464,103,609,103,7555,103,397,103,1409,103,119814,71,119866,71,119918,71,119970,71,120022,71,120074,71,120126,71,120178,71,120230,71,120282,71,120334,71,120386,71,120438,71,1292,71,5056,71,5107,71,42198,71,65352,104,8462,104,119841,104,119945,104,119997,104,120049,104,120101,104,120153,104,120205,104,120257,104,120309,104,120361,104,120413,104,120465,104,1211,104,1392,104,5058,104,65320,72,8459,72,8460,72,8461,72,119815,72,119867,72,119919,72,120023,72,120179,72,120231,72,120283,72,120335,72,120387,72,120439,72,919,72,120494,72,120552,72,120610,72,120668,72,120726,72,11406,72,5051,72,5500,72,42215,72,66255,72,731,105,9075,105,65353,105,8560,105,8505,105,8520,105,119842,105,119894,105,119946,105,119998,105,120050,105,120102,105,120154,105,120206,105,120258,105,120310,105,120362,105,120414,105,120466,105,120484,105,618,105,617,105,953,105,8126,105,890,105,120522,105,120580,105,120638,105,120696,105,120754,105,1110,105,42567,105,1231,105,43893,105,5029,105,71875,105,65354,106,8521,106,119843,106,119895,106,119947,106,119999,106,120051,106,120103,106,120155,106,120207,106,120259,106,120311,106,120363,106,120415,106,120467,106,1011,106,1112,106,65322,74,119817,74,119869,74,119921,74,119973,74,120025,74,120077,74,120129,74,120181,74,120233,74,120285,74,120337,74,120389,74,120441,74,42930,74,895,74,1032,74,5035,74,5261,74,42201,74,119844,107,119896,107,119948,107,120000,107,120052,107,120104,107,120156,107,120208,107,120260,107,120312,107,120364,107,120416,107,120468,107,8490,75,65323,75,119818,75,119870,75,119922,75,119974,75,120026,75,120078,75,120130,75,120182,75,120234,75,120286,75,120338,75,120390,75,120442,75,922,75,120497,75,120555,75,120613,75,120671,75,120729,75,11412,75,5094,75,5845,75,42199,75,66840,75,1472,108,8739,73,9213,73,65512,73,1633,108,1777,73,66336,108,125127,108,120783,73,120793,73,120803,73,120813,73,120823,73,130033,73,65321,73,8544,73,8464,73,8465,73,119816,73,119868,73,119920,73,120024,73,120128,73,120180,73,120232,73,120284,73,120336,73,120388,73,120440,73,65356,108,8572,73,8467,108,119845,108,119897,108,119949,108,120001,108,120053,108,120105,73,120157,73,120209,73,120261,73,120313,73,120365,73,120417,73,120469,73,448,73,120496,73,120554,73,120612,73,120670,73,120728,73,11410,73,1030,73,1216,73,1493,108,1503,108,1575,108,126464,108,126592,108,65166,108,65165,108,1994,108,11599,73,5825,73,42226,73,93992,73,66186,124,66313,124,119338,76,8556,76,8466,76,119819,76,119871,76,119923,76,120027,76,120079,76,120131,76,120183,76,120235,76,120287,76,120339,76,120391,76,120443,76,11472,76,5086,76,5290,76,42209,76,93974,76,71843,76,71858,76,66587,76,66854,76,65325,77,8559,77,8499,77,119820,77,119872,77,119924,77,120028,77,120080,77,120132,77,120184,77,120236,77,120288,77,120340,77,120392,77,120444,77,924,77,120499,77,120557,77,120615,77,120673,77,120731,77,1018,77,11416,77,5047,77,5616,77,5846,77,42207,77,66224,77,66321,77,119847,110,119899,110,119951,110,120003,110,120055,110,120107,110,120159,110,120211,110,120263,110,120315,110,120367,110,120419,110,120471,110,1400,110,1404,110,65326,78,8469,78,119821,78,119873,78,119925,78,119977,78,120029,78,120081,78,120185,78,120237,78,120289,78,120341,78,120393,78,120445,78,925,78,120500,78,120558,78,120616,78,120674,78,120732,78,11418,78,42208,78,66835,78,3074,111,3202,111,3330,111,3458,111,2406,111,2662,111,2790,111,3046,111,3174,111,3302,111,3430,111,3664,111,3792,111,4160,111,1637,111,1781,111,65359,111,8500,111,119848,111,119900,111,119952,111,120056,111,120108,111,120160,111,120212,111,120264,111,120316,111,120368,111,120420,111,120472,111,7439,111,7441,111,43837,111,959,111,120528,111,120586,111,120644,111,120702,111,120760,111,963,111,120532,111,120590,111,120648,111,120706,111,120764,111,11423,111,4351,111,1413,111,1505,111,1607,111,126500,111,126564,111,126596,111,65259,111,65260,111,65258,111,65257,111,1726,111,64428,111,64429,111,64427,111,64426,111,1729,111,64424,111,64425,111,64423,111,64422,111,1749,111,3360,111,4125,111,66794,111,71880,111,71895,111,66604,111,1984,79,2534,79,2918,79,12295,79,70864,79,71904,79,120782,79,120792,79,120802,79,120812,79,120822,79,130032,79,65327,79,119822,79,119874,79,119926,79,119978,79,120030,79,120082,79,120134,79,120186,79,120238,79,120290,79,120342,79,120394,79,120446,79,927,79,120502,79,120560,79,120618,79,120676,79,120734,79,11422,79,1365,79,11604,79,4816,79,2848,79,66754,79,42227,79,71861,79,66194,79,66219,79,66564,79,66838,79,9076,112,65360,112,119849,112,119901,112,119953,112,120005,112,120057,112,120109,112,120161,112,120213,112,120265,112,120317,112,120369,112,120421,112,120473,112,961,112,120530,112,120544,112,120588,112,120602,112,120646,112,120660,112,120704,112,120718,112,120762,112,120776,112,11427,112,65328,80,8473,80,119823,80,119875,80,119927,80,119979,80,120031,80,120083,80,120187,80,120239,80,120291,80,120343,80,120395,80,120447,80,929,80,120504,80,120562,80,120620,80,120678,80,120736,80,11426,80,5090,80,5229,80,42193,80,66197,80,119850,113,119902,113,119954,113,120006,113,120058,113,120110,113,120162,113,120214,113,120266,113,120318,113,120370,113,120422,113,120474,113,1307,113,1379,113,1382,113,8474,81,119824,81,119876,81,119928,81,119980,81,120032,81,120084,81,120188,81,120240,81,120292,81,120344,81,120396,81,120448,81,11605,81,119851,114,119903,114,119955,114,120007,114,120059,114,120111,114,120163,114,120215,114,120267,114,120319,114,120371,114,120423,114,120475,114,43847,114,43848,114,7462,114,11397,114,43905,114,119318,82,8475,82,8476,82,8477,82,119825,82,119877,82,119929,82,120033,82,120189,82,120241,82,120293,82,120345,82,120397,82,120449,82,422,82,5025,82,5074,82,66740,82,5511,82,42211,82,94005,82,65363,115,119852,115,119904,115,119956,115,120008,115,120060,115,120112,115,120164,115,120216,115,120268,115,120320,115,120372,115,120424,115,120476,115,42801,115,445,115,1109,115,43946,115,71873,115,66632,115,65331,83,119826,83,119878,83,119930,83,119982,83,120034,83,120086,83,120138,83,120190,83,120242,83,120294,83,120346,83,120398,83,120450,83,1029,83,1359,83,5077,83,5082,83,42210,83,94010,83,66198,83,66592,83,119853,116,119905,116,119957,116,120009,116,120061,116,120113,116,120165,116,120217,116,120269,116,120321,116,120373,116,120425,116,120477,116,8868,84,10201,84,128872,84,65332,84,119827,84,119879,84,119931,84,119983,84,120035,84,120087,84,120139,84,120191,84,120243,84,120295,84,120347,84,120399,84,120451,84,932,84,120507,84,120565,84,120623,84,120681,84,120739,84,11430,84,5026,84,42196,84,93962,84,71868,84,66199,84,66225,84,66325,84,119854,117,119906,117,119958,117,120010,117,120062,117,120114,117,120166,117,120218,117,120270,117,120322,117,120374,117,120426,117,120478,117,42911,117,7452,117,43854,117,43858,117,651,117,965,117,120534,117,120592,117,120650,117,120708,117,120766,117,1405,117,66806,117,71896,117,8746,85,8899,85,119828,85,119880,85,119932,85,119984,85,120036,85,120088,85,120140,85,120192,85,120244,85,120296,85,120348,85,120400,85,120452,85,1357,85,4608,85,66766,85,5196,85,42228,85,94018,85,71864,85,8744,118,8897,118,65366,118,8564,118,119855,118,119907,118,119959,118,120011,118,120063,118,120115,118,120167,118,120219,118,120271,118,120323,118,120375,118,120427,118,120479,118,7456,118,957,118,120526,118,120584,118,120642,118,120700,118,120758,118,1141,118,1496,118,71430,118,43945,118,71872,118,119309,86,1639,86,1783,86,8548,86,119829,86,119881,86,119933,86,119985,86,120037,86,120089,86,120141,86,120193,86,120245,86,120297,86,120349,86,120401,86,120453,86,1140,86,11576,86,5081,86,5167,86,42719,86,42214,86,93960,86,71840,86,66845,86,623,119,119856,119,119908,119,119960,119,120012,119,120064,119,120116,119,120168,119,120220,119,120272,119,120324,119,120376,119,120428,119,120480,119,7457,119,1121,119,1309,119,1377,119,71434,119,71438,119,71439,119,43907,119,71919,87,71910,87,119830,87,119882,87,119934,87,119986,87,120038,87,120090,87,120142,87,120194,87,120246,87,120298,87,120350,87,120402,87,120454,87,1308,87,5043,87,5076,87,42218,87,5742,120,10539,120,10540,120,10799,120,65368,120,8569,120,119857,120,119909,120,119961,120,120013,120,120065,120,120117,120,120169,120,120221,120,120273,120,120325,120,120377,120,120429,120,120481,120,5441,120,5501,120,5741,88,9587,88,66338,88,71916,88,65336,88,8553,88,119831,88,119883,88,119935,88,119987,88,120039,88,120091,88,120143,88,120195,88,120247,88,120299,88,120351,88,120403,88,120455,88,42931,88,935,88,120510,88,120568,88,120626,88,120684,88,120742,88,11436,88,11613,88,5815,88,42219,88,66192,88,66228,88,66327,88,66855,88,611,121,7564,121,65369,121,119858,121,119910,121,119962,121,120014,121,120066,121,120118,121,120170,121,120222,121,120274,121,120326,121,120378,121,120430,121,120482,121,655,121,7935,121,43866,121,947,121,8509,121,120516,121,120574,121,120632,121,120690,121,120748,121,1199,121,4327,121,71900,121,65337,89,119832,89,119884,89,119936,89,119988,89,120040,89,120092,89,120144,89,120196,89,120248,89,120300,89,120352,89,120404,89,120456,89,933,89,978,89,120508,89,120566,89,120624,89,120682,89,120740,89,11432,89,1198,89,5033,89,5053,89,42220,89,94019,89,71844,89,66226,89,119859,122,119911,122,119963,122,120015,122,120067,122,120119,122,120171,122,120223,122,120275,122,120327,122,120379,122,120431,122,120483,122,7458,122,43923,122,71876,122,66293,90,71909,90,65338,90,8484,90,8488,90,119833,90,119885,90,119937,90,119989,90,120041,90,120197,90,120249,90,120301,90,120353,90,120405,90,120457,90,918,90,120493,90,120551,90,120609,90,120667,90,120725,90,5059,90,42204,90,71849,90,65282,34,65284,36,65285,37,65286,38,65290,42,65291,43,65294,46,65295,47,65296,48,65297,49,65298,50,65299,51,65300,52,65301,53,65302,54,65303,55,65304,56,65305,57,65308,60,65309,61,65310,62,65312,64,65316,68,65318,70,65319,71,65324,76,65329,81,65330,82,65333,85,65334,86,65335,87,65343,95,65346,98,65348,100,65350,102,65355,107,65357,109,65358,110,65361,113,65362,114,65364,116,65365,117,65367,119,65370,122,65371,123,65373,125],\"_default\":[160,32,8211,45,65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"cs\":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"de\":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"es\":[8211,45,65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"fr\":[65374,126,65306,58,65281,33,8216,96,8245,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"it\":[160,32,8211,45,65374,126,65306,58,65281,33,8216,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"ja\":[8211,45,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65292,44,65307,59],\"ko\":[8211,45,65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"pl\":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"pt-BR\":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"qps-ploc\":[160,32,8211,45,65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"ru\":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,305,105,921,73,1009,112,215,120,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"tr\":[160,32,8211,45,65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"zh-hans\":[65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65288,40,65289,41],\"zh-hant\":[8211,45,65374,126,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65307,59]}" \ No newline at end of file diff --git a/modules/charset/ambiguous/generate.go b/modules/charset/ambiguous/generate.go new file mode 100644 index 000000000000..43cdb217a79a --- /dev/null +++ b/modules/charset/ambiguous/generate.go @@ -0,0 +1,178 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package main + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "os" + "sort" + "text/template" + "unicode" + + "code.gitea.io/gitea/modules/json" + + "golang.org/x/text/unicode/rangetable" +) + +// ambiguous.json provides a one to one mapping of ambiguous characters to other characters +// See https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json + +type AmbiguousTable struct { + Confusable []rune + With []rune + Locale string + RangeTable *unicode.RangeTable +} + +type RunePair struct { + Confusable rune + With rune +} + +var verbose bool + +func main() { + flag.Usage = func() { + fmt.Fprintf(os.Stderr, `%s: Generate AmbiguousCharacter + +Usage: %[1]s [-v] [-o output.go] ambiguous.json +`, os.Args[0]) + flag.PrintDefaults() + } + + output := "" + flag.BoolVar(&verbose, "v", false, "verbose output") + flag.StringVar(&output, "o", "ambiguous_gen.go", "file to output to") + flag.Parse() + input := flag.Arg(0) + if input == "" { + input = "ambiguous.json" + } + + bs, err := os.ReadFile(input) + if err != nil { + fatalf("Unable to read: %s Err: %v", input, err) + } + + var unwrapped string + if err := json.Unmarshal(bs, &unwrapped); err != nil { + fatalf("Unable to unwrap content in: %s Err: %v", input, err) + } + + fromJSON := map[string][]uint32{} + if err := json.Unmarshal([]byte(unwrapped), &fromJSON); err != nil { + fatalf("Unable to unmarshal content in: %s Err: %v", input, err) + } + + tables := make([]*AmbiguousTable, 0, len(fromJSON)) + for locale, chars := range fromJSON { + table := &AmbiguousTable{Locale: locale} + table.Confusable = make([]rune, 0, len(chars)/2) + table.With = make([]rune, 0, len(chars)/2) + pairs := make([]RunePair, len(chars)/2) + for i := 0; i < len(chars); i += 2 { + pairs[i/2].Confusable, pairs[i/2].With = rune(chars[i]), rune(chars[i+1]) + } + sort.Slice(pairs, func(i, j int) bool { + return pairs[i].Confusable < pairs[j].Confusable + }) + for _, pair := range pairs { + table.Confusable = append(table.Confusable, pair.Confusable) + table.With = append(table.With, pair.With) + } + table.RangeTable = rangetable.New(table.Confusable...) + tables = append(tables, table) + } + sort.Slice(tables, func(i, j int) bool { + return tables[i].Locale < tables[j].Locale + }) + data := map[string]interface{}{ + "Tables": tables, + } + + if err := runTemplate(generatorTemplate, output, &data); err != nil { + fatalf("Unable to run template: %v", err) + } +} + +func runTemplate(t *template.Template, filename string, data interface{}) error { + buf := bytes.NewBuffer(nil) + if err := t.Execute(buf, data); err != nil { + return fmt.Errorf("unable to execute template: %w", err) + } + bs, err := format.Source(buf.Bytes()) + if err != nil { + verbosef("Bad source:\n%s", buf.String()) + return fmt.Errorf("unable to format source: %w", err) + } + file, err := os.Create(filename) + if err != nil { + return fmt.Errorf("failed to create file %s because %w", filename, err) + } + defer file.Close() + _, err = file.Write(bs) + if err != nil { + return fmt.Errorf("unable to write generated source: %w", err) + } + return nil +} + +var generatorTemplate = template.Must(template.New("ambiguousTemplate").Parse(`// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import "unicode" + +// This file is generated from https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json + +// AmbiguousTable matches a confusable rune with its partner for the Locale +type AmbiguousTable struct { + Confusable []rune + With []rune + Locale string + RangeTable *unicode.RangeTable +} + +// AmbiguousCharacters provides a map by locale name to the confusable characters in that locale +var AmbiguousCharacters = map[string]*AmbiguousTable{ + {{range .Tables}}{{printf "%q:" .Locale}} { + Confusable: []rune{ {{range .Confusable}}{{.}},{{end}} }, + With: []rune{ {{range .With}}{{.}},{{end}} }, + Locale: {{printf "%q" .Locale}}, + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {{range .RangeTable.R16 }} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}}, + {{end}} }, + R32: []unicode.Range32{ + {{range .RangeTable.R32}} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}}, + {{end}} }, + LatinOffset: {{.RangeTable.LatinOffset}}, + }, + }, + {{end}} +} + +`)) + +func logf(format string, args ...interface{}) { + fmt.Fprintf(os.Stderr, format+"\n", args...) +} + +func verbosef(format string, args ...interface{}) { + if verbose { + logf(format, args...) + } +} + +func fatalf(format string, args ...interface{}) { + logf("fatal: "+format+"\n", args...) + os.Exit(1) +} diff --git a/modules/charset/ambiguous_gen.go b/modules/charset/ambiguous_gen.go new file mode 100644 index 000000000000..cc270affac52 --- /dev/null +++ b/modules/charset/ambiguous_gen.go @@ -0,0 +1,837 @@ +// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import "unicode" + +// This file is generated from https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json + +// AmbiguousTable matches a confusable rune with its partner for the Locale +type AmbiguousTable struct { + Confusable []rune + With []rune + Locale string + RangeTable *unicode.RangeTable +} + +// AmbiguousCharacters provides a map by locale name to the confusable characters in that locale +var AmbiguousCharacters = map[string]*AmbiguousTable{ + "_common": { + Confusable: []rune{184, 383, 388, 397, 422, 423, 439, 444, 445, 448, 451, 540, 546, 547, 577, 593, 609, 611, 617, 618, 623, 651, 655, 660, 697, 699, 700, 701, 702, 706, 707, 708, 710, 712, 714, 715, 720, 727, 731, 732, 756, 760, 884, 890, 894, 895, 900, 913, 914, 917, 918, 919, 922, 924, 925, 927, 929, 932, 933, 935, 945, 947, 953, 957, 959, 961, 963, 965, 978, 988, 1000, 1010, 1011, 1017, 1018, 1029, 1030, 1032, 1109, 1110, 1112, 1121, 1140, 1141, 1198, 1199, 1211, 1213, 1216, 1231, 1248, 1281, 1292, 1307, 1308, 1309, 1357, 1359, 1365, 1370, 1373, 1377, 1379, 1382, 1392, 1400, 1404, 1405, 1409, 1412, 1413, 1417, 1472, 1475, 1493, 1496, 1497, 1503, 1505, 1523, 1549, 1575, 1607, 1632, 1633, 1637, 1639, 1643, 1645, 1726, 1729, 1748, 1749, 1776, 1777, 1781, 1783, 1793, 1794, 1795, 1796, 1984, 1994, 2036, 2037, 2042, 2307, 2406, 2429, 2534, 2538, 2541, 2662, 2663, 2666, 2691, 2790, 2819, 2848, 2918, 2920, 3046, 3074, 3174, 3202, 3302, 3330, 3360, 3430, 3437, 3458, 3664, 3792, 4125, 4160, 4327, 4351, 4608, 4816, 5024, 5025, 5026, 5029, 5033, 5034, 5035, 5036, 5038, 5043, 5047, 5051, 5053, 5056, 5058, 5059, 5070, 5071, 5074, 5076, 5077, 5081, 5082, 5086, 5087, 5090, 5094, 5095, 5102, 5107, 5108, 5120, 5167, 5171, 5176, 5194, 5196, 5229, 5231, 5234, 5261, 5290, 5311, 5441, 5500, 5501, 5511, 5551, 5556, 5573, 5598, 5610, 5616, 5623, 5741, 5742, 5760, 5810, 5815, 5825, 5836, 5845, 5846, 5868, 5869, 5941, 6147, 6153, 7428, 7439, 7441, 7452, 7456, 7457, 7458, 7462, 7555, 7564, 7837, 7935, 8125, 8126, 8127, 8128, 8175, 8189, 8190, 8192, 8193, 8194, 8195, 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8208, 8209, 8210, 8218, 8219, 8228, 8232, 8233, 8239, 8242, 8249, 8250, 8257, 8259, 8260, 8270, 8275, 8282, 8287, 8450, 8458, 8459, 8460, 8461, 8462, 8464, 8465, 8466, 8467, 8469, 8473, 8474, 8475, 8476, 8477, 8484, 8488, 8490, 8492, 8493, 8494, 8495, 8496, 8497, 8499, 8500, 8505, 8509, 8517, 8518, 8519, 8520, 8521, 8544, 8548, 8553, 8556, 8557, 8558, 8559, 8560, 8564, 8569, 8572, 8573, 8574, 8722, 8725, 8726, 8727, 8739, 8744, 8746, 8758, 8764, 8868, 8897, 8899, 8959, 9075, 9076, 9082, 9213, 9585, 9587, 10088, 10089, 10094, 10095, 10098, 10099, 10100, 10101, 10133, 10134, 10187, 10189, 10201, 10539, 10540, 10741, 10744, 10745, 10799, 11397, 11406, 11410, 11412, 11416, 11418, 11422, 11423, 11426, 11427, 11428, 11429, 11430, 11432, 11436, 11450, 11462, 11466, 11468, 11472, 11474, 11576, 11577, 11599, 11601, 11604, 11605, 11613, 11840, 12034, 12035, 12295, 12308, 12309, 12339, 12448, 12755, 12756, 20022, 20031, 42192, 42193, 42194, 42195, 42196, 42198, 42199, 42201, 42202, 42204, 42205, 42207, 42208, 42209, 42210, 42211, 42214, 42215, 42218, 42219, 42220, 42222, 42224, 42226, 42227, 42228, 42232, 42233, 42237, 42239, 42510, 42564, 42567, 42719, 42731, 42735, 42801, 42842, 42858, 42862, 42872, 42889, 42892, 42904, 42905, 42911, 42923, 42930, 42931, 42932, 43826, 43829, 43837, 43847, 43848, 43854, 43858, 43866, 43893, 43905, 43907, 43923, 43945, 43946, 43951, 64422, 64423, 64424, 64425, 64426, 64427, 64428, 64429, 64830, 64831, 65072, 65101, 65102, 65103, 65112, 65128, 65165, 65166, 65257, 65258, 65259, 65260, 65282, 65284, 65285, 65286, 65287, 65290, 65291, 65293, 65294, 65295, 65296, 65297, 65298, 65299, 65300, 65301, 65302, 65303, 65304, 65305, 65308, 65309, 65310, 65312, 65313, 65314, 65315, 65316, 65317, 65318, 65319, 65320, 65321, 65322, 65323, 65324, 65325, 65326, 65327, 65328, 65329, 65330, 65331, 65332, 65333, 65334, 65335, 65336, 65337, 65338, 65339, 65340, 65341, 65342, 65343, 65344, 65345, 65346, 65347, 65348, 65349, 65350, 65351, 65352, 65353, 65354, 65355, 65356, 65357, 65358, 65359, 65360, 65361, 65362, 65363, 65364, 65365, 65366, 65367, 65368, 65369, 65370, 65371, 65372, 65373, 65512, 66178, 66182, 66183, 66186, 66192, 66194, 66197, 66198, 66199, 66203, 66208, 66209, 66210, 66213, 66219, 66224, 66225, 66226, 66228, 66255, 66293, 66305, 66306, 66313, 66321, 66325, 66327, 66330, 66335, 66336, 66338, 66564, 66581, 66587, 66592, 66604, 66621, 66632, 66740, 66754, 66766, 66770, 66794, 66806, 66835, 66838, 66840, 66844, 66845, 66853, 66854, 66855, 68176, 70864, 71430, 71434, 71438, 71439, 71840, 71842, 71843, 71844, 71846, 71849, 71852, 71854, 71855, 71858, 71861, 71864, 71867, 71868, 71872, 71873, 71874, 71875, 71876, 71878, 71880, 71882, 71884, 71893, 71894, 71895, 71896, 71900, 71904, 71909, 71910, 71913, 71916, 71919, 71922, 93960, 93962, 93974, 93992, 94005, 94010, 94011, 94015, 94016, 94018, 94019, 94033, 94034, 119060, 119149, 119302, 119309, 119311, 119314, 119315, 119318, 119338, 119350, 119351, 119354, 119355, 119808, 119809, 119810, 119811, 119812, 119813, 119814, 119815, 119816, 119817, 119818, 119819, 119820, 119821, 119822, 119823, 119824, 119825, 119826, 119827, 119828, 119829, 119830, 119831, 119832, 119833, 119834, 119835, 119836, 119837, 119838, 119839, 119840, 119841, 119842, 119843, 119844, 119845, 119847, 119848, 119849, 119850, 119851, 119852, 119853, 119854, 119855, 119856, 119857, 119858, 119859, 119860, 119861, 119862, 119863, 119864, 119865, 119866, 119867, 119868, 119869, 119870, 119871, 119872, 119873, 119874, 119875, 119876, 119877, 119878, 119879, 119880, 119881, 119882, 119883, 119884, 119885, 119886, 119887, 119888, 119889, 119890, 119891, 119892, 119894, 119895, 119896, 119897, 119899, 119900, 119901, 119902, 119903, 119904, 119905, 119906, 119907, 119908, 119909, 119910, 119911, 119912, 119913, 119914, 119915, 119916, 119917, 119918, 119919, 119920, 119921, 119922, 119923, 119924, 119925, 119926, 119927, 119928, 119929, 119930, 119931, 119932, 119933, 119934, 119935, 119936, 119937, 119938, 119939, 119940, 119941, 119942, 119943, 119944, 119945, 119946, 119947, 119948, 119949, 119951, 119952, 119953, 119954, 119955, 119956, 119957, 119958, 119959, 119960, 119961, 119962, 119963, 119964, 119966, 119967, 119970, 119973, 119974, 119977, 119978, 119979, 119980, 119982, 119983, 119984, 119985, 119986, 119987, 119988, 119989, 119990, 119991, 119992, 119993, 119995, 119997, 119998, 119999, 120000, 120001, 120003, 120005, 120006, 120007, 120008, 120009, 120010, 120011, 120012, 120013, 120014, 120015, 120016, 120017, 120018, 120019, 120020, 120021, 120022, 120023, 120024, 120025, 120026, 120027, 120028, 120029, 120030, 120031, 120032, 120033, 120034, 120035, 120036, 120037, 120038, 120039, 120040, 120041, 120042, 120043, 120044, 120045, 120046, 120047, 120048, 120049, 120050, 120051, 120052, 120053, 120055, 120056, 120057, 120058, 120059, 120060, 120061, 120062, 120063, 120064, 120065, 120066, 120067, 120068, 120069, 120071, 120072, 120073, 120074, 120077, 120078, 120079, 120080, 120081, 120082, 120083, 120084, 120086, 120087, 120088, 120089, 120090, 120091, 120092, 120094, 120095, 120096, 120097, 120098, 120099, 120100, 120101, 120102, 120103, 120104, 120105, 120107, 120108, 120109, 120110, 120111, 120112, 120113, 120114, 120115, 120116, 120117, 120118, 120119, 120120, 120121, 120123, 120124, 120125, 120126, 120128, 120129, 120130, 120131, 120132, 120134, 120138, 120139, 120140, 120141, 120142, 120143, 120144, 120146, 120147, 120148, 120149, 120150, 120151, 120152, 120153, 120154, 120155, 120156, 120157, 120159, 120160, 120161, 120162, 120163, 120164, 120165, 120166, 120167, 120168, 120169, 120170, 120171, 120172, 120173, 120174, 120175, 120176, 120177, 120178, 120179, 120180, 120181, 120182, 120183, 120184, 120185, 120186, 120187, 120188, 120189, 120190, 120191, 120192, 120193, 120194, 120195, 120196, 120197, 120198, 120199, 120200, 120201, 120202, 120203, 120204, 120205, 120206, 120207, 120208, 120209, 120211, 120212, 120213, 120214, 120215, 120216, 120217, 120218, 120219, 120220, 120221, 120222, 120223, 120224, 120225, 120226, 120227, 120228, 120229, 120230, 120231, 120232, 120233, 120234, 120235, 120236, 120237, 120238, 120239, 120240, 120241, 120242, 120243, 120244, 120245, 120246, 120247, 120248, 120249, 120250, 120251, 120252, 120253, 120254, 120255, 120256, 120257, 120258, 120259, 120260, 120261, 120263, 120264, 120265, 120266, 120267, 120268, 120269, 120270, 120271, 120272, 120273, 120274, 120275, 120276, 120277, 120278, 120279, 120280, 120281, 120282, 120283, 120284, 120285, 120286, 120287, 120288, 120289, 120290, 120291, 120292, 120293, 120294, 120295, 120296, 120297, 120298, 120299, 120300, 120301, 120302, 120303, 120304, 120305, 120306, 120307, 120308, 120309, 120310, 120311, 120312, 120313, 120315, 120316, 120317, 120318, 120319, 120320, 120321, 120322, 120323, 120324, 120325, 120326, 120327, 120328, 120329, 120330, 120331, 120332, 120333, 120334, 120335, 120336, 120337, 120338, 120339, 120340, 120341, 120342, 120343, 120344, 120345, 120346, 120347, 120348, 120349, 120350, 120351, 120352, 120353, 120354, 120355, 120356, 120357, 120358, 120359, 120360, 120361, 120362, 120363, 120364, 120365, 120367, 120368, 120369, 120370, 120371, 120372, 120373, 120374, 120375, 120376, 120377, 120378, 120379, 120380, 120381, 120382, 120383, 120384, 120385, 120386, 120387, 120388, 120389, 120390, 120391, 120392, 120393, 120394, 120395, 120396, 120397, 120398, 120399, 120400, 120401, 120402, 120403, 120404, 120405, 120406, 120407, 120408, 120409, 120410, 120411, 120412, 120413, 120414, 120415, 120416, 120417, 120419, 120420, 120421, 120422, 120423, 120424, 120425, 120426, 120427, 120428, 120429, 120430, 120431, 120432, 120433, 120434, 120435, 120436, 120437, 120438, 120439, 120440, 120441, 120442, 120443, 120444, 120445, 120446, 120447, 120448, 120449, 120450, 120451, 120452, 120453, 120454, 120455, 120456, 120457, 120458, 120459, 120460, 120461, 120462, 120463, 120464, 120465, 120466, 120467, 120468, 120469, 120471, 120472, 120473, 120474, 120475, 120476, 120477, 120478, 120479, 120480, 120481, 120482, 120483, 120484, 120488, 120489, 120492, 120493, 120494, 120496, 120497, 120499, 120500, 120502, 120504, 120507, 120508, 120510, 120514, 120516, 120522, 120526, 120528, 120530, 120532, 120534, 120544, 120546, 120547, 120550, 120551, 120552, 120554, 120555, 120557, 120558, 120560, 120562, 120565, 120566, 120568, 120572, 120574, 120580, 120584, 120586, 120588, 120590, 120592, 120602, 120604, 120605, 120608, 120609, 120610, 120612, 120613, 120615, 120616, 120618, 120620, 120623, 120624, 120626, 120630, 120632, 120638, 120642, 120644, 120646, 120648, 120650, 120660, 120662, 120663, 120666, 120667, 120668, 120670, 120671, 120673, 120674, 120676, 120678, 120681, 120682, 120684, 120688, 120690, 120696, 120700, 120702, 120704, 120706, 120708, 120718, 120720, 120721, 120724, 120725, 120726, 120728, 120729, 120731, 120732, 120734, 120736, 120739, 120740, 120742, 120746, 120748, 120754, 120758, 120760, 120762, 120764, 120766, 120776, 120778, 120782, 120783, 120784, 120785, 120786, 120787, 120788, 120789, 120790, 120791, 120792, 120793, 120794, 120795, 120796, 120797, 120798, 120799, 120800, 120801, 120802, 120803, 120804, 120805, 120806, 120807, 120808, 120809, 120810, 120811, 120812, 120813, 120814, 120815, 120816, 120817, 120818, 120819, 120820, 120821, 120822, 120823, 120824, 120825, 120826, 120827, 120828, 120829, 120830, 120831, 125127, 125131, 126464, 126500, 126564, 126592, 126596, 128844, 128872, 130032, 130033, 130034, 130035, 130036, 130037, 130038, 130039, 130040, 130041}, + With: []rune{44, 102, 98, 103, 82, 50, 51, 53, 115, 73, 33, 51, 56, 56, 63, 97, 103, 121, 105, 105, 119, 117, 121, 63, 96, 96, 96, 96, 96, 60, 62, 94, 94, 96, 96, 96, 58, 45, 105, 126, 96, 58, 96, 105, 59, 74, 96, 65, 66, 69, 90, 72, 75, 77, 78, 79, 80, 84, 89, 88, 97, 121, 105, 118, 111, 112, 111, 117, 89, 70, 50, 99, 106, 67, 77, 83, 73, 74, 115, 105, 106, 119, 86, 118, 89, 121, 104, 101, 73, 105, 51, 100, 71, 113, 87, 119, 85, 83, 79, 96, 96, 119, 113, 113, 104, 110, 110, 117, 103, 102, 111, 58, 108, 58, 108, 118, 96, 108, 111, 96, 44, 108, 111, 46, 108, 111, 86, 44, 42, 111, 111, 45, 111, 46, 73, 111, 86, 46, 46, 58, 58, 79, 108, 96, 96, 95, 58, 111, 63, 79, 56, 57, 111, 57, 56, 58, 111, 56, 79, 79, 57, 111, 111, 111, 111, 111, 111, 111, 111, 57, 111, 111, 111, 111, 111, 121, 111, 85, 79, 68, 82, 84, 105, 89, 65, 74, 69, 63, 87, 77, 72, 89, 71, 104, 90, 52, 98, 82, 87, 83, 86, 83, 76, 67, 80, 75, 100, 54, 71, 66, 61, 86, 62, 60, 96, 85, 80, 100, 98, 74, 76, 50, 120, 72, 120, 82, 98, 70, 65, 68, 68, 77, 66, 88, 120, 32, 60, 88, 73, 96, 75, 77, 58, 43, 47, 58, 58, 99, 111, 111, 117, 118, 119, 122, 114, 103, 121, 102, 121, 96, 105, 96, 126, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 45, 45, 45, 44, 96, 46, 32, 32, 32, 96, 60, 62, 47, 45, 47, 42, 126, 58, 32, 67, 103, 72, 72, 72, 104, 73, 73, 76, 108, 78, 80, 81, 82, 82, 82, 90, 90, 75, 66, 67, 101, 101, 69, 70, 77, 111, 105, 121, 68, 100, 101, 105, 106, 73, 86, 88, 76, 67, 68, 77, 105, 118, 120, 73, 99, 100, 45, 47, 92, 42, 73, 118, 85, 58, 126, 84, 118, 85, 69, 105, 112, 97, 73, 47, 88, 40, 41, 60, 62, 40, 41, 123, 125, 43, 45, 47, 92, 84, 120, 120, 92, 47, 92, 120, 114, 72, 73, 75, 77, 78, 79, 111, 80, 112, 67, 99, 84, 89, 88, 45, 47, 57, 51, 76, 54, 86, 69, 73, 33, 79, 81, 88, 61, 92, 47, 79, 40, 41, 47, 61, 47, 92, 92, 47, 66, 80, 100, 68, 84, 71, 75, 74, 67, 90, 70, 77, 78, 76, 83, 82, 86, 72, 87, 88, 89, 65, 69, 73, 79, 85, 46, 44, 58, 61, 46, 50, 105, 86, 63, 50, 115, 50, 51, 57, 38, 58, 96, 70, 102, 117, 51, 74, 88, 66, 101, 102, 111, 114, 114, 117, 117, 121, 105, 114, 119, 122, 118, 115, 99, 111, 111, 111, 111, 111, 111, 111, 111, 40, 41, 58, 95, 95, 95, 45, 92, 108, 108, 111, 111, 111, 111, 34, 36, 37, 38, 96, 42, 43, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 73, 66, 69, 70, 124, 88, 79, 80, 83, 84, 43, 65, 66, 67, 70, 79, 77, 84, 89, 88, 72, 90, 66, 67, 124, 77, 84, 88, 56, 42, 108, 88, 79, 67, 76, 83, 111, 99, 115, 82, 79, 85, 55, 111, 117, 78, 79, 75, 67, 86, 70, 76, 88, 46, 79, 118, 119, 119, 119, 86, 70, 76, 89, 69, 90, 57, 69, 52, 76, 79, 85, 53, 84, 118, 115, 70, 105, 122, 55, 111, 51, 57, 54, 57, 111, 117, 121, 79, 90, 87, 67, 88, 87, 67, 86, 84, 76, 73, 82, 83, 51, 62, 65, 85, 89, 96, 96, 123, 46, 51, 86, 92, 55, 70, 82, 76, 60, 62, 47, 92, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 67, 68, 71, 74, 75, 78, 79, 80, 81, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 102, 104, 105, 106, 107, 108, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 68, 69, 70, 71, 74, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 86, 87, 88, 89, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 68, 69, 70, 71, 73, 74, 75, 76, 77, 79, 83, 84, 85, 86, 87, 88, 89, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 105, 65, 66, 69, 90, 72, 73, 75, 77, 78, 79, 80, 84, 89, 88, 97, 121, 105, 118, 111, 112, 111, 117, 112, 65, 66, 69, 90, 72, 73, 75, 77, 78, 79, 80, 84, 89, 88, 97, 121, 105, 118, 111, 112, 111, 117, 112, 65, 66, 69, 90, 72, 73, 75, 77, 78, 79, 80, 84, 89, 88, 97, 121, 105, 118, 111, 112, 111, 117, 112, 65, 66, 69, 90, 72, 73, 75, 77, 78, 79, 80, 84, 89, 88, 97, 121, 105, 118, 111, 112, 111, 117, 112, 65, 66, 69, 90, 72, 73, 75, 77, 78, 79, 80, 84, 89, 88, 97, 121, 105, 118, 111, 112, 111, 117, 112, 70, 79, 73, 50, 51, 52, 53, 54, 55, 56, 57, 79, 73, 50, 51, 52, 53, 54, 55, 56, 57, 79, 73, 50, 51, 52, 53, 54, 55, 56, 57, 79, 73, 50, 51, 52, 53, 54, 55, 56, 57, 79, 73, 50, 51, 52, 53, 54, 55, 56, 57, 108, 56, 108, 111, 111, 108, 111, 67, 84, 79, 73, 50, 51, 52, 53, 54, 55, 56, 57}, + Locale: "_common", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 184, Hi: 383, Stride: 199}, + {Lo: 388, Hi: 397, Stride: 9}, + {Lo: 422, Hi: 423, Stride: 1}, + {Lo: 439, Hi: 444, Stride: 5}, + {Lo: 445, Hi: 451, Stride: 3}, + {Lo: 540, Hi: 546, Stride: 6}, + {Lo: 547, Hi: 577, Stride: 30}, + {Lo: 593, Hi: 609, Stride: 16}, + {Lo: 611, Hi: 617, Stride: 6}, + {Lo: 618, Hi: 623, Stride: 5}, + {Lo: 651, Hi: 655, Stride: 4}, + {Lo: 660, Hi: 697, Stride: 37}, + {Lo: 699, Hi: 702, Stride: 1}, + {Lo: 706, Hi: 708, Stride: 1}, + {Lo: 710, Hi: 714, Stride: 2}, + {Lo: 715, Hi: 720, Stride: 5}, + {Lo: 727, Hi: 731, Stride: 4}, + {Lo: 732, Hi: 756, Stride: 24}, + {Lo: 760, Hi: 884, Stride: 124}, + {Lo: 890, Hi: 894, Stride: 4}, + {Lo: 895, Hi: 900, Stride: 5}, + {Lo: 913, Hi: 914, Stride: 1}, + {Lo: 917, Hi: 919, Stride: 1}, + {Lo: 922, Hi: 924, Stride: 2}, + {Lo: 925, Hi: 929, Stride: 2}, + {Lo: 932, Hi: 933, Stride: 1}, + {Lo: 935, Hi: 945, Stride: 10}, + {Lo: 947, Hi: 953, Stride: 6}, + {Lo: 957, Hi: 965, Stride: 2}, + {Lo: 978, Hi: 988, Stride: 10}, + {Lo: 1000, Hi: 1010, Stride: 10}, + {Lo: 1011, Hi: 1017, Stride: 6}, + {Lo: 1018, Hi: 1029, Stride: 11}, + {Lo: 1030, Hi: 1032, Stride: 2}, + {Lo: 1109, Hi: 1110, Stride: 1}, + {Lo: 1112, Hi: 1121, Stride: 9}, + {Lo: 1140, Hi: 1141, Stride: 1}, + {Lo: 1198, Hi: 1199, Stride: 1}, + {Lo: 1211, Hi: 1213, Stride: 2}, + {Lo: 1216, Hi: 1231, Stride: 15}, + {Lo: 1248, Hi: 1281, Stride: 33}, + {Lo: 1292, Hi: 1307, Stride: 15}, + {Lo: 1308, Hi: 1309, Stride: 1}, + {Lo: 1357, Hi: 1359, Stride: 2}, + {Lo: 1365, Hi: 1370, Stride: 5}, + {Lo: 1373, Hi: 1377, Stride: 4}, + {Lo: 1379, Hi: 1382, Stride: 3}, + {Lo: 1392, Hi: 1400, Stride: 8}, + {Lo: 1404, Hi: 1405, Stride: 1}, + {Lo: 1409, Hi: 1412, Stride: 3}, + {Lo: 1413, Hi: 1417, Stride: 4}, + {Lo: 1472, Hi: 1475, Stride: 3}, + {Lo: 1493, Hi: 1496, Stride: 3}, + {Lo: 1497, Hi: 1503, Stride: 6}, + {Lo: 1505, Hi: 1523, Stride: 18}, + {Lo: 1549, Hi: 1575, Stride: 26}, + {Lo: 1607, Hi: 1632, Stride: 25}, + {Lo: 1633, Hi: 1637, Stride: 4}, + {Lo: 1639, Hi: 1643, Stride: 4}, + {Lo: 1645, Hi: 1726, Stride: 81}, + {Lo: 1729, Hi: 1748, Stride: 19}, + {Lo: 1749, Hi: 1776, Stride: 27}, + {Lo: 1777, Hi: 1781, Stride: 4}, + {Lo: 1783, Hi: 1793, Stride: 10}, + {Lo: 1794, Hi: 1796, Stride: 1}, + {Lo: 1984, Hi: 1994, Stride: 10}, + {Lo: 2036, Hi: 2037, Stride: 1}, + {Lo: 2042, Hi: 2307, Stride: 265}, + {Lo: 2406, Hi: 2429, Stride: 23}, + {Lo: 2534, Hi: 2538, Stride: 4}, + {Lo: 2541, Hi: 2662, Stride: 121}, + {Lo: 2663, Hi: 2666, Stride: 3}, + {Lo: 2691, Hi: 2790, Stride: 99}, + {Lo: 2819, Hi: 2848, Stride: 29}, + {Lo: 2918, Hi: 2920, Stride: 2}, + {Lo: 3046, Hi: 3074, Stride: 28}, + {Lo: 3174, Hi: 3202, Stride: 28}, + {Lo: 3302, Hi: 3330, Stride: 28}, + {Lo: 3360, Hi: 3430, Stride: 70}, + {Lo: 3437, Hi: 3458, Stride: 21}, + {Lo: 3664, Hi: 3792, Stride: 128}, + {Lo: 4125, Hi: 4160, Stride: 35}, + {Lo: 4327, Hi: 4351, Stride: 24}, + {Lo: 4608, Hi: 5024, Stride: 208}, + {Lo: 5025, Hi: 5026, Stride: 1}, + {Lo: 5029, Hi: 5033, Stride: 4}, + {Lo: 5034, Hi: 5036, Stride: 1}, + {Lo: 5038, Hi: 5043, Stride: 5}, + {Lo: 5047, Hi: 5051, Stride: 4}, + {Lo: 5053, Hi: 5056, Stride: 3}, + {Lo: 5058, Hi: 5059, Stride: 1}, + {Lo: 5070, Hi: 5071, Stride: 1}, + {Lo: 5074, Hi: 5076, Stride: 2}, + {Lo: 5077, Hi: 5081, Stride: 4}, + {Lo: 5082, Hi: 5086, Stride: 4}, + {Lo: 5087, Hi: 5090, Stride: 3}, + {Lo: 5094, Hi: 5095, Stride: 1}, + {Lo: 5102, Hi: 5107, Stride: 5}, + {Lo: 5108, Hi: 5120, Stride: 12}, + {Lo: 5167, Hi: 5171, Stride: 4}, + {Lo: 5176, Hi: 5194, Stride: 18}, + {Lo: 5196, Hi: 5229, Stride: 33}, + {Lo: 5231, Hi: 5234, Stride: 3}, + {Lo: 5261, Hi: 5290, Stride: 29}, + {Lo: 5311, Hi: 5441, Stride: 130}, + {Lo: 5500, Hi: 5501, Stride: 1}, + {Lo: 5511, Hi: 5551, Stride: 40}, + {Lo: 5556, Hi: 5573, Stride: 17}, + {Lo: 5598, Hi: 5610, Stride: 12}, + {Lo: 5616, Hi: 5623, Stride: 7}, + {Lo: 5741, Hi: 5742, Stride: 1}, + {Lo: 5760, Hi: 5810, Stride: 50}, + {Lo: 5815, Hi: 5825, Stride: 10}, + {Lo: 5836, Hi: 5845, Stride: 9}, + {Lo: 5846, Hi: 5868, Stride: 22}, + {Lo: 5869, Hi: 5941, Stride: 72}, + {Lo: 6147, Hi: 6153, Stride: 6}, + {Lo: 7428, Hi: 7439, Stride: 11}, + {Lo: 7441, Hi: 7452, Stride: 11}, + {Lo: 7456, Hi: 7458, Stride: 1}, + {Lo: 7462, Hi: 7555, Stride: 93}, + {Lo: 7564, Hi: 7837, Stride: 273}, + {Lo: 7935, Hi: 8125, Stride: 190}, + {Lo: 8126, Hi: 8128, Stride: 1}, + {Lo: 8175, Hi: 8189, Stride: 14}, + {Lo: 8190, Hi: 8192, Stride: 2}, + {Lo: 8193, Hi: 8202, Stride: 1}, + {Lo: 8208, Hi: 8210, Stride: 1}, + {Lo: 8218, Hi: 8219, Stride: 1}, + {Lo: 8228, Hi: 8232, Stride: 4}, + {Lo: 8233, Hi: 8239, Stride: 6}, + {Lo: 8242, Hi: 8249, Stride: 7}, + {Lo: 8250, Hi: 8257, Stride: 7}, + {Lo: 8259, Hi: 8260, Stride: 1}, + {Lo: 8270, Hi: 8275, Stride: 5}, + {Lo: 8282, Hi: 8287, Stride: 5}, + {Lo: 8450, Hi: 8458, Stride: 8}, + {Lo: 8459, Hi: 8462, Stride: 1}, + {Lo: 8464, Hi: 8467, Stride: 1}, + {Lo: 8469, Hi: 8473, Stride: 4}, + {Lo: 8474, Hi: 8477, Stride: 1}, + {Lo: 8484, Hi: 8488, Stride: 4}, + {Lo: 8490, Hi: 8492, Stride: 2}, + {Lo: 8493, Hi: 8497, Stride: 1}, + {Lo: 8499, Hi: 8500, Stride: 1}, + {Lo: 8505, Hi: 8509, Stride: 4}, + {Lo: 8517, Hi: 8521, Stride: 1}, + {Lo: 8544, Hi: 8548, Stride: 4}, + {Lo: 8553, Hi: 8556, Stride: 3}, + {Lo: 8557, Hi: 8560, Stride: 1}, + {Lo: 8564, Hi: 8569, Stride: 5}, + {Lo: 8572, Hi: 8574, Stride: 1}, + {Lo: 8722, Hi: 8725, Stride: 3}, + {Lo: 8726, Hi: 8727, Stride: 1}, + {Lo: 8739, Hi: 8744, Stride: 5}, + {Lo: 8746, Hi: 8758, Stride: 12}, + {Lo: 8764, Hi: 8868, Stride: 104}, + {Lo: 8897, Hi: 8899, Stride: 2}, + {Lo: 8959, Hi: 9075, Stride: 116}, + {Lo: 9076, Hi: 9082, Stride: 6}, + {Lo: 9213, Hi: 9585, Stride: 372}, + {Lo: 9587, Hi: 10088, Stride: 501}, + {Lo: 10089, Hi: 10094, Stride: 5}, + {Lo: 10095, Hi: 10098, Stride: 3}, + {Lo: 10099, Hi: 10101, Stride: 1}, + {Lo: 10133, Hi: 10134, Stride: 1}, + {Lo: 10187, Hi: 10189, Stride: 2}, + {Lo: 10201, Hi: 10539, Stride: 338}, + {Lo: 10540, Hi: 10741, Stride: 201}, + {Lo: 10744, Hi: 10745, Stride: 1}, + {Lo: 10799, Hi: 11397, Stride: 598}, + {Lo: 11406, Hi: 11410, Stride: 4}, + {Lo: 11412, Hi: 11416, Stride: 4}, + {Lo: 11418, Hi: 11422, Stride: 4}, + {Lo: 11423, Hi: 11426, Stride: 3}, + {Lo: 11427, Hi: 11430, Stride: 1}, + {Lo: 11432, Hi: 11436, Stride: 4}, + {Lo: 11450, Hi: 11462, Stride: 12}, + {Lo: 11466, Hi: 11468, Stride: 2}, + {Lo: 11472, Hi: 11474, Stride: 2}, + {Lo: 11576, Hi: 11577, Stride: 1}, + {Lo: 11599, Hi: 11601, Stride: 2}, + {Lo: 11604, Hi: 11605, Stride: 1}, + {Lo: 11613, Hi: 11840, Stride: 227}, + {Lo: 12034, Hi: 12035, Stride: 1}, + {Lo: 12295, Hi: 12308, Stride: 13}, + {Lo: 12309, Hi: 12339, Stride: 30}, + {Lo: 12448, Hi: 12755, Stride: 307}, + {Lo: 12756, Hi: 20022, Stride: 7266}, + {Lo: 20031, Hi: 42192, Stride: 22161}, + {Lo: 42193, Hi: 42196, Stride: 1}, + {Lo: 42198, Hi: 42199, Stride: 1}, + {Lo: 42201, Hi: 42202, Stride: 1}, + {Lo: 42204, Hi: 42205, Stride: 1}, + {Lo: 42207, Hi: 42211, Stride: 1}, + {Lo: 42214, Hi: 42215, Stride: 1}, + {Lo: 42218, Hi: 42220, Stride: 1}, + {Lo: 42222, Hi: 42226, Stride: 2}, + {Lo: 42227, Hi: 42228, Stride: 1}, + {Lo: 42232, Hi: 42233, Stride: 1}, + {Lo: 42237, Hi: 42239, Stride: 2}, + {Lo: 42510, Hi: 42564, Stride: 54}, + {Lo: 42567, Hi: 42719, Stride: 152}, + {Lo: 42731, Hi: 42735, Stride: 4}, + {Lo: 42801, Hi: 42842, Stride: 41}, + {Lo: 42858, Hi: 42862, Stride: 4}, + {Lo: 42872, Hi: 42889, Stride: 17}, + {Lo: 42892, Hi: 42904, Stride: 12}, + {Lo: 42905, Hi: 42911, Stride: 6}, + {Lo: 42923, Hi: 42930, Stride: 7}, + {Lo: 42931, Hi: 42932, Stride: 1}, + {Lo: 43826, Hi: 43829, Stride: 3}, + {Lo: 43837, Hi: 43847, Stride: 10}, + {Lo: 43848, Hi: 43854, Stride: 6}, + {Lo: 43858, Hi: 43866, Stride: 8}, + {Lo: 43893, Hi: 43905, Stride: 12}, + {Lo: 43907, Hi: 43923, Stride: 16}, + {Lo: 43945, Hi: 43946, Stride: 1}, + {Lo: 43951, Hi: 64422, Stride: 20471}, + {Lo: 64423, Hi: 64429, Stride: 1}, + {Lo: 64830, Hi: 64831, Stride: 1}, + {Lo: 65072, Hi: 65101, Stride: 29}, + {Lo: 65102, Hi: 65103, Stride: 1}, + {Lo: 65112, Hi: 65128, Stride: 16}, + {Lo: 65165, Hi: 65166, Stride: 1}, + {Lo: 65257, Hi: 65260, Stride: 1}, + {Lo: 65282, Hi: 65284, Stride: 2}, + {Lo: 65285, Hi: 65287, Stride: 1}, + {Lo: 65290, Hi: 65291, Stride: 1}, + {Lo: 65293, Hi: 65305, Stride: 1}, + {Lo: 65308, Hi: 65310, Stride: 1}, + {Lo: 65312, Hi: 65373, Stride: 1}, + {Lo: 65512, Hi: 65512, Stride: 1}, + }, + R32: []unicode.Range32{ + {Lo: 66178, Hi: 66182, Stride: 4}, + {Lo: 66183, Hi: 66186, Stride: 3}, + {Lo: 66192, Hi: 66194, Stride: 2}, + {Lo: 66197, Hi: 66199, Stride: 1}, + {Lo: 66203, Hi: 66208, Stride: 5}, + {Lo: 66209, Hi: 66210, Stride: 1}, + {Lo: 66213, Hi: 66219, Stride: 6}, + {Lo: 66224, Hi: 66226, Stride: 1}, + {Lo: 66228, Hi: 66255, Stride: 27}, + {Lo: 66293, Hi: 66305, Stride: 12}, + {Lo: 66306, Hi: 66313, Stride: 7}, + {Lo: 66321, Hi: 66325, Stride: 4}, + {Lo: 66327, Hi: 66330, Stride: 3}, + {Lo: 66335, Hi: 66336, Stride: 1}, + {Lo: 66338, Hi: 66564, Stride: 226}, + {Lo: 66581, Hi: 66587, Stride: 6}, + {Lo: 66592, Hi: 66604, Stride: 12}, + {Lo: 66621, Hi: 66632, Stride: 11}, + {Lo: 66740, Hi: 66754, Stride: 14}, + {Lo: 66766, Hi: 66770, Stride: 4}, + {Lo: 66794, Hi: 66806, Stride: 12}, + {Lo: 66835, Hi: 66838, Stride: 3}, + {Lo: 66840, Hi: 66844, Stride: 4}, + {Lo: 66845, Hi: 66853, Stride: 8}, + {Lo: 66854, Hi: 66855, Stride: 1}, + {Lo: 68176, Hi: 70864, Stride: 2688}, + {Lo: 71430, Hi: 71438, Stride: 4}, + {Lo: 71439, Hi: 71840, Stride: 401}, + {Lo: 71842, Hi: 71844, Stride: 1}, + {Lo: 71846, Hi: 71852, Stride: 3}, + {Lo: 71854, Hi: 71855, Stride: 1}, + {Lo: 71858, Hi: 71867, Stride: 3}, + {Lo: 71868, Hi: 71872, Stride: 4}, + {Lo: 71873, Hi: 71876, Stride: 1}, + {Lo: 71878, Hi: 71884, Stride: 2}, + {Lo: 71893, Hi: 71896, Stride: 1}, + {Lo: 71900, Hi: 71904, Stride: 4}, + {Lo: 71909, Hi: 71910, Stride: 1}, + {Lo: 71913, Hi: 71922, Stride: 3}, + {Lo: 93960, Hi: 93962, Stride: 2}, + {Lo: 93974, Hi: 93992, Stride: 18}, + {Lo: 94005, Hi: 94010, Stride: 5}, + {Lo: 94011, Hi: 94015, Stride: 4}, + {Lo: 94016, Hi: 94018, Stride: 2}, + {Lo: 94019, Hi: 94033, Stride: 14}, + {Lo: 94034, Hi: 119060, Stride: 25026}, + {Lo: 119149, Hi: 119302, Stride: 153}, + {Lo: 119309, Hi: 119311, Stride: 2}, + {Lo: 119314, Hi: 119315, Stride: 1}, + {Lo: 119318, Hi: 119338, Stride: 20}, + {Lo: 119350, Hi: 119351, Stride: 1}, + {Lo: 119354, Hi: 119355, Stride: 1}, + {Lo: 119808, Hi: 119845, Stride: 1}, + {Lo: 119847, Hi: 119892, Stride: 1}, + {Lo: 119894, Hi: 119897, Stride: 1}, + {Lo: 119899, Hi: 119949, Stride: 1}, + {Lo: 119951, Hi: 119964, Stride: 1}, + {Lo: 119966, Hi: 119967, Stride: 1}, + {Lo: 119970, Hi: 119973, Stride: 3}, + {Lo: 119974, Hi: 119977, Stride: 3}, + {Lo: 119978, Hi: 119980, Stride: 1}, + {Lo: 119982, Hi: 119993, Stride: 1}, + {Lo: 119995, Hi: 119997, Stride: 2}, + {Lo: 119998, Hi: 120001, Stride: 1}, + {Lo: 120003, Hi: 120005, Stride: 2}, + {Lo: 120006, Hi: 120053, Stride: 1}, + {Lo: 120055, Hi: 120069, Stride: 1}, + {Lo: 120071, Hi: 120074, Stride: 1}, + {Lo: 120077, Hi: 120084, Stride: 1}, + {Lo: 120086, Hi: 120092, Stride: 1}, + {Lo: 120094, Hi: 120105, Stride: 1}, + {Lo: 120107, Hi: 120121, Stride: 1}, + {Lo: 120123, Hi: 120126, Stride: 1}, + {Lo: 120128, Hi: 120132, Stride: 1}, + {Lo: 120134, Hi: 120138, Stride: 4}, + {Lo: 120139, Hi: 120144, Stride: 1}, + {Lo: 120146, Hi: 120157, Stride: 1}, + {Lo: 120159, Hi: 120209, Stride: 1}, + {Lo: 120211, Hi: 120261, Stride: 1}, + {Lo: 120263, Hi: 120313, Stride: 1}, + {Lo: 120315, Hi: 120365, Stride: 1}, + {Lo: 120367, Hi: 120417, Stride: 1}, + {Lo: 120419, Hi: 120469, Stride: 1}, + {Lo: 120471, Hi: 120484, Stride: 1}, + {Lo: 120488, Hi: 120489, Stride: 1}, + {Lo: 120492, Hi: 120494, Stride: 1}, + {Lo: 120496, Hi: 120497, Stride: 1}, + {Lo: 120499, Hi: 120500, Stride: 1}, + {Lo: 120502, Hi: 120504, Stride: 2}, + {Lo: 120507, Hi: 120508, Stride: 1}, + {Lo: 120510, Hi: 120514, Stride: 4}, + {Lo: 120516, Hi: 120522, Stride: 6}, + {Lo: 120526, Hi: 120534, Stride: 2}, + {Lo: 120544, Hi: 120546, Stride: 2}, + {Lo: 120547, Hi: 120550, Stride: 3}, + {Lo: 120551, Hi: 120552, Stride: 1}, + {Lo: 120554, Hi: 120555, Stride: 1}, + {Lo: 120557, Hi: 120558, Stride: 1}, + {Lo: 120560, Hi: 120562, Stride: 2}, + {Lo: 120565, Hi: 120566, Stride: 1}, + {Lo: 120568, Hi: 120572, Stride: 4}, + {Lo: 120574, Hi: 120580, Stride: 6}, + {Lo: 120584, Hi: 120592, Stride: 2}, + {Lo: 120602, Hi: 120604, Stride: 2}, + {Lo: 120605, Hi: 120608, Stride: 3}, + {Lo: 120609, Hi: 120610, Stride: 1}, + {Lo: 120612, Hi: 120613, Stride: 1}, + {Lo: 120615, Hi: 120616, Stride: 1}, + {Lo: 120618, Hi: 120620, Stride: 2}, + {Lo: 120623, Hi: 120624, Stride: 1}, + {Lo: 120626, Hi: 120630, Stride: 4}, + {Lo: 120632, Hi: 120638, Stride: 6}, + {Lo: 120642, Hi: 120650, Stride: 2}, + {Lo: 120660, Hi: 120662, Stride: 2}, + {Lo: 120663, Hi: 120666, Stride: 3}, + {Lo: 120667, Hi: 120668, Stride: 1}, + {Lo: 120670, Hi: 120671, Stride: 1}, + {Lo: 120673, Hi: 120674, Stride: 1}, + {Lo: 120676, Hi: 120678, Stride: 2}, + {Lo: 120681, Hi: 120682, Stride: 1}, + {Lo: 120684, Hi: 120688, Stride: 4}, + {Lo: 120690, Hi: 120696, Stride: 6}, + {Lo: 120700, Hi: 120708, Stride: 2}, + {Lo: 120718, Hi: 120720, Stride: 2}, + {Lo: 120721, Hi: 120724, Stride: 3}, + {Lo: 120725, Hi: 120726, Stride: 1}, + {Lo: 120728, Hi: 120729, Stride: 1}, + {Lo: 120731, Hi: 120732, Stride: 1}, + {Lo: 120734, Hi: 120736, Stride: 2}, + {Lo: 120739, Hi: 120740, Stride: 1}, + {Lo: 120742, Hi: 120746, Stride: 4}, + {Lo: 120748, Hi: 120754, Stride: 6}, + {Lo: 120758, Hi: 120766, Stride: 2}, + {Lo: 120776, Hi: 120778, Stride: 2}, + {Lo: 120782, Hi: 120831, Stride: 1}, + {Lo: 125127, Hi: 125131, Stride: 4}, + {Lo: 126464, Hi: 126500, Stride: 36}, + {Lo: 126564, Hi: 126592, Stride: 28}, + {Lo: 126596, Hi: 128844, Stride: 2248}, + {Lo: 128872, Hi: 130032, Stride: 1160}, + {Lo: 130033, Hi: 130041, Stride: 1}, + }, + LatinOffset: 0, + }, + }, + "_default": { + Confusable: []rune{160, 180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{32, 96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "_default", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 160, Hi: 180, Stride: 20}, + {Lo: 215, Hi: 305, Stride: 90}, + {Lo: 921, Hi: 1009, Stride: 88}, + {Lo: 1040, Hi: 1042, Stride: 2}, + {Lo: 1045, Hi: 1047, Stride: 2}, + {Lo: 1050, Hi: 1052, Stride: 2}, + {Lo: 1053, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 8216, Stride: 5}, + {Lo: 8217, Hi: 8245, Stride: 28}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "cs": { + Confusable: []rune{180, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{96, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "cs", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 305, Stride: 125}, + {Lo: 921, Hi: 1009, Stride: 88}, + {Lo: 1040, Hi: 1042, Stride: 2}, + {Lo: 1045, Hi: 1047, Stride: 2}, + {Lo: 1050, Hi: 1052, Stride: 2}, + {Lo: 1053, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8216, Hi: 8217, Stride: 1}, + {Lo: 8245, Hi: 12494, Stride: 4249}, + {Lo: 65281, Hi: 65283, Stride: 2}, + {Lo: 65288, Hi: 65289, Stride: 1}, + {Lo: 65292, Hi: 65306, Stride: 14}, + {Lo: 65307, Hi: 65311, Stride: 4}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 0, + }, + }, + "de": { + Confusable: []rune{180, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{96, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "de", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 305, Stride: 125}, + {Lo: 921, Hi: 1009, Stride: 88}, + {Lo: 1040, Hi: 1042, Stride: 2}, + {Lo: 1045, Hi: 1047, Stride: 2}, + {Lo: 1050, Hi: 1052, Stride: 2}, + {Lo: 1053, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8216, Hi: 8217, Stride: 1}, + {Lo: 8245, Hi: 12494, Stride: 4249}, + {Lo: 65281, Hi: 65283, Stride: 2}, + {Lo: 65288, Hi: 65289, Stride: 1}, + {Lo: 65292, Hi: 65306, Stride: 14}, + {Lo: 65307, Hi: 65311, Stride: 4}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 0, + }, + }, + "es": { + Confusable: []rune{180, 215, 305, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{96, 120, 105, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "es", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 1009, Stride: 704}, + {Lo: 1040, Hi: 1042, Stride: 2}, + {Lo: 1045, Hi: 1047, Stride: 2}, + {Lo: 1050, Hi: 1052, Stride: 2}, + {Lo: 1053, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 8245, Stride: 34}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "fr": { + Confusable: []rune{215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8216, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "fr", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 215, Hi: 305, Stride: 90}, + {Lo: 921, Hi: 1009, Stride: 88}, + {Lo: 1040, Hi: 1042, Stride: 2}, + {Lo: 1045, Hi: 1047, Stride: 2}, + {Lo: 1050, Hi: 1052, Stride: 2}, + {Lo: 1053, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8216, Hi: 8245, Stride: 29}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 0, + }, + }, + "it": { + Confusable: []rune{160, 180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8216, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{32, 96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "it", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 160, Hi: 180, Stride: 20}, + {Lo: 215, Hi: 305, Stride: 90}, + {Lo: 921, Hi: 1009, Stride: 88}, + {Lo: 1040, Hi: 1042, Stride: 2}, + {Lo: 1045, Hi: 1047, Stride: 2}, + {Lo: 1050, Hi: 1052, Stride: 2}, + {Lo: 1053, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 8216, Stride: 5}, + {Lo: 8245, Hi: 12494, Stride: 4249}, + {Lo: 65281, Hi: 65283, Stride: 2}, + {Lo: 65288, Hi: 65289, Stride: 1}, + {Lo: 65292, Hi: 65306, Stride: 14}, + {Lo: 65307, Hi: 65311, Stride: 4}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "ja": { + Confusable: []rune{180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8216, 8217, 8245, 65281, 65283, 65292, 65306, 65307}, + With: []rune{96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 96, 96, 33, 35, 44, 58, 59}, + Locale: "ja", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 1040, Stride: 31}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 8216, Stride: 5}, + {Lo: 8217, Hi: 8245, Stride: 28}, + {Lo: 65281, Hi: 65283, Stride: 2}, + {Lo: 65292, Hi: 65306, Stride: 14}, + {Lo: 65307, Hi: 65307, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "ko": { + Confusable: []rune{180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "ko", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 1040, Stride: 31}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 8245, Stride: 34}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "pl": { + Confusable: []rune{180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "pl", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 1040, Stride: 31}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8216, Hi: 8217, Stride: 1}, + {Lo: 8245, Hi: 12494, Stride: 4249}, + {Lo: 65281, Hi: 65283, Stride: 2}, + {Lo: 65288, Hi: 65289, Stride: 1}, + {Lo: 65292, Hi: 65306, Stride: 14}, + {Lo: 65307, Hi: 65311, Stride: 4}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "pt-BR": { + Confusable: []rune{180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "pt-BR", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 1040, Stride: 31}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8216, Hi: 8217, Stride: 1}, + {Lo: 8245, Hi: 12494, Stride: 4249}, + {Lo: 65281, Hi: 65283, Stride: 2}, + {Lo: 65288, Hi: 65289, Stride: 1}, + {Lo: 65292, Hi: 65306, Stride: 14}, + {Lo: 65307, Hi: 65311, Stride: 4}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "qps-ploc": { + Confusable: []rune{160, 180, 215, 305, 921, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{32, 96, 120, 105, 73, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "qps-ploc", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 160, Hi: 180, Stride: 20}, + {Lo: 215, Hi: 305, Stride: 90}, + {Lo: 921, Hi: 1040, Stride: 119}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 8216, Stride: 5}, + {Lo: 8217, Hi: 8245, Stride: 28}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "ru": { + Confusable: []rune{180, 215, 305, 921, 1009, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{96, 120, 105, 73, 112, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "ru", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 8216, Stride: 7207}, + {Lo: 8217, Hi: 8245, Stride: 28}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "tr": { + Confusable: []rune{160, 180, 215, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{32, 96, 120, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "tr", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 160, Hi: 180, Stride: 20}, + {Lo: 215, Hi: 921, Stride: 706}, + {Lo: 1009, Hi: 1040, Stride: 31}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 8245, Stride: 34}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "zh-hans": { + Confusable: []rune{180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8245, 12494, 65281, 65288, 65289, 65306, 65374}, + With: []rune{96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 96, 47, 33, 40, 41, 58, 126}, + Locale: "zh-hans", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 1040, Stride: 31}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8245, Hi: 12494, Stride: 4249}, + {Lo: 65281, Hi: 65288, Stride: 7}, + {Lo: 65289, Hi: 65306, Stride: 17}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "zh-hant": { + Confusable: []rune{180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 12494, 65283, 65307, 65374}, + With: []rune{96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 47, 35, 59, 126}, + Locale: "zh-hant", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 1040, Stride: 31}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 12494, Stride: 4283}, + {Lo: 65283, Hi: 65307, Stride: 24}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, +} diff --git a/modules/charset/ambiguous_gen_test.go b/modules/charset/ambiguous_gen_test.go new file mode 100644 index 000000000000..bd64e1c5b1c9 --- /dev/null +++ b/modules/charset/ambiguous_gen_test.go @@ -0,0 +1,32 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import ( + "sort" + "testing" + "unicode" + + "github.com/stretchr/testify/assert" +) + +func TestAmbiguousCharacters(t *testing.T) { + for locale, ambiguous := range AmbiguousCharacters { + assert.Equal(t, locale, ambiguous.Locale) + assert.Equal(t, len(ambiguous.Confusable), len(ambiguous.With)) + assert.True(t, sort.SliceIsSorted(ambiguous.Confusable, func(i, j int) bool { + return ambiguous.Confusable[i] < ambiguous.Confusable[j] + })) + + for _, confusable := range ambiguous.Confusable { + assert.True(t, unicode.Is(ambiguous.RangeTable, confusable)) + i := sort.Search(len(ambiguous.Confusable), func(j int) bool { + return ambiguous.Confusable[j] >= confusable + }) + found := i < len(ambiguous.Confusable) && ambiguous.Confusable[i] == confusable + assert.True(t, found, "%c is not in %d", confusable, i) + } + } +} diff --git a/modules/charset/breakwriter.go b/modules/charset/breakwriter.go new file mode 100644 index 000000000000..619826ff21b1 --- /dev/null +++ b/modules/charset/breakwriter.go @@ -0,0 +1,44 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import ( + "bytes" + "io" +) + +// BreakWriter wraps an io.Writer to always write '\n' as '
' +type BreakWriter struct { + io.Writer +} + +// Write writes the provided byte slice transparently replacing '\n' with '
' +func (b *BreakWriter) Write(bs []byte) (n int, err error) { + pos := 0 + for pos < len(bs) { + idx := bytes.IndexByte(bs[pos:], '\n') + if idx < 0 { + wn, err := b.Writer.Write(bs[pos:]) + return n + wn, err + } + + if idx > 0 { + wn, err := b.Writer.Write(bs[pos : pos+idx]) + n += wn + if err != nil { + return n, err + } + } + + if _, err = b.Writer.Write([]byte("
")); err != nil { + return n, err + } + pos += idx + 1 + + n++ + } + + return n, err +} diff --git a/modules/charset/breakwriter_test.go b/modules/charset/breakwriter_test.go new file mode 100644 index 000000000000..6bbed42ea54c --- /dev/null +++ b/modules/charset/breakwriter_test.go @@ -0,0 +1,69 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import ( + "strings" + "testing" +) + +func TestBreakWriter_Write(t *testing.T) { + tests := []struct { + name string + kase string + expect string + wantErr bool + }{ + { + name: "noline", + kase: "abcdefghijklmnopqrstuvwxyz", + expect: "abcdefghijklmnopqrstuvwxyz", + }, + { + name: "endline", + kase: "abcdefghijklmnopqrstuvwxyz\n", + expect: "abcdefghijklmnopqrstuvwxyz
", + }, + { + name: "startline", + kase: "\nabcdefghijklmnopqrstuvwxyz", + expect: "
abcdefghijklmnopqrstuvwxyz", + }, + { + name: "onlyline", + kase: "\n\n\n", + expect: "


", + }, + { + name: "empty", + kase: "", + expect: "", + }, + { + name: "midline", + kase: "\nabc\ndefghijkl\nmnopqrstuvwxy\nz", + expect: "
abc
defghijkl
mnopqrstuvwxy
z", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + buf := &strings.Builder{} + b := &BreakWriter{ + Writer: buf, + } + n, err := b.Write([]byte(tt.kase)) + if (err != nil) != tt.wantErr { + t.Errorf("BreakWriter.Write() error = %v, wantErr %v", err, tt.wantErr) + return + } + if n != len(tt.kase) { + t.Errorf("BreakWriter.Write() = %v, want %v", n, len(tt.kase)) + } + if buf.String() != tt.expect { + t.Errorf("BreakWriter.Write() wrote %q, want %v", buf.String(), tt.expect) + } + }) + } +} diff --git a/modules/charset/escape.go b/modules/charset/escape.go index 9c1baafba326..b264a569ff5e 100644 --- a/modules/charset/escape.go +++ b/modules/charset/escape.go @@ -1,236 +1,58 @@ -// Copyright 2021 The Gitea Authors. All rights reserved. +// Copyright 2022 The Gitea Authors. All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. +//go:generate go run invisible/generate.go -v -o ./invisible_gen.go + +//go:generate go run ambiguous/generate.go -v -o ./ambiguous_gen.go ambiguous/ambiguous.json + package charset import ( - "bytes" - "fmt" "io" "strings" - "unicode" - "unicode/utf8" - "golang.org/x/text/unicode/bidi" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/translation" ) -// EscapeStatus represents the findings of the unicode escaper -type EscapeStatus struct { - Escaped bool - HasError bool - HasBadRunes bool - HasControls bool - HasSpaces bool - HasMarks bool - HasBIDI bool - BadBIDI bool - HasRTLScript bool - HasLTRScript bool +// RuneNBSP is the codepoint for NBSP +const RuneNBSP = 0xa0 + +// EscapeControlHTML escapes the unicode control sequences in a provided html document +func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) { + sb := &strings.Builder{} + outputStream := &HTMLStreamerWriter{Writer: sb} + streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) + + if err := StreamHTML(strings.NewReader(text), streamer); err != nil { + streamer.escaped.HasError = true + log.Error("Error whilst escaping: %v", err) + } + return streamer.escaped, sb.String() } -// Or combines two EscapeStatus structs into one representing the conjunction of the two -func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus { - st := status - st.Escaped = st.Escaped || other.Escaped - st.HasError = st.HasError || other.HasError - st.HasBadRunes = st.HasBadRunes || other.HasBadRunes - st.HasControls = st.HasControls || other.HasControls - st.HasSpaces = st.HasSpaces || other.HasSpaces - st.HasMarks = st.HasMarks || other.HasMarks - st.HasBIDI = st.HasBIDI || other.HasBIDI - st.BadBIDI = st.BadBIDI || other.BadBIDI - st.HasRTLScript = st.HasRTLScript || other.HasRTLScript - st.HasLTRScript = st.HasLTRScript || other.HasLTRScript - return st +// EscapeControlReaders escapes the unicode control sequences in a provider reader and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte +func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) { + outputStream := &HTMLStreamerWriter{Writer: writer} + streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) + + if err = StreamHTML(reader, streamer); err != nil { + streamer.escaped.HasError = true + log.Error("Error whilst escaping: %v", err) + } + return streamer.escaped, err } // EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string -func EscapeControlString(text string) (EscapeStatus, string) { +func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) { sb := &strings.Builder{} - escaped, _ := EscapeControlReader(strings.NewReader(text), sb) - return escaped, sb.String() -} + outputStream := &HTMLStreamerWriter{Writer: sb} + streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) -// EscapeControlBytes escapes the unicode control sequences a provided []byte and returns the findings as an EscapeStatus and the escaped []byte -func EscapeControlBytes(text []byte) (EscapeStatus, []byte) { - buf := &bytes.Buffer{} - escaped, _ := EscapeControlReader(bytes.NewReader(text), buf) - return escaped, buf.Bytes() -} - -// EscapeControlReader escapes the unicode control sequences a provided Reader writing the escaped output to the output and returns the findings as an EscapeStatus and an error -func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) { - buf := make([]byte, 4096) - readStart := 0 - runeCount := 0 - var n int - var writePos int - - lineHasBIDI := false - lineHasRTLScript := false - lineHasLTRScript := false - -readingloop: - for err == nil { - n, err = text.Read(buf[readStart:]) - bs := buf[:n+readStart] - n = len(bs) - i := 0 - - for i < len(bs) { - r, size := utf8.DecodeRune(bs[i:]) - runeCount++ - - // Now handle the codepoints - switch { - case r == utf8.RuneError: - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - writePos = i - } - // runes can be at most 4 bytes - so... - if len(bs)-i <= 3 { - // if not request more data - copy(buf, bs[i:]) - readStart = n - i - writePos = 0 - continue readingloop - } - // this is a real broken rune - escaped.HasBadRunes = true - escaped.Escaped = true - if err = writeBroken(output, bs[i:i+size]); err != nil { - escaped.HasError = true - return - } - writePos += size - case r == '\n': - if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { - escaped.BadBIDI = true - } - lineHasBIDI = false - lineHasRTLScript = false - lineHasLTRScript = false - - case runeCount == 1 && r == 0xFEFF: // UTF BOM - // the first BOM is safe - case r == '\r' || r == '\t' || r == ' ': - // These are acceptable control characters and space characters - case unicode.IsSpace(r): - escaped.HasSpaces = true - escaped.Escaped = true - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - if err = writeEscaped(output, r); err != nil { - escaped.HasError = true - return - } - writePos = i + size - case unicode.Is(unicode.Bidi_Control, r): - escaped.Escaped = true - escaped.HasBIDI = true - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - lineHasBIDI = true - if err = writeEscaped(output, r); err != nil { - escaped.HasError = true - return - } - writePos = i + size - case unicode.Is(unicode.C, r): - escaped.Escaped = true - escaped.HasControls = true - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - if err = writeEscaped(output, r); err != nil { - escaped.HasError = true - return - } - writePos = i + size - case unicode.Is(unicode.M, r): - escaped.Escaped = true - escaped.HasMarks = true - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - if err = writeEscaped(output, r); err != nil { - escaped.HasError = true - return - } - writePos = i + size - default: - p, _ := bidi.Lookup(bs[i : i+size]) - c := p.Class() - if c == bidi.R || c == bidi.AL { - lineHasRTLScript = true - escaped.HasRTLScript = true - } else if c == bidi.L { - lineHasLTRScript = true - escaped.HasLTRScript = true - } - } - i += size - } - if n > 0 { - // we read something... - // write everything unwritten - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - - // reset the starting positions for the next read - readStart = 0 - writePos = 0 - } + if err := streamer.Text(text); err != nil { + streamer.escaped.HasError = true + log.Error("Error whilst escaping: %v", err) } - if readStart > 0 { - // this means that there is an incomplete or broken rune at 0-readStart and we read nothing on the last go round - escaped.Escaped = true - escaped.HasBadRunes = true - if err = writeBroken(output, buf[:readStart]); err != nil { - escaped.HasError = true - return - } - } - if err == io.EOF { - if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { - escaped.BadBIDI = true - } - err = nil - return - } - escaped.HasError = true - return escaped, err -} - -func writeBroken(output io.Writer, bs []byte) (err error) { - _, err = fmt.Fprintf(output, `<%X>`, bs) - return err -} - -func writeEscaped(output io.Writer, r rune) (err error) { - _, err = fmt.Fprintf(output, `%c`, r, r) - return err + return streamer.escaped, sb.String() } diff --git a/modules/charset/escape_status.go b/modules/charset/escape_status.go new file mode 100644 index 000000000000..7ff0ef112bb2 --- /dev/null +++ b/modules/charset/escape_status.go @@ -0,0 +1,28 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +// EscapeStatus represents the findings of the unicode escaper +type EscapeStatus struct { + Escaped bool + HasError bool + HasBadRunes bool + HasInvisible bool + HasAmbiguous bool +} + +// Or combines two EscapeStatus structs into one representing the conjunction of the two +func (status *EscapeStatus) Or(other *EscapeStatus) *EscapeStatus { + st := status + if status == nil { + st = &EscapeStatus{} + } + st.Escaped = st.Escaped || other.Escaped + st.HasError = st.HasError || other.HasError + st.HasBadRunes = st.HasBadRunes || other.HasBadRunes + st.HasAmbiguous = st.HasAmbiguous || other.HasAmbiguous + st.HasInvisible = st.HasInvisible || other.HasInvisible + return st +} diff --git a/modules/charset/escape_stream.go b/modules/charset/escape_stream.go new file mode 100644 index 000000000000..8c17136c9dc6 --- /dev/null +++ b/modules/charset/escape_stream.go @@ -0,0 +1,297 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import ( + "fmt" + "regexp" + "sort" + "strings" + "unicode" + "unicode/utf8" + + "code.gitea.io/gitea/modules/translation" + + "golang.org/x/net/html" +) + +// VScode defaultWordRegexp +var defaultWordRegexp = regexp.MustCompile(`(-?\d*\.\d\w*)|([^\` + "`" + `\~\!\@\#\$\%\^\&\*\(\)\-\=\+\[\{\]\}\\\|\;\:\'\"\,\.\<\>\/\?\s\x00-\x1f]+)`) + +func NewEscapeStreamer(locale translation.Locale, next HTMLStreamer, allowed ...rune) HTMLStreamer { + return &escapeStreamer{ + escaped: &EscapeStatus{}, + PassthroughHTMLStreamer: *NewPassthroughStreamer(next), + locale: locale, + ambiguousTables: AmbiguousTablesForLocale(locale), + allowed: allowed, + } +} + +type escapeStreamer struct { + PassthroughHTMLStreamer + escaped *EscapeStatus + locale translation.Locale + ambiguousTables []*AmbiguousTable + allowed []rune +} + +func (e *escapeStreamer) EscapeStatus() *EscapeStatus { + return e.escaped +} + +// Text tells the next streamer there is a text +func (e *escapeStreamer) Text(data string) error { + sb := &strings.Builder{} + pos, until, next := 0, 0, 0 + if len(data) > len(UTF8BOM) && data[:len(UTF8BOM)] == string(UTF8BOM) { + _, _ = sb.WriteString(data[:len(UTF8BOM)]) + pos = len(UTF8BOM) + } + for pos < len(data) { + nextIdxs := defaultWordRegexp.FindStringIndex(data[pos:]) + if nextIdxs == nil { + until = len(data) + next = until + } else { + until, next = nextIdxs[0]+pos, nextIdxs[1]+pos + } + + // from pos until until we know that the runes are not \r\t\n or even ' ' + runes := make([]rune, 0, next-until) + positions := make([]int, 0, next-until+1) + + for pos < until { + r, sz := utf8.DecodeRune([]byte(data)[pos:]) + positions = positions[:0] + positions = append(positions, pos, pos+sz) + types, confusables, _ := e.runeTypes(r) + if err := e.handleRunes(data, []rune{r}, positions, types, confusables, sb); err != nil { + return err + } + pos += sz + } + + for i := pos; i < next; { + r, sz := utf8.DecodeRune([]byte(data)[i:]) + runes = append(runes, r) + positions = append(positions, i) + i += sz + } + positions = append(positions, next) + types, confusables, runeCounts := e.runeTypes(runes...) + if runeCounts.needsEscape() { + if err := e.handleRunes(data, runes, positions, types, confusables, sb); err != nil { + return err + } + } else { + _, _ = sb.Write([]byte(data)[pos:next]) + } + pos = next + } + if sb.Len() > 0 { + if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil { + return err + } + } + return nil +} + +func (e *escapeStreamer) handleRunes(data string, runes []rune, positions []int, types []runeType, confusables []rune, sb *strings.Builder) error { + for i, r := range runes { + switch types[i] { + case brokenRuneType: + if sb.Len() > 0 { + if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil { + return err + } + sb.Reset() + } + end := positions[i+1] + start := positions[i] + if err := e.brokenRune([]byte(data)[start:end]); err != nil { + return err + } + case ambiguousRuneType: + if sb.Len() > 0 { + if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil { + return err + } + sb.Reset() + } + if err := e.ambiguousRune(r, confusables[0]); err != nil { + return err + } + confusables = confusables[1:] + case invisibleRuneType: + if sb.Len() > 0 { + if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil { + return err + } + sb.Reset() + } + if err := e.invisibleRune(r); err != nil { + return err + } + default: + _, _ = sb.WriteRune(r) + } + } + return nil +} + +func (e *escapeStreamer) brokenRune(bs []byte) error { + e.escaped.Escaped = true + e.escaped.HasBadRunes = true + + if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ + Key: "class", + Val: "broken-code-point", + }); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.Text(fmt.Sprintf("<%X>", bs)); err != nil { + return err + } + + return e.PassthroughHTMLStreamer.EndTag("span") +} + +func (e *escapeStreamer) ambiguousRune(r, c rune) error { + e.escaped.Escaped = true + e.escaped.HasAmbiguous = true + + if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ + Key: "class", + Val: "ambiguous-code-point tooltip", + }, html.Attribute{ + Key: "data-content", + Val: e.locale.Tr("repo.ambiguous_character", r, c), + }); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ + Key: "class", + Val: "char", + }); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { + return err + } + + return e.PassthroughHTMLStreamer.EndTag("span") +} + +func (e *escapeStreamer) invisibleRune(r rune) error { + e.escaped.Escaped = true + e.escaped.HasInvisible = true + + if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ + Key: "class", + Val: "escaped-code-point", + }, html.Attribute{ + Key: "data-escaped", + Val: fmt.Sprintf("[U+%04X]", r), + }); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ + Key: "class", + Val: "char", + }); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { + return err + } + + return e.PassthroughHTMLStreamer.EndTag("span") +} + +type runeCountType struct { + numBasicRunes int + numNonConfusingNonBasicRunes int + numAmbiguousRunes int + numInvisibleRunes int + numBrokenRunes int +} + +func (counts runeCountType) needsEscape() bool { + if counts.numBrokenRunes > 0 { + return true + } + if counts.numBasicRunes == 0 && + counts.numNonConfusingNonBasicRunes > 0 { + return false + } + return counts.numAmbiguousRunes > 0 || counts.numInvisibleRunes > 0 +} + +type runeType int + +const ( + basicASCIIRuneType runeType = iota //nolint // <- This is technically deadcode but its self-documenting so it should stay + brokenRuneType + nonBasicASCIIRuneType + ambiguousRuneType + invisibleRuneType +) + +func (e *escapeStreamer) runeTypes(runes ...rune) (types []runeType, confusables []rune, runeCounts runeCountType) { + types = make([]runeType, len(runes)) + for i, r := range runes { + var confusable rune + switch { + case r == utf8.RuneError: + types[i] = brokenRuneType + runeCounts.numBrokenRunes++ + case r == ' ' || r == '\t' || r == '\n': + runeCounts.numBasicRunes++ + case e.isAllowed(r): + if r > 0x7e || r < 0x20 { + types[i] = nonBasicASCIIRuneType + runeCounts.numNonConfusingNonBasicRunes++ + } else { + runeCounts.numBasicRunes++ + } + case unicode.Is(InvisibleRanges, r): + types[i] = invisibleRuneType + runeCounts.numInvisibleRunes++ + case unicode.IsControl(r): + types[i] = invisibleRuneType + runeCounts.numInvisibleRunes++ + case isAmbiguous(r, &confusable, e.ambiguousTables...): + confusables = append(confusables, confusable) + types[i] = ambiguousRuneType + runeCounts.numAmbiguousRunes++ + case r > 0x7e || r < 0x20: + types[i] = nonBasicASCIIRuneType + runeCounts.numNonConfusingNonBasicRunes++ + default: + runeCounts.numBasicRunes++ + } + } + return types, confusables, runeCounts +} + +func (e *escapeStreamer) isAllowed(r rune) bool { + if len(e.allowed) == 0 { + return false + } + if len(e.allowed) == 1 { + return e.allowed[0] == r + } + + return sort.Search(len(e.allowed), func(i int) bool { + return e.allowed[i] >= r + }) >= 0 +} diff --git a/modules/charset/escape_test.go b/modules/charset/escape_test.go index 01ccca77249b..8063e115424c 100644 --- a/modules/charset/escape_test.go +++ b/modules/charset/escape_test.go @@ -8,6 +8,8 @@ import ( "reflect" "strings" "testing" + + "code.gitea.io/gitea/modules/translation" ) type escapeControlTest struct { @@ -25,37 +27,37 @@ var escapeControlTests = []escapeControlTest{ name: "single line western", text: "single line western", result: "single line western", - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, { name: "multi line western", text: "single line western\nmulti line western\n", result: "single line western\nmulti line western\n", - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, { name: "multi line western non-breaking space", text: "single line western\nmulti line western\n", result: `single line western` + "\n" + `multi line western` + "\n", - status: EscapeStatus{Escaped: true, HasLTRScript: true, HasSpaces: true}, + status: EscapeStatus{Escaped: true, HasInvisible: true}, }, { name: "mixed scripts: western + japanese", text: "日属秘ぞしちゅ。Then some western.", result: "日属秘ぞしちゅ。Then some western.", - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, { name: "japanese", text: "日属秘ぞしちゅ。", result: "日属秘ぞしちゅ。", - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, { name: "hebrew", text: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה", - result: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה", - status: EscapeStatus{HasRTLScript: true}, + result: `עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה`, + status: EscapeStatus{Escaped: true, HasAmbiguous: true}, }, { name: "more hebrew", @@ -64,12 +66,12 @@ var escapeControlTests = []escapeControlTest{ המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, - result: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד. + result: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד. - המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. + המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. - בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, - status: EscapeStatus{HasRTLScript: true}, + בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, + status: EscapeStatus{Escaped: true, HasAmbiguous: true}, }, { name: "Mixed RTL+LTR", @@ -79,10 +81,7 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, result: `Many computer programs fail to display bidirectional text correctly. For example, the Hebrew name Sarah (שרה) is spelled: sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, - status: EscapeStatus{ - HasRTLScript: true, - HasLTRScript: true, - }, + status: EscapeStatus{}, }, { name: "Mixed RTL+LTR+BIDI", @@ -90,32 +89,27 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, result: `Many computer programs fail to display bidirectional text correctly. - For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, - status: EscapeStatus{ - Escaped: true, - HasBIDI: true, - HasRTLScript: true, - HasLTRScript: true, - }, + status: EscapeStatus{}, }, { name: "Accented characters", text: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), result: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, { name: "Program", text: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", result: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, { name: "CVE testcase", text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {", - result: `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {`, - status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true}, + result: `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {`, + status: EscapeStatus{Escaped: true, HasInvisible: true}, }, { name: "Mixed testcase with fail", @@ -124,10 +118,10 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + "\nif access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {\n", result: `Many computer programs fail to display bidirectional text correctly. - For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + - "\n" + `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {` + "\n", - status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true}, + "\n" + `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {` + "\n", + status: EscapeStatus{Escaped: true, HasInvisible: true}, }, { // UTF-8/16/32 all use the same codepoint for BOM @@ -135,15 +129,16 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, name: "UTF BOM", text: "\xef\xbb\xbftest", result: "\xef\xbb\xbftest", - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, } func TestEscapeControlString(t *testing.T) { for _, tt := range escapeControlTests { t.Run(tt.name, func(t *testing.T) { - status, result := EscapeControlString(tt.text) - if !reflect.DeepEqual(status, tt.status) { + locale := translation.NewLocale("en_US") + status, result := EscapeControlString(tt.text, locale) + if !reflect.DeepEqual(*status, tt.status) { t.Errorf("EscapeControlString() status = %v, wanted= %v", status, tt.status) } if result != tt.result { @@ -153,20 +148,6 @@ func TestEscapeControlString(t *testing.T) { } } -func TestEscapeControlBytes(t *testing.T) { - for _, tt := range escapeControlTests { - t.Run(tt.name, func(t *testing.T) { - status, result := EscapeControlBytes([]byte(tt.text)) - if !reflect.DeepEqual(status, tt.status) { - t.Errorf("EscapeControlBytes() status = %v, wanted= %v", status, tt.status) - } - if string(result) != tt.result { - t.Errorf("EscapeControlBytes()\nresult= %v,\nwanted= %v", result, tt.result) - } - }) - } -} - func TestEscapeControlReader(t *testing.T) { // lets add some control characters to the tests tests := make([]escapeControlTest, 0, len(escapeControlTests)*3) @@ -184,16 +165,7 @@ func TestEscapeControlReader(t *testing.T) { test.text = addPrefix("\u001E", test.text) test.result = addPrefix(``+"\u001e"+``, test.result) test.status.Escaped = true - test.status.HasControls = true - tests = append(tests, test) - } - - for _, test := range escapeControlTests { - test.name += " (+Mark)" - test.text = addPrefix("\u0300", test.text) - test.result = addPrefix(``+"\u0300"+``, test.result) - test.status.Escaped = true - test.status.HasMarks = true + test.status.HasInvisible = true tests = append(tests, test) } @@ -201,13 +173,13 @@ func TestEscapeControlReader(t *testing.T) { t.Run(tt.name, func(t *testing.T) { input := strings.NewReader(tt.text) output := &strings.Builder{} - status, err := EscapeControlReader(input, output) + status, err := EscapeControlReader(input, output, translation.NewLocale("en_US")) result := output.String() if err != nil { t.Errorf("EscapeControlReader(): err = %v", err) } - if !reflect.DeepEqual(status, tt.status) { + if !reflect.DeepEqual(*status, tt.status) { t.Errorf("EscapeControlReader() status = %v, wanted= %v", status, tt.status) } if result != tt.result { @@ -223,5 +195,5 @@ func TestEscapeControlReader_panic(t *testing.T) { for i := 0; i < 6826; i++ { bs = append(bs, []byte("—")...) } - _, _ = EscapeControlBytes(bs) + _, _ = EscapeControlString(string(bs), translation.NewLocale("en_US")) } diff --git a/modules/charset/htmlstream.go b/modules/charset/htmlstream.go new file mode 100644 index 000000000000..b354ce6a48a1 --- /dev/null +++ b/modules/charset/htmlstream.go @@ -0,0 +1,201 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import ( + "fmt" + "io" + + "golang.org/x/net/html" +) + +// HTMLStreamer represents a SAX-like interface for HTML +type HTMLStreamer interface { + Error(err error) error + Doctype(data string) error + Comment(data string) error + StartTag(data string, attrs ...html.Attribute) error + SelfClosingTag(data string, attrs ...html.Attribute) error + EndTag(data string) error + Text(data string) error +} + +// PassthroughHTMLStreamer is a passthrough streamer +type PassthroughHTMLStreamer struct { + next HTMLStreamer +} + +func NewPassthroughStreamer(next HTMLStreamer) *PassthroughHTMLStreamer { + return &PassthroughHTMLStreamer{next: next} +} + +var _ (HTMLStreamer) = &PassthroughHTMLStreamer{} + +// Error tells the next streamer in line that there is an error +func (p *PassthroughHTMLStreamer) Error(err error) error { + return p.next.Error(err) +} + +// Doctype tells the next streamer what the doctype is +func (p *PassthroughHTMLStreamer) Doctype(data string) error { + return p.next.Doctype(data) +} + +// Comment tells the next streamer there is a comment +func (p *PassthroughHTMLStreamer) Comment(data string) error { + return p.next.Comment(data) +} + +// StartTag tells the next streamer there is a starting tag +func (p *PassthroughHTMLStreamer) StartTag(data string, attrs ...html.Attribute) error { + return p.next.StartTag(data, attrs...) +} + +// SelfClosingTag tells the next streamer there is a self-closing tag +func (p *PassthroughHTMLStreamer) SelfClosingTag(data string, attrs ...html.Attribute) error { + return p.next.SelfClosingTag(data, attrs...) +} + +// EndTag tells the next streamer there is a end tag +func (p *PassthroughHTMLStreamer) EndTag(data string) error { + return p.next.EndTag(data) +} + +// Text tells the next streamer there is a text +func (p *PassthroughHTMLStreamer) Text(data string) error { + return p.next.Text(data) +} + +// HTMLStreamWriter acts as a writing sink +type HTMLStreamerWriter struct { + io.Writer + err error +} + +// Write implements io.Writer +func (h *HTMLStreamerWriter) Write(data []byte) (int, error) { + if h.err != nil { + return 0, h.err + } + return h.Writer.Write(data) +} + +// Write implements io.StringWriter +func (h *HTMLStreamerWriter) WriteString(data string) (int, error) { + if h.err != nil { + return 0, h.err + } + return h.Writer.Write([]byte(data)) +} + +// Error tells the next streamer in line that there is an error +func (h *HTMLStreamerWriter) Error(err error) error { + if h.err == nil { + h.err = err + } + return h.err +} + +// Doctype tells the next streamer what the doctype is +func (h *HTMLStreamerWriter) Doctype(data string) error { + _, h.err = h.WriteString("") + return h.err +} + +// Comment tells the next streamer there is a comment +func (h *HTMLStreamerWriter) Comment(data string) error { + _, h.err = h.WriteString("") + return h.err +} + +// StartTag tells the next streamer there is a starting tag +func (h *HTMLStreamerWriter) StartTag(data string, attrs ...html.Attribute) error { + return h.startTag(data, attrs, false) +} + +// SelfClosingTag tells the next streamer there is a self-closing tag +func (h *HTMLStreamerWriter) SelfClosingTag(data string, attrs ...html.Attribute) error { + return h.startTag(data, attrs, true) +} + +func (h *HTMLStreamerWriter) startTag(data string, attrs []html.Attribute, selfclosing bool) error { + if _, h.err = h.WriteString("<" + data); h.err != nil { + return h.err + } + for _, attr := range attrs { + if _, h.err = h.WriteString(" " + attr.Key + "=\"" + html.EscapeString(attr.Val) + "\""); h.err != nil { + return h.err + } + } + if selfclosing { + if _, h.err = h.WriteString("/>"); h.err != nil { + return h.err + } + } else { + if _, h.err = h.WriteString(">"); h.err != nil { + return h.err + } + } + return h.err +} + +// EndTag tells the next streamer there is a end tag +func (h *HTMLStreamerWriter) EndTag(data string) error { + _, h.err = h.WriteString("") + return h.err +} + +// Text tells the next streamer there is a text +func (h *HTMLStreamerWriter) Text(data string) error { + _, h.err = h.WriteString(html.EscapeString(data)) + return h.err +} + +// StreamHTML streams an html to a provided streamer +func StreamHTML(source io.Reader, streamer HTMLStreamer) error { + tokenizer := html.NewTokenizer(source) + for { + tt := tokenizer.Next() + switch tt { + case html.ErrorToken: + if tokenizer.Err() != io.EOF { + return tokenizer.Err() + } + return nil + case html.DoctypeToken: + token := tokenizer.Token() + if err := streamer.Doctype(token.Data); err != nil { + return err + } + case html.CommentToken: + token := tokenizer.Token() + if err := streamer.Comment(token.Data); err != nil { + return err + } + case html.StartTagToken: + token := tokenizer.Token() + if err := streamer.StartTag(token.Data, token.Attr...); err != nil { + return err + } + case html.SelfClosingTagToken: + token := tokenizer.Token() + if err := streamer.StartTag(token.Data, token.Attr...); err != nil { + return err + } + case html.EndTagToken: + token := tokenizer.Token() + if err := streamer.EndTag(token.Data); err != nil { + return err + } + case html.TextToken: + token := tokenizer.Token() + if err := streamer.Text(token.Data); err != nil { + return err + } + default: + return fmt.Errorf("unknown type of token: %d", tt) + } + } +} diff --git a/modules/charset/invisible/generate.go b/modules/charset/invisible/generate.go new file mode 100644 index 000000000000..230ff0b83216 --- /dev/null +++ b/modules/charset/invisible/generate.go @@ -0,0 +1,111 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package main + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "os" + "text/template" + + "golang.org/x/text/unicode/rangetable" +) + +// InvisibleRunes these are runes that vscode has assigned to be invisible +// See https://github.com/hediet/vscode-unicode-data +var InvisibleRunes = []rune{ + 9, 10, 11, 12, 13, 32, 127, 160, 173, 847, 1564, 4447, 4448, 6068, 6069, 6155, 6156, 6157, 6158, 7355, 7356, 8192, 8193, 8194, 8195, 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8203, 8204, 8205, 8206, 8207, 8234, 8235, 8236, 8237, 8238, 8239, 8287, 8288, 8289, 8290, 8291, 8292, 8293, 8294, 8295, 8296, 8297, 8298, 8299, 8300, 8301, 8302, 8303, 10240, 12288, 12644, 65024, 65025, 65026, 65027, 65028, 65029, 65030, 65031, 65032, 65033, 65034, 65035, 65036, 65037, 65038, 65039, 65279, 65440, 65520, 65521, 65522, 65523, 65524, 65525, 65526, 65527, 65528, 65532, 78844, 119155, 119156, 119157, 119158, 119159, 119160, 119161, 119162, 917504, 917505, 917506, 917507, 917508, 917509, 917510, 917511, 917512, 917513, 917514, 917515, 917516, 917517, 917518, 917519, 917520, 917521, 917522, 917523, 917524, 917525, 917526, 917527, 917528, 917529, 917530, 917531, 917532, 917533, 917534, 917535, 917536, 917537, 917538, 917539, 917540, 917541, 917542, 917543, 917544, 917545, 917546, 917547, 917548, 917549, 917550, 917551, 917552, 917553, 917554, 917555, 917556, 917557, 917558, 917559, 917560, 917561, 917562, 917563, 917564, 917565, 917566, 917567, 917568, 917569, 917570, 917571, 917572, 917573, 917574, 917575, 917576, 917577, 917578, 917579, 917580, 917581, 917582, 917583, 917584, 917585, 917586, 917587, 917588, 917589, 917590, 917591, 917592, 917593, 917594, 917595, 917596, 917597, 917598, 917599, 917600, 917601, 917602, 917603, 917604, 917605, 917606, 917607, 917608, 917609, 917610, 917611, 917612, 917613, 917614, 917615, 917616, 917617, 917618, 917619, 917620, 917621, 917622, 917623, 917624, 917625, 917626, 917627, 917628, 917629, 917630, 917631, 917760, 917761, 917762, 917763, 917764, 917765, 917766, 917767, 917768, 917769, 917770, 917771, 917772, 917773, 917774, 917775, 917776, 917777, 917778, 917779, 917780, 917781, 917782, 917783, 917784, 917785, 917786, 917787, 917788, 917789, 917790, 917791, 917792, 917793, 917794, 917795, 917796, 917797, 917798, 917799, 917800, 917801, 917802, 917803, 917804, 917805, 917806, 917807, 917808, 917809, 917810, 917811, 917812, 917813, 917814, 917815, 917816, 917817, 917818, 917819, 917820, 917821, 917822, 917823, 917824, 917825, 917826, 917827, 917828, 917829, 917830, 917831, 917832, 917833, 917834, 917835, 917836, 917837, 917838, 917839, 917840, 917841, 917842, 917843, 917844, 917845, 917846, 917847, 917848, 917849, 917850, 917851, 917852, 917853, 917854, 917855, 917856, 917857, 917858, 917859, 917860, 917861, 917862, 917863, 917864, 917865, 917866, 917867, 917868, 917869, 917870, 917871, 917872, 917873, 917874, 917875, 917876, 917877, 917878, 917879, 917880, 917881, 917882, 917883, 917884, 917885, 917886, 917887, 917888, 917889, 917890, 917891, 917892, 917893, 917894, 917895, 917896, 917897, 917898, 917899, 917900, 917901, 917902, 917903, 917904, 917905, 917906, 917907, 917908, 917909, 917910, 917911, 917912, 917913, 917914, 917915, 917916, 917917, 917918, 917919, 917920, 917921, 917922, 917923, 917924, 917925, 917926, 917927, 917928, 917929, 917930, 917931, 917932, 917933, 917934, 917935, 917936, 917937, 917938, 917939, 917940, 917941, 917942, 917943, 917944, 917945, 917946, 917947, 917948, 917949, 917950, 917951, 917952, 917953, 917954, 917955, 917956, 917957, 917958, 917959, 917960, 917961, 917962, 917963, 917964, 917965, 917966, 917967, 917968, 917969, 917970, 917971, 917972, 917973, 917974, 917975, 917976, 917977, 917978, 917979, 917980, 917981, 917982, 917983, 917984, 917985, 917986, 917987, 917988, 917989, 917990, 917991, 917992, 917993, 917994, 917995, 917996, 917997, 917998, 917999, +} + +var verbose bool + +func main() { + flag.Usage = func() { + fmt.Fprintf(os.Stderr, `%s: Generate InvisibleRunesRange + +Usage: %[1]s [-v] [-o output.go] +`, os.Args[0]) + flag.PrintDefaults() + } + + output := "" + flag.BoolVar(&verbose, "v", false, "verbose output") + flag.StringVar(&output, "o", "invisible_gen.go", "file to output to") + flag.Parse() + + // First we filter the runes to remove + // + filtered := make([]rune, 0, len(InvisibleRunes)) + for _, r := range InvisibleRunes { + if r == ' ' || r == '\t' || r == '\n' { + continue + } + filtered = append(filtered, r) + } + + table := rangetable.New(filtered...) + if err := runTemplate(generatorTemplate, output, table); err != nil { + fatalf("Unable to run template: %v", err) + } +} + +func runTemplate(t *template.Template, filename string, data interface{}) error { + buf := bytes.NewBuffer(nil) + if err := t.Execute(buf, data); err != nil { + return fmt.Errorf("unable to execute template: %w", err) + } + bs, err := format.Source(buf.Bytes()) + if err != nil { + verbosef("Bad source:\n%s", buf.String()) + return fmt.Errorf("unable to format source: %w", err) + } + file, err := os.Create(filename) + if err != nil { + return fmt.Errorf("failed to create file %s because %w", filename, err) + } + defer file.Close() + _, err = file.Write(bs) + if err != nil { + return fmt.Errorf("unable to write generated source: %w", err) + } + return nil +} + +var generatorTemplate = template.Must(template.New("invisibleTemplate").Parse(`// This file is generated by modules/charset/invisible/generate.go DO NOT EDIT +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import "unicode" + +var InvisibleRanges = &unicode.RangeTable{ + R16: []unicode.Range16{ +{{range .R16 }} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}}, +{{end}} }, + R32: []unicode.Range32{ +{{range .R32}} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}}, +{{end}} }, + LatinOffset: {{.LatinOffset}}, +} +`)) + +func logf(format string, args ...interface{}) { + fmt.Fprintf(os.Stderr, format+"\n", args...) +} + +func verbosef(format string, args ...interface{}) { + if verbose { + logf(format, args...) + } +} + +func fatalf(format string, args ...interface{}) { + logf("fatal: "+format+"\n", args...) + os.Exit(1) +} diff --git a/modules/charset/invisible_gen.go b/modules/charset/invisible_gen.go new file mode 100644 index 000000000000..b3bfebe0c0e0 --- /dev/null +++ b/modules/charset/invisible_gen.go @@ -0,0 +1,37 @@ +// This file is generated by modules/charset/invisible/generate.go DO NOT EDIT +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import "unicode" + +var InvisibleRanges = &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 11, Hi: 13, Stride: 1}, + {Lo: 127, Hi: 160, Stride: 33}, + {Lo: 173, Hi: 847, Stride: 674}, + {Lo: 1564, Hi: 4447, Stride: 2883}, + {Lo: 4448, Hi: 6068, Stride: 1620}, + {Lo: 6069, Hi: 6155, Stride: 86}, + {Lo: 6156, Hi: 6158, Stride: 1}, + {Lo: 7355, Hi: 7356, Stride: 1}, + {Lo: 8192, Hi: 8207, Stride: 1}, + {Lo: 8234, Hi: 8239, Stride: 1}, + {Lo: 8287, Hi: 8303, Stride: 1}, + {Lo: 10240, Hi: 12288, Stride: 2048}, + {Lo: 12644, Hi: 65024, Stride: 52380}, + {Lo: 65025, Hi: 65039, Stride: 1}, + {Lo: 65279, Hi: 65440, Stride: 161}, + {Lo: 65520, Hi: 65528, Stride: 1}, + {Lo: 65532, Hi: 65532, Stride: 1}, + }, + R32: []unicode.Range32{ + {Lo: 78844, Hi: 119155, Stride: 40311}, + {Lo: 119156, Hi: 119162, Stride: 1}, + {Lo: 917504, Hi: 917631, Stride: 1}, + {Lo: 917760, Hi: 917999, Stride: 1}, + }, + LatinOffset: 2, +} diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index b534c43a8d7d..2330057f8720 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1035,13 +1035,13 @@ file_view_rendered = View Rendered file_view_raw = View Raw file_permalink = Permalink file_too_large = The file is too large to be shown. -bidi_bad_header = `This file contains unexpected Bidirectional Unicode characters!` -bidi_bad_description = `This file contains unexpected Bidirectional Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.` -bidi_bad_description_escaped = `This file contains unexpected Bidirectional Unicode characters. Hidden unicode characters are escaped below. Use the Unescape button to show how they render.` -unicode_header = `This file contains hidden Unicode characters!` -unicode_description = `This file contains hidden Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.` -unicode_description_escaped = `This file contains hidden Unicode characters. Hidden unicode characters are escaped below. Use the Unescape button to show how they render.` -line_unicode = `This line has hidden unicode characters` +invisible_runes_header = `This file contains invisible Unicode characters!` +invisible_runes_description = `This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.` +ambiguous_runes_header = `This file contains ambiguous Unicode characters!` +ambiguous_runes_description = `This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.` +invisible_runes_line = `This line has invisible unicode characters` +ambiguous_runes_line = `This line has ambiguous unicode characters` +ambiguous_character = `%[1]c [U+%04[1]X] is confusable with %[2]c [U+%04[2]X]` escape_control_characters = Escape unescape_control_characters = Unescape diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go index 06c43aec19fb..c53a53b47193 100644 --- a/routers/web/repo/blame.go +++ b/routers/web/repo/blame.go @@ -40,7 +40,7 @@ type blameRow struct { CommitMessage string CommitSince gotemplate.HTML Code gotemplate.HTML - EscapeStatus charset.EscapeStatus + EscapeStatus *charset.EscapeStatus } // RefBlame render blame page @@ -235,7 +235,7 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m } lines := make([]string, 0) rows := make([]*blameRow, 0) - escapeStatus := charset.EscapeStatus{} + escapeStatus := &charset.EscapeStatus{} i := 0 commitCnt := 0 @@ -280,7 +280,7 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m fileName := fmt.Sprintf("%v", ctx.Data["FileName"]) line = highlight.Code(fileName, language, line) - br.EscapeStatus, line = charset.EscapeControlString(line) + br.EscapeStatus, line = charset.EscapeControlHTML(line, ctx.Locale) br.Code = gotemplate.HTML(line) rows = append(rows, br) escapeStatus = escapeStatus.Or(br.EscapeStatus) diff --git a/routers/web/repo/lfs.go b/routers/web/repo/lfs.go index 0e446f2de068..baec48bfea77 100644 --- a/routers/web/repo/lfs.go +++ b/routers/web/repo/lfs.go @@ -309,7 +309,7 @@ func LFSFileGet(ctx *context.Context) { // Building code view blocks with line number on server side. escapedContent := &bytes.Buffer{} - ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent) + ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent, ctx.Locale) var output bytes.Buffer lines := strings.Split(escapedContent.String(), "\n") diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 6a9c6b9bbace..72ffda7e0147 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -328,35 +328,31 @@ func renderReadmeFile(ctx *context.Context, readmeFile *namedBlob, readmeTreelin if markupType := markup.Type(readmeFile.name); markupType != "" { ctx.Data["IsMarkup"] = true ctx.Data["MarkupType"] = markupType - var result strings.Builder - err := markup.Render(&markup.RenderContext{ + + ctx.Data["EscapeStatus"], ctx.Data["FileContent"], err = markupRender(ctx, &markup.RenderContext{ Ctx: ctx, RelativePath: path.Join(ctx.Repo.TreePath, readmeFile.name), // ctx.Repo.TreePath is the directory not the Readme so we must append the Readme filename (and path). URLPrefix: readmeTreelink, Metas: ctx.Repo.Repository.ComposeDocumentMetas(), GitRepo: ctx.Repo.GitRepo, - }, rd, &result) + }, rd) if err != nil { - log.Error("Render failed: %v then fallback", err) + log.Error("Render failed for %s in %-v: %v Falling back to rendering source", readmeFile.name, ctx.Repo.Repository, err) buf := &bytes.Buffer{} - ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf) + ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf, ctx.Locale) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(buf.String()), "\n", `
`, ) - } else { - ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlString(result.String()) } } else { ctx.Data["IsRenderedHTML"] = true buf := &bytes.Buffer{} - ctx.Data["EscapeStatus"], err = charset.EscapeControlReader(rd, buf) + ctx.Data["EscapeStatus"], err = charset.EscapeControlReader(rd, &charset.BreakWriter{Writer: buf}, ctx.Locale, charset.RuneNBSP) if err != nil { log.Error("Read failed: %v", err) } - ctx.Data["FileContent"] = strings.ReplaceAll( - gotemplate.HTMLEscapeString(buf.String()), "\n", `
`, - ) + ctx.Data["FileContent"] = buf.String() } } @@ -498,32 +494,30 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st if markupType != "" && !shouldRenderSource { ctx.Data["IsMarkup"] = true ctx.Data["MarkupType"] = markupType - var result strings.Builder if !detected { markupType = "" } metas := ctx.Repo.Repository.ComposeDocumentMetas() metas["BranchNameSubURL"] = ctx.Repo.BranchNameSubURL() - err := markup.Render(&markup.RenderContext{ + ctx.Data["EscapeStatus"], ctx.Data["FileContent"], err = markupRender(ctx, &markup.RenderContext{ Ctx: ctx, Type: markupType, RelativePath: ctx.Repo.TreePath, URLPrefix: path.Dir(treeLink), Metas: metas, GitRepo: ctx.Repo.GitRepo, - }, rd, &result) + }, rd) if err != nil { ctx.ServerError("Render", err) return } // to prevent iframe load third-party url ctx.Resp.Header().Add("Content-Security-Policy", "frame-src 'self'") - ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlString(result.String()) } else if readmeExist && !shouldRenderSource { buf := &bytes.Buffer{} ctx.Data["IsRenderedHTML"] = true - ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf) + ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf, ctx.Locale) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(buf.String()), "\n", `
`, @@ -570,12 +564,13 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st log.Error("highlight.File failed, fallback to plain text: %v", err) fileContent = highlight.PlainText(buf) } - status, _ := charset.EscapeControlReader(bytes.NewReader(buf), io.Discard) - ctx.Data["EscapeStatus"] = status - statuses := make([]charset.EscapeStatus, len(fileContent)) + status := &charset.EscapeStatus{} + statuses := make([]*charset.EscapeStatus, len(fileContent)) for i, line := range fileContent { - statuses[i], fileContent[i] = charset.EscapeControlString(line) + statuses[i], fileContent[i] = charset.EscapeControlHTML(line, ctx.Locale) + status = status.Or(statuses[i]) } + ctx.Data["EscapeStatus"] = status ctx.Data["FileContent"] = fileContent ctx.Data["LineEscapeStatus"] = statuses } @@ -613,20 +608,17 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st rd := io.MultiReader(bytes.NewReader(buf), dataRc) ctx.Data["IsMarkup"] = true ctx.Data["MarkupType"] = markupType - var result strings.Builder - err := markup.Render(&markup.RenderContext{ + ctx.Data["EscapeStatus"], ctx.Data["FileContent"], err = markupRender(ctx, &markup.RenderContext{ Ctx: ctx, RelativePath: ctx.Repo.TreePath, URLPrefix: path.Dir(treeLink), Metas: ctx.Repo.Repository.ComposeDocumentMetas(), GitRepo: ctx.Repo.GitRepo, - }, rd, &result) + }, rd) if err != nil { ctx.ServerError("Render", err) return } - - ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlString(result.String()) } } @@ -645,6 +637,23 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st } } +func markupRender(ctx *context.Context, renderCtx *markup.RenderContext, input io.Reader) (escaped *charset.EscapeStatus, output string, err error) { + markupRd, markupWr := io.Pipe() + defer markupWr.Close() + done := make(chan struct{}) + go func() { + sb := &strings.Builder{} + // We allow NBSP here this is rendered + escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.RuneNBSP) + output = sb.String() + close(done) + }() + err = markup.Render(renderCtx, input, markupWr) + _ = markupWr.CloseWithError(err) + <-done + return escaped, output, err +} + func safeURL(address string) string { u, err := url.Parse(address) if err != nil { diff --git a/routers/web/repo/wiki.go b/routers/web/repo/wiki.go index e4134028aa9d..1af511f50a3e 100644 --- a/routers/web/repo/wiki.go +++ b/routers/web/repo/wiki.go @@ -239,9 +239,28 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { Metas: ctx.Repo.Repository.ComposeDocumentMetas(), IsWiki: true, } + buf := &strings.Builder{} - var buf strings.Builder - if err := markdown.Render(rctx, bytes.NewReader(data), &buf); err != nil { + renderFn := func(data []byte) (escaped *charset.EscapeStatus, output string, err error) { + markupRd, markupWr := io.Pipe() + defer markupWr.Close() + done := make(chan struct{}) + go func() { + // We allow NBSP here this is rendered + escaped, _ = charset.EscapeControlReader(markupRd, buf, ctx.Locale, charset.RuneNBSP) + output = buf.String() + buf.Reset() + close(done) + }() + + err = markdown.Render(rctx, bytes.NewReader(data), markupWr) + _ = markupWr.CloseWithError(err) + <-done + return escaped, output, err + } + + ctx.Data["EscapeStatus"], ctx.Data["content"], err = renderFn(data) + if err != nil { if wikiRepo != nil { wikiRepo.Close() } @@ -249,11 +268,10 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } - ctx.Data["EscapeStatus"], ctx.Data["content"] = charset.EscapeControlString(buf.String()) - if !isSideBar { buf.Reset() - if err := markdown.Render(rctx, bytes.NewReader(sidebarContent), &buf); err != nil { + ctx.Data["sidebarEscapeStatus"], ctx.Data["sidebarContent"], err = renderFn(sidebarContent) + if err != nil { if wikiRepo != nil { wikiRepo.Close() } @@ -261,14 +279,14 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } ctx.Data["sidebarPresent"] = sidebarContent != nil - ctx.Data["sidebarEscapeStatus"], ctx.Data["sidebarContent"] = charset.EscapeControlString(buf.String()) } else { ctx.Data["sidebarPresent"] = false } if !isFooter { buf.Reset() - if err := markdown.Render(rctx, bytes.NewReader(footerContent), &buf); err != nil { + ctx.Data["footerEscapeStatus"], ctx.Data["footerContent"], err = renderFn(footerContent) + if err != nil { if wikiRepo != nil { wikiRepo.Close() } @@ -276,7 +294,6 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } ctx.Data["footerPresent"] = footerContent != nil - ctx.Data["footerEscapeStatus"], ctx.Data["footerContent"] = charset.EscapeControlString(buf.String()) } else { ctx.Data["footerPresent"] = false } diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 6dd237bbc8d4..9844992f5b11 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -32,6 +32,7 @@ import ( "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/translation" "github.com/sergi/go-diff/diffmatchpatch" stdcharset "golang.org/x/net/html/charset" @@ -169,11 +170,11 @@ func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int } // escape a line's content or return
needed for copy/paste purposes -func getLineContent(content string) DiffInline { +func getLineContent(content string, locale translation.Locale) DiffInline { if len(content) > 0 { - return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content))) + return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content)), locale) } - return DiffInline{Content: "
"} + return DiffInline{EscapeStatus: &charset.EscapeStatus{}, Content: "
"} } // DiffSection represents a section of a DiffFile. @@ -267,26 +268,26 @@ func init() { // DiffInline is a struct that has a content and escape status type DiffInline struct { - EscapeStatus charset.EscapeStatus + EscapeStatus *charset.EscapeStatus Content template.HTML } // DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped -func DiffInlineWithUnicodeEscape(s template.HTML) DiffInline { - status, content := charset.EscapeControlString(string(s)) +func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline { + status, content := charset.EscapeControlHTML(string(s), locale) return DiffInline{EscapeStatus: status, Content: template.HTML(content)} } // DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped -func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline { - status, content := charset.EscapeControlString(highlight.Code(fileName, language, code)) +func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline { + status, content := charset.EscapeControlHTML(highlight.Code(fileName, language, code), locale) return DiffInline{EscapeStatus: status, Content: template.HTML(content)} } // GetComputedInlineDiffFor computes inline diff for the given line. -func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) DiffInline { +func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline { if setting.Git.DisableDiffHighlight { - return getLineContent(diffLine.Content[1:]) + return getLineContent(diffLine.Content[1:], locale) } var ( @@ -303,26 +304,26 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif // try to find equivalent diff line. ignore, otherwise switch diffLine.Type { case DiffLineSection: - return getLineContent(diffLine.Content[1:]) + return getLineContent(diffLine.Content[1:], locale) case DiffLineAdd: compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx) if compareDiffLine == nil { - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:]) + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) } diff1 = compareDiffLine.Content diff2 = diffLine.Content case DiffLineDel: compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx) if compareDiffLine == nil { - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:]) + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) } diff1 = diffLine.Content diff2 = compareDiffLine.Content default: if strings.IndexByte(" +-", diffLine.Content[0]) > -1 { - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:]) + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) } - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content) + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content, locale) } hcd := newHighlightCodeDiff() @@ -330,7 +331,7 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif // it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back // if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)" diffHTML := diffToHTML(nil, diffRecord, diffLine.Type) - return DiffInlineWithUnicodeEscape(template.HTML(diffHTML)) + return DiffInlineWithUnicodeEscape(template.HTML(diffHTML), locale) } // DiffFile represents a file diff. diff --git a/templates/repo/blame.tmpl b/templates/repo/blame.tmpl index 6b3f3eddd7b3..b697573d24ea 100644 --- a/templates/repo/blame.tmpl +++ b/templates/repo/blame.tmpl @@ -55,7 +55,11 @@ {{if $.EscapeStatus.Escaped}} - {{if $row.EscapeStatus.Escaped}}{{end}} + + {{if $row.EscapeStatus.Escaped}} + + {{end}} + {{end}} {{$row.Code}} diff --git a/templates/repo/diff/blob_excerpt.tmpl b/templates/repo/diff/blob_excerpt.tmpl index 04d271a444c5..c821d12d9094 100644 --- a/templates/repo/diff/blob_excerpt.tmpl +++ b/templates/repo/diff/blob_excerpt.tmpl @@ -19,20 +19,25 @@ {{end}} - {{$inlineDiff := $.section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}} + {{$inlineDiff := $.section.GetComputedInlineDiffFor $line $.locale}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.locale}} {{else}} - {{$inlineDiff := $.section.GetComputedInlineDiffFor $line}} + {{$inlineDiff := $.section.GetComputedInlineDiffFor $line $.locale}} - {{if and $line.LeftIdx $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{if and $line.LeftIdx $inlineDiff.EscapeStatus.Escaped}}{{end}} {{if $line.LeftIdx}}{{end}} {{/* - */}}{{if $line.LeftIdx}}{{$inlineDiff.Content}}{{end}}{{/* + */}}{{if $line.LeftIdx}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.locale}}{{else}}{{/* + */}}{{/* + */}}{{end}}{{/* */}} - {{if and $line.RightIdx $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{if and $line.RightIdx $inlineDiff.EscapeStatus.Escaped}}{{end}} {{if $line.RightIdx}}{{end}} {{/* - */}}{{if $line.RightIdx}}{{$inlineDiff.Content}}{{end}}{{/* + */}}{{if $line.RightIdx}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.locale}}{{else}}{{/* + */}}{{/* + */}}{{end}}{{/* */}} {{end}} @@ -62,10 +67,10 @@ {{end}} - {{$inlineDiff := $.section.GetComputedInlineDiffFor $line}} - {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{$inlineDiff := $.section.GetComputedInlineDiffFor $line $.locale}} + {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} - {{$inlineDiff.Content}} + {{$inlineDiff.Content}} {{end}} {{end}} diff --git a/templates/repo/diff/escape_title.tmpl b/templates/repo/diff/escape_title.tmpl new file mode 100644 index 000000000000..7aa5af4254a8 --- /dev/null +++ b/templates/repo/diff/escape_title.tmpl @@ -0,0 +1,2 @@ +{{if .diff.EscapeStatus.HasInvisible}}{{.locale.Tr "repo.invisible_runes_line"}} {{end}}{{/* +*/}}{{if .diff.EscapeStatus.HasAmbiguous}}{{.locale.Tr "repo.ambiguous_runes_line"}}{{end}} diff --git a/templates/repo/diff/section_code.tmpl b/templates/repo/diff/section_code.tmpl new file mode 100644 index 000000000000..c95ce83fc416 --- /dev/null +++ b/templates/repo/diff/section_code.tmpl @@ -0,0 +1,6 @@ +{{.diff.Content}} diff --git a/templates/repo/diff/section_split.tmpl b/templates/repo/diff/section_split.tmpl index 10ab6ffbb9e1..aa30221f430c 100644 --- a/templates/repo/diff/section_split.tmpl +++ b/templates/repo/diff/section_split.tmpl @@ -21,15 +21,17 @@ {{svg "octicon-fold"}} {{end}} - {{$inlineDiff := $section.GetComputedInlineDiffFor $line}} - {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} - {{$inlineDiff.Content}} + {{$inlineDiff := $section.GetComputedInlineDiffFor $line $.root.locale}} + {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{/* + */}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/* + */}} {{else if and (eq .GetType 3) $hasmatch}}{{/* DEL */}} {{$match := index $section.Lines $line.Match}} - {{- $leftDiff := ""}}{{if $line.LeftIdx}}{{$leftDiff = $section.GetComputedInlineDiffFor $line}}{{end}} - {{- $rightDiff := ""}}{{if $match.RightIdx}}{{$rightDiff = $section.GetComputedInlineDiffFor $match}}{{end}} + {{- $leftDiff := ""}}{{if $line.LeftIdx}}{{$leftDiff = $section.GetComputedInlineDiffFor $line $.root.locale}}{{end}} + {{- $rightDiff := ""}}{{if $match.RightIdx}}{{$rightDiff = $section.GetComputedInlineDiffFor $match $.root.locale}}{{end}} - {{if $line.LeftIdx}}{{if $leftDiff.EscapeStatus.Escaped}}{{end}}{{end}} + {{if $line.LeftIdx}}{{if $leftDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{/* @@ -38,13 +40,13 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $line.LeftIdx}}{{/* - */}}{{$leftDiff.Content}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $leftDiff "locale" $.root.locale}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* */}} - {{if $match.RightIdx}}{{if $rightDiff.EscapeStatus.Escaped}}{{end}}{{end}} + {{if $match.RightIdx}}{{if $rightDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{if $match.RightIdx}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{/* @@ -53,15 +55,15 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $match.RightIdx}}{{/* - */}}{{$rightDiff.Content}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $rightDiff "locale" $.root.locale}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* */}} {{else}} - {{$inlineDiff := $section.GetComputedInlineDiffFor $line}} + {{$inlineDiff := $section.GetComputedInlineDiffFor $line $.root.locale}} - {{if $line.LeftIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} + {{if $line.LeftIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{if $line.LeftIdx}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2))}}{{/* @@ -70,13 +72,13 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $line.LeftIdx}}{{/* - */}}{{$inlineDiff.Content}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* */}} - {{if $line.RightIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} + {{if $line.RightIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{if $line.RightIdx}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3))}}{{/* @@ -85,7 +87,7 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $line.RightIdx}}{{/* - */}}{{$inlineDiff.Content}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* diff --git a/templates/repo/diff/section_unified.tmpl b/templates/repo/diff/section_unified.tmpl index 13f396d47e33..1d6c4fc22303 100644 --- a/templates/repo/diff/section_unified.tmpl +++ b/templates/repo/diff/section_unified.tmpl @@ -25,12 +25,12 @@ {{end}} - {{$inlineDiff := $section.GetComputedInlineDiffFor $line -}} - {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{$inlineDiff := $section.GetComputedInlineDiffFor $line $.root.locale -}} + {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} {{if eq .GetType 4}} {{/* - */}}{{$inlineDiff.Content}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/* */}} {{else}} {{/* @@ -39,7 +39,7 @@ */}}{{svg "octicon-plus"}}{{/* */}}{{/* */}}{{end}}{{/* - */}}{{$inlineDiff.Content}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/* */}} {{end}} diff --git a/templates/repo/unicode_escape_prompt.tmpl b/templates/repo/unicode_escape_prompt.tmpl index 94d353cddf66..ab3452efa4bb 100644 --- a/templates/repo/unicode_escape_prompt.tmpl +++ b/templates/repo/unicode_escape_prompt.tmpl @@ -1,19 +1,22 @@ {{if .EscapeStatus}} - {{if .EscapeStatus.BadBIDI}} + {{if .EscapeStatus.HasInvisible}}
{{svg "octicon-x" 16 "close inside"}}
- {{$.root.locale.Tr "repo.bidi_bad_header"}} + {{$.root.locale.Tr "repo.invisible_runes_header"}}
-

{{$.root.locale.Tr "repo.bidi_bad_description" | Str2html}}

+

{{$.root.locale.Tr "repo.invisible_runes_description" | Str2html}}

+ {{if .EscapeStatus.HasAmbiguous}} +

{{$.root.locale.Tr "repo.ambiguous_runes_description" | Str2html}}

+ {{end}}
- {{else if .EscapeStatus.HasBIDI}} + {{else if .EscapeStatus.HasAmbiguous}}
{{svg "octicon-x" 16 "close inside"}}
- {{$.root.locale.Tr "repo.unicode_header"}} + {{$.root.locale.Tr "repo.ambiguous_runes_header"}}
-

{{$.root.locale.Tr "repo.unicode_description" | Str2html}}

+

{{$.root.locale.Tr "repo.ambiguous_runes_description" | Str2html}}

{{end}} {{end}} diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 34ed859d40c2..a76700a7263f 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -113,7 +113,7 @@ {{if $.EscapeStatus.Escaped}} - {{if (index $.LineEscapeStatus $idx).Escaped}}{{end}} + {{if (index $.LineEscapeStatus $idx).Escaped}}{{end}} {{end}} {{$code | Safe}} diff --git a/web_src/less/_repository.less b/web_src/less/_repository.less index fa634479f6d3..b3f24ee03d08 100644 --- a/web_src/less/_repository.less +++ b/web_src/less/_repository.less @@ -82,7 +82,11 @@ .broken-code-point { font-family: var(--fonts-monospace); - color: blue; + color: var(--color-blue); + } + + .unicode-escaped .ambiguous-code-point { + border: 1px var(--color-yellow) solid; } .metas {