コード例 #1
0
		// Derives the map entry of code point i from a character that
		// is already mapped. Does nothing when i is defined already.
		//
		// checkUpper: when set and the upper-case form of i (per ti) is
		// in category 0xF, i copies the level 1 / level 2 weights of
		// that upper-case form (after making sure it is filled too).
		//
		// Otherwise i is built from its decomposition: the first
		// decomposition character supplies category and level 1, and
		// the remaining ones, which all have to be in category 1,
		// contribute to level 2. Nothing is assigned when i has no
		// decomposition (decompIndex is 0) or when one of the remaining
		// characters is not in category 1.
		//
		// greekRemap: a remaining character whose level 2 weight is
		// 0xC is counted as 3 instead.
		private void FillLetterNFKD (int i, bool checkUpper, bool greekRemap)
		{
			if (map [i].Defined)
				return;

			int upper = (int) ti.ToUpper ((char) i);
			if (checkUpper && map [upper].Category == 0xF) {
				// When i is its own upper-case form there is
				// nothing to copy from.
				if (upper != i) {
					FillLetterNFKD (upper, checkUpper, greekRemap);
					map [i] = new CharMapEntry (0xF,
						map [upper].Level1,
						map [upper].Level2);
				}
				return;
			}

			int start = decompIndex [i];
			if (start == 0)
				return;

			// The first decomposition value is the base character;
			// make sure it has an entry before borrowing from it.
			int baseChar = decompValues [start];
			FillLetterNFKD (baseChar, checkUpper, greekRemap);

			// Accumulate (in a byte) the level 2 weights of the
			// characters that follow the base character.
			byte markWeight = 0;
			int end = start + decompLength [i];
			for (int pos = start + 1; pos < end; pos++) {
				int mark = decompValues [pos];
				if (map [mark].Category != 1)
					return;
				if (!greekRemap || map [mark].Level2 != 0xC)
					markWeight += map [mark].Level2;
				else
					markWeight += 3;
			}

			int level2 = map [baseChar].Level2;
			if (markWeight != 0) {
				// A base without level 2 weight starts from 2.
				level2 = (level2 == 0 ? 2 : level2) + markWeight;
			}
			// An explicitly recorded diacritical weight wins over
			// the computed one.
			if (diacritical [i] != 0)
				level2 = diacritical [i];

			map [i] = new CharMapEntry (
				map [baseChar].Category,
				map [baseChar].Level1,
				(byte) level2);
		}
コード例 #2
0
		// Creates the map entry for c in the given category, using
		// fillIndex [category] as the running weight, and then advances
		// that running weight by increment.
		//
		// For category 1 the running weight becomes level 2 and alt
		// becomes level 1; for every other category the running weight
		// becomes level 1 and alt becomes level 2.
		//
		// Returns false, leaving map and fillIndex untouched, when c is
		// ignorable or already defined; returns true otherwise.
		private bool AddCharMap (char c, byte category, byte increment, byte alt)
		{
			int cp = (int) c;
			if (IsIgnorable (cp))
				return false;
			if (map [cp].Defined)
				return false;

			byte level1, level2;
			if (category == 1) {
				level1 = alt;
				level2 = fillIndex [category];
			} else {
				level1 = fillIndex [category];
				level2 = alt;
			}
			map [cp] = new CharMapEntry (category, level1, level2);

			fillIndex [category] += increment;
			return true;
		}
コード例 #3
0
		void GenerateCore ()
		{
			UnicodeCategory uc;

			#region Specially ignored // 01
			// This will raise "Defined" flag up.
			// FIXME: Check If it is really fine. Actually for
			// Japanese voice marks this code does remapping.
			foreach (char c in specialIgnore)
				map [(int) c] = new CharMapEntry (0, 0, 0);
			#endregion

			#region Extenders (FF FF)
			fillIndex [0xFF] = 0xFF;
			char [] specialBiggest = new char [] {
				'\u3005', '\u3031', '\u3032', '\u309D',
				'\u309E', '\u30FC', '\u30FD', '\u30FE',
				'\uFE7C', '\uFE7D', '\uFF70'};
			foreach (char c in specialBiggest)
				AddCharMap (c, 0xFF, 0);
			#endregion

			#region Variable weights
			// Controls : 06 03 - 06 3D
			fillIndex [0x6] = 3;
			for (int i = 0; i < 65536; i++) {
				if (IsIgnorable (i))
					continue;
				char c = (char) i;
				uc = Char.GetUnicodeCategory (c);
				// NEL is whitespace but not ignored here.
				if (uc == UnicodeCategory.Control &&
					!Char.IsWhiteSpace (c) || c == '\u0085')
					AddCharMap (c, 6, 1);
			}

			// Apostrophe 06 80
			fillIndex [0x6] = 0x80;
			AddCharMap ('\'', 6, 0);
			AddCharMap ('\uFF07', 6, 1);
			AddCharMap ('\uFE63', 6, 1);

			// SPECIAL CASE: fill FE32 here in prior to be added
			// at 2013. Windows does not always respect NFKD.
			map [0xFE32] = new CharMapEntry (6, 0x90, 0);

			// Hyphen/Dash : 06 81 - 06 90
			for (int i = 0; i < char.MaxValue; i++) {
				if (!IsIgnorable (i) &&
					Char.GetUnicodeCategory ((char) i) ==
					UnicodeCategory.DashPunctuation) {
					AddCharMapGroup2 ((char) i, 6, 1, 0);
					if (i == 0x2011) {
						// SPECIAL: add 2027 and 2043
						// Maybe they are regarded the 
						// same hyphens in "central"
						// position.
						AddCharMap ('\u2027', 6, 1);
						AddCharMap ('\u2043', 6, 1);
					}
				}
			}
			// They are regarded as primarily equivalent to '-'
			map [0x208B] = new CharMapEntry (6, 0x82, 0);
			map [0x207B] = new CharMapEntry (6, 0x82, 0);
			map [0xFF0D] = new CharMapEntry (6, 0x82, 0);

			// Arabic variable weight chars 06 A0 -
			fillIndex [6] = 0xA0;
			// vowels
			for (int i = 0x64B; i <= 0x650; i++)
				AddArabicCharMap ((char) i, 6, 1, 0);
			// sukun
			AddCharMapGroup ('\u0652', 6, 1, 0);
			// shadda
			AddCharMapGroup ('\u0651', 6, 1, 0);
			#endregion


			#region Nonspacing marks // 01
			// FIXME: 01 03 - 01 B6 ... annoyance :(

			// Combining diacritical marks: 01 DC -

			fillIndex [0x1] = 0x41;
			for (int i = 0x030E; i <= 0x0326; i++)
				if (!IsIgnorable (i))
					AddCharMap ((char) i, 0x1, 1);
			for (int i = 0x0329; i <= 0x0334; i++)
				if (!IsIgnorable (i))
					AddCharMap ((char) i, 0x1, 1);
			fillIndex [0x1]++;
			for (int i = 0x0339; i <= 0x0341; i++)
				if (!IsIgnorable (i))
					AddCharMap ((char) i, 0x1, 1);
			fillIndex [0x1] = 0x74;
			for (int i = 0x0346; i <= 0x0348; i++)
				if (!IsIgnorable (i))
					AddCharMap ((char) i, 0x1, 1);
			for (int i = 0x02BE; i <= 0x02BF; i++)
				if (!IsIgnorable (i))
					AddCharMap ((char) i, 0x1, 1);
			for (int i = 0x02C1; i <= 0x02C5; i++)
				if (!IsIgnorable (i))
					AddCharMap ((char) i, 0x1, 1);
			for (int i = 0x02CE; i <= 0x02CF; i++)
				if (!IsIgnorable (i))
					AddCharMap ((char) i, 0x1, 1);
			fillIndex [0x1]++;
			for (int i = 0x02D1; i <= 0x02D3; i++)
				if (!IsIgnorable (i))
					AddCharMap ((char) i, 0x1, 1);
			AddCharMap ('\u02DE', 0x1, 1);
			for (int i = 0x02E4; i <= 0x02E9; i++)
				if (!IsIgnorable (i))
					AddCharMap ((char) i, 0x1, 1);


			// FIXME: needs more love here (it should eliminate
			// all the hacky code above).
			for (int i = 0x0300; i < 0x0370; i++)
				if (!IsIgnorable (i) && diacritical [i] != 0
					&& !map [i].Defined)
					map [i] = new CharMapEntry (
						0x1, 0x1, diacritical [i]);

			// Cyrillic and Armenian nonspacing mark
			fillIndex [0x1] = 0x94;
			for (int i = 0x400; i < 0x580; i++)
				if (!IsIgnorable (i) &&
					Char.GetUnicodeCategory ((char) i) ==
					UnicodeCategory.NonSpacingMark)
					AddCharMap ((char) i, 1, 1);

			fillIndex [0x1] = 0x8D;
			// syriac dotted nonspacing marks (1)
			AddCharMap ('\u0740', 0x1, 1);
			AddCharMap ('\u0741', 0x1, 1);
			AddCharMap ('\u0742', 0x1, 1);
			// syriac oblique nonspacing marks
			AddCharMap ('\u0747', 0x1, 1);
			AddCharMap ('\u0748', 0x1, 1);
			// syriac dotted nonspacing marks (2)
			fillIndex [0x1] = 0x94; // this reset is mandatory
			AddCharMap ('\u0732', 0x1, 1);
			AddCharMap ('\u0735', 0x1, 1);
			AddCharMap ('\u0738', 0x1, 1);
			AddCharMap ('\u0739', 0x1, 1);
			AddCharMap ('\u073C', 0x1, 1);
			// SPECIAL CASES: superscripts
			AddCharMap ('\u073F', 0x1, 1);
			AddCharMap ('\u0711', 0x1, 1);
			// syriac "DOTS"
			for (int i = 0x0743; i <= 0x0746; i++)
				AddCharMap ((char) i, 0x1, 1);
			for (int i = 0x0730; i <= 0x0780; i++)
				if (!map [i].Defined &&
					Char.GetUnicodeCategory ((char) i) ==
					UnicodeCategory.NonSpacingMark)
					AddCharMap ((char) i, 0x1, 1);

			// LAMESPEC: It should not stop at '\u20E1'. There are
			// a few more characters (that however results in 
			// overflow of level 2 unless we start before 0xDD).
			fillIndex [0x1] = 0xDD;
			for (int i = 0x20D0; i <= 0x20DC; i++)
				AddCharMap ((char) i, 0x1, 1);
			fillIndex [0x1] = 0xEC;
			for (int i = 0x20DD; i <= 0x20E1; i++)
				AddCharMap ((char) i, 0x1, 1);
			fillIndex [0x1] = 0x4;
			AddCharMap ('\u0CD5', 0x1, 1);
			AddCharMap ('\u0CD6', 0x1, 1);
			AddCharMap ('\u093C', 0x1, 1);
			for (int i = 0x302A; i <= 0x302D; i++)
				AddCharMap ((char) i, 0x1, 1);
			AddCharMap ('\u0C55', 0x1, 1);
			AddCharMap ('\u0C56', 0x1, 1);

			fillIndex [0x1] = 0x50; // I wonder how they are sorted
			for (int i = 0x02D4; i <= 0x02D7; i++)
				AddCharMap ((char) i, 0x1, 1);

			// They are not part of Nonspacing marks, but have
			// only diacritical weight.
			for (int i = 0x3099; i <= 0x309C; i++)
				map [i] = new CharMapEntry (1, 1, 1);
			map [0xFF9E] = new CharMapEntry (1, 1, 1);
			map [0xFF9F] = new CharMapEntry (1, 1, 2);
			map [0x309D] = new CharMapEntry (0xFF, 0xFF, 1);
			map [0x309E] = new CharMapEntry (0xFF, 0xFF, 1);
			for (int i = 0x30FC; i <= 0x30FE; i++)
				map [i] = new CharMapEntry (0xFF, 0xFF, 1);

			fillIndex [0x1] = 0xA;
			for (int i = 0x0951; i <= 0x0954; i++)
				AddCharMap ((char) i, 0x1, 2);

			#endregion


			#region Whitespaces // 07 03 -
			fillIndex [0x7] = 0x2;
			AddCharMap (' ', 0x7, 2);
			AddCharMap ('\u00A0', 0x7, 1);
			for (int i = 9; i <= 0xD; i++)
				AddCharMap ((char) i, 0x7, 1);
			for (int i = 0x2000; i <= 0x200B; i++)
				AddCharMap ((char) i, 0x7, 1);

			fillIndex [0x7] = 0x17;
			AddCharMapGroup ('\u2028', 0x7, 1, 0);
			AddCharMapGroup ('\u2029', 0x7, 1, 0);

			// Characters which used to represent layout control.
			// LAMESPEC: Windows developers seem to have thought 
			// that those characters are kind of whitespaces,
			// while they aren't.
			AddCharMap ('\u2422', 0x7, 1, 0); // blank symbol
			AddCharMap ('\u2423', 0x7, 1, 0); // open box

			#endregion

			// category 09 - continued symbols from 08
			fillIndex [0x9] = 2;
			// misc tech mark
			for (int cp = 0x2300; cp <= 0x237A; cp++)
				AddCharMap ((char) cp, 0x9, 1, 0);

			// arrows
			byte [] arrowLv2 = new byte [] {0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3};
			foreach (DictionaryEntry de in arrowValues) {
				int idx = (int) de.Value;
				int cp = (int) de.Key;
				if (map [cp].Defined)
					continue;
				fillIndex [0x9] = (byte) (0xD8 + idx);
				AddCharMapGroup ((char) cp, 0x9, 0, arrowLv2 [idx]);
				arrowLv2 [idx]++;
			}
			// boxes
			byte [] boxLv2 = new byte [128];
			// 0-63 will be used for those offsets are positive,
			// and 64-127 are for negative ones.
			for (int i = 0; i < boxLv2.Length; i++)
				boxLv2 [i] = 3;
			foreach (DictionaryEntry de in boxValues) {
				int cp = (int) de.Key;
				int off = (int) de.Value;
				if (map [cp].Defined)
					continue;
				if (off < 0) {
					fillIndex [0x9] = (byte) (0xE5 + off);
					AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [128 + off]++);
				}
				else {
					fillIndex [0x9] = (byte) (0xE5 + off);
					AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [off]++);
				}
			}
			// Some special characters (slanted)
			fillIndex [0x9] = 0xF4;
			AddCharMap ('\u2571', 0x9, 3);
			AddCharMap ('\u2572', 0x9, 3);
			AddCharMap ('\u2573', 0x9, 3);

			// FIXME: implement 0A
			#region Symbols
			fillIndex [0xA] = 2;
			// byte currency symbols
			for (int cp = 0; cp < 0x100; cp++) {
				uc = Char.GetUnicodeCategory ((char) cp);
				if (!IsIgnorable (cp) &&
					uc == UnicodeCategory.CurrencySymbol &&
					cp != '$')
					AddCharMapGroup ((char) cp, 0xA, 1, 0);
			}
			// byte other symbols
			for (int cp = 0; cp < 0x100; cp++) {
				if (cp == 0xA6)
					continue; // SPECIAL: skip FIXME: why?
				uc = Char.GetUnicodeCategory ((char) cp);
				if (!IsIgnorable (cp) &&
					uc == UnicodeCategory.OtherSymbol ||
					cp == '\u00AC' || cp == '\u00B5' || cp == '\u00B7')
					AddCharMapGroup ((char) cp, 0xA, 1, 0);
			}
			// U+30FB here
			AddCharMapGroup ('\u30FB', 0xA, 1, 0);

			for (int cp = 0x2020; cp <= 0x2031; cp++)
				if (Char.IsPunctuation ((char) cp))
					AddCharMap ((char) cp, 0xA, 1, 0);
			// SPECIAL CASES: why?
			AddCharMap ('\u203B', 0xA, 1, 0);
			AddCharMap ('\u2040', 0xA, 1, 0);
			AddCharMap ('\u2041', 0xA, 1, 0);
			AddCharMap ('\u2042', 0xA, 1, 0);

			for (int cp = 0x20A0; cp <= 0x20AB; cp++)
				AddCharMap ((char) cp, 0xA, 1, 0);

			// 3004 is skipped at first...
			for (int cp = 0x3010; cp <= 0x3040; cp++)
				if (Char.IsSymbol ((char) cp))
					AddCharMap ((char) cp, 0xA, 1, 0);
			// SPECIAL CASES: added here
			AddCharMap ('\u3004', 0xA, 1, 0);
			AddCharMap ('\u327F', 0xA, 1, 0);

			for (int cp = 0x2600; cp <= 0x2613; cp++)
				AddCharMap ((char) cp, 0xA, 1, 0);
			// Dingbats
			for (int cp = 0x2620; cp <= 0x2770; cp++)
				if (Char.IsSymbol ((char) cp))
					AddCharMap ((char) cp, 0xA, 1, 0);
			// OCR
			for (int i = 0x2440; i < 0x2460; i++)
				AddCharMap ((char) i, 0xA, 1, 0);

			// SPECIAL CASES: why?
			AddCharMap ('\u0E3F', 0xA, 1, 0);
			AddCharMap ('\u2117', 0xA, 1, 0);
			AddCharMap ('\u20AC', 0xA, 1, 0);
			#endregion

			#region Numbers // 0C 02 - 0C E1
			fillIndex [0xC] = 2;

			// 9F8 : Bengali "one less than the denominator"
			AddCharMap ('\u09F8', 0xC, 1, 0x3C);

			ArrayList numbers = new ArrayList ();
			for (int i = 0; i < 65536; i++)
				if (!IsIgnorable (i) &&
					Char.IsNumber ((char) i) &&
					(i < 0x3190 || 0x32C0 < i)) // they are CJK characters
					numbers.Add (i);

			ArrayList numberValues = new ArrayList ();
			foreach (int i in numbers)
				numberValues.Add (new DictionaryEntry (i, decimalValue [(char) i]));
			// SPECIAL CASE: Cyrillic Thousand sign
			numberValues.Add (new DictionaryEntry (0x0482, 1000m));
			numberValues.Sort (DecimalDictionaryValueComparer.Instance);

//foreach (DictionaryEntry de in numberValues)
//Console.Error.WriteLine ("****** number {0:X04} : {1} {2}", de.Key, de.Value, decompType [(int) de.Key]);

			// FIXME: fillIndex adjustment lines are too
			// complicated. It must be simpler.
			decimal prevValue = -1;
			foreach (DictionaryEntry de in numberValues) {
				int cp = (int) de.Key;
				decimal currValue = (decimal) de.Value;
				bool addnew = false;
				if (prevValue < currValue &&
					prevValue - (int) prevValue == 0 &&
					prevValue >= 1) {

					addnew = true;
					// Process Hangzhou and Roman numbers

					// There are some SPECIAL cases.
					if (currValue != 4) // no increment for 4
						fillIndex [0xC]++;

					int xcp;
					if (currValue <= 13) {
						if (currValue == 4)
							fillIndex [0xC]++;
						// SPECIAL CASE
						if (currValue == 11)
							AddCharMap ('\u0BF0', 0xC, 1);
						xcp = (int) prevValue + 0x2160 - 1;
						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
						xcp = (int) prevValue + 0x2170 - 1;
						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
						fillIndex [0xC]++;
					}
					if (currValue < 12)
						fillIndex [0xC]++;
					if (currValue <= 10) {
						xcp = (int) prevValue + 0x3021 - 1;
						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
						fillIndex [0xC]++;
					}
				}
				if (prevValue < currValue)
					prevValue = currValue;
				if (map [cp].Defined)
					continue;
				// Hangzhou and Roman numerals are added later
				// (code is above)
				if (0x3021 <= cp && cp < 0x302A
					|| 0x2160 <= cp && cp < 0x216C
					|| 0x2170 <= cp && cp < 0x217C)
					continue;

				if (cp == 0x215B) // FIXME: why?
					fillIndex [0xC] += 2;
				else if (cp == 0x3021) // FIXME: why?
					fillIndex [0xC]++;
				if (addnew || cp <= '9') {
					int mod = (int) currValue - 1;
					int xcp;
					if (1 <= currValue && currValue <= 11) {
						xcp = mod + 0x2776;
						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
						xcp = mod + 0x2780;
						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
						xcp = mod + 0x278A;
						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
					}
					if (1 <= currValue && currValue <= 20) {
						xcp = mod + 0x2460;
						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
						xcp = mod + 0x2474;
						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
						xcp = mod + 0x2488;
						AddCharMap ((char) xcp, 0xC, 0, diacritical [xcp]);
					}
				}
				if (addnew && currValue >= 10 && currValue < 13 || cp == 0x09F9)
					fillIndex [0xC]++;
				AddCharMapGroup ((char) cp, 0xC, 0, diacritical [cp], true);

				switch (cp) {
				// Maybe Bengali digit numbers do not increase
				// indexes, but 0x09E6 does.
				case 0x09E7: case 0x09E8: case 0x09E9:
				case 0x09EA:
				// SPECIAL CASES
				case 0x0BF0: case 0x2180: case 0x2181:
					break;
				// SPECIAL CASE
				case 0x0BF1:
					fillIndex [0xC]++;
					break;
				default:
					if (currValue < 11 || currValue == 1000)
						fillIndex [0xC]++;
					break;
				}

				// Add special cases that are not regarded as 
				// numbers in UnicodeCategory speak.
				if (cp == '5') {
					// TONE FIVE
					AddCharMapGroup ('\u01BD', 0xC, 0, 0);
					AddCharMapGroup ('\u01BC', 0xC, 1, 0);
				}
				else if (cp == '2' || cp == '6') // FIXME: why?
					fillIndex [0xC]++;
			}

			// 221E: infinity
			fillIndex [0xC] = 0xFF;
			AddCharMap ('\u221E', 0xC, 1);
			#endregion

			#region Letters and NonSpacing Marks (general)

			// ASCII Latin alphabets
			for (int i = 0; i < alphabets.Length; i++)
				AddAlphaMap (alphabets [i], 0xE, alphaWeights [i]);

			// non-ASCII Latin alphabets
			// FIXME: there are no such characters that are placed
			// *after* "alphabets" array items. This is nothing
			// more than a hack that creates dummy weights for
			// primary characters.
			for (int i = 0x0080; i < 0x0300; i++) {
				if (!Char.IsLetter ((char) i))
					continue;
				// For those Latin Letters which has NFKD are
				// not added as independent primary character.
				if (decompIndex [i] != 0)
					continue;
				// SPECIAL CASES:
				// 1.some alphabets have primarily
				//   equivalent ASCII alphabets.
				// 2.some have independent primary weights,
				//   but inside a-to-z range.
				// 3.there are some expanded characters that
				//   are not part of Unicode Standard NFKD.
				// 4. some characters are letter in IsLetter
				//   but not in sortkeys (maybe unicode version
				//   difference caused it).
				switch (i) {
				// 1. skipping them does not make sense
//				case 0xD0: case 0xF0: case 0x131: case 0x138:
//				case 0x184: case 0x185: case 0x186: case 0x189:
//				case 0x18D: case 0x18E: case 0x18F: case 0x190:
//				case 0x194: case 0x195: case 0x196: case 0x19A:
//				case 0x19B: case 0x19C:
				// 2. skipping them does not make sense
//				case 0x14A: // Ng
//				case 0x14B: // ng
				// 3.
				case 0xC6: // AE
				case 0xE6: // ae
				case 0xDE: // Icelandic Thorn
				case 0xFE: // Icelandic Thorn
				case 0xDF: // German ss
				case 0xFF: // German ss
				// 4.
				case 0x1C0: case 0x1C1: case 0x1C2: case 0x1C3:
				// not classified yet
//				case 0x1A6: case 0x1A7: case 0x1A8: case 0x1A9:
//				case 0x1AA: case 0x1B1: case 0x1B7: case 0x1B8:
//				case 0x1B9: case 0x1BA: case 0x1BB: case 0x1BF:
//				case 0x1DD:
					continue;
				}
				AddCharMapGroup ((char) i, 0xE, 1, 0);
			}

			// IPA extensions
			// FIXME: this results in not equivalent values to
			// Windows, but is safer for comparison.
			char [] ipaArray = new char [0x300 - 0x250 + 0x20];
			for (int i = 0x40; i < 0x60; i++)
				if (Char.IsLetter ((char) i))
					ipaArray [i - 0x40] = (char) (i);
			for (int i = 0x250; i < 0x300; i++)
				if (Char.IsLetter ((char) i))
					ipaArray [i - 0x250 + 0x20] = (char) i;
			Array.Sort (ipaArray, UCAComparer.Instance);
			int targetASCII = 0;
			byte latinDiacritical = 0x7B;
			foreach (char c in ipaArray) {
				if (c <= 'Z') {
					targetASCII = c;
					latinDiacritical = 0x7B;
				}
				else
					map [(int) c] = new CharMapEntry (
						0xE,
						map [targetASCII].Level1,
						latinDiacritical++);
			}

			// Greek and Coptic

			// FIXME: this is (mysterious and) incomplete.
			for (int i = 0x0380; i < 0x0400; i++)
				if (diacritical [i] == 0 &&
					decompLength [i] == 1 &&
					decompType [i] == DecompositionCompat)
					diacritical [i] = 3;

			fillIndex [0xF] = 2;
			for (int i = 0x0391; i < 0x03AA; i++)
				if (i != 0x03A2)
					AddCharMap ((char) i, 0xF, 1,
						diacritical [i]);
			fillIndex [0xF] = 2;
			for (int i = 0x03B1; i < 0x03CA; i++)
				if (i != 0x03C2)
					AddCharMap ((char) i, 0xF, 1,
						diacritical [i]);
			// Final Sigma
			map [0x03C2] = new CharMapEntry (0xF,
				map [0x03C3].Level1, map [0x03C3].Level2);

			fillIndex [0xF] = 0x40;
			for (int i = 0x03DA; i < 0x03F0; i++)
				AddCharMap ((char) i, 0xF,
					(byte) (i % 2 == 0 ? 0 : 2),
					diacritical [i]);

			// NFKD
			for (int i = 0x0386; i <= 0x0400; i++)
				FillLetterNFKD (i, true, true);

			// Cyrillic.
			// Cyrillic letters are sorted like Latin letters i.e. 
			// containing culture-specific letters between the
			// standard Cyrillic sequence.
			//
			// We can't use UCA here; it has different sorting.
			char [] orderedCyrillic = new char [] {
				'\u0430', '\u0431', '\u0432', '\u0433', '\u0434',
				'\u0452', // DJE for Serbocroatian
				'\u0435',
				'\u0454', // IE for Ukrainian
				'\u0436', '\u0437',
				'\u0455', // DZE
				'\u0438',
				'\u0456', // Byelorussian-Ukrainian I
				'\u0457', // YI
				'\u0439',
				'\u0458', // JE
				'\u043A', '\u043B',
				'\u0459', // LJE
				'\u043C', '\u043D',
				'\u045A', // NJE
				'\u043E',
				// 4E9 goes here.
				'\u043F', '\u0440', '\u0441', '\u0442',
				'\u045B', // TSHE for Serbocroatian
				'\u0443',
				'\u045E', // Short U for Byelorussian
				'\u04B1', // Straight U w/ stroke (diacritical!)
				'\u0444', '\u0445', '\u0446', '\u0447',
				'\u045F', // DZHE
				'\u0448', '\u0449', '\u044A', '\u044B', '\u044C',
				'\u044D', '\u044E', '\u044F'};

			// For some characters here is a map to basic cyrillic
			// letters. See UnicodeData.txt character names for
			// the sources. Here I simply declare an equiv. array.
			// The content characters are map from U+490(,491),
			// skipping small letters.
			char [] cymap_src = new char [] {
				'\u0433', '\u0433', '\u0433', '\u0436',
				'\u0437', '\u043A', '\u043A', '\u043A',
				'\u043A', '\u043D', '\u043D', '\u043F',
				'\u0445', '\u0441', '\u0442', '\u0443',
				'\u0443', '\u0445', '\u0446', '\u0447',
				'\u0447', '\u0432', '\u0435', '\u0435',
				'\u0406', '\u0436', '\u043A', '\u043D',
				'\u0447', '\u0435'};

			fillIndex [0x10] = 0x8D;
			for (int i = 0x0460; i < 0x0481; i++) {
				if (Char.IsLetter ((char) i)) {
					if (i == 0x0476)
						// U+476/477 have the same
						// primary weight as U+474/475.
						fillIndex [0x10] -= 3;
					AddLetterMap ((char) i, 0x10, 3);
				}
			}

			fillIndex [0x10] = 0x6;
			for (int i = 0; i < orderedCyrillic.Length; i++) {
				char c = Char.ToUpper (orderedCyrillic [i], CultureInfo.InvariantCulture);
				if (!IsIgnorable ((int) c) &&
					Char.IsLetter (c) &&
					!map [c].Defined) {
					AddLetterMap (c, 0x10, 0);
					fillIndex [0x10] += 3;
				}
			}

			// NFKD
			for (int i = 0x0401; i <= 0x045F; i++)
				FillLetterNFKD (i, false, false);

			for (int i = 0; i < cymap_src.Length; i++) {
				char c = cymap_src [i];
				fillIndex [0x10] = map [c].Level1;
				int c2 = 0x0490 + i * 2;
				AddLetterMapCore ((char) c2, 0x10, 0, diacritical [c2], false);
			}

			// Armenian
			fillIndex [0x11] = 0x3;
			fillIndex [0x1] = 0x98;
			for (int i = 0x0531; i < 0x0586; i++) {
				if (i == 0x0559 || i == 0x55A)
					AddCharMap ((char) i, 1, 1);
				if (Char.IsLetter ((char) i))
					AddLetterMap ((char) i, 0x11, 1);
			}

			// Hebrew
			// -Letters
			fillIndex [0x12] = 0x2;
			for (int i = 0x05D0; i < 0x05FF; i++)
				if (Char.IsLetter ((char) i)) {
					if (isUppercase [i]) {
						fillIndex [0x12]--;
						AddLetterMap ((char) i, 0x12, 2);
					}
					else
						AddLetterMap ((char) i, 0x12, 1);
				}
			// -Accents
			fillIndex [0x1] = 0x3;
			for (int i = 0x0591; i <= 0x05C2; i++) {
				if (i == 0x05A3 || i == 0x05BB)
					fillIndex [0x1]++;
				if (i != 0x05BE)
					AddCharMap ((char) i, 0x1, 1);
			}

			// Arabic
			fillIndex [0x1] = 0x8E;
			fillIndex [0x13] = 0x3;
			for (int i = 0x0621; i <= 0x064A; i++) {
				// Abjad
				if (Char.GetUnicodeCategory ((char) i)
					!= UnicodeCategory.OtherLetter) {
					// FIXME: arabic nonspacing marks are
					// in different order.
					AddCharMap ((char) i, 0x1, 1);
					continue;
				}
//				map [i] = new CharMapEntry (0x13,
//					(byte) arabicLetterPrimaryValues [i], 1);
				fillIndex [0x13] = 
					(byte) arabicLetterPrimaryValues [i];
				byte formDiacritical = 8; // default
				// SPECIAL CASES:
				switch (i) {
				case 0x0622: formDiacritical = 9; break;
				case 0x0623: formDiacritical = 0xA; break;
				case 0x0624: formDiacritical = 5; break;
				case 0x0625: formDiacritical = 0xB; break;
				case 0x0626: formDiacritical = 7; break;
				case 0x0649: formDiacritical = 5; break;
				case 0x064A: formDiacritical = 7; break;
				}
//				AddLetterMapCore ((char) i, 0x13, 1, formDiacritical, false);
				AddArabicCharMap ((char) i, 0x13, 1, formDiacritical);
			}
			for (int i = 0x0670; i < 0x0673; i++)
				map [i] = new CharMapEntry (0x13, 0xB, (byte) (0xC + i - 0x670));
			fillIndex [0x13] = 0x84;
			for (int i = 0x0674; i < 0x06D6; i++)
				if (Char.IsLetter ((char) i))
					AddLetterMapCore ((char) i, 0x13, 1, 0, false);

			// Devanagari

			// FIXME: this could be fixed in more decent way
			for (int i = 0x0958; i <= 0x095F; i++)
				diacritical [i] = 8;

			// FIXME: it does seem straight codepoint mapping.
			fillIndex [0x14] = 04;
			for (int i = 0x0901; i < 0x0905; i++)
				if (!IsIgnorable (i))
					AddLetterMap ((char) i, 0x14, 2);
			fillIndex [0x14] = 0xB;
			for (int i = 0x0905; i < 0x093A; i++) {
				if (i == 0x0928)
					AddCharMap ('\u0929', 0x14, 0, 8);
				if (i == 0x0930)
					AddCharMap ('\u0931', 0x14, 0, 8);
				if (i == 0x0933)
					AddCharMap ('\u0934', 0x14, 0, 8);
				if (Char.IsLetter ((char) i))
					AddLetterMap ((char) i, 0x14, 4);
				if (i == 0x090B)
					AddCharMap ('\u0960', 0x14, 4);
				if (i == 0x090C)
					AddCharMap ('\u0961', 0x14, 4);
			}
			fillIndex [0x14] = 0xDA;
			for (int i = 0x093E; i < 0x0945; i++)
				if (!IsIgnorable (i))
					AddLetterMap ((char) i, 0x14, 2);
			fillIndex [0x14] = 0xEC;
			for (int i = 0x0945; i < 0x094F; i++)
				if (!IsIgnorable (i))
					AddLetterMap ((char) i, 0x14, 2);

			// Bengali
			// -Letters
			fillIndex [0x15] = 02;
			for (int i = 0x0980; i < 0x9FF; i++) {
				if (IsIgnorable (i))
					continue;
				if (i == 0x09E0)
					fillIndex [0x15] = 0x3B;
				switch (Char.GetUnicodeCategory ((char) i)) {
				case UnicodeCategory.NonSpacingMark:
				case UnicodeCategory.DecimalDigitNumber:
				case UnicodeCategory.OtherNumber:
					continue;
				}
				AddLetterMap ((char) i, 0x15, 1);
			}
			// -Signs
			fillIndex [0x1] = 0x3;
			for (int i = 0x0981; i < 0x0A00; i++)
				if (Char.GetUnicodeCategory ((char) i) ==
					UnicodeCategory.NonSpacingMark)
					AddCharMap ((char) i, 0x1, 1);

			// Gurmukhi. orderedGurmukhi is from UCA
			// FIXME: it does not look equivalent to UCA.
			fillIndex [0x16] = 04;
			fillIndex [0x1] = 3;
			for (int i = 0; i < orderedGurmukhi.Length; i++) {
				char c = orderedGurmukhi [i];
				if (IsIgnorable ((int) c))
					continue;
				if (IsIgnorableNonSpacing (c)) {
					AddLetterMap (c, 0x1, 1);
					continue;
				}
				if (c == '\u0A3C' || c == '\u0A4D' ||
					'\u0A66' <= c && c <= '\u0A71')
					continue;
				// SPECIAL CASES
				byte shift = 4;
				switch (c) {
				case '\u0A33': case '\u0A36': case '\u0A16':
				case '\u0A17': case '\u0A5B': case '\u0A5E':
					shift = 0;
					break;
				}
				if (c == '\u0A3E') // Skip
					fillIndex [0x16] = 0xC0;
				AddLetterMap (c, 0x16, shift);
			}

			// Gujarati. orderedGujarati is from UCA
			fillIndex [0x17] = 0x4;
			// nonspacing marks
			map [0x0A4D] = new CharMapEntry (1, 0, 0x3);
			map [0x0ABD] = new CharMapEntry (1, 0, 0x3);
			map [0x0A3C] = new CharMapEntry (1, 0, 0x4);
			map [0x0A71] = new CharMapEntry (1, 0, 0x6);
			map [0x0ABC] = new CharMapEntry (1, 0, 0xB);
			map [0x0A70] = new CharMapEntry (1, 0, 0xE);
			// letters go first.
			for (int i = 0; i < orderedGujarati.Length; i++) {
				// SPECIAL CASE
				char c = orderedGujarati [i];
				if (Char.IsLetter (c)) {
					// SPECIAL CASES
					if (c == '\u0AB3' || c == '\u0A32')
						continue;
					if (c == '\u0A33') {
						AddCharMap ('\u0A32', 0x17, 0);
						AddCharMap ('\u0A33', 0x17, 4, 4);
						continue;
					}
					if (c == '\u0A8B')
						AddCharMap ('\u0AE0', 0x17, 0, 5);
					AddCharMap (c, 0x17, 4);

					if (c == '\u0AB9')
						AddCharMap ('\u0AB3', 0x17, 6);
				}
			}
			// non-letters
			byte gujaratiShift = 4;
			fillIndex [0x17] = 0xC0;
			for (int i = 0; i < orderedGujarati.Length; i++) {
				char c = orderedGujarati [i];
				if (fillIndex [0x17] == 0xCC)
					gujaratiShift = 3;
				if (!Char.IsLetter (c)) {
					// SPECIAL CASES
					if (c == '\u0A82')
						AddCharMap ('\u0A81', 0x17, 2);
					if (c == '\u0AC2')
						fillIndex [0x17]++;
					AddLetterMap (c, 0x17, gujaratiShift);
				}
			}

			// Oriya
			fillIndex [0x1] = 03;
			fillIndex [0x18] = 02;
			for (int i = 0x0B00; i < 0x0B7F; i++) {
				switch (Char.GetUnicodeCategory ((char) i)) {
				case UnicodeCategory.NonSpacingMark:
				case UnicodeCategory.DecimalDigitNumber:
					AddLetterMap ((char) i, 0x1, 1);
					continue;
				}
				AddLetterMapCore ((char) i, 0x18, 1, 0, true);
			}

			// Tamil
			fillIndex [0x19] = 2;
			AddCharMap ('\u0BD7', 0x19, 0);
			fillIndex [0x19] = 0xA;
			// vowels
			for (int i = 0x0B82; i <= 0x0B94; i++)
				if (!IsIgnorable ((char) i))
					AddCharMap ((char) i, 0x19, 2);
			// special vowel
			fillIndex [0x19] = 0x28;
			// The array for Tamil consonants is a constant.
			// Windows have almost similar sequence to TAM from
			// tamilnet but a bit different in Grantha.
			for (int i = 0; i < orderedTamilConsonants.Length; i++)
				AddLetterMap (orderedTamilConsonants [i], 0x19, 4);
			// combining marks
			fillIndex [0x19] = 0x82;
			for (int i = 0x0BBE; i < 0x0BCD; i++)
				if (Char.GetUnicodeCategory ((char) i) ==
					UnicodeCategory.SpacingCombiningMark
					|| i == 0x0BC0)
					AddLetterMap ((char) i, 0x19, 2);

			// Telugu
			fillIndex [0x1A] = 0x4;
			for (int i = 0x0C00; i < 0x0C62; i++) {
				if (i == 0x0C55 || i == 0x0C56)
					continue; // skip
				AddCharMap ((char) i, 0x1A, 3);
				char supp = (i == 0x0C0B) ? '\u0C60':
					i == 0x0C0C ? '\u0C61' : char.MinValue;
				if (supp == char.MinValue)
					continue;
				AddCharMap (supp, 0x1A, 3);
			}

			// Kannada
			fillIndex [0x1B] = 4;
			for (int i = 0x0C80; i < 0x0CE5; i++) {
				if (i == 0x0CD5 || i == 0x0CD6)
					continue; // ignore
				if (i == 0x0CB1 || i == 0x0CB3 || i == 0x0CDE)
					continue; // shift after 0xCB9
				AddCharMap ((char) i, 0x1B, 3);
				if (i == 0x0CB9) {
					// SPECIAL CASES: but why?
					AddCharMap ('\u0CB1', 0x1B, 3); // RRA
					AddCharMap ('\u0CB3', 0x1B, 3); // LLA
					AddCharMap ('\u0CDE', 0x1B, 3); // FA
				}
				if (i == 0x0CB2)
					AddCharMap ('\u0CE1', 0x1B, 3); // vocalic LL
			}
			
			// Malayalam
			fillIndex [0x1C] = 2;
			fillIndex [0x1] = 3;
			for (int i = 0x0D02; i < 0x0D61; i++) {
				// FIXME: I avoided MSCompatUnicodeTable usage
				// here (it results in recursion). So check if
				// using NonSpacingMark makes sense or not.
				if (Char.GetUnicodeCategory ((char) i) != UnicodeCategory.NonSpacingMark)
//				if (!MSCompatUnicodeTable.IsIgnorable ((char) i))
					AddCharMap ((char) i, 0x1C, 1);
				else if (!IsIgnorable ((char) i))
					AddCharMap ((char) i, 1, 1);
			}

			// Thai ... note that it breaks 0x1E wall after E2B!
			// Also, all Thai characters have level 2 value 3.
			fillIndex [0x1E] = 2;
			fillIndex [0x1] = 3;
			for (int i = 0xE40; i <= 0xE44; i++)
				AddCharMap ((char) i, 0x1E, 1, 3);
			for (int i = 0xE01; i < 0xE2B; i++)
				AddCharMap ((char) i, 0x1E, 6, 3);
			fillIndex [0x1F] = 5;
			for (int i = 0xE2B; i < 0xE30; i++)
				AddCharMap ((char) i, 0x1F, 6, 3);
			fillIndex [0x1F] = 0x1E;
			for (int i = 0xE30; i < 0xE3B; i++)
				AddCharMap ((char) i, 0x1F, 1, 3);
			// Some Thai characters remain.
			char [] specialThai = new char [] {'\u0E45', '\u0E46',
				'\u0E4E', '\u0E4F', '\u0E5A', '\u0E5B'};
			foreach (char c in specialThai)
				AddCharMap (c, 0x1F, 1, 3);

			for (int i = 0xE00; i < 0xE80; i++)
				if (Char.GetUnicodeCategory ((char) i) ==
					UnicodeCategory.NonSpacingMark)
					AddCharMap ((char) i, 1, 1);

			// Lao
			fillIndex [0x1F] = 2;
			fillIndex [0x1] = 3;
			for (int i = 0xE80; i < 0xEDF; i++) {
				if (IsIgnorable ((char) i))
					continue;
				else if (Char.IsLetter ((char) i))
					AddCharMap ((char) i, 0x1F, 1);
				else if (Char.GetUnicodeCategory ((char) i) ==
					UnicodeCategory.NonSpacingMark)
					AddCharMap ((char) i, 1, 1);
			}

			// Georgian. orderedGeorgian is from UCA DUCET.
			fillIndex [0x21] = 5;
			for (int i = 0; i < orderedGeorgian.Length; i++) {
				char c = orderedGeorgian [i];
				if (map [(int) c].Defined)
					continue;
				AddCharMap (c, 0x21, 0);
				if (c < '\u10F6')
					AddCharMap ((char) (c - 0x30), 0x21, 0);
				fillIndex [0x21] += 5;
			}

			// Japanese Kana.
			fillIndex [0x22] = 2;
			int kanaOffset = 0x3041;
			byte [] kanaLines = new byte [] {2, 2, 2, 2, 1, 3, 1, 2, 1};

			for (int gyo = 0; gyo < 9; gyo++) {
				for (int dan = 0; dan < 5; dan++) {
					if (gyo == 7 && dan % 2 == 1) {
						// 'ya'-gyo
						fillIndex [0x22]++;
						kanaOffset -= 2; // There is no space for yi and ye.
						continue;
					}
					int cp = kanaOffset + dan * kanaLines [gyo];
					// small lines (a-gyo, ya-gyo)
					if (gyo == 0 || gyo == 7) {
						AddKanaMap (cp, 1); // small
						AddKanaMap (cp + 1, 1);
					}
					else
						AddKanaMap (cp, kanaLines [gyo]);
					fillIndex [0x22]++;

					if (cp == 0x30AB) {
						// add small 'ka' (before normal one)
						AddKanaMap (0x30F5, 1);
						kanaOffset++;
					}
					if (cp == 0x30B1) {
						// add small 'ke' (before normal one)
						AddKanaMap (0x30F6, 1);
						kanaOffset++;
					}
					if (cp == 0x3061) {
						// add small 'Tsu' (before normal one)
						AddKanaMap (0x3063, 1);
						kanaOffset++;
					}
				}
				fillIndex [0x22] += 3;
				kanaOffset += 5 * kanaLines [gyo];
			}

			// Wa-gyo is almost special, so I just manually add.
			AddLetterMap ((char) 0x308E, 0x22, 0);
			AddLetterMap ((char) (0x308E + 0x60), 0x22, 0);
			AddLetterMap ((char) 0x308F, 0x22, 0);
			AddLetterMap ((char) (0x308F + 0x60), 0x22, 0);
			fillIndex [0x22]++;
			AddLetterMap ((char) 0x3090, 0x22, 0);
			AddLetterMap ((char) (0x3090 + 0x60), 0x22, 0);
			fillIndex [0x22] += 2;
			// no "Wu" in Japanese.
			AddLetterMap ((char) 0x3091, 0x22, 0);
			AddLetterMap ((char) (0x3091 + 0x60), 0x22, 0);
			fillIndex [0x22]++;
			AddLetterMap ((char) 0x3092, 0x22, 0);
			AddLetterMap ((char) (0x3092 + 0x60), 0x22, 0);
			// Nn
			fillIndex [0x22] = 0x80;
			AddLetterMap ((char) 0x3093, 0x22, 0);
			AddLetterMap ((char) (0x3093 + 0x60), 0x22, 0);

			map [0x3094] = new CharMapEntry (map [0x30A6].Category,
				map [0x30A6].Level1, 3);// voiced hiragana U
			map [0x30F4] = new CharMapEntry (map [0x30A6].Category,
				map [0x30A6].Level1, 3);// voiced katakana U

			map [0x30F5] = new CharMapEntry (map [0x30AB].Category,
				map [0x30AB].Level1, 0);// small katakana Ka
			map [0x30F6] = new CharMapEntry (map [0x30B1].Category,
				map [0x30B1].Level1, 0);// small katakana Ke
			// voiced Wa lines
			for (int i = 0x30F7; i < 0x30FB; i++)
				map [i] = new CharMapEntry (map [i - 8].Category,
					map [i - 8].Level1,
					3);

			// JIS Japanese square chars.
			fillIndex [0x22] = 0x97;
			jisJapanese.Sort (JISComparer.Instance);
			foreach (JISCharacter j in jisJapanese)
				if (0x3300 <= j.CP && j.CP <= 0x3357)
					AddCharMap ((char) j.CP, 0x22, 1);
			// non-JIS Japanese square chars.
			nonJisJapanese.Sort (NonJISComparer.Instance);
			foreach (NonJISCharacter j in nonJisJapanese)
				AddCharMap ((char) j.CP, 0x22, 1);

			// Bopomofo
			fillIndex [0x23] = 0x02;
			for (int i = 0x3105; i <= 0x312C; i++)
				AddCharMap ((char) i, 0x23, 1);

			// Estrangela: ancient Syriac
			fillIndex [0x24] = 0x0B;
			// FIXME: is 0x71E really alternative form?
			ArrayList syriacAlternatives = new ArrayList (
				new int [] {0x714, 0x716, 0x71C, 0x71E, 0x724, 0x727});
			for (int i = 0x0710; i <= 0x072C; i++) {
				if (i == 0x0711) // NonSpacingMark
					continue;
				if (syriacAlternatives.Contains (i))
					continue;
				AddCharMap ((char) i, 0x24, 4);
				// FIXME: why?
				if (i == 0x721)
					fillIndex [0x24]++;
			}
			foreach (int cp in syriacAlternatives)
				map [cp] = new CharMapEntry (0x24,
					(byte) (map [cp - 1].Level1 + 2),
					0);
			// FIXME: Syriac NonSpacingMark should go here.

			// Thaana
			// FIXME: it turned out that it does not look like UCA
			fillIndex [0x24] = 0x6E;
			fillIndex [0x1] = 0xAC;
			for (int i = 0; i < orderedThaana.Length; i++) {
				char c = orderedThaana [i];
				if (IsIgnorableNonSpacing ((int) c))
					AddCharMap (c, 1, 1);
				AddCharMap (c, 0x24, 2);
				if (c == '\u0782') // SPECIAL CASE: why?
					fillIndex [0x24] += 2;
			}
			#endregion

			// FIXME: Add more culture-specific letters (that are
			// not supported in Windows collation) here.

			// Surrogate ... they are computed.

			#region Hangul
			// Hangul.
			//
			// Unlike UCA, the Windows Hangul sequence mixes Jongseong
			// with the Choseong sequence as well as Jungseong,
			// adjusted to have the same primary weight for the
			// same base character. So it is impossible to compute
			// those sort keys.
			//
			// Here I introduce an ordered sequence of mixed
			// 'commands' and 'characters' that is similar to
			// LDML text:
			//	- ',' increases primary weight.
			//	- [A B] means a range, increasing index
			//	- {A B} means a range, without increasing index
			//	- '=' is no operation (it means the characters 
			//	  of both sides have the same weight).
			//	- '>' inserts a Hangul Syllable block that
			//	  contains 0x24C (0x15 * 0x1C) characters.
			//	- '<' decreases the index
			//	- '0'-'9' means skip count
			//	- whitespaces are ignored
			//

			string hangulSequence =
			  "\u1100=\u11A8 > \u1101=\u11A9 >"
			+ "\u11C3, \u11AA, \u11C4, \u1102=\u11AB >"
			+ "<{\u1113 \u1116}, \u3165,"
				+ "\u11C5, \u11C6=\u3166,, \u11C7, \u11C8,"
				+ "\u11AC, \u11C9, \u11AD, \u1103=\u11AE  >"
			+ "<\u1117, \u11CA, \u1104, \u11CB > \u1105=\u11AF >"
			+ "<{\u1118 \u111B}, \u11B0, [\u11CC \u11D0], \u11B1,"
				+ "[\u11D1 \u11D2], \u11B2,"
				+ "[\u11D3 \u11D5], \u11B3,"
				+ "[\u11D6 \u11D7], \u11B4, \u11B5,"
				+ "\u11B6=\u11D8, \u3140,, \u11D9, \u1106=\u11B7 >"
			+ "<{\u111C \u111D}, [\u11DA \u11E2], \u1107=\u11B8 >"
			+ "<{\u111E \u1120}, \u3172,, \u3173, \u11E3, \u1108 >"
			+ "<{\u1121 \u112C}, \u3144 \u11B9, \u3174, \u3175,,,, "
				+ "\u3176,, \u3177, [\u11E4 \u11E6] \u3178,"
				+ "\u3179, \u1109=\u11BA,,, \u3214=\u3274 <>"
			+ "<{\u112D \u1133}, \u11E7 \u317A, \u317B, \u317C "
				+ "[\u11E8 \u11E9],, \u11EA \u317D,, \u110A=\u11BB,,, >"
			+ "<{\u1134 \u1140}, \u317E,,,,,,, \u11EB,"
				+ "\u110B=\u11BC, [\u1161 \u11A2], \u1160 >"
			+ "<{\u1141 \u114C}, \u3180=\u11EE, \u11EC, \u11ED,,,,, "
				+ "\u11F1,, \u11F2,,,"
				+ "\u11EF,,, \u3181=\u11F0, \u110C=\u11BD,, >"
			+ "<\u114D, \u110D,,  >"
			+ "<{\u114E \u1151},, \u110E=\u11BE,,  >"
			+ "<{\u1152 \u1155},,, \u110F=\u11BF >"
			+ "\u1110=\u11C0 > \u1111=\u11C1 >"
			+ "<\u1156=\u1157, \u11F3, \u11F4, \u1112=\u11C2 >"
			+ "<\u1158=\u1159=\u115F, \u3185, \u11F9,"
				+ "[\u11F5 \u11F8]"
			;

			byte hangulCat = 0x52;
			fillIndex [hangulCat] = 0x2;

			int syllableBlock = 0;
			for (int n = 0; n < hangulSequence.Length; n++) {
				char c = hangulSequence [n];
				int start, end;
				if (Char.IsWhiteSpace (c))
					continue;
				switch (c) {
				case '=':
					break; // NOP
				case ',':
					IncrementSequentialIndex (ref hangulCat);
					break;
				case '<':
					if (fillIndex [hangulCat] == 2)
						throw new Exception ("FIXME: handle it correctly (yes it is hacky, it is really unfortunate).");
					fillIndex [hangulCat]--;
					break;
				case '>':
					IncrementSequentialIndex (ref hangulCat);
					for (int l = 0; l < 0x15; l++)
						for (int v = 0; v < 0x1C; v++) {
							AddCharMap (
								(char) (0xAC00 + syllableBlock * 0x1C * 0x15 + l * 0x1C + v), hangulCat, 0);
							IncrementSequentialIndex (ref hangulCat);
						}
					syllableBlock++;
					break;
				case '[':
					start = hangulSequence [n + 1];
					end = hangulSequence [n + 3];
					for (int i = start; i <= end; i++) {
						AddCharMap ((char) i, hangulCat, 0);
						if (end > i)
							IncrementSequentialIndex (ref hangulCat);
					}
					n += 4; // consumes 5 characters for this operation
					break;
				case '{':
					start = hangulSequence [n + 1];
					end = hangulSequence [n + 3];
					for (int i = start; i <= end; i++)
						AddCharMap ((char) i, hangulCat, 0);
					n += 4; // consumes 5 characters for this operation
					break;
				default:
					AddCharMap (c, hangulCat, 0);
					break;
				}
			}

			// Some Jamo NFKD.
			for (int i = 0x3200; i < 0x3300; i++) {
				if (IsIgnorable (i) || map [i].Defined)
					continue;
				int ch = 0;
				// w/ bracket
				if (decompLength [i] == 4 &&
					decompValues [decompIndex [i]] == '(')
					ch = decompIndex [i] + 1;
				// circled
				else if (decompLength [i] == 2 &&
					decompValues [decompIndex [i] + 1] == '\u1161')
					ch = decompIndex [i];
				else if (decompLength [i] == 1)
					ch = decompIndex [i];
				else
					continue;
				ch = decompValues [ch];
				if (ch < 0x1100 || 0x1200 < ch &&
					ch < 0xAC00 || 0xD800 < ch)
					continue;

				// SPECIAL CASE ?
				int offset = i < 0x3260 ? 1 : 0;
				if (0x326E <= i && i <= 0x3273)
					offset = 1;

				map [i] = new CharMapEntry (map [ch].Category,
					(byte) (map [ch].Level1 + offset),
					map [ch].Level2);
//					Console.Error.WriteLine ("Jamo {0:X04} -> {1:X04}", i, decompValues [decompIndex [i] + 1]);
			}


			#endregion

			// Letterlike characters and CJK compatibility square
			sortableCharNames.Sort (StringDictionaryValueComparer.Instance);
			int [] counts = new int ['Z' - 'A' + 1];
			char [] namedChars = new char [sortableCharNames.Count];
			int nCharNames = 0;
			foreach (DictionaryEntry de in sortableCharNames) {
				counts [((string) de.Value) [0] - 'A']++;
				namedChars [nCharNames++] = (char) ((int) de.Key);
			}
			nCharNames = 0; // reset
			for (int a = 0; a < counts.Length; a++) {
				fillIndex [0xE] = (byte) (alphaWeights [a + 1] - counts [a]);
				for (int i = 0; i < counts [a]; i++)
//Console.Error.WriteLine ("---- {0:X04} : {1:x02} / {2} {3}", (int) namedChars [nCharNames], fillIndex [0xE], ((DictionaryEntry) sortableCharNames [nCharNames]).Value, Char.GetUnicodeCategory (namedChars [nCharNames]));
					AddCharMap (namedChars [nCharNames++], 0xE, 1);
			}

			// CJK unified ideograph.
			byte cjkCat = 0x9E;
			fillIndex [cjkCat] = 0x2;
			for (int cp = 0x4E00; cp <= 0x9FBB; cp++)
				if (!IsIgnorable (cp))
					AddCharMapGroupCJK ((char) cp, ref cjkCat);
			// CJK Extensions go here.
			// LAMESPEC: With this Windows style CJK layout, it is
			// impossible to add more CJK ideograph i.e. 0x9FA6-
			// 0x9FBB can never be added w/o breaking compat.
			for (int cp = 0xF900; cp <= 0xFA2D; cp++)
				if (!IsIgnorable (cp))
					AddCharMapGroupCJK ((char) cp, ref cjkCat);

			// PrivateUse ... computed.
			// remaining Surrogate ... computed.

			#region 07 - ASCII non-alphanumeric + 3001, 3002 // 07
			// non-alphanumeric ASCII except for: + - < = > '
			for (int i = 0x21; i < 0x7F; i++) {
				// SPECIAL CASE: 02C6 appears to be treated as
				// equivalent to '^', which does not conform
				// to the Unicode standard character database.
				if (i == 0x005B)
					AddCharMap ('\u2045', 0x7, 0, 0x1C);
				if (i == 0x005D)
					AddCharMap ('\u2046', 0x7, 0, 0x1C);
				if (i == 0x005E)
					AddCharMap ('\u02C6', 0x7, 0, 3);
				if (i == 0x0060)
					AddCharMap ('\u02CB', 0x7, 0, 3);

				if (Char.IsLetterOrDigit ((char) i)
					|| "+-<=>'".IndexOf ((char) i) >= 0)
					continue; // they are not added here.

				AddCharMapGroup2 ((char) i, 0x7, 1, 0);
				// Insert 3001 after ',' and 3002 after '.'
				if (i == 0x2C)
					AddCharMapGroup2 ('\u3001', 0x7, 1, 0);
				else if (i == 0x2E)
					AddCharMapGroup2 ('\u3002', 0x7, 1, 0);
				else if (i == 0x3A)
					AddCharMap ('\uFE30', 0x7, 1, 0);
			}
			#endregion

			#region 07 - Punctuations and something else
			for (int i = 0xA0; i < char.MaxValue; i++) {
				if (IsIgnorable (i))
					continue;

				// FIXME: these resets should not actually be
				// needed; they are done here as a shortcut.
				if (i == 0x05C3)
					fillIndex [0x7]++;
				if (i == 0x0700)
					fillIndex [0x7] = 0xE2;
				if (i == 0x2016)
					fillIndex [0x7] = 0x77;
				if (i == 0x3008)
					fillIndex [0x7] = 0x93;

				if (0x02C8 <= i && i <= 0x02CD)
					continue; // nonspacing marks

				// SPECIAL CASE: maybe they could be allocated
				// dummy NFKD mapping and no special processing
				// would be required here.
				if (i == 0x00AF)
					AddCharMap ('\u02C9', 0x7, 0, 3);
				if (i == 0x00B4)
					AddCharMap ('\u02CA', 0x7, 0, 3);
				if (i == 0x02C7)
					AddCharMap ('\u02D8', 0x7, 0, 3);

				// SPECIAL CASES:
				switch (i) {
				case 0xAB: // 08
				case 0xB7: // 0A
				case 0xBB: // 08
				case 0x02B9: // 01
				case 0x02BA: // 01
				case 0x2329: // 09
				case 0x232A: // 09
					continue;
				}

				switch (Char.GetUnicodeCategory ((char) i)) {
				case UnicodeCategory.OtherPunctuation:
				case UnicodeCategory.ClosePunctuation:
				case UnicodeCategory.OpenPunctuation:
				case UnicodeCategory.ConnectorPunctuation:
				case UnicodeCategory.InitialQuotePunctuation:
				case UnicodeCategory.FinalQuotePunctuation:
				case UnicodeCategory.ModifierSymbol:
					// SPECIAL CASES: // 0xA
					if (0x2020 <= i && i <= 0x2031)
						continue;
					if (i == 0x3003) // added later
						continue;
					AddCharMapGroup2 ((char) i, 0x7, 1, 0);
					break;
				default:
					if (i == 0xA6 || i == 0x1C3 || i == 0x037A) // SPECIAL CASE. FIXME: why?
						goto case UnicodeCategory.OtherPunctuation;
					break;
				}
			}

			// Control pictures
			// FIXME: resetting level 1 should not be needed;
			// it is done here as a shortcut.
			fillIndex [0x7] = 0xB6;
			for (int i = 0x2400; i <= 0x2424; i++)
				AddCharMap ((char) i, 0x7, 1, 0);

			// FIXME: what are they?
			AddCharMap ('\u3003', 0x7, 1);
			AddCharMap ('\u3006', 0x7, 1);
			AddCharMap ('\u02D0', 0x7, 1);
			AddCharMap ('\u10FB', 0x7, 1);
			AddCharMap ('\u0950', 0x7, 1);
			AddCharMap ('\u093D', 0x7, 1);
			AddCharMap ('\u0964', 0x7, 1);
			AddCharMap ('\u0965', 0x7, 1);
			AddCharMap ('\u0970', 0x7, 1);

			#endregion

			#region category 08 - symbols
			fillIndex [0x8] = 2;
			// Here Windows mapping is not straightforward. It is
			// not based on computation but seems manual sorting.
			AddCharMapGroup ('+', 0x8, 1, 0); // plus
			AddCharMapGroup ('\u2212', 0x8, 1); // minus
			AddCharMapGroup ('\u229D', 0x8, 1); // minus
			AddCharMapGroup ('\u2297', 0x8, 1); // mul
			AddCharMapGroup ('\u2044', 0x8, 1); // div
			AddCharMapGroup ('\u2215', 0x8, 0); // div
			AddCharMapGroup ('\u2298', 0x8, 1); // div slash
			AddCharMapGroup ('\u2217', 0x8, 0); // mul
			AddCharMapGroup ('\u229B', 0x8, 1); // asterisk oper
			AddCharMapGroup ('\u2218', 0x8, 0); // ring
			AddCharMapGroup ('\u229A', 0x8, 1); // ring
			AddCharMapGroup ('\u2219', 0x8, 0); // bullet
			AddCharMapGroup ('\u2299', 0x8, 1); // dot oper
			AddCharMapGroup ('\u2213', 0x8, 1); // minus-or-plus
			AddCharMapGroup ('\u003C', 0x8, 1); // <
			AddCharMapGroup ('\u227A', 0x8, 1); // precedes relation
			AddCharMapGroup ('\u22B0', 0x8, 1); // precedes under relation

			for (int cp = 0; cp < 0x2300; cp++) {
				if (cp == 0xAC) // SPECIAL CASE: skip
					continue;
				if (cp == 0x200) {
					cp = 0x2200; // skip to 2200
					fillIndex [0x8] = 0x21;
				}
				if (cp == 0x2295)
					fillIndex [0x8] = 0x3;
				if (cp == 0x22A2)
					fillIndex [0x8] = 0xAB;
				if (cp == 0x22B2)
					fillIndex [0x8] = 0xB9;
				if (!map [cp].Defined &&
//					Char.GetUnicodeCategory ((char) cp) ==
//					UnicodeCategory.MathSymbol)
					Char.IsSymbol ((char) cp))
					AddCharMapGroup ((char) cp, 0x8, 1);
				// SPECIAL CASES: no idea why Windows sorts as such
				switch (cp) {
				case 0x3E:
					AddCharMap ('\u227B', 0x8, 1, 0);
					AddCharMap ('\u22B1', 0x8, 1, 0);
					break;
				case 0xB1:
					AddCharMapGroup ('\u00AB', 0x8, 1);
					AddCharMapGroup ('\u226A', 0x8, 1);
					AddCharMapGroup ('\u00BB', 0x8, 1);
					AddCharMapGroup ('\u226B', 0x8, 1);
					break;
				case 0xF7:
					AddCharMap ('\u01C0', 0x8, 1, 0);
					AddCharMap ('\u01C1', 0x8, 1, 0);
					AddCharMap ('\u01C2', 0x8, 1, 0);
					break;
				}
			}
			#endregion

			#region Hack!

			// Characters w/ diacritical marks (NFKD)
			for (int i = 0; i <= char.MaxValue; i++) {
				if (map [i].Defined || IsIgnorable (i))
					continue;
				if (decompIndex [i] == 0)
					continue;

				int start = decompIndex [i];
				int primaryChar = decompValues [start];
				int secondary = diacritical [i];
				bool skip = false;
				int length = decompLength [i];
				// special processing for parenthesized ones.
				if (length == 3 &&
					decompValues [start] == '(' &&
					decompValues [start + 2] == ')') {
					primaryChar = decompValues [start + 1];
					length = 1;
				}

				if (map [primaryChar].Level1 == 0)
					continue;

				for (int l = 1; l < length; l++) {
					int c = decompValues [start + l];
					if (map [c].Level1 != 0)
						skip = true;
					secondary += diacritical [c];
				}
				if (skip)
					continue;
				map [i] = new CharMapEntry (
					map [primaryChar].Category,
					map [primaryChar].Level1,
					(byte) secondary);
				
			}

			// Diacritical weight adjustment

			// Arabic Hamzah
			diacritical [0x624] = 0x5;
			diacritical [0x626] = 0x7;
			diacritical [0x622] = 0x9;
			diacritical [0x623] = 0xA;
			diacritical [0x625] = 0xB;
			diacritical [0x649] = 0x5; // 'alif maqs.uurah
			diacritical [0x64A] = 0x7; // Yaa'

			for (int i = 0; i < char.MaxValue; i++) {
				byte mod = 0;
				byte cat = map [i].Category;
				switch (cat) {
				case 0xE: // Latin diacritics
				case 0x22: // Japanese: circled characters
					mod = diacritical [i];
					break;
				case 0x13: // Arabic
					if (i == 0x0621)
						break; // 0
					if (diacritical [i] == 0 && decompLength [i] != 0)
						diacritical [i] = map [decompValues [decompIndex [i]]].Level2;
					if (diacritical [i] == 0 && i >= 0xFE8D)
						mod = 0x8; // default for arabic
					break;
				}
				if (0x52 <= cat && cat <= 0x7F) // Hangul
					mod = diacritical [i];
				if (mod > 0)
					map [i] = new CharMapEntry (
						cat, map [i].Level1, mod);
			}

			// FIXME: this is partly a hack, but NonSpacingMark
			// characters that are still undefined at this point
			// are likely to be nonspacing.
			for (int i = 0; i < char.MaxValue; i++) {
				if (map [i].Defined ||
					IsIgnorable (i))
					continue;
				switch (i) {
				// SPECIAL CASES.
				case 0x02B9:
				case 0x02BA:
					break;
				default:
					if (Char.GetUnicodeCategory ((char) i) !=
					UnicodeCategory.NonSpacingMark)
						continue;
					break;
				}
				if (diacritical [i] != 0)
					map [i] = new CharMapEntry (1, 1, diacritical [i]);
				else
					AddCharMap ((char) i, 1, 1);
			}

			#endregion
		}