/// <summary>
/// Type initializer: builds the static word, mark, end-of-word, begin-of-word
/// and suffix sets for this class, compacts each one, and then freezes them all.
/// </summary>
static ThaiBreakEngine()
{
    // Word set: code points matching both [:Thai:] and [:LineBreak=SA:].
    fThaiWordSet = new UnicodeSet();
    fThaiWordSet.ApplyPattern("[[:Thai:]&[:LineBreak=SA:]]");
    fThaiWordSet.Compact();

    // Mark set: the same pattern further restricted by [:M:], plus U+0020.
    fMarkSet = new UnicodeSet();
    fMarkSet.ApplyPattern("[[:Thai:]&[:LineBreak=SA:]&[:M:]]");
    fMarkSet.Add(0x0020);
    fMarkSet.Compact();

    // End-of-word set: starts as a copy of the word set, then drops the
    // code points listed below.
    fEndWordSet = new UnicodeSet(fThaiWordSet);
    fEndWordSet.Remove(0x0E31);         // MAI HAN-AKAT
    fEndWordSet.Remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
    fEndWordSet.Compact();

    // Begin-of-word set: two explicit ranges.
    fBeginWordSet = new UnicodeSet();
    fBeginWordSet.Add(0x0E01, 0x0E2E);  // KO KAI through HO NOKHUK
    fBeginWordSet.Add(0x0E40, 0x0E44);  // SARA E through SARA AI MAIMALAI
    fBeginWordSet.Compact();

    // Suffix set: the two suffix constants declared elsewhere in the class.
    fSuffixSet = new UnicodeSet();
    fSuffixSet.Add(THAI_PAIYANNOI);
    fSuffixSet.Add(THAI_MAIYAMOK);
    fSuffixSet.Compact();

    // Every set is fully built at this point; freeze the static sets.
    fThaiWordSet.Freeze();
    fMarkSet.Freeze();
    fEndWordSet.Freeze();
    fBeginWordSet.Freeze();
    fSuffixSet.Freeze();
}
/// <summary>
/// Type initializer: builds the static word, mark, end-of-word and
/// begin-of-word sets for this class, compacts each one, and then freezes them all.
/// </summary>
static LaoBreakEngine()
{
    // Word set: code points matching both [:Laoo:] and [:LineBreak=SA:].
    fLaoWordSet = new UnicodeSet();
    fLaoWordSet.ApplyPattern("[[:Laoo:]&[:LineBreak=SA:]]");
    fLaoWordSet.Compact();

    // Mark set: the same pattern further restricted by [:M:], plus U+0020.
    fMarkSet = new UnicodeSet();
    fMarkSet.ApplyPattern("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]");
    fMarkSet.Add(0x0020);
    fMarkSet.Compact();

    // End-of-word set: starts as a copy of the word set, minus the prefix vowels.
    fEndWordSet = new UnicodeSet(fLaoWordSet);
    fEndWordSet.Remove(0x0EC0, 0x0EC4); // prefix vowels
    fEndWordSet.Compact();

    // Begin-of-word set: three explicit ranges.
    fBeginWordSet = new UnicodeSet();
    fBeginWordSet.Add(0x0E81, 0x0EAE);  // basic consonants (including holes for corresponding Thai characters)
    fBeginWordSet.Add(0x0EDC, 0x0EDD);  // digraph consonants (no Thai equivalent)
    fBeginWordSet.Add(0x0EC0, 0x0EC4);  // prefix vowels
    fBeginWordSet.Compact();

    // Every set is fully built at this point; freeze the static sets.
    fLaoWordSet.Freeze();
    fMarkSet.Freeze();
    fEndWordSet.Freeze();
    fBeginWordSet.Freeze();
}
/// <summary>
/// Type initializer: builds the static word, mark, end-of-word and
/// begin-of-word sets for this class, compacts each one, and then freezes them all.
/// </summary>
static KhmerBreakEngine()
{
    // Word set: code points matching both [:Khmer:] and [:LineBreak=SA:].
    fKhmerWordSet = new UnicodeSet();
    fKhmerWordSet.ApplyPattern("[[:Khmer:]&[:LineBreak=SA:]]");
    fKhmerWordSet.Compact();

    // Mark set: the same pattern further restricted by [:M:], plus U+0020.
    fMarkSet = new UnicodeSet();
    fMarkSet.ApplyPattern("[[:Khmer:]&[:LineBreak=SA:]&[:M:]]");
    fMarkSet.Add(0x0020);
    fMarkSet.Compact();

    // End-of-word set: starts as a copy of the word set, minus U+17D2.
    fEndWordSet = new UnicodeSet(fKhmerWordSet);
    fEndWordSet.Remove(0x17D2); // KHMER SIGN COENG that combines some following characters
    fEndWordSet.Compact();

    // Begin-of-word set: a single explicit range.
    fBeginWordSet = new UnicodeSet();
    fBeginWordSet.Add(0x1780, 0x17B3);
    fBeginWordSet.Compact();

    // Every set is fully built at this point; freeze the static sets.
    fKhmerWordSet.Freeze();
    fMarkSet.Freeze();
    fEndWordSet.Freeze();
    fBeginWordSet.Freeze();
}