/// <summary>
/// Click handler: splits the text of <c>uxText</c> into word-break tokens and shows them
/// in a message box, joined with "-".
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void UxBreakClick(object sender, EventArgs e)
{
    using (BreakIterator wordIterator = BreakIterator.CreateWordInstance(Locale.GetUS()))
    {
        wordIterator.SetText(this.uxText.Text);

        // Materialise the tokens while the iterator is still alive, then display them.
        string[] tokens = wordIterator.Enumerate().ToArray();
        string joined = string.Join("-", tokens);
        MessageBox.Show(joined);
    }
}
/// <summary>
/// Click handler: counts how often each word-break token occurs in the text of
/// <c>uxText</c> and shows up to ten "token : count" lines, most frequent first.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void UxOccurrenceCountClick(object sender, EventArgs e)
{
    using (BreakIterator wordIterator = BreakIterator.CreateWordInstance(Locale.GetUS()))
    {
        wordIterator.SetText(this.uxText.Text);

        // Identical tokens are grouped and sorted by ascending frequency.
        var ascendingByCount =
            from token in wordIterator.Enumerate()
            group token by token into occurrences
            orderby occurrences.Count()
            select occurrences;

        // Reversing the ascending sequence puts the most frequent tokens first.
        string[] report = ascendingByCount
            .Reverse()
            .Select(occurrences => occurrences.Key + " : " + occurrences.Count())
            .Take(10)
            .ToArray();

        MessageBox.Show(string.Join(Environment.NewLine, report));
    }
}
/// <summary>
/// Splits the text of <c>inputTextBox</c> into word-break tokens and appends one braille
/// string per token to <c>prepareOutput</c>.
/// </summary>
/// <remarks>
/// The raw text is pre-processed before it is tokenized:
/// the comma inside every "(digits,digits)" pair becomes "⠠"; every "." that is followed
/// by whitespace and matches the sentence-end pattern becomes "⠸⠲"; every run of three or
/// more dots becomes "⠄⠄⠄".
/// Each token then gets a capital marker, an optional language prefix lookup for
/// punctuation, and is passed through <c>separateVowel</c>. A token that is exactly a
/// double quote is emitted as "⠦" and "⠴" alternately.
/// </remarks>
private void wordBreak()
{
    // Number of double-quote tokens seen so far: even -> "⠦", odd -> "⠴".
    int quote = 0;

    // Patterns are built once instead of once per token / per match.
    Regex numberPair = new Regex(@"\(\d+,\d+\)");
    Regex sentenceEnd = new Regex(@"[^\.][^\d+]\.\s");
    Regex closingPunctuation = new Regex(@"\)|\?|[^\d]\,|\!|\:|;");
    Regex openingParenthesis = new Regex(@"\(");

    using (BreakIterator bi = BreakIterator.CreateWordInstance(Locale.GetUS()))
    {
        string input = inputTextBox.Text;

        // "(1,2)" -> "(1⠠2)". The replacement has the same length as the comma.
        input = numberPair.Replace(input, m => m.Value.Replace(",", "⠠"));

        // Rewrite each sentence end with ITS OWN text. The evaluator also keeps the
        // replacement from being parsed as a substitution pattern (e.g. "$1").
        input = sentenceEnd.Replace(input, m => m.Value.Replace(".", "⠸⠲"));

        input = Regex.Replace(input, @"\.{3,}", "⠄⠄⠄");

        bi.SetText(input);

        // Materialise once: the enumeration is lazy, so indexing it repeatedly would
        // re-run the break iterator for every access.
        List<string> words = bi.Enumerate().ToList();

        for (int i = 0; i < words.Count; i++)
        {
            string token = words[i];

            // Two or more consecutive capitals get the double marker; otherwise a
            // leading capital gets the single marker.
            if (Regex.IsMatch(token, @"[A-Z]+[A-Z]"))
            {
                token = "⠠⠠" + token;
            }
            else if (token.Length > 0 && Char.IsUpper(token[0]))
            {
                token = "⠠" + token;
            }

            string vowelLiftFilter = token;

            // Closing punctuation takes its language from the word BEFORE it.
            int closingHits = closingPunctuation.Matches(vowelLiftFilter).Count;
            for (int k = 0; k < closingHits; k++)
            {
                vowelLiftFilter = applyLanguagePrefix(vowelLiftFilter, words, i - 1, -1);
            }

            // An opening parenthesis takes its language from the word AFTER it.
            int openingHits = openingParenthesis.Matches(vowelLiftFilter).Count;
            for (int k = 0; k < openingHits; k++)
            {
                vowelLiftFilter = applyLanguagePrefix(vowelLiftFilter, words, i + 1, 1);
            }

            string bt = separateVowel(vowelLiftFilter);

            // Detect double quote: alternate between the two quote cells.
            if (vowelLiftFilter.Equals("\""))
            {
                bt = quote % 2 == 0 ? "⠦" : "⠴";
                quote++;
            }

            prepareOutput.Add(bt);
        }
    }
}

// Looks up "T" + token or "E" + token in the braille table, chosen by the nearest
// non-space word found from index start in direction step; returns token unchanged
// when no such word exists.
private string applyLanguagePrefix(string token, IList<string> words, int start, int step)
{
    // Bounds are checked on both sides, so scanning past either end simply stops.
    for (int j = start; j >= 0 && j < words.Count; j += step)
    {
        if (words[j].Equals(" "))
        {
            continue;
        }

        // Any character in U+0080..U+9FFF selects the "T" (Thai) entry, else "E" (English).
        string marker = Regex.IsMatch(words[j], @"[\u0080-\u9fff]+") ? "T" : "E";
        return getBrailleInTable(marker + token);
    }

    return token;
}
/// <summary>
/// Converts the UTF-16 text in <paramref name="lpInBuffer"/> and writes the result to
/// <paramref name="lpOutBuffer"/>.
/// </summary>
/// <remarks>
/// Forward: <c>ConverterIdentifier</c> is inserted after each word reported by the break
/// iterator. When the iterator yields exactly as many tokens as a plain split on
/// <c>_achSpace</c>, the input is returned unchanged, unless <c>_regexForMandarin</c>
/// matches; in that case the identifier is placed after every non-separator character.
/// Reverse: every occurrence of <c>ConverterIdentifier</c> is removed.
/// </remarks>
/// <param name="lpInBuffer">Input bytes, decoded as UTF-16 (<c>Encoding.Unicode</c>).</param>
/// <param name="nInLen">Number of input bytes.</param>
/// <param name="lpOutBuffer">Buffer that receives the UTF-16 output.</param>
/// <param name="rnOutLen">
/// On entry, the capacity of <paramref name="lpOutBuffer"/> in bytes; on return, the number
/// of bytes written.
/// </param>
protected override unsafe void DoConvert(byte *lpInBuffer, int nInLen, byte *lpOutBuffer, ref int rnOutLen)
{
    // We need to put the input *back* into a string for the lookup.
    // [aside: overriding base.InternalConvertEx would avoid the string -> byte* -> string
    //  round trip, but no other EncConverter does it that way, so this keeps to the
    //  known-good path.]
    var baIn = new byte[nInLen];
    ECNormalizeData.ByteStarToByteArr(lpInBuffer, nInLen, baIn);
    var strInput = Encoding.Unicode.GetString(baIn);

    string strOutput;
    if (_bForward)
    {
        var bySpace = strInput.Split(_achSpace, StringSplitOptions.RemoveEmptyEntries);
        _breakIterator.SetText(strInput);
        var words = _breakIterator.Enumerate().ToList();

        if (bySpace.Length == words.Count)
        {
            // The break iterator didn't do anything beyond a plain split.
            // If it is Mandarin, this is probably expected, so break after every character.
            if (_regexForMandarin.IsMatch(strInput))
            {
                var sb = new StringBuilder(strInput.Length * 2);
                foreach (var word in bySpace)
                {
                    foreach (var ch in word)
                    {
                        sb.Append(ch).Append(ConverterIdentifier);
                    }
                }
                strOutput = sb.ToString();
            }
            else
            {
                strOutput = strInput;
            }
        }
        else
        {
            var sb = new StringBuilder(strInput.Length * 2);

            // Every word except the last is followed by the identifier; empty tokens and
            // tokens that already are the identifier are dropped.
            int nNumWords = words.Count - 1;
            for (var i = 0; i < nNumWords; i++)
            {
                var word = words[i];
                if (!String.IsNullOrEmpty(word) && (word != ConverterIdentifier))
                {
                    sb.Append(word).Append(ConverterIdentifier);
                }
            }

            // The last word is appended as-is, without a trailing identifier.
            sb.Append(words.Last());
            strOutput = sb.ToString();
        }
    }
    else
    {
        strOutput = strInput.Replace(ConverterIdentifier, null);
    }

    if (String.IsNullOrEmpty(strOutput))
    {
        // Nothing is written, so report zero bytes instead of leaving the incoming
        // buffer capacity in place.
        // NOTE(review): assumes callers read rnOutLen as "bytes written", as the
        // non-empty path below sets it - confirm against the base class.
        rnOutLen = 0;
        return;
    }

    // UTF-16: two bytes per char.
    var nLen = strOutput.Length * 2;
    if (nLen > rnOutLen)
    {
        EncConverters.ThrowError(ErrStatus.OutputBufferFull);
    }

    rnOutLen = nLen;
    ECNormalizeData.StringToByteStar(strOutput, lpOutBuffer, rnOutLen, false);
}