Esempio n. 1
0
        /// <summary>
        /// Click handler: runs a word-instance <c>BreakIterator</c> (created for
        /// <c>Locale.GetUS()</c>) over the text of <c>uxText</c> and shows the resulting
        /// segments in a message box, joined with "-".
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void UxBreakClick(object sender, EventArgs e)
        {
            using (BreakIterator wordIterator = BreakIterator.CreateWordInstance(Locale.GetUS()))
            {
                wordIterator.SetText(this.uxText.Text);

                // Pull every segment out of the iterator before building the display text.
                var segments = wordIterator.Enumerate().ToArray();
                var display  = string.Join("-", segments);

                MessageBox.Show(display);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Click handler: counts how often each segment produced by a word-instance
        /// <c>BreakIterator</c> occurs in the text of <c>uxText</c>, then shows up to ten
        /// "segment : count" lines, highest count first, in a message box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void UxOccurrenceCountClick(object sender, EventArgs e)
        {
            using (BreakIterator wordIterator = BreakIterator.CreateWordInstance(Locale.GetUS()))
            {
                wordIterator.SetText(this.uxText.Text);

                // One entry per distinct segment, carrying its occurrence count.
                var tally = wordIterator.Enumerate()
                                        .GroupBy(segment => segment)
                                        .Select(bucket => new { Text = bucket.Key, Occurrences = bucket.Count() });

                // Ascending sort followed by Reverse() is kept as-is (instead of a descending
                // sort) because it fixes the relative order of segments with equal counts.
                var lines = tally.OrderBy(entry => entry.Occurrences)
                                 .Reverse()
                                 .Take(10)
                                 .Select(entry => entry.Text + " : " + entry.Occurrences)
                                 .ToArray();

                MessageBox.Show(string.Join(Environment.NewLine, lines));
            }
        }
        /// <summary>
        /// Reads the text of <c>inputTextBox</c>, pre-processes some punctuation, splits the
        /// result into segments with a word-instance <c>BreakIterator</c>, and appends one
        /// converted entry per segment to <c>prepareOutput</c>.
        /// </summary>
        /// <remarks>
        /// Per segment: a capital-letter indicator is prefixed where applicable, punctuation
        /// segments are looked up through <c>getBrailleInTable</c> with a "T"/"E" prefix chosen
        /// from the neighbouring segment, and the result is passed to <c>separateVowel</c>.
        /// Double quotes alternate between the opening (⠦) and closing (⠴) sign.
        /// </remarks>
        private void wordBreak()
        {
            int quote = 0;

            using (BreakIterator bi = BreakIterator.CreateWordInstance(Locale.GetUS()))
            {
                var input = inputTextBox.Text;

                // Inside every "(digits,digits)" pair, swap the comma for ⠠.
                input = Regex.Replace(input, @"\(\d+,\d+\)", m => m.Value.Replace(",", "⠠"));

                // Swap the period(s) of every match of the pattern below for ⠸⠲. A MatchEvaluator
                // is required here: each match has to be rewritten from its OWN text. Passing one
                // match's rewritten value as a plain replacement string would overwrite every other
                // match in the text with that value (and would interpret '$' as a substitution).
                input = Regex.Replace(input, @"[^\.][^\d+]\.\s", m => m.Value.Replace(".", "⠸⠲"));
                input = Regex.Replace(input, @"\.{3,}", "⠄⠄⠄");

                bi.SetText(input);

                // Materialize the segments once; indexing the lazy sequence directly would re-run
                // the break iterator for every Count()/ElementAt() call.
                List<string> spWord = bi.Enumerate().ToList();

                // Loop-invariant patterns, built once.
                Regex closingPunctuation = new Regex(@"\)|\?|[^\d]\,|\!|\:|;");
                Regex openingParenthesis = new Regex(@"\(");

                for (int i = 0; i < spWord.Count; i++)
                {
                    string spwordCheckCapital = spWord[i];
                    // Check whether the segment contains a run of capitals (⠠⠠ prefix) or only
                    // starts with a capital letter (⠠ prefix).
                    if (Regex.IsMatch(spwordCheckCapital, @"[A-Z]+[A-Z]"))
                    {
                        spwordCheckCapital = "⠠⠠" + spwordCheckCapital;
                    }
                    else if (Char.IsUpper(spwordCheckCapital[0]))
                    {
                        spwordCheckCapital = "⠠" + spwordCheckCapital;
                    }

                    string vowelLiftFilter = spwordCheckCapital;

                    // Closing punctuation takes its language from the nearest non-space segment
                    // BEFORE it. The lookup is applied once per punctuation match in the segment.
                    foreach (Match match in closingPunctuation.Matches(vowelLiftFilter))
                    {
                        string prefix = neighbourLanguagePrefix(spWord, i - 1, -1);
                        if (prefix != null)
                        {
                            vowelLiftFilter = getBrailleInTable(prefix + vowelLiftFilter);
                        }
                    }

                    // An opening parenthesis takes its language from the nearest non-space segment
                    // AFTER it. When "(" is the last such segment there is no neighbour and the
                    // segment is left unchanged (the scan must not index past the end of the list).
                    foreach (Match match in openingParenthesis.Matches(vowelLiftFilter))
                    {
                        string prefix = neighbourLanguagePrefix(spWord, i + 1, +1);
                        if (prefix != null)
                        {
                            vowelLiftFilter = getBrailleInTable(prefix + vowelLiftFilter);
                        }
                    }

                    string bt = separateVowel(vowelLiftFilter);
                    // Detect double quote: even occurrences open (⠦), odd occurrences close (⠴).
                    if (vowelLiftFilter.Equals("\""))
                    {
                        bt = quote % 2 == 0 ? "⠦" : "⠴";
                        quote++;
                    }
                    prepareOutput.Add(bt);
                }
            }
        }

        /// <summary>
        /// Scans <paramref name="words"/> from index <paramref name="start"/> in steps of
        /// <paramref name="step"/>, skips segments equal to a single space, and classifies the
        /// first other segment it finds.
        /// </summary>
        /// <param name="words">Segments to scan.</param>
        /// <param name="start">Index of the first segment to inspect; may be out of range.</param>
        /// <param name="step">+1 to scan forwards, -1 to scan backwards.</param>
        /// <returns>
        /// "T" when the segment found contains a character in U+0080..U+9FFF (used by the caller
        /// as the Thai marker), "E" when it does not, or <c>null</c> when the scan runs off either
        /// end of the list without finding a non-space segment.
        /// </returns>
        private static string neighbourLanguagePrefix(IList<string> words, int start, int step)
        {
            for (int j = start; j >= 0 && j < words.Count; j += step)
            {
                if (words[j].Equals(" "))
                {
                    continue;
                }
                return Regex.IsMatch(words[j], @"[\u0080-\u9fff]+") ? "T" : "E";
            }
            return null;
        }
        /// <summary>
        /// Decodes <paramref name="nInLen"/> bytes of UTF-16 text from
        /// <paramref name="lpInBuffer"/>, converts it, and writes the result to
        /// <paramref name="lpOutBuffer"/>.
        /// </summary>
        /// <remarks>
        /// Forward direction: <c>ConverterIdentifier</c> is appended after each segment reported
        /// by the break iterator (all but the last), or, when the iterator found no more pieces
        /// than splitting on <c>_achSpace</c> did and <c>_regexForMandarin</c> matches, after every
        /// character. Reverse direction: every <c>ConverterIdentifier</c> is removed.
        /// </remarks>
        /// <param name="lpInBuffer">Input bytes.</param>
        /// <param name="nInLen">Number of input bytes.</param>
        /// <param name="lpOutBuffer">Destination buffer.</param>
        /// <param name="rnOutLen">
        /// On entry, compared against the required byte count; on exit, the number of bytes
        /// written. Left unchanged when the converted text is null or empty.
        /// </param>
        protected override unsafe void DoConvert(byte *lpInBuffer, int nInLen, byte *lpOutBuffer, ref int rnOutLen)
        {
            // The lookup works on strings, so the raw bytes are turned back into one first.
            // (Overriding the base class' string-level entry point would avoid the round trip,
            // but this keeps the converter in line with the other EncConverters.)
            var inputBytes = new byte[nInLen];
            ECNormalizeData.ByteStarToByteArr(lpInBuffer, nInLen, inputBytes);
            var strInput = new string(Encoding.Unicode.GetChars(inputBytes));

            string strOutput;

            if (!_bForward)
            {
                // Reverse direction: strip every identifier.
                strOutput = strInput.Replace(ConverterIdentifier, null);
            }
            else
            {
                var bySpace = strInput.Split(_achSpace, StringSplitOptions.RemoveEmptyEntries);
                _breakIterator.SetText(strInput);
                var words = _breakIterator.Enumerate().ToList();

                if (bySpace.Length != words.Count)
                {
                    // The iterator segmented the text: emit every usable segment except the
                    // last followed by the identifier, then the last segment on its own.
                    var joined = new StringBuilder();
                    foreach (var word in words.Take(words.Count - 1))
                    {
                        if (String.IsNullOrEmpty(word) || (word == ConverterIdentifier))
                        {
                            continue;
                        }
                        joined.Append(word).Append(ConverterIdentifier);
                    }
                    joined.Append(words.Last());
                    strOutput = joined.ToString();
                }
                else if (_regexForMandarin.IsMatch(strInput))
                {
                    // The iterator did nothing useful; for Mandarin that is expected, so
                    // every character of every space-separated piece gets the identifier.
                    var perCharacter = new StringBuilder();
                    foreach (var piece in bySpace)
                    {
                        foreach (var ch in piece)
                        {
                            perCharacter.Append(ch).Append(ConverterIdentifier);
                        }
                    }
                    strOutput = perCharacter.ToString();
                }
                else
                {
                    // Nothing to segment: pass the input through unchanged.
                    strOutput = strInput;
                }
            }

            // NOTE(review): on empty output rnOutLen keeps the caller's value rather than
            // being set to 0 — confirm that the caller copes with that.
            if (String.IsNullOrEmpty(strOutput))
            {
                return;
            }

            // Two bytes per UTF-16 code unit.
            var requiredBytes = strOutput.Length * 2;
            if (requiredBytes > rnOutLen)
            {
                EncConverters.ThrowError(ErrStatus.OutputBufferFull);
            }

            rnOutLen = requiredBytes;
            ECNormalizeData.StringToByteStar(strOutput, lpOutBuffer, rnOutLen, false);
        }