예제 #1
0
        /// <summary>
        /// Finds the first occurrence of the mark's <c>StringValue</c> in <paramref name="source"/>
        /// whose neighbouring characters satisfy the mark's <c>InnerOption</c>.
        /// </summary>
        /// <remarks>
        /// The start and the end of <paramref name="source"/> are treated as whitespace.
        /// Occurrences are examined left to right without overlapping; an occurrence whose
        /// neighbours do not fit the inner option is skipped.
        /// </remarks>
        /// <param name="source">Text to search.</param>
        /// <param name="punctuationMark">Mark providing the text to look for and the required spacing.</param>
        /// <returns>Zero-based index of the first matching occurrence, or -1 when there is none.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="source"/> is null.</exception>
        public static int IndexOfPunctuationMark(this string source, PunctuationMark punctuationMark)
        {
            if (source == null)
                throw new System.ArgumentNullException("source");

            var markValue = punctuationMark.StringValue;

            // An empty search string matches at every position without advancing,
            // so scanning for it could never terminate.
            if (string.IsNullOrEmpty(markValue))
                return -1;

            // Ordinal search: punctuation must be matched character for character, not by culture rules.
            var markIndex = source.IndexOf(markValue, System.StringComparison.Ordinal);

            while (markIndex != -1)
            {
                var afterMark = markIndex + markValue.Length;

                // Inspect the real neighbours in the full string. Working on a truncated copy would make
                // an occurrence that directly follows a rejected one look as if it sat at the string start.
                var hasLeftSpace = markIndex == 0 || char.IsWhiteSpace(source[markIndex - 1]);
                var hasRightSpace = afterMark == source.Length || char.IsWhiteSpace(source[afterMark]);

                switch (punctuationMark.InnerOption)
                {
                    case SingleTextElementInnerOption.None:
                        if (!hasLeftSpace && !hasRightSpace)
                            return markIndex;
                        break;
                    case SingleTextElementInnerOption.LeftSpace:
                        if (hasLeftSpace && !hasRightSpace)
                            return markIndex;
                        break;
                    case SingleTextElementInnerOption.RightSpace:
                        if (!hasLeftSpace && hasRightSpace)
                            return markIndex;
                        break;
                    case SingleTextElementInnerOption.BothSpace:
                        if (hasLeftSpace && hasRightSpace)
                            return markIndex;
                        break;
                }

                // Resume right after the rejected occurrence; a start index equal to the length is valid
                // and simply yields -1.
                markIndex = source.IndexOf(markValue, afterMark, System.StringComparison.Ordinal);
            }

            return -1;
        }
예제 #2
0
        /// <summary>
        /// Scans <paramref name="source"/> for runs of consecutive punctuation characters and yields,
        /// for each run, the first known mark with the same text and the same spacing option.
        /// </summary>
        /// <remarks>
        /// A run's spacing option is derived from the characters immediately before and after it;
        /// the start and the end of the string count as whitespace. Runs that match no entry of
        /// <paramref name="punctuationMarks"/> are skipped. The sequence is produced lazily.
        /// </remarks>
        /// <param name="source">Text to scan.</param>
        /// <param name="punctuationMarks">Known marks to match the detected runs against.</param>
        /// <returns>The matching marks, in the order their runs appear in <paramref name="source"/>.</returns>
        public static IEnumerable<PunctuationMark> GetPunctuationMarks(this string source, IEnumerable<PunctuationMark> punctuationMarks)
        {
            // Keep the longest-first ordering, but sort only once instead of on every lookup.
            var orderedMarks = punctuationMarks.OrderByDescending(mark => mark.StringValue.Length).ToList();

            var runStart = -1;          // index of the first character of the current run, -1 when outside a run
            var hasLeftSpace = false;

            // Iterate one position past the last character so that a run reaching the end of the
            // string is closed by the same code path as a run followed by another character.
            for (var index = 0; index <= source.Length; index++)
            {
                var atEnd = index == source.Length;

                if (!atEnd && char.IsPunctuation(source[index]))
                {
                    if (runStart < 0)
                    {
                        runStart = index;
                        hasLeftSpace = index == 0 || char.IsWhiteSpace(source[index - 1]);
                    }

                    continue;
                }

                if (runStart < 0)
                    continue;

                // The run ended just before 'index', so the right neighbour is the current character.
                // (Offsetting by the run length here would skip characters or run past the end.)
                var punctuationString = source.Substring(runStart, index - runStart);
                var hasRightSpace = atEnd || char.IsWhiteSpace(source[index]);
                runStart = -1;

                var innerOption = hasLeftSpace
                    ? (hasRightSpace ? SingleTextElementInnerOption.BothSpace : SingleTextElementInnerOption.LeftSpace)
                    : (hasRightSpace ? SingleTextElementInnerOption.RightSpace : SingleTextElementInnerOption.None);

                foreach (var mark in orderedMarks)
                {
                    if (mark.StringValue == punctuationString && mark.InnerOption == innerOption)
                    {
                        yield return mark;
                        break;
                    }
                }
            }
        }
예제 #3
0
 /// <summary>
 /// Tells whether <paramref name="source"/> contains <paramref name="punctuationMark"/>,
 /// as determined by <c>IndexOfPunctuationMark</c>.
 /// </summary>
 /// <param name="source">Text to search.</param>
 /// <param name="punctuationMark">Mark to look for.</param>
 /// <returns><c>true</c> when the lookup returns anything other than -1; otherwise <c>false</c>.</returns>
 public static bool ContainsPunctuationMark(this string source, PunctuationMark punctuationMark)
 {
     var markIndex = source.IndexOfPunctuationMark(punctuationMark);
     var isMissing = markIndex == -1;

     return !isMissing;
 }
예제 #4
0
        /// <summary>
        /// Builds a <c>Sentence</c> from <paramref name="source"/> by splitting it on single spaces and
        /// breaking each piece into words and internal punctuation marks.
        /// </summary>
        /// <remarks>
        /// The last element produced for every space-separated piece gets
        /// <c>SingleTextElementInnerOption.RightSpace</c>. When <paramref name="endMark"/> has a value,
        /// a trailing <c>RightSpace</c> on the final element is reset to <c>None</c> and the end mark is appended.
        /// </remarks>
        /// <param name="source">Sentence text without its terminal mark.</param>
        /// <param name="endMark">Terminal mark to append when its <c>HasValue</c> is true.</param>
        /// <returns>The populated sentence.</returns>
        public static Sentence ParseSentenceString(string source, PunctuationMark endMark)
        {
            var sentence = new Sentence();

            // Get split parts of the sentence
            var splitParts = source.Split(new char[] { ' ' });

            foreach (var part in splitParts)
            {
                var sPart = part;

                // Get inner punctuation marks in the split part. The result is a lazy iterator,
                // so snapshot it once instead of re-scanning the part for every enumeration.
                var innerPunctuationMarks = sPart.GetPunctuationMarks(DefaultPunctuationMarks.InternalPunctuationMarks).ToList();
                if (innerPunctuationMarks.Count == 0)
                {
                    var plainWord = new Word() { StringValue = sPart, InnerOption = SingleTextElementInnerOption.RightSpace };
                    sentence.Add(plainWord);
                    continue;
                }

                var sentenceParts = new List<ISingleTextElement>();
                var compositeWord = new CompositeWord();

                // The split part contains punctuation marks: cut it into single text elements
                foreach (var mark in innerPunctuationMarks)
                {
                    var markLength = mark.StringValue.Length;
                    var index = sPart.IndexOfPunctuationMark(mark);

                    // The marks were detected on the whole part while the lookup runs on the shrinking
                    // remainder; if the mark cannot be located there, leave the text for the trailing word
                    // instead of passing -1 to Substring.
                    if (index < 0)
                        continue;

                    var leftPart = sPart.Substring(0, index);
                    if (!string.IsNullOrEmpty(leftPart))
                    {
                        var word = new Word() { StringValue = leftPart };
                        compositeWord.Add(word);
                        if (!sentenceParts.Contains(compositeWord))
                            sentenceParts.Add(compositeWord);
                    }

                    if (index == 0)
                    {
                        // Mark at the very beginning stands on its own
                        sentenceParts.Add(mark);
                    }
                    else if (index + markLength < sPart.Length)
                    {
                        // Mark surrounded by text belongs to the composite word
                        compositeWord.Add(mark);
                        if (!sentenceParts.Contains(compositeWord))
                            sentenceParts.Add(compositeWord);
                    }
                    else
                    {
                        // Mark at the very end stands on its own
                        sentenceParts.Add(mark);
                    }

                    sPart = sPart.Substring(index + markLength);
                }

                // Whatever text is left after the last mark closes the composite word
                if (!string.IsNullOrEmpty(sPart))
                {
                    var word = new Word() { StringValue = sPart };
                    compositeWord.Add(word);
                    if (!sentenceParts.Contains(compositeWord))
                        sentenceParts.Add(compositeWord);
                }

                // The last element of the part is followed by the space the source was split on.
                // NOTE(review): when that element is a mark taken from DefaultPunctuationMarks and
                // PunctuationMark is a reference type, this assignment changes the shared instance — confirm.
                if (sentenceParts.Count > 0)
                    sentenceParts.Last().InnerOption = SingleTextElementInnerOption.RightSpace;

                // Add single text elements to sentence
                foreach (var sentencePart in sentenceParts)
                {
                    sentence.Add(sentencePart);
                }
            }

            if (endMark.HasValue)
            {
                // The end mark follows the last element directly, so drop that element's trailing space.
                // NOTE(review): same shared-instance concern as above if the last element is a default mark — confirm.
                var lastElement = sentence.Last();
                if (lastElement.InnerOption == SingleTextElementInnerOption.RightSpace)
                {
                    lastElement.InnerOption = SingleTextElementInnerOption.None;
                }
                sentence.Add(endMark);
            }

            return sentence;
        }
예제 #5
0
        /// <summary>
        /// Initializes the static default marks: the space mark, the terminal (sentence-ending)
        /// marks and the internal (in-sentence) marks.
        /// </summary>
        static DefaultPunctuationMarks()
        {
            // Short aliases for the spacing options used below
            var noSpace = SingleTextElementInnerOption.None;
            var leftSpace = SingleTextElementInnerOption.LeftSpace;
            var rightSpace = SingleTextElementInnerOption.RightSpace;
            var bothSpace = SingleTextElementInnerOption.BothSpace;

            _spaceMark = new PunctuationMark(" ", PunctuationMarkType.SpaceMark, noSpace);

            // Marks that terminate a sentence; every one of them is registered with a space on its right
            var terminalMarks = new Collection<PunctuationMark>();
            terminalMarks.Add(new PunctuationMark(".", PunctuationMarkType.EndMark, rightSpace));
            terminalMarks.Add(new PunctuationMark("?", PunctuationMarkType.QuestionMark, rightSpace));
            terminalMarks.Add(new PunctuationMark("!", PunctuationMarkType.ExclamationMark, rightSpace));
            terminalMarks.Add(new PunctuationMark("...", PunctuationMarkType.UnfinishedMark, rightSpace));
            terminalMarks.Add(new PunctuationMark("?!", PunctuationMarkType.AccentMark, rightSpace));
            terminalMarks.Add(new PunctuationMark("!?", PunctuationMarkType.AccentMark, rightSpace));
            _terminalPunctuationMarks = terminalMarks;

            // Marks that appear inside a sentence; the same text may be registered with several spacing options
            var internalMarks = new Collection<PunctuationMark>();
            internalMarks.Add(new PunctuationMark(",", PunctuationMarkType.CommaMark, rightSpace));
            internalMarks.Add(new PunctuationMark(".", PunctuationMarkType.AbreviationMark, noSpace));
            internalMarks.Add(new PunctuationMark(".", PunctuationMarkType.AbreviationMark, rightSpace));
            internalMarks.Add(new PunctuationMark(";", PunctuationMarkType.EnumerationMark, rightSpace));
            internalMarks.Add(new PunctuationMark(":", PunctuationMarkType.GeneralizationMark, rightSpace));
            internalMarks.Add(new PunctuationMark("-", PunctuationMarkType.ConnectionMark, noSpace));
            internalMarks.Add(new PunctuationMark("-", PunctuationMarkType.IllustrationMark, bothSpace));
            internalMarks.Add(new PunctuationMark("(", PunctuationMarkType.AdditionalMark, leftSpace));
            internalMarks.Add(new PunctuationMark(")", PunctuationMarkType.AdditionalMark, rightSpace));
            internalMarks.Add(new PunctuationMark("\"", PunctuationMarkType.CitationMark, leftSpace));
            internalMarks.Add(new PunctuationMark("\"", PunctuationMarkType.CitationMark, rightSpace));
            _internalPunctuationMarks = internalMarks;
        }