コード例 #1
0
        /// <summary>
        /// Applies the corrections stored in <c>processContext.Output</c> to the caller's search state.
        /// Invoked by <c>SearchNextStopSign</c> when the examined stop sign was not accepted as a sentence end.
        /// </summary>
        /// <param name="processContext">A processing context which lives until the process is finished,
        /// and stores data for the process</param>
        /// <param name="stopIndex">Index of the examined stop sign inside <paramref name="text"/>;
        /// shifted when the output reports a sentence-completing position inside the word</param>
        /// <param name="startIndex">Index from which the next search starts; updated here</param>
        /// <param name="stopSignCompleteSentenceFound">Set to true when the output reports a
        /// sentence-completing position</param>
        /// <param name="text">The text being scanned; replaced when a space is inserted into it</param>
        private static void HandleOutputContext(AnalysisProcessContext processContext, ref int stopIndex, ref int startIndex, ref bool stopSignCompleteSentenceFound, ref string text)
        {
            int? sentenceEndInWord = processContext.Output.WordCompleteSentenceAtIndex;

            if (sentenceEndInWord == null)
            {
                //No sentence-completing position was reported - continue searching right after the stop sign
                startIndex = stopIndex + 1;
            }
            else
            {
                stopSignCompleteSentenceFound = true;
                //Corrected stop index = index into text + reported index into word - original sign index into word
                stopIndex = stopIndex + sentenceEndInWord.Value - processContext.StopSignIndexIntoWord;
                //Clear the reported value once it has been applied
                processContext.Output.WordCompleteSentenceAtIndex = null;
            }

            int? spaceOffset = processContext.Output.AddSpaceAtIndex;

            //Nothing more to do unless a space separator was requested
            if (spaceOffset == null)
            {
                return;
            }

            //Insert the space into the text and resume the search from the insertion point
            int insertAt = stopIndex + spaceOffset.Value + 1;

            startIndex             = insertAt;
            text                   = text.Insert(insertAt, Consts.SPACE_SIGN.ToString());
            processContext.AllText = text;

            //Clear the request once it has been applied
            processContext.Output.AddSpaceAtIndex = null;
        }
コード例 #2
0
        /// <summary>
        /// Splits the input text into sentences.
        /// When <c>EnableLinebreakSeperator</c> is set in the analysis configuration, the text is first
        /// cut into segments by <c>GetTextSegments</c> and every non-blank segment is processed separately;
        /// otherwise the whole text is processed as a single segment.
        /// </summary>
        /// <param name="text">Input text for processing</param>
        /// <returns>The sentence list accumulated in the process context output</returns>
        public IList<string> Process(string text)
        {
            AnalysisConfiguration  configuration = TextAnalysisRepository.Instance.AnalysisConfiguration;
            AnalysisProcessContext context       = new AnalysisProcessContext
            {
                AnalysisConfiguration = configuration
            };

            //Line breaks are not separators - the whole input is one segment
            if (!configuration.EnableLinebreakSeperator)
            {
                ProcessTextSegment(text, context);

                return context.Output.SentenceListResult;
            }

            //Line breaks are separators - process each segment on its own, skipping blank ones
            foreach (string segment in GetTextSegments(text))
            {
                if (string.IsNullOrWhiteSpace(segment))
                {
                    continue;
                }

                ProcessTextSegment(segment, context);
            }

            return context.Output.SentenceListResult;
        }
コード例 #3
0
 /// <summary>
 /// Set process context scope - store the found word, its location and the found sign in the context
 /// </summary>
 /// <param name="processContext">A processing context which lives until the process is finished,
 /// and stores data for the process</param>
 /// <param name="wordMetadata">Supplies the word and the word location copied into the context</param>
 /// <param name="stopIndex">Index into <c>processContext.AllText</c> of the sign to store</param>
 private void SetContextScope(AnalysisProcessContext processContext, WordMetadata wordMetadata, int stopIndex)
 {
     string word = wordMetadata.Word;

     processContext.Word            = word;
     //The same word without any trailing stop signs
     processContext.WordWithEndTrim = word.TrimEnd(processContext.AvailableStopSigns);

     var sign = processContext.AllText[stopIndex];

     processContext.Sign         = sign;
     processContext.WordLocation = wordMetadata.WordLocation;
     //Position of the first occurrence of the sign inside the word
     processContext.StopSignIndexIntoWord = word.IndexOf(sign);
 }
コード例 #4
0
        /// <summary>
        /// Process one text segment, and divide it into sentences.
        /// Each sentence found is appended to the sentence list held by the process context output.
        /// A null or empty segment produces no sentences.
        /// </summary>
        /// <param name="segment">Input segment to process</param>
        /// <param name="processContext">A processing context which lives until the process is finished,
        /// and stores data for the process</param>
        private void ProcessTextSegment(string segment, AnalysisProcessContext processContext)
        {
            IList<string> sentenceListResult = processContext.Output.SentenceListResult;

            processContext.AllText = segment;

            //FindNextSentence removes every sentence it returns from AllText,
            //so the loop ends once no text is left.
            //string.IsNullOrEmpty is O(1); LINQ Count() would enumerate the whole remaining text on every pass
            while (!string.IsNullOrEmpty(processContext.AllText))
            {
                sentenceListResult.Add(FindNextSentence(processContext));
            }
        }
コード例 #5
0
        /// <summary>
        /// Check whether at least one exception rule registered for the given word location
        /// matches the current state of the process context.
        /// </summary>
        /// <param name="processContext">A processing context which lives until the process is finished,
        /// and stores data for the process</param>
        /// <param name="wordLocation">A place of a word relative to a sentence: at first, in the middle or at the end</param>
        /// <returns>true when a rule for <paramref name="wordLocation"/> matches; false when none matches,
        /// or when the context, its stop sign configuration or its exceptions are missing</returns>
        private bool IsAnyExceptionMatch(AnalysisProcessContext processContext, WordLocation wordLocation)
        {
            IList<StopSignExceptionRule> rules = null;

            //Any null link in the chain skips the lookup and leaves rules as null
            processContext?.StopSignConfiguration?.Exceptions?.TryGetValue(wordLocation, out rules);

            if (rules == null)
            {
                return false;
            }

            //Stop at the first matching rule; an empty list yields false
            foreach (StopSignExceptionRule rule in rules)
            {
                if (rule.IsMatch(processContext))
                {
                    return true;
                }
            }

            return false;
        }
コード例 #6
0
        /// <summary>
        /// Extract the first sentence of the text held in <c>processContext.AllText</c>.
        /// The returned sentence is removed from <c>AllText</c>, so repeated calls walk through the segment.
        /// </summary>
        /// <param name="processContext">A processing context which lives until the process is finished,
        /// and stores data for the process</param>
        /// <returns>The sentence found; when no stop sign ends a sentence, the entire remaining text
        /// with trailing whitespace removed</returns>
        private string FindNextSentence(AnalysisProcessContext processContext)
        {
            int  stopIndex;
            int  searchFrom = 0;
            bool sentenceEndFound;

            processContext.AvailableStopSigns = processContext.AnalysisConfiguration.Signs.ToArray();

            //Keep searching until a stop sign that really ends the sentence is found
            //(or until no stop sign is left at all)
            do
            {
                sentenceEndFound = SearchNextStopSign(processContext, out stopIndex, ref searchFrom);
            }
            while (!sentenceEndFound);

            string fullText = processContext.AllText;

            //No stop sign - the whole remaining text is the sentence and nothing is left afterwards
            if (stopIndex < 0)
            {
                string lastSentence = fullText.TrimEnd();

                processContext.AllText = string.Empty;

                return lastSentence;
            }

            //Step past the stop sign, then past any consecutive characters of the same sign
            int sentenceEnd = FindIndexAfterConsecutiveChars(fullText, stopIndex + 1, processContext.Sign);
            string sentence = fullText.Substring(0, sentenceEnd);

            //Keep only the text which follows the sentence, without leading whitespace
            processContext.AllText = fullText.Remove(0, sentenceEnd).TrimStart();

            return sentence;
        }
コード例 #7
0
        /// <summary>
        /// Search for next stop sign into current segment, and decide whether it completes a sentence.
        /// Leading whitespace is removed from <c>processContext.AllText</c> before searching, so that
        /// every index produced here refers to the same string the context holds.
        /// </summary>
        /// <param name="processContext">A processing context which lives until the process is finished,
        /// and stores data for the process</param>
        /// <param name="stopIndex">out param which contains index of the found sign;
        /// negative when no stop sign was found</param>
        /// <param name="startIndex">ref param, in order to update startIndex to search from there</param>
        /// <returns>true when the sentence end has been determined (a completing stop sign was found,
        /// or no stop sign is left); false when the search has to continue from <paramref name="startIndex"/></returns>
        private bool SearchNextStopSign(AnalysisProcessContext processContext, out int stopIndex, ref int startIndex)
        {
            bool stopSignCompleteSentenceFound = false;

            //Remove leading spaces and store the result back into the context.
            //SetContextScope and FindNextSentence index into processContext.AllText using the
            //indices computed below, so both must refer to the very same (trimmed) string
            string text = processContext.AllText.TrimStart();

            processContext.AllText = text;

            //Search for one of the closing signs,
            //the closest to the beginning of the sentence
            stopIndex = text.IndexOfAny(processContext.AvailableStopSigns, startIndex);

            //If a closing sign found- We check if it complete a sentence
            if (stopIndex > -1)
            {
                //Find word which includes current stopIndex
                WordMetadata wordMetadata = FindWord(text, stopIndex);

                //Set processContext by found stopSign and its parent word
                SetContextScope(processContext, wordMetadata, stopIndex);

                //Check whether the word is exceptional, and therefore does not complete a sentence
                stopSignCompleteSentenceFound = !IsExceptionalWord(processContext);

                //If stop sign does not complete a sentence-
                //Update necessary data due to processContext output
                if (!stopSignCompleteSentenceFound)
                {
                    HandleOutputContext(processContext, ref stopIndex, ref startIndex, ref stopSignCompleteSentenceFound, ref text);
                }
            }
            //If no closing sign found- The entire remaining text is an additional sentence
            else
            {
                //Keeps stopIndex negative, which callers treat as "no stop sign"
                stopIndex--;
                stopSignCompleteSentenceFound = true;
            }

            return(stopSignCompleteSentenceFound);
        }
コード例 #8
0
        /// <summary>
        /// Decide whether the current word is an exception for the current sign,
        /// and therefore does not complete a sentence.
        /// When a configuration exists for the sign, it is stored in
        /// <c>processContext.StopSignConfiguration</c> before the exception rules are evaluated.
        /// </summary>
        /// <param name="processContext">A processing context which lives until the process is finished,
        /// and stores data for the process</param>
        /// <returns>true when an exception rule matches the word; false when none matches
        /// or when no configuration exists for the sign</returns>
        private bool IsExceptionalWord(AnalysisProcessContext processContext)
        {
            var sign = processContext.Sign;
            var configurationsBySign = processContext.AnalysisConfiguration.StopSignConfigurations;
            StopSignConfiguration configurationForSign = null;

            //A sign without configuration has no exceptions at all
            if (!configurationsBySign.TryGetValue(sign, out configurationForSign))
            {
                return false;
            }

            processContext.StopSignConfiguration = configurationForSign;

            //First the exceptions which correspond to the location of the word,
            //then (only when none of them matched) the general exceptions
            return IsAnyExceptionMatch(processContext, processContext.WordLocation)
                   || IsAnyExceptionMatch(processContext, WordLocation.Anywhere);
        }