/// <summary>
/// Extracts the space-delimited word of <paramref name="text"/> that spans the given index.
/// </summary>
/// <param name="text">Input text.</param>
/// <param name="index">
/// An index inside the wanted word. It is expected to point at a non-space character:
/// when it points at the space sign itself, the computed word length is negative and
/// <c>Substring</c> throws.
/// </param>
/// <returns>
/// A <c>WordMetadata</c> holding the word, the index just past its last character
/// (the position of the following space, or the text length when no space follows), and
/// a location of <c>WordLocation.Start</c> when no space precedes the word or
/// <c>WordLocation.Mid</c> otherwise.
/// </returns>
private WordMetadata FindWord(string text, int index)
{
    WordMetadata wordMetadata = new WordMetadata();

    // Closest space at or before the index (searches backwards over positions index..0).
    int prevSpaceIndex = text.LastIndexOf(Consts.SPACE_SIGN, index, index + 1);

    // Closest space at or after the index.
    int nextSpaceIndex = text.IndexOf(Consts.SPACE_SIGN, index);

    if (prevSpaceIndex == -1)
    {
        // No space before the word: it begins at the very start of the text.
        wordMetadata.WordLocation = WordLocation.Start;
        prevSpaceIndex = 0;
    }
    else
    {
        // Step over the space so the index points at the word's first character.
        prevSpaceIndex++;
        wordMetadata.WordLocation = WordLocation.Mid;
    }

    if (nextSpaceIndex == -1)
    {
        // No space after the word: it runs to the end of the text.
        nextSpaceIndex = text.Length;
    }

    wordMetadata.Word = text.Substring(prevSpaceIndex, nextSpaceIndex - prevSpaceIndex);
    wordMetadata.WordEndIndex = nextSpaceIndex;

    return wordMetadata;
}
/// <summary>
/// Sets the process context scope: updates its word- and sign-related values
/// from the found word and the stop sign at <paramref name="stopIndex"/>.
/// </summary>
/// <param name="processContext">A processing context which lives until the process is finished,
/// and stores data for the process.</param>
/// <param name="wordMetadata">Metadata of the word that contains the stop sign.</param>
/// <param name="stopIndex">Index used to read the stop sign from <c>processContext.AllText</c>.</param>
private void SetContextScope(AnalysisProcessContext processContext, WordMetadata wordMetadata, int stopIndex)
{
    // The word as found, and the same word with trailing stop signs stripped.
    processContext.Word = wordMetadata.Word;
    processContext.WordWithEndTrim = processContext.Word.TrimEnd(processContext.AvailableStopSigns);

    // The stop sign itself, read from the full text.
    var stopSign = processContext.AllText[stopIndex];
    processContext.Sign = stopSign;

    processContext.WordLocation = wordMetadata.WordLocation;

    // First occurrence of the sign inside the word.
    processContext.StopSignIndexIntoWord = processContext.Word.IndexOf(stopSign);
}
/// <summary>
/// Searches the current segment for the next stop sign and reports whether it completes a sentence.
/// </summary>
/// <param name="processContext">A processing context which lives until the process is finished,
/// and stores data for the process.</param>
/// <param name="stopIndex">Out parameter receiving the index of the found sign, relative to the
/// left-trimmed text. When no sign is found it is set to -2 (the -1 "not found" result, decremented).</param>
/// <param name="startIndex">Ref parameter: the index to start searching from; may be updated
/// by <c>HandleOutputContext</c> when the found sign does not complete a sentence.</param>
/// <returns>
/// <c>true</c> when no stop sign was found or the found sign's word is not exceptional;
/// otherwise the flag value left by <c>HandleOutputContext</c>.
/// </returns>
private bool SearchNextStopSign(AnalysisProcessContext processContext, out int stopIndex, ref int startIndex)
{
    // Work on a copy of the text without leading whitespace.
    string trimmedText = processContext.AllText;
    trimmedText = trimmedText.TrimStart();

    // Closest stop sign at or after startIndex.
    stopIndex = trimmedText.IndexOfAny(processContext.AvailableStopSigns, startIndex);

    // No stop sign: the entire remaining text is treated as one more sentence.
    if (stopIndex <= -1)
    {
        stopIndex--;
        return true;
    }

    // NOTE(review): stopIndex is relative to the left-trimmed copy, while SetContextScope
    // indexes processContext.AllText with it; the two agree only when AllText has no
    // leading whitespace - TODO confirm against callers.
    WordMetadata signWord = FindWord(trimmedText, stopIndex);
    SetContextScope(processContext, signWord, stopIndex);

    // An exceptional word means the sign does not close a sentence.
    bool completesSentence = !IsExceptionalWord(processContext);

    if (!completesSentence)
    {
        // Let the output handler update the indexes, the flag and the text.
        HandleOutputContext(processContext, ref stopIndex, ref startIndex, ref completesSentence, ref trimmedText);
    }

    return completesSentence;
}