/// <summary>
/// Collects the stem(s) of the given word: one entry per form returned by the
/// dictionary lookup of the word itself, followed by whatever the
/// multi-argument <c>Stem</c> overload produces.
/// </summary>
/// <param name="word"> Buffer holding the word to find the stems for </param>
/// <param name="length"> Number of chars, counted from index 0 of <paramref name="word"/>, that make up the word </param>
/// <returns> List of stems for the word </returns>
public IList<CharsRef> Stem(char[] word, int length)
{
    if (dictionary.needsInputCleaning)
    {
        // Rebuild the input in scratchSegment, let the dictionary clean it into
        // 'segment', and carry on with the copy placed in scratchBuffer.
        scratchSegment.Length = 0;
        scratchSegment.Append(word, 0, length);
        string cleanedText = dictionary.CleanInput(scratchSegment.ToString(), segment);
        scratchBuffer = ArrayUtil.Grow(scratchBuffer, cleanedText.Length);
        length = segment.Length;
        segment.CopyTo(0, scratchBuffer, 0, length);
        word = scratchBuffer;
    }

    var result = new List<CharsRef>();

    IntsRef entries = dictionary.LookupWord(word, 0, length);
    if (entries != null)
    {
        // TODO: some forms should not be added, e.g. ONLYINCOMPOUND
        // just because it exists, does not make it valid...
        // One stem is added per form, so the same stem may appear several times.
        int added = 0;
        while (added < entries.Length)
        {
            result.Add(NewStem(word, length));
            added++;
        }
    }

    // Append the stems produced by the multi-argument overload.
    var additional = Stem(word, length, -1, -1, -1, 0, true, true, false, false);
    result.AddRange(additional);

    return result;
}
/// <summary>
/// Collects the stem(s) of the given word, also trying case-folded variants
/// when the word is classified as upper case or title case.
/// </summary>
/// <param name="word"> Buffer holding the word to find the stems for </param>
/// <param name="length"> Number of chars of <paramref name="word"/> that make up the word </param>
/// <returns> <see cref="IList{T}"/> of stems for the word </returns>
public IList<CharsRef> Stem(char[] word, int length)
{
    if (dictionary.needsInputCleaning)
    {
        // Rebuild the input in scratchSegment, let the dictionary clean it into
        // 'segment', and carry on with the copy placed in scratchBuffer.
        scratchSegment.Length = 0;
        scratchSegment.Append(word, 0, length);
        string cleanedText = dictionary.CleanInput(scratchSegment.ToString(), segment);
        scratchBuffer = ArrayUtil.Grow(scratchBuffer, cleanedText.Length);
        length = segment.Length;
        segment.CopyTo(0, scratchBuffer, 0, length);
        word = scratchBuffer;
    }

    int detectedCase = CaseOf(word, length);

    if (detectedCase == UPPER_CASE)
    {
        // Upper-case input: union of the results for the word as given,
        // for titleBuffer and for lowerBuffer (in that order).
        CaseFoldTitle(word, length);
        CaseFoldLower(titleBuffer, length);

        IList<CharsRef> stems = DoStem(word, length, false);
        stems.AddRange(DoStem(titleBuffer, length, true));
        stems.AddRange(DoStem(lowerBuffer, length, true));
        return stems;
    }

    if (detectedCase == TITLE_CASE)
    {
        // Title-case input: union of the results for the word as given
        // and for lowerBuffer.
        CaseFoldLower(word, length);

        IList<CharsRef> stems = DoStem(word, length, false);
        stems.AddRange(DoStem(lowerBuffer, length, true));
        return stems;
    }

    // Any other case classification: exact match only.
    return DoStem(word, length, false);
}