/// <summary>
/// Feeds <paramref name="text"/> to the word breaker, routing its output through a
/// <c>BreakSink</c> built around <paramref name="onPutWord"/>.
/// </summary>
/// <param name="text">Text to break. A null or empty string makes the method return immediately.</param>
/// <param name="onPutWord">
/// Callback handed to the <c>BreakSink</c> constructor.
/// NOTE(review): how and when the sink invokes it is defined by <c>BreakSink</c> - confirm there.
/// </param>
/// <exception cref="Exception">Thrown with message "breaker == null" when no word breaker could be resolved.</exception>
public void wordBreak(string text, Action<PutTypes, Int16, Int16> onPutWord)
{
    if (string.IsNullOrEmpty(text))
    {
        return;
    }

    // Resolve the breaker only when none is stored yet; the result is kept in 'breaker'.
    if (breaker == null)
    {
        // First choice: the breaker supplied by 'item', when there is an item.
        if (item != null)
        {
            breaker = item.getWordBreaker();
        }

        // Second choice: the neutral word breaker.
        if (breaker == null)
        {
            breaker = Lib.items[Langs._].getWordBreaker();
        }

        if (breaker == null)
        {
            throw new Exception("breaker == null");
        }
    }

    BreakSink wordSink = new BreakSink(onPutWord);

    // Describe the whole string as the text source: start at 0, end at text.Length.
    TEXT_SOURCE source = new TEXT_SOURCE();
    source.pfnFillTextBuffer += fillTextBuffer;
    source.awcBuffer = text;
    source.iCur = 0;
    source.iEnd = text.Length;

    // No phrase sink is supplied (third argument is null).
    breaker.BreakText(ref source, wordSink, null);
}
/// <summary>
/// Splits <paramref name="text"/> into tokens using the word breaker loaded for
/// <paramref name="cultureInfo"/>.
/// </summary>
/// <param name="cultureInfo">Passed to <c>LoadWordBreaker</c> to obtain the word breaker.</param>
/// <param name="text">Text to tokenize.</param>
/// <returns>
/// The tokens collected by the word sink when <c>BreakText</c> returns 0 and the sink's token
/// list is not null; otherwise a list holding a single <c>Token(0, text.Length)</c>.
/// </returns>
public List<Token> Tokenize(CultureInfo cultureInfo, string text)
{
    List<Token> tokens = null;

    IWordBreaker breaker = this.LoadWordBreaker(cultureInfo);
    if (breaker != null)
    {
        WordSink sink = new WordSink();

        // Describe the whole string as the text source: start at 0, end at the buffer length.
        TEXT_SOURCE source = default(TEXT_SOURCE);
        source.FillTextBuffer = new FillTextBuffer(this.FillBuffer);
        source.Buffer = text;
        source.Current = 0;
        source.End = source.Buffer.Length;

        // Only a zero return value makes the sink's tokens the result.
        int result = breaker.BreakText(ref source, sink, null);
        if (result == 0)
        {
            tokens = sink.Tokens;
        }
    }

    if (tokens != null)
    {
        return tokens;
    }

    // Fallback: one token constructed from 0 and the text length.
    return new List<Token> { new Token(0, text.Length) };
}
/// <summary>
/// Creates a free-text query object from its collaborators and starts with an empty
/// <c>Indexes</c> list.
/// </summary>
/// <param name="wordBreaker">Stored in <c>wordBreaker</c>.</param>
/// <param name="thesaurus">Stored in <c>thesaurus</c>.</param>
/// <param name="wordReferenceEqualityComparer">Stored in <c>wordReferenceEqualityComparer</c>.</param>
/// <param name="textIndexSearcher">Stored in <c>textIndexSearcher</c>.</param>
public FreeTextQuery(
    IWordBreaker wordBreaker,
    IThesaurus thesaurus,
    IEqualityComparer<WordReference<T>> wordReferenceEqualityComparer,
    ITextIndexSearcher<T> textIndexSearcher)
{
    // Keep the injected collaborators.
    this.wordBreaker = wordBreaker;
    this.thesaurus = thesaurus;
    this.wordReferenceEqualityComparer = wordReferenceEqualityComparer;

    // Text indexes keyed by string; none are registered at construction time.
    Indexes = new SortedList<string, TextIndex<T>>();

    this.textIndexSearcher = textIndexSearcher;
}
/// <summary>
/// Breaks <paramref name="sentence"/> with <paramref name="breaker"/> and returns the
/// substrings reported with <c>PutTypes.put</c>.
/// </summary>
/// <param name="breaker">Word breaker forwarded to the callback-based <c>wordBreak</c> overload.</param>
/// <param name="sentence">Sentence to break; each result is a substring of it.</param>
/// <returns>The collected substrings, in the order the callback received them.</returns>
static IEnumerable<string> wordBreak(IWordBreaker breaker, string sentence)
{
    var words = new List<string>();

    wordBreak(sentence, breaker, (type, pos, len) =>
    {
        // Only PutTypes.put entries are collected; every other type is ignored.
        if (type == PutTypes.put)
        {
            words.Add(sentence.Substring(pos, len));
        }
    });

    return words;
}
//struct StemItem { public PutTypes type; public string word; }

/// <summary>
/// Feeds <paramref name="text"/> to <paramref name="breaker"/>, routing its output through a
/// <c>BreakSink</c> built around <paramref name="onPutWord"/>.
/// </summary>
/// <param name="text">Text to break. A null or empty string makes the method return immediately.</param>
/// <param name="breaker">Word breaker whose <c>BreakText</c> is called.</param>
/// <param name="onPutWord">
/// Callback handed to the <c>BreakSink</c> constructor.
/// NOTE(review): how and when the sink invokes it is defined by <c>BreakSink</c> - confirm there.
/// </param>
static void wordBreak(string text, IWordBreaker breaker, Action<PutTypes, int, int> onPutWord)
{
    if (string.IsNullOrEmpty(text))
    {
        return;
    }

    // Describe the whole string as the text source: start at 0, end at text.Length.
    TEXT_SOURCE source = new TEXT_SOURCE();
    source.awcBuffer = text;
    source.iCur = 0;
    source.iEnd = text.Length;
    source.pfnFillTextBuffer += fillTextBuffer;

    BreakSink wordSink = new BreakSink(onPutWord);

    // No phrase sink is supplied (third argument is null).
    breaker.BreakText(ref source, wordSink, null);
}
/// <summary>
/// Writes a short diagnostic report to the console, framed by two lines of 50 '=' characters.
/// </summary>
/// <param name="dictionary">Dictionary words; only the count is printed.</param>
/// <param name="inputWordsArray">Input words; only the count is printed.</param>
/// <param name="breaker">Word breaker; its runtime type name is printed.</param>
/// <param name="processor">Word processor; its runtime type name is printed.</param>
/// <param name="stopwatch">Stopwatch whose <c>ElapsedMilliseconds</c> is printed.</param>
private static void WriteDiagnosticInfo(
    string[] dictionary,
    string[] inputWordsArray,
    IWordBreaker breaker,
    IWordProcessor processor,
    Stopwatch stopwatch)
{
    // Same rule is used above and below the report.
    string rule = new string('=', 50);

    Console.WriteLine(rule);

    Console.WriteLine($"Breaker: {breaker.GetType().Name}");
    Console.WriteLine($"Processor: {processor.GetType().Name}");
    Console.WriteLine($"Dictionary words count: {dictionary.Length}");
    Console.WriteLine($"Input words count: {inputWordsArray.Length}");
    Console.WriteLine($"Time elapsed: {stopwatch.ElapsedMilliseconds} ms");

    Console.WriteLine(rule);
}
/// <summary>
/// Releases every word breaker stored in <c>Tokenizer.guidWordBreakerMapping</c> via
/// <see cref="Marshal.ReleaseComObject(object)"/> and then clears the mapping.
/// </summary>
/// <remarks>
/// The work runs after <c>Tokenizer.cacheLock.AcquireWriterLock(60000)</c> succeeds. An
/// <see cref="ApplicationException"/> raised inside the protected region is traced as a failure
/// to acquire the lock and is not rethrown. The lock is released only when this call actually
/// acquired it.
/// </remarks>
public static void ReleaseWordBreakers()
{
    // Tracks whether AcquireWriterLock returned normally. Without it, a failed acquisition
    // would still reach ReleaseLock() in the finally block and could release a lock that
    // this call never obtained.
    bool writerLockAcquired = false;
    try
    {
        Tokenizer.cacheLock.AcquireWriterLock(60000);
        writerLockAcquired = true;

        foreach (Guid key in Tokenizer.guidWordBreakerMapping.Keys)
        {
            IWordBreaker o = Tokenizer.guidWordBreakerMapping[key];
            Marshal.ReleaseComObject(o);
        }

        // Drop the now-released references so they cannot be handed out again.
        Tokenizer.guidWordBreakerMapping.Clear();
    }
    catch (ApplicationException)
    {
        ExTraceGlobals.CcGenericTracer.TraceError(0L, "Unable to acquire lock to release word breakers");
    }
    finally
    {
        if (writerLockAcquired)
        {
            Tokenizer.cacheLock.ReleaseLock();
        }
    }
}
/// <summary>
/// Creates the processor around the given word breaker.
/// </summary>
/// <param name="wordBreaker">Word breaker stored in <c>_wordBreaker</c>.</param>
public SerialWordProcessor(IWordBreaker wordBreaker)
{
    this._wordBreaker = wordBreaker;
}
/// <summary>
/// Creates the index filler around the given word breaker.
/// </summary>
/// <param name="wordBreaker">Word breaker stored in <c>wordBreaker</c>.</param>
public TextIndexFiller(IWordBreaker wordBreaker)
{
    // 'this.' is required here: the parameter has the same name as the member.
    this.wordBreaker = wordBreaker;
}
/// <summary>
/// Creates the processor around the given word breaker.
/// </summary>
/// <param name="wordBreaker">Word breaker stored in <c>_wordBreaker</c>.</param>
public ParallelWordProcessor(IWordBreaker wordBreaker)
{
    this._wordBreaker = wordBreaker;
}