Exemple #1
0
        /// <summary>
        /// Runs the word breaker over <paramref name="text"/>, routing results into a
        /// <c>BreakSink</c> built around <paramref name="onPutWord"/>.
        /// </summary>
        /// <param name="text">Text to break. Null or empty text is ignored.</param>
        /// <param name="onPutWord">
        /// Callback handed to the <c>BreakSink</c>; presumably invoked once per reported word
        /// with its kind, start position and length - confirm against <c>BreakSink</c>.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// No word breaker could be obtained from <c>item</c> or from the neutral language entry.
        /// </exception>
        public void wordBreak(string text, Action <PutTypes, Int16, Int16> onPutWord)
        {
            if (string.IsNullOrEmpty(text))
            {
                return;
            }

            // Resolve the breaker lazily: prefer the one supplied by the current item ...
            if (breaker == null && item != null)
            {
                breaker = item.getWordBreaker();
            }
            // ... and fall back to the neutral word breaker otherwise.
            if (breaker == null)
            {
                breaker = Lib.items[Langs._].getWordBreaker();
            }
            if (breaker == null)
            {
                // A specific exception type instead of the bare System.Exception; it still
                // derives from Exception, so existing catch clauses keep working.
                throw new InvalidOperationException("breaker == null");
            }

            BreakSink cws = new BreakSink(onPutWord);
            TEXT_SOURCE pTextSource = new TEXT_SOURCE();

            pTextSource.pfnFillTextBuffer += fillTextBuffer;
            pTextSource.awcBuffer          = text;
            pTextSource.iCur = 0;
            pTextSource.iEnd = text.Length;

            // No phrase sink is used, hence the null third argument.
            breaker.BreakText(ref pTextSource, cws, /*cps*/ null);
        }
Exemple #2
0
        /// <summary>
        /// Breaks <paramref name="text"/> into tokens using the word breaker loaded for
        /// <paramref name="cultureInfo"/>.
        /// </summary>
        /// <param name="cultureInfo">Culture passed to <c>LoadWordBreaker</c>.</param>
        /// <param name="text">Text to tokenize.</param>
        /// <returns>
        /// The tokens collected by the word sink when breaking succeeds; otherwise a list holding a
        /// single token constructed from 0 and the length of <paramref name="text"/>.
        /// </returns>
        public List <Token> Tokenize(CultureInfo cultureInfo, string text)
        {
            IWordBreaker breaker = this.LoadWordBreaker(cultureInfo);

            if (breaker != null)
            {
                WordSink    sink   = new WordSink();
                TEXT_SOURCE source = default(TEXT_SOURCE);

                source.FillTextBuffer = new FillTextBuffer(this.FillBuffer);
                source.Buffer         = text;
                source.Current        = 0;
                source.End            = source.Buffer.Length;

                // A zero result is treated as success (presumably S_OK - confirm against the interop definition).
                if (breaker.BreakText(ref source, sink, null) == 0)
                {
                    List <Token> tokens = sink.Tokens;
                    if (tokens != null)
                    {
                        return tokens;
                    }
                }
            }

            // Fallback: no breaker, a failed break, or a sink without a token list.
            List <Token> fallback = new List <Token>();
            fallback.Add(new Token(0, text.Length));
            return fallback;
        }
Exemple #3
0
 /// <summary>
 /// Creates a free-text query over the supplied collaborators, starting with an empty
 /// <c>Indexes</c> list.
 /// </summary>
 /// <param name="wordBreaker">Word breaker stored for later use.</param>
 /// <param name="thesaurus">Thesaurus stored for later use.</param>
 /// <param name="wordReferenceEqualityComparer">Comparer for word references, stored for later use.</param>
 /// <param name="textIndexSearcher">Text index searcher stored for later use.</param>
 public FreeTextQuery(IWordBreaker wordBreaker,
                      IThesaurus thesaurus,
                      IEqualityComparer <WordReference <T> > wordReferenceEqualityComparer,
                      ITextIndexSearcher <T> textIndexSearcher)
 {
     // Arguments are stored exactly as supplied; nothing is validated here.
     this.wordReferenceEqualityComparer = wordReferenceEqualityComparer;
     this.thesaurus = thesaurus;
     this.wordBreaker = wordBreaker;

     Indexes = new SortedList <string, TextIndex <T> >();

     this.textIndexSearcher = textIndexSearcher;
 }
Exemple #4
0
        /// <summary>
        /// Breaks <paramref name="sentence"/> with <paramref name="breaker"/> and collects the
        /// substrings reported with <c>PutTypes.put</c>.
        /// </summary>
        /// <param name="breaker">Word breaker forwarded to the callback-based overload.</param>
        /// <param name="sentence">Text to split.</param>
        /// <returns>The collected substrings, in the order they were reported.</returns>
        static IEnumerable <string> wordBreak(IWordBreaker breaker, string sentence)
        {
            List <string> words = new List <string>();

            wordBreak(sentence, breaker, (kind, start, length) =>
            {
                // Every notification other than PutTypes.put is skipped.
                if (kind == PutTypes.put)
                {
                    words.Add(sentence.Substring(start, length));
                }
            });

            return words;
        }
Exemple #5
0
        //struct StemItem { public PutTypes type; public string word; }

        /// <summary>
        /// Runs <paramref name="breaker"/> over <paramref name="text"/>, routing results into a
        /// <c>BreakSink</c> built around <paramref name="onPutWord"/>.
        /// </summary>
        /// <param name="text">Text to break. Null or empty text is ignored.</param>
        /// <param name="breaker">Word breaker whose <c>BreakText</c> is called.</param>
        /// <param name="onPutWord">Callback handed to the <c>BreakSink</c>.</param>
        static void wordBreak(string text, IWordBreaker breaker, Action <PutTypes, int, int> onPutWord)
        {
            if (text == null || text.Length == 0)
            {
                return;
            }

            // Describe the whole string as a single buffer spanning [0, text.Length).
            TEXT_SOURCE source = new TEXT_SOURCE();
            source.awcBuffer = text;
            source.iCur      = 0;
            source.iEnd      = text.Length;
            source.pfnFillTextBuffer += fillTextBuffer;

            BreakSink sink = new BreakSink(onPutWord);

            // No phrase sink is used, hence the null third argument.
            breaker.BreakText(ref source, sink, /*cps*/ null);
        }
Exemple #6
0
 /// <summary>
 /// Writes a summary of a run to the console: the breaker and processor type names, the
 /// dictionary and input sizes, and the elapsed time, framed by two separator lines.
 /// </summary>
 /// <param name="dictionary">Dictionary words; only the count is printed.</param>
 /// <param name="inputWordsArray">Input words; only the count is printed.</param>
 /// <param name="breaker">Word breaker whose runtime type name is printed.</param>
 /// <param name="processor">Word processor whose runtime type name is printed.</param>
 /// <param name="stopwatch">Stopwatch whose elapsed milliseconds are printed.</param>
 private static void WriteDiagnosticInfo(
     string[] dictionary,
     string[] inputWordsArray,
     IWordBreaker breaker,
     IWordProcessor processor,
     Stopwatch stopwatch)
 {
     string separator = new string('=', 50);

     Console.WriteLine(separator);

     Type breakerType = breaker.GetType();
     Console.WriteLine($"Breaker: {breakerType.Name}");

     Type processorType = processor.GetType();
     Console.WriteLine($"Processor: {processorType.Name}");

     Console.WriteLine($"Dictionary words count: {dictionary.Length}");
     Console.WriteLine($"Input words count: {inputWordsArray.Length}");
     Console.WriteLine($"Time elapsed: {stopwatch.ElapsedMilliseconds} ms");

     Console.WriteLine(separator);
 }
Exemple #7
0
 /// <summary>
 /// Releases every cached word breaker COM object and empties the cache, holding the
 /// cache's writer lock while doing so.
 /// </summary>
 /// <remarks>
 /// If an <see cref="ApplicationException"/> is raised (presumably the lock-acquisition
 /// timeout of a <c>ReaderWriterLock</c> - confirm the type of <c>cacheLock</c>), an error
 /// is traced and the cache is left untouched.
 /// </remarks>
 public static void ReleaseWordBreakers()
 {
     // Tracks whether this call actually took the lock, so that the finally block never
     // releases a lock that was not acquired here.
     bool lockAcquired = false;
     try
     {
         Tokenizer.cacheLock.AcquireWriterLock(60000);
         lockAcquired = true;

         // Walk the values directly instead of looking each key up again.
         foreach (IWordBreaker wordBreaker in Tokenizer.guidWordBreakerMapping.Values)
         {
             Marshal.ReleaseComObject(wordBreaker);
         }
         Tokenizer.guidWordBreakerMapping.Clear();
     }
     catch (ApplicationException)
     {
         ExTraceGlobals.CcGenericTracer.TraceError(0L, "Unable to acquire lock to release word breakers");
     }
     finally
     {
         if (lockAcquired)
         {
             Tokenizer.cacheLock.ReleaseLock();
         }
     }
 }
Exemple #8
0
 /// <summary>
 /// Creates a serial word processor that keeps the given word breaker.
 /// </summary>
 /// <param name="wordBreaker">Word breaker stored as supplied; it is not validated here.</param>
 public SerialWordProcessor(IWordBreaker wordBreaker)
 {
     this._wordBreaker = wordBreaker;
 }
 /// <summary>
 /// Creates a text index filler that keeps the given word breaker.
 /// </summary>
 /// <param name="wordBreaker">Word breaker stored as supplied; it is not validated here.</param>
 public TextIndexFiller(IWordBreaker wordBreaker) => this.wordBreaker = wordBreaker;
 /// <summary>
 /// Creates a parallel word processor that keeps the given word breaker.
 /// </summary>
 /// <param name="wordBreaker">Word breaker stored as supplied; it is not validated here.</param>
 public ParallelWordProcessor(IWordBreaker wordBreaker)
 {
     this._wordBreaker = wordBreaker;
 }