예제 #1
0
        /// <summary>
        /// Builds an <see cref="InvertedIndex"/> from the supplied documents.
        /// </summary>
        /// <param name="documents">
        /// The document texts. The zero-based position of a document in this
        /// array is the document index passed to the inverted index.
        /// </param>
        /// <returns>
        /// An index constructed with the document count and holding one entry
        /// per token for which <c>_analyzer.Process</c> returned <c>true</c>.
        /// </returns>
        public InvertedIndex CreateIndex(params string[] documents)
        {
            var documentCount = documents.Length;
            var index = new InvertedIndex(documentCount);

            for (var documentIndex = 0; documentIndex < documentCount; documentIndex++)
            {
                using (var text = new StringReader(documents[documentIndex]))
                {
                    var tokens = new TokenSource(text);

                    while (tokens.Next())
                    {
                        // The analyzer reports false for tokens that must not be indexed.
                        if (!_analyzer.Process(tokens))
                        {
                            continue;
                        }

                        // The key is built from the token source's buffer and size
                        // as they stand after the analyzer has processed the token.
                        var key = new CharArraySegmentKey(tokens.Buffer, tokens.Size);
                        index.Append(key, documentIndex, tokens.Position);
                    }
                }
            }

            return index;
        }
예제 #2
0
 /// <summary>
 /// Runs <see cref="Process"/> over the tokens read from <paramref name="source"/>
 /// and returns the resulting strings.
 /// </summary>
 /// <param name="source">The text to analyze.</param>
 /// <returns>The array produced by <c>ReadAll(Process).ToArray()</c>.</returns>
 public IEnumerable<string> Analyze(string source)
 {
     using (var text = new StringReader(source))
     {
         var tokens = new TokenSource(text);

         // ToArray() is called inside the using block, so the results are
         // collected before the reader is disposed.
         var analyzed = tokens.ReadAll(Process).ToArray();
         return analyzed;
     }
 }
예제 #3
0
 /// <summary>
 /// Advances a token source over <paramref name="source"/> once, runs
 /// <see cref="Process"/> on it, and returns the token source's string form.
 /// </summary>
 /// <param name="source">The text whose first token is analyzed.</param>
 /// <returns>The result of <c>ToString()</c> on the token source after processing.</returns>
 /// <remarks>
 /// The return values of <c>Next()</c> and <c>Process</c> are both ignored here.
 /// NOTE(review): what is returned when <paramref name="source"/> yields no token,
 /// or when a filter rejects the token, depends on <c>TokenSource</c> - confirm.
 /// </remarks>
 public string AnalyzeOnlyTheFirstToken(string source)
 {
     using (var text = new StringReader(source))
     {
         var tokens = new TokenSource(text);

         tokens.Next();
         Process(tokens);

         var firstToken = tokens.ToString();
         return firstToken;
     }
 }
예제 #4
0
 /// <summary>
 /// Passes <paramref name="source"/> through each entry of <c>_filters</c> in order,
 /// stopping at the first filter whose <c>Process</c> returns <c>false</c>.
 /// </summary>
 /// <param name="source">The token source handed to every filter.</param>
 /// <returns>
 /// <c>true</c> if every filter returned <c>true</c> (including when there are no
 /// filters); <c>false</c> as soon as one filter returns <c>false</c>.
 /// </returns>
 public bool Process(TokenSource source)
 {
     var accepted = true;

     // The loop condition short-circuits on a rejection, so no later filter runs.
     for (var index = 0; accepted && index < _filters.Length; index++)
     {
         accepted = _filters[index].Process(source);
     }

     return accepted;
 }
예제 #5
0
        /// <summary>
        /// Tokenizes one document and appends every token for which
        /// <c>_analyzer.Process</c> returns <c>true</c> to <paramref name="result"/>.
        /// </summary>
        /// <param name="result">The inverted index that receives the entries.</param>
        /// <param name="documentIndex">The document index recorded with each appended token.</param>
        /// <param name="reader">The reader supplying the document text; it is not disposed here.</param>
        private void ProcessDocument(
            InvertedIndex result,
            int documentIndex,
            TextReader reader
            )
        {
            var tokens = new TokenSource(reader);

            while (tokens.Next())
            {
                // The analyzer reports false for tokens that must not be indexed.
                if (!_analyzer.Process(tokens))
                {
                    continue;
                }

                // The key is built from the token source's buffer and size
                // as they stand after the analyzer has processed the token.
                var key = new CharArraySegmentKey(tokens.Buffer, tokens.Size);
                result.Append(key, documentIndex, tokens.Position);
            }
        }