예제 #1
0
        /// <summary>
        /// Tokenizes a test text with a named analyzer and outputs one row per token.
        /// </summary>
        /// <remarks>
        /// Exactly two parameters are required: the first is the analyzer name, the
        /// second is the text to tokenize. The output has the columns "Word",
        /// "Position" and "Rank".
        /// </remarks>
        /// <exception cref="ArgumentException">The parameter count is not 2.</exception>
        /// <exception cref="Data.DataException">No analyzer is returned for the given name.</exception>
        public void Run()
        {
            const string usage = "the number of parameters must be 2. Parameter 1 is Analyzer name, Parameter 2 is a text for test.";

            if (Parameters.Count != 2)
            {
                throw new ArgumentException(usage);
            }

            // Resolve the analyzer by name; a null result means the name is unknown.
            Analysis.IAnalyzer tokenizer = Data.DBProvider.GetAnalyzer(Parameters[0]);

            if (tokenizer == null)
            {
                string notFound = string.Format("Can't find analyzer name : {0}", Parameters[0]);
                throw new Data.DataException(notFound);
            }

            // Declare the result columns before emitting any rows.
            AddColumn("Word");
            AddColumn("Position");
            AddColumn("Rank");

            // One output row per token produced by the analyzer.
            foreach (Entity.WordInfo token in tokenizer.Tokenize(Parameters[1]))
            {
                NewRow();
                OutputValue("Word", token.Word);
                OutputValue("Position", token.Position.ToString());
                OutputValue("Rank", token.Rank.ToString());
            }
        }
예제 #2
0
        /// <summary>
        /// Indexes the text of one field for one document: tokenizes the text with
        /// the given analyzer, updates the per-word temporary statistics held in the
        /// word index writer pool, and finally calls Index() on every writer that
        /// was touched by this document.
        /// </summary>
        /// <remarks>
        /// The whole operation runs inside <c>lock (this)</c>. The word table, the
        /// position allocator, the temporary id list and the writer pool are created
        /// lazily on first use.
        /// </remarks>
        /// <param name="text">text to tokenize and index</param>
        /// <param name="documentId">document id the text belongs to</param>
        /// <param name="analyzer">analyzer used to tokenize the text</param>
        private void Index(string text, int documentId, Analysis.IAnalyzer analyzer)
        {
            lock (this)
            {
                // Maps a word to the index of its writer in _WordIndexWriterPool.
                if (_WordTableWriter == null)
                {
                    _WordTableWriter = new Dictionary <string, int>(65536);
                }

                if (_DocPositionAlloc == null)
                {
                    _DocPositionAlloc = new DocumentPositionAlloc();
                }

                _DocumentCount++;
                if (_TempWordIndexWriter == null)
                {
                    _TempWordIndexWriter = new AppendList <int>(65536);
                }

                // Collects the pool ids of the writers touched by THIS call only.
                _TempWordIndexWriter.Clear();

                foreach (Entity.WordInfo wordInfo in analyzer.Tokenize(text))
                {
                    // Tokens with a negative position are not indexed.
                    if (wordInfo.Position < 0)
                    {
                        continue;
                    }

                    // string.IsInterned returns the interned instance, or null when the
                    // string is not in the intern pool; fall back to the token's own string.
                    string internedWord = string.IsInterned(wordInfo.Word);

                    if (internedWord == null)
                    {
                        internedWord = wordInfo.Word;
                    }

                    int index;

                    if (!_WordTableWriter.TryGetValue(internedWord, out index))
                    {
                        // First time this word is seen: allocate a writer slot for it.
                        if (_WordIndexWriterPool == null)
                        {
                            _WordIndexWriterPool = new WordIndexWriter[65536];
                        }

                        // Pool is full: double its capacity and copy the existing writers over.
                        if (_IndexWriterPoolId >= _WordIndexWriterPool.Length)
                        {
                            int nextLength = _WordIndexWriterPool.Length * 2;

                            WordIndexWriter[] tempPool = new WordIndexWriter[nextLength];
                            Array.Copy(_WordIndexWriterPool, tempPool, _WordIndexWriterPool.Length);
                            _WordIndexWriterPool = tempPool;
                        }

                        // The count starts at 0 on purpose: TempDocId is set to documentId here,
                        // so the check below takes the else branch and increments it to 1.
                        // NOTE(review): analyzer.Count is read while the Tokenize enumeration is
                        // still in progress; presumably it is the total token count of the text - confirm.
                        _WordIndexWriterPool[_IndexWriterPoolId]           = new WordIndexWriter(wordInfo.Word, _IndexMode, _DocPositionAlloc);
                        _WordIndexWriterPool[_IndexWriterPoolId].TempDocId = documentId;
                        _WordIndexWriterPool[_IndexWriterPoolId].TempWordCountInThisDoc = 0;
                        _WordIndexWriterPool[_IndexWriterPoolId].TempFirstPosition      = wordInfo.Position;
                        _WordIndexWriterPool[_IndexWriterPoolId].TempTotalWordsInDoc    = analyzer.Count;

                        // The lookup above used internedWord; the key added here is wordInfo.Word,
                        // which has the same content.
                        _WordTableWriter.Add(wordInfo.Word, _IndexWriterPoolId);

                        _TempWordIndexWriter.Add(_IndexWriterPoolId);
                        index = _IndexWriterPoolId;
                        _IndexWriterPoolId++;
                    }

                    if (_WordIndexWriterPool[index].TempDocId != documentId)
                    {
                        // Known word, first occurrence in this document: reset the per-document
                        // temporaries and remember the writer for the final Index() pass.
                        _WordIndexWriterPool[index].TempDocId = documentId;
                        _WordIndexWriterPool[index].TempWordCountInThisDoc = 1;
                        _WordIndexWriterPool[index].TempFirstPosition      = wordInfo.Position;
                        _WordIndexWriterPool[index].TempTotalWordsInDoc    = analyzer.Count;
                        _TempWordIndexWriter.Add(index);
                    }
                    else
                    {
                        // Word already seen in this document (or just created above):
                        // keep the smallest position and count the occurrence.
                        if (_WordIndexWriterPool[index].TempFirstPosition > wordInfo.Position)
                        {
                            _WordIndexWriterPool[index].TempFirstPosition = wordInfo.Position;
                        }

                        _WordIndexWriterPool[index].TempWordCountInThisDoc++;
                    }
                }

                // Call Index() once on each writer touched by this document.
                foreach (int writeId in _TempWordIndexWriter)
                {
                    _WordIndexWriterPool[writeId].Index();
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Tokenizes a test text with the analyzer configured for a table field and
        /// outputs one row per token with the columns "Word", "Position" and "Rank".
        /// </summary>
        /// <remarks>
        /// Parameter 1 is the table name, parameter 2 the field name and parameter 3
        /// the text to tokenize. When exactly four parameters are supplied and the
        /// fourth equals "sqlclient" (ignoring case), TokenizeForSqlClient is used
        /// instead of Tokenize.
        /// </remarks>
        /// <exception cref="ArgumentException">Fewer than three parameters were supplied.</exception>
        /// <exception cref="Data.DataException">
        /// The table, the field or the analyzer cannot be found, or the field has no analyzer name.
        /// </exception>
        public void Run()
        {
            if (Parameters.Count < 3)
            {
                throw new ArgumentException("Parameter 1 is table name, Parameter 2 is field name, Parameter 3 is a text for test, Parameter 4 is analyzer type(optional)");
            }

            Data.DBProvider dbProvider = Data.DBProvider.GetDBProvider(Parameters[0]);

            if (dbProvider == null)
            {
                throw new Data.DataException(string.Format("Can't find table name : {0}", Parameters[0]));
            }

            Data.Field field = dbProvider.GetField(Parameters[1]);

            if (field == null)
            {
                // Report the field name that was looked up (Parameters[1]), not the test text.
                throw new Data.DataException(string.Format("Can't find field name : {0}", Parameters[1]));
            }

            if (field.AnalyzerName == null)
            {
                // Same here: the field name is Parameters[1].
                throw new Data.DataException(string.Format("Field: {0} in {1} is not tokenized", Parameters[1],
                                                           dbProvider.Table.Name));
            }

            Analysis.IAnalyzer analyzer = Data.DBProvider.GetAnalyzer(field.AnalyzerName);

            if (analyzer == null)
            {
                throw new Data.DataException(string.Format("Can't find analyzer name : {0}", field.AnalyzerName));
            }

            // "sqlclient" is a fixed keyword, not linguistic text, so compare ordinally;
            // a culture-sensitive comparison can fail to match "SQLCLIENT" under
            // cultures with special casing rules for 'i' (e.g. Turkish).
            bool clientAnalyzer = Parameters.Count == 4
                && Parameters[3].Equals("sqlclient", StringComparison.OrdinalIgnoreCase);

            AddColumn("Word");
            AddColumn("Position");
            AddColumn("Rank");

            if (clientAnalyzer)
            {
                foreach (Entity.WordInfo word in analyzer.TokenizeForSqlClient(Parameters[2]))
                {
                    NewRow();
                    OutputValue("Word", word.Word);
                    OutputValue("Position", word.Position.ToString());
                    OutputValue("Rank", word.Rank.ToString());
                }
            }
            else
            {
                foreach (Entity.WordInfo word in analyzer.Tokenize(Parameters[2]))
                {
                    NewRow();
                    OutputValue("Word", word.Word);
                    OutputValue("Position", word.Position.ToString());
                    OutputValue("Rank", word.Rank.ToString());
                }
            }
        }