Пример #1
0
        /// <summary>
        /// Reads the inverted-file entries stored for <paramref name="word"/> and
        /// returns them as a list of occurrence nodes.
        /// </summary>
        /// <remarks>
        /// The file is consumed as a sequence of records: an Int32 document hash,
        /// an Int32 hit count, then that many Int32 hit positions. Reading stops at
        /// the end of the file or after <c>conf.MaxResultList</c> records, whichever
        /// comes first.
        /// </remarks>
        /// <param name="word">
        /// Word whose <c>WordID</c> selects the file (through <c>GetFileName</c>);
        /// the same instance is attached to every returned node.
        /// </param>
        /// <returns>One node per record read; empty when the file has no records.</returns>
        /// <exception cref="IOException">
        /// The file cannot be opened or a record is truncated. The exception
        /// propagates unchanged, with its original stack trace.
        /// </exception>
        public List<WordOccurrenceNode> GetWordOccurrencies(Word word)
        {
            invertedfileName = GetFileName(word.WordID);
            List<WordOccurrenceNode> result = new List<WordOccurrenceNode>();

            try
            {
                // Open read-only and allow other readers: this method never writes,
                // so it should not require write permission or lock out concurrent lookups.
                br = new BinaryReader(
                    new FileStream(invertedfileName, FileMode.Open, FileAccess.Read, FileShare.Read));

                long length = br.BaseStream.Length;

                for (int i = 0; (i < conf.MaxResultList) && (br.BaseStream.Position < length); i++)
                {
                    int documentHash = br.ReadInt32();
                    int hitsCount = br.ReadInt32();

                    WordOccurrenceNode node = new WordOccurrenceNode();
                    node.Hits = new List<WordHit>();

                    // A non-positive count simply yields a node without hits.
                    for (int y = 0; y < hitsCount; y++)
                    {
                        WordHit hit = new WordHit();
                        hit.Position = br.ReadInt32();
                        node.Hits.Add(hit);
                    }

                    node.Word = word;
                    node.QuantityHits = hitsCount;
                    node.Doc = this.docIndex.Search(documentHash);
                    result.Add(node);
                }

                return result;
            }
            finally
            {
                // The reader may never have been created if opening the file failed;
                // closing unconditionally would mask the real error with a NullReferenceException.
                if (br != null)
                {
                    br.Close();
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Builds the posting list for this document: one
        /// <c>WordOccurrenceNode</c> per distinct cleaned word, holding the
        /// positions at which the word occurs.
        /// </summary>
        /// <remarks>
        /// The text returned by <c>GetText</c> is split on single spaces; each
        /// token is passed through <c>QueryParser.GetCleanQuery</c> and tokens that
        /// end up empty are skipped. A hit position is the token's index in the
        /// split array, so skipped tokens still advance the position counter.
        /// Entries are keyed by <c>string.GetHashCode()</c> of the cleaned word,
        /// which means two different words with the same hash share one entry.
        /// Also sets <c>WordQuantity</c> as a side effect.
        /// </remarks>
        /// <returns>
        /// A <see cref="Hashtable"/> mapping the word hash (boxed <c>int</c>) to its
        /// <c>WordOccurrenceNode</c>; empty when the document has no usable words.
        /// </returns>
        public Hashtable GetPostingList()
        {
            Hashtable postingList = new Hashtable();

            // A document without text is treated like an empty one instead of crashing.
            string text = this.GetText() ?? string.Empty;

            string[] splitWords = text.Split(' ');

            // NOTE(review): the +1 is kept from the original; it is not clear from this
            // method why the quantity is one more than the token count - confirm with callers.
            this.WordQuantity = splitWords.Length + 1;

            for (int i = 0; i < splitWords.Length; i++)
            {
                string wordTmp = QueryParser.GetCleanQuery(splitWords[i]);
                if (wordTmp != null)
                {
                    wordTmp = wordTmp.Replace(" ", string.Empty);
                }

                // Skip blanks before any lookup so an empty token can never be credited
                // to an existing entry whose hash happens to equal that of "".
                if (string.IsNullOrEmpty(wordTmp))
                {
                    continue;
                }

                int key = wordTmp.GetHashCode();

                // Single lookup: the Hashtable indexer yields null for a missing key.
                WordOccurrenceNode node = (WordOccurrenceNode)postingList[key];
                if (node == null)
                {
                    node = new WordOccurrenceNode();
                    node.Word = new Word();
                    node.Word.WordID = key;
                    node.Word.Text = wordTmp;
                    node.Doc = this;

                    // Make sure the hit list exists before it is appended to.
                    if (node.Hits == null)
                    {
                        node.Hits = new System.Collections.Generic.List<WordHit>();
                    }

                    postingList.Add(key, node);
                }

                WordHit hit = new WordHit();
                hit.Position = i;
                node.Hits.Add(hit);
            }

            // No manual GC calls: strings and arrays have no finalizers to re-register,
            // and forcing a full collection on every call only costs time.
            return postingList;
        }