Example #1
0
        /// <summary>
        /// Appends one posting record for a word to that word's inverted file.
        /// </summary>
        /// <remarks>
        /// The record is written in this order: <c>Doc.DocID</c>, <c>QuantityHits</c>,
        /// then <c>Position</c> of every entry in <c>Hits</c>. The file is opened in
        /// append mode, so it is created when it does not exist yet.
        /// An <see cref="IOException"/> is reported on the console and not rethrown.
        /// </remarks>
        /// <param name="wordOccur">The word occurrence to persist.</param>
        public void AddWordOccurrence(WordOccurrenceNode wordOccur)
        {
            invertedfileName = GetFileName(wordOccur.Word.WordID);

            //create the file or add entry to file
            try
            {
                // The writer is a local owned by the using block: it is disposed only
                // if it was actually created, so a failed open surfaces as the
                // IOException handled below instead of a NullReferenceException
                // raised while cleaning up.
                using (BinaryWriter writer = new BinaryWriter(new FileStream(invertedfileName, FileMode.Append)))
                {
                    writer.Write(wordOccur.Doc.DocID);
                    writer.Write(wordOccur.QuantityHits);

                    foreach (WordHit hit in wordOccur.Hits)
                    {
                        writer.Write(hit.Position);
                    }
                }
            }
            catch (IOException e)
            {
                Console.WriteLine("\n Cannot create file or write to file." + e.Message);
            }
        }
Example #2
0
        /// <summary>
        /// Computes the rank contribution of one word occurrence for a query as
        /// (query term count) * (saturated term frequency) * (inverse document frequency).
        /// </summary>
        /// <param name="occ">Occurrence of a word in a document.</param>
        /// <param name="query">Query whose items are matched against the word by <c>WordID</c>.</param>
        /// <returns>The rank factor; 0 when no query item refers to the word.</returns>
        public double CalcRankFactor(WordOccurrenceNode occ, Query query)
        {
            // Number of query items that refer to this word.
            int countTermQuery = 0;
            foreach (QueryItem item in query.QueryItens)
            {
                if (item.WordID == occ.Word.WordID)
                {
                    countTermQuery++;
                }
            }

            // Work in double from the start: with integral operands the quotient
            // below would be truncated and the product could overflow before
            // being widened.
            double docFrequency = (double)occ.Word.QuantityDocFrequency;
            double countTermDoc = (double)occ.Hits.Count;

            double bm25_TF = ((docFrequency + 1) * countTermDoc) / (countTermDoc + docFrequency);

            // The +1 on the collection size keeps the ratio above 1 when the word
            // occurs in every document.
            double idf = Math.Log((((double)totalDocQuantity) + 1) / docFrequency);

            return ((double)countTermQuery) * bm25_TF * idf;
        }
        /// <summary>
        /// Scores one word occurrence against a query as the product of three
        /// factors: a logarithmic document-frequency factor, a length-normalized
        /// term-frequency factor and a query-term-frequency factor.
        /// </summary>
        /// <param name="occ">Occurrence of a word in a document.</param>
        /// <param name="query">Query whose items are matched against the word by <c>WordID</c>.</param>
        /// <returns>The product of the three factors.</returns>
        public double CalcRankFactor(WordOccurrenceNode occ, Query query)
        {
            // Count the query items that refer to this word.
            int matches = 0;
            foreach (QueryItem queryItem in query.QueryItens)
            {
                if (queryItem.WordID != occ.Word.WordID)
                {
                    continue;
                }

                matches++;
            }

            // Query side: share of query items that are this word, saturated by k3.
            double queryTermFrequency = (double)matches / (double)query.QueryItens.Count;
            double queryFactor = ((k3 + 1) * queryTermFrequency) / (k3 + queryTermFrequency);

            // Collection side: log of (total documents / documents containing the word).
            // Disabled alternative kept for reference:
            //   Math.Log((totalDocQuantity - docFrequency + 0.5D) / (docFrequency + 0.5D), Math.E)
            double docFrequency = (double)occ.Word.QuantityDocFrequency;
            double logFactor = Math.Log(((double)totalDocQuantity) / docFrequency);

            // Document side: relative term frequency, normalized with k1, b and the
            // document length relative to avdl.
            // NOTE(review): if both WordQuantity and avdl are integral, the ratio
            // below is an integer division - confirm avdl is a floating-point field.
            double termFrequency = (double)occ.Hits.Count / (double)occ.Doc.WordQuantity;
            double lengthNormalizer = ((k1 * (1 - b)) + (b * (occ.Doc.WordQuantity / avdl))) + termFrequency;
            double documentFactor = ((k1 + 1) * termFrequency) / lengthNormalizer;

            return logFactor * documentFactor * queryFactor;
        }
Example #4
0
        /// <summary>
        /// Reads the posting records stored in the inverted file of a word.
        /// </summary>
        /// <remarks>
        /// Each record is read as: a 32-bit document key, a 32-bit hit count, then
        /// that many 32-bit hit positions. Reading stops at the end of the file or
        /// after <c>conf.MaxResultList</c> records, whichever comes first.
        /// </remarks>
        /// <param name="word">The word whose occurrences are loaded.</param>
        /// <returns>One node per record read; empty when the file holds no records.</returns>
        /// <exception cref="IOException">
        /// The file cannot be opened or a record is truncated. The exception
        /// propagates unchanged, with its original stack trace.
        /// </exception>
        public List<WordOccurrenceNode> GetWordOccurrencies(Word word)
        {
            invertedfileName = GetFileName(word.WordID);
            List<WordOccurrenceNode> result = new List<WordOccurrenceNode>();

            // The reader is a local owned by the using block: it is disposed only if
            // the open succeeded, so a missing file is reported as the original
            // IOException rather than a NullReferenceException raised during cleanup.
            using (BinaryReader reader = new BinaryReader(new FileStream(invertedfileName, FileMode.Open)))
            {
                //reading the file
                for (int i = 0; (i < conf.MaxResultList) && (reader.BaseStream.Position < reader.BaseStream.Length); i++)
                {
                    int tempDocumentHashOne = reader.ReadInt32();
                    int hitsCount = reader.ReadInt32();

                    WordOccurrenceNode node = new WordOccurrenceNode();

                    node.Hits = new List<WordHit>();

                    for (int y = 0; y < hitsCount; y++)
                    {
                        WordHit hit = new WordHit();
                        hit.Position = reader.ReadInt32();
                        node.Hits.Add(hit);
                    }

                    node.Word = word;
                    node.QuantityHits = hitsCount;
                    node.Doc = this.docIndex.Search(tempDocumentHashOne);
                    result.Add(node);
                }
            }

            return result;
        }
Example #5
0
        /// <summary>
        /// Builds the posting list of this document: one
        /// <c>WordOccurrenceNode</c> per distinct cleaned word, holding the
        /// position of every occurrence of that word in the text.
        /// </summary>
        /// <remarks>
        /// The text is split on single spaces and positions are indexes into that
        /// split, so empty tokens still consume a position. As a side effect
        /// <c>WordQuantity</c> is set to the number of tokens plus one.
        /// </remarks>
        /// <returns>
        /// A table keyed by the hash code of the cleaned word, with
        /// <c>WordOccurrenceNode</c> values.
        /// </returns>
        public Hashtable GetPostingList()
        {
            Hashtable postingList = new Hashtable();

            string text = this.GetText();

            string[] splitWords = text.Split(' ');
            this.WordQuantity = splitWords.Length + 1;

            //index words
            for (int i = 0; i < splitWords.Length; i++)
            {
                string wordTmp = QueryParser.GetCleanQuery(splitWords[i]);
                wordTmp = wordTmp.Replace(" ", string.Empty);

                // Tokens that clean down to nothing are never indexed. Checking this
                // first keeps an empty token from being recorded as a hit of some
                // real word whose hash code equals that of the empty string.
                if (string.IsNullOrEmpty(wordTmp))
                {
                    continue;
                }

                // NOTE(review): distinct words that share a hash code end up in the
                // same node, and string hash codes are not guaranteed to be stable
                // across runtimes or processes - confirm this is acceptable wherever
                // WordID is persisted.
                int key = wordTmp.GetHashCode();

                // Single lookup: the indexer yields null for an unknown key.
                WordOccurrenceNode node = postingList[key] as WordOccurrenceNode;
                if (node == null)
                {
                    node = new WordOccurrenceNode();
                    node.Word = new Word();
                    node.Word.WordID = key;
                    node.Word.Text = wordTmp;
                    node.Doc = this;

                    postingList.Add(key, node);
                }

                //get frequency for each document word
                WordHit newhit = new WordHit();
                newhit.Position = i;
                node.Hits.Add(newhit);
            }

            // No explicit GC calls: the temporaries are ordinary managed objects
            // without finalizers, and forcing a full collection on every call only
            // costs time.
            return postingList;
        }
Example #6
0
        /// <summary>
        /// Adds the rank factor of one word occurrence to this object's
        /// accumulated query rank.
        /// </summary>
        /// <param name="occ">Occurrence of a word in a document.</param>
        /// <param name="query">Query the occurrence is scored against.</param>
        public void CalculateRank(WordOccurrenceNode occ, Query query)
        {
            // The scoring strategy is obtained from the factory on every call.
            IRankFunction ranker = FactoryRankFunction.GetRankFunction();
            double contribution = ranker.CalcRankFactor(occ, query);

            this.queryRank += contribution;
        }
Example #7
0
 /// <summary>
 /// Appends a node at the end of the occurrence chain that starts at this node.
 /// </summary>
 /// <param name="newNode">Node to link in; its <c>PreviousOccurrence</c> is set to the former last node.</param>
 public void Add(WordOccurrenceNode newNode)
 {
     // This node is the last one: link the new node right here.
     if (!this.HasNext())
     {
         this.NextOccurrence = newNode;
         newNode.PreviousOccurrence = this;
         return;
     }

     // Otherwise hand the node on to the next link in the chain.
     this.NextOccurrence.Add(newNode);
 }