/// <summary>
/// Appends one posting record for a word to that word's inverted file.
/// </summary>
/// <remarks>
/// The target file name is derived from the word id via <c>GetFileName</c> and the file is
/// opened with <see cref="FileMode.Append"/>, so it is created when it does not exist yet.
/// Values are written in this order: document id, hit count, then the position of every hit.
/// An <see cref="IOException"/> is reported on the console and swallowed (best effort).
/// </remarks>
/// <param name="wordOccur">Occurrence (word, document and hits) to persist.</param>
public void AddWordOccurrence(WordOccurrenceNode wordOccur)
{
    invertedfileName = GetFileName(wordOccur.Word.WordID);

    try
    {
        // 'using' disposes the writer (and the underlying stream) on every path. The previous
        // 'finally { bw.Close(); }' dereferenced a writer that was never assigned when the
        // file could not be opened, replacing the handled I/O error with a NullReferenceException.
        using (BinaryWriter writer = new BinaryWriter(new FileStream(invertedfileName, FileMode.Append)))
        {
            writer.Write(wordOccur.Doc.DocID);
            writer.Write(wordOccur.QuantityHits);

            foreach (WordHit hit in wordOccur.Hits)
            {
                writer.Write(hit.Position);
            }
        }
    }
    catch (IOException e)
    {
        Console.WriteLine("\n Cannot create file or write to file." + e.Message);
    }
}
/// <summary>
/// Computes a tf-idf style rank contribution of one word occurrence for a query.
/// </summary>
/// <remarks>
/// The result is the product of three factors:
/// the number of query items whose word id equals the occurrence's word id,
/// a saturated term frequency <c>((df + 1) * hits) / (hits + df)</c>, and
/// <c>log((totalDocQuantity + 1) / df)</c>, where <c>df</c> is the word's
/// <c>QuantityDocFrequency</c> and <c>hits</c> is the number of hits in the document.
/// </remarks>
/// <param name="occ">Occurrence of the word in a document.</param>
/// <param name="query">Query whose items are matched against the occurrence's word.</param>
/// <returns>The rank contribution; 0 when both the hit count and the document frequency are 0.</returns>
public double CalcRankFactor(WordOccurrenceNode occ, Query query)
{
    int countTermQuery = 0;
    foreach (QueryItem item in query.QueryItens)
    {
        if (item.WordID == occ.Word.WordID)
        {
            countTermQuery++;
        }
    }

    // Work in double from the start: with integer operands the quotient below was
    // truncated before being stored, discarding the fractional part of the weight.
    double docFrequency = (double)occ.Word.QuantityDocFrequency;
    double countTermDoc = occ.Hits.Count;

    double saturationDenominator = countTermDoc + docFrequency;
    if (saturationDenominator == 0)
    {
        // No hits and no document frequency: nothing to score, and dividing would be undefined.
        return 0.0;
    }

    double bm25_TF = ((docFrequency + 1) * countTermDoc) / saturationDenominator;
    double idf = Math.Log((((double)totalDocQuantity) + 1) / docFrequency);

    return ((double)countTermQuery) * bm25_TF * idf;
}
/// <summary>
/// Computes a BM25-like rank contribution of one word occurrence for a query.
/// </summary>
/// <remarks>
/// The score is the product of a query-side weight (saturated with <c>k3</c>), a collection-side
/// weight <c>log(totalDocQuantity / df)</c>, and a document-side weight (saturated with
/// <c>k1</c> and length-adjusted with <c>b</c> and <c>avdl</c>).
/// </remarks>
/// <param name="occ">Occurrence of the word in a document.</param>
/// <param name="query">Query whose items are matched against the occurrence's word.</param>
/// <returns>The product of the three weights.</returns>
public double CalcRankFactor(WordOccurrenceNode occ, Query query)
{
    // How many query items refer to the same word as this occurrence.
    int matchingItems = 0;
    foreach (QueryItem queryItem in query.QueryItens)
    {
        if (queryItem.WordID != occ.Word.WordID)
        {
            continue;
        }

        matchingItems++;
    }

    // Query side: share of the query made up by this word, saturated by k3.
    double qtf = (double)matchingItems / (double)query.QueryItens.Count;
    double queryWeight = ((k3 + 1) * qtf) / (k3 + qtf);

    // Collection side: plain log(N / df). The smoothed variant
    // log((N - df + 0.5) / (df + 0.5)) is intentionally not used here.
    double docFrequency = (double)occ.Word.QuantityDocFrequency;
    double collectionWeight = Math.Log(((double)totalDocQuantity) / ((double)docFrequency));

    // Document side: hits relative to the document's word count, saturated by k1.
    // NOTE(review): textbook BM25 multiplies the whole length term by k1,
    // i.e. k1 * ((1 - b) + b * dl / avdl); here only (1 - b) is scaled - confirm this is intended.
    double tf = (double)occ.Hits.Count / (double)occ.Doc.WordQuantity;
    double normalizer = ((k1 * (1 - b)) + (b * (occ.Doc.WordQuantity / avdl))) + tf;
    double documentWeight = ((k1 + 1) * tf) / normalizer;

    return collectionWeight * documentWeight * queryWeight;
}
/// <summary>
/// Reads the posting records stored in the inverted file of <paramref name="word"/>.
/// </summary>
/// <remarks>
/// Each record is read as: an Int32 document key, an Int32 hit count, then that many Int32
/// hit positions. Reading stops at the end of the file or after <c>conf.MaxResultList</c>
/// records, whichever comes first. The document of each record is resolved through
/// <c>docIndex.Search</c>.
/// </remarks>
/// <param name="word">Word whose occurrences are loaded; it is attached to every returned node.</param>
/// <returns>The occurrences read, in file order.</returns>
/// <exception cref="IOException">
/// The file cannot be opened or ends in the middle of a record. The exception propagates
/// unchanged, with its original stack trace.
/// </exception>
public List<WordOccurrenceNode> GetWordOccurrencies(Word word)
{
    invertedfileName = GetFileName(word.WordID);
    List<WordOccurrenceNode> result = new List<WordOccurrenceNode>();

    // 'using' replaces the old catch/finally pair: 'throw e;' reset the stack trace, and
    // 'finally { br.Close(); }' threw NullReferenceException when the open itself failed,
    // hiding the real I/O error from callers. The file is only read, so request read access.
    using (BinaryReader reader = new BinaryReader(new FileStream(invertedfileName, FileMode.Open, FileAccess.Read)))
    {
        for (int i = 0; (i < conf.MaxResultList) && (reader.BaseStream.Position < reader.BaseStream.Length); i++)
        {
            int documentKey = reader.ReadInt32();
            int hitsCount = reader.ReadInt32();

            WordOccurrenceNode node = new WordOccurrenceNode();
            node.Hits = new List<WordHit>();
            for (int y = 0; y < hitsCount; y++)
            {
                WordHit hit = new WordHit();
                hit.Position = reader.ReadInt32();
                node.Hits.Add(hit);
            }

            node.Word = word;
            node.QuantityHits = hitsCount;
            node.Doc = this.docIndex.Search(documentKey);
            result.Add(node);
        }
    }

    return result;
}
/// <summary>
/// Builds the posting list of this document: one <c>WordOccurrenceNode</c> per distinct
/// cleaned word, holding a hit for every position at which the word occurs.
/// </summary>
/// <remarks>
/// The text returned by <c>GetText</c> is split on single spaces; every token is cleaned with
/// <c>QueryParser.GetCleanQuery</c> and stripped of remaining spaces. Tokens that end up empty
/// are skipped. A hit position is the token's index in the split array.
/// As a side effect, <c>WordQuantity</c> is set to the number of tokens plus one.
/// </remarks>
/// <returns>
/// A table keyed by the hash code of the cleaned word (also used as <c>WordID</c>), whose
/// values are the corresponding <c>WordOccurrenceNode</c> instances.
/// </returns>
public Hashtable GetPostingList()
{
    Hashtable postingList = new Hashtable();
    string text = this.GetText();
    string[] splitWords = text.Split(' ');

    // NOTE(review): this is one more than the token count; kept as-is because other
    // code may depend on the value - confirm whether the +1 is intentional.
    this.WordQuantity = splitWords.Length + 1;

    for (int i = 0; i < splitWords.Length; i++)
    {
        string wordTmp = QueryParser.GetCleanQuery(splitWords[i]);
        wordTmp = wordTmp.Replace(" ", string.Empty);

        // Reject empty tokens before touching the table, so they can never be counted
        // as a hit of a real word whose key happens to equal the empty string's hash.
        if (string.IsNullOrEmpty(wordTmp))
        {
            continue;
        }

        // NOTE(review): string.GetHashCode is not guaranteed to be stable across processes
        // or runtime versions, and distinct words can collide; if WordID is persisted,
        // a stable hash may be needed - verify against how WordID is consumed.
        int key = wordTmp.GetHashCode();

        // Single lookup: the Hashtable indexer yields null for a missing key.
        WordOccurrenceNode node = (WordOccurrenceNode)postingList[key];
        if (node == null)
        {
            node = new WordOccurrenceNode();
            node.Word = new Word();
            node.Word.WordID = key;
            node.Word.Text = wordTmp;
            node.Doc = this;
            postingList.Add(key, node);
        }

        WordHit hit = new WordHit();
        hit.Position = i;
        node.Hits.Add(hit);
    }

    // The former GC.ReRegisterForFinalize/GC.Collect calls were removed: strings and arrays
    // have no finalizer to re-register, and forcing a full collection on every call only
    // costs time - the locals become collectable on their own when the method returns.
    return postingList;
}
/// <summary>
/// Adds the rank contribution of <paramref name="occ"/> for <paramref name="query"/> to the
/// accumulated <c>queryRank</c>.
/// </summary>
/// <remarks>
/// The contribution is computed by the rank function obtained from
/// <c>FactoryRankFunction.GetRankFunction</c>.
/// </remarks>
/// <param name="occ">Word occurrence to score.</param>
/// <param name="query">Query the occurrence is scored against.</param>
public void CalculateRank(WordOccurrenceNode occ, Query query)
{
    IRankFunction scorer = FactoryRankFunction.GetRankFunction();

    queryRank += scorer.CalcRankFactor(occ, query);
}
/// <summary>
/// Appends <paramref name="newNode"/> at the end of the occurrence chain that this node belongs to.
/// </summary>
/// <remarks>
/// The chain is followed through <c>NextOccurrence</c> for as long as <c>HasNext</c> reports a
/// successor. The last node then gets <paramref name="newNode"/> as its <c>NextOccurrence</c>,
/// and <paramref name="newNode"/> gets that node as its <c>PreviousOccurrence</c>.
/// </remarks>
/// <param name="newNode">Node to append; must not be null.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="newNode"/> is null.</exception>
public void Add(WordOccurrenceNode newNode)
{
    if (newNode == null)
    {
        throw new System.ArgumentNullException("newNode");
    }

    // Walk to the tail with a loop instead of recursing once per node: the recursion depth
    // used to equal the chain length, which risks an uncatchable stack overflow on long chains.
    WordOccurrenceNode tail = this;
    while (tail.HasNext())
    {
        tail = tail.NextOccurrence;
    }

    tail.NextOccurrence = newNode;
    newNode.PreviousOccurrence = tail;
}