public List<WordOccurrenceNode> GetWordOccurrencies(Word word)
{
    invertedfileName = GetFileName(word.WordID);
    List<WordOccurrenceNode> result = new List<WordOccurrenceNode>();

    try
    {
        // Open the inverted file that stores the posting list for this word.
        br = new BinaryReader(new FileStream(invertedfileName, FileMode.Open));

        // Each entry is: document hash (Int32), hit count (Int32), then one Int32 position per hit.
        for (int i = 0; (i < conf.MaxResultList) && (br.BaseStream.Position < br.BaseStream.Length); i++)
        {
            int tempDocumentHashOne = br.ReadInt32();
            int hitsCount = br.ReadInt32();

            WordOccurrenceNode node = new WordOccurrenceNode();
            node.Hits = new List<WordHit>();

            // Read the position of every hit of the word inside this document.
            for (int y = 0; y < hitsCount; y++)
            {
                WordHit hit = new WordHit();
                hit.Position = br.ReadInt32();
                node.Hits.Add(hit);
            }

            node.Word = word;
            node.QuantityHits = hitsCount;
            node.Doc = this.docIndex.Search(tempDocumentHashOne);
            result.Add(node);
        }

        return result;
    }
    catch (IOException)
    {
        // Rethrow without resetting the stack trace.
        throw;
    }
    finally
    {
        // The reader is never assigned if opening the file fails, so guard against null.
        if (br != null)
        {
            br.Close();
        }
    }
}
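For context, a minimal sketch of how this reader might be called from a query path. The InvertedIndex type name and the PrintPostings helper are placeholders, not part of the code above; the term is cleaned and keyed the same way GetPostingList does below, and the usual System usings are assumed.

// Hypothetical caller: look up the postings of a single query term.
static void PrintPostings(InvertedIndex index, string queryTerm)
{
    // Same cleaning and hash-code keying scheme used by GetPostingList.
    string cleaned = QueryParser.GetCleanQuery(queryTerm).Replace(" ", string.Empty);

    Word term = new Word();
    term.Text = cleaned;
    term.WordID = cleaned.GetHashCode();

    List<WordOccurrenceNode> postings = index.GetWordOccurrencies(term);
    foreach (WordOccurrenceNode occurrence in postings)
    {
        Console.WriteLine("{0} hit(s) in one document", occurrence.QuantityHits);
    }
}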
public Hashtable GetPostingList()
{
    Hashtable postingList = new Hashtable();
    string text = this.GetText();
    string[] splitWords = text.Split(' ');
    this.WordQuantity = splitWords.Length;

    // Index every word of the document: one WordOccurrenceNode per distinct word,
    // with a WordHit recording each position where the word appears.
    for (int i = 0; i < splitWords.Length; i++)
    {
        string wordTmp = QueryParser.GetCleanQuery(splitWords[i]);
        wordTmp = wordTmp.Replace(" ", string.Empty);

        // The cleaned word's hash code serves as its identifier and as the posting-list key.
        int key = wordTmp.GetHashCode();

        if (postingList.ContainsKey(key))
        {
            // The word was already seen in this document: record one more hit position.
            WordOccurrenceNode node = postingList[key] as WordOccurrenceNode;
            WordHit newhit = new WordHit();
            newhit.Position = i;
            node.Hits.Add(newhit);
        }
        else if (!string.IsNullOrEmpty(wordTmp))
        {
            // First occurrence of the word: create its node and its first hit.
            WordOccurrenceNode newNode = new WordOccurrenceNode();
            newNode.Word = new Word();
            newNode.Word.WordID = key;
            newNode.Word.Text = wordTmp;
            newNode.Doc = this;

            WordHit newhit = new WordHit();
            newhit.Position = i;
            newNode.Hits.Add(newhit);

            postingList.Add(key, newNode);
        }
    }

    return postingList;
}
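A hedged sketch of how the returned Hashtable might be consumed. The Document type name and the PrintWordFrequencies helper are placeholders; since Hashtable is non-generic, entries are walked as DictionaryEntry and cast back to WordOccurrenceNode, and System.Collections is assumed to be imported.

// Hypothetical consumer: walk the per-document posting list produced above.
static void PrintWordFrequencies(Document document)
{
    Hashtable postingList = document.GetPostingList();
    foreach (DictionaryEntry entry in postingList)
    {
        WordOccurrenceNode node = (WordOccurrenceNode)entry.Value;
        // GetPostingList does not set QuantityHits, so count the recorded hits directly.
        Console.WriteLine("'{0}' (id {1}) occurs {2} time(s)",
                          node.Word.Text, node.Word.WordID, node.Hits.Count);
    }
}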