/// <summary>
/// Indexes a list of document texts into a temporal (in-memory) posting dictionary.
/// </summary>
/// <remarks>
/// <para>
/// Besides building the returned dictionary, every processed document also:
/// writes <c>DocLength</c>, <c>MostFrequentTerm</c> and <c>FrequentTermNumberOfInstances</c>
/// on <c>controller.DocumentsDataList[DocID]</c>; adds each term's in-document frequency to
/// <c>termsTFs</c>; and increments <c>DocID</c>.
/// </para>
/// <para>
/// Keys of the returned dictionary are compared with
/// <see cref="StringComparer.InvariantCultureIgnoreCase"/>, so terms that differ only by
/// case share one posting list.
/// </para>
/// </remarks>
/// <param name="docs">List of document texts; each element is parsed as one document.</param>
/// <returns>
/// Dictionary mapping each term to its posting list; one posting
/// (term frequency, document id) is appended per document containing the term.
/// </returns>
public Dictionary<string, TermPostingList> IndexDocumentsToTemporalPostingFile(List<string> docs)
{
    Dictionary<string, TermPostingList> temporalPostingFile =
        new Dictionary<string, TermPostingList>(StringComparer.InvariantCultureIgnoreCase);

    foreach (string doc in docs)
    {
        // Parse the document text into a term -> frequency dictionary; the parser also
        // reports the most frequent term and its count through the out argument.
        KeyValuePair<string, int> maxTerm;
        Dictionary<string, int> termsInDocument = controller.parser.ParseText(doc, out maxTerm);

        // Update the per-document data for the current document id.
        // NOTE(review): DocLength receives the number of distinct terms (dictionary Count),
        // not the total number of tokens - confirm this is the intended definition.
        controller.DocumentsDataList[DocID].DocLength = termsInDocument.Count;
        controller.DocumentsDataList[DocID].MostFrequentTerm = maxTerm.Key;
        controller.DocumentsDataList[DocID].FrequentTermNumberOfInstances = maxTerm.Value;

        foreach (KeyValuePair<string, int> entry in termsInDocument)
        {
            // Single lookup: fetch the term's posting list, creating it on first sight.
            TermPostingList postings;
            if (!temporalPostingFile.TryGetValue(entry.Key, out postings))
            {
                postings = new TermPostingList();
                temporalPostingFile.Add(entry.Key, postings);
            }

            // Accumulate the term's total frequency across all indexed documents.
            // NOTE(review): termsTFs is declared outside this method; whether it uses the same
            // case-insensitive comparer as the posting dictionary cannot be seen here - verify.
            if (!termsTFs.ContainsKey(entry.Key))
            {
                termsTFs[entry.Key] = entry.Value;
            }
            else
            {
                termsTFs[entry.Key] += entry.Value;
            }

            // Record this document's posting (frequency + document id) for the term.
            postings.AddToTermPostingList(new TermPostingData(entry.Value, DocID));
        }

        DocID++; // move on to the next document in the corpus
    }

    return temporalPostingFile;
}
/// <summary>
/// Appends a single posting entry to the end of this term's posting list.
/// </summary>
/// <param name="termData">The posting entry to append.</param>
public void AddToTermPostingList(TermPostingData termData)
{
    this.PostingList.Add(termData);
}