/// <summary>
/// Merges the newly created sorted files into posting files and creates the
/// index in the desired index path.
/// </summary>
/// <param name="fileCount">number of sorted files to merge</param>
/// <param name="files">array of the sorted files</param>
private void merge(int fileCount, string[] files)
{
    bool MoreToRead = false;
    string[] firstLines = new string[fileCount];
    string[] sortedFirstLines;
    StreamReader[] sr = new StreamReader[fileCount];
    Dictionary<string, StreamWriter> writers = new Dictionary<string, StreamWriter>();

    // Open a reader for every sorted chunk file.
    for (int i = 0; i < fileCount; i++)
    {
        sr[i] = new StreamReader(path + "\\index" + i + "sorted.txt");
    }

    // One posting file per letter A-Z, one per non-word term type,
    // plus "other" for non-letter words and the main "index" file.
    for (char c = 'A'; c <= 'Z'; c++)
    {
        writers.Add(c + "", new StreamWriter(ipath + "\\" + c + ".txt"));
    }
    foreach (term.Type t in Enum.GetValues(typeof(term.Type)))
    {
        if (t == term.Type.word)
        {
            continue;
        }
        writers.Add(t.ToString(), new StreamWriter(ipath + "\\" + t.ToString() + ".txt"));
    }
    writers.Add("other", new StreamWriter(ipath + "\\other.txt"));
    writers.Add("index", new StreamWriter(ipath + "\\index.txt"));

    // Read the first line of every file; "\0" marks an exhausted reader.
    for (int i = 0; i < fileCount; i++)
    {
        firstLines[i] = sr[i].ReadLine();
        if (firstLines[i] == null)
        {
            firstLines[i] = "\0";
        }
        if (firstLines[i] == "")
        {
            i--; // empty line: stay on the same reader and read again
        }
        if (firstLines[i] != null)
        {
            MoreToRead = true;
        }
    }

    int lastIndex = 0;
    StringBuilder minLine = new StringBuilder();
    StringBuilder minPhrase = new StringBuilder();

    while (lastIndex < firstLines.Length)
    {
        minPhrase.Clear();
        minLine.Clear();
        int i = lastIndex;

        // Order the current head lines by phrase so the minimal phrase comes first.
        sortedFirstLines = firstLines.OrderBy(s => s.Split(new string[] { "\t" }, StringSplitOptions.None)[0]).ToArray();

        // Skip exhausted readers; stop the merge when every file is done.
        for (i = 0; i < sortedFirstLines.Length; i++)
        {
            if (sortedFirstLines[i].Equals("\0"))
            {
                continue;
            }
            else
            {
                break;
            }
        }
        if (i >= sortedFirstLines.Length)
        {
            break;
        }

        minPhrase.Append(GetPhrase(sortedFirstLines[i]));
        bool Cap = true;
        double icf = 0, idf = 0;
        term.Type type = GetType(sortedFirstLines[i]);

        // Merge every head line that shares the minimal phrase and type.
        // Line layout: phrase \t cap-flag(T/F) \t type \t postings \t icf \t idf.
        for (i = 0; i < sortedFirstLines.Length; i++)
        {
            if (sortedFirstLines[i].Equals("\0"))
            {
                continue;
            }
            if (string.Compare(minPhrase.ToString(), GetPhrase(sortedFirstLines[i]), true) == 0)
            {
                string[] splitted = sortedFirstLines[i].Split('\t');
                if (type != (term.Type)Enum.Parse(typeof(term.Type), splitted[2], true))
                {
                    continue;
                }
                Cap &= splitted[1].Equals("T");
                minLine.Append(splitted[3]);
                icf += double.Parse(splitted[4]);
                idf += double.Parse(splitted[5]);
            }
            else
            {
                break;
            }
        }

        // The term keeps upper case only if it was capitalized in every file.
        string termPhrase = Cap ? minPhrase.ToString().ToUpper() : minPhrase.ToString().ToLower();

        if (type == term.Type.word)
        {
            if (char.IsLetter(minPhrase[0]))
            {
                writers[Char.ToUpper(minPhrase[0]).ToString()].WriteLine(termPhrase + "\t" + minLine.ToString());
                if (Cap)
                {
                    // Capitalized terms are also pushed to the shared queue;
                    // q_list signals the waiting consumer.
                    q.Enqueue(termPhrase + "\t" + minLine.ToString());
                    q_list.Release(1);
                }
            }
            else
            {
                writers["other"].WriteLine(termPhrase + "\t" + minLine.ToString());
            }
        }
        else
        {
            writers[type.ToString()].WriteLine(termPhrase + "\t" + minLine.ToString());
        }

        // Advance every reader whose head line was consumed for this phrase,
        // skipping empty lines along the way.
        for (i = 0; i < fileCount; i++)
        {
            if (string.Compare(minPhrase.ToString(), GetPhrase(firstLines[i]), true) == 0)
            {
                if (type != GetType(firstLines[i]))
                {
                    continue;
                }
                if (firstLines[i].Equals("\0"))
                {
                    continue;
                }
                firstLines[i] = sr[i].ReadLine();
                if (firstLines[i] == null)
                {
                    firstLines[i] = "\0";
                }
                while (firstLines[i] == "")
                {
                    firstLines[i] = sr[i].ReadLine();
                    if (firstLines[i] == null)
                    {
                        firstLines[i] = "\0";
                    }
                }
            }
        }

        // Record the merged term in the main index file.
        writers["index"].WriteLine(termPhrase + "\t" + (int)type + "\t" + icf + "\t" + idf);
    }

    // Close the readers, delete the temporary sorted files, and close all writers.
    for (int i = 0; i < fileCount; i++)
    {
        sr[i].Close();
        File.Delete(path + "\\index" + i + "sorted.txt");
    }
    foreach (KeyValuePair<string, StreamWriter> entry in writers)
    {
        entry.Value.Close();
    }
}
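// A minimal usage sketch (an assumption, not part of the original source): once
// every in-memory chunk has been sorted and written as "index{i}sorted.txt" under
// 'path', the merge pass can be driven as below. 'chunkCount' and the loop that
// builds the file list are hypothetical names used only for illustration.
//
// string[] sortedChunks = new string[chunkCount];
// for (int i = 0; i < chunkCount; i++)
// {
//     sortedChunks[i] = path + "\\index" + i + "sorted.txt";
// }
// merge(chunkCount, sortedChunks);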
public override string ToString()
{
    return phrase + '\t' + type.ToString();
}