public static string Process(Index index, string[] query) { _index = index; // initialize the rank dictionary Dictionary<int, double> rank = new Dictionary<int, double>(); foreach (KeyValuePair<int, string> document in _index.documents) rank.Add(document.Key, 0.0); // rank the terms in the query Dictionary<string, double> queryWeight = new Dictionary<string, double>(); int weight = 1; foreach (string term in query.Reverse()) { queryWeight.Add(term, weight); // weight++; } // loop through all of the terms in the index foreach (KeyValuePair<int, string> term in _index.terms) { // only process the terms that are being queried if (query.Contains(term.Value) == true) { // loop though all of the documents in the index foreach (KeyValuePair<int, string> document in _index.documents) { int frequency = Occurrences(document.Value, term.Value); // only process the documents that contain this term if (frequency > 0) rank[document.Key] = rank[document.Key] + ComputeWeight(document.Key, term.Key, frequency, queryWeight[term.Value]); } } } // calculate the results StringBuilder sb = new StringBuilder(); foreach (KeyValuePair<int, double> result in rank.OrderByDescending(z => z.Value)) { sb.AppendLine(result.Key.ToString() + " " + result.Value.ToString()); } return sb.ToString(); }
protected void Page_Load(object sender, EventArgs e) { Response.Write("Building index.<br />\n"); // build the index Processing.Index index = new Processing.Index(Directory.GetFiles(Server.MapPath("~/Data/Output"), "*.txt")); // stem the queries string[] stemmedQuery1 = StemQueryArray(LoveCategory); string[] stemmedQuery2 = StemQueryArray(HappyCategory); string[] stemmedQuery3 = StemQueryArray(SadCategory); string[] stemmedQuery4 = StemQueryArray(ProfanityCategory); // calculate weight for love songs Response.Write("Calculating weight for love songs.<br />\n"); string results1 = WeightHandler.Process(index, stemmedQuery1); File.WriteAllText(Server.MapPath("~/Data/Output") + "\\love.txt", results1); // calculate weight for happy songs Response.Write("Calculating weight for happiest songs.<br />\n"); string results2 = WeightHandler.Process(index, stemmedQuery2); File.WriteAllText(Server.MapPath("~/Data/Output") + "\\happy.txt", results2); // calculate weight for sad songs Response.Write("Calculating weight for saddest songs.<br />\n"); string results3 = WeightHandler.Process(index, stemmedQuery3); File.WriteAllText(Server.MapPath("~/Data/Output") + "\\sad.txt", results3); // calculate weight for profane songs Response.Write("Calculating weight for profanity songs.<br />\n"); string results4 = WeightHandler.Process(index, stemmedQuery4); File.WriteAllText(Server.MapPath("~/Data/Output") + "\\profanity.txt", results4); // calculate weight for repetitive songs Response.Write("Calculating weight for repetitive songs.<br />\n"); string results5 = WeightHandler.ProcessUniqueTerms(index); File.WriteAllText(Server.MapPath("~/Data/Output") + "\\repetitive.txt", results5); Response.Write("Done.<br />\n"); }
public static string ProcessUniqueTerms(Index index) { _index = index; // initialize the rank dictionary Dictionary<int, double> rank = new Dictionary<int, double>(); foreach (KeyValuePair<int, string> document in _index.documents) rank.Add(document.Key, 0.0); // loop though all of the documents in the index foreach (KeyValuePair<int, string> document in _index.documents) { rank[document.Key] = ((double)UniqueTermsCount(document.Value) / _index.GetDocumentLength(document.Key)); } // calculate the results StringBuilder sb = new StringBuilder(); foreach (KeyValuePair<int, double> result in rank.OrderBy(z => z.Value)) { sb.AppendLine(result.Key.ToString() + " " + result.Value.ToString()); } return sb.ToString(); }