/// <summary>
/// Loads the restaurant businesses and their reviews from the line-delimited
/// JSON dumps, groups the reviews per user in memory, and prints one JSON
/// object per restaurant review (running counter, review id, cleaned text)
/// to the console.
/// </summary>
/// <param name="arg">Command-line arguments; not used.</param>
static void Main(string[] arg)
{
    // Parse the businesses, keeping only the ones whose categories mention "Restaurants".
    Dictionary<string, Business> businesses = new Dictionary<string, Business>();
    using (StreamReader reader = new StreamReader(@"F:\Data Science Project\businesses.txt"))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Cheap textual pre-filter so that only business records get JSON-parsed.
            if (!line.Contains("\"type\": \"business\""))
            {
                continue;
            }

            JObject obj = JObject.Parse(line);
            string categories = obj["categories"].ToString();
            if (!categories.Contains("Restaurants"))
            {
                continue;
            }

            Business business = new Business();
            business.name = obj["name"].ToString();
            business.full_address = obj["full_address"].ToString();
            business.city = obj["city"].ToString();
            business.state = obj["state"].ToString();
            // Read the numbers straight from the JSON tokens instead of
            // round-tripping them through culture-dependent strings.
            business.latitude = (double)obj["latitude"];
            business.longitude = (double)obj["longitude"];
            business.stars = (float)obj["stars"];
            business.review_count = (int)obj["review_count"];
            business.categories = categories;
            businesses.Add(obj["business_id"].ToString(), business);
        }
    }

    // Parse the reviews. The per-user index is populated here although
    // nothing in this method reads it back; only the console output is observable.
    Dictionary<string, User> users = new Dictionary<string, User>();
    int reviewNumber = 0;
    using (StreamReader reader = new StreamReader(@"F:\Data Science Project\reviews.txt"))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (!line.Contains("\"type\": \"review\""))
            {
                continue;
            }

            JObject obj = JObject.Parse(line);
            string businessId = obj["business_id"].ToString();
            // The dictionary only ever contains restaurants, so membership is
            // the whole filter; no need to re-check the categories string.
            if (!businesses.ContainsKey(businessId))
            {
                continue;
            }

            string userId = obj["user_id"].ToString();
            string cleanedText = RemoveSpecialCharacters(obj["text"].ToString());

            Review review = new Review();
            review.text = cleanedText;
            review.business_id = businessId;
            review.stars = (float)obj["stars"];

            // Create the user on first sight, then attach the review either way.
            User user;
            if (!users.TryGetValue(userId, out user))
            {
                user = new User();
                users.Add(userId, user);
            }
            user.reviews.Add(review);

            Console.WriteLine("{\"id1\":\"" + ++reviewNumber + "\"," + "\"id\":\"" + obj["review_id"].ToString() + "\",\"text\":\"" + cleanedText + "\"}");
        }
    }
}
/// <summary>
/// Loads restaurant businesses, their reviews and the reviewing users from
/// the line-delimited JSON dumps, then, for every user that already has a
/// file under the "users" folder, rebuilds that document's reviews element
/// (text, coordinates, business name/category, rating) and writes the result
/// to the "users_new" folder.
/// </summary>
/// <param name="arg">Command-line arguments; not used.</param>
static void Main(string[] arg)
{
    // Parse the businesses, keeping only the ones whose categories mention "Restaurants".
    Dictionary<string, Business> businesses = new Dictionary<string, Business>();
    using (StreamReader reader = new StreamReader(@"F:\Data Science Project\businesses.txt"))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Cheap textual pre-filter so that only business records get JSON-parsed.
            if (!line.Contains("\"type\": \"business\""))
            {
                continue;
            }

            JObject obj = JObject.Parse(line);
            string categories = obj["categories"].ToString();
            if (!categories.Contains("Restaurants"))
            {
                continue;
            }

            Business business = new Business();
            business.name = obj["name"].ToString();
            business.full_address = obj["full_address"].ToString();
            business.city = obj["city"].ToString();
            business.state = obj["state"].ToString();
            // Read the numbers straight from the JSON tokens instead of
            // round-tripping them through culture-dependent strings.
            business.latitude = (double)obj["latitude"];
            business.longitude = (double)obj["longitude"];
            business.stars = (float)obj["stars"];
            business.review_count = (int)obj["review_count"];
            business.categories = categories;
            businesses.Add(obj["business_id"].ToString(), business);
        }
    }

    // Parse the reviews, indexing each one under both its user and its business.
    Dictionary<string, User> users = new Dictionary<string, User>();
    using (StreamReader reader = new StreamReader(@"F:\Data Science Project\reviews.txt"))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (!line.Contains("\"type\": \"review\""))
            {
                continue;
            }

            JObject obj = JObject.Parse(line);
            string businessId = obj["business_id"].ToString();
            // The dictionary only ever contains restaurants, so a successful
            // lookup is the whole filter.
            Business reviewedBusiness;
            if (!businesses.TryGetValue(businessId, out reviewedBusiness))
            {
                continue;
            }

            string userId = obj["user_id"].ToString();

            Review review = new Review();
            review.text = RemoveSpecialCharacters(obj["text"].ToString());
            review.business_id = businessId;
            review.stars = (float)obj["stars"];
            review.user_id = userId;

            // Create the user on first sight, then attach the review either way.
            User reviewer;
            if (!users.TryGetValue(userId, out reviewer))
            {
                reviewer = new User();
                users.Add(userId, reviewer);
            }
            reviewer.reviews.Add(review);
            reviewedBusiness.reviews.Add(review);
        }
    }

    // Parse the users, filling in profile data for the reviewers found above.
    using (StreamReader reader = new StreamReader(@"F:\Data Science Project\users.txt"))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (!line.Contains("\"type\": \"user\""))
            {
                continue;
            }

            JObject obj = JObject.Parse(line);
            string userId = obj["user_id"].ToString();
            User knownUser;
            if (!users.TryGetValue(userId, out knownUser))
            {
                continue;
            }

            knownUser.name = obj["name"].ToString();
            knownUser.user_id = userId;
            knownUser.average_stars = (float)obj["average_stars"];
            knownUser.review_count = (int)obj["review_count"];
            knownUser.funny_votes = (int)obj["votes"]["funny"];
            knownUser.useful_votes = (int)obj["votes"]["useful"];
            knownUser.cool_votes = (int)obj["votes"]["cool"];
        }
    }

    // Rebuild the reviews section of every user document that already exists on disk.
    foreach (KeyValuePair<string, User> user in users)
    {
        string existingPath = @"F:\Data Science Project\users\" + user.Key + ".txt";
        if (!File.Exists(existingPath))
        {
            continue;
        }

        XmlDocument xmlDoc = new XmlDocument();
        xmlDoc.LoadXml(File.ReadAllText(existingPath));

        // Expects the existing document to contain a reviews element; its
        // previous children are discarded and regenerated below.
        XmlNode reviewsElt = xmlDoc.SelectSingleNode("//reviews");
        reviewsElt.InnerXml = "";

        foreach (Review review in user.Value.reviews)
        {
            Business reviewed = businesses[review.business_id];
            XmlElement reviewElt = xmlDoc.CreateElement("review");

            // text
            XmlElement textElt = xmlDoc.CreateElement("text");
            textElt.InnerText = review.text;
            reviewElt.AppendChild(textElt);

            // lat
            // NOTE(review): parameterless ToString() formats with the current
            // culture; confirm that is what consumers of these files expect.
            XmlElement latElt = xmlDoc.CreateElement("lat");
            latElt.InnerText = reviewed.latitude.ToString();
            reviewElt.AppendChild(latElt);

            // long
            XmlElement longElt = xmlDoc.CreateElement("long");
            longElt.InnerText = reviewed.longitude.ToString();
            reviewElt.AppendChild(longElt);

            // business name
            XmlElement b_name = xmlDoc.CreateElement("business_name");
            b_name.InnerText = reviewed.name;
            reviewElt.AppendChild(b_name);

            // business category
            XmlElement b_category = xmlDoc.CreateElement("business_category");
            b_category.InnerText = reviewed.categories;
            reviewElt.AppendChild(b_category);

            // star rating
            XmlElement star_rating = xmlDoc.CreateElement("rating");
            star_rating.InnerText = review.stars.ToString();
            reviewElt.AppendChild(star_rating);

            reviewsElt.AppendChild(reviewElt);
        }

        // Write the document once per user, after all reviews are attached.
        // Every user in the dictionary was created together with a review,
        // so the final file matches what rewriting it per review produced.
        using (StreamWriter writer = new StreamWriter(@"F:\Data Science Project\users_new\" + user.Key + ".txt"))
        {
            writer.Write(xmlDoc.OuterXml);
        }
    }
}
/// <summary>
/// Loads restaurant businesses, their reviews and the reviewing users from
/// the line-delimited JSON dumps, clusters each business's reviews with
/// k-means over a word-vector representation, records users whose reviews
/// share a cluster as each other's friends, and finally writes one XML file
/// per user plus a "userlist.txt" index into the "users" folder.
/// </summary>
/// <param name="arg">Command-line arguments; not used.</param>
static void Main(string[] arg)
{
    // Parse the businesses, keeping only the ones whose categories mention "Restaurants".
    Dictionary<string, Business> businesses = new Dictionary<string, Business>();
    using (StreamReader reader = new StreamReader(@"F:\Data Science Project\businesses.txt"))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Cheap textual pre-filter so that only business records get JSON-parsed.
            if (!line.Contains("\"type\": \"business\""))
            {
                continue;
            }

            JObject obj = JObject.Parse(line);
            string categories = obj["categories"].ToString();
            if (!categories.Contains("Restaurants"))
            {
                continue;
            }

            Business business = new Business();
            business.name = obj["name"].ToString();
            business.full_address = obj["full_address"].ToString();
            business.city = obj["city"].ToString();
            business.state = obj["state"].ToString();
            // Read the numbers straight from the JSON tokens instead of
            // round-tripping them through culture-dependent strings.
            business.latitude = (double)obj["latitude"];
            business.longitude = (double)obj["longitude"];
            business.stars = (float)obj["stars"];
            business.review_count = (int)obj["review_count"];
            business.categories = categories;
            businesses.Add(obj["business_id"].ToString(), business);
        }
    }

    // Parse the reviews, indexing each one under both its user and its business.
    Dictionary<string, User> users = new Dictionary<string, User>();
    using (StreamReader reader = new StreamReader(@"F:\Data Science Project\reviews.txt"))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (!line.Contains("\"type\": \"review\""))
            {
                continue;
            }

            JObject obj = JObject.Parse(line);
            string businessId = obj["business_id"].ToString();
            // The dictionary only ever contains restaurants, so a successful
            // lookup is the whole filter.
            Business reviewedBusiness;
            if (!businesses.TryGetValue(businessId, out reviewedBusiness))
            {
                continue;
            }

            string userId = obj["user_id"].ToString();

            Review review = new Review();
            review.text = RemoveSpecialCharacters(obj["text"].ToString());
            review.business_id = businessId;
            review.stars = (float)obj["stars"];
            review.user_id = userId;

            // Create the user on first sight, then attach the review either way.
            User reviewer;
            if (!users.TryGetValue(userId, out reviewer))
            {
                reviewer = new User();
                users.Add(userId, reviewer);
            }
            reviewer.reviews.Add(review);
            reviewedBusiness.reviews.Add(review);
        }
    }

    // Parse the users, filling in profile data for the reviewers found above.
    using (StreamReader reader = new StreamReader(@"F:\Data Science Project\users.txt"))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (!line.Contains("\"type\": \"user\""))
            {
                continue;
            }

            JObject obj = JObject.Parse(line);
            string userId = obj["user_id"].ToString();
            User knownUser;
            if (!users.TryGetValue(userId, out knownUser))
            {
                continue;
            }

            knownUser.name = obj["name"].ToString();
            knownUser.user_id = userId;
            knownUser.average_stars = (float)obj["average_stars"];
            knownUser.review_count = (int)obj["review_count"];
            knownUser.funny_votes = (int)obj["votes"]["funny"];
            knownUser.useful_votes = (int)obj["votes"]["useful"];
            knownUser.cool_votes = (int)obj["votes"]["cool"];
        }
    }

    // Cluster the reviews of each business; reviewers sharing a cluster become friends.
    foreach (KeyValuePair<string, Business> business in businesses)
    {
        Business current = business.Value;

        // With no reviews the requested cluster count below would be 0 and
        // there is nobody to link, so skip the business altogether.
        if (current.reviews.Count == 0)
        {
            continue;
        }

        // Build an in-memory ARFF document: one row per review.
        StringBuilder sb = new StringBuilder();
        sb.AppendLine("@relation reviews");
        sb.AppendLine("@attribute user_id string");
        sb.AppendLine("@attribute text string");
        sb.AppendLine("@attribute stars numeric");
        sb.AppendLine("@data");
        foreach (Review review in current.reviews)
        {
            // Rows are comma-separated, so the rating must use '.' as the
            // decimal separator regardless of the machine's culture.
            sb.Append(review.user_id + ",\"" + review.text + "\"" + "," + review.stars.ToString(System.Globalization.CultureInfo.InvariantCulture));
            sb.AppendLine("");
        }
        Instances instances = new Instances(new java.io.StringReader(sb.ToString()));

        // Create nominal filter for the user_id attribute and apply it.
        StringToNominal nominalFilter = new StringToNominal();
        string[] options = weka.core.Utils.splitOptions("-R first");
        nominalFilter.setOptions(options);
        nominalFilter.setInputFormat(instances);
        instances = weka.filters.Filter.useFilter(instances, nominalFilter);

        // Create string-to-word-vector filter for the text attribute and apply it.
        StringToWordVector stwFilter = new StringToWordVector();
        options = weka.core.Utils.splitOptions("-R first-last -P att_ -W 1000 -prune-rate -1.0 -T -I -N 0 -L -stemmer weka.core.stemmers.NullStemmer -M 2 -tokenizer weka.core.tokenizers.WordTokenizer -delimiters \\r\\n\\t.,;:\\\'\\\"()?!\"\"");
        stwFilter.setOptions(options);
        stwFilter.setInputFormat(instances);
        instances = weka.filters.Filter.useFilter(instances, stwFilter);

        // Ask for one cluster per five reviews, rounded up.
        SimpleKMeans kmeansClusterer = new SimpleKMeans();
        options = weka.core.Utils.splitOptions("-N " + Math.Ceiling(current.reviews.Count / (decimal)5).ToString() + " -A \"weka.core.EuclideanDistance -R first-last\" -I 5 -S 10");
        kmeansClusterer.setOptions(options);
        // Instance order is preserved so that assignments[n] can be matched
        // with reviews[n] by index below.
        kmeansClusterer.setPreserveInstancesOrder(true);
        kmeansClusterer.buildClusterer(instances);

        int[] assignments = kmeansClusterer.getAssignments();
        for (int j = 0; j < assignments.Length; j++)
        {
            User reviewer = users[current.reviews[j].user_id];
            for (int k = 0; k < assignments.Length; k++)
            {
                if (j != k && assignments[j] == assignments[k])
                {
                    reviewer.friends.Add(current.reviews[k].user_id);
                }
            }
        }
    }

    // Build one XML document describing every user that gained at least one friend.
    XmlDocument xmlDoc = new XmlDocument();
    XmlElement usersElt = xmlDoc.CreateElement("users");
    xmlDoc.AppendChild(usersElt);
    foreach (KeyValuePair<string, User> user in users)
    {
        User profile = user.Value;
        if (profile.friends.Count == 0)
        {
            continue;
        }
        if (string.IsNullOrEmpty(profile.name))
        {
            profile.name = "No Name";
        }

        XmlElement userElt = xmlDoc.CreateElement("user");

        // id attribute
        XmlAttribute idAtt = xmlDoc.CreateAttribute("id");
        idAtt.Value = profile.user_id;
        userElt.Attributes.Append(idAtt);

        // name
        XmlElement nameElt = xmlDoc.CreateElement("name");
        nameElt.InnerText = profile.name;
        userElt.AppendChild(nameElt);

        // friends
        XmlElement friendsElt = xmlDoc.CreateElement("friends");
        foreach (string user_id in profile.friends)
        {
            XmlElement friendElt = xmlDoc.CreateElement("friend");
            friendElt.InnerText = user_id;
            friendsElt.AppendChild(friendElt);
        }
        userElt.AppendChild(friendsElt);

        // reviews
        XmlElement reviewsElt = xmlDoc.CreateElement("reviews");
        foreach (Review review in profile.reviews)
        {
            Business reviewed = businesses[review.business_id];
            XmlElement reviewElt = xmlDoc.CreateElement("review");

            // text
            XmlElement textElt = xmlDoc.CreateElement("text");
            textElt.InnerText = review.text;
            reviewElt.AppendChild(textElt);

            // lat
            // NOTE(review): parameterless ToString() formats with the current
            // culture; confirm that is what consumers of these files expect.
            XmlElement latElt = xmlDoc.CreateElement("lat");
            latElt.InnerText = reviewed.latitude.ToString();
            reviewElt.AppendChild(latElt);

            // long
            XmlElement longElt = xmlDoc.CreateElement("long");
            longElt.InnerText = reviewed.longitude.ToString();
            reviewElt.AppendChild(longElt);

            reviewsElt.AppendChild(reviewElt);
        }
        userElt.AppendChild(reviewsElt);

        usersElt.AppendChild(userElt);
    }

    // Write one file per user plus the index listing every written user.
    using (StreamWriter userlistwriter = new StreamWriter(@"F:\Data Science Project\users\userlist.txt"))
    {
        userlistwriter.Write("<users>");
        XmlNodeList usersList = xmlDoc.DocumentElement.SelectNodes("//user");
        foreach (XmlNode user in usersList)
        {
            // Users whose id was never filled in from users.txt have no
            // usable file name, so they are left out.
            string id = user.Attributes["id"].InnerText;
            if (string.IsNullOrEmpty(id))
            {
                continue;
            }

            using (StreamWriter writer = new StreamWriter(@"F:\Data Science Project\users\" + id + ".txt"))
            {
                writer.Write(user.OuterXml);
            }

            // The index is assembled by string concatenation, so take the
            // name as InnerXml: markup characters in it stay entity-escaped
            // and the resulting file remains well-formed XML.
            userlistwriter.Write("<user id=\"" + id + "\"><name>" + user.SelectSingleNode("name").InnerXml + "</name></user>");
        }
        userlistwriter.Write("</users>");
    }
}