/// <summary>
/// Classifies every author listed in <c>eng1/tru.txt</c> into an age group and a gender and
/// writes one result document per author to <c>Results/{id}.xml</c>.
/// </summary>
/// <remarks>
/// Each line of the list is split on ':'; field 0 is used as the author id (and as the XML file
/// name under <c>eng1/</c>), fields 3 and 6 are only echoed to the console.
/// The author's text is stripped of markup and stop words, split into fragments, and every
/// distinct fragment (trimmed and lower-cased) is looked up in the ten learnt tables; the stored
/// weights are summed per table and the highest total decides the prediction.
/// Exceptions from a missing/invalid XML file, a missing <c>author/documents</c> node or a list
/// line with fewer than seven fields are not caught here and abort the run.
/// NOTE(review): Learn stores fragments without trimming or lower-casing, whereas the lookups
/// here use the trimmed, lower-cased form - confirm that FillHash normalises the keys.
/// </remarks>
static void Evaluate()
{
    // Ask for confirmation when two neighbouring male tables are both empty, i.e. the model
    // has apparently not been trained on every age group.
    if ((Male18.Count == 0 && Male25.Count == 0) || (Male25.Count == 0 && Male35.Count == 0) || (Male35.Count == 0 && Male50.Count == 0) || (Male50.Count == 0 && Male65.Count == 0))
    {
        Console.WriteLine("Not All Age-Gender Datasets are Learnt by Model, Continue? (y/n)");
        string answer = Console.ReadLine();

        // Console.ReadLine returns null once input is exhausted, so compare null-safely.
        if (string.Equals(answer, "n", StringComparison.OrdinalIgnoreCase))
        {
            return;
        }
    }

    // Built once instead of on every iteration; the patterns themselves are unchanged.
    Regex markupPattern = new Regex(@"<.*?>");
    Regex stopWordPattern = new Regex(@"\b(?:\n|the|then|into|being|too|haven't|shouldn't|hasn't|hadn't|wasn't|can't|isn't|couldn't|shalln't|don't|there's|that's|on|which|with|-|where|is|was|were|are|of|an|also|be|there|do|did|done|so|a|on|this|these|in|from|all|to|that|but|been|as|by|if|at|have|who|it|it's|its|'s|than|such|what|and|or|for|„s|how|can|could|\r\n|\n\r|\n|\t\n|;\n\r|;\r\n|,\r\n|,\n\r|\d+|,|#|%)\b", RegexOptions.IgnoreCase);

    // Characters stripped from both ends of a fragment before it is looked up.
    char[] trimChars = { '*', ':', ',', '.', '{', '}', '(', ')', ',', ';', '?', '!', ' ', '-', '[', ']', ' ', '\t', '\n', '\r' };

    // XmlWriter.Create does not create missing directories; this is a no-op when it exists.
    Directory.CreateDirectory("Results");

    // 'using' guarantees the list file is closed even when an author's XML fails to load.
    using (StreamReader sr = new StreamReader("eng1/tru.txt"))
    {
        while (!sr.EndOfStream)
        {
            string line = sr.ReadLine();
            if (line.Trim().Length == 0)
            {
                // A blank line would otherwise be turned into the path "eng1/.xml".
                continue;
            }

            string[] ww = line.Split(':');
            string id = ww[0];

            XmlDocument doc = new XmlDocument();
            doc.Load("eng1/" + id + ".xml");

            // Raw text -> markup removed -> stop words, digits and some punctuation removed.
            string wrd = doc.SelectSingleNode("author/documents").InnerText;
            string finalData = markupPattern.Replace(wrd, string.Empty);
            finalData = stopWordPattern.Replace(finalData, string.Empty);

            // Fragments are the pieces between '.', '?', ':' and line feeds.
            string[] arr = finalData.Split('.', '?', ':', '\n');
            Console.WriteLine(ww[3] + "|" + ww[6] + "|" + arr.Length.ToString());

            // Collect the distinct fragments; every value starts at 0.
            Hashtable hs = new Hashtable();
            foreach (string fragment in arr)
            {
                hs[fragment] = 0;
            }

            // Defined elsewhere in the file; called exactly as before (null name, count 1).
            FillHash(hs, null, 1);

            // Summed weights per table: cNN = male, fcNN = female.
            decimal c18 = 0, c25 = 0, c35 = 0, c50 = 0, c65 = 0;
            decimal fc18 = 0, fc25 = 0, fc35 = 0, fc50 = 0, fc65 = 0;

            foreach (DictionaryEntry entry in hs)
            {
                string sts = entry.Key.ToString().Trim(trimChars).ToLower();

                if (Male18.ContainsKey(sts))
                {
                    c18 += (decimal)Male18[sts];
                }
                if (fMale18.ContainsKey(sts))
                {
                    fc18 += (decimal)fMale18[sts];
                }
                if (Male25.ContainsKey(sts))
                {
                    c25 += (decimal)Male25[sts];
                }
                if (fMale25.ContainsKey(sts))
                {
                    fc25 += (decimal)fMale25[sts];
                }
                if (Male35.ContainsKey(sts))
                {
                    c35 += (decimal)Male35[sts];
                }
                if (fMale35.ContainsKey(sts))
                {
                    fc35 += (decimal)fMale35[sts];
                }
                if (Male50.ContainsKey(sts))
                {
                    c50 += (decimal)Male50[sts];
                }
                if (fMale50.ContainsKey(sts))
                {
                    fc50 += (decimal)fMale50[sts];
                }
                if (Male65.ContainsKey(sts))
                {
                    c65 += (decimal)Male65[sts];
                }
                if (fMale65.ContainsKey(sts))
                {
                    fc65 += (decimal)fMale65[sts];
                }
            }

            // Pick the highest total. The first five comparisons use "<=" and the rest "<",
            // so when every total is zero the prediction falls through to 35-49 / male.
            // NOTE(review): kept exactly as it was - confirm whether this default is deliberate.
            decimal winner = 0;
            string age = "";
            string gen = "";
            if (winner <= c18) { winner = c18; age = "18-24"; gen = "male"; }
            if (winner <= fc18) { winner = fc18; age = "18-24"; gen = "female"; }
            if (winner <= c25) { winner = c25; age = "25-34"; gen = "male"; }
            if (winner <= fc25) { winner = fc25; age = "25-34"; gen = "female"; }
            if (winner <= c35) { winner = c35; age = "35-49"; gen = "male"; }
            if (winner < fc35) { winner = fc35; age = "35-49"; gen = "female"; }
            if (winner < c50) { winner = c50; age = "50-64"; gen = "male"; }
            if (winner < fc50) { winner = fc50; age = "50-64"; gen = "female"; }
            if (winner < c65) { winner = c65; age = "65-xx"; gen = "male"; }
            if (winner < fc65) { winner = fc65; age = "65-xx"; gen = "female"; }

            // One <author .../> element per result file; disposing the writer flushes and closes it.
            using (XmlWriter w = XmlWriter.Create("Results/" + id + ".xml"))
            {
                w.WriteStartDocument();
                w.WriteStartElement("author");
                w.WriteAttributeString("id", "{" + id + "}");
                w.WriteAttributeString("type", "not relevant");
                w.WriteAttributeString("lang", "en");
                w.WriteAttributeString("age_group", age);
                w.WriteAttributeString("gender", gen);
                w.WriteEndElement();
                w.WriteEndDocument();
            }

            Console.WriteLine("->" + age + " " + gen);
        }
    }
}
/// <summary>
/// Reads a labelled author list and fills the ten age/gender fragment tables from the authors'
/// XML documents, then hands every table to <c>FillHash</c>.
/// </summary>
/// <remarks>
/// The path of the list is read from the console and stored in the <c>path</c> field; the XML
/// documents themselves are always loaded from <c>eng1/{field 0}.xml</c>.
/// Each list line is split on ':'; field 6 is compared against the age-group labels
/// ("18-24", "25-34", "35-49", "50-64", "65-xx") and field 3 against "MALE" (anything else is
/// counted as female). Lines with an unrecognised age group are loaded but contribute nothing.
/// Every fragment longer than five characters that is not yet in the selected table is added
/// with its length as the initial value.
/// Entries that cannot be used (too few fields, unloadable XML, missing
/// <c>author/documents</c> node) are skipped with a message on standard error.
/// NOTE(review): fragments are stored as-is, while Evaluate looks them up trimmed and
/// lower-cased - confirm that FillHash normalises the keys.
/// </remarks>
static void Learn()
{
    int cc = 0;
    Console.WriteLine("Enter Path to Load Files e.g. /Documents/DataFolder/truth.txt");
    path = Console.ReadLine();

    // Built once instead of on every iteration; the patterns themselves are unchanged.
    Regex markupPattern = new Regex("<.*?>");
    Regex stopWordPattern = new Regex(@"\b(?:\n|the|then|into|being|too|haven't|shouldn't|hasn't|hadn't|wasn't|can't|isn't|couldn't|shalln't|don't|there's|that's|on|which|with|-|where|is|was|were|are|of|an|also|be|there|do|did|done|so|a|on|this|these|in|from|all|to|that|but|been|as|by|if|at|have|who|it|it's|its|'s|than|such|what|and|or|for|„s|how|can|could|\r\n|\n\r|\n|\t\n|;\n\r|;\r\n|,\r\n|,\n\r|\d+|,|#|%)\b", RegexOptions.IgnoreCase);

    // 'using' guarantees the list file is closed even if processing throws.
    using (StreamReader sr = new StreamReader(path))
    {
        while (!sr.EndOfStream)
        {
            string asa = sr.ReadLine();
            string[] ww = asa.Split(':');

            // Counts every line read, including skipped ones, so the printed numbering is unchanged.
            cc++;

            // Fields 0, 3 and 6 are read below; shorter lines cannot be labelled.
            if (ww.Length < 7)
            {
                Console.Error.WriteLine("Skipping line " + cc + ": expected at least 7 ':'-separated fields.");
                continue;
            }

            XmlDocument doc = new XmlDocument();
            try
            {
                doc.Load("eng1/" + ww[0] + ".xml");
            }
            catch (Exception ex)
            {
                // Still best-effort (the entry is skipped), but no longer silent.
                Console.Error.WriteLine("Skipping " + ww[0] + ": " + ex.Message);
                continue;
            }
            Console.WriteLine(cc.ToString() + "." + ww[0]);

            XmlNode documentsNode = doc.SelectSingleNode("author/documents");
            if (documentsNode == null)
            {
                Console.Error.WriteLine("Skipping " + ww[0] + ": no author/documents node.");
                continue;
            }

            // Raw text -> markup removed -> stop words, digits and some punctuation removed.
            string finalData = markupPattern.Replace(documentsNode.InnerText, string.Empty);
            finalData = stopWordPattern.Replace(finalData, string.Empty);

            // Fragments are the pieces between '.', '?', ':' and line feeds.
            string[] arr = finalData.Split('.', '?', ':', '\n');

            // Bump the per-class counters and choose the table this author contributes to.
            bool isMale = ww[3] == "MALE";
            Hashtable target;
            switch (ww[6])
            {
                case "18-24":
                    A18count++;
                    if (isMale) { Male18Count++; target = Male18; }
                    else { fMale18Count++; target = fMale18; }
                    break;
                case "25-34":
                    A25count++;
                    if (isMale) { Male25Count++; target = Male25; }
                    else { fMale25Count++; target = fMale25; }
                    break;
                case "35-49":
                    A35count++;
                    if (isMale) { Male35Count++; target = Male35; }
                    else { fMale35Count++; target = fMale35; }
                    break;
                case "50-64":
                    A50count++;
                    if (isMale) { Male50Count++; target = Male50; }
                    else { fMale50Count++; target = fMale50; }
                    break;
                case "65-xx":
                    A65count++;
                    if (isMale) { Male65Count++; target = Male65; }
                    else { fMale65Count++; target = fMale65; }
                    break;
                default:
                    // Unrecognised age group: nothing is learnt from this author.
                    continue;
            }

            // Add each new fragment longer than five characters, with its length as the value.
            foreach (string s in arr)
            {
                if (s.Length > 5 && !target.ContainsKey(s))
                {
                    target.Add(s, s.Length);
                }
            }
        }
    }

    // FillHash is defined elsewhere in the file; each table is passed together with its
    // name and the number of documents counted for that class.
    FillHash(Male18, "Male18", Male18Count);
    FillHash(fMale18, "fMale18", fMale18Count);
    FillHash(Male25, "Male25", Male25Count);
    FillHash(fMale25, "fMale25", fMale25Count);
    FillHash(Male35, "Male35", Male35Count);
    FillHash(fMale35, "fMale35", fMale35Count);
    FillHash(Male50, "Male50", Male50Count);
    FillHash(fMale50, "fMale50", fMale50Count);
    FillHash(Male65, "Male65", Male65Count);
    FillHash(fMale65, "fMale65", fMale65Count);
}