Ejemplo n.º 1
0
        /// <summary>
        /// Predicts an age group and gender for every author listed in
        /// <c>eng1/tru.txt</c> and writes one result document per author to
        /// <c>Results/{id}.xml</c>.
        /// </summary>
        /// <remarks>
        /// Each truth line is split on ':'. Field 0 is the author id and also the name of
        /// the author's XML file under <c>eng1/</c>; fields 3 and 6 are only echoed to the
        /// console here. The text of <c>author/documents</c> is stripped of markup and of a
        /// fixed stop-word list, split into fragments, and every fragment found in one of
        /// the ten learnt tables adds that table's stored weight to the matching score.
        /// The highest score decides the age group and gender that are written out.
        /// </remarks>
        static void Evaluate()
        {
            // Sanity prompt kept exactly as before: it fires when two neighbouring male
            // tables are both empty.
            if ((Male18.Count == 0 && Male25.Count == 0) || (Male25.Count == 0 && Male35.Count == 0) || (Male35.Count == 0 && Male50.Count == 0) || (Male50.Count == 0 && Male65.Count == 0))
            {
                Console.WriteLine("Not All Age-Gender Datasets are Learnt by Model, Continue? (y/n)");
                string AN = Console.ReadLine();

                // ReadLine returns null once standard input is exhausted; that is treated
                // like any other answer that is not "n" instead of throwing.
                if (string.Equals(AN, "n", StringComparison.OrdinalIgnoreCase))
                {
                    return;
                }
            }

            // Fields 0, 3 and 6 are read below, so a usable line has at least seven fields.
            const int requiredFields = 7;

            // Characters stripped from both ends of a fragment before it is looked up.
            char[] trimChars = { '*', ':', ',', '.', '{', '}', '(', ')', ',', ';', '?', '!', ' ', '-', '[', ']', ' ', '\t', '\n', '\r' };

            // using guarantees the reader is released even when an author file fails to load.
            using (StreamReader sr = new StreamReader("eng1/tru.txt"))
            {
                while (!sr.EndOfStream)
                {
                    string line = sr.ReadLine();
                    string[] ww = line.Split(':');

                    if (ww.Length < requiredFields)
                    {
                        // Blank or truncated line: report it and move on rather than crash.
                        Console.Error.WriteLine("Skipping malformed truth line: " + line);
                        continue;
                    }

                    string id = ww[0];

                    XmlDocument doc = new XmlDocument();
                    doc.Load("eng1/" + id + ".xml");

                    decimal c18 = 0, c25 = 0, c35 = 0, c50 = 0, c65 = 0;
                    decimal fc18 = 0, fc25 = 0, fc35 = 0, fc50 = 0, fc65 = 0;

                    // Strip markup, then the stop-word list, then cut the text into fragments.
                    string wrd       = doc.SelectSingleNode("author/documents").InnerText;
                    string finalData = Regex.Replace(wrd, @"<.*?>", string.Empty);
                    finalData = Regex.Replace(finalData, @"\b(?:\n|the|then|into|being|too|haven't|shouldn't|hasn't|hadn't|wasn't|can't|isn't|couldn't|shalln't|don't|there's|that's|on|which|with|-|where|is|was|were|are|of|an|also|be|there|do|did|done|so|a|on|this|these|in|from|all|to|that|but|been|as|by|if|at|have|who|it|it's|its|'s|than|such|what|and|or|for|„s|how|can|could|\r\n|\n\r|\n|\t\n|;\n\r|;\r\n|,\r\n|,\n\r|\d+|,|#|%)\b", string.Empty, RegexOptions.IgnoreCase);
                    string[] arr = finalData.Split('.', '?', ':', '\n');

                    Console.WriteLine(ww[3] + "|" + ww[6] + "|" + arr.Length.ToString());

                    // Collect the distinct fragments of this author.
                    Hashtable hs = new Hashtable();
                    foreach (string str in arr)
                    {
                        if (!hs.ContainsKey(str))
                        {
                            hs.Add(str, 0);
                        }
                    }

                    // FillHash is defined outside this block; presumably it weights the
                    // entries in place - confirm against its definition.
                    FillHash(hs, null, 1);

                    foreach (DictionaryEntry str in hs)
                    {
                        // NOTE(review): Learn stores fragments exactly as split, while the
                        // lookup key here is trimmed and lower-cased; whether the two match
                        // depends on what FillHash does to the keys - confirm.
                        string sts = str.Key.ToString().Trim(trimChars).ToLower();

                        // Add the stored weight of every table that knows this fragment.
                        if (Male18.ContainsKey(sts))  { c18  += (decimal)Male18[sts]; }
                        if (fMale18.ContainsKey(sts)) { fc18 += (decimal)fMale18[sts]; }

                        if (Male25.ContainsKey(sts))  { c25  += (decimal)Male25[sts]; }
                        if (fMale25.ContainsKey(sts)) { fc25 += (decimal)fMale25[sts]; }

                        if (Male35.ContainsKey(sts))  { c35  += (decimal)Male35[sts]; }
                        if (fMale35.ContainsKey(sts)) { fc35 += (decimal)fMale35[sts]; }

                        if (Male50.ContainsKey(sts))  { c50  += (decimal)Male50[sts]; }
                        if (fMale50.ContainsKey(sts)) { fc50 += (decimal)fMale50[sts]; }

                        if (Male65.ContainsKey(sts))  { c65  += (decimal)Male65[sts]; }
                        if (fMale65.ContainsKey(sts)) { fc65 += (decimal)fMale65[sts]; }
                    }

                    // Pick the highest score. The first five comparisons are inclusive and
                    // the remaining five strict, so when every score is zero the result is
                    // "35-49" / "male".
                    // NOTE(review): the mix of <= and < is kept unchanged because it decides
                    // the fallback class; confirm whether it is intentional.
                    decimal winner = 0;
                    string  age = "";
                    string  gen = "";

                    if (winner <= c18)  { winner = c18;  age = "18-24"; gen = "male"; }
                    if (winner <= fc18) { winner = fc18; age = "18-24"; gen = "female"; }

                    if (winner <= c25)  { winner = c25;  age = "25-34"; gen = "male"; }
                    if (winner <= fc25) { winner = fc25; age = "25-34"; gen = "female"; }

                    if (winner <= c35)  { winner = c35;  age = "35-49"; gen = "male"; }
                    if (winner < fc35)  { winner = fc35; age = "35-49"; gen = "female"; }

                    if (winner < c50)   { winner = c50;  age = "50-64"; gen = "male"; }
                    if (winner < fc50)  { winner = fc50; age = "50-64"; gen = "female"; }

                    if (winner < c65)   { winner = c65;  age = "65-xx"; gen = "male"; }
                    if (winner < fc65)  { winner = fc65; age = "65-xx"; gen = "female"; }

                    // using flushes and closes the result file even if a write fails.
                    using (XmlWriter w = XmlWriter.Create("Results/" + id + ".xml"))
                    {
                        w.WriteStartDocument();
                        w.WriteStartElement("author");
                        w.WriteAttributeString("id", "{" + id + "}");
                        w.WriteAttributeString("type", "not relevant");
                        w.WriteAttributeString("lang", "en");
                        w.WriteAttributeString("age_group", age);
                        w.WriteAttributeString("gender", gen);
                        w.WriteEndElement();
                        w.WriteEndDocument();
                    }

                    Console.WriteLine("->" + age + " " + gen);
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the ten age/gender tables from a truth file whose path is read from
        /// the console.
        /// </summary>
        /// <remarks>
        /// Each truth line is split on ':'. Field 0 names the author's XML file under
        /// <c>eng1/</c>, field 3 is compared with "MALE" (any other value is counted as
        /// female) and field 6 is the age group ("18-24", "25-34", "35-49", "50-64" or
        /// "65-xx"; other values are ignored). The text of <c>author/documents</c> is
        /// stripped of markup and of a fixed stop-word list and split into fragments;
        /// fragments longer than five characters are added to the matching table.
        /// Authors whose file cannot be loaded or whose line is malformed are skipped.
        /// When the file has been read, every table is handed to FillHash together with
        /// its name and author count.
        /// </remarks>
        static void Learn()
        {
            int cc = 0;

            Console.WriteLine("Enter Path to Load Files e.g. /Documents/DataFolder/truth.txt");

            path = Console.ReadLine();
            if (string.IsNullOrEmpty(path))
            {
                // ReadLine yields null at end of input and "" for a bare Enter; neither
                // is a usable file name, so stop instead of letting StreamReader throw.
                Console.Error.WriteLine("No truth file path entered; nothing was learnt.");
                return;
            }

            // Fields 0, 3 and 6 are read below, so a usable line has at least seven fields.
            const int requiredFields = 7;

            // using guarantees the reader is released even when processing throws.
            using (StreamReader sr = new StreamReader(path))
            {
                while (!sr.EndOfStream)
                {
                    string   asa = sr.ReadLine();
                    string[] ww  = asa.Split(':');

                    // The counter advances for every line read, loaded or not.
                    cc++;

                    if (ww.Length < requiredFields)
                    {
                        Console.Error.WriteLine(cc.ToString() + ". skipped malformed truth line: " + asa);
                        continue;
                    }

                    XmlDocument doc = new XmlDocument();
                    try
                    {
                        doc.Load("eng1/" + ww[0] + ".xml");
                    }
                    catch (Exception ex)
                    {
                        // Best effort as before: an unreadable author file is skipped,
                        // but the reason is now reported instead of being swallowed.
                        Console.Error.WriteLine(cc.ToString() + "." + ww[0] + " skipped: " + ex.Message);
                        continue;
                    }
                    Console.WriteLine(cc.ToString() + "." + ww[0]);

                    XmlNode documents = doc.SelectSingleNode("author/documents");
                    if (documents == null)
                    {
                        Console.Error.WriteLine(cc.ToString() + "." + ww[0] + " skipped: no author/documents element");
                        continue;
                    }

                    // Strip markup, then the stop-word list, then cut the text into fragments.
                    string cDataNode = documents.InnerText;
                    string finalData = Regex.Replace(cDataNode, "<.*?>", string.Empty);
                    finalData = Regex.Replace(finalData, @"\b(?:\n|the|then|into|being|too|haven't|shouldn't|hasn't|hadn't|wasn't|can't|isn't|couldn't|shalln't|don't|there's|that's|on|which|with|-|where|is|was|were|are|of|an|also|be|there|do|did|done|so|a|on|this|these|in|from|all|to|that|but|been|as|by|if|at|have|who|it|it's|its|'s|than|such|what|and|or|for|„s|how|can|could|\r\n|\n\r|\n|\t\n|;\n\r|;\r\n|,\r\n|,\n\r|\d+|,|#|%)\b", string.Empty, RegexOptions.IgnoreCase);

                    string[] arr = finalData.Split('.', '?', ':', '\n');

                    string ageGroup = ww[6];
                    bool   isMale   = ww[3] == "MALE";

                    // Bump the per-group and per-gender counters and feed the fragments
                    // into the table that belongs to this author's age group and gender.
                    if (ageGroup == "18-24")
                    {
                        A18count++;
                        if (isMale) { Male18Count++;  AddUnseenSentences(Male18, arr); }
                        else        { fMale18Count++; AddUnseenSentences(fMale18, arr); }
                    }
                    else if (ageGroup == "25-34")
                    {
                        A25count++;
                        if (isMale) { Male25Count++;  AddUnseenSentences(Male25, arr); }
                        else        { fMale25Count++; AddUnseenSentences(fMale25, arr); }
                    }
                    else if (ageGroup == "35-49")
                    {
                        A35count++;
                        if (isMale) { Male35Count++;  AddUnseenSentences(Male35, arr); }
                        else        { fMale35Count++; AddUnseenSentences(fMale35, arr); }
                    }
                    else if (ageGroup == "50-64")
                    {
                        A50count++;
                        if (isMale) { Male50Count++;  AddUnseenSentences(Male50, arr); }
                        else        { fMale50Count++; AddUnseenSentences(fMale50, arr); }
                    }
                    else if (ageGroup == "65-xx")
                    {
                        A65count++;
                        if (isMale) { Male65Count++;  AddUnseenSentences(Male65, arr); }
                        else        { fMale65Count++; AddUnseenSentences(fMale65, arr); }
                    }
                }
            }

            // FillHash is defined outside this block; it receives each table together
            // with its name and the number of authors that contributed to it.
            FillHash(Male18, "Male18", Male18Count);
            FillHash(fMale18, "fMale18", fMale18Count);
            FillHash(Male25, "Male25", Male25Count);
            FillHash(fMale25, "fMale25", fMale25Count);
            FillHash(Male35, "Male35", Male35Count);
            FillHash(fMale35, "fMale35", fMale35Count);
            FillHash(Male50, "Male50", Male50Count);
            FillHash(fMale50, "fMale50", fMale50Count);
            FillHash(Male65, "Male65", Male65Count);
            FillHash(fMale65, "fMale65", fMale65Count);
        }

        /// <summary>
        /// Adds every fragment longer than five characters that the table does not
        /// contain yet, storing the fragment's length as its initial value.
        /// </summary>
        /// <param name="table">
        /// Target table. Typed as the non-generic dictionary interface on the assumption
        /// that the learnt tables are Hashtable-like (they are passed to the same
        /// FillHash as a Hashtable elsewhere in the file) - confirm against their declaration.
        /// </param>
        /// <param name="sentences">Fragments extracted from one author's documents.</param>
        private static void AddUnseenSentences(IDictionary table, string[] sentences)
        {
            foreach (string s in sentences)
            {
                if (s.Length > 5 && !table.Contains(s))
                {
                    table.Add(s, s.Length);
                }
            }
        }