Esempio n. 1
0
        /// <summary>
        /// Normalises a line of text and returns its word n-grams (bigrams by default).
        /// </summary>
        /// <remarks>
        /// The line is lower-cased, stripped of leading/trailing commas and whitespace, passed
        /// through <c>EkwOAnalyze.CleanWord</c>, and has every entry of <c>stopWords</c> removed.
        /// The words "not" and "no" are then glued to both of their neighbours, so a negation
        /// always stays together with the words around it: "do not like" is tokenised as
        /// "do+not" and "not+like", and the bigram of those two tokens is emitted as
        /// "do not like". Any '+' left in a gram (including one present in the input) is
        /// turned into a space.
        /// </remarks>
        /// <param name="line">Text to tokenise; must not be null.</param>
        /// <param name="n">Number of consecutive tokens per gram; must be at least 1.</param>
        /// <returns>
        /// The grams in order of appearance; an empty array when the line yields fewer than
        /// <paramref name="n"/> tokens.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="n"/> is less than 1.</exception>
        public static string[] NGramify(string line, int n = 2)
        {
            if (n < 1)
            {
                throw new ArgumentOutOfRangeException("n", n, "The n-gram size must be at least 1.");
            }

            List<string> ngrams = new List<string>();

            // Clean: culture-invariant lower-casing keeps matching stable regardless of the
            // current culture (e.g. Turkish would otherwise map 'I' to a dotless 'ı').
            char[] trimchars = { ',' };
            line = line.ToLowerInvariant().Trim(trimchars).Trim();
            line = EkwOAnalyze.CleanWord(line);

            // Drop stop words.
            // NOTE(review): Replace removes a stop word wherever it occurs as a substring,
            // including inside longer words, unless the entries of stopWords are padded with
            // spaces - confirm against the stopWords definition.
            foreach (string stopword in stopWords)
            {
                line = line.Replace(stopword, "");
            }

            // Negation: attach "not"/"no" to the previous and the next word so the split
            // below yields "<prev>+not" and "not+<next>" instead of a stand-alone negation.
            line = line.Replace(" not ", "+not not+");
            line = line.Replace(" no ", "+no no+");

            char[]   space  = { ' ' };
            string[] tokens = line.Split(space, StringSplitOptions.RemoveEmptyEntries);

            // Slide a window of n tokens over the line.
            for (int start = 0; start + n <= tokens.Length; start++)
            {
                string gram = string.Join(" ", tokens, start, n);

                // Two adjacent negation tokens rebuild the marker; collapse it back to a
                // single negation word, then turn the remaining '+' glue into spaces.
                gram = gram.Replace("+not not+", " not ");
                gram = gram.Replace("+no no+", " no ");
                gram = gram.Replace("+", " ");
                ngrams.Add(gram);
            }

            return ngrams.ToArray();
        }
Esempio n. 2
0
        // Entry point: runs the EkwOAnalyze pipeline over the "xob/sample" data set and
        // prints the final keyword count. The command-line arguments are not used.
        //
        // Experiments that used to be toggled on here and are all disabled now:
        //   - Classifier trained from a DataTable with the columns Sex / Height / Weight /
        //     FootSize, stored to and loaded from "gender.csv".
        //   - Classifier trained from SadHappyObj("sads.vcb-labelled", "joys.vcb-labelled",
        //     "obj.vcb-labelled", 10000), stored to "sentiobj.csv"; loading "senti.csv".
        //   - Anlyze("sads.vcb-labelled", "joys.vcb-labelled", 10000) and
        //     Test("test.vcb-labelled", "POS.test.report.csv", 10000).
        //   - NGramClassifier trials: NGramify("I do not like fish"), recordLine(...) samples,
        //     NGCTrain("sads.txt", "joys.txt", "obj.txt", 10000),
        //     NGCTest("test.txt", "NGC.test.repo.csv", 10000) and
        //     BothTest("test.vcb-labelled", "NGC+POS.test.report.csv", 10000).
        //   - EkwOAnalyze runs on other inputs: TEkwP("obj.vcb-labelled", "TEkwP_small", 111),
        //     TEkwP("sample.undup.vcb-labelled", "TEkwP_sample", -1),
        //     genEkwBigraph("TEkwP_small.xml" / "TEkwP_sample.xml"), genEEgraph("sample", 350, 2).
        //   - OpinGram("bankrupt").
        static void Main(string[] args)
        {
            // The analyzer is created before the static flags are set, as it always was.
            var analyzer = new EkwOAnalyze();

            // Every input and output file of this run shares this path prefix.
            const string basePath = "xob/sample";

            string bigraphInput    = basePath + "_TEkwP.xml";
            string bfsReport       = basePath + ".E.bfs.csv";
            string bfsPicture      = basePath + ".png";
            string communitiesFile = basePath + "_Communities.xml";

            EkwOAnalyze.AllNounsEntity = true;
            EkwOAnalyze.UsersEntity    = true;

            // Disabled preparation steps for this data set:
            //EkwOAnalyze.RemoveDupLines(basePath + ".vcb-labelled", basePath + ".undup.vcb-labelled");
            //analyzer.TEkwP(basePath + ".undup.vcb-labelled", basePath + "_TEkwP", -1);

            analyzer.genEkwBigraph(bigraphInput, basePath);
            analyzer.genEEgraph(basePath, 450, 2, 0.35, false);
            analyzer.BFS(bfsReport);
            analyzer.BFS_Image(bfsPicture, 1);
            analyzer.Communitize(communitiesFile, 0.0, 1);

            var finalKeywordCount = analyzer.FinalKW(basePath);
            Console.WriteLine("final kw count: " + finalKeywordCount);

            // Disabled follow-ups:
            //analyzer.BFS_Image(basePath + ".All.png", -1);
            //Console.Read();
        }