/// <summary>
/// Breaks a line of text into word n-grams, keeping the negations
/// "not" and "no" attached to the words on either side of them.
/// </summary>
/// <param name="line">The raw text line to tokenize.</param>
/// <param name="n">
/// Number of consecutive tokens per gram (default 2, i.e. bigrams).
/// Values below 1 produce an empty result.
/// </param>
/// <returns>
/// The grams in order of appearance; an empty array when
/// <paramref name="line"/> is null, <paramref name="n"/> is below 1, or
/// fewer than <paramref name="n"/> tokens remain after cleaning.
/// </returns>
public static string[] NGramify(string line, int n = 2)
{
    List<string> ngrams = new List<string>();

    // Nothing sensible can be produced for a missing line or a non-positive gram size.
    if (line == null || n < 1)
    {
        return ngrams.ToArray();
    }

    // Clean: lower-case, strip commas and whitespace from both ends, then
    // apply the project cleaner (EkwOAnalyze.CleanWord is defined elsewhere).
    char[] trimchars = { ',' };
    line = line.ToLower().Trim(trimchars).Trim();
    line = EkwOAnalyze.CleanWord(line);

    // Remove stop words.
    // NOTE(review): this is a plain substring replacement, so a stop word that
    // occurs inside a longer word is removed too unless the stopWords entries
    // carry their own delimiters - confirm against the stopWords definition.
    foreach (string stopword in stopWords)
    {
        line = line.Replace(stopword, "");
    }

    // Negation: glue a standalone "not"/"no" to both neighbours with '+',
    // e.g. "do not like" -> "do+not not+like", so that the split below yields
    // the tokens "do+not" and "not+like".
    line = line.Replace(" not ", "+not not+");
    line = line.Replace(" no ", "+no no+");

    char[] space = { ' ' };
    string[] splits = line.Split(space, StringSplitOptions.RemoveEmptyEntries);

    // Slide a window of n tokens across the line. The parameter n was
    // previously ignored and bigrams were always produced; n = 2 still
    // yields exactly the same grams as before.
    for (int i = n - 1; i < splits.Length; i++)
    {
        string gram = string.Join(" ", splits, i - n + 1, n);

        // Undo the negation markers: a glued pair that is fully inside the
        // window collapses back to the single word, and any marker left over
        // at the window edge becomes an ordinary space.
        gram = gram.Replace("+not not+", " not ");
        gram = gram.Replace("+no no+", " no ");
        gram = gram.Replace("+", " ");

        ngrams.Add(gram);
    }

    return ngrams.ToArray();
}
/// <summary>
/// Program entry point. The only active code runs the EkwOAnalyze pipeline
/// on the "xob/sample" data set: bigraph generation, EE graph generation,
/// BFS export, BFS image, community export, and finally prints the final
/// keyword count. Earlier experiments are kept below as disabled code.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // ---- Disabled experiment: classifier on a small gender table ----
    // DataTable table = new DataTable();
    // table.Columns.Add("Sex");
    // table.Columns.Add("Height", typeof(double));
    // table.Columns.Add("Weight", typeof(double));
    // table.Columns.Add("FootSize", typeof(double));
    // // training data.
    // table.Rows.Add("male", 6, 180, 12);
    // table.Rows.Add("male", 5.92, 190, 11);
    // table.Rows.Add("male", 5.58, 170, 12);
    // table.Rows.Add("male", 5.92, 165, 10);
    // table.Rows.Add("female", 5, 100, 6);
    // table.Rows.Add("female", 5.5, 150, 8);
    // table.Rows.Add("female", 5.42, 130, 7);
    // table.Rows.Add("female", 5.75, 150, 9);
    // table.Rows.Add("transgender", 4, 200, 5);
    // table.Rows.Add("transgender", 4.10, 150, 8);
    // table.Rows.Add("transgender", 5.42, 190, 7);
    // table.Rows.Add("transgender", 5.50, 150, 9);
    // Classifier classifier = new Classifier();
    // classifier.TrainClassifier(table);
    // classifier.Store("gender.csv");
    // classifier.load("gender.csv");

    // ---- Disabled experiment: sad / happy / objective classifier ----
    // Classifier classifier = new Classifier();
    // classifier.TrainClassifier(
    //     SadHappyObj("sads.vcb-labelled",
    //                 "joys.vcb-labelled",
    //                 "obj.vcb-labelled",
    //                 10000)
    // );
    // classifier.Store("sentiobj.csv");
    // classifier.load("senti.csv");
    // Anlyze("sads.vcb-labelled",
    //        "joys.vcb-labelled",
    //        10000
    // );
    // Test("test.vcb-labelled", "POS.test.report.csv", 10000);
    // Console.WriteLine(classifier.Classify(new double[] { 6, 130, 10}));

    // ---- Disabled experiment: n-gram classifier ----
    // string[] ng = NGramClassifier.NGramify("I do not like fish");
    // NGramClassifier.printArrayOfStrings(ng);
    // NGramClassifier ngc = new NGramClassifier();
    // ngc.recordLine("I do not like fish", -1);
    // ngc.recordLine("I hate fish", -1);
    // ngc.recordLine("I love fish", +1);
    // ngc.recordLine("We are human being", 0);
    // NGCTrain("sads.txt", "joys.txt", "obj.txt", 10000);
    // return;
    // NGCTest("test.txt", "NGC.test.repo.csv", 10000);
    // BothTest("test.vcb-labelled", "NGC+POS.test.report.csv", 10000);

    /******* EkwOG *****************/
    EkwOAnalyze analyzer = new EkwOAnalyze();

    // analyzer.TEkwP("obj.vcb-labelled", "TEkwP_small", 111);
    // analyzer.TEkwP("sample.undup.vcb-labelled", "TEkwP_sample", -1);
    // analyzer.genEkwBigraph("TEkwP_small.xml");
    // analyzer.genEkwBigraph("TEkwP_sample.xml");
    // analyzer.genEEgraph("sample", 350, 2);

    // Every input and output file of this run shares this path prefix.
    string samplePrefix = "xob/sample";

    // Static switches on EkwOAnalyze; set after the instance is created,
    // in the same order as before.
    EkwOAnalyze.AllNounsEntity = true;
    EkwOAnalyze.UsersEntity = true;

    // Disabled preprocessing steps:
    // EkwOAnalyze.RemoveDupLines(samplePrefix + ".vcb-labelled", samplePrefix + ".undup.vcb-labelled");
    // analyzer.TEkwP(samplePrefix + ".undup.vcb-labelled", samplePrefix + "_TEkwP", -1);

    string tekwpXmlPath = samplePrefix + "_TEkwP.xml";
    analyzer.genEkwBigraph(tekwpXmlPath, samplePrefix);

    analyzer.genEEgraph(samplePrefix, 450, 2, 0.35, false);

    string bfsCsvPath = samplePrefix + ".E.bfs.csv";
    analyzer.BFS(bfsCsvPath);

    string bfsImagePath = samplePrefix + ".png";
    analyzer.BFS_Image(bfsImagePath, 1);

    string communitiesXmlPath = samplePrefix + "_Communities.xml";
    analyzer.Communitize(communitiesXmlPath, 0.0, 1);

    Console.WriteLine("final kw count: " + analyzer.FinalKW(samplePrefix));

    // analyzer.BFS_Image(samplePrefix + ".All.png", -1);
    // Console.WriteLine( int.MaxValue );
    // OpinGram("bankrupt");
    // Console.Read();
}