/// <summary>
/// Loads the table in <paramref name="fileName"/>, builds a decision tree from it with
/// <c>C45Algorithm</c>, renders the tree to a JPEG inside <paramref name="outputDirectory"/>,
/// then shows the image in <c>tree</c> and the tree's pseudocode in <c>txtMessage</c>.
/// </summary>
/// <param name="fileName">
/// File handed to <c>LoadDataFromfCSV</c>; its name without extension becomes the image name.
/// </param>
/// <param name="outputDirectory">Directory that receives the JPEG; created when it does not exist.</param>
public void BuildTree(string fileName, string outputDirectory = "Output")
{
    var data = LoadDataFromfCSV(fileName);
    var decisionTreeName = Path.GetFileNameWithoutExtension(fileName);
    var fixedData = TableFixedData.FromTableData(data);

    // CreateDirectory does nothing when the directory is already there, so no Exists check is needed.
    Directory.CreateDirectory(outputDirectory);

    var algorithm = new C45Algorithm();
    var decisionalTree = algorithm.BuildConditionalTree(fixedData, new TreeOptions());

    // Resolve the path once so the file that is written and the file that is loaded are
    // guaranteed to be the same one, even when outputDirectory is relative.
    var decisionTreeFileName = Path.GetFullPath(
        Path.Combine(outputDirectory, string.Format("{0}.jpg", decisionTreeName)));

    var decisionTreeRenderer = new DecisionTreeRenderer();

    // NOTE(review): assumes RenderTree returns a System.Drawing image (IDisposable) - confirm.
    using (var bitmap = decisionTreeRenderer.RenderTree(decisionalTree, new Size(100, 50)))
    {
        bitmap.Save(decisionTreeFileName, ImageFormat.Jpeg);
    }

    // OnLoad decodes the whole file during EndInit and then releases it, so a later call can
    // overwrite the same JPEG; IgnoreImageCache prevents an earlier render of the same path
    // from being served out of the WPF image cache.
    var img = new BitmapImage();
    img.BeginInit();
    img.CacheOption = BitmapCacheOption.OnLoad;
    img.CreateOptions = BitmapCreateOptions.IgnoreImageCache;
    img.UriSource = new Uri(decisionTreeFileName, UriKind.Absolute);
    img.EndInit();
    img.Freeze();

    tree.Source = img;
    tree.UpdateLayout();

    txtMessage.Text = decisionalTree.ToPseudocode();
}
/// <summary>
/// Manual smoke test: loads "Data.csv", builds both <c>NaiveBayesClassifierOld</c> and
/// <c>NaiveBayesClassifier</c>, classifies the third row with each, and counts how many of the
/// first 50 rows the old classifier labels differently from the row's own class.
/// </summary>
private static void TestNaiveBayes()
{
    var data = LoadDataFromfCSV("Data.csv");
    var fixedData = TableFixedData.FromTableData(data);
    var samples = TableFixedData.ToSample(fixedData);
    var columnsTypes = fixedData.ColumnDataTypes;

    var algorithm = new NaiveBayesClassifierOld(fixedData);
    var algorithm1 = new NaiveBayesClassifier(samples, fixedData.ClassesValue.Length, columnsTypes);

    // Materialize the rows once instead of copying the whole table for every lookup.
    var rows = data.ToList();

    // Single-row comparison of the two implementations; results are only useful under a debugger.
    var dataRow = rows[2];
    var className = algorithm.Compute(dataRow);
    var classId = algorithm1.Compute(fixedData.GetSample(dataRow));
    var className1 = fixedData.ClassesValue[classId];

    // NOTE(review): the miss count is computed but never reported.
    int missed = 0;

    // Stop at the end of the table so data sets with fewer than 50 rows do not overrun.
    for (int index = 0; index < 50 && index < rows.Count; index++)
    {
        var row = rows[index];
        var estimatedClassName = algorithm.Compute(row);
        if (estimatedClassName != row.Class)
        {
            missed++;
        }
    }
}
/// <summary>
/// Manual smoke test: builds a decision tree from "Data2.csv" with <c>C45Algorithm</c>, renders
/// it to "test.jpg", produces its pseudocode, and finally reads through a local training file
/// when that file is present.
/// </summary>
public static void TestC45()
{
    var data = LoadDataFromfCSV("Data2.csv");
    var algorithm = new C45Algorithm();
    var fixedData = TableFixedData.FromTableData(data);
    var decisionalTree = algorithm.BuildConditionalTree(fixedData, new TreeOptions());

    var decisionTreeRenderer = new DecisionTreeRenderer();

    // NOTE(review): assumes RenderTree returns a System.Drawing image (IDisposable) - confirm.
    using (var bitmap = decisionTreeRenderer.RenderTree(decisionalTree, new Size(100, 50)))
    {
        bitmap.Save("test.jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
    }

    // Result is only useful under a debugger.
    var pseudocode = decisionalTree.ToPseudocode();

    // The path below is machine-specific; skip the read instead of crashing with
    // FileNotFoundException on machines that do not have the file.
    const string trainDataPath = @"C:\Research\Kaggle\Expedia\trainData\train.csv";
    if (!File.Exists(trainDataPath))
    {
        return;
    }

    // NOTE(review): every line is read and discarded - nothing consumes the content yet.
    using (var textString = new StreamReader(trainDataPath))
    {
        while (!textString.EndOfStream)
        {
            textString.ReadLine();
        }
    }
}
/// <summary>
/// Trains a random forest (70 passed to the <c>RandomForestAlgorithm</c> constructor,
/// <c>MaxTreeDepth</c> = 10) on the training CSV using every attribute except the class
/// attribute and any attribute named "id", prints the elapsed training time in milliseconds,
/// then writes the first element of the forest's result for each test row to "test.csv".
/// </summary>
public static void TestData()
{
    var data = Program.LoadDataFromfCSV(@"C:\Work\NLP\train.csv", ",");
    var fixedData = TableFixedData.FromTableData(data);

    // Stopwatch is monotonic and higher resolution than subtracting DateTime.Now values.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    // Indices of the attributes used for training. The "id" check is ordinal and
    // case-insensitive so it does not depend on the current culture (ToLower would not
    // match "ID" under a Turkish culture, for example).
    var attributes = fixedData.Attributes
        .Select((item, index) => index)
        .Where(item => !fixedData.IsClassAttribute(item)
                       && !string.Equals(fixedData.Attributes[item], "id", StringComparison.OrdinalIgnoreCase))
        .ToArray();

    RandomForestAlgorithm rfa = new RandomForestAlgorithm(70);
    var forest = rfa.BuildForest(fixedData, new TreeOptions { MaxTreeDepth = 10 }, attributes);

    Console.WriteLine(stopwatch.Elapsed.TotalMilliseconds);

    // NOTE(review): explicit collection kept from the original; presumably meant to release
    // training garbage before the test set is loaded.
    GC.Collect();

    var testData = Program.LoadDataFromfCSV(@"C:\Work\NLP\test.csv", ",");

    var configuration = new CsvConfiguration
    {
        Delimiter = ",",
        HasExcelSeparator = false,
        IgnoreQuotes = true,
        HasHeaderRecord = true,
        QuoteNoFields = true
    };

    using (var writer = new StreamWriter("test.csv"))
    // The configuration was previously built but never handed to the writer, so none of the
    // settings above took effect.
    using (var csvWriter = new CsvWriter(writer, configuration))
    {
        foreach (var testItem in testData)
        {
            var classId = forest.Compute(testItem);
            csvWriter.WriteField(classId[0]);
            csvWriter.NextRecord();
        }
    }
}
/// <summary>
/// Experiment driver: runs <c>TestC45</c> and <c>TestSantander.TestData</c>, then builds a
/// decision tree and a random forest from "Data.csv", classifies up to the first 50 rows with
/// both, and finally times 100 tree builds from the raw table against 100 builds from the
/// pre-converted <c>TableFixedData</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
private static void Main(string[] args)
{
    // NOTE(review): the instance is never used; kept in case the constructor has side effects.
    ExpediaReader reader = new ExpediaReader();

    TestC45();
    TestSantander.TestData();

    var data = LoadDataFromfCSV("Data.csv");
    var algorithm = new C45Algorithm();
    var fixedData = TableFixedData.FromTableData(data);

    var watch1 = new Stopwatch();
    var watch2 = new Stopwatch();

    var decisionalTree = algorithm.BuildConditionalTree(fixedData, new TreeOptions());
    var randomForestAlgorithm = new RandomForestAlgorithm(70, null);
    var forest = randomForestAlgorithm.BuildForest(fixedData);

    // Materialize the rows once instead of copying the whole table on every loop iteration.
    // NOTE(review): assumes none of the calls below mutate data - confirm.
    var rows = data.ToList();

    // Stop at the end of the table so data sets with fewer than 50 rows do not overrun.
    for (int index = 0; index < 50 && index < rows.Count; index++)
    {
        var row = rows[index];
        var estimatedClassName = decisionalTree.GetClass(row);
        var result = decisionalTree.Compute(row);
        var result2 = forest.Compute(row);
        var classsForest = fixedData.ClassesValue[forest.GetClass(row)];
        if (estimatedClassName != row.Class || classsForest != row.Class)
        {
            // Misclassified by the tree or the forest; counting is currently disabled.
        }
    }

    // watch1 accumulates builds from the raw table, watch2 builds from the pre-converted data.
    for (int index = 0; index < 100; index++)
    {
        watch1.Start();
        var ret = algorithm.BuildConditionalTree(data, new TreeOptions());
        watch1.Stop();

        watch2.Start();
        var ret1 = algorithm.BuildConditionalTree(fixedData, new TreeOptions());
        watch2.Stop();

        var className1 = ret1.GetClass(rows[8]);
    }

    // Positive when building from the raw table took longer in total.
    // NOTE(review): the value is never reported; inspect it under a debugger.
    var delta = watch1.Elapsed.Subtract(watch2.Elapsed).TotalMilliseconds;
}
/// <summary>
/// Builds a decision tree for <paramref name="data"/> by converting it with
/// <c>TableFixedData.FromTableData</c> and delegating to <c>C45AlgorithmDataOptimized</c>.
/// </summary>
/// <param name="data">Table to build the tree from.</param>
/// <param name="options">Options forwarded unchanged to <c>C45AlgorithmDataOptimized</c>.</param>
/// <returns>The tree produced by <c>C45AlgorithmDataOptimized.BuildConditionalTree</c>.</returns>
public DecisionTree BuildConditionalTree(ITableData data, TreeOptions options)
{
    var fixedData = TableFixedData.FromTableData(data);
    var builder = new C45AlgorithmDataOptimized(fixedData, options);
    DecisionTree builtTree = builder.BuildConditionalTree();
    return builtTree;
}