/// <summary>
/// Trains a 4-NN, two-class model on the codified breast-cancer dataset in
/// <paramref name="tbl"/> and classifies the nine-feature sample currently
/// held in the <c>inputlar</c> field.
/// </summary>
/// <param name="tbl">Training table with the nine feature columns plus "Class".</param>
/// <returns>"4" when the model predicts class index 0, otherwise "2".</returns>
public string knn(DataTable tbl)
{
    // Translate categorical columns into integer symbols.
    Codification codebook = new Codification(tbl);
    DataTable symbols = codebook.Apply(tbl);

    double[][] inputs = symbols.ToIntArray(
        "Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape",
        "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei",
        "Bland Chromatin", "Normal Nucleoli", "Mitoses").ToDouble();

    int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

    KNearestNeighbors knn = new KNearestNeighbors(k: 4, classes: 2, inputs: inputs, outputs: outputs);

    // Build the query vector from the nine stored input values
    // (replaces nine hand-written Convert.ToInt32 calls).
    double[] sample = new double[9];
    for (int i = 0; i < sample.Length; i++)
    {
        sample[i] = Convert.ToInt32(inputlar[i]);
    }

    int answer = knn.Compute(sample);

    // Decode the internal class index into the dataset's label scheme
    // (0 -> "4" = malignant, anything else -> "2" = benign).
    if (answer == 0)
    {
        answer = 4;
    }
    else
    {
        answer = 2;
    }
    return answer.ToString();
}
/// <summary>
/// Predicts the "result" label for a sample described by p1..p6 using a
/// 1-nearest-neighbour model trained on the Dataset_tbl table.
/// Parameters p7..p9 are accepted for signature compatibility but are not
/// used by the current implementation.
/// </summary>
/// <returns>The decoded class label, or "Nill" when translation/classification fails.</returns>
public string predict(string p1, string p2, string p3, string p4, string p5, string p6, string p7, string p8, string p9)
{
    SqlCommand cmd = new SqlCommand();
    cmd.CommandText = "select * from Dataset_tbl";
    DataTable dt = db.get(cmd);

    // Codify the feature columns and the result column into integer symbols.
    Codification codebook = new Codification(dt, "p1", "p2", "p3", "p4", "p5", "p6", "result");
    DataTable symbols = codebook.Apply(dt);

    double[][] inputs = symbols.ToArray <double>("p1", "p2", "p3", "p4", "p5", "p6");
    int[] outputs = symbols.ToArray <int>("result");

    int K = 1; // single nearest neighbour decides the class

    try
    {
        // Translate the incoming strings with the same codebook used for training;
        // this throws for symbols that never appeared in the training data.
        int[] sample = codebook.Translate(p1, p2, p3, p4, p5, p6);

        KNearestNeighbors knn = new KNearestNeighbors(k: K, classes: 2, inputs: inputs, outputs: outputs);

        double[] doubleArray = Array.ConvertAll(sample, x => (double)x);

        // Decode the numeric prediction back into its original string label.
        answer = codebook.Translate("result", knn.Compute(doubleArray));
    }
    catch
    {
        // Deliberate best-effort fallback: unknown input symbols (or an empty
        // dataset) yield the sentinel label instead of propagating.
        answer = "Nill";
    }
    return answer;
}
/// <summary>
/// Demonstrates that k-NN works with non-numeric data: strings compared
/// with the Levenshtein edit distance.
/// </summary>
public void Run()
{
    // Five training strings split over two classes.
    string[] words = { "Car", "Bar", "Jar", "Charm", "Chair" };

    // "Car"/"Bar"/"Jar" belong to class 0; "Charm"/"Chair" to class 1.
    int[] labels = { 0, 0, 0, 1, 1 };

    // k = 1: the single nearest neighbour casts the decision. Levenshtein
    // distance lets the classifier compare raw strings directly.
    var model = new KNearestNeighbors<string>(k: 1, classes: 2,
        inputs: words, outputs: labels, distance: Distance.Levenshtein);

    // "Chars" is one edit away from "Charm", so the answer should be 1.
    int answer = model.Compute("Chars");
}
/// <summary>
/// Scratch/demo method: exercises a TextFieldParser (on an empty path), then
/// runs the documentation example of string k-NN with Levenshtein distance.
/// </summary>
private static void meh()
{
    string[] delimiters = { "," };

    // NOTE(review): the parser is constructed with an empty path, so this
    // section throws at runtime and the parsed fields were never used —
    // this looks like leftover scratch code. The using statement guarantees
    // disposal even on exceptions (the original Close() was skipped if
    // ReadFields threw).
    using (TextFieldParser tfp = new TextFieldParser(""))
    {
        tfp.HasFieldsEnclosedInQuotes = true;
        tfp.Delimiters = delimiters;
        while (!tfp.EndOfData)
        {
            tfp.ReadFields(); // result intentionally discarded
        }
    }

    // k-NN can be used with any kind of data given a distance; here, strings.
    string[] inputs =
    {
        "Car",   // class 0
        "Bar",   // class 0
        "Jar",   // class 0
        "Charm", // class 1
        "Chair"  // class 1
    };

    int[] outputs =
    {
        0, 0, 0, // first three are class 0
        1, 1,    // last two are class 1
    };

    // k = 1: only the nearest neighbour (by Levenshtein distance) decides.
    KNearestNeighbors<string> knn = new KNearestNeighbors<string>(k: 1, classes: 2,
        inputs: inputs, outputs: outputs, distance: Distance.Levenshtein);

    // "Chars" is closest to "Charm", so answer should be 1.
    int answer = knn.Compute("Chars");
}
/// <summary>
/// Classifies every point in the LAS file with the trained k-NN model and
/// accumulates per-class accuracy statistics against the classifications
/// already stored in the file.
/// </summary>
/// <param name="file">Source LAS file; point records are cast to LasPoint3Short.</param>
/// <returns>The predicted classification for each point, in point order.</returns>
public LasPoint.ClassificationType[] Classify(LasFile file)
{
    var sw = Stopwatch.StartNew();
    LasPointDataRecords points = file.LasPointDataRecords;
    int pointsCount = points.Count();
    LasPoint.ClassificationType[] output = new LasPoint.ClassificationType[pointsCount];
    Statistics stats = new Statistics();
    stats.Count = pointsCount;

    // Per-point local slope vectors, computed in parallel. Each iteration
    // writes only its own array slot, so no synchronization is needed.
    OpenTK.Vector3[] slopeVector = new OpenTK.Vector3[pointsCount];
    Parallel.For(0, pointsCount, (i) =>
    {
        slopeVector[i] = LinearRegression.ComputeRegressionPoint(file, points[i], regressionCount, regressionRange);
        if (i % 1000 == 0)
        {
            Console.WriteLine("ComputeRegression " + i); // progress trace
        }
    });

    for (int i = 0; i < pointsCount; i++)
    {
        LasPoint3Short point = (LasPoint3Short)points[i];
        double distanceFromPlane = Utills.DistanceFromPlane(point, slopeVector[i]);
        // "Green excess": how much greener the point is than its red/blue average.
        // NOTE(review): (point.Red + point.Blue) / 2 is integer division if the
        // colour channels are integral — confirm truncation is intended.
        double green = point.Green - (point.Red + point.Blue) / 2;
        // Feature vector: greenness, scaled height, intensity, slope (x, y, z),
        // and distance from the local regression plane.
        output[i] = Utills.ClassificationClasses[knn.Compute(new double[] { green, file.LasHeader.ScaleZ(point.Z), point.Intensity, slopeVector[i].X, slopeVector[i].Y, slopeVector[i].Z, distanceFromPlane })];
        if (output[i] != points[i].Classification)
        {
            stats.ClassErrors[(int)points[i].Classification]++; // misclassification per true class
        }
        stats.ClassCount[(int)output[i]]++;                      // predicted-class histogram
        stats.ClassRealCount[(int)points[i].Classification]++;   // ground-truth histogram
        stats.PredictionMatrix[(int)points[i].Classification, (int)output[i]]++; // confusion matrix [true, predicted]
        if (i % 1000 == 0)
        {
            Console.WriteLine(i); // progress trace
        }
    }

    Console.Write(stats.ToString());
    sw.Stop();
    // "Czas trwania" is Polish for "duration" (runtime string left untouched).
    Console.WriteLine("Czas trwania [" + sw.Elapsed.TotalSeconds.ToString() + "s]");
    stats.SaveMatrixAsCSV();
    return(output);
}
/// <summary>
/// Verifies that a 4-NN classifier trained on three well-separated clusters
/// assigns a known class-2 point to class 2.
/// </summary>
public void KNearestNeighborConstructorTest2()
{
    // Training data: 2 points of class 0, 4 of class 1, 3 of class 2.
    double[][] samples =
    {
        new double[] { -5, -2, -1 },   // class 0
        new double[] { -5, -5, -6 },   // class 0
        new double[] {  2,  1,  1 },   // class 1
        new double[] {  1,  1,  2 },   // class 1
        new double[] {  1,  2,  2 },   // class 1
        new double[] {  3,  1,  2 },   // class 1
        new double[] { 11,  5,  4 },   // class 2
        new double[] { 15,  5,  6 },   // class 2
        new double[] { 10,  5,  6 },   // class 2
    };

    int[] labels = { 0, 0, 1, 1, 1, 1, 2, 2, 2 };

    // k = 4: the decision is cast by the four nearest neighbours.
    var classifier = new KNearestNeighbors(k: 4, classes: 3, inputs: samples, outputs: labels);

    // A point identical to a class-2 training sample must come back as class 2.
    int predicted = classifier.Compute(new double[] { 11, 5, 4 });

    Assert.AreEqual(2, predicted);
}
/// <summary>
/// Trains a two-class k-NN model on the training set, then builds a confusion
/// matrix from predictions over the concatenated training + test vectors
/// (matched against the concatenated training + expected labels).
/// </summary>
public override ConfusionMatrix Execute()
{
    // k-NN classifier over the training partition only.
    var model = new KNearestNeighbors(k: k, classes: 2, inputs: trainingSet, outputs: trainingOutput);

    // Predictions cover the training vectors followed by the test vectors,
    // so index i lines up with the merged label array below.
    var allSamples = AlgorithmHelpers.MergeArrays(trainingSet, testSet);
    var predictions = new int[allSamples.Length];
    for (int i = 0; i < allSamples.Length; i++)
    {
        predictions[i] = model.Compute(allSamples[i]);
    }

    var actualLabels = AlgorithmHelpers.MergeArrays(trainingOutput, expected);
    return new ConfusionMatrix(predictions, actualLabels, POSITIVE, NEGATIVE);
}
/// <summary>
/// Walk-through example: trains a 4-NN classifier on three numeric clusters
/// and classifies a point that coincides with a class-2 training sample.
/// </summary>
public void Run()
{
    // Nine 3-D training points: 2 from class 0, 4 from class 1, 3 from class 2.
    double[][] trainingPoints =
    {
        new double[] { -5, -2, -1 },   // class 0
        new double[] { -5, -5, -6 },   // class 0
        new double[] {  2,  1,  1 },   // class 1
        new double[] {  1,  1,  2 },   // class 1
        new double[] {  1,  2,  2 },   // class 1
        new double[] {  3,  1,  2 },   // class 1
        new double[] { 11,  5,  4 },   // class 2
        new double[] { 15,  5,  6 },   // class 2
        new double[] { 10,  5,  6 },   // class 2
    };

    int[] trainingLabels = { 0, 0, 1, 1, 1, 1, 2, 2, 2 };

    // For a given instance, its 4 nearest neighbours vote on the class.
    var classifier = new KNearestNeighbors(k: 4, classes: 3,
        inputs: trainingPoints, outputs: trainingLabels);

    // This query equals a class-2 training point, so the answer will be 2.
    int answer = classifier.Compute(new double[] { 11, 5, 4 });
}
/// <summary>
/// Classifies every point of the LAS file with the trained k-NN model
/// (features: scaled height, intensity, green excess) and gathers accuracy
/// statistics against the classifications already stored in the file.
/// </summary>
/// <param name="file">Source LAS file; point records are cast to LasPoint3Short.</param>
/// <returns>The predicted classification for each point, in point order.</returns>
public LasPoint.ClassificationType[] Classify(LasFile file)
{
    var stopwatch = Stopwatch.StartNew();

    LasPointDataRecords records = file.LasPointDataRecords;
    int total = records.Count();

    var predictions = new LasPoint.ClassificationType[total];
    var stats = new Statistics();
    stats.Count = total;

    for (int idx = 0; idx < total; idx++)
    {
        var current = (LasPoint3Short)records[idx];

        // How much greener this point is than its red/blue average.
        double green = current.Green - (current.Red + current.Blue) / 2;

        // Feature order: scaled height, intensity, green excess.
        int classIndex = knn.Compute(new double[] { file.LasHeader.ScaleZ(current.Z), current.Intensity, green });
        predictions[idx] = Utills.ClassificationClasses[classIndex];

        if (predictions[idx] != records[idx].Classification)
        {
            stats.ClassErrors[(int)records[idx].Classification]++;
        }
        stats.ClassCount[(int)predictions[idx]]++;
        stats.ClassRealCount[(int)records[idx].Classification]++;
        stats.PredictionMatrix[(int)records[idx].Classification, (int)predictions[idx]]++;

        if (idx % 1000 == 0)
        {
            Console.WriteLine(idx); // progress trace
        }
    }

    Console.Write(stats.ToString());
    stopwatch.Stop();
    Console.WriteLine("Czas trwania [" + stopwatch.Elapsed.TotalSeconds.ToString() + "s]");
    stats.SaveMatrixAsCSV();
    return predictions;
}
/// <summary>
/// Builds a two-class k-NN classifier from the training partition and returns
/// the confusion matrix of its predictions over the merged training and test
/// vectors, scored against the merged training and expected labels.
/// </summary>
public override ConfusionMatrix Execute()
{
    var classifier = new KNearestNeighbors(k: k, classes: 2, inputs: trainingSet, outputs: trainingOutput);

    // Classify the training vectors followed by the test vectors so the
    // prediction array is index-aligned with the merged ground truth.
    var merged = AlgorithmHelpers.MergeArrays(trainingSet, testSet);
    var predicted = Array.ConvertAll(merged, vector => classifier.Compute(vector));

    var groundTruth = AlgorithmHelpers.MergeArrays(trainingOutput, expected);

    var cmatrix = new ConfusionMatrix(predicted, groundTruth, POSITIVE, NEGATIVE);
    return cmatrix;
}
/// <summary>
/// Builds a TF-IDF feature vector for one document and classifies it with the
/// trained k-NN model held in the <c>knn</c> field.
/// </summary>
/// <param name="docWordDic">Word -> (possibly weighted) occurrence count for the document.</param>
/// <param name="dictionary">Word -> index into the feature vector.</param>
/// <param name="wordIDFDictionary">Word -> inverse document frequency.</param>
/// <returns>The class index returned by <c>knn.Compute</c>.</returns>
internal int test(Dictionary <string, double> docWordDic, Dictionary <string, int> dictionary, Dictionary <string, double> wordIDFDictionary)
{
    // A new double[] is already zero-initialized; the original explicit
    // clearing loop was redundant and has been removed.
    double[] testFeature = new double[featureSize];

    // Total word mass of the document — the TF denominator.
    double docWordSum = 0;
    foreach (string word in docWordDic.Keys)
    {
        docWordSum += docWordDic[word];
    }

    // Fill in TF * IDF for every word that is both in the dictionary and has
    // a non-zero IDF; all other slots stay zero.
    foreach (string word in docWordDic.Keys)
    {
        if (dictionary.ContainsKey(word) && wordIDFDictionary.ContainsKey(word) && wordIDFDictionary[word] != 0)
        {
            testFeature[dictionary[word]] = (docWordDic[word] / docWordSum) * wordIDFDictionary[word]; // TFIDF
        }
    }

    return knn.Compute(testFeature);
}
/// <summary>
/// Checks that a 1-NN classifier using Levenshtein distance over raw strings
/// assigns "Chars" to the class of its closest training string ("Charm").
/// </summary>
public void KNearestNeighborConstructorTest3()
{
    // Three "-ar" words form class 0; the two "Cha-" words form class 1.
    string[] words = { "Car", "Bar", "Jar", "Charm", "Chair" };
    int[] classes = { 0, 0, 0, 1, 1 };

    // k = 1 with string edit distance: the single closest string decides.
    var model = new KNearestNeighbors<string>(k: 1, classes: 2,
        inputs: words, outputs: classes, distance: Distance.Levenshtein);

    int answer = model.Compute("Chars");

    // "Chars" is a single edit away from "Charm", which is class 1.
    Assert.AreEqual(1, answer);
}
/// <summary>
/// Classifies an image feature vector with the trained k-NN model and maps
/// the numeric prediction to its class name.
/// </summary>
/// <param name="image">Feature vector extracted from the image.</param>
/// <returns>The class name corresponding to the predicted class index.</returns>
public string Classify(double[] image)
{
    int predictedClass = _knn.Compute(image);
    return Utilities.GetClassFromInt(predictedClass);
}
/// <summary>
/// Finds the Spotify ids of songs similar to the input descriptor vector,
/// using the 10 nearest neighbours over the normalized descriptor matrix
/// loaded from CSV.
/// </summary>
/// <param name="descriptoresEntrada">Raw descriptor values for the query song.</param>
/// <returns>Spotify ids (with the 14-character URI prefix stripped) of the nearest songs.</returns>
private List<String> GetSimilaresDatabaseKNN(List<Double> descriptoresEntrada)
{
    ModeloSimilitudEntities db = new ModeloSimilitudEntities();

    Double[] vectorEntrada = Normalizar(descriptoresEntrada.ToArray());
    Double[][] matriz = csvtoMatrix("descriptoresNormalizados");

    // k-NN requires at least two classes, so every row is labeled 1 except
    // row 23. NOTE(review): the labels are irrelevant to the similarity
    // lookup below — only GetNearestNeighbors is used; row 23 looks arbitrary.
    int[] pertenencia = new int[matriz.Length];
    for (int i = 0; i < pertenencia.Length; i++)
    {
        pertenencia[i] = 1;
    }
    pertenencia[23] = 2;

    KNearestNeighbors knn = new KNearestNeighbors(k: 10, inputs: matriz, outputs: pertenencia);
    knn.Compute(vectorEntrada); // result unused; kept to preserve original behavior

    // The original pre-filled this array before the call, but `out` discards
    // any prior value — the dead store has been removed.
    int[] neighborLabels;
    Double[][] cercanos = knn.GetNearestNeighbors(vectorEntrada, out neighborLabels);

    // Map each nearest-neighbour row back to its song by position.
    // NOTE(review): Contains on double[] compares by reference; this only
    // works if GetNearestNeighbors returns the same row instances as matriz.
    List<String> listaSimilares = new List<String>();
    List<canciones> dbcanciones = db.canciones.ToList();
    for (int i = 0; i < matriz.Length; i++)
    {
        if (cercanos.Contains(matriz[i]))
        {
            listaSimilares.Add(dbcanciones[i].id_spotify.Substring(14));
        }
    }
    return listaSimilares;
}
/// <summary>
/// Lets the user pick an image, displays it, then trains a 3-NN classifier
/// from the letter images listed in the <c>dictionary</c> field (class names
/// derived from filenames) and shows the predicted letter and elapsed time.
/// </summary>
private void browse_Click(object sender, RoutedEventArgs e)
{
    OpenFileDialog dlg = new OpenFileDialog();
    dlg.Filter = "All images|*.jpeg;*.png;*.jpg;*.bmp|JPEG Files (*.jpeg)|*.jpeg|PNG Files (*.png)|*.png|JPG Files (*.jpg)|*.jpg|Bitmap Files (*.bmp)|*.bmp";
    Nullable<bool> result = dlg.ShowDialog();
    if (result == true)
    {
        // Show the chosen image in the UI.
        string filename = dlg.FileName;
        Uri uri = new Uri(filename);
        BitmapImage imgSource = new BitmapImage(uri);
        FileNameTextBox.Text = filename;
        picture.Source = imgSource;

        /* KNN: build the training set from the letter image files. */
        List<double []> letters = new List<double[]>();
        List<int> outputList = new List<int>();
        List<string> indexes = new List<string>(); // class index -> class (letter) name
        foreach (string letterPath in dictionary)
        {
            FileInfo file = new FileInfo(letterPath);
            // Class name = filename without its 4-char extension and any
            // trailing digits (e.g. "A12.png" -> "A").
            string nameClean = file.Name.Substring(0, file.Name.Length - 4);
            while (Char.IsDigit(nameClean[nameClean.Length - 1]))
            {
                nameClean = nameClean.Substring(0, nameClean.Length - 1);
            }
            // Reuse the class index if this letter was seen before, else add it.
            int i = indexes.IndexOf(nameClean);
            if (i <= -1)
            {
                indexes.Add(nameClean);
                outputList.Add(indexes.Count - 1);
            }
            else
            {
                outputList.Add(i);
            }
            letters.Add(getVectors(cropImage(getImageBitmap(file.FullName))));
        }

        DateTime start = DateTime.Now;
        KNearestNeighbors knn = new KNearestNeighbors(k: 3, classes: indexes.Count, inputs: letters.ToArray(), outputs: outputList.ToArray());
        // NOTE(review): the query image is converted with MakeGrayscale but the
        // training images above were not — confirm this asymmetry is intended.
        int answer = knn.Compute(getVectors(cropImage(MakeGrayscale(getImageBitmap(filename)))));
        string res = indexes[answer];
        lettre.Content = res;
        timeSpent.Content = "Execution time: " + (DateTime.Now - start);
    }
}
/// <summary>
/// Trains a k-NN text classifier from the training CSV (textBox1), evaluates
/// it on the test CSV (textBox8), writes a "_Labeled" CSV with predictions,
/// and reports accuracy / confusion-matrix statistics in textBox3.
/// The distance (Levenshtein/Euclidean/Jaccard/Cosine) comes from comboBox2,
/// k from textBox2, and the word-vector mode from comboBox4.
/// </summary>
private void button2_Click(object sender, EventArgs e)
{
    wv = new Word2Vec(3);
    textBox3.Text = "Processing";
    System.Diagnostics.Debug.WriteLine("Starting Processing");

    // --- Pass 1: read the training CSV, collecting class labels and sentences ---
    classlist = new Dictionary <string, int>();        // label -> class index
    inverseClassList = new Dictionary <int, string>(); // class index -> label
    List <List <double> > IntMatrixInputs = new List <List <double> >(); // NOTE(review): never populated in this handler
    List <string> stringInputs = new List <string>();
    List <int> IntOutputs = new List <int>();
    bool frequency = true; // NOTE(review): unused in the current code path
    int temp = 0;
    string[] delimiters = { "," };
    string[] fields;
    TextFieldParser tfp;
    tfp = new TextFieldParser(textBox1.Text);
    tfp.HasFieldsEnclosedInQuotes = true;
    tfp.Delimiters = delimiters;
    fields = tfp.ReadFields(); // skip the header row
    int indexClass = 0;
    int indexRows = 0;
    while (!tfp.EndOfData)
    {
        System.Diagnostics.Debug.WriteLine("Processing training row: " + indexRows);
        fields = tfp.ReadFields();
        if (NotNullFields(fields))
        {
            // Assign a fresh class index the first time a label is seen.
            if (!classlist.TryGetValue(fields[SelectedClass], out temp))
            {
                classlist[fields[SelectedClass]] = indexClass;
                inverseClassList[indexClass] = fields[SelectedClass];
                ++indexClass;
            }
            stringInputs.Add(GetString(fields, " "));
            wv.addSentence(GetString(fields, " "));
            IntOutputs.Add(classlist[fields[SelectedClass]]);
            ++indexRows;
        }
    }
    wv.ComputeVectors();

    // --- Build the training arrays ---
    List <string> strInputsTrain = new List <string>();
    List <string> strInputsTest = new List <string>(); // NOTE(review): never filled
    // NOTE(review): IntInputsTrain rows are never assigned (the wv.transform
    // call is commented out upstream), so only the string/Levenshtein branch
    // below trains on real inputs — confirm the numeric branches still work.
    double[][] IntInputsTrain = new double[indexRows][];
    int[] outputsTrain = new int[indexRows];
    double [][] IntInputs = new double[IntMatrixInputs.Count][]; // NOTE(review): unused
    for (int i = 0; i < indexRows; ++i)
    {
        System.Diagnostics.Debug.WriteLine("Creating input row: " + i);
        strInputsTrain.Add(stringInputs[i]);
        outputsTrain[i] = IntOutputs[i];
    }

    // --- Train the classifier with the user-selected distance ---
    if (comboBox2.SelectedItem.ToString() == "Levenshtein")
    {
        knnStr = new KNearestNeighbors <string>(k: Int32.Parse(textBox2.Text), classes: classlist.Count, inputs: strInputsTrain.ToArray(), outputs: outputsTrain, distance: Distance.Levenshtein);
    }
    else if (comboBox2.SelectedItem.ToString() == "Euclidean")
    {
        knn = new KNearestNeighbors(k: Int32.Parse(textBox2.Text), classes: classlist.Count, inputs: IntInputsTrain, outputs: outputsTrain, distance: Distance.Euclidean);
    }
    else if (comboBox2.SelectedItem.ToString() == "Jaccard")
    {
        knn = new KNearestNeighbors(k: Int32.Parse(textBox2.Text), classes: classlist.Count, inputs: IntInputsTrain, outputs: outputsTrain, distance: Jaccard);
    }
    else
    {
        knn = new KNearestNeighbors(k: Int32.Parse(textBox2.Text), classes: classlist.Count, inputs: IntInputsTrain, outputs: outputsTrain, distance: Distance.Cosine);
    }

    // --- Pass 2: classify the test CSV and write the labeled output file ---
    int correctCount = 0;
    int wrongCount = 0;
    List <int> expected = new List <int>();
    List <int> predicted = new List <int>();
    int positiveValue = 1; // NOTE(review): unused
    int negativeValue = 0; // NOTE(review): unused
    tfp = new TextFieldParser(textBox8.Text);
    tfp.HasFieldsEnclosedInQuotes = true;
    tfp.Delimiters = delimiters;
    fields = tfp.ReadFields(); // skip the header row
    indexRows = 0;
    string outputPath = textBox8.Text.Replace(".csv", "_Labeled.csv");
    StreamWriter sw = new StreamWriter(outputPath);
    sw.WriteLine("FirstName,LastName,Country");
    while (!tfp.EndOfData)
    {
        System.Diagnostics.Debug.WriteLine("Processing test row: " + indexRows);
        fields = tfp.ReadFields();
        // Rows missing first or last name are written out as "Undefined".
        if (fields[1] != "" && fields[2] != "")
        {
            int answer;
            if (comboBox2.SelectedItem.ToString() == "Levenshtein")
            {
                answer = knnStr.Compute(GetString(fields, " "));
            }
            else
            {
                answer = knn.Compute(wv.transform(GetString(fields, " "), comboBox4.SelectedItem.ToString()));
            }
            int tempClass = -1;
            classlist.TryGetValue(fields[SelectedClass], out tempClass);
            expected.Add(tempClass);
            predicted.Add(answer);
            if (answer == tempClass)
            {
                correctCount++;
            }
            else
            {
                wrongCount++;
            }
            ++indexRows;
            sw.WriteLine(fields[1] + "," + fields[2] + "," + inverseClassList[answer]);
        }
        else
        {
            sw.WriteLine(fields[1] + "," + fields[2] + ",Undefined");
        }
    }
    sw.Flush();
    sw.Close();

    // --- Report statistics in the UI ---
    ConfusionMatrix matrix = new ConfusionMatrix(predicted.ToArray(), expected.ToArray()); // NOTE(review): computed but never displayed
    GeneralConfusionMatrix matrixGen = new GeneralConfusionMatrix(classlist.Count, expected.ToArray(), predicted.ToArray());
    textBox3.Text = DateTime.Now + " " + "k: " + textBox2.Text + " Distance: " + comboBox2.SelectedItem.ToString() + " Vector: " + comboBox4.SelectedItem.ToString();
    textBox3.Text += " Number of instances: " + indexRows + " Number of classes: " + classlist.Count;
    textBox3.Text += " Correctly classified: " + correctCount + " Wrongly classified: " + wrongCount;
    textBox3.Text += " Standard Error " + matrixGen.StandardError;
    textBox3.Text += " Accuracy " + (double)((double)correctCount / (double)indexRows);
    if (checkBox1.Checked)
    {
        paintMatrix(matrixGen, inverseClassList);
    }
    label8.Text = "-";
}
/// <summary>
/// Trains a 3-NN classifier on three numeric clusters and checks that it
/// reproduces the training labels and classifies six unseen probe points
/// into the expected clusters.
/// </summary>
public void KNearestNeighborConstructorTest()
{
    // Training points: 2 of class 0, 4 of class 1, 3 of class 2.
    double[][] trainingPoints =
    {
        new double[] { -5, -2, -1 },
        new double[] { -5, -5, -6 },
        new double[] {  2,  1,  1 },
        new double[] {  1,  1,  2 },
        new double[] {  1,  2,  2 },
        new double[] {  3,  1,  2 },
        new double[] { 11,  5,  4 },
        new double[] { 15,  5,  6 },
        new double[] { 10,  5,  6 },
    };

    int[] trainingLabels = { 0, 0, 1, 1, 1, 1, 2, 2, 2 };

    var model = new KNearestNeighbors(3, trainingPoints, trainingLabels);

    // The model must classify every training point as its own label.
    for (int idx = 0; idx < trainingPoints.Length; idx++)
    {
        Assert.AreEqual(trainingLabels[idx], model.Compute(trainingPoints[idx]));
    }

    // Unseen probes near each cluster must land in that cluster's class.
    double[][] probes =
    {
        new double[] { -4, -3, -1 },
        new double[] { -5, -4, -4 },
        new double[] {  5,  3,  4 },
        new double[] {  3,  1,  6 },
        new double[] { 10,  5,  4 },
        new double[] { 13,  4,  5 },
    };

    int[] probeLabels = { 0, 0, 1, 1, 2, 2 };

    for (int idx = 0; idx < probes.Length; idx++)
    {
        Assert.AreEqual(probeLabels[idx], model.Compute(probes[idx]));
    }
}
/// <summary>
/// Builds a two-class 4-NN model from the codified table and classifies the
/// nine-feature sample stored in the <c>inputlar</c> field.
/// </summary>
/// <param name="tbl">Training table with the nine Wisconsin feature columns plus "Class".</param>
/// <returns>"4" when the predicted class index is 0, otherwise "2".</returns>
public string knn(DataTable tbl)
{
    // Codify categorical columns into integer symbols.
    Codification codebook = new Codification(tbl);
    DataTable symbols = codebook.Apply(tbl);

    double[][] inputs = symbols.ToIntArray("Clump Thickness", "Uniformity of Cell Size",
        "Uniformity of Cell Shape", "Marginal Adhesion", "Single Epithelial Cell Size",
        "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses").ToDouble();
    int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

    KNearestNeighbors knn = new KNearestNeighbors(k: 4, classes: 2, inputs: inputs, outputs: outputs);

    // Assemble the query vector from the nine stored input values.
    double[] query = new double[9];
    for (int i = 0; i < query.Length; i++)
    {
        query[i] = Convert.ToInt32(inputlar[i]);
    }

    int answer = knn.Compute(query);

    // Translate the internal class index back to the dataset's 2/4 labels.
    if (answer == 0)
    {
        answer = 4;
    }
    else
    {
        answer = 2;
    }
    return answer.ToString();
}
/// <summary>
/// Classifies every document under <paramref name="path"/> (one subdirectory
/// per category) with the supplied k-NN model, building a weighted TF-IDF
/// feature vector per document, and returns the original-vs-predicted class
/// for each file.
/// </summary>
/// <param name="path">Root folder; each subdirectory is one category.</param>
/// <param name="dictionary">Word -> index into the feature vector.</param>
/// <param name="dicSize">Length of the feature vector.</param>
/// <param name="idfTable">Word -> inverse document frequency.</param>
/// <param name="knn">Trained classifier applied to each feature vector.</param>
/// <returns>A result entry (file name, true class index, predicted class index) per document.</returns>
public static List<testResult> RunTest(string path, Hashtable dictionary, int dicSize, Hashtable idfTable, KNearestNeighbors knn)
{
    List<testResult> result = new List<testResult>();
    int count = 0; // NOTE(review): unused
    string[] categories = Directory.GetDirectories(path);
    for (int i = 0; i < categories.Count(); i++) // traverse categories; i doubles as the true class index
    {
        Console.WriteLine(Path.GetFileName(categories[i]));
        string[] file_names = Directory.GetFiles(categories[i]);
        for (int j = 0; j < file_names.Count(); j++) // each file in the category
        {
            Console.WriteLine(Path.GetFileName(file_names[j]));
            System.IO.StreamReader file = new System.IO.StreamReader(file_names[j]);
            double[] featureV = new double[dicSize];
            for (int k = 0; k < dicSize; k++) // explicit zero-init (redundant: new double[] is already zeroed)
            {
                featureV[k] = 0;
            }
            String line;
            int counter = 0; // NOTE(review): unused
            Hashtable docWord = new Hashtable(); // word -> weighted occurrence count for this document
            Stemmer stemmer = new Stemmer();
            int sumWordCount = 0; // total weighted word mass (TF denominator)
            stemmer.stem(); // NOTE(review): called with nothing added — looks like a no-op leftover

            /* Header section: lines of the form "Name: content" until the first
               line without ": " (which starts the body). Only "subject" is
               weighted in; handlers for keywords/newsgroups/from were
               commented out in the original and have been dropped here. */
            while ((line = file.ReadLine()) != null)
            {
                if (line.Contains(": "))
                {
                    string[] splitPart = line.Split(new string[] { ": " }, StringSplitOptions.None);
                    string columnName = splitPart[0].Trim();
                    string content = splitPart[splitPart.Length - 1];
                    if (columnName.ToLower() == "subject")
                    {
                        // Tokenize on anything that is not alphanumeric, '_' or '-'.
                        foreach (string iter_word in Regex.Split(content, @"[^A-Za-z0-9_-]"))
                        {
                            String word = iter_word.ToLower().Trim(new Char[] { '_', '-' });
                            double Num;
                            bool isNum = double.TryParse(word, out Num);
                            if (isNum)
                            {
                                continue; // skip pure numbers
                            }
                            stemmer.add(word.ToCharArray(), word.Length);
                            stemmer.stem();
                            word = stemmer.ToString();
                            if (word.Length == 0)
                            {
                                continue;
                            }
                            if (stopWordTable.ContainsKey(word))
                            {
                                continue; // skip stop words
                            }
                            sumWordCount += 1 * subjectWeight;
                            // Word preprocessing done — accumulate its weighted count.
                            if (docWord.ContainsKey(word))
                            {
                                int temp = (int)docWord[word];
                                temp += 1 * subjectWeight;
                                docWord[word] = temp;
                            }
                            else
                            {
                                docWord[word] = 1 * subjectWeight;
                            }
                        }
                    }
                }
                else
                {
                    break; // first non-header line: fall through to the body loop
                }
            }

            /* Body section: same tokenize/stem/stop-word pipeline with weight 1;
               quoted lines (">" / "|>") are skipped. */
            while ((line = file.ReadLine()) != null)
            {
                if (line.StartsWith(">") || line.StartsWith("|>"))
                {
                    continue; // skip quoted reply text
                }
                foreach (string iter_word in Regex.Split(line, @"[^A-Za-z0-9_-]"))
                {
                    String word = iter_word.ToLower().Trim(new Char[] { '_', '-' });
                    double Num;
                    bool isNum = double.TryParse(word, out Num);
                    if (isNum)
                    {
                        continue;
                    }
                    stemmer.add(word.ToCharArray(), word.Length);
                    stemmer.stem();
                    word = stemmer.ToString();
                    if (word.Length == 0)
                    {
                        continue;
                    }
                    if (stopWordTable.ContainsKey(word))
                    {
                        continue;
                    }
                    sumWordCount++;
                    // Word preprocessing done — accumulate its count.
                    if (docWord.ContainsKey(word))
                    {
                        int temp = (int)docWord[word];
                        temp++;
                        docWord[word] = temp;
                    }
                    else
                    {
                        docWord[word] = 1;
                    }
                }
            } // line end

            // Convert accumulated counts into TF-IDF feature values.
            foreach (string word in docWord.Keys)
            {
                if (dictionary.ContainsKey(word))
                {
                    int indexOfDic = (int)dictionary[word];
                    double TF = System.Convert.ToDouble((int)docWord[word]) / System.Convert.ToDouble(sumWordCount);
                    double IDF = (double)idfTable[word];
                    featureV[indexOfDic] = TF * IDF;
                }
            }

            // Record true class (directory index) vs predicted class.
            testResult resultTemp = new testResult();
            resultTemp.docName = Path.GetFileName(file_names[j]);
            resultTemp.oriClass = i;
            resultTemp.resultClass = knn.Compute(featureV);
            result.Add(resultTemp);
            Console.WriteLine(resultTemp.resultClass);
        } // file end
    } // category end
    return result;
}
/// <summary>
/// Classifies a LAS file by grid cells: points are grouped into a
/// divCountX x divCountY grid, each cell is classified from its aggregate
/// features with the trained k-NN model (cells pre-marked as class 7 are
/// labeled Noise directly), and accuracy statistics are collected.
/// </summary>
/// <param name="file">Source LAS file.</param>
/// <param name="divCountX">Number of grid columns.</param>
/// <param name="divCountY">Number of grid rows.</param>
/// <returns>The predicted classification per grid cell.</returns>
public ClassificationType[,] Classify(LasFile file, int divCountX, int divCountY)
{
    Stopwatch swTotal = Stopwatch.StartNew();
    Stopwatch sw = Stopwatch.StartNew();
    Console.WriteLine("Preparing testing dataset...");
    LasPointDataRecords points = file.LasPointDataRecords; // NOTE(review): unused below
    ClassificationType[,] output = new ClassificationType[divCountX, divCountY];
    // Aggregate the raw points into per-cell subgroups (avg height/intensity/
    // distance, slope vector, dominant class index).
    SubgroupOfPoints[,] values = Utills.GroupPoints(file, divCountX, divCountY);
    Statistics stats = new Statistics();
    stats.Count = divCountX * divCountY;
    sw.Stop();
    Console.WriteLine("Preparing testing dataset completed [" + sw.Elapsed.TotalSeconds.ToString() + "s]");

    Stopwatch sw2 = Stopwatch.StartNew();
    Console.WriteLine("Classification in progress...");
    int noiseCount = 0;
    for (int i = 0; i < divCountX; i++)
    {
        for (int j = 0; j < divCountY; j++)
        {
            if (values[i, j].classIndex == 7)
            {
                // Class index 7 is treated as noise and bypasses the classifier.
                output[i, j] = ClassificationType.Noise;
                noiseCount++;
            }
            else
            {
                double avgHeight = values[i, j].avgHeight;
                double avgIntensity = values[i, j].avgIntensity;
                double avgDistance = values[i, j].avgDistance;
                OpenTK.Vector3 slopeVector = values[i, j].slopeVector;
                // Feature vector: avg distance, avg height, avg intensity, slope (x, y, z).
                output[i, j] = Utills.ClassificationClasses[knn.Compute(new double[] { avgDistance, avgHeight, avgIntensity, slopeVector[0], slopeVector[1], slopeVector[2] })];
                ClassificationType ct;
                // Cells whose class index has no quick-class mapping are skipped
                // in the statistics (but keep their prediction in output).
                if (!Utills.QuickClassess.TryGetValue(values[i, j].classIndex, out ct))
                {
                    continue;
                }
                if (output[i, j] != ct)
                {
                    stats.ClassErrors[(int)ct]++;
                }
                // NOTE(review): the matrix is incremented symmetrically here —
                // [predicted, true] AND [true, predicted] — unlike the per-point
                // Classify overloads, which increment only [true, predicted].
                // This double-counts the diagonal; confirm it is intended.
                stats.PredictionMatrix[(int)output[i, j], (int)ct]++;
                stats.PredictionMatrix[(int)ct, (int)output[i, j]]++;
                stats.ClassCount[(int)output[i, j]]++;
                stats.ClassRealCount[(int)ct]++;
            }
        }
    }

    Console.Write(stats.ToString());
    sw2.Stop();
    Console.WriteLine("Classification completed [" + sw2.Elapsed.TotalSeconds.ToString() + "s]");
    swTotal.Stop();
    Console.WriteLine("Total time: [" + swTotal.Elapsed.TotalSeconds.ToString() + "s]");
    Console.WriteLine("Noise count: " + noiseCount.ToString());
    stats.SaveMatrixAsCSV();
    return(output);
}
/// <summary>
/// Trains a two-class k-NN classifier on the training partition, classifies
/// the merged training + test vectors, and returns the resulting confusion
/// matrix (class 0 treated as positive, class 1 as negative).
/// </summary>
/// <param name="k">Number of neighbours consulted per decision.</param>
/// <param name="trainingSet">Training feature vectors.</param>
/// <param name="trainingOutput">Labels for the training vectors.</param>
/// <param name="testSet">Test feature vectors.</param>
/// <param name="expected">Expected labels for the test vectors.</param>
private ConfusionMatrix RunKNN(int k, Double[][] trainingSet, int[] trainingOutput, Double[][] testSet, int[] expected)
{
    // Two-class classifier built from the training partition only.
    var classifier = new KNearestNeighbors(k: k, classes: 2, inputs: trainingSet, outputs: trainingOutput);

    // Classify every vector — training followed by test — so predictions are
    // index-aligned with the merged label array below.
    var samples = UtilityProvider.MergeArrays(trainingSet, testSet);
    var predictions = samples.Select(x => classifier.Compute(x)).ToArray();

    // Convention used by these experiments: 0 is positive, 1 is negative.
    int positiveLabel = 0;
    int negativeLabel = 1;

    var groundTruth = UtilityProvider.MergeArrays(trainingOutput, expected);
    ConfusionMatrix cmatrix = new ConfusionMatrix(predictions, groundTruth, positiveLabel, negativeLabel);
    return cmatrix;
}