/// <summary>
/// Processes every file in <paramref name="sourceFolder"/> in parallel. For each file it reads the instances,
/// records the alpha value that <c>KNNContext.GetAllAlphaValues</c> reports for each instance, and then, for each
/// of three classifiers (symbols <c>knnallrew</c>, <c>nbpkid</c>, <c>dtc44</c>), records the predicted
/// probabilities of labels "0" and "1", whether the prediction matches the label, the alpha of the instance in the
/// derived (p0, p1) dataset, and the difference between that alpha and the base alpha. The augmented dataset is
/// written to <c>{filename}.csv</c> inside <paramref name="datasetWithAlphaOutputPath"/>.
/// </summary>
/// <param name="sourceFolder">Folder whose files are read as CSV datasets.</param>
/// <param name="datasetWithAlphaOutputPath">Folder that receives one augmented CSV per input file.</param>
/// <param name="parallelThreadMultiplier">
/// Multiplied by <see cref="Environment.ProcessorCount"/> to obtain the maximum degree of parallelism
/// (clamped to at least 1).
/// </param>
/// <remarks>
/// A log is written to <c>../log.txt</c> and the bin-frequency table to <c>../B739-allBinFreqs.csv</c>, both
/// relative to the current directory. Bin-frequency recording is currently disabled, so that file only contains
/// its header row. A file that fails to process is logged and skipped.
/// </remarks>
public static void AlphaAllOps(string sourceFolder, string datasetWithAlphaOutputPath, double parallelThreadMultiplier = 1.0)
{
    // Guards logger, finishedCount and hasFinished: they are touched by the Parallel.ForEach workers and by the
    // process-exit handler, and StringBuilder is not safe for concurrent writers.
    object gate = new object();
    StringBuilder logger = new StringBuilder();

    // Header: "filename," followed by <prefix>-bin0 .. <prefix>-bin9 for each prefix, in this order.
    string[] binColumnPrefixes = { "oalpha", "knnallrew", "nbpkid", "dtc44", "knnallrew-adiff", "nbpkid-adiff", "dtc44-adiff" };
    IEnumerable<string> binColumns = binColumnPrefixes.SelectMany(prefix => Enumerable.Range(0, 10).Select(bin => $"{prefix}-bin{bin}"));
    StringBuilder allBinFreqBuilder = new StringBuilder("filename," + string.Join(",", binColumns) + "\r\n");

    int finishedCount = 0;
    bool hasFinished = false;

    // Keep the delegate in a variable so the very same handler can be unsubscribed once the run has completed.
    EventHandler exitHandler = CurrentDomain_ProcessExit;
    AppDomain.CurrentDomain.ProcessExit += exitHandler;

    // ParallelOptions rejects 0, which a small multiplier would otherwise produce after truncation.
    int maxDegreeOfParallelism = Math.Max(1, (int)(Environment.ProcessorCount * parallelThreadMultiplier));
    Console.WriteLine($"Max degree of parallelism: {maxDegreeOfParallelism} ");

    Parallel.ForEach(
        Directory.EnumerateFiles(sourceFolder),
        new ParallelOptions() { MaxDegreeOfParallelism = maxDegreeOfParallelism },
        filename => TryAlphaAllOps(filename));

    lock (gate)
    {
        logger.AppendLine($"Finished all {finishedCount}. ");
        Console.WriteLine($"Finished all {finishedCount}. ");
        Output();
        hasFinished = true;
    }

    // The run completed normally; the exit handler has nothing left to flush.
    AppDomain.CurrentDomain.ProcessExit -= exitHandler;

    // Processes one file; any exception is logged and the file is skipped.
    void TryAlphaAllOps(string filename)
    {
        try
        {
            // 1. read raw datasets
            Dictionary<Instance, Dictionary<string, double>> datasetInfo = new Dictionary<Instance, Dictionary<string, double>>();
            List<Instance> instances = CSV.ReadFromCsv(filename, null);
            instances.ForEach(i => datasetInfo.Add(i, new Dictionary<string, double>()));
            foreach ((Instance instance, double alpha) in new KNNContext(instances).GetAllAlphaValues())
            {
                datasetInfo[instance].Add("alpha", alpha);
            }

            // From here on, filename is the bare name used for the output file and log messages.
            filename = Path.GetFileNameWithoutExtension(filename);

            // 2. do work
            List<(AlgorithmContextBase context, string symbol)> classifiers = new List<(AlgorithmContextBase context, string symbol)>
            {
                (new KNNContext(instances) { NeighboringMethod = KNNContext.NeighboringOption.AllNeighborsWithReweighting }, "knnallrew"),
                (new NaiveBayesContext(instances), "nbpkid"),
                (new DecisionTreeContext(instances) { UseLaplaceCorrection = true }, "dtc44"),
            };

            foreach ((AlgorithmContextBase context, string symbol) in classifiers)
            {
                // 2.1 calc prob dist
                context.Train();
                foreach (Instance instance in instances)
                {
                    Dictionary<string, double> result = context.GetProbDist(instance);

                    // A missing label leaves the out variable at default(double), i.e. 0.0.
                    result.TryGetValue("0", out double p0);
                    result.TryGetValue("1", out double p1);

                    // A tie (p0 == p1) counts as correct.
                    bool isCorrect = true;
                    if (p0 > p1)
                    {
                        isCorrect = instance.LabelValue == "0";
                    }
                    else if (p1 > p0)
                    {
                        isCorrect = instance.LabelValue == "1";
                    }

                    Dictionary<string, double> props = datasetInfo[instance];
                    props.Add($"{symbol}-p0", p0);
                    props.Add($"{symbol}-p1", p1);
                    props.Add($"{symbol}-iscorrect", isCorrect ? 1 : 0);
                }

                // 2.2 calc alpha of the derived (p0, p1) dataset
                List<Instance> derivedInstances = new List<Instance>(instances.Count);
                foreach (Instance instance in instances)
                {
                    derivedInstances.Add(new Instance(
                        new List<Feature>
                        {
                            new Feature($"{symbol}-p0", ValueType.Continuous, datasetInfo[instance][$"{symbol}-p0"]),
                            new Feature($"{symbol}-p1", ValueType.Continuous, datasetInfo[instance][$"{symbol}-p1"]),
                        },
                        instance.LabelValue));
                }

                // NOTE(review): assumes GetAllAlphaValues yields one result per instance in input order — confirm.
                List<double> derivedAlphas = new KNNContext(derivedInstances).GetAllAlphaValues().Select(tuple => tuple.Item2).ToList();

                // Pair by position in `instances` (the order derivedInstances was built in) rather than by
                // Dictionary enumeration order, which is not guaranteed.
                for (int index = 0; index < instances.Count; index++)
                {
                    Dictionary<string, double> props = datasetInfo[instances[index]];
                    props.Add($"{symbol}-alpha", derivedAlphas[index]);
                    props.Add($"{symbol}-adiff", derivedAlphas[index] - props["alpha"]);
                }

                // 2.3 record bin freq — currently disabled. When enabled it appended, per symbol, the fraction of
                // derivedAlphas in each of ten bins [i/10, (i+1)/10) (last upper bound 1.01) to a per-file row
                // of allBinFreqBuilder.
            }

            // 3. write dataset with alphas
            List<List<string>> tableFields = new List<List<string>>(instances.Count);
            foreach (Instance instance in instances)
            {
                List<string> rowFields = instance.Serialize().Split(',').ToList();
                foreach (KeyValuePair<string, double> kvp in datasetInfo[instance])
                {
                    rowFields.Add(kvp.Value.ToString());
                }

                tableFields.Add(rowFields);
            }

            Instance firstInstance = instances.First();
            string header = $"{string.Join(',', firstInstance.Features.Select(f => f.Name))},label,{string.Join(',', datasetInfo[firstInstance].Select(kvp => kvp.Key))}";
            CSV.WriteToCsv(Path.Combine(datasetWithAlphaOutputPath, $"{filename}.csv"), new Table<string>(tableFields), header);

            lock (gate)
            {
                finishedCount++;
                logger.AppendLine($"{DateTime.Now}\tSuccessfully finished {filename} (Total: {finishedCount})");
                Console.WriteLine($"{DateTime.Now}\tSuccessfully finished {filename} (Total: {finishedCount})");
            }
        }
        catch (Exception e)
        {
            Console.WriteLine($"{DateTime.Now}\t{e.GetType()} encountered in processing {filename}, skipping this file");

            // Append the whole error record under one lock so entries from other workers cannot interleave.
            lock (gate)
            {
                logger.AppendLine(new string('>', 64));
                logger.AppendLine($"{DateTime.Now}\t{e.GetType()} encountered in processing {filename}, skipping this file");
                logger.AppendLine(e.ToString());
                logger.AppendLine(new string('>', 64));
            }
        }
    }

    // Flushes what has been collected so far if the process exits before the run completes.
    void CurrentDomain_ProcessExit(object? sender, EventArgs e)
    {
        lock (gate)
        {
            if (!hasFinished)
            {
                logger.AppendLine($"Program exited after finishing {finishedCount}. ");
                Output();
            }
        }
    }

    // Writes the bin-frequency table and the log. Callers must hold `gate`.
    void Output()
    {
        using StreamWriter allBinFreqWriter = new StreamWriter(Path.Combine("..", "B739-allBinFreqs.csv"));
        allBinFreqWriter.Write(allBinFreqBuilder);
        using StreamWriter logWriter = new StreamWriter(Path.Combine("..", "log.txt"));
        logWriter.Write(logger);
    }
}
/// <summary>
/// Processes every file in <paramref name="sourceFolder"/> in parallel. For each file it reads the instances,
/// obtains their alpha values from <c>KNNContext.GetAllAlphaValues</c>, and records the fraction of instances whose
/// alpha falls into each of ten bins of width 0.1 (the last bin also includes alpha = 1.0). It also writes a copy
/// of each dataset with an extra <c>alpha</c> column into <paramref name="datasetWithAlphaOutputPath"/>.
/// </summary>
/// <param name="sourceFolder">Folder whose files are read as CSV datasets.</param>
/// <param name="datasetWithAlphaOutputPath">Folder that receives one <c>{filename}.csv</c> per input file.</param>
/// <param name="outputFilename">Path of the CSV that receives one bin-frequency row per input file.</param>
/// <param name="logFilename">Path of the log file.</param>
/// <remarks>
/// A file that fails at any stage (reading, computing, writing) is logged, gets a single row of ten <c>NaN</c>
/// values in the results, and is skipped.
/// </remarks>
public static void WriteBaseAlphaBinFreq(string sourceFolder, string datasetWithAlphaOutputPath, string outputFilename, string logFilename)
{
    const int BinCount = 10;

    // Guards resultsBuilder, logger, finishedCount and hasFinished: they are touched by the Parallel.ForEach
    // workers and by the process-exit handler, and StringBuilder is not safe for concurrent writers.
    object gate = new object();
    int finishedCount = 0;
    bool hasFinished = false;
    StringBuilder logger = new StringBuilder();
    StringBuilder resultsBuilder = new StringBuilder("filename,bin0,bin1,bin2,bin3,bin4,bin5,bin6,bin7,bin8,bin9\r\n");

    // Keep the delegate in a variable so the very same handler can be unsubscribed once the run has completed.
    EventHandler exitHandler = CurrentDomain_ProcessExit;
    AppDomain.CurrentDomain.ProcessExit += exitHandler;

    Parallel.ForEach(
        Directory.EnumerateFiles(sourceFolder),
        new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount },
        filename => TryGetBinFreq(filename, true));

    lock (gate)
    {
        logger.AppendLine($"Finished all {finishedCount}. ");
        Console.WriteLine($"Finished all {finishedCount}. ");
        Output();
        hasFinished = true;
    }

    // The run completed normally; the exit handler has nothing left to flush.
    AppDomain.CurrentDomain.ProcessExit -= exitHandler;

    // Flushes what has been collected so far if the process exits before the run completes.
    void CurrentDomain_ProcessExit(object? sender, EventArgs e)
    {
        lock (gate)
        {
            if (!hasFinished)
            {
                logger.AppendLine($"Program exited after finishing {finishedCount}. ");
                Output();
            }
        }
    }

    // Writes the results table and the log. Callers must hold `gate`.
    void Output()
    {
        using StreamWriter resultsWriter = new StreamWriter(outputFilename);
        resultsWriter.Write(resultsBuilder);
        using StreamWriter logWriter = new StreamWriter(logFilename);
        logWriter.Write(logger);
    }

    // Processes one file; any exception (including a failed read) is logged and the file is skipped.
    void TryGetBinFreq(string filename, bool writeDatasetWithAlpha)
    {
        // Resolve the display name first so that a failed read is still reported under the short name.
        string sourcePath = filename;
        filename = Path.GetFileNameWithoutExtension(sourcePath);
        try
        {
            List<Instance> instances = CSV.ReadFromCsv(sourcePath, null);

            // Materialize once: the sequence is scanned once per bin and again when the dataset is written.
            List<(Instance instance, double alpha)> results = new KNNContext(instances).GetAllAlphaValues().ToList();
            List<double> alphas = results.Select(tuple => tuple.alpha).ToList();

            double[] binFreq = new double[BinCount];
            for (int i = 0; i < BinCount; i++)
            {
                // Derive both bounds from the bin index so that the upper bound of bin i is bit-identical to the
                // lower bound of bin i + 1. With i * 0.1 and lower + 0.1 the bounds disagree (0.5 + 0.1 is 0.6
                // but 6 * 0.1 is 0.6000000000000001), so alpha == 0.6 was counted in no bin.
                double binLowerRange = i / 10.0;
                double binUpperRange = i == BinCount - 1 ? 1.01 : (i + 1) / 10.0; // include alpha = 1.0 in bin9
                binFreq[i] = alphas.Count(a => a >= binLowerRange && a < binUpperRange) / (double)instances.Count;
            }

            if (writeDatasetWithAlpha)
            {
                StringBuilder sb = new StringBuilder($"{string.Join(',', instances.First().Features.Select(f => f.Name))},label,alpha\r\n");
                foreach ((Instance instance, double alpha) in results)
                {
                    sb.AppendLine($"{instance.Serialize()},{alpha}");
                }

                using StreamWriter sw = new StreamWriter(Path.Combine(datasetWithAlphaOutputPath, $"{filename}.csv"));
                sw.Write(sb);
            }

            // Record the result row only after every step succeeded, so a late failure cannot leave both a real
            // row and a NaN row for the same file.
            string resultRow = $"{filename},{string.Join(',', binFreq)}";
            lock (gate)
            {
                resultsBuilder.AppendLine(resultRow);
                finishedCount++;
                logger.AppendLine($"{DateTime.Now}\tSuccessfully finished {filename} (Total: {finishedCount})");
                Console.WriteLine($"{DateTime.Now}\tSuccessfully finished {filename} (Total: {finishedCount})");
                Console.WriteLine(resultRow);
            }
        }
        catch (Exception e)
        {
            Console.WriteLine($"{DateTime.Now}\t{e.GetType()} encountered in processing {filename}, skipping this file");

            // Append the whole error record under one lock so entries from other workers cannot interleave.
            lock (gate)
            {
                resultsBuilder.AppendLine($"{filename},{string.Join(',', Enumerable.Repeat("NaN", BinCount))}");
                logger.AppendLine(new string('>', 64));
                logger.AppendLine($"{DateTime.Now}\t{e.GetType()} encountered in processing {filename}, skipping this file");
                logger.AppendLine(e.ToString());
                logger.AppendLine(new string('>', 64));
            }
        }
    }
}