protected override void BuildModel() { StringBuilder rCmd = new StringBuilder(@" trainRaw=read.csv(""" + RawTrainPath.Replace("\\", "/") + @""", header = TRUE, sep = ',')" + @" trainNorm=trainRaw cols=NCOL(trainRaw) mxmn <- array(0, dim=c(2,cols-1)) options(scipen=999) for(i in 2:cols) { cmax=max(trainRaw[,i]) cmin=min(trainRaw[,i]) mxmn[1,i-1]=cmax mxmn[2,i-1]=cmin trainNorm[,i]=(trainRaw[,i]-((cmax+cmin)/2))/((cmax-cmin)/2) } trainNorm[is.na(trainNorm)]=0 write.table(data.frame(mxmn), file=""" + ColumnMaxMinPath.Replace("\\", "/") + @""", row.names=FALSE, col.names=FALSE, sep=',')" + @" library(randomForest) rf=randomForest(Class ~., data=trainNorm, ntree=" + _numTrees + ", importance=TRUE, seed=99)" + @" save(rf, file=""" + RandomForestModelPath.Replace("\\", "/") + @""")" + @" "); string output, error; R.Execute(rCmd.ToString(), false, out output, out error); try { if (!File.Exists(RandomForestModelPath)) { throw new Exception("RandomForest model was not created at \"" + RandomForestModelPath + "\"."); } } catch (Exception ex) { throw new Exception("ERROR: RandomForest failed to build model. Output and error messages follow:" + Environment.NewLine + "\tException message: " + ex.Message + Environment.NewLine + "\tR output: " + output + Environment.NewLine + "\tR orror: " + error); } finally { try { File.Delete(RawTrainPath); } catch { } } }
protected override void BuildModel() { StringBuilder rCmd = new StringBuilder(@" trainRaw=read.csv(""" + RawTrainPath.Replace(@"\", "/") + @""", header = TRUE, sep = ',')" + @" trainNorm=trainRaw cols=NCOL(trainRaw) mxmn <- array(0, dim=c(2,cols-1)) options(scipen=999) for(i in 2:cols) { cmax=max(trainRaw[,i]) cmin=min(trainRaw[,i]) mxmn[1,i-1]=cmax mxmn[2,i-1]=cmin trainNorm[,i]=(trainRaw[,i]-((cmax+cmin)/2))/((cmax-cmin)/2) } trainNorm[is.na(trainNorm)]=0 write.table(data.frame(mxmn), file=""" + ColumnMaxMinPath.Replace(@"\", "/") + @""", row.names=FALSE, col.names=FALSE, sep=',')" + @" library(ada) set.seed(99) cls <- sort(unique(trainNorm$Class)) cList <- vector('list', length(cls)) save(cls, file=""" + ClassPath.Replace(@"\", "/") + @""")" + @" binForm = ""Class ~.,data=trainNorm,iter=" + _iterations + ",loss='logistic',nu=1,type='discrete'" + @""" multForm = ""Class ~.,data=cList[[i]],iter=" + _iterations + ",loss='logistic',nu=1,type='discrete'" + @""" if(length(cls)==2) { adb <- eval(parse(text=paste('ada(', binForm, ')', sep=''))) save(adb, file=""" + AdaModelPath.Replace(@"\", "/") + @""")" + @" } else { for(i in 1:length(cls)) { cList[[i]] <- trainNorm for(j in 1:length(cls)) { if(i!=j) { levels(cList[[i]]$Class)[levels(cList[[i]]$Class)==cls[j]] <- 'REST' } } adb <- eval(parse(text=paste('ada(', multForm, ')', sep=''))) save(adb, file=paste('" + Path.Combine(Model.ModelDirectory, @"ada', i, '.RData', sep='')").Replace("\\", "/") + @")" + @" } } "); R.Execute(rCmd.ToString(), false); File.Delete(RawTrainPath); }
public override void Classify(FeatureVectorList featureVectors) { base.Classify(featureVectors); if (featureVectors != null && featureVectors.Count > 0) { using (StreamWriter predictionsFile = new StreamWriter(RawPredictionInstancesPath)) { predictionsFile.Write("Class"); foreach (PTL.ATT.Models.Feature f in Model.Features.OrderBy(i => i.Id)) { predictionsFile.Write("," + f.Id); } predictionsFile.WriteLine(); foreach (FeatureVector vector in featureVectors) { predictionsFile.Write(vector.DerivedFrom.TrueClass); foreach (PTL.ATT.Models.Feature f in Model.Features.OrderBy(i => i.Id)) { object value; if (vector.TryGetValue(f.Id, out value)) { predictionsFile.Write("," + value); } else { predictionsFile.Write(",0"); } } predictionsFile.WriteLine(); } predictionsFile.Close(); } StringBuilder rCmd = new StringBuilder(@" predRaw=read.csv(""" + RawPredictionInstancesPath.Replace("\\", "/") + @""", header = TRUE, sep = ',')" + @" mxmn=read.csv(""" + ColumnMaxMinPath.Replace("\\", "/") + @""", header = FALSE, sep = ',')" + @" predNorm=predRaw for(i in 2:NCOL(predRaw)) { cmax=mxmn[1,i-1] cmin=mxmn[2,i-1] predNorm[,i] = (predRaw[,i]-((cmax+cmin)/2))/((cmax-cmin)/2) } predNorm[is.na(predNorm)]=0 library(randomForest) load(file=""" + RandomForestModelPath.Replace("\\", "/") + @""")" + @" rf.pred=predict(rf, predNorm, norm.votes=TRUE, type='prob') dfp<-data.frame(rf.pred) names(dfp)[names(dfp)=='NULL.'] <- 'NULL' write.table(dfp, file=""" + PredictionsPath.Replace("\\", "/") + @""", row.names=FALSE, sep=',')" + @" "); string output, error; R.Execute(rCmd.ToString(), false, out output, out error); try { using (StreamReader predictionsFile = new StreamReader(PredictionsPath)) { string[] colnames = predictionsFile.ReadLine().Split(','); int row = 0; string line; while ((line = predictionsFile.ReadLine()) != null) { string[] lines = line.Split(','); for (int i = 0; i < colnames.Length; i++) { string label = colnames[i].Replace("\"", @""); label = label.Replace(".", " "); float prob = float.Parse(lines[i]); featureVectors[row].DerivedFrom.PredictionConfidenceScores.Add(label, prob); } row++; } predictionsFile.Close(); if (row != featureVectors.Count) { throw new Exception("Number of predictions doesn't match number of input vectors"); } } } catch (Exception ex) { throw new Exception("ERROR: RandomForest failed to classify points. Output and error messages follow:" + Environment.NewLine + "\tException message: " + ex.Message + Environment.NewLine + "\tR output: " + output + Environment.NewLine + "\tR orror: " + error); } finally { try { File.Delete(ColumnMaxMinPath); } catch { } try { File.Delete(RandomForestModelPath); } catch { } try { File.Delete(RawPredictionInstancesPath); } catch { } try { File.Delete(PredictionsPath); } catch { } } } }
public override void Classify(FeatureVectorList featureVectors) { base.Classify(featureVectors); if (featureVectors != null && featureVectors.Count > 0) { using (StreamWriter predictionsFile = new StreamWriter(RawPredictionInstancesPath)) { predictionsFile.Write("Class"); foreach (PTL.ATT.Models.Feature f in Model.Features.OrderBy(i => i.Id)) { predictionsFile.Write("," + f.Id); } predictionsFile.WriteLine(); foreach (FeatureVector vector in featureVectors) { predictionsFile.Write(vector.DerivedFrom.TrueClass); foreach (PTL.ATT.Models.Feature f in Model.Features.OrderBy(i => i.Id)) { object value; if (vector.TryGetValue(f.Id, out value)) { predictionsFile.Write("," + value); } else { predictionsFile.Write(",0"); } } predictionsFile.WriteLine(); } predictionsFile.Close(); } StringBuilder rCmd = new StringBuilder(@" predRaw=read.csv(""" + RawPredictionInstancesPath.Replace("\\", "/") + @""", header = TRUE, sep = ',')" + @" mxmn=read.csv(""" + ColumnMaxMinPath.Replace("\\", "/") + @""", header = FALSE, sep = ',')" + @" predNorm=predRaw for(i in 2:NCOL(predRaw)) { cmax=mxmn[1,i-1] cmin=mxmn[2,i-1] predNorm[,i] = (predRaw[,i]-((cmax+cmin)/2))/((cmax-cmin)/2) } predNorm[is.na(predNorm)]=0 library(ada) set.seed(99) load(file=""" + ClassPath.Replace("\\", "/") + @""")" + @" if(length(cls)==2) { load(file=""" + AdaModelPath.Replace("\\", "/") + @""")" + @" adb.pred<-predict(adb, newdata=predNorm, type='prob') mult<-data.frame(adb.pred) names(mult)<-sort(c(toString(cls[1]), toString(cls[2]))) } else { mult<-data.frame(matrix(0, ncol=1, nrow=NROW(predRaw))) names(mult)<-c('INIT_DF') for(i in 1:length(cls)) { load(file=paste('" + Path.Combine(Model.ModelDirectory, @"ada', i, '.RData', sep='')").Replace("\\", "/") + @")" + @" adb.pred<-predict(adb, newdata=predNorm, type='prob') abp<-data.frame(adb.pred) names(abp)<-sort(c(toString(cls[i]), 'REST')) abp<-subset(abp, select=-c(REST)) mult<-cbind(mult, abp) } mult<-subset(mult, select=-c(INIT_DF)) mult<-1/(1+exp(-1*mult)) sums<-data.frame(rowSums(mult)) for(j in 1:length(cls)) { mult[j]<-mult[j]/sums } } write.table(mult, file=""" + PredictionsPath.Replace("\\", "/") + @""", row.names=FALSE, sep=',')" + @" "); string output, error; R.Execute(rCmd.ToString(), false, out output, out error); try { using (StreamReader predictionsFile = new StreamReader(PredictionsPath)) { string[] colnames = predictionsFile.ReadLine().Split(','); int row = 0; string line; while ((line = predictionsFile.ReadLine()) != null) { string[] lines = line.Split(','); for (int i = 0; i < colnames.Length; i++) { string label = colnames[i].Replace("\"", ""); float prob = float.Parse(lines[i]); featureVectors[row].DerivedFrom.PredictionConfidenceScores.Add(label, prob); } row++; } predictionsFile.Close(); if (row != featureVectors.Count) { throw new Exception("Number of predictions doesn't match number of input vectors"); } } } catch (Exception ex) { throw new Exception("ERROR: AdaBoost failed to classify points. Output and error messages follow:" + Environment.NewLine + "\tException message: " + ex.Message + Environment.NewLine + "\tR output: " + output + Environment.NewLine + "\tR orror: " + error); } finally { try { File.Delete(ColumnMaxMinPath); } catch { } try { File.Delete(ClassPath); } catch { } try { File.Delete(AdaModelPath); } catch { } try { File.Delete(RawPredictionInstancesPath); } catch { } try { File.Delete(PredictionsPath); } catch { } } } }