/// <summary>
/// Records basic, unvalidated facts about the worksheet range held by <paramref name="thisExcel"/>:
/// its column and row counts, its raw values, the addresses of its constant cells, and the
/// column and row at which those constants start.
/// </summary>
/// <param name="a">Analysis object that is populated in place.</param>
/// <param name="thisExcel">Workbook wrapper whose <c>excelRange</c> is inspected.</param>
/// <returns>The same <paramref name="a"/> instance that was passed in.</returns>
public BL.AnalysisObject basicVars(BL.AnalysisObject a, EL.singleExcel thisExcel)
{
    var range = thisExcel.excelRange;

    // Generic, untested data about the page.
    a.colCount = range.Columns.Count;
    a.rowCount = range.Rows.Count;

    // A multi-cell range yields a 2-D array, but a single-cell range yields the bare value.
    // Wrap the latter in a 1x1, 1-based array so the shape matches the multi-cell case
    // instead of failing the cast.
    object rawValues = range.get_Value(Excel.XlRangeValueDataType.xlRangeValueDefault);
    object[,] grid = rawValues as object[,];
    if (grid == null)
    {
        grid = (object[,])Array.CreateInstance(typeof(object), new[] { 1, 1 }, new[] { 1, 1 });
        grid[1, 1] = rawValues;
    }
    a.allTheData = grid;

    // Every cell in the range that holds a constant.
    // NOTE(review): SpecialCells is expected to raise a COM error when nothing matches;
    // that case is not handled here - confirm whether callers rely on it.
    var constantCells = range.SpecialCells(Excel.XlCellType.xlCellTypeConstants, Type.Missing);

    // Break the address list ("$A$1:$B$2,$D$4") into individual corner addresses, in the
    // order they were reported, dropping blanks and duplicates.
    string addressList = constantCells.Address;
    a.splitUpAddresses = addressList
        .Split(',', ':')
        .Where(s => !string.IsNullOrWhiteSpace(s))
        .Distinct()
        .ToArray();

    // Start column: the left-most column among the constant addresses. Column labels order
    // by length first ("Z" precedes "AA") and alphabetically within a length, so compare on
    // (length, ordinal). Unlike scanning a sorted copy for the first single-letter column,
    // this also terminates when every constant sits in a multi-letter column.
    string leftMostColumn = null;
    foreach (string address in a.splitUpAddresses)
    {
        string[] parts = address.Split('$');   // "$A$1" -> ["", "A", "1"]
        if (parts.Length < 2)
        {
            continue;
        }

        string column = parts[1];
        bool isFurtherLeft = leftMostColumn == null
            || column.Length < leftMostColumn.Length
            || (column.Length == leftMostColumn.Length
                && string.CompareOrdinal(column, leftMostColumn) < 0);
        if (isFurtherLeft)
        {
            leftMostColumn = column;
        }
    }
    a.startCol = leftMostColumn; // in case it isn't A

    // Start row: the row of the first address as reported, in case it isn't 1.
    // Assumes that address has the "$COL$ROW" form.
    string[] firstAddress = a.splitUpAddresses[0].Split('$');
    a.startRow = firstAddress[2];

    return a;
}
/// <summary>
/// Opens the workbook at <paramref name="di"/> + <paramref name="fileLocation"/>, switches to
/// worksheet 1, and returns the values of its range as a 1-based two-dimensional array.
/// </summary>
/// <param name="di">Base directory; its string form is prefixed to <paramref name="fileLocation"/>.</param>
/// <param name="fileLocation">Path suffix appended verbatim to the directory's string form.</param>
/// <returns>
/// The range values. A single-cell range is returned as a 1x1 array indexed at [1, 1] so that
/// callers see the same shape as for a multi-cell range.
/// </returns>
public static object[,] getTrainData(DirectoryInfo di, string fileLocation)
{
    // Plain concatenation is kept on purpose: a suffix that begins with a directory
    // separator would be treated as rooted by Path.Combine, which would discard the base.
    fileLocation = di + fileLocation;

    EL.singleExcel wordFile = new EL.singleExcel().createExcel(fileLocation);
    try
    {
        EL.singleExcel.ExcelWorkSheetChange(wordFile, 1);

        // A multi-cell range yields a 2-D array; a single cell yields the bare value.
        object rawValues = wordFile.excelRange.get_Value(Excel.XlRangeValueDataType.xlRangeValueDefault);
        object[,] theWords = rawValues as object[,];
        if (theWords == null)
        {
            // Build a 1x1 array with lower bounds of 1 to match the indexing of the
            // multi-cell case. The element type is object so that a numeric or empty
            // cell is stored as-is rather than failing a cast to string.
            int[] lengths = { 1, 1 };
            int[] lowerBounds = { 1, 1 };
            theWords = (object[,])Array.CreateInstance(typeof(object), lengths, lowerBounds);
            theWords[1, 1] = rawValues;
        }

        return theWords;
    }
    finally
    {
        // Close the sheet even when reading fails, so the workbook is not left open.
        EL.singleExcel.CloseSheet(wordFile);
    }
}
/// <summary>
/// Program entry point. Scans every file in the user's "File Attachments" folder to estimate
/// table bounds, summarises the row/column counts with R, writes the summary to Excel, then
/// runs three training scans to build the strong and learned term lists and writes those out.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    REngine.SetEnvironmentVariables();
    REngine RBlock = REngine.GetInstance();

    DirectoryInfo di = new DirectoryInfo("C:\\Users\\" + Environment.UserName + "\\Documents\\File Attachments\\");
    string searchPattern = "*";

    // Set some bases. These need to be tested for each sample.
    int avgRowCount = 100;
    int avgColCount = 20;
    int fileCount = 0;

    try
    {
        // Get our files, create a library to hold everything.
        Library.ListofFiles = Library.makeList(di, searchPattern);

        // Look through each file.
        foreach (var a in Library.ListofFiles)
        {
            Console.WriteLine(a.FileName);
            string fileLocation = di + a.FileName;

            // Open the file; the finally block guarantees it is closed again even if
            // one of the analysis steps throws.
            EL.singleExcel thisExcel = new EL.singleExcel().createExcel(fileLocation);
            try
            {
                EL.singleExcel.ExcelWorkSheetChange(thisExcel, 1);

                // Get basic vars.
                new EDA.excelBasics().basicVars(a, thisExcel);

                // Run series of operations to get more exact bounds; the running
                // averages are fed in and replaced by the updated values.
                EDA.lookTriggers LT = new EDA.lookTriggers();
                LT.runTriggers(fileCount, a, avgRowCount, avgColCount, out avgRowCount, out avgColCount);

                Library.colCounts.Add(a.colCount);
                Library.rowCounts.Add(a.rowCount);

                // In the event that no rows are found, the file is not counted.
                if (a.rowCount != 0)
                {
                    fileCount++;
                }
            }
            finally
            {
                EL.singleExcel.CloseSheet(thisExcel);
            }

            Console.WriteLine("\n");
        }

        // Run some R analysis over the collected counts.
        IntegerVector rowR = RBlock.CreateIntegerVector(Library.rowCounts);
        IntegerVector colR = RBlock.CreateIntegerVector(Library.colCounts);
        RBlock.SetSymbol("rowR", rowR);
        RBlock.SetSymbol("colR", colR);

        // Mode: tabulate the values, then take the (first) most frequent one.
        RBlock.Evaluate("temp <- table(as.vector(rowR))");
        Library.groupStats.modeRow = EvaluateFirstInt(RBlock, "names(temp)[temp == max(temp)]");
        RBlock.Evaluate("temp <- table(as.vector(colR))");
        Library.groupStats.modeCol = EvaluateFirstInt(RBlock, "names(temp)[temp == max(temp)]");

        Library.groupStats.meanRow = EvaluateFirstInt(RBlock, "mean(rowR)");
        Library.groupStats.meanCol = EvaluateFirstInt(RBlock, "mean(colR)");
        Library.groupStats.medianRow = EvaluateFirstInt(RBlock, "median(rowR)");
        Library.groupStats.medianCol = EvaluateFirstInt(RBlock, "median(colR)");
        Library.groupStats.minRow = EvaluateFirstInt(RBlock, "min(rowR)");
        Library.groupStats.minCol = EvaluateFirstInt(RBlock, "min(colR)");
        Library.groupStats.maxRow = EvaluateFirstInt(RBlock, "max(rowR)");
        Library.groupStats.maxCol = EvaluateFirstInt(RBlock, "max(colR)");
        Library.groupStats.iqrRow = EvaluateFirstInt(RBlock, "IQR(rowR)");
        Library.groupStats.iqrCol = EvaluateFirstInt(RBlock, "IQR(colR)");

        // NOTE(review): quantile() yields several values and only the first is stored;
        // confirm that this is the intended statistic.
        Library.groupStats.quantileRow = EvaluateFirstInt(RBlock, "quantile(rowR)");
        Library.groupStats.quantileCol = EvaluateFirstInt(RBlock, "quantile(colR)");
    }
    finally
    {
        // Release the R engine whether or not the scan and statistics succeeded.
        RBlock.Dispose();
    }

    EL.singleExcel.outputObjectToExcel(Library.groupStats);

    // Build some training data from previous information, assumptions.
    object[,] theWords = TSR.trainMethods.getTrainData(di, "\\testdata\\trainlist.xlsx");
    List<string> words = new List<string>();
    TSR.trainMethods.makeTrainList(theWords, words);

    theWords = TSR.trainMethods.getTrainData(di, "\\testdata\\looselist.xlsx");
    List<string> looseWords = new List<string>();
    TSR.trainMethods.makeTrainList(theWords, looseWords);

    // Assigned by each scan through its out parameters, so no initial allocation is needed.
    Dictionary<string, int> discreteTermsDict;
    Dictionary<string, int> betterTermsDict;
    Dictionary<string, int> secondaryTermsDict;

    // Run training scenarios.
    TSR.trainMethods.runTrainScan(words, looseWords, avgColCount, Library.ListofFiles,
        out discreteTermsDict, out betterTermsDict, out secondaryTermsDict);
    TSR.trainMethods.buildTrainList(discreteTermsDict, words);

    TSR.trainMethods.runTrainScan(words, looseWords, avgColCount, Library.ListofFiles,
        out discreteTermsDict, out betterTermsDict, out secondaryTermsDict);
    TSR.trainMethods.buildTrainList(betterTermsDict, looseWords);
    TSR.trainMethods.buildTrainList(secondaryTermsDict, looseWords);

    // In tests, three cycles make for extremely high confidence.
    TSR.trainMethods.runTrainScan(words, looseWords, avgColCount, Library.ListofFiles,
        out discreteTermsDict, out betterTermsDict, out secondaryTermsDict);

    // Terms for classification.
    EL.singleExcel.outputListToExcel(words, "strongTrainList");
    EL.singleExcel.outputListToExcel(looseWords, "learnedTrainList");
}

/// <summary>
/// Evaluates an R expression, converts the result to integers, and returns the first element.
/// </summary>
/// <param name="engine">R engine on which the expression is evaluated.</param>
/// <param name="expression">R source text to evaluate.</param>
/// <returns>The first integer of the converted result.</returns>
/// <exception cref="InvalidOperationException">
/// The expression produced no integer values (for example, when the input vectors are empty).
/// </exception>
private static int EvaluateFirstInt(REngine engine, string expression)
{
    IntegerVector vector = engine.Evaluate(expression).AsInteger();
    int[] values = vector == null ? new int[0] : vector.ToArray();
    if (values.Length == 0)
    {
        throw new InvalidOperationException("R expression returned no values: " + expression);
    }

    return values[0];
}