Ejemplo n.º 1
0
            /// <summary>
            /// Fills <paramref name="a"/> with basic, unvalidated facts about the worksheet range
            /// exposed by <paramref name="thisExcel"/>: its column and row counts, all of its cell
            /// values, the distinct addresses of its constant cells, and the starting column and
            /// starting row derived from those addresses.
            /// </summary>
            /// <param name="a">Analysis object to populate; the same instance is returned.</param>
            /// <param name="thisExcel">Open workbook wrapper whose <c>excelRange</c> is inspected.</param>
            /// <returns>The populated <paramref name="a"/>.</returns>
            /// <exception cref="InvalidOperationException">
            /// No usable constant-cell address could be extracted from the range.
            /// </exception>
            public BL.AnalysisObject basicVars(BL.AnalysisObject a, EL.singleExcel thisExcel)
            {
                // Generic, untested data about the page, straight from Excel.
                a.colCount   = thisExcel.excelRange.Columns.Count;
                a.rowCount   = thisExcel.excelRange.Rows.Count;
                a.allTheData = (object[, ])thisExcel.excelRange.get_Value(Excel.XlRangeValueDataType.xlRangeValueDefault); // Every value in the range
                var constantCells = thisExcel.excelRange.SpecialCells(Excel.XlCellType.xlCellTypeConstants, Type.Missing); // Every cell in the range holding a constant

                // The address string is cut on ',' and ':' so each piece is a single corner
                // address; blanks and duplicates are dropped, discovery order is kept.
                a.splitUpAddresses = Regex.Split(constantCells.Address, @"(?:\,|\:)");
                a.splitUpAddresses = a.splitUpAddresses.Where(s => !string.IsNullOrWhiteSpace(s)).Distinct().ToArray();

                if (a.splitUpAddresses.Length == 0)
                {
                    throw new InvalidOperationException("No constant cell addresses were found in the range.");
                }

                // Start column = the lowest column among the constant addresses. Shorter column
                // names come first ("Z" before "AA"), then ordinal order within the same length.
                // The previous unbounded scan for a single-letter column ran past the end of the
                // array when every constant cell was in column AA or beyond.
                // Each address is assumed to look like "$COL$ROW", so splitting on '$' yields
                // ["", COL, ROW].
                string startColumn = a.splitUpAddresses
                                     .Select(address => address.Split('$'))
                                     .Where(parts => parts.Length > 1 && parts[1].Length > 0)
                                     .Select(parts => parts[1])
                                     .OrderBy(column => column.Length)
                                     .ThenBy(column => column, StringComparer.Ordinal)
                                     .FirstOrDefault();

                if (startColumn == null)
                {
                    throw new InvalidOperationException("Could not determine a start column from the constant cell addresses.");
                }
                a.startCol = startColumn; // Column of the first address, in case it isn't A

                // The start row is taken from the first address in discovery order (not sorted).
                string[] firstFound = a.splitUpAddresses[0].Split('$');
                if (firstFound.Length < 3)
                {
                    throw new InvalidOperationException("Unexpected cell address format: " + a.splitUpAddresses[0]);
                }
                a.startRow = firstFound[2]; // Row of the first address, in case it isn't 1

                return(a);
            }
 /// <summary>
 /// Opens the workbook at <paramref name="di"/> + <paramref name="fileLocation"/>, switches to
 /// worksheet 1 and returns the values of the wrapper's <c>excelRange</c> as a two-dimensional
 /// array. The workbook is always closed before returning, even when reading fails.
 /// </summary>
 /// <param name="di">Base directory; its string form is prefixed to <paramref name="fileLocation"/>.</param>
 /// <param name="fileLocation">Path fragment appended verbatim to the directory string.</param>
 /// <returns>
 /// The range values. When the range is a single cell, a 1x1 array with lower bounds of 1 is
 /// built so callers can index it the same way as the multi-cell result.
 /// </returns>
 public static object[,] getTrainData(DirectoryInfo di, string fileLocation)
 {
     // Plain concatenation is kept on purpose: callers pass fragments that start with a
     // separator, which Path.Combine would treat as rooted and drop the base directory.
     fileLocation = di + fileLocation;
     EL.singleExcel wordFile = new EL.singleExcel().createExcel(fileLocation);
     try
     {
         EL.singleExcel.ExcelWorkSheetChange(wordFile, 1);

         if (wordFile.excelRange.Count != 1)
         {
             return((object[, ])wordFile.excelRange.get_Value(Excel.XlRangeValueDataType.xlRangeValueDefault));
         }

         // A single cell comes back as a scalar rather than an array, and that scalar is not
         // necessarily a string (numbers, dates and booleans are boxed as their own types).
         // Convert instead of casting so a numeric cell no longer throws; an empty cell stays null.
         object cellValue = wordFile.excelRange.get_Value(Excel.XlRangeValueDataType.xlRangeValueDefault);
         string temp      = cellValue == null
                            ? null
                            : Convert.ToString(cellValue, System.Globalization.CultureInfo.InvariantCulture);

         int[] myLengthsArray = new int[2] { 1, 1 };
         int[] myBoundsArray  = new int[2] { 1, 1 };
         object[,] theWords   = (object[, ])Array.CreateInstance(typeof(String), myLengthsArray, myBoundsArray);
         theWords[1, 1] = temp;
         return(theWords);
     }
     finally
     {
         // Runs on every path so a failed read does not leave the workbook open.
         EL.singleExcel.CloseSheet(wordFile);
     }
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Program entry point. Scans every file in the user's "File Attachments" folder,
        /// collects per-file row/column counts, summarises them with R, writes the summary to
        /// Excel, then runs three training scans over the files and writes the resulting word
        /// lists to Excel.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            REngine.SetEnvironmentVariables();
            REngine RBlock = REngine.GetInstance();

            DirectoryInfo di            = new DirectoryInfo("C:\\Users\\" + Environment.UserName + "\\Documents\\File Attachments\\");
            string        searchPattern = "*";

            // Set some bases. These need to be tested for each sample.
            int avgRowCount = 100;
            int avgColCount = 20;
            int fileCount   = 0;

            try
            {
                // Get our files, create a library to hold everything
                Library.ListofFiles = Library.makeList(di, searchPattern);

                // Look through each file
                foreach (var a in Library.ListofFiles)
                {
                    Console.WriteLine(a.FileName);
                    string fileLocation = di + a.FileName;

                    // Open the file; it is closed in the finally block so a failure while
                    // analysing one workbook does not leave it open.
                    EL.singleExcel thisExcel = new EL.singleExcel().createExcel(fileLocation);
                    try
                    {
                        EL.singleExcel.ExcelWorkSheetChange(thisExcel, 1);

                        // Get basic vars
                        new EDA.excelBasics().basicVars(a, thisExcel);

                        // Run series of operations to get more exact bounds; the running
                        // averages are passed in and handed back through the out parameters.
                        EDA.lookTriggers LT = new EDA.lookTriggers();
                        LT.runTriggers(fileCount, a, avgRowCount, avgColCount, out avgRowCount, out avgColCount);

                        Library.colCounts.Add(a.colCount);
                        Library.rowCounts.Add(a.rowCount);

                        // In the event that no rows are found, the file is not counted
                        if (a.rowCount != 0)
                        {
                            fileCount++;
                        }
                    }
                    finally
                    {
                        EL.singleExcel.CloseSheet(thisExcel);
                    }

                    Console.WriteLine("\n");
                }

                // Run some R analysis
                IntegerVector rowR = RBlock.CreateIntegerVector(Library.rowCounts);
                IntegerVector colR = RBlock.CreateIntegerVector(Library.colCounts);

                RBlock.SetSymbol("rowR", rowR);
                RBlock.SetSymbol("colR", colR);

                // Evaluates an R expression and keeps only the first element of the result,
                // coerced to integer.
                Func <string, int> firstInteger = expression => RBlock.Evaluate(expression).AsInteger().ToArray()[0];

                // Mode: tabulate the counts, then read the name of the most frequent entry.
                RBlock.Evaluate("temp <- table(as.vector(rowR))");
                Library.groupStats.modeRow = firstInteger("names(temp)[temp == max(temp)]");
                RBlock.Evaluate("temp <- table(as.vector(colR))");
                Library.groupStats.modeCol = firstInteger("names(temp)[temp == max(temp)]");

                Library.groupStats.meanRow   = firstInteger("mean(rowR)");
                Library.groupStats.meanCol   = firstInteger("mean(colR)");
                Library.groupStats.medianRow = firstInteger("median(rowR)");
                Library.groupStats.medianCol = firstInteger("median(colR)");
                Library.groupStats.minRow    = firstInteger("min(rowR)");
                Library.groupStats.minCol    = firstInteger("min(colR)");
                Library.groupStats.maxRow    = firstInteger("max(rowR)");
                Library.groupStats.maxCol    = firstInteger("max(colR)");
                Library.groupStats.iqrRow    = firstInteger("IQR(rowR)");
                Library.groupStats.iqrCol    = firstInteger("IQR(colR)");

                // NOTE(review): quantile() returns several values and only the first one is
                // stored here - confirm that is the intended statistic.
                Library.groupStats.quantileRow = firstInteger("quantile(rowR)");
                Library.groupStats.quantileCol = firstInteger("quantile(colR)");
            }
            finally
            {
                // Release the R engine whether or not the scan and analysis succeeded.
                RBlock.Dispose();
            }

            EL.singleExcel.outputObjectToExcel(Library.groupStats);

            // Build some training data from previous information, assumptions
            object[,] theWords = TSR.trainMethods.getTrainData(di, "\\testdata\\trainlist.xlsx");
            List <string> words = new List <string>();

            TSR.trainMethods.makeTrainList(theWords, words);

            theWords = TSR.trainMethods.getTrainData(di, "\\testdata\\looselist.xlsx");
            List <string> looseWords = new List <string>();

            TSR.trainMethods.makeTrainList(theWords, looseWords);

            // Assigned by runTrainScan through its out parameters, so no initial instances are needed.
            Dictionary <string, int> discreteTermsDict;
            Dictionary <string, int> betterTermsDict;
            Dictionary <string, int> secondaryTermsDict;

            // Run training scenarios
            TSR.trainMethods.runTrainScan(words, looseWords, avgColCount, Library.ListofFiles, out discreteTermsDict, out betterTermsDict, out secondaryTermsDict);
            TSR.trainMethods.buildTrainList(discreteTermsDict, words);

            TSR.trainMethods.runTrainScan(words, looseWords, avgColCount, Library.ListofFiles, out discreteTermsDict, out betterTermsDict, out secondaryTermsDict);
            TSR.trainMethods.buildTrainList(betterTermsDict, looseWords);
            TSR.trainMethods.buildTrainList(secondaryTermsDict, looseWords);

            // In tests, three cycles make for extremely high confidence
            TSR.trainMethods.runTrainScan(words, looseWords, avgColCount, Library.ListofFiles, out discreteTermsDict, out betterTermsDict, out secondaryTermsDict);

            // Terms for classification
            EL.singleExcel.outputListToExcel(words, "strongTrainList");
            EL.singleExcel.outputListToExcel(looseWords, "learnedTrainList");
        }