예제 #1
0
        /// <summary>
        /// Collects the addresses recorded in the reference table for the formula at
        /// <paramref name="addr"/>.
        /// </summary>
        /// <param name="addr">Address of the formula cell to look up.</param>
        /// <returns>
        /// A new set holding one address per reference-table entry of the formula. The set is
        /// empty when the address lies outside the formula box, when its reference is not
        /// on-sheet, or when the reference table has no entry for it.
        /// </returns>
        public HashSet<AST.Address> getFormulaSingleCellInputs(AST.Address addr)
        {
            var inputs = new HashSet<AST.Address>();

            // nothing is recorded for addresses outside the formula box
            if (!InFormulaBox(addr))
            {
                return inputs;
            }

            var formulaRef = FormulaAddressToReference(addr);
            if (!formulaRef.OnSheet)
            {
                return inputs;
            }

            // a formula such as =1 + 1 is still a formula, but it has no
            // references and therefore no entry in the table
            var tableKey = new Tuple<int, int>(formulaRef.Row, formulaRef.Col);
            if (!_referenceTable.ContainsKey(tableKey))
            {
                return inputs;
            }

            foreach (Reference inputRef in _referenceTable[tableKey])
            {
                inputs.Add(ValueReferenceToAddress(inputRef.Row, inputRef.Col));
            }

            return inputs;
        }
예제 #2
0
        /// <summary>
        /// Encodes a bitmap as PNG, uploads it to the configured S3 bucket, and returns a
        /// pre-signed GET URL for the uploaded object.
        /// </summary>
        /// <param name="addr">Address from which the object key is derived (see GetImageName).</param>
        /// <param name="b">Bitmap to encode and upload.</param>
        /// <param name="secret">Secret passed, together with the stored id, to the S3 client factory.</param>
        /// <returns>The pre-signed URL generated for the object key.</returns>
        public string UploadImageToS3(AST.Address addr, Bitmap b, string secret)
        {
            // the image name is derived from the cell address (an MD5-based key)
            var imagename = GetImageName(addr);

            // the url to the bitmap
            string url;

            // convert Bitmap to MemoryStream; dispose it when we are done
            using (MemoryStream stream = new MemoryStream())
            {
                b.Save(stream, System.Drawing.Imaging.ImageFormat.Png);

                // Save() leaves the position at the end of the stream; rewind so
                // that the upload reads the PNG bytes from the beginning
                stream.Position = 0;

                // upload MemoryStream to S3
                using (AmazonS3 client = Amazon.AWSClientFactory.CreateAmazonS3Client(_id, secret))
                {
                    // generate url
                    GetPreSignedUrlRequest request = new GetPreSignedUrlRequest()
                    {
                        BucketName = _s3bucket,
                        Key        = imagename,
                        Verb       = HttpVerb.GET,
                        Expires    = DateTime.Now.AddMonths(24)
                    };
                    url = client.GetPreSignedURL(request);

                    // upload image
                    var tu = new Amazon.S3.Transfer.TransferUtility(client);
                    tu.Upload(stream, _s3bucket, imagename);
                }
            }

            return url;
        }
예제 #3
0
 /// <summary>
 /// Records <paramref name="input"/> for <paramref name="addr"/> unless an entry for
 /// that address already exists; an existing entry is left untouched.
 /// </summary>
 public void AddInput(AST.Address addr, string input)
 {
     // first value wins: never overwrite a stored input
     if (_inputs.ContainsKey(addr))
     {
         return;
     }

     _inputs.Add(addr, input);
 }
예제 #4
0
 /// <summary>
 /// Tests whether an address lies inside the value box, whose bounds
 /// (top/bottom rows, left/right columns) are all inclusive.
 /// </summary>
 private bool InValueBox(AST.Address addr)
 {
     bool row_inside = addr.Row >= _value_box_top &&
                       addr.Row <= _value_box_bottom;
     if (!row_inside)
     {
         return false;
     }

     bool col_inside = addr.Col >= _value_box_left &&
                       addr.Col <= _value_box_right;
     return col_inside;
 }
예제 #5
0
        /// <summary>
        /// Builds the image file name for a cell: the job state string, followed by the
        /// upper-case hex MD5 of the cell's fully-qualified A1 address, followed by ".png",
        /// all URL-encoded.
        /// </summary>
        /// <param name="addr">Address whose fully-qualified A1 form is hashed.</param>
        /// <returns>The URL-encoded image name.</returns>
        public string GetImageName(AST.Address addr)
        {
            // calculate MD5; the hash object is disposable, so release it promptly
            byte[] hash;
            using (MD5 md5 = MD5.Create())
            {
                // NOTE(review): ASCII encoding maps every non-ASCII character to '?',
                // so addresses differing only in such characters hash identically.
                // Left unchanged because switching encodings would rename existing images.
                byte[] bytes = System.Text.Encoding.ASCII.GetBytes(addr.A1FullyQualified());
                hash = md5.ComputeHash(bytes);
            }

            StringBuilder sb = new StringBuilder();

            // prepend the job state ID
            sb.Append(_jobstate.ToString());

            // convert byte array to hex string
            for (int i = 0; i < hash.Length; i++)
            {
                sb.Append(hash[i].ToString("X2"));
            }

            // stick a ".png" on the end
            sb.Append(".png");

            // url encode it
            return System.Web.HttpUtility.UrlEncode(sb.ToString());
        }
예제 #6
0
파일: Analysis.cs 프로젝트: sjas/DataDebug
        /// <summary>
        /// Recursively computes the weight of a node and stores it in the DAG.
        /// A non-formula node weighs 1; a formula node weighs the sum of the weights of
        /// all of its inputs (cells of its input vectors plus its single-cell inputs).
        /// </summary>
        /// <param name="node">Node whose weight is computed.</param>
        /// <param name="dag">Graph that is queried for inputs and receives the weights.</param>
        /// <returns>The weight that was stored for <paramref name="node"/>.</returns>
        private static int PropagateNodeWeight(AST.Address node, DAG dag)
        {
            // base case: the node is an input
            if (!dag.isFormula(node))
            {
                dag.setWeight(node, 1);
                return 1;
            }

            // gather every input of the formula: vector cells first, then single cells
            var input_ranges  = dag.getFormulaInputVectors(node);
            var single_inputs = dag.getFormulaSingleCellInputs(node);
            var all_inputs    = input_ranges.SelectMany(rng => rng.Addresses()).ToList();
            all_inputs.AddRange(single_inputs);

            // recurse into each input and accumulate
            var total = 0;
            for (int idx = 0; idx < all_inputs.Count; idx++)
            {
                total += PropagateNodeWeight(all_inputs[idx], dag);
            }

            dag.setWeight(node, total);
            return total;
        }
예제 #7
0
 /// <summary>
 /// Creates a log entry by copying every constructor argument into the
 /// backing field of the same name.
 /// </summary>
 public LogEntry(
     AnalysisType procedure,
     string filename,
     AST.Address address,
     string original_value,
     string erroneous_value,
     double output_error_magnitude,
     double num_input_error_magnitude,
     double str_input_error_magnitude,
     bool was_flagged,
     bool was_error,
     double significance,
     double threshold)
 {
     // procedure, file and cell
     _procedure = procedure;
     _filename = filename;
     _address = address;

     // original and erroneous values
     _original_value = original_value;
     _erroneous_value = erroneous_value;

     // error magnitudes
     _output_error_magnitude = output_error_magnitude;
     _num_input_error_magnitude = num_input_error_magnitude;
     _str_input_error_magnitude = str_input_error_magnitude;

     // flags
     _was_flagged = was_flagged;
     _was_error = was_error;

     // significance and threshold
     _significance = significance;
     _threshold = threshold;
 }
예제 #8
0
파일: DAG.cs 프로젝트: sjas/DataDebug
        /// <summary>
        /// Depth-first search for a dependency cycle, following formula inputs
        /// starting at <paramref name="current_addr"/>.
        /// </summary>
        /// <param name="current_addr">Cell being visited.</param>
        /// <param name="visited">
        /// Cells on the path from the start of the traversal to (but excluding) the
        /// current cell. This dictionary is never modified; a copy is extended instead.
        /// </param>
        /// <param name="from_addr">Value stored against each visited cell in the path dictionary.</param>
        /// <returns>
        /// True if some chain of inputs leads back to a cell already on the path; false otherwise.
        /// </returns>
        private bool traversalHasLoop(AST.Address current_addr, Dictionary<AST.Address, AST.Address> visited, AST.Address from_addr)
        {
            // base case 1: loop check
            if (visited.ContainsKey(current_addr))
            {
                return true;
            }
            // base case 2: an input cell
            if (!_formulas.ContainsKey(current_addr))
            {
                return false;
            }

            // recursive case (it's a formula)
            // check both single inputs and the inputs of any vector inputs
            HashSet<AST.Address> single_inputs = _f2i[current_addr];
            HashSet<AST.Address> vector_inputs = new HashSet<AST.Address>(_f2v[current_addr].SelectMany(addrs => addrs.Addresses()));

            // The path handed to every child is the same (visited + current cell) and
            // callees never mutate the dictionary they are given, so it is built once,
            // and only if there is at least one input to visit.
            Dictionary<AST.Address, AST.Address> path = null;

            foreach (AST.Address input_addr in vector_inputs.Union(single_inputs))
            {
                if (path == null)
                {
                    path = new Dictionary<AST.Address, AST.Address>(visited);
                    path.Add(current_addr, from_addr);
                }

                // stop at the first cycle; the remaining inputs cannot change the answer
                if (traversalHasLoop(input_addr, path, from_addr))
                {
                    return true;
                }
            }

            return false;
        }
예제 #9
0
 /// <summary>
 /// Tests whether an address lies inside the formula box, whose bounds
 /// (top/bottom rows, left/right columns) are all inclusive.
 /// </summary>
 private bool InFormulaBox(AST.Address addr)
 {
     bool row_inside = addr.Row >= _formula_box_top &&
                       addr.Row <= _formula_box_bottom;
     if (!row_inside)
     {
         return false;
     }

     bool col_inside = addr.Col >= _formula_box_left &&
                       addr.Col <= _formula_box_right;
     return col_inside;
 }
예제 #10
0
 /// <summary>
 /// Initializes a log entry; each argument is stored, unchanged, in the
 /// private field that carries the same name with a leading underscore.
 /// </summary>
 public LogEntry(
     AnalysisType procedure,
     string filename,
     AST.Address address,
     string original_value,
     string erroneous_value,
     double output_error_magnitude,
     double num_input_error_magnitude,
     double str_input_error_magnitude,
     bool was_flagged,
     bool was_error,
     double significance,
     double threshold)
 {
     // assignments follow the parameter order
     _procedure = procedure;
     _filename = filename;
     _address = address;
     _original_value = original_value;
     _erroneous_value = erroneous_value;
     _output_error_magnitude = output_error_magnitude;
     _num_input_error_magnitude = num_input_error_magnitude;
     _str_input_error_magnitude = str_input_error_magnitude;
     _was_flagged = was_flagged;
     _was_error = was_error;
     _significance = significance;
     _threshold = threshold;
 }
예제 #11
0
        /// <summary>
        /// Fetches the top-level AST of the formula at <paramref name="addr"/> from the graph
        /// and hands it to <c>ExpressionTools.flattenExpression</c>, which merges the subtrees.
        /// </summary>
        /// <returns>The result of flattening the formula's AST.</returns>
        private static ExpressionTools.EData inlineExpression(AST.Address addr, Depends.DAG graph, MemoDBOpt memodb)
        {
            return ExpressionTools.flattenExpression(graph.getASTofFormulaAt(addr), graph, memodb);
        }
예제 #12
0
        /// <summary>
        /// Looks up the formula at an address by delegating to the graph of the
        /// worksheet the address belongs to.
        /// </summary>
        /// <returns>
        /// Whatever the worksheet graph returns for the address, or null when the
        /// worksheet name is not known.
        /// </returns>
        public string getFormulaAtAddress(AST.Address addr)
        {
            var sheet_name = addr.WorksheetName;

            // unknown worksheet: there is no graph to ask
            if (!_worksheet_names_indices.ContainsKey(sheet_name))
            {
                return null;
            }

            var sheet_graph = _worksheet_graphs[_worksheet_names_indices[sheet_name]];
            return sheet_graph.getFormulaAtAddress(addr);
        }
예제 #13
0
        /// <summary>
        /// Reports whether the cell at an address is a formula, by delegating to the
        /// graph of the worksheet the address belongs to.
        /// </summary>
        /// <returns>
        /// The worksheet graph's answer, or false when the worksheet name is not known.
        /// </returns>
        public bool isFormula(AST.Address addr)
        {
            var sheet_name = addr.WorksheetName;

            // unknown worksheet: there is no graph to ask
            if (!_worksheet_names_indices.ContainsKey(sheet_name))
            {
                return false;
            }

            var sheet_graph = _worksheet_graphs[_worksheet_names_indices[sheet_name]];
            return sheet_graph.isFormula(addr);
        }
예제 #14
0
        /// <summary>
        /// For every input cell, injects up to <paramref name="k"/> generated error strings one
        /// at a time, measures the total output error each one causes, and remembers the
        /// string causing the largest error. Returns the top 5% of inputs (rounded up),
        /// ranked by that largest error, mapped to their worst error string.
        /// </summary>
        /// <param name="output_nodes">Output cells whose values are captured after each injection.</param>
        /// <param name="inputs">Input cells and their original values.</param>
        /// <param name="k">Number of alternatives to consider per input.</param>
        /// <param name="correct_outputs">Outputs computed from the unmodified inputs.</param>
        /// <param name="app">Excel application used for injection.</param>
        /// <param name="wb">Workbook the values are injected into.</param>
        /// <param name="c">Classification used to generate the error strings.</param>
        /// <param name="dag">Dependence graph used when saving outputs.</param>
        /// <returns>Input address to error string, for the highest-impact inputs.</returns>
        public static CellDict GenImportantErrors(AST.Address[] output_nodes,
                                                  CellDict inputs,
                                                  int k,         // number of alternatives to consider
                                                  CellDict correct_outputs,
                                                  Excel.Application app,
                                                  Excel.Workbook wb,
                                                  Classification c,
                                                  DAG dag)
        {
            var eg = new ErrorGenerator();
            var max_error_produced_dictionary = new Dictionary<AST.Address, Tuple<string, double>>();

            foreach (KeyValuePair<AST.Address, string> pair in inputs)
            {
                AST.Address addr       = pair.Key;
                string      orig_value = pair.Value;

                // worst error seen so far for this input
                double max_error_produced = 0.0;
                string max_error_string   = "";

                // get k strings
                string[] errorstrings = eg.GenerateErrorStrings(orig_value, c, k);

                for (int i = 0; i < k; i++)
                {
                    CellDict cd = new CellDict();
                    cd.Add(addr, errorstrings[i]);

                    CellDict incorrect_outputs;
                    try
                    {
                        // inject the typo
                        InjectValues(app, wb, cd);

                        // save function outputs
                        incorrect_outputs = SaveOutputs(output_nodes, dag);
                    }
                    finally
                    {
                        // always remove the typo that was introduced, even if
                        // injection or capture failed, so that the workbook is
                        // never left holding an erroneous value
                        cd.Clear();
                        cd.Add(addr, orig_value);
                        InjectValues(app, wb, cd);
                    }

                    double total_error = Utility.CalculateTotalError(correct_outputs, incorrect_outputs);

                    // keep track of the largest observed max error
                    if (total_error > max_error_produced)
                    {
                        max_error_produced = total_error;
                        max_error_string   = errorstrings[i];
                    }
                }

                // add entry for this input with its max_error_produced
                max_error_produced_dictionary.Add(addr, new Tuple<string, double>(max_error_string, max_error_produced));
            }

            // keep the top 5% of inputs (rounded up), sorted by max_error_produced
            int keep = (int)Math.Ceiling(0.05 * inputs.Count);

            return max_error_produced_dictionary
                   .OrderByDescending(entry => entry.Value.Item2)
                   .Take(keep)
                   .ToDictionary(entry => entry.Key, entry => entry.Value.Item1);
        }
예제 #15
0
파일: Analysis.cs 프로젝트: sjas/DataDebug
        /// <summary>
        /// Scores every input cell of a range by testing, for each excluded index, whether
        /// the original output falls outside the bootstrap confidence bounds.
        /// </summary>
        /// <param name="dag">Graph consulted for the function's weight when <paramref name="weighted"/> is set.</param>
        /// <param name="rangeNode">Range whose cells are scored.</param>
        /// <param name="functionNode">Function cell whose weight scales the score.</param>
        /// <param name="boots">Bootstrap outputs; converted to numeric form and sorted before testing.</param>
        /// <param name="initial_output">Original output handed to the hypothesis test.</param>
        /// <param name="weighted">When false, a weight of 1 is used.</param>
        /// <param name="significance">Significance level handed to the hypothesis test.</param>
        /// <returns>
        /// A score for every input cell: 0 unless the test reports a non-zero outlieriness,
        /// in which case <c>(int)(weight * outlieriness)</c> is added.
        /// </returns>
        public static TreeScore NumericHypothesisTest(DAG dag, AST.Range rangeNode, AST.Address functionNode, FunctionOutput<string>[] boots, string initial_output, bool weighted, double significance)
        {
            // this function's input cells
            var cells      = rangeNode.Addresses();
            var cell_count = cells.Count();

            // scores
            var scores = new TreeScore();

            // convert to numeric, then sort
            var ordered_boots = SortBootstraps(ConvertToNumericOutput(boots));

            for (int idx = 0; idx < cell_count; idx++)
            {
                AST.Address cell = cells[idx];

                // weight of the function value of interest, or the default of 1
                int weight = weighted ? dag.getWeight(functionNode) : 1;

                double outlieriness = RejectNullHypothesis(ordered_boots, initial_output, idx, significance);

                // every input gets an entry, even if it never fails the test
                if (!scores.ContainsKey(cell))
                {
                    scores.Add(cell, 0);
                }

                // add weight to score if test fails
                if (outlieriness != 0.0)
                {
                    scores[cell] += (int)(weight * outlieriness);
                }
            }

            return scores;
        }
예제 #16
0
 /// <summary>
 /// Returns the color saved for an address, or <c>transparent</c> when
 /// no color has been saved for it.
 /// </summary>
 public CellColor restoreColorAt(AST.Address addr)
 {
     // nothing saved: fall back to the default
     if (!_d.ContainsKey(addr))
     {
         return transparent;
     }

     return _d[addr];
 }
예제 #17
0
파일: DAG.cs 프로젝트: sjas/DataDebug
 /// <summary>
 /// Stores the weight of a node, replacing any weight stored previously.
 /// </summary>
 public void setWeight(AST.Address node, int weight)
 {
     if (_weights.ContainsKey(node))
     {
         // overwrite the existing entry
         _weights[node] = weight;
         return;
     }

     // first weight recorded for this node
     _weights.Add(node, weight);
 }
예제 #18
0
 /// <summary>
 /// Returns the annotation stored for an address, built from its bug kind and note.
 /// An address without a stored bug kind yields a <c>NotABug</c> annotation with an empty note.
 /// </summary>
 public BugAnnotation AnnotationFor(AST.Address addr)
 {
     // unannotated cells are reported as "not a bug"
     if (!_bugs.ContainsKey(addr))
     {
         return new BugAnnotation(BugKind.NotABug, "");
     }

     var kind = _bugs[addr];
     var note = _notes[addr];
     return new BugAnnotation(kind, note);
 }
예제 #19
0
 /// <summary>
 /// Reports whether an address belongs to a bug class for which the
 /// dual lookup has an entry.
 /// </summary>
 /// <returns>
 /// False when the address has no bug class; otherwise whether the dual
 /// lookup contains that bug class.
 /// </returns>
 public bool AddressHasADual(AST.Address addr)
 {
     // short-circuit: the bug class is only looked up when the address is known
     return _bugclass_lookup.ContainsKey(addr)
         && _bugclass_dual_lookup.ContainsKey(_bugclass_lookup[addr]);
 }
예제 #20
0
        /// <summary>
        /// Highlights a cell in a shade of red. The alpha and red channels are taken from
        /// <c>Color.Red</c>; green and blue are both <c>(1.0 - intensity) * 255</c>, so an
        /// intensity of 1 gives pure red and an intensity of 0 gives white.
        /// </summary>
        /// <param name="cell">Cell to highlight.</param>
        /// <param name="intensity">
        /// Shade selector. <c>Convert.ToByte</c> throws <c>OverflowException</c> when the
        /// computed channel value does not fit in a byte.
        /// </param>
        public void paintRed(AST.Address cell, double intensity)
        {
            var red = System.Drawing.Color.Red;

            // green and blue share one value: the less intense, the closer to white
            byte fade = Convert.ToByte((1.0 - intensity) * 255);

            // highlight
            paintColor(cell, System.Drawing.Color.FromArgb(red.A, red.R, fade, fade), true);
        }
예제 #21
0
        /**
         * Return the local A1 form of an address, with both addressing modes forced to
         * relative so that the result carries no absolute-address $s.
         */
        public String localAddress(AST.Address addr)
        {
            var relative = AST.AddressMode.Relative;

            // rebuild the same address with relative row and column modes
            return AST.Address.fromR1C1withMode(addr.Row, addr.Col, relative, relative, addr.WorksheetName, addr.WorkbookName, addr.Path).A1Local();
        }
예제 #22
0
        /// <summary>
        /// Brings a cell into view: activates its worksheet, unhides the sheet, row and
        /// column if necessary, widens the cell if needed, updates the print area, scrolls
        /// the active window around the cell, and finally selects it.
        /// </summary>
        /// <param name="cell">Cell to show.</param>
        /// <param name="app">Excel application whose active window is scrolled.</param>
        private void activateAndCenterOn(AST.Address cell, Excel.Application app)
        {
            // go to worksheet
            RibbonHelper.GetWorksheetByName(cell.A1Worksheet(), _workbook.Worksheets).Activate();

            // COM object for the cell
            var comobj = ParcelCOMShim.Address.GetCOMObject(cell, app);

            // if the sheet is hidden, unhide it
            if (comobj.Worksheet.Visible != Excel.XlSheetVisibility.xlSheetVisible)
            {
                comobj.Worksheet.Visible = Excel.XlSheetVisibility.xlSheetVisible;
            }

            // if the cell's row is hidden, unhide it
            if ((bool)comobj.Rows.Hidden)
            {
                comobj.Rows.Hidden = false;
            }

            // if the cell's column is hidden, unhide it
            if ((bool)comobj.Columns.Hidden)
            {
                comobj.Columns.Hidden = false;
            }

            // ensure that the cell is wide enough that we can actually see it
            widenIfNecessary(comobj, app);

            // make sure that the printable area is big enough to show the cell;
            // don't change the printable area if the used range is a single cell
            int ur_width  = comobj.Worksheet.UsedRange.Columns.Count;
            int ur_height = comobj.Worksheet.UsedRange.Rows.Count;

            if (ur_width != 1 || ur_height != 1)
            {
                comobj.Worksheet.PageSetup.PrintArea = comobj.Worksheet.UsedRange.Address;
            }

            // center screen on cell: measure the visible area first (before Goto
            // changes the scroll position)
            var visible_columns = app.ActiveWindow.VisibleRange.Columns.Count;
            var visible_rows    = app.ActiveWindow.VisibleRange.Rows.Count;

            // jump to the cell, then scroll back by half of the visible rows and columns
            // NOTE(review): assumes SmallScroll's parameters are (Down, Up, ToRight, ToLeft),
            // i.e. this scrolls up and to the left -- confirm against the interop docs
            app.Goto(comobj, true);
            app.ActiveWindow.SmallScroll(Type.Missing, visible_rows / 2, Type.Missing, visible_columns / 2);

            // select the cell
            comobj.Select();
        }
예제 #23
0
파일: DAG.cs 프로젝트: sjas/DataDebug
        /// <summary>
        /// Reads the <c>Value2</c> of the COM range behind an address and converts it to a string.
        /// </summary>
        /// <returns>The converted value; null becomes the empty string.</returns>
        public string readCOMValueAtAddress(AST.Address addr)
        {
            var text = System.Convert.ToString(this.getCOMRefForAddress(addr).Range.Value2);

            // null values become the empty string
            return text == null ? "" : text;
        }
예제 #24
0
 /// <summary>
 /// Counts the cells referenced by the formula at an address: its single-cell
 /// references plus every cell of every range reference.
 /// </summary>
 /// <returns>The reference count, or 0 when the address does not hold a formula.</returns>
 private int NumberOfReferences(AST.Address addr, Graphs graphs)
 {
     // non-formulas reference nothing
     if (!graphs.isFormula(addr))
     {
         return 0;
     }

     var parsed = Parcel.parseFormulaAtAddress(addr, graphs.getFormulaAtAddress(addr));

     // single-cell references
     var single_refs = Parcel.addrReferencesFromExpr(parsed);

     // range references, expanded into their individual cells
     var range_cells = Parcel.rangeReferencesFromExpr(parsed).SelectMany(rng => rng.Addresses()).ToArray();

     return single_refs.Length + range_cells.Length;
 }
예제 #25
0
 /// <summary>
 /// Remembers the color of a cell, replacing any color saved earlier for the same
 /// address. Colors whose <c>ColorIndex</c> is 0 are not saved.
 /// </summary>
 public void saveColorAt(AST.Address addr, CellColor c)
 {
     // a color index of 0 is never stored
     if (c.ColorIndex == 0)
     {
         return;
     }

     if (!_d.ContainsKey(addr))
     {
         _d.Add(addr, c);
     }
     else
     {
         _d[addr] = c;
     }
 }
예제 #26
0
        /// <summary>
        /// Converts an address inside the value box into a <c>Reference</c>. An address
        /// counts as on-sheet when its path, workbook name and worksheet name all match
        /// this object's; its coordinates are then expressed relative to the top-left
        /// corner of the value box. Off-sheet addresses get coordinates (0, 0).
        /// </summary>
        /// <param name="addr">Address to convert; asserted (debug builds) to be inside the value box.</param>
        private Reference ValueAddressToReference(AST.Address addr)
        {
            Debug.Assert(InValueBox(addr));

            bool same_path      = addr.Path == Path;
            bool onSheet        = same_path &&
                                  addr.WorkbookName == Workbook &&
                                  addr.WorksheetName == Worksheet;

            // x and y coordinates don't matter for off-sheet addresses
            if (!onSheet)
            {
                return new Reference(false, 0, 0);
            }

            return new Reference(true, addr.Row - _value_box_top, addr.Col - _value_box_left);
        }
예제 #27
0
 /// <summary>
 /// Insert or update the annotation of a cell. A new annotation is also
 /// recorded in the set of added addresses; an update is recorded in the
 /// set of changed addresses.
 /// </summary>
 /// <param name="addr">Cell being annotated.</param>
 /// <param name="annot">Annotation supplying the bug kind and the note.</param>
 public void SetAnnotationFor(AST.Address addr, BugAnnotation annot)
 {
     // first annotation for this cell: insert
     if (!_bugs.ContainsKey(addr))
     {
         _bugs.Add(addr, annot.BugKind);
         _notes.Add(addr, annot.Note);
         _added.Add(addr);
         return;
     }

     // the cell is already annotated: update in place
     _bugs[addr]  = annot.BugKind;
     _notes[addr] = annot.Note;
     _changed.Add(addr);
 }
예제 #28
0
파일: DAG.cs 프로젝트: sjas/DataDebug
 /// <summary>
 /// Records that a formula reads a single input cell, in both directions:
 /// formula-to-inputs and input-to-formulas.
 /// </summary>
 /// <param name="formula_addr">Address of the formula; its bucket must already exist.</param>
 /// <param name="input_addr">Address of the cell the formula reads.</param>
 public void linkSingleCellInput(AST.Address formula_addr, AST.Address input_addr)
 {
     // add address to input_addr-lookup-by-formula_addr dictionary
     // (initialized in DAG constructor)
     _f2i[formula_addr].Add(input_addr);

     // add formula_addr to faddr-lookup-by-iaddr dictionary,
     // initializing bucket if necessary
     if (!_i2f.ContainsKey(input_addr))
     {
         _i2f.Add(input_addr, new HashSet<AST.Address>());
     }

     // the bucket is a set, so adding an address that is already
     // present is a no-op; no separate Contains check is needed
     _i2f[input_addr].Add(formula_addr);
 }
예제 #29
0
파일: DAG.cs 프로젝트: sjas/DataDebug
 /// <summary>
 /// Records that a formula reads an input range, in both directions:
 /// formula-to-ranges and range-to-formulas.
 /// </summary>
 /// <param name="formula_addr">Address of the formula; its bucket must already exist.</param>
 /// <param name="vector_rng">Range the formula reads.</param>
 public void linkInputVector(AST.Address formula_addr, AST.Range vector_rng)
 {
     // add range to range-lookup-by-formula_addr dictionary
     // (initialized in DAG constructor)
     _f2v[formula_addr].Add(vector_rng);

     // add formula_addr to faddr-lookup-by-range dictionary,
     // initializing bucket if necessary
     if (!_v2f.ContainsKey(vector_rng))
     {
         _v2f.Add(vector_rng, new HashSet<AST.Address>());
     }

     // the bucket is a set, so adding an address that is already
     // present is a no-op; no separate Contains check is needed
     _v2f[vector_rng].Add(formula_addr);
 }
예제 #30
0
        /// <summary>
        /// For every input cell, injects up to <paramref name="k"/> generated error strings one
        /// at a time, measures the total output error each one causes, and records the
        /// string that caused the largest error together with that error.
        /// </summary>
        /// <param name="terminal_formula_nodes">Formula cells whose values are captured after each injection.</param>
        /// <param name="inputs">Input cells and their original values.</param>
        /// <param name="k">Number of error strings tried per input.</param>
        /// <param name="correct_outputs">Outputs computed from the unmodified inputs.</param>
        /// <param name="app">Excel application used for injection.</param>
        /// <param name="wb">Workbook the values are injected into.</param>
        /// <param name="classification_file">File from which the classification is deserialized.</param>
        /// <param name="dag">Dependence graph used when saving outputs.</param>
        /// <returns>
        /// For each input address, the worst error string and its total error; an input for
        /// which no string produced a positive error maps to ("", 0.0).
        /// </returns>
        public Dictionary<AST.Address, Tuple<string, double>> TopOfKErrors(AST.Address[] terminal_formula_nodes, CellDict inputs, int k, CellDict correct_outputs, Excel.Application app, Excel.Workbook wb, string classification_file, DAG dag)
        {
            var eg = new ErrorGenerator();
            var c  = Classification.Deserialize(classification_file);
            var max_error_produced_dictionary = new Dictionary<AST.Address, Tuple<string, double>>();

            foreach (KeyValuePair<AST.Address, string> pair in inputs)
            {
                AST.Address addr       = pair.Key;
                string      orig_value = pair.Value;

                // worst error seen so far for this input
                double max_error_produced = 0.0;
                string max_error_string   = "";

                // get k strings
                string[] errorstrings = eg.GenerateErrorStrings(orig_value, c, k);

                for (int i = 0; i < k; i++)
                {
                    CellDict cd = new CellDict();
                    cd.Add(addr, errorstrings[i]);

                    CellDict incorrect_outputs;
                    try
                    {
                        // inject the typo
                        Utility.InjectValues(app, wb, cd);

                        // save function outputs
                        incorrect_outputs = Utility.SaveOutputs(terminal_formula_nodes, dag);
                    }
                    finally
                    {
                        // always remove the typo that was introduced, even if
                        // injection or capture failed, so that the workbook is
                        // never left holding an erroneous value
                        cd.Clear();
                        cd.Add(addr, orig_value);
                        Utility.InjectValues(app, wb, cd);
                    }

                    double total_error = Utility.CalculateTotalError(correct_outputs, incorrect_outputs);

                    // keep track of the largest observed max error
                    if (total_error > max_error_produced)
                    {
                        max_error_produced = total_error;
                        max_error_string   = errorstrings[i];
                    }
                }

                // add entry for this input with its max_error_produced
                max_error_produced_dictionary.Add(addr, new Tuple<string, double>(max_error_string, max_error_produced));
            }

            return max_error_produced_dictionary;
        }
예제 #31
0
파일: Analysis.cs 프로젝트: sjas/DataDebug
        /// <summary>
        /// Scores every input cell of a range by excluding each index in turn and running
        /// the hypothesis test on the bootstrap outputs.
        /// </summary>
        /// <param name="dag">Graph consulted for the function's weight when <paramref name="weighted"/> is set.</param>
        /// <param name="rangeNode">Range whose cells are scored.</param>
        /// <param name="functionNode">Function cell whose weight is used as the score increment.</param>
        /// <param name="boots">Bootstrap outputs handed to the hypothesis test.</param>
        /// <param name="initial_output">Original output handed to the hypothesis test.</param>
        /// <param name="weighted">When false, a weight of 1 is used.</param>
        /// <param name="significance">Significance level handed to the hypothesis test.</param>
        /// <returns>
        /// A score for every input cell: 0 unless the null hypothesis is rejected for it,
        /// in which case the weight is added.
        /// </returns>
        public static TreeScore StringHypothesisTest(DAG dag, AST.Range rangeNode, AST.Address functionNode, FunctionOutput<string>[] boots, string initial_output, bool weighted, double significance)
        {
            // this function's input cells
            var cells = rangeNode.Addresses();

            // scores
            var scores = new TreeScore();

            var cell_count = cells.Count();

            // exclude each index, in turn
            for (int idx = 0; idx < cell_count; idx++)
            {
                AST.Address cell = cells[idx];

                // weight of the function value of interest, or the default of 1
                int weight = weighted ? dag.getWeight(functionNode) : 1;

                bool rejected = RejectNullHypothesis(boots, initial_output, idx, significance);

                // every input gets an entry, even if it never fails the test
                if (!scores.ContainsKey(cell))
                {
                    scores.Add(cell, 0);
                }

                // add weight to score if test fails
                if (rejected)
                {
                    scores[cell] += weight;
                }
            }

            return scores;
        }
예제 #32
0
        /// <summary>
        /// Flags the next suspicious cell. Known-good cells are filtered out of the
        /// flaggable sequence and its first remaining entry becomes the flagged cell:
        /// its current color is saved, it is painted red, the view is centered on it,
        /// and the auditing tool is activated. When nothing remains, the user is told
        /// so and the tool is reset.
        /// </summary>
        public void Flag()
        {
            // filter known_good
            _flaggable = _flaggable.Where(kvp => !_known_good.Contains(kvp.Key));

            // Any() stops at the first element, whereas Count() would run the whole
            // (deferred, repeatedly re-filtered) sequence just to test for emptiness
            _flagged_cell = _flaggable.Any() ? _flaggable.First().Key : null;

            if (_flagged_cell == null)
            {
                System.Windows.Forms.MessageBox.Show("No bugs remain.");
                ResetTool();
                return;
            }

            // get cell COM object
            var com = _flagged_cell.GetCOMObject(_app);

            // save old color
            var cc = new CellColor(com.Interior.ColorIndex, com.Interior.Color);
            if (_colors.ContainsKey(_flagged_cell))
            {
                _colors[_flagged_cell] = cc;
            }
            else
            {
                _colors.Add(_flagged_cell, cc);
            }

            // highlight cell
            com.Interior.Color = System.Drawing.Color.Red;
            _tool_highlights.Add(_flagged_cell);

            // go to highlighted cell
            ActivateAndCenterOn(_flagged_cell, _app);

            // enable auditing buttons
            SetTool(active: true);
        }
예제 #33
0
        /// <summary>
        /// Opens the fix form for the currently flagged cell and then restores the output
        /// colors. The form receives a callback that marks the cell as known good, clears
        /// the flag, re-runs the analysis, flags the next cell and refreshes the UI state.
        /// </summary>
        /// <param name="setUIState">Invoked with this workbook state after re-flagging.</param>
        internal void FixError(Action<WorkbookState> setUIState)
        {
            var flagged_com = _flagged_cell.GetCOMObject(_app);

            // this callback gets run when the user clicks "OK"
            System.Action on_confirmed = () =>
            {
                // the cell is now known good, and no longer flagged
                _known_good.Add(_flagged_cell);
                _flagged_cell = null;

                try
                {
                    // a fix may change the results: re-analyze, re-flag,
                    // and only then update the UI state
                    Analyze(MAX_DURATION_IN_MS);
                    Flag();
                    setUIState(this);
                }
                catch (ExcelParserUtility.ParseException ex)
                {
                    System.Windows.Forms.Clipboard.SetText(ex.Message);
                    System.Windows.Forms.MessageBox.Show("Could not parse the formula string:\n" + ex.Message);
                }
                catch (System.OutOfMemoryException)
                {
                    System.Windows.Forms.MessageBox.Show("Insufficient memory to perform analysis.");
                }
            };

            // show the form
            new CellFixForm(flagged_com, GREEN, on_confirmed).Show();

            // restore output colors
            RestoreOutputColors();
        }