/// <summary>
/// Scores every entry of <paramref name="vectors"/> against <paramref name="baseVector"/>.
/// Each result stores its <c>prob</c> under the "cos_sim" key and is then topped up with
/// the values returned by CalculateDiffs for keys it does not already contain.
/// </summary>
/// <param name="baseVector">Entry that every other entry is compared with.</param>
/// <param name="vectors">Entries to compare; its Count is handed to pbarUpdate.</param>
/// <param name="logarithm">Forwarded unchanged to GetSparseSimilarity.</param>
/// <param name="onlyBase">Forwarded unchanged to GetSparseSimilarity.</param>
/// <returns>A table holding one analysis result per entry of <paramref name="vectors"/>, in iteration order.</returns>
public PivotTable GetSparseSimilarites(PivotTableEntry baseVector, PivotTable vectors, bool logarithm, bool onlyBase) {
    this.pbarUpdate(vectors.Count, 0, 0);

    PivotTable results = new PivotTable();
    int processed = 0;
    foreach (PivotTableEntry candidate in vectors) {
        PivotTableAnalysisResult scored = GetSparseSimilarity(baseVector, candidate, logarithm, onlyBase);
        scored.Data.Add("cos_sim", scored.prob);

        // Merge the diffs in, never overwriting a value that is already recorded.
        Dictionary<String, double> diffs = CalculateDiffs(baseVector, candidate);
        foreach (KeyValuePair<String, double> diff in diffs) {
            if (!scored.Data.ContainsKey(diff.Key)) {
                scored.Data.Add(diff.Key, diff.Value);
            }
        }

        results.Add(scored);

        // Progress is reported with the zero-based index of the entry just finished.
        this.pbarValueUpdate(processed);
        processed++;
    }

    return results;
}
/// <summary>
/// Collapses a pivot table into a single entry whose value for each column is the
/// arithmetic mean of that column over the entries that contain it.
/// </summary>
/// <param name="tableWithMoreThanOneRow">Table to average; must hold at least two entries.</param>
/// <returns>
/// A new table with exactly one entry. Its Context and RowKey are copied from the first
/// input entry. A column that is missing from some entries is averaged only over the
/// entries that do contain it.
/// </returns>
/// <exception cref="ArgumentNullException">The table is null.</exception>
/// <exception cref="ArgumentException">The table holds fewer than two entries.</exception>
public PivotTable GenerateAverageVector(PivotTable tableWithMoreThanOneRow) {
    if (tableWithMoreThanOneRow == null) {
        throw new ArgumentNullException("tableWithMoreThanOneRow");
    }
    if (tableWithMoreThanOneRow.Count < 2) {
        // ArgumentException derives from Exception, so existing catch blocks keep working;
        // the single-argument constructor keeps the message text unchanged.
        throw new ArgumentException("Must Have more than one row");
    }

    // Keep a running sum and count per column instead of buffering every value.
    Dictionary<String, Double> sums = new Dictionary<String, Double>();
    Dictionary<String, int> counts = new Dictionary<String, int>();
    foreach (PivotTableEntry entry in tableWithMoreThanOneRow) {
        foreach (KeyValuePair<String, Double> cell in entry.Data) {
            double runningSum;
            int seen;
            // A failed TryGetValue leaves 0 in the out variable, which is the right seed.
            sums.TryGetValue(cell.Key, out runningSum);
            counts.TryGetValue(cell.Key, out seen);
            sums[cell.Key] = runningSum + cell.Value;
            counts[cell.Key] = seen + 1;
        }
    }

    // Columns come out in first-seen order, matching the order they were accumulated in.
    Dictionary<String, Double> averages = new Dictionary<String, Double>();
    foreach (KeyValuePair<String, Double> total in sums) {
        averages.Add(total.Key, total.Value / counts[total.Key]);
    }

    PivotTableEntry first = tableWithMoreThanOneRow[0];
    PivotTableEntry averaged = new PivotTableEntry() {
        Data = averages,
        Context = first.Context,
        RowKey = first.RowKey
    };

    PivotTable pt = new PivotTable();
    pt.Add(averaged);
    return pt;
}
/// <summary>
/// Converts a csv file to a pivot table. The first non-blank line is read as the header;
/// every later non-blank line becomes one entry. Fields are split on ',' with no quoting support.
/// </summary>
/// <param name="file">Path of the csv file to read.</param>
/// <param name="rowkeyColumnName">
/// Case-insensitive regular-expression pattern. The value of a column whose header matches it
/// becomes the entry's RowKey instead of a data value (the last matching column wins).
/// </param>
/// <returns>
/// One entry per data line. Columns whose header matches SHAPE_Length or SHAPE_Area are
/// skipped; all remaining columns are parsed as doubles. RowKey is "" when no column matches
/// <paramref name="rowkeyColumnName"/>.
/// </returns>
/// <exception cref="FormatException">
/// A data line has fewer values than the header has columns, or a value is not a valid number.
/// </exception>
public PivotTable FileToPivotTable(String file, string rowkeyColumnName) {
    PivotTable vectors = new PivotTable();
    string[] fields = null;
    bool[] isIgnored = null;
    bool[] isRowKey = null;

    using (var reader = System.IO.File.OpenText(file)) {
        String line;
        int lineNumber = 0;
        while ((line = reader.ReadLine()) != null) {
            lineNumber++;

            // Blank lines (typically a trailing newline) carry no data.
            if (line.Trim().Length == 0) {
                continue;
            }

            if (fields == null) {
                // Header: classify each column once instead of re-running the regexes per row.
                fields = line.Split(',');
                isIgnored = new bool[fields.Length];
                isRowKey = new bool[fields.Length];
                for (int c = 0; c < fields.Length; c++) {
                    isIgnored[c] = Regex.IsMatch(fields[c], "(SHAPE_Length|SHAPE_Area)", RegexOptions.IgnoreCase);
                    // The shape check takes precedence over the row-key check.
                    isRowKey[c] = !isIgnored[c] && Regex.IsMatch(fields[c], rowkeyColumnName, RegexOptions.IgnoreCase);
                }
                continue;
            }

            string[] vals = line.Split(',');
            if (vals.Length < fields.Length) {
                throw new FormatException(
                    "Line " + lineNumber + " of '" + file + "' has " + vals.Length +
                    " values but the header declares " + fields.Length + " columns.");
            }

            Dictionary<string, double> vector = new Dictionary<string, double>();
            String geohash = "";
            for (int x = 0; x < fields.Length; x++) {
                if (isIgnored[x]) {
                    continue;
                }
                if (isRowKey[x]) {
                    geohash = vals[x];
                    continue;
                }
                // The file is machine-readable, so parse independently of the user's locale.
                double val = Double.Parse(vals[x], System.Globalization.CultureInfo.InvariantCulture);
                vector.Add(fields[x], val);
            }

            vectors.Add(new PivotTableEntry() { Data = vector, RowKey = geohash });
        }
    }

    return vectors;
}
/// <summary>
/// Regroups comparison results by RowKey. Each output entry maps the Label of every
/// contributing input entry to that entry's value for <paramref name="pivCol"/>.
/// </summary>
/// <param name="withMultipleCompares">Entries to regroup; several may share a RowKey.</param>
/// <param name="pivCol">Key of the value to pull out of each entry's Data.</param>
/// <returns>
/// One entry per RowKey that was opened. A RowKey is only opened by an entry that contains
/// <paramref name="pivCol"/>: entries lacking it are dropped while the RowKey is still unknown
/// and contribute 0 once it is known.
/// </returns>
public PivotTable flattenAndSimplify(PivotTable withMultipleCompares, String pivCol) {
    Dictionary<String, Dictionary<String, Double>> grouped = new Dictionary<String, Dictionary<String, Double>>();

    foreach (PivotTableEntry item in withMultipleCompares) {
        Dictionary<String, Double> row;
        bool rowExists = grouped.TryGetValue(item.RowKey, out row);

        double pivotValue;
        bool hasPivot = item.Data.TryGetValue(pivCol, out pivotValue);

        if (rowExists) {
            // Known row: a missing pivot column is recorded as zero.
            row.Add(item.Label, hasPivot ? pivotValue : 0d);
        }
        else if (hasPivot) {
            // Unknown row: only an entry that carries the pivot column may open it.
            Dictionary<String, Double> opened = new Dictionary<String, Double>();
            opened.Add(item.Label, pivotValue);
            grouped.Add(item.RowKey, opened);
        }
    }

    PivotTable output = new PivotTable();
    foreach (KeyValuePair<String, Dictionary<String, Double>> pair in grouped) {
        output.Add(new PivotTableEntry() { RowKey = pair.Key, Data = pair.Value });
    }
    return output;
}
/// <summary>
/// Builds a pivot table with one entry per feature, reporting progress through
/// pbarChangeDet and UpdatePBar as it goes.
/// </summary>
/// <param name="layers">Features to convert.</param>
/// <param name="rowKeyColName">Name of the field whose value becomes each entry's RowKey (exact match).</param>
/// <param name="columnsToIgnore">
/// Field names to skip; may be null. "OBJECTID" is always skipped. The list is not modified.
/// </param>
/// <returns>
/// A table in feature order. Only fields whose string value parses as an integer are stored
/// in an entry's Data; all other values are left out.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="layers"/> is null.</exception>
public PivotTable FeaturesToPivotTable(List<IFeature> layers, string rowKeyColName, List<string> columnsToIgnore) {
    if (layers == null) {
        throw new ArgumentNullException("layers");
    }

    SendAnInt sai = this.UpdatePBar;
    this.pbarChangeDet.Minimum = 0;
    this.pbarChangeDet.Maximum = layers.Count;
    this.pbarChangeDet.Value = 0;

    // Work on a private copy so the caller's list is never mutated; a set also gives O(1) lookups.
    HashSet<string> ignored = columnsToIgnore == null
        ? new HashSet<string>()
        : new HashSet<string>(columnsToIgnore);
    ignored.Add("OBJECTID");

    var pt = new PivotTable();
    var x = 0;
    foreach (var feature in layers) {
        var entry = new PivotTableEntry();

        this.pbarChangeDet.Value++;
        sai.Invoke(x);
        x++;

        // Grow the bar when it fills up so the next increment stays within range.
        if (this.pbarChangeDet.Value == this.pbarChangeDet.Maximum) {
            this.pbarChangeDet.Maximum = this.pbarChangeDet.Maximum + 10;
        }

        for (var i = 0; i < feature.Fields.FieldCount; i++) {
            var fname = feature.Fields.get_Field(i).Name;
            if (ignored.Contains(fname)) {
                continue;
            }

            var val = feature.get_Value(i).ToString();
            if (fname.Equals(rowKeyColName)) {
                entry.RowKey = val;
                continue;
            }

            // Non-integer values are left out on purpose; TryParse avoids exception-driven flow.
            // A repeated field name keeps its first value.
            int parsed;
            if (int.TryParse(val, out parsed) && !entry.Data.ContainsKey(fname)) {
                entry.Data.Add(fname, parsed);
            }
        }

        pt.Add(entry);
    }

    sai.Invoke(Convert.ToInt32(this.pbarChangeDet.Maximum));
    return pt;
}
/// <summary>
/// Compares two pivot tables row key by row key. Do not pass in columns that don't make sense
/// to compare. Entries found under the same key in both tables are scored with
/// GetSparseSimilarity (a cosine similarity on geohash cell pairs) and, optionally, given the
/// per-column diffs from CalculateDiffs.
/// </summary>
/// <param name="ptA">First table; its RowKeys must be unique.</param>
/// <param name="ptB">Second table; its RowKeys must be unique.</param>
/// <param name="label">Label stamped on every result.</param>
/// <param name="diffs">
/// True to start each matched result's Data from CalculateDiffs; false to start from an empty dictionary.
/// </param>
/// <returns>
/// One result per distinct RowKey across both tables. Matched keys carry "cos_sim" and
/// "percent_change" (|cos_sim - 1| * 100). Keys present in only one table get prob 0 and a
/// shared dictionary that maps every RowKey to 0.
/// </returns>
public PivotTable DetectChange(PivotTable ptA, PivotTable ptB, string label, bool diffs) {
    PivotTable outList = new PivotTable();

    // Index both tables by row key and collect the union of keys.
    Dictionary<string, PivotTableEntry> byKeyA = new Dictionary<string, PivotTableEntry>();
    Dictionary<string, PivotTableEntry> byKeyB = new Dictionary<string, PivotTableEntry>();
    HashSet<string> allKeys = new HashSet<string>();
    foreach (PivotTableEntry row in ptA) {
        byKeyA.Add(row.RowKey, row);
        allKeys.Add(row.RowKey);
    }
    foreach (PivotTableEntry row in ptB) {
        byKeyB.Add(row.RowKey, row);
        allKeys.Add(row.RowKey);
    }

    this.pbarUpdate.Invoke(allKeys.Count, 0, 0);

    // Placeholder data for unmatched keys; the same instance is attached to each of them.
    Dictionary<string, double> zeroes = new Dictionary<string, double>();
    foreach (string key in allKeys) {
        zeroes.Add(key, 0d);
    }

    int step = 0;
    foreach (string key in allKeys) {
        this.pbarValueUpdate.Invoke(step);
        step++;

        PivotTableEntry left;
        PivotTableEntry right;
        byKeyA.TryGetValue(key, out left);   // stays null when the key is absent
        byKeyB.TryGetValue(key, out right);

        if (left == null || right == null) {
            outList.Add(new PivotTableAnalysisResult() { RowKey = key, prob = 0d, Data = zeroes, Label = label });
            continue;
        }

        PivotTableAnalysisResult result = GetSparseSimilarity(left, right, true, false);
        result.RowKey = key;
        result.Label = label;
        result.Data = diffs ? CalculateDiffs(left, right) : new Dictionary<string, double>();
        result.Data.Add("cos_sim", result.prob);
        result.Data.Add("percent_change", Math.Abs(result.prob - 1) * 100);
        outList.Add(result);
    }

    return outList;
}
/// <summary>
/// Builds a pivot table with one entry per feature of a layer, reporting progress through
/// analysisProgressBar and UpdatePBar. Results are cached in PivotTableCache by layer name.
/// </summary>
/// <param name="layer">Layer whose feature class is read.</param>
/// <param name="rowKeyColName">Name of the field whose value becomes each entry's RowKey (exact match).</param>
/// <param name="columnsToIgnore">
/// Field names to skip; may be null. "OBJECTID" is always skipped. The list is not modified.
/// </param>
/// <returns>
/// The cached table when one exists for the layer's name; otherwise a freshly built table.
/// Only fields whose string value parses as an integer are stored in an entry's Data.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="layer"/> is null.</exception>
public PivotTable FeatureLayerToPivotTable(IFeatureLayer layer, string rowKeyColName, List<string> columnsToIgnore) {
    if (layer == null) {
        throw new ArgumentNullException("layer");
    }

    SendAnInt sai = this.UpdatePBar;
    this.analysisProgressBar.Minimum = 0;
    this.analysisProgressBar.Maximum = layer.FeatureClass.FeatureCount(null);
    this.analysisProgressBar.Value = 0;

    // NOTE(review): the cache is keyed by layer name only, so a call with a different
    // rowKeyColName or columnsToIgnore gets the table built by the earlier call. Confirm intended.
    if (PivotTableCache.Cache.ContainsKey(layer.Name)) {
        return PivotTableCache.Cache[layer.Name];
    }

    // Work on a private copy so the caller's list is never mutated; a set also gives O(1) lookups.
    HashSet<string> ignored = columnsToIgnore == null
        ? new HashSet<string>()
        : new HashSet<string>(columnsToIgnore);
    ignored.Add("OBJECTID");

    var pt = new PivotTable();
    var featureCursor = layer.FeatureClass.Search(null, false);
    try {
        var x = 0;
        for (var feature = featureCursor.NextFeature(); feature != null; feature = featureCursor.NextFeature()) {
            var entry = new PivotTableEntry();

            this.analysisProgressBar.Value++;
            sai.Invoke(x);
            x++;

            // Grow the bar when it fills up so the next increment stays within range.
            if (this.analysisProgressBar.Value == this.analysisProgressBar.Maximum) {
                this.analysisProgressBar.Maximum = this.analysisProgressBar.Maximum + 10;
            }

            for (var i = 0; i < feature.Fields.FieldCount; i++) {
                var f = feature.Fields.get_Field(i).Name;
                if (ignored.Contains(f)) {
                    continue;
                }

                var val = feature.get_Value(i).ToString();
                if (f.Equals(rowKeyColName)) {
                    entry.RowKey = val;
                    continue;
                }

                // Non-integer values are left out on purpose; TryParse avoids exception-driven flow.
                // A repeated field name keeps its first value.
                int parsed;
                if (int.TryParse(val, out parsed) && !entry.Data.ContainsKey(f)) {
                    entry.Data.Add(f, parsed);
                }
            }

            pt.Add(entry);
        }
    }
    finally {
        // Cursors hold native resources (and possibly locks) until their COM reference is released.
        if (featureCursor != null && System.Runtime.InteropServices.Marshal.IsComObject(featureCursor)) {
            System.Runtime.InteropServices.Marshal.ReleaseComObject(featureCursor);
        }
    }

    sai.Invoke(Convert.ToInt32(this.analysisProgressBar.Maximum));

    // Add to the cache unless something else stored the layer in the meantime.
    if (!PivotTableCache.Cache.ContainsKey(layer.Name)) {
        PivotTableCache.Cache.Add(layer.Name, pt);
    }
    return pt;
}
/// <summary>
/// Compares two pivot tables row key by row key. Do not pass in columns that don't make sense
/// to compare. Entries found under the same key in both tables are scored with
/// GetSparseSimilarity (a cosine similarity on geohash cell pairs) and, optionally, given the
/// per-column diffs from CalculateDiffs.
/// </summary>
/// <param name="ptA">First table; its RowKeys must be unique.</param>
/// <param name="ptB">Second table; its RowKeys must be unique.</param>
/// <param name="label">Label stamped on every result.</param>
/// <param name="diffs">
/// True to start each matched result's Data from CalculateDiffs; false to start from an empty dictionary.
/// </param>
/// <returns>
/// One result per distinct RowKey across both tables. Matched keys carry "cos_sim" and
/// "percent_change" (|cos_sim - 1| * 100). Keys present in only one table get prob 0 and a
/// shared dictionary that maps every RowKey to 0.
/// </returns>
public PivotTable DetectChange(PivotTable ptA, PivotTable ptB, string label, bool diffs) {
    PivotTable outList = new PivotTable();

    // Index both tables by row key and collect the union of keys.
    Dictionary<string, PivotTableEntry> indexA = new Dictionary<string, PivotTableEntry>();
    Dictionary<string, PivotTableEntry> indexB = new Dictionary<string, PivotTableEntry>();
    HashSet<string> unionKeys = new HashSet<string>();
    foreach (PivotTableEntry row in ptA) {
        indexA.Add(row.RowKey, row);
        unionKeys.Add(row.RowKey);
    }
    foreach (PivotTableEntry row in ptB) {
        indexB.Add(row.RowKey, row);
        unionKeys.Add(row.RowKey);
    }

    this.pbarUpdate.Invoke(unionKeys.Count, 0, 0);

    // Placeholder data for unmatched keys; the same instance is attached to each of them.
    Dictionary<string, double> placeholder = new Dictionary<string, double>();
    foreach (string key in unionKeys) {
        placeholder.Add(key, 0d);
    }

    int progress = 0;
    foreach (string key in unionKeys) {
        this.pbarValueUpdate.Invoke(progress);
        progress++;

        PivotTableEntry fromA;
        PivotTableEntry fromB;
        indexA.TryGetValue(key, out fromA);   // stays null when the key is absent
        indexB.TryGetValue(key, out fromB);

        bool matched = fromA != null && fromB != null;
        if (!matched) {
            outList.Add(new PivotTableAnalysisResult() { RowKey = key, prob = 0d, Data = placeholder, Label = label });
            continue;
        }

        PivotTableAnalysisResult scored = GetSparseSimilarity(fromA, fromB, true, false);
        scored.RowKey = key;
        scored.Label = label;
        scored.Data = diffs ? CalculateDiffs(fromA, fromB) : new Dictionary<string, double>();
        scored.Data.Add("cos_sim", scored.prob);
        scored.Data.Add("percent_change", Math.Abs(scored.prob - 1) * 100);
        outList.Add(scored);
    }

    return outList;
}
/// <summary>
/// Scores every entry of <paramref name="vectors"/> against <paramref name="baseVector"/>.
/// Each result stores its <c>prob</c> under the "cos_sim" key and is then topped up with
/// the values returned by CalculateDiffs for keys it does not already contain.
/// </summary>
/// <param name="baseVector">Entry that every other entry is compared with.</param>
/// <param name="vectors">Entries to compare; its Count is handed to pbarUpdate.</param>
/// <param name="logarithm">Forwarded unchanged to GetSparseSimilarity.</param>
/// <param name="onlyBase">Forwarded unchanged to GetSparseSimilarity.</param>
/// <returns>A table holding one analysis result per entry of <paramref name="vectors"/>, in iteration order.</returns>
public PivotTable GetSparseSimilarites(PivotTableEntry baseVector, PivotTable vectors, bool logarithm, bool onlyBase) {
    this.pbarUpdate(vectors.Count, 0, 0);

    PivotTable scoredTable = new PivotTable();
    int index = 0;
    foreach (PivotTableEntry other in vectors) {
        PivotTableAnalysisResult result = GetSparseSimilarity(baseVector, other, logarithm, onlyBase);
        result.Data.Add("cos_sim", result.prob);

        // Merge the diffs in, never overwriting a value that is already recorded.
        Dictionary<String, double> diffData = CalculateDiffs(baseVector, other);
        foreach (KeyValuePair<String, double> pair in diffData) {
            if (!result.Data.ContainsKey(pair.Key)) {
                result.Data.Add(pair.Key, pair.Value);
            }
        }

        scoredTable.Add(result);

        // Progress is reported with the zero-based index of the entry just finished.
        this.pbarValueUpdate(index);
        index++;
    }

    return scoredTable;
}
/// <summary>
/// Collapses a pivot table into a single entry whose value for each column is the
/// arithmetic mean of that column over the entries that contain it.
/// </summary>
/// <param name="tableWithMoreThanOneRow">Table to average; must hold at least two entries.</param>
/// <returns>
/// A new table with exactly one entry. Its Context and RowKey are copied from the first
/// input entry. A column that is missing from some entries is averaged only over the
/// entries that do contain it.
/// </returns>
/// <exception cref="ArgumentNullException">The table is null.</exception>
/// <exception cref="ArgumentException">The table holds fewer than two entries.</exception>
public PivotTable GenerateAverageVector(PivotTable tableWithMoreThanOneRow) {
    if (tableWithMoreThanOneRow == null) {
        throw new ArgumentNullException("tableWithMoreThanOneRow");
    }
    if (tableWithMoreThanOneRow.Count < 2) {
        // ArgumentException derives from Exception, so existing catch blocks keep working;
        // the single-argument constructor keeps the message text unchanged.
        throw new ArgumentException("Must Have more than one row");
    }

    // Keep a running sum and count per column instead of buffering every value.
    Dictionary<String, Double> sums = new Dictionary<String, Double>();
    Dictionary<String, int> counts = new Dictionary<String, int>();
    foreach (PivotTableEntry entry in tableWithMoreThanOneRow) {
        foreach (KeyValuePair<String, Double> cell in entry.Data) {
            double runningSum;
            int seen;
            // A failed TryGetValue leaves 0 in the out variable, which is the right seed.
            sums.TryGetValue(cell.Key, out runningSum);
            counts.TryGetValue(cell.Key, out seen);
            sums[cell.Key] = runningSum + cell.Value;
            counts[cell.Key] = seen + 1;
        }
    }

    // Columns come out in first-seen order, matching the order they were accumulated in.
    Dictionary<String, Double> averages = new Dictionary<String, Double>();
    foreach (KeyValuePair<String, Double> total in sums) {
        averages.Add(total.Key, total.Value / counts[total.Key]);
    }

    PivotTableEntry first = tableWithMoreThanOneRow[0];
    PivotTableEntry averaged = new PivotTableEntry() {
        Data = averages,
        Context = first.Context,
        RowKey = first.RowKey
    };

    PivotTable pt = new PivotTable();
    pt.Add(averaged);
    return pt;
}
/// <summary>
/// Regroups comparison results by RowKey. Each output entry maps the Label of every
/// contributing input entry to that entry's value for <paramref name="pivCol"/>.
/// </summary>
/// <param name="withMultipleCompares">Entries to regroup; several may share a RowKey.</param>
/// <param name="pivCol">Key of the value to pull out of each entry's Data.</param>
/// <returns>
/// One entry per RowKey that was opened. A RowKey is only opened by an entry that contains
/// <paramref name="pivCol"/>: entries lacking it are dropped while the RowKey is still unknown
/// and contribute 0 once it is known.
/// </returns>
public PivotTable flattenAndSimplify(PivotTable withMultipleCompares, String pivCol) {
    Dictionary<String, Dictionary<String, Double>> byRowKey = new Dictionary<String, Dictionary<String, Double>>();

    foreach (PivotTableEntry source in withMultipleCompares) {
        Dictionary<String, Double> labels;
        bool seenBefore = byRowKey.TryGetValue(source.RowKey, out labels);

        double picked;
        bool found = source.Data.TryGetValue(pivCol, out picked);

        if (seenBefore) {
            // Known row: a missing pivot column is recorded as zero.
            labels.Add(source.Label, found ? picked : 0d);
            continue;
        }

        if (found) {
            // Unknown row: only an entry that carries the pivot column may open it.
            Dictionary<String, Double> fresh = new Dictionary<String, Double>();
            fresh.Add(source.Label, picked);
            byRowKey.Add(source.RowKey, fresh);
        }
    }

    PivotTable output = new PivotTable();
    foreach (KeyValuePair<String, Dictionary<String, Double>> group in byRowKey) {
        output.Add(new PivotTableEntry() { RowKey = group.Key, Data = group.Value });
    }
    return output;
}
/// <summary>
/// Converts a csv file to a pivot table. The first non-blank line is read as the header;
/// every later non-blank line becomes one entry. Fields are split on ',' with no quoting support.
/// </summary>
/// <param name="file">Path of the csv file to read.</param>
/// <param name="rowkeyColumnName">
/// Case-insensitive regular-expression pattern. The value of a column whose header matches it
/// becomes the entry's RowKey instead of a data value (the last matching column wins).
/// </param>
/// <returns>
/// One entry per data line. Columns whose header matches SHAPE_Length or SHAPE_Area are
/// skipped; all remaining columns are parsed as doubles. RowKey is "" when no column matches
/// <paramref name="rowkeyColumnName"/>.
/// </returns>
/// <exception cref="FormatException">
/// A data line has fewer values than the header has columns, or a value is not a valid number.
/// </exception>
public PivotTable FileToPivotTable(String file, string rowkeyColumnName) {
    PivotTable vectors = new PivotTable();
    string[] fields = null;
    bool[] isIgnored = null;
    bool[] isRowKey = null;

    using (var reader = System.IO.File.OpenText(file)) {
        String line;
        int lineNumber = 0;
        while ((line = reader.ReadLine()) != null) {
            lineNumber++;

            // Blank lines (typically a trailing newline) carry no data.
            if (line.Trim().Length == 0) {
                continue;
            }

            if (fields == null) {
                // Header: classify each column once instead of re-running the regexes per row.
                fields = line.Split(',');
                isIgnored = new bool[fields.Length];
                isRowKey = new bool[fields.Length];
                for (int c = 0; c < fields.Length; c++) {
                    isIgnored[c] = Regex.IsMatch(fields[c], "(SHAPE_Length|SHAPE_Area)", RegexOptions.IgnoreCase);
                    // The shape check takes precedence over the row-key check.
                    isRowKey[c] = !isIgnored[c] && Regex.IsMatch(fields[c], rowkeyColumnName, RegexOptions.IgnoreCase);
                }
                continue;
            }

            string[] vals = line.Split(',');
            if (vals.Length < fields.Length) {
                throw new FormatException(
                    "Line " + lineNumber + " of '" + file + "' has " + vals.Length +
                    " values but the header declares " + fields.Length + " columns.");
            }

            Dictionary<string, double> vector = new Dictionary<string, double>();
            String geohash = "";
            for (int x = 0; x < fields.Length; x++) {
                if (isIgnored[x]) {
                    continue;
                }
                if (isRowKey[x]) {
                    geohash = vals[x];
                    continue;
                }
                // The file is machine-readable, so parse independently of the user's locale.
                double val = Double.Parse(vals[x], System.Globalization.CultureInfo.InvariantCulture);
                vector.Add(fields[x], val);
            }

            vectors.Add(new PivotTableEntry() { Data = vector, RowKey = geohash });
        }
    }

    return vectors;
}