Пример #1
0
        /// <summary>
        /// Scores every entry of <paramref name="vectors"/> against <paramref name="baseVector"/>
        /// with GetSparseSimilarity and collects the results in a new PivotTable.
        /// Each result stores its score under "cos_sim" and additionally receives every
        /// CalculateDiffs value whose key is not already present in the result's Data.
        /// </summary>
        /// <param name="baseVector">The entry every other entry is compared with.</param>
        /// <param name="vectors">The entries to score; its Count is handed to the progress callback.</param>
        /// <param name="logarithm">Forwarded unchanged to GetSparseSimilarity.</param>
        /// <param name="onlyBase">Forwarded unchanged to GetSparseSimilarity.</param>
        /// <returns>A PivotTable with one result per entry of <paramref name="vectors"/>, in iteration order.</returns>
        public PivotTable GetSparseSimilarites(PivotTableEntry baseVector, PivotTable vectors, bool logarithm, bool onlyBase)
        {
            this.pbarUpdate(vectors.Count, 0, 0);

            PivotTable results  = new PivotTable();
            int        finished = 0;

            foreach (PivotTableEntry candidate in vectors)
            {
                PivotTableAnalysisResult scored = GetSparseSimilarity(baseVector, candidate, logarithm, onlyBase);
                scored.Data.Add("cos_sim", scored.prob);

                // Merge the diff values, never overwriting a key the result already has.
                foreach (KeyValuePair<String, double> diff in CalculateDiffs(baseVector, candidate))
                {
                    if (scored.Data.ContainsKey(diff.Key))
                    {
                        continue;
                    }
                    scored.Data.Add(diff.Key, diff.Value);
                }

                results.Add(scored);

                // The callback receives the zero-based index of the entry just processed.
                this.pbarValueUpdate(finished);
                finished += 1;
            }

            return results;
        }
Пример #2
0
        /// <summary>
        /// Computes the cosine similarity of the sparse vectors stored in the Data dictionaries
        /// of <paramref name="a"/> and <paramref name="b"/>. The "OBJECTID" key never takes part
        /// in the comparison, and a key missing from one entry contributes 0 for that entry.
        /// Look here for more info:  https://upload.wikimedia.org/math/4/e/4/4e45dc7ae582130813e804f793f24ead.png
        /// </summary>
        /// <param name="a">The base vector.</param>
        /// <param name="b">The vector compared with the base; its RowKey and Context are copied onto the result.</param>
        /// <param name="logarithm">When true, every value v is replaced by Math.Log10(v + 1) before it is used.</param>
        /// <param name="onlyBase">When true, only the keys of <paramref name="a"/> are compared; otherwise the union of both key sets.</param>
        /// <returns>
        /// A new PivotTableAnalysisResult whose prob is the cosine similarity, or -1 when the dot
        /// product or the product of the two norms is exactly 0. This method adds nothing to the
        /// result's Data.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="a"/> or <paramref name="b"/> is null.</exception>
        public PivotTableAnalysisResult GetSparseSimilarity(PivotTableEntry a, PivotTableEntry b, bool logarithm, bool onlyBase)
        {
            // ArgumentNullException still derives from Exception, so existing catch blocks keep working.
            if (a == null)
            {
                throw new ArgumentNullException("a", "neither a nor b are allowed to be null");
            }
            if (b == null)
            {
                throw new ArgumentNullException("b", "neither a nor b are allowed to be null");
            }

            // Collect the keys to compare; "OBJECTID" is dropped once here so the loop below
            // does not need to test for it again.
            HashSet<string> keys = new HashSet<string>(a.Data.Keys);
            if (!onlyBase)
            {
                keys.UnionWith(b.Data.Keys);
            }
            keys.Remove("OBJECTID");

            double aSoS = 0d, bSoS = 0d, dotProd = 0d;

            foreach (string key in keys)
            {
                // A key absent from one side counts as 0 for that side.
                double x = a.Data.ContainsKey(key) ? a.Data[key] : 0d;
                double y = b.Data.ContainsKey(key) ? b.Data[key] : 0d;

                if (logarithm)
                {
                    x = Math.Log10(x + 1);
                    y = Math.Log10(y + 1);
                }

                aSoS    += x * x;
                bSoS    += y * y;
                dotProd += x * y;
            }

            double div = Math.Sqrt(aSoS) * Math.Sqrt(bSoS);

            // -1 is returned instead of a ratio when there is no overlap (dot product exactly 0)
            // or when the denominator would be 0.
            double similarity = (dotProd == 0 || div == 0d) ? -1d : dotProd / div;

            return new PivotTableAnalysisResult()
            {
                prob = similarity, RowKey = b.RowKey, Context = b.Context
            };
        }
Пример #3
0
        /// <summary>
        /// Compares two pivot tables row by row. Entries are paired by RowKey (geohash prefixes,
        /// going by the names used in this method) and one result is produced per distinct key.
        /// A key present in both tables is scored with GetSparseSimilarity (logarithm on, keys of
        /// both entries); the score is stored under "cos_sim" and |score - 1| * 100 under
        /// "percent_change". A key missing from either table yields a result with prob 0.
        /// Do not pass in columns that don't make sense to compare.
        /// </summary>
        /// <param name="ptA">The first pivot table.</param>
        /// <param name="ptB">The second pivot table.</param>
        /// <param name="label">Written to the Label of every result.</param>
        /// <param name="diffs">
        /// When true, the Data of a matched result starts from CalculateDiffs for the pair;
        /// when false, it starts from an empty dictionary.
        /// </param>
        /// <returns>A PivotTable containing one PivotTableAnalysisResult per distinct RowKey.</returns>
        public PivotTable DetectChange(PivotTable ptA, PivotTable ptB, string label, bool diffs)
        {
            PivotTable results = new PivotTable();
            // Lookup of each table's entries by row key.
            Dictionary<string, PivotTableEntry> entriesA = new Dictionary<string, PivotTableEntry>();
            Dictionary<string, PivotTableEntry> entriesB = new Dictionary<string, PivotTableEntry>();
            // Union of the row keys of both tables.
            HashSet<string> rowKeys = new HashSet<string>();

            foreach (PivotTableEntry entry in ptA)
            {
                entriesA.Add(entry.RowKey, entry);
                rowKeys.Add(entry.RowKey);
            }
            foreach (PivotTableEntry entry in ptB)
            {
                entriesB.Add(entry.RowKey, entry);
                rowKeys.Add(entry.RowKey);
            }

            this.pbarUpdate.Invoke(rowKeys.Count, 0, 0);

            // Placeholder Data for unmatched rows: every row key mapped to 0.
            // NOTE(review): this single instance is shared by all unmatched results, so a
            // change made through one result's Data is visible through the others - confirm
            // that callers treat it as read-only.
            Dictionary<string, double> zeroPerKey = new Dictionary<string, double>();
            foreach (String rowKey in rowKeys)
            {
                zeroPerKey.Add(rowKey, 0d);
            }

            int step = 0;
            foreach (string rowKey in rowKeys)
            {
                this.pbarValueUpdate.Invoke(step);
                step += 1;

                // TryGetValue leaves the variable null when the key is absent.
                PivotTableEntry entryA;
                PivotTableEntry entryB;
                entriesA.TryGetValue(rowKey, out entryA);
                entriesB.TryGetValue(rowKey, out entryB);

                if (entryA == null || entryB == null)
                {
                    results.Add(new PivotTableAnalysisResult()
                    {
                        RowKey = rowKey, prob = 0d, Data = zeroPerKey, Label = label
                    });
                    continue;
                }

                PivotTableAnalysisResult scored = GetSparseSimilarity(entryA, entryB, true, false);
                scored.RowKey = rowKey;
                scored.Label  = label;
                scored.Data   = diffs ? CalculateDiffs(entryA, entryB) : new Dictionary<string, double>();
                scored.Data.Add("cos_sim", scored.prob);
                scored.Data.Add("percent_change", Math.Abs(scored.prob - 1) * 100);
                results.Add(scored);
            }

            return results;
        }
 /// <summary>
 /// Computes the cosine similarity of the sparse vectors stored in the Data dictionaries
 /// of <paramref name="a"/> and <paramref name="b"/>. The "OBJECTID" key never takes part
 /// in the comparison, and a key missing from one entry contributes 0 for that entry.
 /// Look here for more info:  https://upload.wikimedia.org/math/4/e/4/4e45dc7ae582130813e804f793f24ead.png
 /// </summary>
 /// <param name="a">The base vector.</param>
 /// <param name="b">The vector compared with the base; its RowKey and Context are copied onto the result.</param>
 /// <param name="logarithm">When true, every value v is replaced by Math.Log10(v + 1) before it is used.</param>
 /// <param name="onlyBase">When true, only the keys of <paramref name="a"/> are compared; otherwise the union of both key sets.</param>
 /// <returns>
 /// A new PivotTableAnalysisResult whose prob is the cosine similarity, or -1 when the dot
 /// product or the product of the two norms is exactly 0. This method adds nothing to the
 /// result's Data.
 /// </returns>
 /// <exception cref="ArgumentNullException"><paramref name="a"/> or <paramref name="b"/> is null.</exception>
 public PivotTableAnalysisResult GetSparseSimilarity(PivotTableEntry a, PivotTableEntry b, bool logarithm, bool onlyBase)
 {
     // ArgumentNullException still derives from Exception, so existing catch blocks keep working.
     if (a == null) {
         throw new ArgumentNullException("a", "neither a nor b are allowed to be null");
     }
     if (b == null) {
         throw new ArgumentNullException("b", "neither a nor b are allowed to be null");
     }

     // Collect the keys to compare; "OBJECTID" is dropped once here so the loop below
     // does not need to test for it again.
     HashSet<string> keys = new HashSet<string>(a.Data.Keys);
     if (!onlyBase) {
         keys.UnionWith(b.Data.Keys);
     }
     keys.Remove("OBJECTID");

     double aSoS = 0d, bSoS = 0d, dotProd = 0d;
     foreach (string key in keys) {
         // A key absent from one side counts as 0 for that side.
         double x = a.Data.ContainsKey(key) ? a.Data[key] : 0d;
         double y = b.Data.ContainsKey(key) ? b.Data[key] : 0d;
         if (logarithm) {
             x = Math.Log10(x + 1);
             y = Math.Log10(y + 1);
         }
         aSoS += x * x;
         bSoS += y * y;
         dotProd += x * y;
     }

     double div = Math.Sqrt(aSoS) * Math.Sqrt(bSoS);

     // -1 is returned instead of a ratio when there is no overlap (dot product exactly 0)
     // or when the denominator would be 0.
     double similarity = (dotProd == 0 || div == 0d) ? -1d : dotProd / div;

     return new PivotTableAnalysisResult() { prob = similarity, RowKey = b.RowKey, Context = b.Context };
 }