/// <summary>
        /// ABOD algorithm implement: scores every stored point with ABOF, sorts the
        /// scores and hands them to the save helpers together with a run summary.
        /// </summary>
        /// <param name="topK">topK points are outlier</param>
        /// <param name="fpath">path passed to saveDPoint, saveOutlier and strfprintf</param>
        /// <param name="timeStart">start time used for the reported run time</param>
        public static void ABOD(int topK, String fpath, DateTime timeStart)
        {
            // Pair every stored packet with its index.
            List<DStatus> indexed = new List<DStatus>(LENGTH);
            for (int id = 0; id < LENGTH; id++)
            {
                DStatusPacket packet = data.getDataByID(id);
                indexed.Add(addIndexToPacket(id, packet));
            }

            // One (ID, ABOF) entry per indexed packet.
            List<DPoint> scores = new List<DPoint>(LENGTH);
            for (int pos = 0; pos < indexed.Count; pos++)
            {
                DStatusPacket current = indexed[pos];
                double score = ABOF(indexed, current, pos);
                scores.Add(new DPoint(current.ID, score));
            }

            // Order the list by ABOF value.
            scores.Sort();

            // The end time is taken before the results are written out.
            DateTime timeEnd = DateTime.Now;

            saveDPoint(scores, fpath);
            saveOutlier(scores, topK, fpath);

            string elapsed = calcRunTime(timeStart, timeEnd);
            string summary = "======================================"
                + "\r\nN\t" + LENGTH
                + "\r\nK\t" + DIMENSION
                + "\r\ntopK\t" + topK
                + "\r\nStart Time\t" + timeStart.ToString("yyyy-MM-dd HH:mm:ss")
                + "\r\nEnd Time\t" + timeEnd.ToString("yyyy-MM-dd HH:mm:ss")
                + "\r\nRun Time\t" + elapsed;
            strfprintf(summary, fpath);

            Console.WriteLine("ABOD Accomplished!");
        }
 // Orders two points by their Value; the result is whatever Value's own
 // CompareTo returns.
 public int CompareTo(DPoint other)
 {
     var mine = this.Value;
     var theirs = other.Value;
     return mine.CompareTo(theirs);
 }
Example #3
0
 // Comparison is delegated to the Value members: this point's Value is
 // compared against the other point's Value.
 public int CompareTo(DPoint other)
 {
     var otherValue = other.Value;
     int order = this.Value.CompareTo(otherValue);
     return order;
 }
        /// <summary>
        /// LOF: DBOD(Density Based Outlier Detection) algorithm implement.
        /// Computes a LOF value for every point and appends to <paramref name="fpath"/>:
        /// the full list sorted ascending by value, the IDs of the points with the
        /// largest values, and a run summary.
        /// </summary>
        /// <param name="MinPts">neighbourhood size passed to getkNNIndex and LOF</param>
        /// <param name="topK">number of highest-valued point IDs to list as outliers;
        /// capped at the number of scored points</param>
        /// <param name="fpath">file the results are appended to</param>
        /// <param name="timeStart">start time used for the reported run time</param>
        public static void DBOD(int MinPts, int topK, string fpath, DateTime timeStart)
        {
            // Pass 1: nearest-neighbour index list for every point.
            List<DkNNborIndex> kNNIndexOfAll = new List<DkNNborIndex>(LENGTH);
            for (int i = 0; i < LENGTH; i++)
            {
                DStatusPacket P = data.getDataByID(i);
                // get sorted distance list of point P
                List<DPoint> dist_list_P = sortIndexOfNborByDist(P, i);
                // get index of top k nearest neighbor
                List<int> kNNindex_P = getkNNIndex(dist_list_P, MinPts);

                kNNIndexOfAll.Add(new DkNNborIndex(i, kNNindex_P));
            }

            // Pass 2: LOF value of every point, keyed by the packet's ID.
            List<DPoint> LOFList = new List<DPoint>(LENGTH);
            for (int i = 0; i < LENGTH; i++)
            {
                DStatusPacket P = data.getDataByID(i);
                double LOF_P = LOF(kNNIndexOfAll, i, MinPts);

                LOFList.Add(new DPoint(P.ID, LOF_P));
            }
            LOFList.Sort();

            DateTime timeEnd = DateTime.Now;

            // save result list...
            // "using" guarantees the writer is flushed and closed even when a write throws.
            using (StreamWriter sw = File.AppendText(fpath))
            {
                sw.WriteLine("======================================");
                sw.WriteLine("ID\tValue");
                foreach (DPoint x in LOFList)
                {
                    sw.WriteLine(x.ID + "\t" + x.Value);
                }

                // Largest values first, so the leading entries are the reported outliers.
                LOFList.Reverse();

                // Never index past the end of the list when topK exceeds the point count.
                int outlierCount = Math.Min(topK, LOFList.Count);

                sw.WriteLine("==================outlier====================");
                for (int j = 0; j < outlierCount; j++)
                {
                    sw.Write(LOFList[j].ID + "\t");
                }

                string runTime = calcRunTime(timeStart, timeEnd);

                string str = "\r\n======================================"
                    + "\r\nN\t" + LENGTH
                    + "\r\nK\t" + DIMENSION
                    + "\r\nMinPts\t" + MinPts
                    + "\r\nStart Time\t" + timeStart.ToString("yyyy-MM-dd HH:mm:ss")
                    + "\r\nEnd Time\t" + timeEnd.ToString("yyyy-MM-dd HH:mm:ss")
                    + "\r\nRun Time\t" + runTime;
                sw.WriteLine(str);
            }

            Console.WriteLine("LOF Accomplished!");
        }
        /// <summary>
        /// Computes the LB-ABOF value of point <paramref name="A"/>: the pairwise angle
        /// terms are evaluated over the <paramref name="kNN"/> nearest neighbours only,
        /// and a remainder term (R2) built from the distances to all other points
        /// adjusts the result. Based on code by H-P. Kriegel.
        /// </summary>
        /// <param name="A">point to score</param>
        /// <param name="index">position of <paramref name="A"/> in the data store; this
        /// position is skipped when scanning the other points</param>
        /// <param name="kNN">maximum number of nearest neighbours kept for the pairwise terms</param>
        /// <returns>the LB-ABOF value; NaN when no other point lies at a positive distance
        /// (the normalising sum is then zero)</returns>
        private static double calcLBABOF(DStatusPacket A, int index, int kNN)
        {
            // Current nearest neighbours of A as (index, squared distance) entries.
            List<DPoint> NkIndex = new List<DPoint>();

            // Compute nearest neighbors and distances.
            double simAA = calcDotProduct(A, A);
            // Sum of 1./(|AB|) and 1./(|AB|^2); for computing R2.
            double sumid = 0, sumisqd = 0;
            for (int j = 0; j < LENGTH; j++)
            {
                if (index == j)
                {
                    continue;
                }
                DStatusPacket nB = data.getDataByID(j);
                double simBB = calcDotProduct(nB, nB);
                double simAB = calcDotProduct(A, nB);
                // |AB|^2 = <A,A> + <B,B> - 2<A,B>
                double sqdAB = simAA + simBB - simAB - simAB;

                // Written as !(x > 0) so that NaN is skipped together with zero/negative values.
                if (!(sqdAB > 0))
                {
                    continue;
                }
                sumid += 1 / Math.Sqrt(sqdAB);
                sumisqd += 1 / sqdAB;

                // Update the neighbour set: fill up to kNN entries, then replace the
                // farthest entry whenever a closer point shows up.
                DPoint temp = new DPoint(j, sqdAB);
                if (NkIndex.Count < kNN)
                {
                    NkIndex.Add(temp);
                }
                else if (NkIndex.Count > 0)     // guard: kNN <= 0 keeps the set empty
                {
                    // Scan for the farthest neighbour once and reuse it for the removal.
                    DPoint farthest = NkIndex.Max();
                    if (sqdAB < farthest.Value)
                    {
                        NkIndex.Remove(farthest);
                        NkIndex.Add(temp);
                    }
                }
            }

            // Compute FastABOD approximation, adjust for lower bound.
            // LB-ABOF is defined via a numerically unstable formula.
            // Variance as E(X^2)-E(X)^2 suffers from catastrophic cancellation!
            // TODO: ensure numerical precision!
            double nnsum = 0, nnsumsq = 0, nnsumisqd = 0;
            for (int k = 0; k < NkIndex.Count; k++)
            {
                DPoint iB = NkIndex[k];
                DStatusPacket nB = data.getDataByID(iB.ID);
                // Every stored squared distance is > 0 (filtered above), so no re-check is needed.
                double sqdAB = iB.Value;
                double simAB = calcDotProduct(A, nB);

                // Visit each unordered pair {B, C} exactly once. The closing formulas
                // multiply the pair sums by 2 to cover both orderings; looping over
                // every l != k would count each pair twice on top of that.
                for (int l = k + 1; l < NkIndex.Count; l++)
                {
                    DPoint iC = NkIndex[l];
                    DStatusPacket nC = data.getDataByID(iC.ID);
                    double sqdAC = iC.Value;
                    double simAC = calcDotProduct(A, nC);

                    // Exploit bilinearity of scalar product:
                    // <B-A, C-A> = <B, C-A> - <A,C-A>
                    // = <B,C> - <B,A> - <A,C> + <A,A>
                    double simBC = calcDotProduct(nB, nC);
                    double numerator = simBC - simAB - simAC + simAA;
                    double sqweight = 1 / (sqdAB * sqdAC);
                    double weight = Math.Sqrt(sqweight);
                    double val = numerator * sqweight;
                    nnsum += val * weight;
                    nnsumsq += val * val * weight;
                    nnsumisqd += sqweight;
                }
            }
            // Remaining weight, term R2:
            double r2 = sumisqd * sumisqd - 2 * nnsumisqd;
            double tmp = (2 * nnsum + r2) / (sumid * sumid);
            double lbabof = 2 * nnsumsq / (sumid * sumid) - tmp * tmp;

            return lbabof;
        }
        /// <summary>
        /// LB-ABOD algorithm implement: ranks all points by LB-ABOF, computes the exact
        /// ABOF for the best candidates only, and refines the result until no remaining
        /// candidate's LB-ABOF is below the largest ABOF in the result list. The candidate
        /// list, the result list and a run summary are written via the save helpers.
        /// </summary>
        /// <param name="kNN">number of nearest neighbours passed to calcLBABOF</param>
        /// <param name="topK">size of the result list (number of reported outliers)</param>
        /// <param name="fpath">path passed to saveDPoint, saveOutlier and strfprintf</param>
        /// <param name="timeStart">start time used for the reported run time</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="topK"/> is smaller than 1 or larger than the number of points.
        /// </exception>
        public static void LB_ABOD(int kNN, int topK, string fpath, DateTime timeStart)
        {
            // The result list must hold at least one point and cannot hold more
            // points than exist; reject anything else before doing any work.
            if (topK < 1 || topK > LENGTH)
            {
                throw new ArgumentOutOfRangeException("topK", topK,
                    "topK must be at least 1 and no larger than the number of points.");
            }

            List<DStatus> D = new List<DStatus>(LENGTH);
            for (int i = 0; i < LENGTH; i++)
            {
                DStatusPacket tmpPacket = data.getDataByID(i);
                D.Add(addIndexToPacket(i, tmpPacket));
            }

            /*
             * (step 2) Compute LB-ABOF for each point  A ∈ D.
             * (step 3) Organize the database objects in a candidate list ordered ascending
             *          w.r.t. their assigned LB-ABOF.
             */
            List<DPoint> candidateList = new List<DPoint>(LENGTH);
            for (int i = 0; i < LENGTH; i++)
            {
                DStatusPacket A = data.getDataByID(i);
                double LB_ABOF_A = calcLBABOF(A, i, kNN);
                candidateList.Add(new DPoint(i, LB_ABOF_A));
            }
            candidateList.Sort();           // sort ascending

            saveDPoint(candidateList, fpath);

            /*
             * (step 4) Determine the exact ABOF for the first @topK objects in the candidate
             *          list and insert them into the current result list.
             *
             * "next" is a cursor into the sorted candidate list: everything before it has
             * been consumed. Using a cursor instead of RemoveAt(i) inside the loop keeps the
             * indices stable, so no candidate is skipped.
             */
            int Counter = 0;        // initial insertions plus later replacements

            // A plain list keeps points whose ABOF values are equal; a set ordered by
            // value alone would treat them as duplicates and drop one.
            List<DPoint> resultList = new List<DPoint>(topK);

            int next = 0;
            for (; next < topK; next++)
            {
                int indexB = (int)candidateList[next].ID;
                DStatusPacket B = data.getDataByID(indexB);
                double ABOF_B = ABOF(D, B, indexB);

                resultList.Add(new DPoint(B.ID, ABOF_B));
                Counter++;
            }

            /*
             * (step 6) if the largest ABOF in the result list < the smallest approximated
             *          ABOF in the candidate list, terminate; else, proceed with step 5.
             * (step 5) Remove and examine the next best candidate C from the candidate list
             *          and determine the exact ABOF, if the ABOF of C < the largest ABOF
             *          of an object A in the result list, remove A from the result list and
             *          insert C into the result list.
             */
            while (next < candidateList.Count)
            {
                // Both bounds are re-read on every pass so the stop test never uses a
                // stale value.
                DPoint worst = resultList.Max();
                double Max_ABOF = worst.Value;
                double Min_LBABOF = candidateList[next].Value;
                if (!(Max_ABOF > Min_LBABOF))
                {
                    break;
                }

                int indexC = (int)candidateList[next].ID;      // next best candidate
                next++;
                DStatusPacket C = data.getDataByID(indexC);
                double ABOF_C = ABOF(D, C, indexC);

                if (ABOF_C < Max_ABOF)
                {
                    resultList.Remove(worst);
                    resultList.Add(new DPoint(C.ID, ABOF_C));
                    Counter++;
                }
            }

            // Report the result ordered ascending by ABOF.
            resultList.Sort();

            //save result...

            DateTime timeEnd = DateTime.Now;

            saveDPoint(resultList, fpath);
            saveOutlier(resultList, topK, fpath);

            // Count result points whose ID is a multiple of 50.
            // NOTE(review): presumably the test data marks every 50th point as a true
            // outlier — confirm against the data generator.
            int numOutlier = 0;
            for (int k = 0; k < resultList.Count; k++)
            {
                if (resultList[k].ID % 50 == 0)
                {
                    numOutlier++;
                }
            }
            // Cast first: int / int would truncate the ratio to 0 or 1.
            double precision = (double)numOutlier / topK;

            string runTime = calcRunTime(timeStart, timeEnd);
            string str = "======================================"
                + "\r\nN:\t" + LENGTH
                + "\r\nD:\t" + DIMENSION
                + "\r\nkNN:\t" + kNN
                + "\r\ntopK:\t" + topK
                + "\r\n======================================"
                + "\r\nCounter:\t" + Counter
                + "\r\nPrecision:\t" + precision
                + "\r\nStart Time:\t" + timeStart.ToString("yyyy-MM-dd HH:mm:ss")
                + "\r\nEnd Time:\t" + timeEnd.ToString("yyyy-MM-dd HH:mm:ss")
                + "\r\nRun Time:\t" + runTime;
            strfprintf(str, fpath);

            Console.WriteLine("LB-ABOD Accomplished!");
        }
        /// <summary>
        /// Fast ABOD algorithm implement: scores every stored point with ApproxABOF,
        /// sorts the scores and hands them to the save helpers with a run summary.
        /// </summary>
        /// <param name="kNN">neighbour count passed to ApproxABOF</param>
        /// <param name="topK">count passed to saveOutlier and echoed in the summary</param>
        /// <param name="fpath">path passed to saveDPoint, saveOutlier and strfprintf</param>
        /// <param name="timeStart">start time used for the reported run time</param>
        public static void FastABOD(int kNN, int topK, string fpath, DateTime timeStart)
        {
            List<DPoint> ranking = new List<DPoint>(LENGTH);

            // One (ID, approximate ABOF) entry per stored point.
            for (int idx = 0; idx < LENGTH; idx++)
            {
                DStatusPacket point = data.getDataByID(idx);
                double score = ApproxABOF(point, idx, kNN);
                ranking.Add(new DPoint(point.ID, score));
            }
            ranking.Sort();

            // The end time is taken before the results are written out.
            DateTime timeEnd = DateTime.Now;

            saveDPoint(ranking, fpath);
            saveOutlier(ranking, topK, fpath);

            string elapsed = calcRunTime(timeStart, timeEnd);
            string summary = "======================================"
                + "\r\nN\t" + LENGTH
                + "\r\nK\t" + DIMENSION
                + "\r\nkNN\t" + kNN
                + "\r\ntopK\t" + topK
                + "\r\nStart Time: " + timeStart.ToString("yyyy-MM-dd HH:mm:ss")
                + "\r\nEnd Time: " + timeEnd.ToString("yyyy-MM-dd HH:mm:ss")
                + "\r\nRun Time: " + elapsed;
            strfprintf(summary, fpath);

            Console.WriteLine("Fast ABOD Accomplished!");
        }
        /// <summary>
        /// Collects every point other than the one at <paramref name="index"/> together
        /// with its distance read from distTable, and returns the list sorted.
        /// </summary>
        /// <param name="A">the reference point (not read here; the lookup uses
        /// <paramref name="index"/> only)</param>
        /// <param name="index">index of the reference point in the database</param>
        /// <returns>sorted list of (neighbour index, distance) entries</returns>
        private static List<DPoint> sortIndexOfNborByDist(DStatusPacket A, int index)
        {
            List<DPoint> neighbours = new List<DPoint>(LENGTH - 1);

            for (int other = 0; other < LENGTH; other++)
            {
                if (other != index)
                {
                    // distTable is always read with the smaller index first.
                    int row = other < index ? other : index;
                    int col = other < index ? index : other;
                    double dist = distTable[row, col];   // dist(A, B)

                    // "other" is the index of B in the database.
                    neighbours.Add(new DPoint(other, dist));
                }
            }

            // List<DPoint>.Sort() orders the entries via DPoint's own comparison.
            neighbours.Sort();
            return neighbours;
        }