/// <summary>
/// Exact ABOD pass: computes an ABOF score for every point, sorts the scores
/// ascending and appends the score list, the outlier list and a run summary
/// to the result file through the save helpers.
/// </summary>
/// <param name="topK">Number of points reported as outliers (forwarded to saveOutlier).</param>
/// <param name="fpath">Path of the result file handed to the save helpers.</param>
/// <param name="timeStart">Start timestamp used for the run-time summary.</param>
public static void ABOD(int topK, String fpath, DateTime timeStart)
{
    List<DPoint> ABOFList = new List<DPoint>(LENGTH);
    List<DStatus> D = new List<DStatus>(LENGTH);

    // Build the complete indexed working set first: ABOF is handed the whole list.
    for (int i = 0; i < LENGTH; i++)
    {
        DStatusPacket packet = data.getDataByID(i);
        D.Add(addIndexToPacket(i, packet));
    }

    for (int j = 0; j < D.Count; j++)
    {
        DStatusPacket A = D[j];
        double ABOF_A = ABOF(D, A, j);
        ABOFList.Add(new DPoint(A.ID, ABOF_A));
    }

    ABOFList.Sort(); // Sort ABOF list by ABOF value

    // The end time is taken before the results are written, so file I/O is not timed.
    DateTime timeEnd = DateTime.Now;

    saveDPoint(ABOFList, fpath);
    saveOutlier(ABOFList, topK, fpath);

    string runTime = calcRunTime(timeStart, timeEnd);
    string str = "======================================"
        + "\r\nN\t" + LENGTH
        + "\r\nK\t" + DIMENSION
        + "\r\ntopK\t" + topK
        + "\r\nStart Time\t" + timeStart.ToString("yyyy-MM-dd HH:mm:ss")
        + "\r\nEnd Time\t" + timeEnd.ToString("yyyy-MM-dd HH:mm:ss")
        + "\r\nRun Time\t" + runTime;
    strfprintf(str, fpath);

    Console.WriteLine("ABOD Accomplished!");
}
/// <summary>
/// Orders two points by their <c>Value</c>.
/// </summary>
/// <param name="other">Point to compare against.</param>
/// <returns>The result of comparing this point's Value with <paramref name="other"/>'s Value.</returns>
public int CompareTo(DPoint other)
{
    int order = Value.CompareTo(other.Value);
    return order;
}
/// <summary>
/// Compares this point with <paramref name="other"/> using only the Value member.
/// </summary>
/// <param name="other">Point whose Value is compared with this one's.</param>
/// <returns>Whatever <c>Value.CompareTo</c> yields for the two values.</returns>
public int CompareTo(DPoint other)
{
    var mine = this.Value;
    var theirs = other.Value;
    return mine.CompareTo(theirs);
}
/// <summary>
/// LOF: DBOD (Density Based Outlier Detection). Computes a LOF score for every
/// point, sorts the scores ascending and appends the score table, the IDs of
/// the highest-scoring points and a run summary to the result file.
/// </summary>
/// <param name="MinPts">Neighbourhood size forwarded to getkNNIndex and LOF.</param>
/// <param name="topK">
/// Number of highest-LOF point IDs written to the outlier section. Values larger
/// than the number of scored points are clamped to that number.
/// </param>
/// <param name="fpath">File the results are appended to.</param>
/// <param name="timeStart">Start timestamp used for the run-time summary.</param>
public static void DBOD(int MinPts, int topK, string fpath, DateTime timeStart)
{
    // Pass 1: nearest-neighbour index list for every point.
    List<DkNNborIndex> kNNIndexOfAll = new List<DkNNborIndex>(LENGTH);
    for (int i = 0; i < LENGTH; i++)
    {
        DStatusPacket P = data.getDataByID(i);
        // get sorted distance list of point P
        List<DPoint> dist_list_P = sortIndexOfNborByDist(P, i);
        // get index of top k nearest neighbor
        List<int> kNNindex_P = getkNNIndex(dist_list_P, MinPts);
        kNNIndexOfAll.Add(new DkNNborIndex(i, kNNindex_P));
    }

    // Pass 2: LOF score per point.
    List<DPoint> LOFList = new List<DPoint>(LENGTH);
    for (int i = 0; i < LENGTH; i++)
    {
        DStatusPacket P = data.getDataByID(i);
        double LOF_P = LOF(kNNIndexOfAll, i, MinPts);
        LOFList.Add(new DPoint(P.ID, LOF_P));
    }

    LOFList.Sort();

    // The end time is taken before the results are written, so file I/O is not timed.
    DateTime timeEnd = DateTime.Now;

    // save result list...
    // 'using' guarantees the writer is flushed and closed even if a write throws.
    using (StreamWriter sw = File.AppendText(fpath))
    {
        sw.WriteLine("======================================");
        sw.WriteLine("ID\tValue");
        foreach (DPoint x in LOFList)
        {
            sw.WriteLine(x.ID + "\t" + x.Value);
        }

        sw.WriteLine("==================outlier====================");
        // Walk the ascending list from its end so the largest LOF comes first;
        // never read past the number of scored points.
        int shown = topK < LOFList.Count ? topK : LOFList.Count;
        for (int j = 0; j < shown; j++)
        {
            sw.Write(LOFList[LOFList.Count - 1 - j].ID + "\t");
        }

        string runTime = calcRunTime(timeStart, timeEnd);
        string str = "\r\n======================================"
            + "\r\nN\t" + LENGTH
            + "\r\nK\t" + DIMENSION
            + "\r\nMinPts\t" + MinPts
            + "\r\nStart Time\t" + timeStart.ToString("yyyy-MM-dd HH:mm:ss")
            + "\r\nEnd Time\t" + timeEnd.ToString("yyyy-MM-dd HH:mm:ss")
            + "\r\nRun Time\t" + runTime;
        sw.WriteLine(str);
    }

    Console.WriteLine("LOF Accomplished!");
}
/// <summary>
/// Computes the LB-ABOF score of point <paramref name="A"/>: the pairwise
/// angle terms are evaluated only over the nearest neighbours of A, and the
/// contribution of all remaining pairs is bounded through the term R2.
/// The original header credits the code to "H-P. Kerigel"
/// (presumably H.-P. Kriegel, co-author of ABOD; confirm).
/// </summary>
/// <param name="A">Point being scored.</param>
/// <param name="index">Index of A in the data set; that index is skipped in the scan.</param>
/// <param name="kNN">Maximum number of nearest neighbours kept for the pairwise term.</param>
/// <returns>
/// The LB-ABOF value. If no other point has a positive squared distance to A,
/// the normaliser is zero and the result is NaN.
/// </returns>
private static double calcLBABOF(DStatusPacket A, int index, int kNN)
{
    // Nearest neighbours found so far: ID = index of B, Value = squared distance |AB|^2.
    List<DPoint> NkIndex = new List<DPoint>();

    // Compute nearest neighbors and distances.
    double simAA = calcDotProduct(A, A);

    // Sum of 1./(|AB|) and 1./(|AB|^2); for computing R2.
    double sumid = 0, sumisqd = 0;
    for (int j = 0; j < LENGTH; j++)
    {
        if (index == j)
        {
            continue;
        }

        DStatusPacket nB = data.getDataByID(j);
        double simBB = calcDotProduct(nB, nB);
        double simAB = calcDotProduct(A, nB);
        // |AB|^2 = <A,A> + <B,B> - 2<A,B>
        double sqdAB = simAA + simBB - simAB - simAB;
        if (!(sqdAB > 0))
        {
            // Duplicate of A (or a NaN distance): contributes nothing.
            continue;
        }

        sumid += 1 / Math.Sqrt(sqdAB);
        sumisqd += 1 / sqdAB;

        // Update the bounded neighbour list.
        if (NkIndex.Count < kNN)
        {
            NkIndex.Add(new DPoint(j, sqdAB));
        }
        else if (NkIndex.Count > 0) // stays empty when kNN <= 0
        {
            // Look up the current farthest neighbour once instead of twice.
            DPoint farthest = NkIndex.Max();
            if (sqdAB < farthest.Value)
            {
                // Remove the largest distance and keep the closer point instead.
                NkIndex.Remove(farthest);
                NkIndex.Add(new DPoint(j, sqdAB));
            }
        }
    }

    // Neighbour packets and <A,B> depend only on the neighbour, so fetch them once
    // rather than inside the pair loop.
    int neighbourCount = NkIndex.Count;
    DStatusPacket[] neighbours = new DStatusPacket[neighbourCount];
    double[] simToA = new double[neighbourCount];
    for (int k = 0; k < neighbourCount; k++)
    {
        neighbours[k] = data.getDataByID(NkIndex[k].ID);
        simToA[k] = calcDotProduct(A, neighbours[k]);
    }

    // Compute FastABOD approximation, adjust for lower bound.
    // LB-ABOF is defined via a numerically unstable formula.
    // Variance as E(X^2)-E(X)^2 suffers from catastrophic cancellation!
    // TODO: ensure numerical precision!
    double nnsum = 0, nnsumsq = 0, nnsumisqd = 0;
    for (int k = 0; k < neighbourCount; k++)
    {
        double sqdAB = NkIndex[k].Value;
        double simAB = simToA[k];
        if (!(sqdAB > 0))
        {
            continue;
        }

        // Each unordered pair {B, C} is visited exactly once (l > k). The factors
        // of 2 in the closing formula turn these sums into sums over ordered pairs;
        // visiting both (k, l) and (l, k) here would count every pair twice.
        for (int l = k + 1; l < neighbourCount; l++)
        {
            double sqdAC = NkIndex[l].Value;
            double simAC = simToA[l];
            if (!(sqdAC > 0))
            {
                continue;
            }

            // Exploit bilinearity of scalar product:
            // <B-A, C-A> = <B, C-A> - <A,C-A>
            //            = <B,C> - <B,A> - <A,C> + <A,A>
            double simBC = calcDotProduct(neighbours[k], neighbours[l]);
            double numerator = simBC - simAB - simAC + simAA;
            double sqweight = 1 / (sqdAB * sqdAC);
            double weight = Math.Sqrt(sqweight);
            double val = numerator * sqweight;
            nnsum += val * weight;
            nnsumsq += val * val * weight;
            nnsumisqd += sqweight;
        }
    }

    // Remaining weight, term R2:
    double r2 = sumisqd * sumisqd - 2 * nnsumisqd;
    double tmp = (2 * nnsum + r2) / (sumid * sumid);
    double lbabof = 2 * nnsumsq / (sumid * sumid) - tmp * tmp;
    return lbabof;
}
/// <summary>
/// LB-ABOD: ranks all points by their LB-ABOF lower bound, then refines the
/// <paramref name="topK"/> best candidates with the exact ABOF until no remaining
/// candidate can still enter the result. Appends the candidate list, the
/// result list, the outlier list and a run summary to the result file.
/// </summary>
/// <param name="kNN">Neighbourhood size forwarded to calcLBABOF.</param>
/// <param name="topK">Number of outliers to report; must be between 1 and the number of points.</param>
/// <param name="fpath">Path of the result file handed to the save helpers.</param>
/// <param name="timeStart">Start timestamp used for the run-time summary.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="topK"/> is smaller than 1 or larger than the number of points.
/// </exception>
public static void LB_ABOD(int kNN, int topK, string fpath, DateTime timeStart)
{
    // Fail before the expensive scoring instead of crashing in the middle of it.
    if (topK < 1 || topK > LENGTH)
    {
        throw new ArgumentOutOfRangeException(
            "topK", topK, "topK must be between 1 and the number of points.");
    }

    List<DStatus> D = new List<DStatus>(LENGTH);
    for (int i = 0; i < LENGTH; i++)
    {
        DStatusPacket packet = data.getDataByID(i);
        D.Add(addIndexToPacket(i, packet));
    }

    /*
     * (step 2) Compute LB-ABOF for each point A in D.
     * (step 3) Organize the database objects in a candidate list ordered ascending
     *          w.r.t. their assigned LB-ABOF.
     */
    List<DPoint> candidateList = new List<DPoint>(LENGTH);
    for (int i = 0; i < LENGTH; i++)
    {
        DStatusPacket A = data.getDataByID(i);
        double LB_ABOF_A = calcLBABOF(A, i, kNN);
        candidateList.Add(new DPoint(i, LB_ABOF_A));
    }
    candidateList.Sort(); // sort ascending
    saveDPoint(candidateList, fpath);

    /*
     * (step 4) Determine the exact ABOF for the first topK objects in the candidate
     *          list and move them into the current result list.
     *
     * 'next' is a cursor over the sorted candidate list: everything before it has
     * been consumed. Removing element i while i advances would skip every second
     * candidate, so the list itself is left untouched.
     */
    int Counter = 0; // counts insertions into the result list
    int next = 0;
    // A plain list is used on purpose: a set ordered by Value alone would silently
    // drop points whose ABOF equals that of a point already in the result.
    List<DPoint> resultList = new List<DPoint>(topK);
    for (; next < topK; next++)
    {
        int indexB = (int)candidateList[next].ID;
        DStatusPacket B = data.getDataByID(indexB);
        double ABOF_B = ABOF(D, B, indexB);
        resultList.Add(new DPoint(B.ID, ABOF_B));
        Counter++;
    }

    /*
     * (step 6) If the largest ABOF in the result list < the smallest approximated
     *          ABOF in the candidate list, terminate; else, proceed with step 5.
     * (step 5) Remove and examine the next best candidate C from the candidate list
     *          and determine the exact ABOF; if the ABOF of C < the largest ABOF
     *          of an object A in the result list, remove A from the result list and
     *          insert C into the result list.
     */
    while (next < candidateList.Count)
    {
        // Locate the current worst (largest ABOF) entry of the result list.
        int worst = 0;
        for (int r = 1; r < resultList.Count; r++)
        {
            if (resultList[r].Value > resultList[worst].Value)
            {
                worst = r;
            }
        }
        double Max_ABOF = resultList[worst].Value;

        // The bound is re-read on every iteration, so it always belongs to the
        // candidate that is actually next.
        double Min_LBABOF = candidateList[next].Value;
        if (!(Max_ABOF > Min_LBABOF))
        {
            break;
        }

        int indexC = (int)candidateList[next].ID; // next best candidate
        DStatusPacket C = data.getDataByID(indexC);
        double ABOF_C = ABOF(D, C, indexC);
        next++;

        if (ABOF_C < Max_ABOF)
        {
            resultList[worst] = new DPoint(C.ID, ABOF_C);
            Counter++;
        }
    }

    resultList.Sort(); // ascending by ABOF

    //save result...
    DateTime timeEnd = DateTime.Now;
    saveDPoint(resultList, fpath);
    saveOutlier(resultList, topK, fpath);

    // NOTE(review): assumes the true outliers are the points whose ID is a
    // multiple of 50; confirm against the data generator.
    int numOutlier = 0;
    for (int k = 0; k < resultList.Count; k++)
    {
        if (resultList[k].ID % 50 == 0)
        {
            numOutlier++;
        }
    }
    // Floating-point division: integer division could only ever yield 0 or 1.
    double precision = (double)numOutlier / topK;

    string runTime = calcRunTime(timeStart, timeEnd);
    string str = "======================================"
        + "\r\nN:\t" + LENGTH
        + "\r\nD:\t" + DIMENSION
        + "\r\nkNN:\t" + kNN
        + "\r\ntopK:\t" + topK
        + "\r\n======================================"
        + "\r\nCounter:\t" + Counter
        + "\r\nPrecision:\t" + precision
        + "\r\nStart Time:\t" + timeStart.ToString("yyyy-MM-dd HH:mm:ss")
        + "\r\nEnd Time:\t" + timeEnd.ToString("yyyy-MM-dd HH:mm:ss")
        + "\r\nRun Time:\t" + runTime;
    strfprintf(str, fpath);

    Console.WriteLine("LB-ABOD Accomplished!");
}
/// <summary>
/// FastABOD: scores every point with ApproxABOF, sorts the scores and hands
/// them, together with a run summary, to the save helpers.
/// </summary>
/// <param name="kNN">Neighbourhood size forwarded to ApproxABOF.</param>
/// <param name="topK">Number of outliers forwarded to saveOutlier.</param>
/// <param name="fpath">Path of the result file handed to the save helpers.</param>
/// <param name="timeStart">Start timestamp used for the run-time summary.</param>
public static void FastABOD(int kNN, int topK, string fpath, DateTime timeStart)
{
    List<DPoint> scores = new List<DPoint>(LENGTH);

    int idx = 0;
    while (idx < LENGTH)
    {
        DStatusPacket point = data.getDataByID(idx);
        double score = ApproxABOF(point, idx, kNN);
        scores.Add(new DPoint(point.ID, score));
        idx++;
    }

    scores.Sort();

    // End time is captured before the results are written.
    DateTime timeEnd = DateTime.Now;

    saveDPoint(scores, fpath);
    saveOutlier(scores, topK, fpath);

    string elapsed = calcRunTime(timeStart, timeEnd);

    // Assemble the summary one line at a time.
    string summary = "======================================";
    summary += "\r\nN\t" + LENGTH;
    summary += "\r\nK\t" + DIMENSION;
    summary += "\r\nkNN\t" + kNN;
    summary += "\r\ntopK\t" + topK;
    summary += "\r\nStart Time: " + timeStart.ToString("yyyy-MM-dd HH:mm:ss");
    summary += "\r\nEnd Time: " + timeEnd.ToString("yyyy-MM-dd HH:mm:ss");
    summary += "\r\nRun Time: " + elapsed;
    strfprintf(summary, fpath);

    Console.WriteLine("Fast ABOD Accomplished!");
}
/// <summary>
/// Lists every point other than the one at <paramref name="index"/> together
/// with its distance to that point, sorted by the list's default ordering.
/// </summary>
/// <param name="A">The reference point; not read by this method, only its index is used.</param>
/// <param name="index">Index of the reference point in the data set.</param>
/// <returns>One DPoint per other point: ID is the neighbour's index, Value its distance.</returns>
private static List<DPoint> sortIndexOfNborByDist(DStatusPacket A, int index)
{
    List<DPoint> neighbours = new List<DPoint>(LENGTH - 1);

    for (int other = 0; other < LENGTH; other++)
    {
        if (other != index)
        {
            // distTable is always read with the smaller index first.
            double dist = other < index
                ? distTable[other, index]
                : distTable[index, other];

            // 'other' is the index of the neighbour in the database.
            neighbours.Add(new DPoint(other, dist));
        }
    }

    neighbours.Sort(); // sort by Value (distance) ascending
    return neighbours;
}