/// <summary>
/// Grows <paramref name="currentCluster"/> by walking the edges stored in
/// <paramref name="neighbourList"/> outward from peak <paramref name="i"/>.
/// </summary>
/// <param name="i">Index of the peak the walk starts from.</param>
/// <param name="currentCluster">Receives every peak index visited by the walk; modified in place.</param>
/// <param name="neighbourList">
/// Edge set. <c>DeleteAt</c> is called on it for every visited node that still had neighbours,
/// so the caller's instance is modified as well.
/// </param>
public static void AddNeighbors(int i, HashSet<int> currentCluster, NeighbourList neighbourList)
{
    // Explicit work stack: nodes that still have to be looked at.
    Stack<int> pending = new Stack<int>();
    pending.Push(i);

    while (pending.Count != 0)
    {
        int node = pending.Pop();

        // HashSet.Add reports false when the node is already a member, in which
        // case it has been handled before and there is nothing left to do.
        if (!currentCluster.Add(node))
        {
            continue;
        }

        // A node without remaining neighbours contributes no further work.
        if (neighbourList.IsEmptyAt(node))
        {
            continue;
        }

        // Queue every neighbour, then drop this node's entry from the edge set.
        foreach (int neighbour in neighbourList.GetAt(node))
        {
            pending.Push(neighbour);
        }
        neighbourList.DeleteAt(node);
    }
}
/// <summary>
/// Clusters peaks in two phases: first an edge is recorded between every pair of peaks that passes
/// the mass-difference, retention-time-overlap, charge-state and correlation checks; then the
/// recorded edges are collected into clusters via <c>AddNeighbors</c>.
/// </summary>
/// <param name="minCharge">Lower charge bound, forwarded to <c>FitsMassDifference</c>.</param>
/// <param name="maxCharge">Upper charge bound, forwarded to <c>FitsMassDifference</c>.</param>
/// <param name="correlationThreshold">Pairs whose <c>CalcCorrelation</c> value is below this are rejected.</param>
/// <param name="peakCount">Number of peaks to process; indices 0..peakCount-1 of the arrays are read.</param>
/// <param name="centerMz">Per-peak centre m/z. NOTE(review): presumably sorted ascending, since it is searched with CeilIndex/FloorIndex - confirm.</param>
/// <param name="centerMzErrors">Per-peak m/z error.</param>
/// <param name="minTimes">Per-peak start of the retention-time range.</param>
/// <param name="maxTimes">Per-peak end of the retention-time range.</param>
/// <param name="peaks">Optional peak array (may be null). Entries left behind by the mass window are disposed and set to null.</param>
/// <param name="peakList">Source of the peak objects handed to <c>CalcCorrelation</c>.</param>
/// <returns>One array of peak indices per cluster, each produced by <c>SortByMass</c>.</returns>
public static List<int[]> CalcClusterIndices(int minCharge, int maxCharge, double correlationThreshold, int peakCount, double[] centerMz, float[] centerMzErrors, float[] minTimes, float[] maxTimes, Peak[] peaks, IPeakList peakList)
{
    // Candidate partners for peak j are searched from (mass - searchWindow) upward.
    const double searchWindow = 1.1;
    // Peaks at indices below the floor index of (mass - disposeWindow) are released.
    const double disposeWindow = 1.2;

    NeighbourList neighbourList = new NeighbourList();

    // Every index below this mark has already been through the dispose-and-null step.
    // Keeping the mark avoids rescanning the array from index 0 for every peak, which
    // made the cleanup quadratic in the number of peaks.
    // NOTE(review): this matches a full rescan as long as slots below an earlier mark are
    // never repopulated - which holds when centerMz is ascending; confirm.
    int disposedBelow = 0;

    for (int j = 0; j < peakCount; j++)
    {
        // m/z, m/z error and retention-time range of the current peak.
        double massJ = centerMz[j];
        float massErrorJ = centerMzErrors[j];
        float timeMinJ = minTimes[j];
        float timeMaxJ = maxTimes[j];

        // First candidate index: nearest peak at or above (massJ - searchWindow).
        int start = ArrayUtil.CeilIndex(centerMz, massJ - searchWindow);
        // Nearest peak at or below (massJ - disposeWindow); everything before it is released.
        int w = ArrayUtil.FloorIndex(centerMz, massJ - disposeWindow);

        // Release peak objects that lie further than disposeWindow below the current mass.
        // NOTE(review): presumably they can no longer pair with this or any later peak - confirm.
        if (peaks != null && w > disposedBelow)
        {
            for (int i = disposedBelow; i < w; i++)
            {
                if (peaks[i] != null)
                {
                    peaks[i].Dispose();
                    peaks[i] = null;
                }
            }
            disposedBelow = w;
        }

        // Compare the current peak j against every candidate i in [start, j).
        for (int i = start; i < j; i++)
        {
            double massI = centerMz[i];
            double massErrorI = centerMzErrors[i];
            double timeMinI = minTimes[i];
            double timeMaxI = maxTimes[i];

            // Absolute mass difference and the tolerance used for it: five times the
            // two individual errors combined in quadrature.
            double massDiff = Math.Abs(massI - massJ);
            double massError = 5 * Math.Sqrt(massErrorI * massErrorI + massErrorJ * massErrorJ);

            // Reject 1: mass difference larger than C13C12Diff plus the tolerance.
            if (massDiff > MolUtil.C13C12Diff + massError)
            {
                continue;
            }
            // Reject 2: retention-time ranges do not overlap (i starts after j ends, or vice versa).
            if (timeMinI >= timeMaxJ || timeMinJ >= timeMaxI)
            {
                continue;
            }
            // Reject 3: mass difference does not fit any charge state in [minCharge, maxCharge].
            if (!FitsMassDifference(massDiff, massError, minCharge, maxCharge))
            {
                continue;
            }
            // Reject 4: correlation of the two peaks is below the threshold.
            if (CalcCorrelation(peakList.GetPeakKeep(i), peakList.GetPeakKeep(j)) < correlationThreshold)
            {
                continue;
            }

            // All checks passed: mass difference within C13C12Diff + tolerance, retention
            // times overlap, a charge state fits, and the correlation meets the threshold.
            neighbourList.Add(i, j);
        }
    }

    // Convert the edge set into clusters.
    List<int[]> clusterList = new List<int[]>();
    for (int i = 0; i < peakCount; i++)
    {
        // Peaks with no remaining edges are skipped. AddNeighbors deletes the entries of
        // the nodes it visits, so peaks already absorbed into a cluster do not start another.
        if (neighbourList.IsEmptyAt(i))
        {
            continue;
        }
        HashSet<int> currentCluster = new HashSet<int>();
        AddNeighbors(i, currentCluster, neighbourList);
        int[] c = SortByMass(currentCluster.ToArray(), centerMz);
        clusterList.Add(c);
    }
    return clusterList;
}