/// <summary>
/// Removes repeated tokens inside every disjunct, keeping the first occurrence of each.
/// Tokens are compared through their <c>ToString()</c> text using a
/// <c>DefaultTokenComparer</c> configured with <c>ConvertUmlauts | RemoveDiacritics</c>.
/// </summary>
/// <param name="disjuncts">List of disjuncts; each inner list is modified in place.</param>
public static void RemoveDuplicatedTokens(ArrayList<ArrayList<QueryToken>> disjuncts)
{
    IEqualityComparer<string> tokenComparer = new DefaultTokenComparer(DefaultTokenComparer.Options.ConvertUmlauts | DefaultTokenComparer.Options.RemoveDiacritics);

    for (int disjunctIndex = 0; disjunctIndex < disjuncts.Count(); ++disjunctIndex)
    {
        ArrayList<QueryToken> tokens = disjuncts[disjunctIndex];
        int tokenCount = tokens.Count();

        // One flag per token position gives O(1) "already marked?" checks and makes
        // a separate sort / de-duplication pass over the indices unnecessary.
        bool[] isDuplicate = new bool[tokenCount];

        for (int first = 0; first < tokenCount; ++first)
        {
            // A token already recognised as a duplicate never acts as the reference token.
            if (isDuplicate[first])
            {
                continue;
            }

            for (int later = first + 1; later < tokenCount; ++later)
            {
                if (tokenComparer.Equals(tokens[first].ToString(), tokens[later].ToString()))
                {
                    isDuplicate[later] = true;
                }
            }
        }

        // Delete from the back so the positions still to be visited do not shift.
        for (int position = tokenCount - 1; position >= 0; --position)
        {
            if (isDuplicate[position])
            {
                tokens.RemoveAt(position);
            }
        }
    }
}
/// <summary>
/// Removes every disjunct for which <c>ContainsTokenSubset(later, current)</c> is true
/// for at least one disjunct located after it in the list.
/// </summary>
/// <param name="disjuncts">List of disjuncts; modified in place.</param>
public static void RemoveDuplicatedDisjuncts(ArrayList<ArrayList<QueryToken>> disjuncts)
{
    // NOTE(review): only entries positioned AFTER the current one are consulted, so the
    // outcome depends on list order - confirm that callers rely on this.
    // ContainsTokenSubset is defined outside this block; its exact semantics are not visible here.
    ArrayList<int> disjunctsToRemove = new ArrayList<int>();
    for (int i = 0; i < disjuncts.Count(); ++i)
    {
        for (int j = i + 1; j < disjuncts.Count(); ++j)
        {
            if (ContainsTokenSubset(disjuncts[j], disjuncts[i]))
            {
                disjunctsToRemove.Add(i);
                break;
            }
        }
    }

    // Indices were appended in ascending order and at most once each (the inner loop
    // breaks after the first hit), so walking the list backwards already yields unique,
    // descending positions - no sorting or de-duplication is needed.
    for (int k = disjunctsToRemove.Count() - 1; k >= 0; --k)
    {
        disjuncts.RemoveAt(disjunctsToRemove[k]);
    }
}
/// <summary>
/// Processes the grouped subsets whose occurrence count equals <paramref name="maxOccurance"/>.
/// Accepted subsets are appended to <paramref name="finalQueryList"/> and the disjuncts they
/// match are removed from <paramref name="disjuncts"/>.
/// </summary>
/// <param name="disjuncts">Remaining disjuncts; shrinks as subsets are accepted.</param>
/// <param name="finalQueryList">Receives every subset whose removal step deleted at least one disjunct.</param>
/// <param name="twitterProfilerFrequencyThreshold">A subset is accepted only when its profiled frequency is below this value.</param>
/// <param name="subsetsGrouped">Candidate subsets.</param>
/// <param name="subsetsGroupedOccurance">Occurrence counts, indexed in parallel with <paramref name="subsetsGrouped"/>.</param>
/// <param name="maxOccurance">Occurrence count selecting the candidates for this pass.</param>
private static void ExtractUniqueQueryTokensBasedOnOccurance(ArrayList<ArrayList<QueryToken>> disjuncts, ArrayList<ArrayList<QueryToken>> finalQueryList, double twitterProfilerFrequencyThreshold, ArrayList<ArrayList<QueryToken>> subsetsGrouped, ArrayList<int> subsetsGroupedOccurance, int maxOccurance)
{
    // Subsets whose occurrence entry matches, shortest first.
    var matching = subsetsGrouped
        .Where((subset, position) => subsetsGroupedOccurance[position] == maxOccurance)
        .OrderBy(subset => subset.Count)
        .ToList();

    // Remove subsets which are contained in other ones: an entry is dropped as soon as
    // ContainsTokenSubset(entry, later) holds for some entry positioned after it.
    var candidates = matching
        .Where((subset, position) => !matching.Skip(position + 1).Any(later => ContainsTokenSubset(subset, later)))
        .ToList();

    if (maxOccurance <= 1)
    {
        // Occurrence is 1 -> profiling not needed.
        foreach (var candidate in candidates)
        {
            int countBefore = disjuncts.Count();

            // Remove disjuncts from the original list that contain the selected subset.
            QueryConverterUtils.RemoveDisjunctsContainingSubsets(disjuncts, candidate);
            if (disjuncts.Count() < countBefore)
            {
                finalQueryList.Add(candidate);
            }

            if (disjuncts.Count() == 0)
            {
                return;
            }
        }

        // Find new possible subsets.
        QueryConverterUtils.ExtractUniqueQueryTokens(disjuncts, finalQueryList, twitterProfilerFrequencyThreshold);
        return;
    }

    // Occurrence above 1 -> every candidate goes through twitter profiling.
    int lastIndex = candidates.Count() - 1;
    for (int index = 0; index <= lastIndex; ++index)
    {
        var candidate = candidates[index];
        bool isLastCandidate = index >= lastIndex;

        string searchQuery = TwitterProfiler.QueryTokensToStringConverter(candidate);
        double frequency = TwitterProfiler.ProfileFrequency(searchQuery);
        if (Math.Round(frequency) == Convert.ToInt32(TwitterProfiler.InvalidReturns.Exception))
        {
            // A profiling failure terminates the whole process.
            Console.WriteLine("\nError during twitter profiling: {0}!", TwitterProfiler.GetExceptionError());
            System.Environment.Exit(1);
        }

        if (frequency < twitterProfilerFrequencyThreshold)
        {
            int countBefore = disjuncts.Count();

            // Remove disjuncts from the original list that contain the selected subset.
            QueryConverterUtils.RemoveDisjunctsContainingSubsets(disjuncts, candidate);
            if (disjuncts.Count() < countBefore)
            {
                finalQueryList.Add(candidate);
            }

            if (disjuncts.Count() == 0)
            {
                return;
            }

            if (isLastCandidate)
            {
                // Find new possible subsets.
                QueryConverterUtils.ExtractUniqueQueryTokens(disjuncts, finalQueryList, twitterProfilerFrequencyThreshold);
                return;
            }

            continue;
        }

        if (!isLastCandidate)
        {
            continue;
        }

        // The last candidate was not accepted: select subsets with the first lower occurrence.
        ArrayList<int> lowerOccurances = subsetsGroupedOccurance.FindAll(occurance => occurance < maxOccurance);
        int nextOccurance = lowerOccurances.Count > 0 ? lowerOccurances.Max() : 0;
        if (nextOccurance < 1)
        {
            return;
        }

        QueryConverterUtils.ExtractUniqueQueryTokensBasedOnOccurance(disjuncts, finalQueryList, twitterProfilerFrequencyThreshold, subsetsGrouped, subsetsGroupedOccurance, nextOccurance);

        // This was the final candidate, so nothing is left to do in this pass.
        return;
    }
}
/// <summary>
/// Deletes from <paramref name="disjuncts"/> every entry for which
/// <c>ContainsTokenSubset(subset, entry)</c> returns true.
/// </summary>
/// <param name="disjuncts">List of disjuncts; modified in place.</param>
/// <param name="subset">Token subset passed as first argument to <c>ContainsTokenSubset</c>.</param>
private static void RemoveDisjunctsContainingSubsets(ArrayList<ArrayList<QueryToken>> disjuncts, ArrayList<QueryToken> subset)
{
    // First pass: note the positions of all matching disjuncts (ascending order).
    var matchingPositions = Enumerable.Range(0, disjuncts.Count())
        .Where(position => ContainsTokenSubset(subset, disjuncts[position]))
        .ToList();

    // Second pass: delete from the highest position down so earlier positions stay valid.
    for (int k = matchingPositions.Count - 1; k >= 0; --k)
    {
        disjuncts.RemoveAt(matchingPositions[k]);
    }
}
/// <summary>
/// Finds every disjunct holding at least one token whose text contains one of
/// ö, ü, ä, Ö, Ü, Ä or ß, and appends the result of
/// <c>QueryConverterUtils.ConvertSingleDisjunctUmlauts</c> for that disjunct to the list.
/// The original disjuncts stay in the list.
/// </summary>
/// <param name="disjuncts">List of disjuncts; converted variants are appended to it.</param>
public static void ConvertUmlauts(ArrayList<ArrayList<QueryToken>> disjuncts)
{
    char[] specialCharacters = { 'ö', 'ü', 'ä', 'Ö', 'Ü', 'Ä', 'ß' };

    // Positions of the disjuncts that need conversion, in ascending order.
    var positionsToConvert = Enumerable.Range(0, disjuncts.Count())
        .Where(position => disjuncts[position].Any(token => token.ToString().IndexOfAny(specialCharacters) >= 0))
        .ToList();

    // Highest position first, so conversions are appended in descending source order.
    for (int k = positionsToConvert.Count - 1; k >= 0; --k)
    {
        disjuncts.AddRange(QueryConverterUtils.ConvertSingleDisjunctUmlauts(disjuncts[positionsToConvert[k]]));
    }
}
/*
 * Returns a list consisting of all the Bubbles which are not connected to anything,
 * i.e. the balls that should fall.
 *
 * Everything reported by connectedBubbles() for each anchored bubble is gathered,
 * and that collection is handed to Exclusive() on the bubble list.
 */
public ArrayList looseBubbles()
{
    ArrayList anchors = this.anchoredBubbles();
    ArrayList attached = new ArrayList();

    foreach (Bubble anchor in anchors)
    {
        attached.AddRange(this.connectedBubbles(anchor));
        // Distinct() is not the LINQ operator here (it yields an ArrayList);
        // presumably a project extension that drops repeated entries - TODO confirm.
        attached = attached.Distinct();
    }

    // NOTE(review): 'remaining' is the same object as this.bubbles, not a copy. If
    // Exclusive() edits its receiver in place, the field itself is changed - confirm
    // that this is intended.
    ArrayList remaining = this.bubbles;
    remaining.Exclusive(attached);
    return remaining;
}