/// <summary>
/// Removes repeated tokens from every disjunct in place, keeping the first occurrence of each token.
/// </summary>
/// <remarks>
/// Two tokens are treated as the same when their <c>ToString()</c> values are reported equal by a
/// <c>DefaultTokenComparer</c> created with the <c>ConvertUmlauts</c> and <c>RemoveDiacritics</c> options.
/// </remarks>
/// <param name="disjuncts">Disjuncts to clean up; each inner token list is modified in place.</param>
public static void RemoveDuplicatedTokens(ArrayList<ArrayList<QueryToken>> disjuncts)
{
    IEqualityComparer<string> tokenComparer = new DefaultTokenComparer(
        DefaultTokenComparer.Options.ConvertUmlauts | DefaultTokenComparer.Options.RemoveDiacritics);

    for (int i = 0; i < disjuncts.Count(); ++i)
    {
        ArrayList<QueryToken> disjunct = disjuncts[i];

        // String forms of the tokens that survive, in their original order.
        var keptTexts = new List<string>();
        // Positions of tokens that repeat an earlier surviving token (ascending, no repeats).
        var duplicateIndices = new List<int>();

        for (int j = 0; j < disjunct.Count(); ++j)
        {
            // Convert each token to its string form once instead of once per comparison.
            string text = disjunct[j].ToString();

            // The earlier (kept) token is passed first, the later candidate second.
            if (keptTexts.Any(kept => tokenComparer.Equals(kept, text)))
            {
                duplicateIndices.Add(j);
            }
            else
            {
                keptTexts.Add(text);
            }
        }

        // Remove from the highest index down so the remaining recorded indices stay valid.
        for (int d = duplicateIndices.Count - 1; d >= 0; --d)
        {
            disjunct.RemoveAt(duplicateIndices[d]);
        }
    }
}
/// <summary>
/// Removes, in place, every disjunct for which <c>ContainsTokenSubset</c> reports that the tokens of
/// some later disjunct are a subset of its own tokens.
/// </summary>
/// <remarks>
/// Only disjuncts at higher indices are considered as the potential subset, so when several disjuncts
/// compare as equal token sets the earlier ones are removed.
/// NOTE(review): a disjunct whose tokens contain those of an earlier disjunct is not removed here;
/// confirm that this one-directional check is intended.
/// </remarks>
/// <param name="disjuncts">List of disjuncts to prune; modified in place.</param>
public static void RemoveDuplicatedDisjuncts(ArrayList<ArrayList<QueryToken>> disjuncts)
{
    // Indices are appended in ascending order and at most once each (the inner loop breaks on the
    // first hit), so no sorting or de-duplication is needed before removal.
    var redundantIndices = new List<int>();

    for (int i = 0; i < disjuncts.Count(); ++i)
    {
        for (int j = i + 1; j < disjuncts.Count(); ++j)
        {
            if (ContainsTokenSubset(disjuncts[j], disjuncts[i]))
            {
                redundantIndices.Add(i);
                break;
            }
        }
    }

    // Remove from the highest index down so the remaining recorded indices stay valid.
    for (int r = redundantIndices.Count - 1; r >= 0; --r)
    {
        disjuncts.RemoveAt(redundantIndices[r]);
    }
}
/// <summary>
/// Checks whether the tokens of <paramref name="subset"/> leave nothing over once the tokens of
/// <paramref name="set"/> are taken away.
/// </summary>
/// <remarks>
/// Tokens are compared through their <c>ToString()</c> values using a <c>DefaultTokenComparer</c>
/// created with the <c>ConvertUmlauts</c> and <c>RemoveDiacritics</c> options.
/// </remarks>
/// <param name="subset">Tokens that are expected to be covered by <paramref name="set"/>.</param>
/// <param name="set">Tokens to check against.</param>
/// <returns>
/// <c>true</c> when <c>Set&lt;string&gt;.Difference</c> of the two token-string sets has a
/// <c>Count</c> of zero; otherwise <c>false</c>.
/// </returns>
private static bool ContainsTokenSubset(ArrayList<QueryToken> subset, ArrayList<QueryToken> set)
{
    IEqualityComparer<string> comparer = new DefaultTokenComparer(
        DefaultTokenComparer.Options.ConvertUmlauts | DefaultTokenComparer.Options.RemoveDiacritics);

    // Build both string sets with the same comparer, candidate first, then the reference set.
    var candidateTexts = new Set<string>(subset.Select(token => token.ToString()), comparer);
    var referenceTexts = new Set<string>(set.Select(token => token.ToString()), comparer);

    var leftover = Set<string>.Difference(candidateTexts, referenceTexts);
    return leftover.Count == 0;
}