/// <summary>
/// Entry point: counts the anagram classes among a list of words and prints
/// the count followed by the value reported by the timer.
/// </summary>
/// <param name="args">
/// Exactly one argument is passed to ReadFileWords to obtain the words;
/// any other number of arguments is used directly as the word list.
/// </param>
public static void Main(String[] args)
{
    Console.OutputEncoding = Encoding.GetEncoding("iso-8859-1");

    // Default to treating the arguments themselves as the words.
    SCG.IEnumerable<String> words = args;
    if (args.Length == 1)
    {
        words = ReadFileWords(args[0]);
    }

    Timer timer = new Timer();
    SCG.IEnumerable<SCG.IEnumerable<String>> anagramClasses = AnagramClasses(words);

    int total = 0;
    foreach (SCG.IEnumerable<String> cls in anagramClasses)
    {
        total++;
        // To list the members of each class as well:
        // foreach (String s in cls)
        //   Console.Write(s + " ");
        // Console.WriteLine();
    }

    Console.WriteLine("{0} anagram classes", total);
    Console.WriteLine(timer.Check());
}
/// <summary>
/// Entry point: counts the non-trivial anagram classes among a list of words
/// and prints the count followed by the elapsed stopwatch time.
/// </summary>
/// <param name="args">
/// With exactly two arguments, the first one and the integer parsed from the
/// second one are handed to ReadFileWords to obtain the words; otherwise the
/// arguments themselves are used as the word list.
/// </param>
public static void Main(String[] args)
{
    Console.OutputEncoding = Encoding.GetEncoding("iso-8859-1");

    // Default to treating the arguments themselves as the words.
    SCG.IEnumerable<String> words = args;
    if (args.Length == 2)
    {
        words = ReadFileWords(args[0], int.Parse(args[1]));
    }

    // To print the first member of every anagram class first:
    // foreach (String s in FirstAnagramOnly(words))
    //   Console.WriteLine(s);
    // Console.WriteLine("===");

    var stopwatch = Stopwatch.StartNew();
    SCG.IEnumerable<SCG.IEnumerable<String>> anagramClasses = AnagramClasses(words);

    int total = 0;
    foreach (SCG.IEnumerable<String> cls in anagramClasses)
    {
        total++;
        // To list the members of each class as well:
        // foreach (String s in cls)
        //   Console.Write(s + " ");
        // Console.WriteLine();
    }

    Console.WriteLine("{0} non-trivial anagram classes", total);

    // The stopwatch is stopped only after the count line has been written.
    stopwatch.Stop();
    Console.WriteLine(stopwatch.Elapsed);
}
// Given a sequence of strings, return all non-trivial anagram classes,
// i.e. those containing more than one string. Every string is keyed by the
// TreeBag<char> produced by AnagramClass; the unsequenced flag selects
// whether the dictionary compares those bags with the unsequenced or the
// sequenced collection equalityComparer.
//
// Notes recorded by the original author:
//  - Should use a *sequenced* equalityComparer on a TreeBag<char>,
//    obviously: after all, characters can be sorted by ASCII code. On
//    347 000 distinct Danish words this takes 70 cpu seconds, 180 MB
//    memory, and 263 wall-clock seconds (due to swapping).
//  - Using a TreeBag<char> and a sequenced equalityComparer takes 82 cpu
//    seconds and 180 MB RAM to find the 26,058 anagram classes among
//    347,000 distinct words.
//  - Using an unsequenced equalityComparer on TreeBag<char> or
//    HashBag<char> makes it criminally slow: at least 1200 cpu seconds.
//    This must be because many bags get the same hash code, so that there
//    are many collisions. But exactly how the unsequenced equalityComparer
//    works is not clear ... or is it because unsequenced equality is slow?
private static SCG.IEnumerable<SCG.IEnumerable<string>> AnagramClasses(SCG.IEnumerable<string> ss, bool unsequenced = true)
{
    HashDictionary<TreeBag<char>, TreeSet<string>> byLetters;
    if (unsequenced)
    {
        byLetters = new HashDictionary<TreeBag<char>, TreeSet<string>>(
            UnsequencedCollectionEqualityComparer<TreeBag<char>, char>.Default);
    }
    else
    {
        byLetters = new HashDictionary<TreeBag<char>, TreeSet<string>>(
            SequencedCollectionEqualityComparer<TreeBag<char>, char>.Default);
    }

    // Pass 1: drop every word into the set belonging to its letter bag,
    // creating that set the first time the bag is seen.
    foreach (var word in ss)
    {
        var letters = AnagramClass(word);
        TreeSet<string> members;
        if (!byLetters.Find(ref letters, out members))
        {
            members = new TreeSet<string>();
            byLetters[letters] = members;
        }

        members.Add(word);
    }

    // Pass 2: hand out only the sets with at least two words.
    foreach (var candidate in byLetters.Values)
    {
        if (candidate.Count <= 1)
        {
            continue;
        }

        yield return candidate;
    }
}
/// <summary>
/// For each given namespace path, looks the path up with FindSequence
/// (last query exact), collects everything the matched node returns from
/// FindAllWithPayload, and afterwards re-adds every collected entry to this
/// tree through Add using the entry's own steps and payload.
/// </summary>
/// <param name="Namespaces">The namespace paths, each given as a sequence of path segments.</param>
/// <exception cref="Exception">
/// A namespace path does not resolve to a unique fit. Nothing has been added
/// to the tree at that point, because all insertions happen after the lookups.
/// </exception>
public void PullDownNamespaces(SCG.IEnumerable<SCG.IEnumerable<string>> Namespaces)
{
    // collect everything into a non lazy structure first
    var insertions = new SCG.List<SuggestionTree<Pay>.IntermFResult.single_suggestion>();
    foreach (var ns in Namespaces)
    {
        // Materialise the path once: ns may be a lazy sequence, and the same
        // segments are needed for the lookup and for the error message.
        var segments = ns.ToArray();

        var namespace_node_res = FindSequence(segments, last_query_is_exact: true);
        if (namespace_node_res.type != SuggestionTree<Pay>.FRType.unique_fit)
        {
            // <- turn this into consumer catchable Exception as soon as user defined "usings" are a thing
            throw new Exception("no exact match for Namespace-pulling : " + string.Join(".", segments));
        }

        SuggestionTree<Pay> namespaceNode = namespace_node_res.suggs[0].val;
        foreach (var single_sugg in namespaceNode.FindAllWithPayload())
        {
            insertions.Add(single_sugg);
        }
    }

    // second iteration to not intertwine access and modifying - and avoid reasoning headaches
    foreach (var single_sugg in insertions)
    {
        Add(single_sugg.steps, single_sugg.val.payload); // Add overrides the payload
    }
}
/// <summary>
/// Writes every element of <paramref name="xs"/> to the console on a single
/// line, each one followed by a space, and then ends the line.
/// </summary>
/// <typeparam name="T">Element type of the sequence.</typeparam>
/// <param name="xs">The elements to print.</param>
private static void Print<T>(SCG.IEnumerable<T> xs)
{
    foreach (var element in xs)
    {
        string cell = element + " ";
        Console.Write(cell);
    }

    Console.WriteLine();
}
// Reports whether Contains holds for every key in ks. Stops at the first
// key for which it does not; an empty ks gives true.
public override bool ContainsAll<U>(SCG.IEnumerable<U> ks)
{
    bool everyKeyPresent = true;
    foreach (K key in ks)
    {
        if (Contains(key))
        {
            continue;
        }

        everyKeyPresent = false;
        break;
    }

    return everyKeyPresent;
}
// Builds a tree bag from every string via TreeBag(...), hashes it with
// sequencedTreeBagHasher and records the hash code in a HashBag<int>.
// The bag of hash codes is local to this method and is not returned or
// printed here.
public static void FindCollisions(SCG.IEnumerable<string> ss)
{
    var hashCodeTally = new HashBag<int>();
    foreach (string word in ss)
    {
        var bag = TreeBag(word);
        var hashCode = sequencedTreeBagHasher.GetHashCode(bag);
        hashCodeTally.Add(hashCode);

        // Alternative hashers, left disabled:
        // HashBag<char> hb = HashBag(word);
        // unsequencedTreeBagHasher.GetHashCode(bag);
        // unsequencedHashBagHasher.GetHashCode(hb);
    }
}
// Given a sequence of strings, return only the first member of each
// anagram class. Strings are yielded lazily, in input order; a string is
// yielded exactly when the bag returned by AnagramClass for it has not been
// produced by an earlier string, judged by the unsequenced collection
// equalityComparer for TreeBag<char>.
private static SCG.IEnumerable<string> FirstAnagramOnly(SCG.IEnumerable<string> ss)
{
    var tbh = UnsequencedCollectionEqualityComparer<TreeBag<char>, char>.Default;
    var seen = new HashSet<TreeBag<char>>(tbh);
    foreach (var s in ss)
    {
        // Add returns true only when the bag was not already in the set, so
        // a single call both tests and records membership; the bag is hashed
        // once per word instead of once for Contains and again for Add.
        if (seen.Add(AnagramClass(s)))
        {
            yield return s;
        }
    }
}
// Given a sequence of strings, return only the first member of each
// anagram class. Strings are yielded lazily, in input order; a string is
// yielded exactly when the bag returned by AnagramClass for it is not yet
// in the set of bags seen so far (the set is created without an explicit
// equalityComparer).
private static SCG.IEnumerable<string> FirstAnagramOnly(SCG.IEnumerable<string> ss)
{
    var seen = new HashSet<HashBag<char>>();
    foreach (string s in ss)
    {
        // Add returns true only when the bag was not already in the set, so
        // a single call both tests and records membership; the bag is hashed
        // once per word instead of once for Contains and again for Add.
        if (seen.Add(AnagramClass(s)))
        {
            yield return s;
        }
    }
}
// Groups the given strings by the CharBag that AnagramClass returns for
// them and yields every group holding more than one string (the
// non-trivial anagram classes). Groups are kept in a TreeDictionary and
// yielded in the order of its Values.
private static SCG.IEnumerable<SCG.IEnumerable<string>> AnagramClasses(SCG.IEnumerable<string> ss)
{
    var byLetters = new TreeDictionary<CharBag, HashSet<string>>();

    // Pass 1: drop every word into the set belonging to its letter bag,
    // creating that set the first time the bag is seen.
    foreach (string word in ss)
    {
        var letters = AnagramClass(word);
        HashSet<string> members;
        if (!byLetters.Find(ref letters, out members))
        {
            members = new HashSet<string>();
            byLetters[letters] = members;
        }

        members.Add(word);
    }

    // Pass 2: hand out only the sets with at least two words.
    // A further filter was once considered and is left disabled:
    //   && candidate.Exists(delegate(string s) { return !s.EndsWith("s"); })
    foreach (HashSet<string> candidate in byLetters.Values)
    {
        if (candidate.Count <= 1)
        {
            continue;
        }

        yield return candidate;
    }
}
// Groups the given strings by the HashBag<char> that AnagramClass returns
// for them and yields every group holding more than one string (the
// non-trivial anagram classes).
//
// Timings recorded by the original author, using HashBag<char> and an
// unsequenced equalityComparer, on 1600 MHz Mobile P4 and .Net 2.0 beta 1
// (wall-clock time):
//    50 000 words    2 822 classes     2.0 sec
//   100 000 words    5 593 classes     4.3 sec
//   200 000 words   11 705 classes     8.8 sec
//   300 000 words   20 396 classes    52.0 sec  includes swapping
//   347 165 words   24 428 classes   146.0 sec  includes swapping
// The maximal memory consumption is less than 180 MB.
private static SCG.IEnumerable<SCG.IEnumerable<string>> AnagramClasses(SCG.IEnumerable<string> ss)
{
    var byLetters = new HashDictionary<HashBag<char>, TreeSet<string>>();

    // Pass 1: drop every word into the set belonging to its letter bag,
    // creating that set the first time the bag is seen.
    foreach (var word in ss)
    {
        var letters = AnagramClass(word);
        TreeSet<string> members;
        if (!byLetters.Find(ref letters, out members))
        {
            members = new TreeSet<string>();
            byLetters[letters] = members;
        }

        members.Add(word);
    }

    // Pass 2: hand out only the sets with at least two words.
    foreach (var candidate in byLetters.Values)
    {
        if (candidate.Count <= 1)
        {
            continue;
        }

        yield return candidate;
    }
}
/// <summary>
/// Unsupported operation: this implementation never adds anything and
/// unconditionally throws.
/// </summary>
/// <exception cref="ReadOnlyCollectionException">Always, since this is a read-only wrapper.</exception>
/// <param name="items">Not used; the sequence is never enumerated.</param>
public void AddSorted(SCG.IEnumerable<SCG.KeyValuePair<K, V>> items)
{
    throw new ReadOnlyCollectionException();
}
/// <summary>
/// Forwards <paramref name="items"/> unchanged to the AddSorted method of
/// the underlying <c>sortedpairs</c> collection.
/// </summary>
/// <param name="items">The key/value pairs to hand on to <c>sortedpairs</c>.</param>
public void AddSorted(SCG.IEnumerable<SCG.KeyValuePair<K, V>> items) => sortedpairs.AddSorted(items);
/// <summary>
/// Asserts that the resolution sequence the sample configuration binding
/// returns for <paramref name="alias"/> equals <paramref name="expected"/>
/// element by element (compared with SequenceEqual).
/// </summary>
/// <param name="alias">The alias passed to GetResolutionSequence.</param>
/// <param name="expected">The command entries expected, in order.</param>
public void GetResolutionSequenceTest(string alias, SCG.IEnumerable<AC.CommandEntry> expected)
{
    var actual = ATF.Sample.Configuration.Binding.GetResolutionSequence(alias);
    bool sameSequence = expected.SequenceEqual(actual);

    Assert.True(sameSequence);
}
//TODO: add delegate for checking validity!
/// <summary>
/// Creates a collection value from its three parts, which are stored as
/// given without copying or validation.
/// </summary>
/// <param name="e">Stored in <c>_enumerable</c>.</param>
/// <param name="chooser">Stored in <c>_chooser</c>.</param>
/// <param name="c">
/// Stored as <c>Count</c>. It is not checked against <paramref name="e"/>,
/// so the caller must supply the matching value.
/// </param>
public BasicCollectionValue(SCG.IEnumerable<T> e, Func<T> chooser, int c)
{
    this._enumerable = e;
    this._chooser = chooser;
    this.Count = c;
}
/// <summary>
/// Creates a set through the parameterless base constructor and then passes
/// <paramref name="enm"/> to AddAll.
/// </summary>
/// <param name="enm">The items handed to AddAll.</param>
public Set(SCG.IEnumerable<T> enm)
{
    // The parameterless base constructor runs implicitly before this body.
    AddAll(enm);
}