Exemplo n.º 1
0
    /* 3 Write a program that finds a set of words (e.g. 1000 words)
     * in a large text (e.g. 100 MB text file). Print how many times
     * each word occurs in the text.
     * Hint: you may find a C# trie implementation on the Internet.
     * */
    /// <summary>
    /// Counts how often each word yielded by <c>Words</c> occurs in "text.txt" twice:
    /// first into a <see cref="Dictionary{TKey,TValue}"/>, then into a <c>Trie&lt;int&gt;</c>.
    /// Prints the elapsed milliseconds of each pass and asserts that both structures
    /// hold the same count for every word.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var dict = new Dictionary<string, int>();
        var trie = new Trie<int>();

        // obviously, I couldn't zip the 100 MB file
        // use "bin\debug\generator.cs" to generate it if you want
        //
        // NOTE(review): a sanity check that was already disabled used to assert these
        // counts against the hashtable result -- presumably what the generator emits;
        // confirm before re-enabling:
        //   foo = 10000, bar = 20000, quux = 30000, frob = 40000, asdf = 50000

        // First pass: count with the hashtable. Opening the file is part of the timed region.
        var sw = Stopwatch.StartNew();

        using (var reader = new StreamReader("text.txt"))
        {
            foreach (var word in Words(reader))
            {
                dict[word] = 1 + dict.GetOrDefault(word, 0);
            }
        }

        sw.Stop();

        Console.WriteLine("Using hashtable: " + sw.Elapsed.TotalMilliseconds);

        // Second pass: same input, same counting scheme, but stored in the trie.
        // Restart() zeroes the elapsed time and starts measuring again.
        sw.Restart();

        using (var reader = new StreamReader("text.txt"))
        {
            foreach (var word in Words(reader))
            {
                trie.Add(word, 1 + trie.GetOrDefault(word, 0));
            }
        }

        sw.Stop();

        // Cross-check outside the timed region: every count in the hashtable must match
        // the trie. Debug.Assert is only evaluated in builds that define DEBUG.
        foreach (var kvp in dict)
        {
            Debug.Assert(trie.Find(kvp.Key) == kvp.Value);
        }

        // the trie would probably do much better compared to a hashtable when used on
        // natural text with large amount of repetition and low average word length
        // it is however extremely space inefficient

        // at any rate, I'd be surprised if this implementation could beat .NET's built-in
        // hashtable

        Console.WriteLine("Using trie: " + sw.Elapsed.TotalMilliseconds);
    }