コード例 #1
0
    /// <summary>
    /// Prunes every bag of the lexicon and then drops lexicon entries that ended up both
    /// light and unreferenced.
    /// </summary>
    /// <remarks>
    /// For each bag, only the entries whose key is itself a lexicon entry with a Weight of at
    /// least <paramref name="weight"/> are candidates. Candidates are ordered by descending
    /// count (ties broken by <c>lang.Compare</c> on the key) and the first
    /// <paramref name="limit"/> of them replace the bag's previous content. Afterwards every
    /// bag whose Weight is below <paramref name="weight"/> is removed from the lexicon unless
    /// some bag kept its key.
    /// </remarks>
    /// <param name="lexicon">Lexicon to prune in place.</param>
    /// <param name="lang">Supplies the tie-breaking comparison between keys.</param>
    /// <param name="weight">Minimum Weight used both for candidates and for the final sweep.</param>
    /// <param name="limit">Maximum number of entries kept per bag; zero or less keeps none.</param>
    static void Reduce(this IDictionary <string, Bag> lexicon, System.Language.IOrthography lang, int weight, int limit)
    {
        // Keys that at least one pruned bag still refers to
        var depends = new HashSet <string>();

        // Keys of bags whose Weight fell below the threshold
        var reduce = new HashSet <string>();

        Parallel.ForEach(lexicon, (entry) =>
        {
            // Each lexicon entry is handed to exactly one worker, so only this worker
            // rewrites this bag.
            // NOTE(review): the Weight of *other* bags is read below while those bags may be
            // rewritten by other workers - confirm that Weight is stable under Clear/Add.
            Bag current = entry.Value;

            var candidates = new List <Tuple <String, Int32, Int32> >();

            current.ForEach((key, count) =>
            {
                Bag other;

                if (!lexicon.TryGetValue(key, out other) || other == null)
                {
                    return;
                }

                if (other.Weight >= weight)
                {
                    candidates.Add(new Tuple <String, Int32, Int32>(key, count, other.Weight));
                }
            });

            // Highest count first; equal counts fall back to the orthography's ordering
            candidates.Sort((x, y) =>
            {
                if (x.Item2 != y.Item2)
                {
                    return(x.Item2 > y.Item2 ? -1 : +1);
                }

                return(lang.Compare(x.Item1, y.Item1));
            });

            // The candidates were collected above, so the bag can be emptied and refilled
            int keep = Math.Min(candidates.Count, limit);

            current.Clear();

            for (int i = 0; i < keep; i++)
            {
                Tuple <String, Int32, Int32> kept = candidates[i];

                current.Add(kept.Item1, kept.Item2);

                lock (depends)
                {
                    depends.Add(kept.Item1);
                }
            }

            if (current.Weight < weight)
            {
                lock (reduce)
                {
                    reduce.Add(current.Key);
                }
            }
        });

        // Light bags that are still referenced by another bag must survive
        reduce.ExceptWith(depends);

        foreach (var orphan in reduce)
        {
            lexicon.Remove(orphan);
        }
    }
コード例 #2
0
    /// <summary>
    /// Parses the given paths with <c>Tokens.Parse</c>, filters and normalizes every token,
    /// and merges the bags returned by <c>Bags.Compute</c> for each document into one lexicon.
    /// </summary>
    /// <param name="WINDOW">Window argument forwarded unchanged to <c>Bags.Compute</c>.</param>
    /// <param name="lang">Orthography used to convert tokens and to test their legibility.</param>
    /// <param name="support">
    /// Optional mapping applied to each accepted token; its result is converted and checked
    /// for legibility again. May be null.
    /// </param>
    /// <param name="ignore">
    /// Tokens to drop, tested after the "que"/"QVE" suffix has been removed. A null set is
    /// treated as empty.
    /// </param>
    /// <param name="paths">Paths forwarded to <c>Tokens.Parse</c>.</param>
    /// <param name="search">Search pattern forwarded to <c>Tokens.Parse</c>.</param>
    /// <returns>The lexicon, keyed by bag key.</returns>
    static IDictionary <string, Bag> Build(int WINDOW, System.Language.IOrthography lang, Func <string, string> support, ISet <string> ignore, string[] paths, string search = "*.*")
    {
        Dictionary <String, Bag> lexicon = new Dictionary <String, Bag>();

        Tokens.Parse(paths, search,

                     (TOKEN, EMIT) =>
        {
            /* A lower case second character folds the whole token to lower case;
             * tokens whose second character is already upper case are left alone */

            if (TOKEN.Length > 1 && char.ToUpperInvariant(TOKEN[1]) != TOKEN[1])
            {
                TOKEN = TOKEN.ToLowerInvariant();
            }

            string s = lang.Convert(TOKEN);

            if (!lang.IsLegible(s))
            {
                return;
            }

            bool lowerInitial = s.Length > 0 && char.ToUpperInvariant(s[0]) != s[0];

            /* Do not take single letter entries unless they start with upper case */

            if (s.Length == 1 && lowerInitial)
            {
                return;
            }

            if (s.Length > 1 && lowerInitial)
            {
                /* Do not take roman numerals, except for the spellings listed here */

                bool exempt;

                switch (s)
                {
                case "ui":
                case "uim":
                case "uix":
                case "uii":
                case "lux":
                case "lum":
                case "cum":
                case "cui":
                case "mum":
                case "mi":
                case "id":
                case "mix":
                case "diu":
                case "dix":
                case "di":
                case "dii":
                case "dux":
                case "dum":
                    exempt = true;
                    break;

                default:
                    exempt = false;
                    break;
                }

                if (!exempt)
                {
                    /* Only a spelling that survives the round trip unchanged is a numeral */

                    var n = RomanToInteger(s);

                    if (n.HasValue && IntegerToRoman(n.Value) == s)
                    {
                        return;
                    }
                }

                /* Do not take runs of one repeated letter (compared ignoring case) */

                bool same = true;

                for (int i = 1; i < s.Length; i++)
                {
                    if (char.ToUpperInvariant(s[i]) != char.ToUpperInvariant(s[i - 1]))
                    {
                        same = false;
                        break;
                    }
                }

                if (same)
                {
                    return;
                }

                /* Must have at least one vowel
                 *
                 *  - Abbreviations should be capitalized.
                 *  - Foreign words might be ignored which is a good side effect.
                 *
                 */

                bool hasVowel = false;

                for (int i = 0; i < s.Length && !hasVowel; i++)
                {
                    char ch = s[i];

                    hasVowel = ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 'u';
                }

                if (!hasVowel)
                {
                    return;
                }
            }

            /* Remove a trailing "que" / "QVE". The match is ordinal because exactly
             * three characters are cut off afterwards; a culture-sensitive match may
             * succeed on a suffix of a different length. */

            if (s.Length > "que".Length && s.EndsWith("que", StringComparison.Ordinal))
            {
                s = s.Substring(0, s.Length - "que".Length);
            }

            if (s.Length > "QVE".Length && s.EndsWith("QVE", StringComparison.Ordinal))
            {
                s = s.Substring(0, s.Length - "QVE".Length);
            }

            if (ignore != null && ignore.Contains(s))
            {
                return;
            }

            if (support != null)
            {
                s = lang.Convert(support(s));

                if (!lang.IsLegible(s))
                {
                    return;
                }
            }

            if (EMIT != null)
            {
                EMIT(s);
            }
        },

                     (FILE, DOC) =>
        {
            Log(Path.GetFullPath(FILE));

            var bags = Bags.Compute(DOC, WINDOW, (FOCUS, NEIGHBOR, Δ) =>
            {
                /* Pair a focus with a neighbor only when their first characters agree on
                 * being (or not being) their own upper case form */

                bool focusUpper = FOCUS[0] == char.ToUpperInvariant(FOCUS[0]);
                bool neighborUpper = NEIGHBOR[0] == char.ToUpperInvariant(NEIGHBOR[0]);

                return(focusUpper == neighborUpper);
            });

            foreach (var bag in bags)
            {
                /* The lexicon is guarded because this callback takes a lock on it for
                 * every merge; the second Bag constructor argument is the lexicon size
                 * at the moment the entry is created */

                lock (lexicon)
                {
                    Bag lex; string key = bag.Key;

                    if (!lexicon.TryGetValue(key, out lex))
                    {
                        lexicon[key] = lex = new Bag(key, lexicon.Count);
                    }

                    lex.Add(bag, bag.Weight);
                }
            }
        });

        return(lexicon);
    }