/// <summary>
/// Checks that <c>Remove</c> returns <c>false</c> for keys that were never added:
/// "A" (a proper prefix of the stored key "ABC") and "X" (shares nothing with it).
/// </summary>
public void RemoveNotExists()
        {
            var trie = new TrieDictionary<char, bool>();
            trie.Add("ABC", false);

            // A prefix of an existing key is not itself a key.
            var removedPrefix = trie.Remove("A");
            Assert.IsFalse(removedPrefix);

            // A completely unknown key.
            var removedUnknown = trie.Remove("X");
            Assert.IsFalse(removedUnknown);
        }
        /// <summary>
        /// Checks that <c>Keys</c> and <c>Values</c> expose exactly the four stored entries.
        /// Both sequences are sorted before comparison, so enumeration order is not tested.
        /// </summary>
        public void KeysValues()
        {
            var trie = new TrieDictionary<char, bool>();
            trie.Add("ABC", false);
            trie.Add("AB", true);
            trie.Add("ADE", false);
            trie.Add("ABCDE", true);

            // Keys are character sequences; turn each one into a string and sort.
            var expectedKeys = new[] { "AB", "ABC", "ABCDE", "ADE" };
            var actualKeys = trie.Keys.Select(key => new string(key.ToArray())).OrderBy(text => text);
            Assert.IsTrue(expectedKeys.SequenceEqual(actualKeys));

            // Two false and two true values were stored; false sorts before true.
            var expectedValues = new[] { false, false, true, true };
            var actualValues = trie.Values.OrderBy(flag => flag);
            Assert.IsTrue(expectedValues.SequenceEqual(actualValues));
        }
        /// <summary>
        /// Reads the indexer with the key "A", which was never added (it is only a prefix
        /// of the stored keys). The method name suggests an exception is expected here;
        /// the attribute that would declare it is not visible in this excerpt (TODO confirm).
        /// </summary>
        public void ItemsGetException()
        {
            var trie = new TrieDictionary<char, bool>();
            trie.Add("ABC", false);
            trie.Add("AB", false);
            trie.Add("ADE", true);
            trie.Add("ABCDE", false);

            // The value is irrelevant; only the lookup itself matters.
            // ReSharper disable once UnusedVariable
            var unused = trie["A"];
        }
        /// <summary>
        /// Checks that <c>Clear</c> leaves the trie with a <c>Count</c> of zero
        /// after four entries were added.
        /// </summary>
        public void Clear()
        {
            var trie = new TrieDictionary<char, bool>();
            trie.Add("ABC", false);
            trie.Add("AB", false);
            trie.Add("ADE", false);
            trie.Add("ABCDE", false);

            trie.Clear();

            Assert.AreEqual(0, trie.Count);
        }
        /// <summary>
        /// Calls <c>Remove</c> with a <c>null</c> key on a trie holding one entry.
        /// The method contains no assertion; presumably an exception is expected via an
        /// attribute that is not visible in this excerpt (TODO confirm).
        /// </summary>
        public void RemoveNullKey()
        {
            // ReSharper disable once CollectionNeverQueried.Local
            var trie = new TrieDictionary<char, bool>();
            trie.Add("ABC", false);

            // Passing null is the whole point of this test.
            // ReSharper disable once AssignNullToNotNullAttribute
            trie.Remove(null);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Console benchmark comparing two fuzzy-search implementations over the same word
        /// library: <c>AutomatonSearch.Search</c> (backed by a trie dictionary) and
        /// <c>TraditionSearch.search</c>. For every test word it prints the number of distinct
        /// matches within <c>MaxDist</c>, then the total hits and elapsed milliseconds per approach.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Load the word library. Every line is stripped of non-letters and lower-cased.
            // The sequence is materialized once so that the clean-up is not silently repeated
            // on each enumeration (which would otherwise happen inside the timed loops below).
            Regex rgx = new Regex("[^a-zA-Z]");
            IEnumerable<string> wordLib = System.IO.File.ReadAllLines(@"Data\WordLib.txt")
                .Select(word => rgx.Replace(word, "").ToLowerInvariant())
                .ToList();

            // Maximum distance from a given word: all "brothers" of the word (words whose distance
            // is within MaxDist) are generated. The bigger MaxDist is, the more brothers are
            // generated and the longer the overall fuzzy search takes.
            const int MaxDist = 1;

            // Load the test cases, normalized the same way and materialized for the same reason.
            IEnumerable<string> testcase1 = System.IO.File.ReadAllLines(@"Data\TestCase_1_WordsToSearch.txt")
                .Select(word => rgx.Replace(word, "").ToLowerInvariant())
                .ToList();

            Console.WriteLine("----Automaton way----");

            // Build the trie dictionary from the library.
            TrieDictionary dict = TrieDictionary.BuildTrieDictionary(wordLib.GetEnumerator());

            Stopwatch st   = new Stopwatch();
            int       hits = 0;

            st.Start();
            foreach (string word in testcase1)
            {
                // Materialize once: counting a deferred query twice would run the search twice.
                List<string> results = AutomatonSearch.Search(word, MaxDist, dict).Distinct().ToList();
                if (results.Count > 0)
                {
                    hits++;
                    Console.WriteLine("results size for \"" + word + "\": " + results.Count.ToString());
                }
            }
            st.Stop();
            Console.WriteLine("Total hits: " + hits.ToString() + "; Max distance : " + MaxDist.ToString() + "; Total time consumed(milisec): " + st.ElapsedMilliseconds.ToString());

            Console.Write("\n\n");

            Console.WriteLine("----Traditional way----");
            st.Reset();
            hits = 0;
            st.Start();
            foreach (string word in testcase1)
            {
                // Each search gets a fresh enumerator over the already-normalized library.
                List<string> results2 = TraditionSearch.search(word, MaxDist, wordLib.GetEnumerator()).Distinct().ToList();
                if (results2.Count > 0)
                {
                    hits++;
                    Console.WriteLine("results size for \"" + word + "\": " + results2.Count.ToString());
                }
            }
            st.Stop();
            Console.WriteLine("Total hits: " + hits.ToString() + "; Max distance : " + MaxDist.ToString() + "; Total time consumed (milisec): " + st.ElapsedMilliseconds.ToString());

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
        /// <summary>
        /// Checks that the indexer getter returns the value stored for each of four keys,
        /// including keys that are prefixes of one another.
        /// </summary>
        public void ItemsGet()
        {
            var trie = new TrieDictionary<char, bool>();
            trie.Add("ABC", false);
            trie.Add("AB", false);
            trie.Add("ADE", true);
            trie.Add("ABCDE", false);

            var abc = trie["ABC"];
            Assert.AreEqual(false, abc);

            var ab = trie["AB"];
            Assert.AreEqual(false, ab);

            // "ADE" is the only entry stored with true.
            var ade = trie["ADE"];
            Assert.AreEqual(true, ade);

            var abcde = trie["ABCDE"];
            Assert.AreEqual(false, abcde);
        }
        /// <summary>
        /// Calls <c>TryGetValue</c> with a <c>null</c> key on a trie holding one entry.
        /// The method contains no assertion; presumably an exception is expected via an
        /// attribute that is not visible in this excerpt (TODO confirm).
        /// </summary>
        public void TryGetValueKeyIsNull()
        {
            var trie = new TrieDictionary<char, bool>();
            trie.Add("ABC", false);

            // Receives the out argument; its content is never inspected.
            bool value;

            // ReSharper disable once AssignNullToNotNullAttribute
            trie.TryGetValue(null, out value);
        }
        /// <summary>
        /// Checks that after <c>Clear</c> none of the previously stored keys is reported
        /// by <c>ContainsKey</c>.
        /// </summary>
        public void ContainsKeyClear()
        {
            var storedKeys = new[] { "ABC", "AB", "ADE", "ABCDE" };

            var trie = new TrieDictionary<char, bool>();
            foreach (var key in storedKeys)
            {
                trie.Add(key, false);
            }

            trie.Clear();

            // Every key that was added must now be gone.
            foreach (var key in storedKeys)
            {
                Assert.IsFalse(trie.ContainsKey(key));
            }
        }
        /// <summary>
        /// Checks that enumerating the trie yields every stored key, both through LINQ
        /// <c>Select</c> directly and through <c>OfType</c>. Results are sorted before
        /// comparison, so enumeration order is not tested.
        /// </summary>
        public void GetEnumerator()
        {
            var trie = new TrieDictionary<char, bool>();
            trie.Add("ABC", false);
            trie.Add("AB", false);
            trie.Add("ADE", false);
            trie.Add("ABCDE", false);

            // Enumerate the key/value pairs directly.
            var result = trie
                .Select(pair => new string(pair.Key.ToArray()))
                .OrderBy(text => text)
                .ToArray();

            // Enumerate again via OfType, filtering to the expected pair type.
            var resultEnumerator = trie
                .OfType<KeyValuePair<IEnumerable<char>, bool>>()
                .Select(pair => new string(pair.Key.ToArray()))
                .OrderBy(text => text)
                .ToArray();

            var expected = new[] { "AB", "ABC", "ABCDE", "ADE" };

            CollectionAssert.AreEqual(expected, result);
            CollectionAssert.AreEqual(expected, resultEnumerator);
        }
        /// <summary>
        /// Checks <c>TryGetValue</c> for a stored key (returns <c>true</c> and the stored value)
        /// and for a prefix that is not a key (returns <c>false</c> and a <c>null</c> out value).
        /// </summary>
        public void TryGetValue()
        {
            const string expectedValue = "value";

            var trie = new TrieDictionary<char, string>();
            trie.Add("ABC", expectedValue);

            string value;

            // Existing key: lookup succeeds and hands back the stored value.
            var foundStored = trie.TryGetValue("ABC", out value);
            Assert.IsTrue(foundStored);
            Assert.AreEqual(expectedValue, value);

            // "A" is only a prefix of "ABC": lookup fails and the out value is reset to null.
            var foundPrefix = trie.TryGetValue("A", out value);
            Assert.IsFalse(foundPrefix);
            Assert.IsNull(value);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Creates the engine from an existing <c>PhenotypeEngine</c>: takes over its symptom list,
        /// registers every symptom name and synonym in a trie dictionary under the "PHENOTYPE"
        /// category, and builds the approximate dictionary chunker on top of it.
        /// </summary>
        /// <param name="phenotypeEngine">Source of the symptom list (<c>SymptomsList</c>).</param>
        public TextMiningEngine(PhenotypeEngine phenotypeEngine)
        {
            Console.WriteLine("TextMiningEngine initialization ...");

            stringBuilder = new System.Text.StringBuilder();
            client        = new HttpClient();
            symptomsList  = phenotypeEngine.SymptomsList;

            // Earlier variants that are currently disabled:
            //   symptomsList = new List<Symptom>();
            //   GetSymptomsList();
            //   getSymptomsListBeta();

            // Build the dictionary of symptoms: one entry per name and one per synonym.
            const string category = "PHENOTYPE";
            TrieDictionary phenotypeDictionary = new TrieDictionary();

            foreach (Symptom symptom in symptomsList)
            {
                phenotypeDictionary.addEntry(new com.aliasi.dict.DictionaryEntry(symptom.Name, category));

                foreach (string alias in symptom.Synonyms)
                {
                    phenotypeDictionary.addEntry(new com.aliasi.dict.DictionaryEntry(alias, category));
                }
            }

            TokenizerFactory     tokenizers = IndoEuropeanTokenizerFactory.INSTANCE;
            WeightedEditDistance distance   = new FixedWeightEditDistance(0, -1, -1, -1, double.NaN);

            // The chunker is created with a maximum distance of zero.
            const double maxDistance = 0.0;

            chunker = new ApproxDictionaryChunker(phenotypeDictionary, tokenizers, distance, maxDistance);

            // HMM preparation, currently disabled:
            //   string pathWithoutSettings = Environment.GetEnvironmentVariable("RD_AGGREGATOR_SETTINGS").Substring(0, Environment.GetEnvironmentVariable("RD_AGGREGATOR_SETTINGS").Length - 14);
            //   string completePath = $"{pathWithoutSettings}\\Aggregator\\tools\\model.test";
            //   Console.WriteLine(completePath);
            //   java.io.File modelFile = new java.io.File(completePath);
            //   chunkerHMM = (Chunker)AbstractExternalizable.readObject(modelFile);

            Console.WriteLine("TextMiningEngine initialization finished");
        }
Ejemplo n.º 13
0
    /// <summary>
    /// Builds a trie dictionary from a space-separated list of words.
    /// </summary>
    /// <param name="input">Words separated by single space characters.</param>
    /// <returns>
    /// A new <c>TrieDictionary</c> into which every token of <paramref name="input"/> was inserted.
    /// </returns>
    /// <exception cref="ParsingException">
    /// Thrown when <paramref name="input"/> is <c>null</c> or empty.
    /// </exception>
    public static ITrieDictionary getInstanceByParsing(string input)
    {
        // Reject null the same way as an empty string; previously a null input slipped past
        // the guard and failed with a NullReferenceException at the Split call below.
        if (string.IsNullOrEmpty(input))
        {
            throw new ParsingException();
        }

        // The words are stored in a dictionary structure.
        TrieDictionary dictionary = new TrieDictionary();

        // Tokens are separated by spaces. Consecutive spaces yield empty tokens,
        // which are passed to insert like any other word.
        foreach (string word in input.Split(' '))
        {
            dictionary.insert(word);
        }

        return dictionary;
    }
        /// <summary>
        /// Checks the indexer setter: it adds a new key, adds a second key that is a prefix
        /// of the first, and overwrites the value of an existing key.
        /// </summary>
        public void ItemsSet()
        {
            var trie = new TrieDictionary<char, bool>();

            // Setting a key that does not exist yet creates it.
            trie["ABC"] = true;
            var abc = trie["ABC"];
            Assert.AreEqual(true, abc);

            // A prefix of an existing key can be stored as its own entry.
            trie["AB"] = true;
            var abAfterInsert = trie["AB"];
            Assert.AreEqual(true, abAfterInsert);

            // Setting an existing key replaces its value.
            trie["AB"] = false;
            var abAfterOverwrite = trie["AB"];
            Assert.AreEqual(false, abAfterOverwrite);
        }
        /// <summary>
        /// Checks <c>Contains</c> on the <c>IDictionary</c> view of the trie: a pair is reported
        /// only when both its key and its value match a stored entry.
        /// </summary>
        public void Contains()
        {
            var trie = new TrieDictionary<char, bool>();
            trie.Add("ABC", false);
            trie.Add("AB", false);
            trie.Add("ADE", true);
            trie.Add("ABCDE", false);

            var t = (IDictionary<IEnumerable<char>, bool>)trie;

            // Pairs that match a stored entry exactly.
            var storedPairs = new[]
            {
                new KeyValuePair<IEnumerable<char>, bool>("ABC", false),
                new KeyValuePair<IEnumerable<char>, bool>("AB", false),
                new KeyValuePair<IEnumerable<char>, bool>("ADE", true),
                new KeyValuePair<IEnumerable<char>, bool>("ABCDE", false)
            };

            foreach (var pair in storedPairs)
            {
                Assert.IsTrue(t.Contains(pair));
            }

            // Unknown key, known key with the wrong value, and a key that is only an inner prefix.
            var missingPairs = new[]
            {
                new KeyValuePair<IEnumerable<char>, bool>("X", false),
                new KeyValuePair<IEnumerable<char>, bool>("ADE", false),
                new KeyValuePair<IEnumerable<char>, bool>("ABCD", false)
            };

            foreach (var pair in missingPairs)
            {
                Assert.IsFalse(t.Contains(pair));
            }
        }
        /// <summary>
        /// Checks that <c>GetByPrefix("ABC")</c> returns the entries "ABC" and "ABCDE", in that
        /// order, with their values, and that an entry added through the <c>IDictionary</c>
        /// interface is counted in <c>Count</c>.
        /// </summary>
        public void GetByPrefix()
        {
            var trie = new TrieDictionary<char, int>();
            trie.Add("ABC", 1);
            trie.Add("AB", 2);
            trie.Add("ABCDE", 3);

            // The fourth entry goes in through the key/value pair overload of the interface.
            var asDictionary = (IDictionary<IEnumerable<char>, int>)trie;
            asDictionary.Add(new KeyValuePair<IEnumerable<char>, int>("ADE", 4));

            var matches       = trie.GetByPrefix("ABC").ToArray();
            var matchedKeys   = matches.Select(entry => new string(entry.Key.ToArray()));
            var matchedValues = matches.Select(entry => entry.Value);

            var expectedKeys   = new[] { "ABC", "ABCDE" };
            var expectedValues = new[] { 1, 3 };

            Assert.AreEqual(4, trie.Count);
            Assert.IsTrue(expectedKeys.SequenceEqual(matchedKeys));
            Assert.IsTrue(expectedValues.SequenceEqual(matchedValues));
        }
        /// <summary>
        /// Checks both removal paths: removal by key/value pair through the <c>IDictionary</c>
        /// interface (succeeds only when key and value both match) and removal by key.
        /// <c>Count</c> is verified after each step, and the remaining keys at the end.
        /// </summary>
        public void Remove()
        {
            const int initialCount = 5;

            var trie = new TrieDictionary<char, bool>();
            trie.Add("ABC", false);
            trie.Add("AB", false);
            trie.Add("ADE", false);
            trie.Add("ABCDE", false);
            trie.Add("X", false);

            // Pair-based removal is only reachable through the interface.
            var asDictionary = (IDictionary<IEnumerable<char>, bool>)trie;

            // Unknown key, inner prefix that is not a key, and existing key with the wrong value.
            Assert.IsFalse(asDictionary.Remove(new KeyValuePair<IEnumerable<char>, bool>("XY", true)));
            Assert.IsFalse(asDictionary.Remove(new KeyValuePair<IEnumerable<char>, bool>("ABCD", true)));
            Assert.IsFalse(asDictionary.Remove(new KeyValuePair<IEnumerable<char>, bool>("ABCDE", true)));
            Assert.AreEqual(initialCount, trie.Count);

            // Matching key and value: the entry is removed.
            Assert.IsTrue(asDictionary.Remove(new KeyValuePair<IEnumerable<char>, bool>("ABCDE", false)));
            Assert.AreEqual(initialCount - 1, trie.Count);

            // Key-based removal.
            Assert.IsTrue(trie.Remove("X"));
            Assert.AreEqual(initialCount - 2, trie.Count);

            Assert.IsTrue(trie.Remove("ABC"));
            Assert.AreEqual(initialCount - 3, trie.Count);

            // Removing "ABC" must not affect its prefix "AB" or the sibling "ADE".
            Assert.IsFalse(trie.ContainsKey("ABC"));
            Assert.IsTrue(trie.ContainsKey("AB"));
            Assert.IsTrue(trie.ContainsKey("ADE"));
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Creates the engine: loads the symptom list via <c>GetSymptomsList</c>, registers every
        /// symptom name and synonym in a trie dictionary under the "PHENOTYPE" category, builds the
        /// approximate dictionary chunker, and reads the HMM chunker from the model file located
        /// under the directory named by the RD_AGGREGATOR_SETTINGS environment variable.
        /// </summary>
        public TextMiningEngine()
        {
            Console.WriteLine("TextMiningEngine initialization ...");

            client       = new HttpClient();
            symptomsList = new List<Symptom>();
            GetSymptomsList();
            // Alternative loader, currently disabled: getSymptomsListBeta();

            // Build the dictionary of symptoms: one entry per name and one per synonym.
            const string category = "PHENOTYPE";
            TrieDictionary phenotypeDictionary = new TrieDictionary();

            foreach (Symptom symptom in symptomsList)
            {
                phenotypeDictionary.addEntry(new com.aliasi.dict.DictionaryEntry(symptom.Name, category));

                foreach (string alias in symptom.Synonyms)
                {
                    phenotypeDictionary.addEntry(new com.aliasi.dict.DictionaryEntry(alias, category));
                }
            }

            TokenizerFactory     tokenizers = IndoEuropeanTokenizerFactory.INSTANCE;
            WeightedEditDistance distance   = new FixedWeightEditDistance(0, -1, -1, -1, double.NaN);

            // The chunker is created with a maximum distance of zero.
            const double maxDistance = 0.0;

            chunker = new ApproxDictionaryChunker(phenotypeDictionary, tokenizers, distance, maxDistance);

            // HMM preparation: the model file path is rooted at the settings environment variable.
            string settingsRoot = Environment.GetEnvironmentVariable("RD_AGGREGATOR_SETTINGS");
            java.io.File modelFile = new java.io.File($"{settingsRoot}/Aggregator/tools/model.test");

            chunkerHMM = (Chunker)AbstractExternalizable.readObject(modelFile);

            Console.WriteLine("TextMiningEngine initialization finished");
        }
        /// <summary>
        /// Checks that a freshly created trie reports <c>IsReadOnly</c> as <c>false</c>
        /// when viewed through the <c>IDictionary</c> interface.
        /// </summary>
        public void IsReadOnly()
        {
            var trie = new TrieDictionary<char, bool>();

            // The property is read through the interface view of the trie.
            var asDictionary = (IDictionary<IEnumerable<char>, bool>)trie;

            Assert.IsFalse(asDictionary.IsReadOnly);
        }