The finite set of unique words was extracted from a sequence of characters.
        /// <summary>
        /// Cuts the sequence taken from <paramref name="parameters"/>: for every window length,
        /// from the largest one downwards, the seeker is asked for pairs until it returns none,
        /// and every reported position is joined in the convoluted chain.
        /// </summary>
        /// <param name="parameters">
        /// The current parameters for segmentation. The window length, the window decrement and
        /// the sequence are read from it under the names of <c>Parameter.Window</c>,
        /// <c>Parameter.WindowDecrement</c> and <c>Formalism.Sequence</c>.
        /// </param>
        /// <returns>
        /// The convoluted chain <see cref="ComplexChain"/>.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The window decrement is not positive while the window is longer than one element;
        /// the window would never shrink, so the search would not terminate.
        /// </exception>
        public override ComplexChain Cut(ContentValues parameters)
        {
            int maxWindowLen = (int)parameters.Get(Enum.GetName(typeof(Parameter), Parameter.Window));
            int windowDec = (int)parameters.Get(Enum.GetName(typeof(Parameter), Parameter.WindowDecrement));

            // Only reject the decrement when the loop below would actually be entered:
            // with a window of one element or less the original code returned normally.
            if ((windowDec <= 0) && (maxWindowLen > 1))
            {
                throw new ArgumentOutOfRangeException(
                    "parameters",
                    windowDec,
                    "The window decrement must be positive, otherwise the window never shrinks.");
            }

            convoluted = (ComplexChain)parameters.Get(Formalism.GetName(typeof(Formalism), Formalism.Sequence));
            alphabet = new FrequencyDictionary();

            for (int winLen = maxWindowLen; (winLen >= windowDec) && (winLen > 1); winLen -= windowDec)
            {
                while (true)
                {
                    // The parameters are refreshed with the current window length before every search.
                    UpdateParams(parameters, winLen);
                    KeyValuePair<List<string>, List<int>>? pair = WordExtractorFactory.GetSeeker(extractor).Find(parameters);
                    if (!pair.HasValue)
                    {
                        break;
                    }

                    // The list is reversed in place, so the reversed order is also what is stored
                    // in the alphabet below.
                    // NOTE(review): presumably joining from the last position backwards keeps the
                    // remaining positions valid - confirm against ComplexChain.Join.
                    List<int> positions = pair.Value.Value;
                    positions.Reverse();
                    foreach (int position in positions)
                    {
                        convoluted.Join(position, winLen);
                    }

                    alphabet.Add(Helper.ToString(pair.Value.Key), positions);
                }
            }

            FindLastWords();

            return convoluted;
        }
 /// <summary>
 /// Checks that the dictionary built from the chain text equals the one built from the chain,
 /// and that a clone of the latter equals the former.
 /// </summary>
 public void CloneTest()
 {
     var fromText = new FrequencyDictionary(chain.ToString());
     var fromChain = new FrequencyDictionary(chain);
     FrequencyDictionary copy = fromChain.Clone();

     Assert.True(fromText.Equals(fromChain));
     Assert.True(copy.Equals(fromText));
 }
        /// <summary>
        /// Returns the absolute difference between two ratios built from the alphabet size
        /// and the value of <c>MaxFrequency(alphabet)</c>:
        /// size / max and (size + max) / size.
        /// </summary>
        /// <param name="chain">
        /// The chain. It is not read by this method.
        /// </param>
        /// <param name="alphabet">
        /// The alphabet whose <c>Count</c> and maximum frequency are compared.
        /// </param>
        /// <returns>
        /// The non-negative difference of the two ratios as <see cref="double"/>.
        /// </returns>
        public override double Distortion(ComplexChain chain, FrequencyDictionary alphabet)
        {
            double topFrequency = MaxFrequency(alphabet);
            double wordCount = alphabet.Count;

            double countToFrequency = wordCount / topFrequency;
            double sumToCount = (wordCount + topFrequency) / wordCount;
            double difference = countToFrequency - sumToCount;

            return Math.Abs(difference);
        }
 /// <summary>
 /// Checks that adding the same word twice leaves exactly one entry in the dictionary.
 /// </summary>
 public void AddTest()
 {
     var target = new FrequencyDictionary();
     var source = new FrequencyDictionary(chain);
     string word = "A";
     int expectedCount = 1;

     for (int attempt = 0; attempt < 2; attempt++)
     {
         target.Add(word, source[word]);
     }

     Assert.True(target.Contains(word));
     Assert.True(target.Count == expectedCount);
 }
 /// <summary>
 /// Checks that the dictionary built from the chain contains the four single letters
 /// and does not contain the two other words.
 /// </summary>
 public void ContainsTest()
 {
     var alphabet = new FrequencyDictionary(chain);
     string[] present = { "A", "G", "C", "T" };
     string[] absent = { "WORD", "AG" };

     foreach (string word in present)
     {
         Assert.True(alphabet.Contains(word));
     }

     foreach (string word in absent)
     {
         Assert.False(alphabet.Contains(word));
     }
 }
        /// <summary>
        /// Evaluates the distortion of the given segmentation and, when it is lower than the
        /// stored <c>Value</c>, stores it together with clones of the chain and the alphabet
        /// and calls <c>ThresholdToStop.SaveBest()</c>.
        /// </summary>
        /// <param name="chain">
        /// The chain to evaluate.
        /// </param>
        /// <param name="alphabet">
        /// The alphabet to evaluate.
        /// </param>
        /// <returns>
        /// <c>true</c> while <c>ThresholdToStop.Distance</c> is greater than
        /// <c>ThresholdVariator.Precision</c>; otherwise <c>false</c>.
        /// </returns>
        public override bool State(ComplexChain chain, FrequencyDictionary alphabet)
        {
            double candidate = Distortion(chain, alphabet);
            bool improved = Value > candidate;

            if (improved)
            {
                Value = candidate;
                this.chain = chain.Clone();
                this.alphabet = alphabet.Clone();
                ThresholdToStop.SaveBest();
            }

            bool aboveThreshold = ThresholdToStop.Distance > ThresholdVariator.Precision;
            return aboveThreshold;
        }
        /// <summary>
        /// Evaluates the distortion of the given segmentation and, when its absolute value is
        /// smaller than the absolute value of the stored <c>Value</c>, keeps clones of the chain
        /// and the alphabet, stores the (signed) distortion and calls
        /// <c>ThresholdToStop.SaveBest()</c>.
        /// </summary>
        /// <param name="chain">
        /// The chain to evaluate.
        /// </param>
        /// <param name="alphabet">
        /// The alphabet to evaluate.
        /// </param>
        /// <returns>
        /// <c>true</c> while <c>ThresholdToStop.Distance</c> is greater than
        /// <c>ThresholdVariator.Precision</c>; otherwise <c>false</c>.
        /// </returns>
        public override bool State(ComplexChain chain, FrequencyDictionary alphabet)
        {
            double candidate = Distortion(chain, alphabet);
            double storedMagnitude = Math.Abs(Value);
            double candidateMagnitude = Math.Abs(candidate);

            if (storedMagnitude > candidateMagnitude)
            {
                this.chain = chain.Clone();
                this.alphabet = alphabet.Clone();
                Value = candidate;
                ThresholdToStop.SaveBest();
            }

            bool aboveThreshold = ThresholdToStop.Distance > ThresholdVariator.Precision;
            return aboveThreshold;
        }
        /// <summary>
        /// Calculates <c>depth</c> for the chain and its anchor and, when the absolute value of
        /// the result is greater than the stored <c>Value</c>, keeps clones of the chain and the
        /// alphabet, calls <c>ThresholdToStop.SaveBest()</c> and stores the result.
        /// </summary>
        /// <param name="chain">
        /// The chain to evaluate.
        /// </param>
        /// <param name="alphabet">
        /// The alphabet; it is only cloned, not used in the calculation.
        /// </param>
        /// <returns>
        /// <c>true</c> while <c>ThresholdToStop.Distance</c> is greater than
        /// <c>ThresholdVariator.Precision</c>; otherwise <c>false</c>.
        /// </returns>
        public override bool State(ComplexChain chain, FrequencyDictionary alphabet)
        {
            double measured = depth.Calculate(chain, chain.Anchor);
            double magnitude = Math.Abs(measured);

            // NOTE(review): the comparison uses the absolute value but the signed value is stored;
            // confirm that this asymmetry is intended.
            if (magnitude > Value)
            {
                this.chain = chain.Clone();
                this.alphabet = alphabet.Clone();
                ThresholdToStop.SaveBest();
                Value = measured;
            }

            bool aboveThreshold = ThresholdToStop.Distance > ThresholdVariator.Precision;
            return aboveThreshold;
        }
        /// <summary>
        /// Sums n * ln(n) - n over all words of the alphabet, where n is the number of
        /// positions recorded for the word.
        /// </summary>
        /// <param name="alphabet">
        /// The alphabet whose word positions are read.
        /// </param>
        /// <returns>
        /// The accumulated sum as <see cref="double"/>.
        /// </returns>
        public double GetTaxonsValue(FrequencyDictionary alphabet)
        {
            List<List<int>> occurrences = alphabet.GetWordsPositions();
            double total = 0;

            int wordIndex = 0;
            while (wordIndex < alphabet.Count)
            {
                int size = occurrences[wordIndex].Count;
                total += (Math.Log(size) * size) - size;
                wordIndex++;
            }

            return total;
        }
        /// <summary>
        /// Scans the entries of <c>wordPriority</c> from the last one to the first and returns
        /// the first entry that is not yet in the alphabet, whose number of parts equals the
        /// length of its joined string, and whose key is greater than <paramref name="level"/>.
        /// </summary>
        /// <param name="alphabet">
        /// The alphabet used to skip words that are already known.
        /// </param>
        /// <param name="level">
        /// The filter level; only entries with a key strictly above it are returned.
        /// </param>
        /// <returns>
        /// A copy of the matching entry, or <c>null</c> when no entry passes all checks.
        /// </returns>
        protected KeyValuePair<List<string>, List<int>>? DiscardCompositeWords(FrequencyDictionary alphabet, double level)
        {
            var priorities = new List<double>(wordPriority.Keys);
            var candidates = new List<KeyValuePair<List<string>, List<int>>>(wordPriority.Values);

            for (int i = candidates.Count - 1; i >= 0; i--)
            {
                List<string> parts = candidates[i].Key;
                string joined = Helper.ToString(parts);

                if (alphabet.Contains(new ValueString(joined)))
                {
                    continue;
                }

                // NOTE(review): equality presumably means every part is a single character
                // (assuming Helper.ToString concatenates the parts) - confirm.
                if (parts.Count != joined.Length)
                {
                    continue;
                }

                double priority = priorities[i];
                if (priority > level)
                {
                    var best = wordPriority[priority];
                    return new KeyValuePair<List<string>, List<int>>(best.Key, best.Value);
                }
            }

            return null;
        }
 /// <summary>
 /// Calls <c>Update</c> for the given segmentation and then reports whether both thresholds
 /// are still exceeded.
 /// </summary>
 /// <param name="chain">
 /// The chain to evaluate.
 /// </param>
 /// <param name="alphabet">
 /// The alphabet to evaluate.
 /// </param>
 /// <returns>
 /// <c>true</c> when <c>ThresholdToStop.Distance</c> is greater than
 /// <c>ThresholdVariator.Precision</c> and the absolute distortion is greater than
 /// <c>precisionOfDifference</c>; otherwise <c>false</c>.
 /// </returns>
 public override bool State(ComplexChain chain, FrequencyDictionary alphabet)
 {
     Update(chain, alphabet);

     // The distortion is only calculated when the first threshold is still exceeded.
     if (!(ThresholdToStop.Distance > ThresholdVariator.Precision))
     {
         return false;
     }

     return Math.Abs(Distortion(chain, alphabet)) > precisionOfDifference;
 }
 /// <summary>
 /// Returns the result of <c>depth.Calculate</c> for the original of the chain and the
 /// chain's anchor.
 /// </summary>
 /// <param name="chain">
 /// The chain whose <c>Original()</c> and <c>Anchor</c> are passed to the calculator.
 /// </param>
 /// <param name="alphabet">
 /// The alphabet. It is not read by this method.
 /// </param>
 /// <returns>
 /// The calculated value as <see cref="double"/>.
 /// </returns>
 public override double Distortion(ComplexChain chain, FrequencyDictionary alphabet)
 {
     // An earlier variant also subtracted gamutDeep.Calculate(chain); it is not applied here.
     var source = chain.Original();
     return depth.Calculate(source, chain.Anchor);
 }
 /// <summary>
 /// Returns the result of <c>regularity.Calculate</c> for the chain and its anchor.
 /// </summary>
 /// <param name="chain">
 /// The chain passed to the calculator together with its <c>Anchor</c>.
 /// </param>
 /// <param name="alphabet">
 /// The alphabet. It is not read by this method.
 /// </param>
 /// <returns>
 /// The calculated value as <see cref="double"/>.
 /// </returns>
 public override double Distortion(ComplexChain chain, FrequencyDictionary alphabet)
 {
     double result = regularity.Calculate(chain, chain.Anchor);
     return result;
 }
 /// <summary>
 /// Checks that the dictionary built from the chain text equals the one built from the chain.
 /// </summary>
 public void FillOneTest()
 {
     var fromText = new FrequencyDictionary(chain.ToString());
     var fromChain = new FrequencyDictionary(chain);

     bool same = fromText.Equals(fromChain);

     Assert.True(same);
 }
 /// <summary>
 /// Finds the largest number of positions recorded for a single word of the alphabet.
 /// </summary>
 /// <param name="alphabet">
 /// The alphabet whose word positions are inspected.
 /// </param>
 /// <returns>
 /// The size of the longest position list as <see cref="int"/>.
 /// </returns>
 private int MaxFrequency(FrequencyDictionary alphabet)
 {
     var occurrences = alphabet.GetWordsPositions();
     return occurrences.Select(wordPositions => wordPositions.Count).Max();
 }
 /// <summary>
 /// Returns the difference between the value of <c>TheoryVolume</c> for the segmentation
 /// and the number of words in the alphabet.
 /// </summary>
 /// <param name="chain">
 /// The chain passed to <c>TheoryVolume</c>.
 /// </param>
 /// <param name="alphabet">
 /// The alphabet passed to <c>TheoryVolume</c>; its <c>Count</c> is subtracted.
 /// </param>
 /// <returns>
 /// The difference as <see cref="double"/>.
 /// </returns>
 public override sealed double Distortion(ComplexChain chain, FrequencyDictionary alphabet)
 {
     double expectedVolume = TheoryVolume(chain, alphabet);
     double actualVolume = alphabet.Count;
     return expectedVolume - actualVolume;
 }
        /// <summary>
        /// Checks that a word put into the dictionary is found afterwards and that a word
        /// that was never put is not.
        /// </summary>
        public void PutTest()
        {
            string added = "string";
            string neverAdded = "WOW";
            int position = 20;
            var alphabet = new FrequencyDictionary(chain);

            alphabet.Put(added, position);

            Assert.True(alphabet.Contains(added));
            Assert.False(alphabet.Contains(neverAdded));
        }
 /// <summary>
 /// Checks that every one of the four single letters is among the words of the dictionary.
 /// </summary>
 public void GetWordsTest()
 {
     string[] expected = { "A", "G", "C", "T" };
     var alphabet = new FrequencyDictionary(chain);

     List<string> actual = alphabet.GetWords();
     IEnumerable<string> missing = expected.Except(actual);

     Assert.False(missing.Any());
 }
 /// <summary>
 /// Always reports a distortion of zero; neither argument is read.
 /// </summary>
 /// <param name="chain">
 /// The chain. It is not read by this method.
 /// </param>
 /// <param name="alphabet">
 /// The alphabet. It is not read by this method.
 /// </param>
 /// <returns>
 /// Zero as <see cref="double"/>.
 /// </returns>
 public override double Distortion(ComplexChain chain, FrequencyDictionary alphabet)
 {
     const double NoDistortion = 0;
     return NoDistortion;
 }
        /// <summary>
        /// Adds two sums of the form n * ln(n) - n: one over the number of positions of every
        /// word, and one over the number of words counted for every occurrence index below the
        /// longest position list.
        /// </summary>
        /// <param name="alphabet">
        /// The alphabet whose word positions are read.
        /// </param>
        /// <returns>
        /// The sum of both parts as <see cref="double"/>.
        /// </returns>
        private double Symmetry(FrequencyDictionary alphabet)
        {
            List<List<int>> occurrences = alphabet.GetWordsPositions();

            // First part: one term per word, and the longest position list is found on the way.
            double taxonSum = 0;
            int longest = 0;
            for (int word = 0; word < alphabet.Count; word++)
            {
                int size = occurrences[word].Count;
                taxonSum += (Math.Log(size) * size) - size;
                longest = Math.Max(longest, size);
            }

            // Second part: one term per occurrence index.
            double meronSum = 0;
            for (int level = 0; level < longest; level++)
            {
                int reached = 0;
                for (int word = 0; word < alphabet.Count; word++)
                {
                    // NOTE(review): ">=" with a zero-based level counts every word at level 0 and
                    // never requires the full length of the longest list; ">" may have been meant.
                    if (occurrences[word].Count >= level)
                    {
                        reached++;
                    }
                }

                meronSum += (Math.Log(reached) * reached) - reached;
            }

            return taxonSum + meronSum;
        }
        /// <summary>
        /// Returns the merons value, which is currently always zero: the alphabet is not read.
        /// </summary>
        /// <param name="alphabet">
        /// The alphabet. It is not read by this method.
        /// </param>
        /// <returns>
        /// Zero as <see cref="double"/>.
        /// </returns>
        public double GetMeronsValue(FrequencyDictionary alphabet)
        {
            // No calculation is performed here.
            return 0d;
        }
 /// <summary>
 /// Checks that every word returned by index is reported as contained in the dictionary.
 /// </summary>
 public void GetWordTest()
 {
     var alphabet = new FrequencyDictionary(chain);

     int index = 0;
     while (index < alphabet.Count)
     {
         var word = alphabet.GetWord(index);
         Assert.True(alphabet.Contains(word));
         index++;
     }
 }
 /// <summary>
 /// Checks that the dictionary built from the chain holds exactly four words.
 /// </summary>
 public void PowerTest()
 {
     int expectedCount = 4;
     var alphabet = new FrequencyDictionary(chain);

     bool matches = alphabet.Count == expectedCount;

     Assert.True(matches);
 }
 /// <summary>
 /// Checks that the dictionary is empty after it has been cleared.
 /// </summary>
 public void ClearTest()
 {
     var alphabet = new FrequencyDictionary(chain);

     alphabet.Clear();
     bool isEmpty = alphabet.Count == 0;

     Assert.True(isEmpty);
 }
        /// <summary>
        /// Calculates the theoretical volume of the alphabet for a chain. With f the largest
        /// positive word frequency (zero when there is none) and z the chain length, the result
        /// is k * z - b, where k = 1 / ln(f * z) and b = k / f - 1.
        /// </summary>
        /// <param name="chain">
        /// An estimated chain; its length and the word frequencies in it are used.
        /// </param>
        /// <param name="alphabet">
        /// Current alphabet; its words are the ones whose frequencies are compared.
        /// </param>
        /// <returns>
        /// The theoretical volume of the alphabet.
        /// </returns>
        public double TheoryVolume(ComplexChain chain, FrequencyDictionary alphabet)
        {
            double maxFrequency = 0;
            foreach (string word in alphabet.GetWords())
            {
                double current = Frequency(chain, word);
                if (current > maxFrequency)
                {
                    maxFrequency = current;
                }
            }

            double length = chain.GetLength();
            double scale = 1 / Math.Log(maxFrequency * length);
            double shift = (scale / maxFrequency) - 1;

            return (scale * length) - shift;
        }
 /// <summary>
 /// Always reports <c>false</c>; neither argument is read.
 /// </summary>
 /// <param name="chain">
 /// The chain. It is not read by this method.
 /// </param>
 /// <param name="alphabet">
 /// The alphabet. It is not read by this method.
 /// </param>
 /// <returns>
 /// Always <c>false</c>.
 /// </returns>
 public override bool State(ComplexChain chain, FrequencyDictionary alphabet)
 {
     const bool Result = false;
     return Result;
 }
 /// <summary>
 /// Compares the difference between the theoretical volume and the alphabet size with the
 /// stored <c>Value</c> and, when its absolute value is smaller, keeps clones of the alphabet
 /// and the chain, stores the difference and calls <c>ThresholdToStop.SaveBest()</c>.
 /// </summary>
 /// <param name="chain">
 /// The chain to evaluate.
 /// </param>
 /// <param name="alphabet">
 /// The alphabet to evaluate.
 /// </param>
 private void Update(ComplexChain chain, FrequencyDictionary alphabet)
 {
     double candidate = TheoryVolume(chain, alphabet) - alphabet.Count;

     // Nothing to store unless the new difference is strictly closer to zero.
     if (!(Math.Abs(Value) > Math.Abs(candidate)))
     {
         return;
     }

     this.alphabet = alphabet.Clone();
     this.chain = chain.Clone();
     Value = candidate;
     ThresholdToStop.SaveBest();
 }
 /// <summary>
 /// Checks that the indexer returns the expected positions for each of the four letters.
 /// </summary>
 public void GetTest()
 {
     // The expected positions below correspond to the sequence AACAGGTGCCCCTTATTT.
     var alphabet = new FrequencyDictionary(chain);
     string[] words = { "A", "G", "C", "T" };
     int[] positionsA = { 0, 1, 3, 14 };
     int[] positionsG = { 4, 5, 7 };
     int[] positionsC = { 2, 8, 9, 10, 11 };
     int[] positionsT = { 6, 12, 13, 15, 16, 17 };

     // Every comparison is asserted; a bare call whose result is ignored verifies nothing.
     Assert.True(Helper.ArraysEqual(positionsA, alphabet[words[0]].ToArray()));
     Assert.True(Helper.ArraysEqual(positionsG, alphabet[words[1]].ToArray()));
     Assert.True(Helper.ArraysEqual(positionsC, alphabet[words[2]].ToArray()));
     Assert.True(Helper.ArraysEqual(positionsT, alphabet[words[3]].ToArray()));
 }
 /// <summary>
 /// Checks that dictionaries built from the chain text and from the chain are equal, and
 /// that they stop being equal once a word is removed from one of them.
 /// </summary>
 public void EqualsTest()
 {
     var fromText = new FrequencyDictionary(chain.ToString());
     var fromChain = new FrequencyDictionary(chain);
     Assert.True(fromText.Equals(fromChain));

     var removed = fromText.GetWord(1);
     fromText.Remove(removed);

     Assert.False(fromText.Equals(fromChain));
 }
 /// <summary>
 /// Checks that each of the four letters is gone after it is removed and that the
 /// dictionary is empty once all of them have been removed.
 /// </summary>
 public void RemoveTest()
 {
     var alphabet = new FrequencyDictionary(chain);
     string[] letters = { "A", "G", "C", "T" };

     foreach (string letter in letters)
     {
         alphabet.Remove(letter);
         Assert.False(alphabet.Contains(letter));
     }

     Assert.True(alphabet.Count == 0);
 }