/// <summary>
/// Segments the sequence: for every window length, from the configured maximum
/// downwards, words are extracted until the seeker finds nothing more, and each
/// found occurrence is joined inside the convoluted chain.
/// </summary>
/// <param name="parameters">
/// The current parameters for segmentation. The window length, the window
/// decrement and the source sequence are read from it.
/// </param>
/// <returns>
/// The convoluted chain <see cref="ComplexChain"/>.
/// </returns>
public override ComplexChain Cut(ContentValues parameters)
{
    int largestWindow = (int)parameters.Get(Enum.GetName(typeof(Parameter), Parameter.Window));
    int step = (int)parameters.Get(Enum.GetName(typeof(Parameter), Parameter.WindowDecrement));
    convoluted = (ComplexChain)parameters.Get(Formalism.GetName(typeof(Formalism), Formalism.Sequence));
    alphabet = new FrequencyDictionary();

    int windowLength = largestWindow;
    while ((windowLength >= step) && (windowLength > 1))
    {
        // Keep extracting words of the current length until the seeker reports nothing.
        while (true)
        {
            UpdateParams(parameters, windowLength);
            KeyValuePair<List<string>, List<int>>? found = WordExtractorFactory.GetSeeker(extractor).Find(parameters);
            if (!found.HasValue)
            {
                break;
            }

            // The positions are reversed in place before the occurrences are joined.
            List<int> positions = found.Value.Value;
            positions.Reverse();
            foreach (int position in positions)
            {
                convoluted.Join(position, windowLength);
            }

            alphabet.Add(Helper.ToString(found.Value.Key), positions);
        }

        windowLength -= step;
    }

    FindLastWords();
    return convoluted;
}
/// <summary>
/// Checks that a dictionary built from the chain equals one built from the chain's
/// string form, and that a clone equals the string-built dictionary as well.
/// </summary>
public void CloneTest()
{
    string text = chain.ToString();
    var fromText = new FrequencyDictionary(text);
    var fromChain = new FrequencyDictionary(chain);
    FrequencyDictionary cloned = fromChain.Clone();

    Assert.True(fromText.Equals(fromChain));
    Assert.True(cloned.Equals(fromText));
}
/// <summary>
/// Calculates the distortion as the absolute difference between two ratios:
/// alphabet size to maximum word frequency, and their sum to alphabet size.
/// </summary>
/// <param name="chain">
/// The chain. Not used by this calculation.
/// </param>
/// <param name="alphabet">
/// The alphabet whose size and maximum frequency are compared.
/// </param>
/// <returns>
/// The absolute difference of the two ratios as a <see cref="double"/>.
/// </returns>
public override double Distortion(ComplexChain chain, FrequencyDictionary alphabet)
{
    double topFrequency = MaxFrequency(alphabet);
    double wordCount = alphabet.Count;

    // count / max versus (count + max) / count.
    double countToTop = wordCount / topFrequency;
    double sumToCount = (wordCount + topFrequency) / wordCount;

    double difference = countToTop - sumToCount;
    return Math.Abs(difference);
}
/// <summary>
/// Checks that adding the same word twice leaves a single entry in the dictionary.
/// </summary>
public void AddTest()
{
    var target = new FrequencyDictionary();
    var source = new FrequencyDictionary(chain);
    string[] words = { "A", "G", "C", "T" };
    const int ExpectedCount = 1;
    string word = words[0];

    target.Add(word, source[word]);
    target.Add(word, source[word]);

    Assert.True(target.Contains(word));
    Assert.True(target.Count == ExpectedCount);
}
/// <summary>
/// Checks that the dictionary built from the chain contains "A", "G", "C" and "T"
/// and does not contain "WORD" or "AG".
/// </summary>
public void ContainsTest()
{
    var alphabet = new FrequencyDictionary(chain);
    string[] words = { "A", "G", "C", "T", "WORD", "AG" };
    const int PresentCount = 4;

    // The first four entries are expected to be present.
    for (int i = 0; i < PresentCount; i++)
    {
        Assert.True(alphabet.Contains(words[i]));
    }

    // The remaining entries are expected to be absent.
    for (int i = PresentCount; i < words.Length; i++)
    {
        Assert.False(alphabet.Contains(words[i]));
    }
}
/// <summary>
/// Evaluates the distortion of the given segmentation and, when it is smaller than
/// the stored <c>Value</c>, remembers it together with clones of the chain and the alphabet.
/// </summary>
/// <param name="chain">
/// The chain being evaluated.
/// </param>
/// <param name="alphabet">
/// The alphabet being evaluated.
/// </param>
/// <returns>
/// <c>true</c> while <c>ThresholdToStop.Distance</c> is greater than
/// <c>ThresholdVariator.Precision</c>; otherwise <c>false</c>.
/// </returns>
public override bool State(ComplexChain chain, FrequencyDictionary alphabet)
{
    double candidate = Distortion(chain, alphabet);
    bool improved = candidate < Value;

    if (improved)
    {
        Value = candidate;
        this.chain = chain.Clone();
        this.alphabet = alphabet.Clone();
        ThresholdToStop.SaveBest();
    }

    bool distanceAbovePrecision = ThresholdToStop.Distance > ThresholdVariator.Precision;
    return distanceAbovePrecision;
}
/// <summary>
/// Evaluates the distortion of the given segmentation and, when its magnitude is
/// smaller than the magnitude of the stored <c>Value</c>, remembers it together with
/// clones of the chain and the alphabet.
/// </summary>
/// <param name="chain">
/// The chain being evaluated.
/// </param>
/// <param name="alphabet">
/// The alphabet being evaluated.
/// </param>
/// <returns>
/// <c>true</c> while <c>ThresholdToStop.Distance</c> is greater than
/// <c>ThresholdVariator.Precision</c>; otherwise <c>false</c>.
/// </returns>
public override bool State(ComplexChain chain, FrequencyDictionary alphabet)
{
    double candidate = Distortion(chain, alphabet);
    bool closerToZero = Math.Abs(candidate) < Math.Abs(Value);

    if (closerToZero)
    {
        this.chain = chain.Clone();
        this.alphabet = alphabet.Clone();
        Value = candidate;
        ThresholdToStop.SaveBest();
    }

    bool distanceAbovePrecision = ThresholdToStop.Distance > ThresholdVariator.Precision;
    return distanceAbovePrecision;
}
/// <summary>
/// Calculates the depth-based value of the chain and, when its magnitude exceeds the
/// stored <c>Value</c>, remembers it together with clones of the chain and the alphabet.
/// </summary>
/// <param name="chain">
/// The chain being evaluated.
/// </param>
/// <param name="alphabet">
/// The alphabet being evaluated.
/// </param>
/// <returns>
/// <c>true</c> while <c>ThresholdToStop.Distance</c> is greater than
/// <c>ThresholdVariator.Precision</c>; otherwise <c>false</c>.
/// </returns>
public override bool State(ComplexChain chain, FrequencyDictionary alphabet)
{
    // The original carried a commented-out subtraction of a gamut-depth term here.
    double measured = depth.Calculate(chain, chain.Anchor);

    // NOTE(review): the magnitude of the new value is compared against the raw stored
    // Value, and the signed value is what gets stored - confirm this is intended.
    bool exceedsStored = Math.Abs(measured) > Value;

    if (exceedsStored)
    {
        this.chain = chain.Clone();
        this.alphabet = alphabet.Clone();
        ThresholdToStop.SaveBest();
        Value = measured;
    }

    bool distanceAbovePrecision = ThresholdToStop.Distance > ThresholdVariator.Precision;
    return distanceAbovePrecision;
}
/// <summary>
/// Sums <c>n * ln(n) - n</c> over all words of the alphabet, where <c>n</c> is the
/// number of positions recorded for the word.
/// </summary>
/// <param name="alphabet">
/// The alphabet whose word positions are used.
/// </param>
/// <returns>
/// The accumulated sum as a <see cref="double"/>.
/// </returns>
public double GetTaxonsValue(FrequencyDictionary alphabet)
{
    List<List<int>> wordPositions = alphabet.GetWordsPositions();
    double total = 0;

    int word = 0;
    while (word < alphabet.Count)
    {
        int occurrences = wordPositions[word].Count;
        total += (Math.Log(occurrences) * occurrences) - occurrences;
        word++;
    }

    return total;
}
/// <summary>
/// Scans the prioritised candidates from the last entry to the first and returns the
/// first one that is absent from the alphabet, whose string form is as long as its
/// number of elements, and whose priority key exceeds the filter level.
/// </summary>
/// <param name="alphabet">
/// The alphabet that candidates are checked against.
/// </param>
/// <param name="level">
/// The filter level a candidate's priority key must exceed.
/// </param>
/// <returns>
/// The accepted candidate as a pair of word elements and positions,
/// or <c>null</c> when no candidate qualifies.
/// </returns>
protected KeyValuePair<List<string>, List<int>>? DiscardCompositeWords(FrequencyDictionary alphabet, double level)
{
    var priorities = new List<double>(wordPriority.Keys);
    var candidates = new List<KeyValuePair<List<string>, List<int>>>(wordPriority.Values);

    for (int index = candidates.Count - 1; index >= 0; index--)
    {
        List<string> elements = candidates[index].Key;
        string text = Helper.ToString(elements);

        if (alphabet.Contains(new ValueString(text)))
        {
            continue;
        }

        // Only candidates whose element count equals their string length are considered.
        if (elements.Count != text.Length)
        {
            continue;
        }

        double priority = priorities[index];
        if (priority > level)
        {
            var best = wordPriority[priority];
            return new KeyValuePair<List<string>, List<int>>(best.Key, best.Value);
        }
    }

    return null;
}
/// <summary>
/// Calls <c>Update</c> for the given segmentation and reports whether two conditions
/// both hold: the threshold distance exceeds the precision, and the magnitude of the
/// distortion exceeds <c>precisionOfDifference</c>.
/// </summary>
/// <param name="chain">
/// The chain being evaluated.
/// </param>
/// <param name="alphabet">
/// The alphabet being evaluated.
/// </param>
/// <returns>
/// <c>true</c> when both conditions hold; otherwise <c>false</c>.
/// </returns>
public override bool State(ComplexChain chain, FrequencyDictionary alphabet)
{
    Update(chain, alphabet);

    // The distortion is only evaluated when the distance check passes.
    if (ThresholdToStop.Distance > ThresholdVariator.Precision)
    {
        double magnitude = Math.Abs(Distortion(chain, alphabet));
        return magnitude > precisionOfDifference;
    }

    return false;
}
/// <summary>
/// Calculates the distortion as the depth of the chain's original form,
/// using the chain's anchor.
/// </summary>
/// <param name="chain">
/// The chain whose original form and anchor are passed to the depth calculator.
/// </param>
/// <param name="alphabet">
/// The alphabet. Not used by this calculation.
/// </param>
/// <returns>
/// The calculated depth as a <see cref="double"/>.
/// </returns>
public override double Distortion(ComplexChain chain, FrequencyDictionary alphabet)
{
    // The original carried a commented-out subtraction of a gamut-depth term here.
    var original = chain.Original();
    return depth.Calculate(original, chain.Anchor);
}
/// <summary>
/// Calculates the distortion as the regularity of the chain, using the chain's anchor.
/// </summary>
/// <param name="chain">
/// The chain passed, together with its anchor, to the regularity calculator.
/// </param>
/// <param name="alphabet">
/// The alphabet. Not used by this calculation.
/// </param>
/// <returns>
/// The calculated regularity as a <see cref="double"/>.
/// </returns>
public override double Distortion(ComplexChain chain, FrequencyDictionary alphabet)
{
    double result = regularity.Calculate(chain, chain.Anchor);
    return result;
}
/// <summary>
/// Checks that a dictionary built from the chain's string form equals
/// a dictionary built from the chain itself.
/// </summary>
public void FillOneTest()
{
    var fromText = new FrequencyDictionary(chain.ToString());
    var fromChain = new FrequencyDictionary(chain);

    bool same = fromText.Equals(fromChain);

    Assert.True(same);
}
/// <summary>
/// Finds the largest number of positions recorded for any single word of the alphabet.
/// </summary>
/// <param name="alphabet">
/// The alphabet whose word positions are inspected.
/// </param>
/// <returns>
/// The largest position count as an <see cref="int"/>.
/// </returns>
private int MaxFrequency(FrequencyDictionary alphabet)
{
    List<List<int>> wordPositions = alphabet.GetWordsPositions();
    IEnumerable<int> occurrenceCounts = wordPositions.Select(positions => positions.Count);
    return occurrenceCounts.Max();
}
/// <summary>
/// Calculates the distortion as the difference between the theoretical volume
/// of the alphabet and its actual size.
/// </summary>
/// <param name="chain">
/// The chain passed to <c>TheoryVolume</c>.
/// </param>
/// <param name="alphabet">
/// The alphabet whose size is subtracted from the theoretical volume.
/// </param>
/// <returns>
/// The theoretical volume minus the alphabet size as a <see cref="double"/>.
/// </returns>
public sealed override double Distortion(ComplexChain chain, FrequencyDictionary alphabet)
{
    double theoretical = TheoryVolume(chain, alphabet);
    double actual = alphabet.Count;
    return theoretical - actual;
}
/// <summary>
/// Checks that a word put into the dictionary is then contained in it,
/// while a word that was never added is not.
/// </summary>
public void PutTest()
{
    const string Added = "string";
    const string NeverAdded = "WOW";
    const int Position = 20;
    var alphabet = new FrequencyDictionary(chain);

    alphabet.Put(Added, Position);

    Assert.True(alphabet.Contains(Added));
    Assert.False(alphabet.Contains(NeverAdded));
}
/// <summary>
/// Checks that every one of "A", "G", "C" and "T" appears among the words
/// returned by the dictionary built from the chain.
/// </summary>
public void GetWordsTest()
{
    string[] expected = { "A", "G", "C", "T" };
    var alphabet = new FrequencyDictionary(chain);

    List<string> actual = alphabet.GetWords();
    IEnumerable<string> missing = expected.Except(actual);

    Assert.False(missing.Any());
}
/// <summary>
/// Returns a constant distortion of zero; neither argument is inspected.
/// </summary>
/// <param name="chain">
/// The chain. Not used.
/// </param>
/// <param name="alphabet">
/// The alphabet. Not used.
/// </param>
/// <returns>
/// Always <c>0</c>.
/// </returns>
public override double Distortion(ComplexChain chain, FrequencyDictionary alphabet)
{
    const double Constant = 0;
    return Constant;
}
/// <summary>
/// Calculates the sum of two terms, each accumulated as <c>n * ln(n) - n</c>:
/// one over the occurrence count of every word, the other over the number of
/// words counted for every index below the largest occurrence count.
/// </summary>
/// <param name="alphabet">
/// The alphabet whose word positions are used.
/// </param>
/// <returns>
/// The sum of both terms as a <see cref="double"/>.
/// </returns>
private double Symmetry(FrequencyDictionary alphabet)
{
    List<List<int>> positions = alphabet.GetWordsPositions();

    // First term: per-word occurrence counts; the largest count is tracked on the way.
    double taxons = 0;
    int longest = 0;
    for (int word = 0; word < alphabet.Count; word++)
    {
        int occurrences = positions[word].Count;
        taxons += (Math.Log(occurrences) * occurrences) - occurrences;
        longest = Math.Max(longest, occurrences);
    }

    // Second term: for every index below the largest count, the number of words
    // whose occurrence count is at least that index.
    // NOTE(review): ">=" makes index 0 count every word - confirm ">" was not intended.
    double merons = 0;
    for (int meronIndex = 0; meronIndex < longest; meronIndex++)
    {
        int wordsAtIndex = 0;
        for (int word = 0; word < alphabet.Count; word++)
        {
            if (positions[word].Count >= meronIndex)
            {
                wordsAtIndex++;
            }
        }

        merons += (Math.Log(wordsAtIndex) * wordsAtIndex) - wordsAtIndex;
    }

    return taxons + merons;
}
/// <summary>
/// Returns the merons value. The current implementation performs no calculation
/// and always yields zero.
/// </summary>
/// <param name="alphabet">
/// The alphabet. Not used by the current implementation.
/// </param>
/// <returns>
/// Always <c>0</c>.
/// </returns>
public double GetMeronsValue(FrequencyDictionary alphabet)
{
    return 0;
}
/// <summary>
/// Checks that every word returned by index is reported as contained in the dictionary.
/// </summary>
public void GetWordTest()
{
    var alphabet = new FrequencyDictionary(chain);

    int position = 0;
    while (position < alphabet.Count)
    {
        var word = alphabet.GetWord(position);
        Assert.True(alphabet.Contains(word));
        position++;
    }
}
/// <summary>
/// Checks that the dictionary built from the chain holds exactly four words.
/// </summary>
public void PowerTest()
{
    const int ExpectedCount = 4;

    var alphabet = new FrequencyDictionary(chain);
    bool sizeMatches = alphabet.Count == ExpectedCount;

    Assert.True(sizeMatches);
}
/// <summary>
/// Checks that clearing a dictionary built from the chain leaves it with no words.
/// </summary>
public void ClearTest()
{
    var dictionary = new FrequencyDictionary(chain);

    dictionary.Clear();

    bool empty = dictionary.Count == 0;
    Assert.True(empty);
}
/// <summary>
/// Calculates the theoretical volume of the alphabet for a chain from the highest
/// word frequency <c>f</c> and the chain length <c>z</c>:
/// <c>k = 1 / ln(f * z)</c>, <c>b = k / f - 1</c>, result <c>k * z - b</c>.
/// </summary>
/// <param name="chain">
/// An estimated chain; supplies the length and is passed to the frequency calculation.
/// </param>
/// <param name="alphabet">
/// Current alphabet; supplies the words whose frequencies are compared.
/// </param>
/// <returns>
/// The theoretical volume of the alphabet.
/// </returns>
public double TheoryVolume(ComplexChain chain, FrequencyDictionary alphabet)
{
    // Highest frequency among the alphabet's words; stays 0 when no frequency exceeds 0.
    double highestFrequency = 0;
    foreach (string word in alphabet.GetWords())
    {
        double current = Frequency(chain, word);
        if (current > highestFrequency)
        {
            highestFrequency = current;
        }
    }

    double length = chain.GetLength();
    double k = 1 / Math.Log(highestFrequency * length);
    double b = (k / highestFrequency) - 1;
    return (k * length) - b;
}
/// <summary>
/// Reports the state for the given segmentation; this implementation
/// inspects neither argument and always answers <c>false</c>.
/// </summary>
/// <param name="chain">
/// The chain. Not used.
/// </param>
/// <param name="alphabet">
/// The alphabet. Not used.
/// </param>
/// <returns>
/// Always <c>false</c>.
/// </returns>
public override bool State(ComplexChain chain, FrequencyDictionary alphabet)
{
    const bool Result = false;
    return Result;
}
/// <summary>
/// Computes the difference between the theoretical volume and the alphabet size and,
/// when its magnitude is smaller than the magnitude of the stored <c>Value</c>,
/// remembers it together with clones of the alphabet and the chain.
/// </summary>
/// <param name="chain">
/// The chain being evaluated.
/// </param>
/// <param name="alphabet">
/// The alphabet being evaluated.
/// </param>
private void Update(ComplexChain chain, FrequencyDictionary alphabet)
{
    double difference = TheoryVolume(chain, alphabet) - alphabet.Count;

    // Nothing to remember unless the new difference is strictly closer to zero.
    if (!(Math.Abs(Value) > Math.Abs(difference)))
    {
        return;
    }

    this.alphabet = alphabet.Clone();
    this.chain = chain.Clone();
    Value = difference;
    ThresholdToStop.SaveBest();
}
/// <summary>
/// Checks that the indexer returns the expected positions for each of the words
/// "A", "G", "C" and "T" in the dictionary built from the chain.
/// </summary>
public void GetTest()
{
    // Expected chain content: AACAGGTGCCCCTTATTT
    var alphabet = new FrequencyDictionary(chain);
    string[] words = { "A", "G", "C", "T" };
    int[] positionsA = { 0, 1, 3, 14 };
    int[] positionsG = { 4, 5, 7 };
    int[] positionsC = { 2, 8, 9, 10, 11 };
    int[] positionsT = { 6, 12, 13, 15, 16, 17 };

    // The original also made an un-asserted Helper.ArraysEqual call for "A" whose
    // result was discarded; the assertion below already covers that comparison.
    Assert.True(Helper.ArraysEqual(positionsA, alphabet[words[0]].ToArray()));
    Assert.True(Helper.ArraysEqual(positionsG, alphabet[words[1]].ToArray()));
    Assert.True(Helper.ArraysEqual(positionsC, alphabet[words[2]].ToArray()));
    Assert.True(Helper.ArraysEqual(positionsT, alphabet[words[3]].ToArray()));
}
/// <summary>
/// Checks that dictionaries built from the chain and from its string form are equal,
/// and that they stop being equal once a word is removed from one of them.
/// </summary>
public void EqualsTest()
{
    var fromText = new FrequencyDictionary(chain.ToString());
    var fromChain = new FrequencyDictionary(chain);

    Assert.True(fromText.Equals(fromChain));

    // Remove the word at index 1 from one side only.
    var removed = fromText.GetWord(1);
    fromText.Remove(removed);

    Assert.False(fromText.Equals(fromChain));
}
/// <summary>
/// Checks that each of "A", "G", "C" and "T" is no longer contained after being
/// removed, and that the dictionary is empty once all four are gone.
/// </summary>
public void RemoveTest()
{
    var alphabet = new FrequencyDictionary(chain);
    string[] words = { "A", "G", "C", "T", "WORD", "AG" };
    const int RemovedCount = 4;

    // Only the first four entries are removed; the rest of the array is left untouched.
    for (int i = 0; i < RemovedCount; i++)
    {
        alphabet.Remove(words[i]);
        Assert.False(alphabet.Contains(words[i]));
    }

    Assert.True(alphabet.Count == 0);
}