/// <summary>
/// Reads quadgram counts from a text file and adds every entry to <paramref name="dataset"/>.
/// </summary>
/// <remarks>
/// Each non-blank line must contain a key followed by an integer count, separated by one or
/// more spaces. Blank lines are skipped. Counts are parsed with the invariant culture so the
/// result does not depend on the machine's regional settings.
/// </remarks>
/// <param name="dataset">Dataset that receives the parsed (key, count) pairs via <c>Add</c>.</param>
/// <param name="fileName">Path of the file to read.</param>
/// <exception cref="ArgumentNullException"><paramref name="dataset"/> is null.</exception>
/// <exception cref="FormatException">A non-blank line has no count, or the count is not a valid integer.</exception>
public static void ParseAndFill(QuadgramDataset dataset, string fileName)
{
    if (dataset == null)
    {
        throw new ArgumentNullException(nameof(dataset));
    }

    using (StreamReader reader = new StreamReader(fileName))
    {
        string line;
        int lineNumber = 0;

        // ReadLine returns null at end of stream, so no separate EndOfStream check is needed.
        while ((line = reader.ReadLine()) != null)
        {
            lineNumber++;

            // Tolerate empty lines (commonly a trailing newline at the end of the file).
            if (String.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            // RemoveEmptyEntries keeps repeated spaces from producing empty tokens.
            string[] parts = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length < 2)
            {
                throw new FormatException(
                    "Line " + lineNumber + " of '" + fileName + "' does not contain a key and a count.");
            }

            long count = Int64.Parse(
                parts[1],
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture);

            dataset.Add(parts[0], count);
        }
    }
}
/// <summary>
/// Builds the character/index maps and the four-dimensional table of log10 quadgram
/// frequencies from <paramref name="dataset"/>.
/// </summary>
/// <remarks>
/// Every table cell is first set to the floor value <c>log10(1 / SetSize)</c>; cells for
/// quadgrams present in the dataset are then overwritten with <c>log10(count / SetSize)</c>.
/// The absolute mean of those per-quadgram values is stored as the normalisation constant.
/// NOTE(review): keys enumerated from the dataset are looked up as-is against the upper-cased
/// alphabet and are indexed at positions 0..3 — presumably the dataset guarantees upper-case,
/// four-character keys; confirm against <c>QuadgramDataset</c>.
/// NOTE(review): a dataset with <c>SetSize</c> or <c>QuadgramsCount</c> equal to zero appears to
/// yield non-finite values here — confirm whether such input should be rejected.
/// </remarks>
/// <param name="dataset">Source of the alphabet, the quadgram counts and the total set size.</param>
public TextFitnessCalculator(QuadgramDataset dataset)
{
    Guard.Argument(dataset, nameof(dataset)).NotNull();

    // The index loop below uses a byte counter, so the alphabet may hold at most
    // Byte.MaxValue (255) symbols; a larger alphabet would make the counter wrap around.
    Guard.Argument(dataset, nameof(dataset)).Require(
        dataset.Alphabet.Count <= Byte.MaxValue,
        x => "Alphabet must be no longer than 255 characters");

    _posByChar = new Dictionary<char, byte>();
    _charByPos = new Dictionary<byte, char>();

    // Invariant upper-casing keeps the mapping independent of the current culture
    // (e.g. Turkish cultures map 'i' to a dotted capital I with Char.ToUpper).
    char[] alphabet = dataset.Alphabet.Select(c => Char.ToUpperInvariant(c)).ToArray();
    _alphabetLength = alphabet.Length;

    for (byte i = 0; i < _alphabetLength; i++)
    {
        _charByPos[i] = alphabet[i];
        _posByChar[alphabet[i]] = i;
    }

    long totalSetSize = dataset.SetSize;

    _quadgramFrequencies = new double[_alphabetLength, _alphabetLength, _alphabetLength, _alphabetLength];
    _fitnessFloor = Math.Log10(1.0 / totalSetSize);

    // Pre-fill every cell with the floor value; the outermost dimension is split across
    // threads, and each iteration writes only to its own slice of the array.
    Parallel.For(0, _alphabetLength, i1 =>
    {
        for (int i2 = 0; i2 < _alphabetLength; i2++)
        {
            for (int i3 = 0; i3 < _alphabetLength; i3++)
            {
                for (int i4 = 0; i4 < _alphabetLength; i4++)
                {
                    _quadgramFrequencies[i1, i2, i3, i4] = _fitnessFloor;
                }
            }
        }
    });

    // Overwrite the cells of quadgrams present in the dataset and accumulate their sum.
    double logFrequencySum = 0;
    foreach (KeyValuePair<string, long> pair in dataset)
    {
        string quad = pair.Key;
        long quadCount = pair.Value;

        double logFrequency = Math.Log10((double)quadCount / totalSetSize);
        _quadgramFrequencies[
            _posByChar[quad[0]],
            _posByChar[quad[1]],
            _posByChar[quad[2]],
            _posByChar[quad[3]]] = logFrequency;

        logFrequencySum += logFrequency;
    }

    // Absolute mean of the per-quadgram log frequencies.
    _fitnessNormal = Math.Abs(logFrequencySum / dataset.QuadgramsCount);
}