/// <summary>
/// Splits the stored sequences into two new <see cref="SequenceData"/> instances
/// after shuffling a copy of the sequence list.
/// </summary>
/// <param name="ratio">
/// Fraction of the sequences assigned to the first part. The first part receives
/// <c>(int)(count * ratio)</c> sequences (truncated); the remainder goes to the second part.
/// </param>
/// <param name="random_seed">Seed forwarded to <c>Utilities.Shuffle</c>.</param>
/// <returns>A tuple holding the first part in <c>Item1</c> and the second part in <c>Item2</c>.</returns>
public Tuple<SequenceData, SequenceData> RandomSplit(double ratio, int random_seed)
{
    SequenceData first = new SequenceData(NumSymbols);
    SequenceData second = new SequenceData(NumSymbols);

    // Shuffle a copy of the list so the list held by this instance is not reordered.
    // NOTE(review): the int[] elements themselves are shared, not cloned — confirm
    // whether AddSequence copies them.
    List<int[]> order = new List<int[]>(sequence_list);
    Utilities.Shuffle(order, random_seed);

    // Truncating cast: the first part gets the floor of count * ratio for ratios in [0, 1].
    int firstCount = (int)(order.Count * ratio);

    int position = 0;
    foreach (int[] sequence in order)
    {
        SequenceData target = position < firstCount ? first : second;
        target.AddSequence(sequence);
        position++;
    }

    first.SaveAddedSequences();
    second.SaveAddedSequences();

    return Tuple.Create(first, second);
}
/// <summary>
/// Loads a set of integer sequences from a space-separated text file.
/// </summary>
/// <remarks>
/// Expected layout: the first line is a header whose second field is the number of
/// distinct symbols. Each following line is one sequence: the first field holds the
/// sequence length (skipped, not validated) and the remaining fields are the symbols.
/// Repeated or trailing spaces are tolerated. A blank line yields an empty sequence.
/// </remarks>
/// <param name="file">Path of the file to read.</param>
/// <returns>A <see cref="SequenceData"/> populated with every sequence in the file.</returns>
/// <exception cref="System.IO.InvalidDataException">
/// The file is empty or its header line has fewer than two fields.
/// </exception>
/// <exception cref="FormatException">A field is not a valid integer.</exception>
public static SequenceData LoadSequences(string file)
{
    string[] lines = System.IO.File.ReadAllLines(file);
    if (lines.Length == 0)
    {
        throw new System.IO.InvalidDataException("Sequence file '" + file + "' is empty.");
    }

    // Dropping empty entries keeps doubled or trailing spaces from producing ""
    // tokens that Int32.Parse would reject.
    char[] separators = { ' ' };

    // The file is machine-readable, so parsing must not depend on the current culture.
    IFormatProvider culture = System.Globalization.CultureInfo.InvariantCulture;

    // Parse the number of different symbols (second field of the header line).
    string[] header = lines[0].Split(separators, StringSplitOptions.RemoveEmptyEntries);
    if (header.Length < 2)
    {
        throw new System.IO.InvalidDataException(
            "Sequence file '" + file + "' has a malformed header line: expected at least two fields.");
    }

    int num_symbols = Int32.Parse(header[1], culture);
    SequenceData seqData = new SequenceData(num_symbols);

    // Parse the sequences, one per remaining line.
    for (int i = 1; i < lines.Length; i++)
    {
        string[] fields = lines[i].Split(separators, StringSplitOptions.RemoveEmptyEntries);

        // Skip the first element on each line, which contains the length of the sequence.
        int[] currentSeq = fields.Skip(1).Select(p => Int32.Parse(p, culture)).ToArray();
        seqData.AddSequence(currentSeq);
    }

    seqData.SaveAddedSequences();
    return seqData;
}