/// <summary>
/// loads the temporary predictions that accompany <paramref name="outputPath"/>,
/// converts their lookup table and matrices, and writes the resulting prediction cache
/// </summary>
/// <param name="outputPath">path of the prediction cache file to write</param>
/// <param name="description">label used in the console progress messages and passed to the reader</param>
/// <param name="fileType">file type passed to the temporary prediction reader</param>
public void Convert(string outputPath, string description, GlobalImportCommon.FileType fileType)
{
    // NOTE(review): Replace rewrites every ".dat" in the path, not only the
    // trailing extension, and leaves the path unchanged when ".dat" is absent
    // - confirm that callers always pass a plain "*.dat" path
    var tempPath = outputPath.Replace(".dat", ".dat.tmp");

    using (var tempReader = new TempPredictionReader(tempPath, description, fileType))
    {
        // step 1: read the temporary predictions
        Console.Write($"- loading {description}... ");
        var predictions = Load(tempReader);
        Console.WriteLine("finished.");

        // step 2: build the original lookup table and its converted counterpart
        Console.Write($"- creating {description} LUT... ");
        var sourceLut    = TempPrediction.CreateLookupTable(predictions);
        var convertedLut = TempPrediction.ConvertLookupTable(sourceLut);
        Console.WriteLine("finished.");

        // step 3: convert the matrices using both lookup tables
        Console.Write($"- converting {description} matrices... ");
        var matricesPerRef = TempPrediction.ConvertMatrices(predictions, sourceLut, convertedLut, _numReferenceSeqs);
        Console.WriteLine("finished.");

        // the temporary predictions are not used after the conversion
        predictions.Clear();

        var cacheHeader = PredictionCacheHeader.GetHeader(CurrentTimeTicks, tempReader.Header.GenomeAssembly, _numReferenceSeqs);

        // step 4: write the converted data
        var outputFileName = Path.GetFileName(outputPath);
        Console.Write($"- writing to {outputFileName}... ");

        using (var cacheWriter = new PredictionCacheWriter(outputPath, cacheHeader))
        {
            cacheWriter.Write(convertedLut, matricesPerRef);
        }

        Console.WriteLine("finished.");
    }
}
/// <summary>
/// returns the file header
/// </summary>
/// <param name="description">label for the file, used in error messages</param>
/// <param name="filePath">path of the file, used in error messages</param>
/// <param name="expectedFileType">file type that the first line must declare</param>
/// <param name="reader">reader from which the two header lines are read</param>
/// <returns>the header built from the four tab-separated fields on the second line</returns>
/// <exception cref="GeneralException">
/// thrown when the first line is not the expected identifier, the second line
/// is missing, or the second line has the wrong number of columns
/// </exception>
public static GlobalImportHeader GetHeader(string description, string filePath, GlobalImportCommon.FileType expectedFileType, StreamReader reader)
{
    // first line: file identifier and file type
    string line = reader.ReadLine();

    if (!IsValidFile(line, expectedFileType))
    {
        throw new GeneralException($"The {description} file ({filePath}) has an invalid header.");
    }

    // second line: tab-separated header fields
    line = reader.ReadLine();

    if (line == null)
    {
        throw new GeneralException($"The {description} file ({filePath}) has an invalid header.");
    }

    var cols = line.Split('\t');

    if (cols.Length != GlobalImportCommon.NumHeaderColumns)
    {
        throw new GeneralException($"Expected {GlobalImportCommon.NumHeaderColumns} columns in the header but found {cols.Length}");
    }

    // the header is machine-readable data, so the numbers are parsed with the
    // invariant culture rather than whatever culture the current thread uses
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var vepVersion       = ushort.Parse(cols[0], invariant);
    var vepReleaseTicks  = long.Parse(cols[1], invariant);
    var transcriptSource = (TranscriptDataSource)byte.Parse(cols[2], invariant);
    var genomeAssembly   = (GenomeAssembly)byte.Parse(cols[3], invariant);

    return new GlobalImportHeader(vepVersion, vepReleaseTicks, transcriptSource, genomeAssembly);
}
/// <summary>
/// writes the two-line text header to our output file: the identifier and
/// file type on the first line, the four tab-separated fields on the second
/// </summary>
/// <param name="writer">destination writer</param>
/// <param name="fileType">file type, written as its byte value</param>
/// <param name="transcriptSource">transcript data source, written as its byte value</param>
/// <param name="genomeAssembly">genome assembly, written as its byte value</param>
private static void WriteHeader(StreamWriter writer, GlobalImportCommon.FileType fileType, TranscriptDataSource transcriptSource, GenomeAssembly genomeAssembly)
{
    // NOTE(review): DateTime.Parse uses the current culture - confirm that the
    // configured release date is in a format that parses the same everywhere
    var releaseDate  = DateTime.Parse(ConfigurationSettings.VepReleaseDate);
    var releaseTicks = releaseDate.Ticks;

    // the enumerations are stored as their numeric byte values
    var fileTypeCode = (byte)fileType;
    var sourceCode   = (byte)transcriptSource;
    var assemblyCode = (byte)genomeAssembly;

    // line 1: identifier + file type
    writer.WriteLine("{0}\t{1}", GlobalImportCommon.Header, fileTypeCode);

    // line 2: VEP version, release ticks, transcript source, genome assembly
    writer.WriteLine("{0}\t{1}\t{2}\t{3}", ConfigurationSettings.VepVersion, releaseTicks, sourceCode, assemblyCode);
}
/// <summary>
/// constructor: opens the file and validates its header
/// </summary>
/// <param name="filePath">path of the file to read</param>
/// <param name="description">label for the file, passed on to the header check</param>
/// <param name="fileType">file type that the header is expected to declare</param>
/// <exception cref="FileNotFoundException">thrown when <paramref name="filePath"/> does not exist</exception>
public VepSimpleIntervalReader(string filePath, string description, GlobalImportCommon.FileType fileType)
{
    // sanity check
    // NOTE(review): the message hard-codes "intron" although a generic
    // description is supplied - confirm whether other file kinds use this reader
    if (!File.Exists(filePath))
    {
        throw new FileNotFoundException($"The specified intron file ({filePath}) does not exist.");
    }

    // open the file and parse the header
    _reader = GZipUtilities.GetAppropriateStreamReader(filePath);

    try
    {
        VepReaderCommon.GetHeader(description, filePath, fileType, _reader);
    }
    catch
    {
        // when a constructor throws, the caller never receives the instance and
        // therefore cannot dispose it; release the file here before rethrowing
        _reader?.Dispose();
        throw;
    }
}
/// <summary>
/// constructor: opens the file, validates its header, and stores it in Header
/// </summary>
/// <param name="filePath">path of the file to read</param>
/// <param name="description">label for the file, used in header error messages</param>
/// <param name="fileType">file type that the header is expected to declare</param>
/// <exception cref="FileNotFoundException">thrown when <paramref name="filePath"/> does not exist</exception>
public TempPredictionReader(string filePath, string description, GlobalImportCommon.FileType fileType)
{
    // sanity check
    if (!File.Exists(filePath))
    {
        throw new FileNotFoundException($"The specified protein function prediction file ({filePath}) does not exist.");
    }

    // open the file and parse the header
    _reader = GZipUtilities.GetAppropriateBinaryReader(filePath);

    try
    {
        Header = GetHeader(description, filePath, fileType, _reader);
    }
    catch
    {
        // when a constructor throws, the caller never receives the instance and
        // therefore cannot dispose it; release the file here before rethrowing
        _reader?.Dispose();
        throw;
    }
}
/// <summary>
/// writes the binary header to our output file. The fields are written in
/// this order: identifier, file type, VEP version, VEP release ticks,
/// transcript source, genome assembly, guard integer
/// </summary>
/// <param name="writer">destination writer</param>
/// <param name="fileType">file type, written as a single byte</param>
/// <param name="transcriptSource">transcript data source, written as a single byte</param>
/// <param name="genomeAssembly">genome assembly, written as a single byte</param>
private static void WriteHeader(BinaryWriter writer, GlobalImportCommon.FileType fileType, TranscriptDataSource transcriptSource, GenomeAssembly genomeAssembly)
{
    // NOTE(review): DateTime.Parse uses the current culture - confirm that the
    // configured release date is in a format that parses the same everywhere
    var releaseDate  = DateTime.Parse(ConfigurationSettings.VepReleaseDate);
    var releaseTicks = releaseDate.Ticks;

    // the enumerations are stored as single bytes
    var fileTypeCode = (byte)fileType;
    var sourceCode   = (byte)transcriptSource;
    var assemblyCode = (byte)genomeAssembly;

    // identification
    writer.Write(GlobalImportCommon.Header);
    writer.Write(fileTypeCode);

    // VEP release information
    writer.Write(ConfigurationSettings.VepVersion);
    writer.Write(releaseTicks);

    // data provenance
    writer.Write(sourceCode);
    writer.Write(assemblyCode);

    // the guard integer is the last header field
    writer.Write(CacheConstants.GuardInt);
}
/// <summary>
/// reads the binary file header, validates it, and returns it
/// </summary>
/// <param name="description">label for the file, used in error messages</param>
/// <param name="filePath">path of the file, used in error messages</param>
/// <param name="expectedFileType">file type that the header must declare</param>
/// <param name="reader">reader from which the header fields are read</param>
/// <returns>the header built from the version, release ticks, transcript source, and genome assembly</returns>
/// <exception cref="GeneralException">
/// thrown when the identifier or file type does not match, or when the guard
/// integer that follows the header fields has an unexpected value
/// </exception>
private static GlobalImportHeader GetHeader(string description, string filePath, GlobalImportCommon.FileType expectedFileType, BinaryReader reader)
{
    var invalidHeaderMessage = $"The {description} file ({filePath}) has an invalid header.";

    // identification: header string followed by the file type byte
    var identifier     = reader.ReadString();
    var actualFileType = (GlobalImportCommon.FileType)reader.ReadByte();

    if (!IsValidFile(identifier, actualFileType, expectedFileType))
    {
        throw new GeneralException(invalidHeaderMessage);
    }

    // header fields
    var version      = reader.ReadUInt16();
    var releaseTicks = reader.ReadInt64();
    var source       = (TranscriptDataSource)reader.ReadByte();
    var assembly     = (GenomeAssembly)reader.ReadByte();

    // the guard integer must follow the header fields
    if (reader.ReadUInt32() != CacheConstants.GuardInt)
    {
        throw new GeneralException(invalidHeaderMessage);
    }

    return new GlobalImportHeader(version, releaseTicks, source, assembly);
}
/// <summary>
/// returns true if this is a valid VEP reader file (binary): the identifier
/// must equal the common header string and the file type must be the expected one
/// </summary>
/// <param name="header">identifier string read from the file</param>
/// <param name="fileType">file type read from the file</param>
/// <param name="expectedFileType">file type the caller requires</param>
private static bool IsValidFile(string header, GlobalImportCommon.FileType fileType, GlobalImportCommon.FileType expectedFileType)
{
    // a wrong identifier disqualifies the file regardless of its type
    if (header != GlobalImportCommon.Header)
    {
        return false;
    }

    return fileType == expectedFileType;
}
/// <summary>
/// returns true if this is a valid VEP reader file (text): the line must be
/// exactly the common header string, a tab, and the file type's byte value
/// </summary>
/// <param name="line">first line read from the file (a null line is never valid)</param>
/// <param name="fileType">file type the caller requires</param>
private static bool IsValidFile(string line, GlobalImportCommon.FileType fileType)
    => line == $"{GlobalImportCommon.Header}\t{(byte)fileType}";