Beispiel #1
0
        /// <summary>
        /// Converts a temporary prediction file into a prediction cache file, reporting
        /// progress on the console. The temporary file path is obtained from
        /// <paramref name="outputPath"/> by replacing ".dat" with ".dat.tmp".
        /// </summary>
        /// <param name="outputPath">path of the prediction cache file to write</param>
        /// <param name="description">label used in the console messages and passed to the reader</param>
        /// <param name="fileType">file type passed to the temporary prediction reader</param>
        public void Convert(string outputPath, string description, GlobalImportCommon.FileType fileType)
        {
            // NOTE(review): Replace() rewrites every ".dat" occurrence in the path, not only the extension.
            var tempPath = outputPath.Replace(".dat", ".dat.tmp");

            using (var tempReader = new TempPredictionReader(tempPath, description, fileType))
            {
                // step 1: read the temporary predictions
                Console.Write($"- loading {description}... ");
                var loadedPredictions = Load(tempReader);
                Console.WriteLine("finished.");

                // step 2: build the lookup table and its converted counterpart
                Console.Write($"- creating {description} LUT... ");
                var sourceLut    = TempPrediction.CreateLookupTable(loadedPredictions);
                var convertedLut = TempPrediction.ConvertLookupTable(sourceLut);
                Console.WriteLine("finished.");

                // step 3: convert the matrices using both lookup tables
                Console.Write($"- converting {description} matrices... ");
                var matricesPerRef = TempPrediction.ConvertMatrices(loadedPredictions, sourceLut, convertedLut, _numReferenceSeqs);
                Console.WriteLine("finished.");

                // the temporary predictions are not used past this point
                loadedPredictions.Clear();

                // the genome assembly is carried over from the temporary file's header
                var cacheHeader = PredictionCacheHeader.GetHeader(CurrentTimeTicks, tempReader.Header.GenomeAssembly, _numReferenceSeqs);

                // step 4: write the prediction cache
                Console.Write($"- writing to {Path.GetFileName(outputPath)}... ");
                using (var cacheWriter = new PredictionCacheWriter(outputPath, cacheHeader))
                {
                    cacheWriter.Write(convertedLut, matricesPerRef);
                }
                Console.WriteLine("finished.");
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reads and validates the two-line text header of a global import file.
        /// </summary>
        /// <param name="description">label for the file, used in error messages</param>
        /// <param name="filePath">path of the file, used in error messages</param>
        /// <param name="expectedFileType">file type that the first header line must declare</param>
        /// <param name="reader">reader from which the two header lines are consumed</param>
        /// <returns>
        /// a header built from the first four tab-separated fields of the second line:
        /// VEP version, VEP release ticks, transcript source and genome assembly
        /// </returns>
        /// <exception cref="GeneralException">
        /// the first line fails validation, the second line is missing, or the second line
        /// does not have the expected number of columns
        /// </exception>
        public static GlobalImportHeader GetHeader(string description, string filePath,
                                                   GlobalImportCommon.FileType expectedFileType, StreamReader reader)
        {
            // both header-line failures report the same message
            string invalidHeaderMessage = $"The {description} file ({filePath}) has an invalid header.";

            // first line: signature and file type
            string signatureLine = reader.ReadLine();
            if (!IsValidFile(signatureLine, expectedFileType))
            {
                throw new GeneralException(invalidHeaderMessage);
            }

            // second line: tab-separated header fields
            string fieldLine = reader.ReadLine();
            if (fieldLine == null)
            {
                throw new GeneralException(invalidHeaderMessage);
            }

            string[] fields = fieldLine.Split('\t');
            int numFields   = fields.Length;

            if (numFields != GlobalImportCommon.NumHeaderColumns)
            {
                throw new GeneralException($"Expected {GlobalImportCommon.NumHeaderColumns} columns in the header but found {numFields}");
            }

            // arguments are evaluated left to right, so the fields are parsed in column order
            return new GlobalImportHeader(
                ushort.Parse(fields[0]),
                long.Parse(fields[1]),
                (TranscriptDataSource)byte.Parse(fields[2]),
                (GenomeAssembly)byte.Parse(fields[3]));
        }
        /// <summary>
        /// Writes the two-line text header to the output file. The first line holds the
        /// global import signature and the file type; the second line holds the VEP version,
        /// the VEP release date in ticks, the transcript source and the genome assembly.
        /// Fields are tab-separated and the enum values are written as bytes.
        /// </summary>
        /// <param name="writer">writer that receives the two header lines</param>
        /// <param name="fileType">file type written on the first line</param>
        /// <param name="transcriptSource">transcript source written on the second line</param>
        /// <param name="genomeAssembly">genome assembly written on the second line</param>
        private static void WriteHeader(StreamWriter writer, GlobalImportCommon.FileType fileType,
                                        TranscriptDataSource transcriptSource, GenomeAssembly genomeAssembly)
        {
            // the release date is a configuration value, not user input: parse it with the
            // invariant culture so the ticks do not depend on the machine's regional settings
            var vepReleaseTicks = DateTime.Parse(ConfigurationSettings.VepReleaseDate,
                                                 System.Globalization.CultureInfo.InvariantCulture).Ticks;

            writer.WriteLine("{0}\t{1}", GlobalImportCommon.Header, (byte)fileType);
            writer.WriteLine("{0}\t{1}\t{2}\t{3}", ConfigurationSettings.VepVersion, vepReleaseTicks, (byte)transcriptSource, (byte)genomeAssembly);
        }
Beispiel #4
0
        /// <summary>
        /// Opens the specified file and validates its header.
        /// </summary>
        /// <param name="filePath">path of the file to open</param>
        /// <param name="description">label for the file, used in header error messages</param>
        /// <param name="fileType">file type that the header must declare</param>
        /// <exception cref="FileNotFoundException">the file does not exist</exception>
        public VepSimpleIntervalReader(string filePath, string description, GlobalImportCommon.FileType fileType)
        {
            // sanity check
            if (!File.Exists(filePath))
            {
                throw new FileNotFoundException($"The specified intron file ({filePath}) does not exist.");
            }

            // open the file and validate the header (the parsed header itself is not kept)
            _reader = GZipUtilities.GetAppropriateStreamReader(filePath);

            try
            {
                VepReaderCommon.GetHeader(description, filePath, fileType, _reader);
            }
            catch
            {
                // the caller never receives an instance when the constructor throws,
                // so release the underlying stream here instead of leaking it
                _reader.Dispose();
                throw;
            }
        }
        /// <summary>
        /// Opens the specified temporary prediction file and reads its header into
        /// <c>Header</c>.
        /// </summary>
        /// <param name="filePath">path of the file to open</param>
        /// <param name="description">label for the file, used in header error messages</param>
        /// <param name="fileType">file type that the header must declare</param>
        /// <exception cref="FileNotFoundException">the file does not exist</exception>
        public TempPredictionReader(string filePath, string description, GlobalImportCommon.FileType fileType)
        {
            // sanity check
            if (!File.Exists(filePath))
            {
                throw new FileNotFoundException($"The specified protein function prediction file ({filePath}) does not exist.");
            }

            // open the file and parse the binary header
            _reader = GZipUtilities.GetAppropriateBinaryReader(filePath);

            try
            {
                Header = GetHeader(description, filePath, fileType, _reader);
            }
            catch
            {
                // the caller never receives an instance when the constructor throws,
                // so release the underlying stream here instead of leaking it
                _reader.Dispose();
                throw;
            }
        }
        /// <summary>
        /// Writes the binary header to the output file. The fields are written in this
        /// order: global import signature, file type (byte), VEP version, VEP release date
        /// in ticks, transcript source (byte), genome assembly (byte) and the guard integer.
        /// </summary>
        /// <param name="writer">writer that receives the header fields</param>
        /// <param name="fileType">file type stored after the signature</param>
        /// <param name="transcriptSource">transcript source stored in the header</param>
        /// <param name="genomeAssembly">genome assembly stored in the header</param>
        private static void WriteHeader(BinaryWriter writer, GlobalImportCommon.FileType fileType,
                                        TranscriptDataSource transcriptSource, GenomeAssembly genomeAssembly)
        {
            // the release date is a configuration value, not user input: parse it with the
            // invariant culture so the ticks do not depend on the machine's regional settings
            var vepReleaseTicks = DateTime.Parse(ConfigurationSettings.VepReleaseDate,
                                                 System.Globalization.CultureInfo.InvariantCulture).Ticks;

            writer.Write(GlobalImportCommon.Header);
            writer.Write((byte)fileType);
            writer.Write(ConfigurationSettings.VepVersion);
            writer.Write(vepReleaseTicks);
            writer.Write((byte)transcriptSource);
            writer.Write((byte)genomeAssembly);

            // trailing guard value that marks the end of the header
            writer.Write(CacheConstants.GuardInt);
        }
        /// <summary>
        /// Reads and validates the binary header of a global import file.
        /// </summary>
        /// <param name="description">label for the file, used in error messages</param>
        /// <param name="filePath">path of the file, used in error messages</param>
        /// <param name="expectedFileType">file type that the header must declare</param>
        /// <param name="reader">reader from which the header fields are consumed</param>
        /// <returns>
        /// a header holding the VEP version, VEP release ticks, transcript source and
        /// genome assembly read from the file
        /// </returns>
        /// <exception cref="GeneralException">
        /// the signature or file type fails validation, or the guard integer does not match
        /// </exception>
        private static GlobalImportHeader GetHeader(string description, string filePath,
                                                    GlobalImportCommon.FileType expectedFileType, BinaryReader reader)
        {
            // both validation failures report the same message
            string invalidHeaderMessage = $"The {description} file ({filePath}) has an invalid header.";

            // signature string followed by the file type byte
            string signature = reader.ReadString();
            var observedType = (GlobalImportCommon.FileType)reader.ReadByte();

            if (!IsValidFile(signature, observedType, expectedFileType))
            {
                throw new GeneralException(invalidHeaderMessage);
            }

            // fixed-width fields, read in the order they are stored
            ushort version = reader.ReadUInt16();
            long releaseTicks = reader.ReadInt64();
            var source = (TranscriptDataSource)reader.ReadByte();
            var assembly = (GenomeAssembly)reader.ReadByte();

            // the header must end with the guard value
            uint guard = reader.ReadUInt32();
            if (guard != CacheConstants.GuardInt)
            {
                throw new GeneralException(invalidHeaderMessage);
            }

            return new GlobalImportHeader(version, releaseTicks, source, assembly);
        }
 /// <summary>
 /// Checks the signature and file type read from a binary header.
 /// </summary>
 /// <param name="header">signature string read from the file</param>
 /// <param name="fileType">file type read from the file</param>
 /// <param name="expectedFileType">file type the caller expects</param>
 /// <returns>
 /// true when the signature equals the global import header and the file type
 /// equals the expected file type
 /// </returns>
 private static bool IsValidFile(string header, GlobalImportCommon.FileType fileType,
                                 GlobalImportCommon.FileType expectedFileType)
 {
     // wrong signature: the file type is irrelevant
     if (header != GlobalImportCommon.Header)
     {
         return false;
     }

     return fileType == expectedFileType;
 }
Beispiel #9
0
        /// <summary>
        /// Checks the first line of a text header.
        /// </summary>
        /// <param name="line">first line read from the file (a null line is never valid)</param>
        /// <param name="fileType">file type the caller expects</param>
        /// <returns>
        /// true when the line is exactly the global import header, a tab, and the
        /// file type written as a byte value
        /// </returns>
        private static bool IsValidFile(string line, GlobalImportCommon.FileType fileType) =>
            line == $"{GlobalImportCommon.Header}\t{(byte)fileType}";