/// <summary>
        /// Groups the transcripts by the prediction string that <paramref name="predictionFunc"/> returns
        /// for them and hands the resulting dictionary to <paramref name="writer"/>.
        /// </summary>
        /// <param name="writer">writer that receives the chromosome and the grouped predictions</param>
        /// <param name="transcripts">transcripts to group; each one is identified by its index in this list</param>
        /// <param name="predictionFunc">returns the prediction string of a transcript; transcripts for which it returns null are skipped</param>
        /// <param name="chromosome">chromosome passed through to the writer unchanged</param>
        private static void WritePredictions(PredictionWriter writer, IReadOnlyList<MutableTranscript> transcripts,
                                             Func<MutableTranscript, string> predictionFunc, IChromosome chromosome)
        {
            // prediction string -> indices (into transcripts) of every transcript that produced it
            var predictionDict = new Dictionary<string, List<int>>(StringComparer.Ordinal);

            for (var transcriptIndex = 0; transcriptIndex < transcripts.Count; transcriptIndex++)
            {
                string predictionData = predictionFunc(transcripts[transcriptIndex]);
                if (predictionData == null)
                {
                    continue;
                }

                // first time this prediction string is seen: register an empty index list for it
                if (!predictionDict.TryGetValue(predictionData, out var transcriptIdList))
                {
                    transcriptIdList               = new List<int>();
                    predictionDict[predictionData] = transcriptIdList;
                }

                transcriptIdList.Add(transcriptIndex);
            }

            // the dictionary is written even when it is empty
            writer.Write(chromosome, predictionDict);
        }
        /// <summary>
        /// Walks every reference sequence known to the reference file, parses its VEP directory when
        /// one exists, merges the parsed transcripts and regulatory regions, and writes the results to
        /// the intermediate files derived from the output stub (.sift.gz, .polyphen.gz,
        /// .transcripts.gz, .regulatory.gz) plus a transcript merge log.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> after all reference sequences have been handled</returns>
        private static ExitCodes ProgramExecution()
        {
            var source           = GetSource(_transcriptSource);
            var reader           = new CompressedSequenceReader(FileUtilities.GetReadStream(_inputReferencePath));
            var vepRoot          = new VepRootDirectory(reader.RefNameToChromosome);
            var refIndexToVepDir = vepRoot.GetRefIndexToVepDir(_inputVepDirectory);

            var  assembly     = GenomeAssemblyHelper.Convert(_genomeAssembly);
            long releaseTicks = DateTime.Parse(_vepReleaseDate).Ticks;
            var  idToGenbank  = GetIdToGenbank(assembly, source);

            // -------------------------
            // create the pre-cache file
            // -------------------------

            // every output file shares the same header
            int refSeqCount = reader.NumRefSeqs;
            var ioHeader    = new IntermediateIoHeader(_vepVersion, releaseTicks, source, assembly, refSeqCount);

            using (var mergeLogger = new TranscriptMergerLogger(FileUtilities.GetCreateStream(_outputStub + ".merge_transcripts.log")))
            using (var siftWriter = new PredictionWriter(GZipUtilities.GetStreamWriter(_outputStub + ".sift.gz"), ioHeader, IntermediateIoCommon.FileType.Sift))
            using (var polyphenWriter = new PredictionWriter(GZipUtilities.GetStreamWriter(_outputStub + ".polyphen.gz"), ioHeader, IntermediateIoCommon.FileType.Polyphen))
            using (var transcriptWriter = new MutableTranscriptWriter(GZipUtilities.GetStreamWriter(_outputStub + ".transcripts.gz"), ioHeader))
            using (var regulatoryRegionWriter = new RegulatoryRegionWriter(GZipUtilities.GetStreamWriter(_outputStub + ".regulatory.gz"), ioHeader))
            {
                var cacheParser   = new VepCacheParser(source);
                var noPredictions = new Dictionary<string, List<int>>();

                // process each reference sequence in index order
                for (ushort refIndex = 0; refIndex < refSeqCount; refIndex++)
                {
                    var chrom = reader.RefIndexToChromosome[refIndex];

                    if (refIndexToVepDir.TryGetValue(refIndex, out string vepSubDir))
                    {
                        Console.WriteLine("Parsing reference sequence [{0}]:", chrom.UcscName);

                        var parsed            = cacheParser.ParseDumpDirectory(chrom, vepSubDir);
                        var mergedTranscripts = TranscriptMerger.Merge(mergeLogger, parsed.Transcripts, idToGenbank);
                        var mergedRegions     = RegulatoryRegionMerger.Merge(parsed.RegulatoryRegions);

                        int rawCount    = parsed.Transcripts.Count;
                        int mergedCount = mergedTranscripts.Count;
                        Console.WriteLine($"- # merged transcripts: {mergedCount}, # total transcripts: {rawCount}");

                        WriteTranscripts(transcriptWriter, mergedTranscripts);
                        WriteRegulatoryRegions(regulatoryRegionWriter, mergedRegions);
                        WritePredictions(siftWriter, mergedTranscripts, transcript => transcript.SiftData, chrom);
                        WritePredictions(polyphenWriter, mergedTranscripts, transcript => transcript.PolyphenData, chrom);
                    }
                    else
                    {
                        // no VEP directory for this reference sequence: the prediction writers
                        // still get an (empty) entry for it, the other writers get nothing
                        siftWriter.Write(chrom, noPredictions);
                        polyphenWriter.Write(chrom, noPredictions);
                    }
                }
            }

            Console.WriteLine("\n{0} directories processed.", refIndexToVepDir.Count);

            return ExitCodes.Success;
        }