/// <summary>
/// returns the header of the first supplementary annotation (*.nsa) file returned by
/// Directory.GetFiles for the specified directory, or null if no such file exists
/// </summary>
/// <param name="saDir">the directory that is searched for *.nsa files</param>
/// <returns>the header read from the first matching file; null when no file matches</returns>
private static SupplementaryAnnotationHeader GetSaHeader(string saDir)
{
    // Directory.GetFiles never returns null: an empty array means nothing matched,
    // so the empty case is the only "no files" condition that has to be handled
    var saFiles = Directory.GetFiles(saDir, "*.nsa");

    if (saFiles.Length == 0)
    {
        return null;
    }

    // GetHeader requires the out argument, but the intervals position is not needed here
    long intervalsPosition;
    return SupplementaryAnnotationReader.GetHeader(saFiles[0], out intervalsPosition);
}
/// <summary>
/// checks the supplementary annotation directory to ensure that all of its *.nsa files share
/// one data version, one set of data source versions, and one genome assembly
/// </summary>
/// <param name="saDir">the supplementary annotation directory; when null or empty, no checks are performed</param>
/// <param name="mainDataSourceVersions">the list to which the data source versions found in the directory are appended</param>
/// <param name="saDirectory">
/// set to null when saDir is null or empty; otherwise set to a new SupplementaryAnnotationDirectory
/// built from the observed data version and genome assembly
/// </param>
/// <exception cref="UserErrorException">
/// thrown when no supplementary annotation headers could be read from the directory, or when the
/// files disagree on the data version, the data source versions, or the genome assembly
/// </exception>
public static void CheckDirectoryIntegrity(string saDir, List<DataSourceVersion> mainDataSourceVersions, out SupplementaryAnnotationDirectory saDirectory)
{
    // sanity check: make sure the directory is set
    if (string.IsNullOrEmpty(saDir))
    {
        saDirectory = null;
        return;
    }

    var observedDataVersions             = new HashSet<ushort>();
    var refSeqsToPaths                   = new Dictionary<string, string>();
    var observedGenomeAssemblies         = new HashSet<GenomeAssembly>();
    var hasDataSourceVersions            = false;
    var foundDifferentDataSourceVersions = false;
    var dataSourceVersions               = new List<DataSourceVersion>();

    // grab the header data from each supplementary annotation file
    foreach (var saPath in Directory.GetFiles(saDir, "*.nsa"))
    {
        long intervalsPosition;
        var header = SupplementaryAnnotationReader.GetHeader(saPath, out intervalsPosition);

        // files for which no header is returned are ignored
        if (header == null)
        {
            continue;
        }

        observedDataVersions.Add(header.DataVersion);
        refSeqsToPaths[header.ReferenceSequenceName] = saPath;
        observedGenomeAssemblies.Add(header.GenomeAssembly);

        if (hasDataSourceVersions)
        {
            // every subsequent file is compared against the versions of the first file
            if (!dataSourceVersions.SequenceEqual(header.DataSourceVersions))
            {
                foundDifferentDataSourceVersions = true;
            }
        }
        else
        {
            // the first readable header supplies the reference set of data source versions
            dataSourceVersions    = header.DataSourceVersions;
            hasDataSourceVersions = true;
        }
    }

    // sanity check: no references were found
    if (refSeqsToPaths.Count == 0 || observedDataVersions.Count == 0)
    {
        throw new UserErrorException($"Unable to find any supplementary annotation files in the following directory: {saDir}");
    }

    // sanity check: more than one cache data version found
    if (observedDataVersions.Count > 1)
    {
        throw new UserErrorException($"Found more than one cache data version represented in the following directory: {saDir}");
    }

    // sanity check: make sure all of the files have the same data source versions
    if (foundDifferentDataSourceVersions)
    {
        throw new UserErrorException($"Found more than one set of data source versions represented in the following directory: {saDir}");
    }

    // sanity check: make sure all of the files use the same genome assembly
    if (observedGenomeAssemblies.Count > 1)
    {
        throw new UserErrorException($"Found more than one set of Genome Assemblies represented in the following directory: {saDir}");
    }

    // both sets contain exactly one element at this point
    saDirectory = new SupplementaryAnnotationDirectory(observedDataVersions.First(), observedGenomeAssemblies.First());
    mainDataSourceVersions.AddRange(dataSourceVersions);
}