/// <summary>
/// Reads records from <c>InputFile</c> and appends them to <c>ExtractedRecords</c>.
/// When the file extension is listed in <c>CompressedFileExtensions</c>, the first
/// non-directory entry of the archive is read; otherwise the file is read directly.
/// </summary>
/// <param name="recordBatchSize">Not used by this implementation.</param>
/// <param name="recordLimit">Not used by this implementation.</param>
/// <param name="options">Not used by this implementation.</param>
/// <returns>
/// <c>StageResult.INPUT_ERROR</c> when the archive contains no file entries;
/// <c>StageResult.SUCCESS</c> otherwise.
/// </returns>
protected override StageResult Extract(int? recordBatchSize = null, int? recordLimit = null, Dictionary<string, string> options = null)
{
    bool isArchive = CompressedFileExtensions.Contains(InputFile.Extension);

    // Plain (uncompressed) input: read straight from the file.
    if (!isArchive)
    {
        Info("Reading file {0} with size {1} bytes...", InputFile.Name, InputFile.Length);
        using (FileStream plainStream = InputFile.OpenRead())
        using (StreamReader plainReader = new StreamReader(plainStream))
        {
            ExtractedRecords.AddRange(ReadRecordsFromFileStream(this, plainReader, WriterOptions));
        }
        return StageResult.SUCCESS;
    }

    using (FileStream archiveStream = InputFile.OpenRead())
    using (IReader archive = ReaderFactory.Open(archiveStream))
    {
        // Advance until the reader is positioned on the first entry that is not a directory.
        bool entryFound = false;
        while (!entryFound && archive.MoveToNextEntry())
        {
            entryFound = !archive.Entry.IsDirectory;
        }

        if (!entryFound)
        {
            Error("{0} has no file entries in zip archive.".F(InputFile.FullName));
            return StageResult.INPUT_ERROR;
        }

        Info("Unzipping file {0} with size {1} bytes.", archive.Entry.Key, archive.Entry.Size);
        using (Stream entryStream = archive.OpenEntryStream())
        using (StreamReader entryReader = new StreamReader(entryStream))
        {
            ExtractedRecords.AddRange(ReadRecordsFromFileStream(this, entryReader, WriterOptions));
        }
    }

    return StageResult.SUCCESS;
}
/// <summary>
/// Decompresses the gzip-compressed <c>InputFile</c>, deserializes it into <c>NVDFeed</c> and
/// adds one <c>Record</c> to <c>ExtractedRecords</c> for every feed entry that carries a CWE.
/// Entries without a CWE are skipped; entries that fail to convert are logged and skipped.
/// </summary>
/// <param name="vulnerabilitiesLimit">
/// Maximum number of records to extract in this call. Zero or a negative value disables the limit.
/// </param>
/// <param name="options">Not used by this implementation.</param>
/// <returns>
/// 0 when the feed could not be deserialized or has no entries; otherwise
/// <c>ExtractedRecords.Count</c> after extraction.
/// </returns>
public override int Extract(int vulnerabilitiesLimit, Dictionary<string, string> options)
{
    XmlSerializer serializer = new XmlSerializer(typeof(nvd));
    using (FileStream fs = InputFile.OpenRead())
    using (GZipStream gzs = new GZipStream(fs, CompressionMode.Decompress))
    using (MemoryStream ms = new MemoryStream())
    {
        L.Information("Read {bytes} bytes from compressed file.", fs.Length);
        // The whole payload is buffered in memory so the decompressed size can be logged.
        gzs.CopyTo(ms);
        L.Information("Read {bytes} bytes from decompressed memory stream.", ms.Length);
        ms.Seek(0, SeekOrigin.Begin);
        try
        {
            NVDFeed = (nvd)serializer.Deserialize(ms);
        }
        catch (Exception e)
        {
            // Deliberately not rethrown: the null check below reports the failure to the caller.
            L.Error(e, "Error deserializing {file} using NVD 2.0 schema.", InputFile.FullName);
        }
    }

    if (NVDFeed == null || NVDFeed.entry == null)
    {
        L.Error("Could not deserialize XML entries from file {file}.", InputFile.FullName);
        return 0;
    }

    // Count once; the value is needed for both the empty check and the summary log.
    int entryCount = NVDFeed.entry.Count();
    if (entryCount == 0)
    {
        L.Information("XML file {file} has 0 entries.", InputFile.FullName);
        return 0;
    }

    int extractedCount = 0;
    foreach (vulnerabilityType v in NVDFeed.entry)
    {
        if (v.cwe == null)
        {
            continue;
        }

        try
        {
            ExtractedRecords.Add(new Record
            {
                // Removes every "-" and "CVE" from the id and parses the remaining digits.
                VulnerabilityId = long.Parse(v.id.Replace("-", string.Empty).Replace("CVE", string.Empty)),
                Title = FilterNonText(v.summary),
                Description = string.Empty,
                // Only the first CWE of the entry is used.
                CWEId = int.Parse(v.cwe.First().id.Replace("CWE-", string.Empty)),
                References = v.references?.Select(re => re.reference.href).ToArray(),
                Published = v.publisheddatetimeSpecified ? v.publisheddatetime : DateTime.MinValue
            });
            extractedCount++;
        }
        catch (Exception e)
        {
            L.Warning(e, "Exception thrown attempting to extract vulnerability record from CVE entry {cve}.", v.id);
        }

        // Stop as soon as the limit is reached (>=), so no more than the limit is extracted.
        if (vulnerabilitiesLimit > 0 && extractedCount >= vulnerabilitiesLimit)
        {
            break;
        }
    }

    L.Information("Extracted {0} vulnerability records with valid CWEs from NVD CVE XML file {1} with total {entries} entries.", extractedCount, InputFile.FullName, entryCount);
    return ExtractedRecords.Count;
}
/// <summary>
/// Pages through the packages of <c>PackageManager</c> using <c>OSSIndexHttpClient</c>, flattens
/// every package vulnerability into an <c>ExtractedRecord</c>, and stores the de-duplicated
/// result in <c>ExtractedRecords</c>.
/// </summary>
/// <param name="vulnerabilitiesLimit">
/// When greater than zero, paging stops after the first page that takes the number of distinct
/// records above this value. The limit is checked per page, so the result can exceed it.
/// </param>
/// <param name="options">Not used by this implementation.</param>
/// <returns><c>ExtractedRecords.Count</c> after de-duplication.</returns>
public override int Extract(int vulnerabilitiesLimit, Dictionary<string, string> options)
{
    // Use the authenticated constructor only when credentials were supplied.
    OSSIndexHttpClient client = string.IsNullOrEmpty(Authentication)
        ? new OSSIndexHttpClient("2.0")
        : new OSSIndexHttpClient("2.0", User, Token);

    List<ExtractedRecord> records = new List<ExtractedRecord>();
    VulnerablityComparator vc = new VulnerablityComparator();
    ExtractedRecordComparator erc = new ExtractedRecordComparator();
    bool hasNext = false;
    long from = 0, till = -1;

    do
    {
        // This override is synchronous, so the asynchronous call is blocked on via .Result.
        QueryResponse response = client.GetPackages(PackageManager, from, till).Result;

        // Flatten once; the list feeds both the distinct count and the duplicate report.
        var pageVulnerabilities = response.packages.SelectMany(p => p.Vulnerabilities).ToList();
        L.Information("Got {ps} package entries with {vuln} distinct vulnerability entries for package manager {pm}.",
            response.packages.Select(p => p.Id).Distinct().Count(),
            pageVulnerabilities.Distinct(vc).Count(),
            PackageManager);

        var duplicates = pageVulnerabilities
            .GroupBy(v => v.Id)
            .Where(g => g.Count() > 1)
            .Select(g => g.Key)
            .ToList();
        L.Information("Got {0} duplicate vulnerabilities.", duplicates.Count);

        foreach (Package package in response.packages)
        {
            records.AddRange(
                package.Vulnerabilities.Select(v => new ExtractedRecord
                {
                    PackageManager = this.PackageManager,
                    PackageId = package.Id,
                    PackageName = package.Name,
                    VulnerabilityId = v.Id,
                    Title = FilterNonText(v.Title),
                    Description = FilterNonText(v.Description), // strip out any URLs
                    References = v.References,
                    Updated = v.Updated.HasValue ? v.Updated.Value : DateTime.MinValue,
                    Published = v.Published.HasValue ? v.Published.Value : DateTime.MinValue
                })
            );
        }

        hasNext = !string.IsNullOrEmpty(response.NextUrl);
        if (hasNext)
        {
            // NOTE(review): assumes the 8th URI segment of NextUrl is the next "till" value —
            // confirm against the OSS Index API; a shorter or non-numeric URL throws here.
            Uri next = new Uri(response.NextUrl);
            till = long.Parse(next.Segments[7]);

            // De-duplicate once per page and only when a limit is actually configured.
            if (PackagesLimit > 0 || vulnerabilitiesLimit > 0)
            {
                List<ExtractedRecord> distinctSoFar = records.Distinct(erc).ToList();
                bool packageLimitHit = PackagesLimit > 0
                    && distinctSoFar.Select(rec => rec.PackageId).Distinct().Count() > PackagesLimit;
                bool vulnerabilityLimitHit = vulnerabilitiesLimit > 0
                    && distinctSoFar.Count > vulnerabilitiesLimit;
                if (packageLimitHit || vulnerabilityLimitHit)
                {
                    hasNext = false;
                }
            }
        }
    }
    while (hasNext);

    List<ExtractedRecord> distinctRecords = records.Distinct(erc).ToList();
    ExtractedRecords = distinctRecords.Select(er => (Record)er).ToList();
    L.Information("Extracted {packages} packages with {vulnd} distinct vulnerabilities.",
        distinctRecords.Select(rec => rec.PackageId).Distinct().Count(),
        ExtractedRecords.Count);
    return ExtractedRecords.Count;
}
/// <summary>
/// Loads the JSON records in <c>InputFile</c> (gzip-compressed when the extension is ".gz"),
/// splits the records that have a CWE into test and training sets, and writes the training,
/// test and target (no CWE) sets to their tab-separated output files.
/// </summary>
/// <returns>
/// <c>true</c> when all three files were written; <c>false</c> when the input produced no
/// record list or when any output file could not be written (later files are then skipped).
/// </returns>
public bool CreateModelDataset()
{
    bool isGzip = string.Equals(InputFile.Extension, ".gz", StringComparison.OrdinalIgnoreCase);
    using (Stream raw = InputFile.OpenRead())
    using (Stream input = isGzip ? new GZipStream(raw, CompressionMode.Decompress) : raw)
    using (StreamReader f = new StreamReader(input))
    using (JsonTextReader reader = new JsonTextReader(f))
    {
        JsonSerializer serializer = new JsonSerializer();
        ExtractedRecords = serializer.Deserialize<List<Record>>(reader);
    }

    // Deserialize yields null for empty input; fail cleanly instead of throwing below.
    if (ExtractedRecords == null)
    {
        L.Error("Could not deserialize any records from input file {0}.", InputFile.FullName);
        return false;
    }

    // Transform every record once, then partition on whether a CWE id is present.
    var transformed = ExtractedRecords.Select(rec => TransformRecordWithAvailableCWE(rec)).ToList();
    ModelDatasetRecords = transformed.Where(rec => rec.CWEId.HasValue).ToList();
    TargetDatasetRecords = transformed.Where(rec => !rec.CWEId.HasValue).ToList();

    int splitCount = 0;
    foreach (Record r in ModelDatasetRecords)
    {
        // The last decimal digit of the id decides the bucket: digits below Split go to test.
        if (r.VulnerabilityId % 10 < Split)
        {
            TestRecords.Add(r);
        }
        else
        {
            TrainingRecords.Add(r);
        }

        // Stop once exactly VulnerabilitiesLimit records have been assigned.
        splitCount++;
        if (VulnerabilitiesLimit > 0 && splitCount >= VulnerabilitiesLimit)
        {
            break;
        }
    }

    bool trainingWritten = WriteDatasetFile(TrainingOuputFile.FullName, "training", TrainingRecords, TrainingRecords.Count,
        (w, rec) =>
        {
            if (WithDescription && !string.IsNullOrEmpty(rec.Description))
            {
                w.WriteLine("{0}\t{1} {2}", rec.CWEId, rec.Title, rec.Description);
            }
            else
            {
                w.WriteLine("{0}\t{1}.", rec.CWEId, rec.Title);
            }
        });
    if (!trainingWritten)
    {
        return false;
    }

    bool testWritten = WriteDatasetFile(TestOuputFile.FullName, "test", TestRecords, TestRecords.Count,
        (w, rec) =>
        {
            if (WithDescription && !string.IsNullOrEmpty(rec.Description))
            {
                w.WriteLine("{0}\t{1} {2}\t{3}", rec.CWEId, rec.Title, rec.Description, rec.VulnerabilityId);
            }
            else
            {
                w.WriteLine("{0}\t{1}\t{2}", rec.CWEId, rec.Title, rec.VulnerabilityId);
            }
        });
    if (!testWritten)
    {
        return false;
    }

    // Target records have no CWE, so the first column is written empty.
    return WriteDatasetFile(TargetOuputFile.FullName, "target", TargetDatasetRecords, TargetDatasetRecords.Count,
        (w, rec) =>
        {
            if (WithDescription && !string.IsNullOrEmpty(rec.Description))
            {
                w.WriteLine("{0}\t{1} {2}\t{3}", string.Empty, rec.Title, rec.Description, rec.VulnerabilityId);
            }
            else
            {
                w.WriteLine("{0}\t{1}\t{2}", string.Empty, rec.Title, rec.VulnerabilityId);
            }
        });
}

/// <summary>
/// Creates (or overwrites) the file at <paramref name="path"/>, writes one line per record and
/// logs the outcome. The stream is closed before the method returns.
/// </summary>
/// <param name="path">Full path of the output file.</param>
/// <param name="label">Dataset name used in log messages ("training", "test" or "target").</param>
/// <param name="records">Records to write, in order.</param>
/// <param name="recordCount">Number of records, used only for the success log message.</param>
/// <param name="writeLine">Callback that writes a single record to the supplied writer.</param>
/// <returns><c>true</c> on success; <c>false</c> when opening or writing the file failed.</returns>
private bool WriteDatasetFile(string path, string label, IEnumerable<Record> records, int recordCount, Action<StreamWriter, Record> writeLine)
{
    try
    {
        // Opening the file inside the try lets open failures be logged like write failures.
        using (FileStream stream = new FileStream(path, FileMode.Create))
        using (StreamWriter writer = new StreamWriter(stream))
        {
            foreach (Record record in records)
            {
                writeLine(writer, record);
            }
            writer.Flush();
        }

        L.Information("Wrote {0} vulnerability records to " + label + " data file {1}.", recordCount, path);
        return true;
    }
    catch (IOException ioe)
    {
        L.Error(ioe, "I/O Error writing to " + label + " data file {0}.", path);
        return false;
    }
    catch (Exception e)
    {
        L.Error(e, "Error writing to " + label + " data file {0}.", path);
        return false;
    }
}