/// <summary>
/// Splits <c>InputRecords</c> into <c>TestRecords</c> and <c>TrainingRecords</c>.
/// </summary>
/// <remarks>
/// The first <c>(1 / TrainingTestSplit) * InputRecords.Count</c> records (truncated toward zero)
/// are appended to <c>TestRecords</c>; every remaining record is appended to
/// <c>TrainingRecords</c>. The computed count is clamped to the range
/// [0, <c>InputRecords.Count</c>], so a split value below 1, equal to zero, or negative
/// can no longer index outside the list.
/// </remarks>
/// <param name="recordBatchSize">Not used by this implementation.</param>
/// <param name="recordLimit">Not used by this implementation.</param>
/// <param name="options">Not used by this implementation.</param>
/// <returns>Always <c>StageResult.SUCCESS</c>.</returns>
protected virtual StageResult Load(int? recordBatchSize = null, int? recordLimit = null, Dictionary<string, string> options = null)
{
    Contract.Requires(InputRecords.Count > 0);

    int total = InputRecords.Count;
    float s = TrainingTestSplit;
    float c = total;

    // Same arithmetic as before for in-range values; only the bounds handling is new.
    float rawSplit = (1f / s) * c;

    int splitCount;
    if (!(rawSplit > 0f))
    {
        // Negative, zero or NaN result: hold nothing out for testing.
        // (The negated comparison is deliberate: it is also true for NaN.)
        splitCount = 0;
    }
    else if (rawSplit >= c)
    {
        // Split below 1 (or a zero split producing +Infinity): every record is a test record.
        // Checked on the float so that an out-of-range value is never cast to int.
        splitCount = total;
    }
    else
    {
        splitCount = (int)rawSplit;
    }

    for (int i = 0; i < splitCount; i++)
    {
        TestRecords.Add(InputRecords[i]);
    }

    for (int i = splitCount; i < total; i++)
    {
        TrainingRecords.Add(InputRecords[i]);
    }

    return StageResult.SUCCESS;
}
/// <summary>
/// Reads the JSON record list from <c>InputFile</c>, partitions the records into training,
/// test and target sets, and writes each set to its own tab-separated output file.
/// </summary>
/// <remarks>
/// <para>
/// Every extracted record is passed through <c>TransformRecordWithAvailableCWE</c> once.
/// Records whose <c>CWEId</c> has a value become <c>ModelDatasetRecords</c>; the others become
/// <c>TargetDatasetRecords</c>.
/// </para>
/// <para>
/// Model records are distributed by <c>VulnerabilityId % 10</c>: values below <c>Split</c> are
/// added to <c>TestRecords</c>, all others to <c>TrainingRecords</c>. When
/// <c>VulnerabilitiesLimit</c> is greater than zero, at most that many model records are
/// distributed.
/// </para>
/// <para>
/// Output files are written one after another, each closed before the next is opened.
/// </para>
/// </remarks>
/// <returns>
/// <c>true</c> when all three files were written; <c>false</c> when the input file yielded no
/// record list or when writing one of the files failed (the error is logged).
/// </returns>
public bool CreateModelDataset()
{
    ExtractedRecords = ReadExtractedRecords();
    if (ExtractedRecords == null)
    {
        // The serializer returns null for empty or literal-null JSON content.
        L.Error("No vulnerability records could be read from input file {0}.", InputFile.FullName);
        return false;
    }

    // Transform each record a single time, then partition the transformed results.
    var transformed = ExtractedRecords.Select(r => TransformRecordWithAvailableCWE(r)).ToList();
    ModelDatasetRecords = transformed.Where(r => r.CWEId.HasValue).ToList();
    TargetDatasetRecords = transformed.Where(r => !r.CWEId.HasValue).ToList();

    int vulnCount = 0;
    foreach (Record r in ModelDatasetRecords)
    {
        // Check the limit before adding so that exactly VulnerabilitiesLimit records are used.
        if (VulnerabilitiesLimit > 0 && vulnCount >= VulnerabilitiesLimit)
        {
            break;
        }

        if (r.VulnerabilityId % 10 < Split)
        {
            TestRecords.Add(r);
        }
        else
        {
            TrainingRecords.Add(r);
        }

        vulnCount++;
    }

    // Training lines: "<CWEId>\t<Title> <Description>" or "<CWEId>\t<Title>." (no vulnerability id).
    bool trainingWritten = WriteDataFile(TrainingOuputFile.FullName, "training", TrainingRecords, TrainingRecords.Count, (w, r) =>
    {
        if (WithDescription && !string.IsNullOrEmpty(r.Description))
        {
            w.WriteLine("{0}\t{1} {2}", r.CWEId, r.Title, r.Description);
        }
        else
        {
            w.WriteLine("{0}\t{1}.", r.CWEId, r.Title);
        }
    });
    if (!trainingWritten)
    {
        return false;
    }

    // Test lines carry the CWE id first and the vulnerability id as the last column.
    bool testWritten = WriteDataFile(TestOuputFile.FullName, "test", TestRecords, TestRecords.Count, (w, r) =>
    {
        if (WithDescription && !string.IsNullOrEmpty(r.Description))
        {
            w.WriteLine("{0}\t{1} {2}\t{3}", r.CWEId, r.Title, r.Description, r.VulnerabilityId);
        }
        else
        {
            w.WriteLine("{0}\t{1}\t{2}", r.CWEId, r.Title, r.VulnerabilityId);
        }
    });
    if (!testWritten)
    {
        return false;
    }

    // Target lines use the same layout as test lines but with an empty first column.
    return WriteDataFile(TargetOuputFile.FullName, "target", TargetDatasetRecords, TargetDatasetRecords.Count, (w, r) =>
    {
        if (WithDescription && !string.IsNullOrEmpty(r.Description))
        {
            w.WriteLine("{0}\t{1} {2}\t{3}", string.Empty, r.Title, r.Description, r.VulnerabilityId);
        }
        else
        {
            w.WriteLine("{0}\t{1}\t{2}", string.Empty, r.Title, r.VulnerabilityId);
        }
    });
}

/// <summary>
/// Deserializes the record list stored in <c>InputFile</c>, decompressing it first when the
/// file extension is ".gz" (compared case-insensitively).
/// </summary>
/// <returns>The deserialized list, or <c>null</c> when the serializer produced no list.</returns>
private List<Record> ReadExtractedRecords()
{
    Stream input = InputFile.OpenRead();
    if (string.Equals(InputFile.Extension, ".gz", StringComparison.OrdinalIgnoreCase))
    {
        input = new GZipStream(input, CompressionMode.Decompress);
    }

    // Disposing the reader also disposes the wrapped stream(s).
    using (StreamReader f = new StreamReader(input))
    using (JsonTextReader reader = new JsonTextReader(f))
    {
        JsonSerializer serializer = new JsonSerializer();
        return serializer.Deserialize<List<Record>>(reader);
    }
}

/// <summary>
/// Creates (or overwrites) the file at <paramref name="path"/> and writes one line per record
/// using <paramref name="writeLine"/>, logging the outcome.
/// </summary>
/// <param name="path">Full path of the output file.</param>
/// <param name="kind">Word inserted into the log messages ("training", "test" or "target").</param>
/// <param name="records">Records to write, in order.</param>
/// <param name="recordCount">Number of records, reported in the success log message.</param>
/// <param name="writeLine">Callback that writes a single record to the supplied writer.</param>
/// <returns><c>true</c> on success; <c>false</c> when an exception occurred while writing.</returns>
private bool WriteDataFile(string path, string kind, IEnumerable<Record> records, int recordCount, Action<StreamWriter, Record> writeLine)
{
    // The file is opened outside the try block, so failures to open still propagate to the caller.
    using (FileStream fs = new FileStream(path, FileMode.Create))
    using (StreamWriter sw = new StreamWriter(fs))
    {
        try
        {
            foreach (Record r in records)
            {
                writeLine(sw, r);
            }

            sw.Flush();
            L.Information("Wrote {0} vulnerability records to " + kind + " data file {1}.", recordCount, path);
            return true;
        }
        catch (IOException ioe)
        {
            L.Error(ioe, "I/O Error writing to " + kind + " data file {0}.", path);
            return false;
        }
        catch (Exception e)
        {
            L.Error(e, "Error writing to " + kind + " data file {0}.", path);
            return false;
        }
    }
}