/// <summary>
/// Merges every file matching "*_cscc" in the model directory into a single model that is
/// restricted to the types listed in <c>namespaces</c>, removes duplicate contexts per type,
/// saves the result as "base_cscc" in the same directory and returns it.
/// </summary>
/// <returns>The merged, de-duplicated contexts keyed by type.</returns>
private static Dictionary<string, List<CsccContextInfo>> CreateModel()
{
    var directory = Model.GetModelDirectory();
    var merged = new Dictionary<string, List<CsccContextInfo>>();

    var csccFiles = Directory.GetFiles(directory, "*_cscc");
    foreach (var csccFile in csccFiles)
    {
        var loaded = ContextModel<CsccContextInfo>.Load(csccFile);
        foreach (var type in namespaces)
        {
            // TryGet is a project extension; presumably it yields the list already stored for
            // the type or a fresh one when the type is unknown - TODO confirm.
            List<CsccContextInfo> bucket = merged.TryGet(type);
            bucket.AddRange(loaded.GetContextsForType(type));
            merged[type] = bucket;
        }
    }

    // Round-trip each list through a HashSet so duplicates (per the element type's equality)
    // are dropped.
    foreach (var type in namespaces)
    {
        var unique = new HashSet<CsccContextInfo>(merged[type]);
        merged[type] = unique.ToList();
    }

    var target = Path.Combine(directory, "base_cscc");
    var baseModel = new ContextModel<CsccContextInfo>(merged);
    baseModel.Save(target);

    return merged;
}
/// <summary>
/// Merges every file in the model directory whose path does not end in "_cscc" into a single
/// model restricted to the types listed in <c>namespaces</c>, removes duplicate contexts per
/// type and saves the result as "base" in the same directory.
/// </summary>
public static void CreateModel()
{
    var directory = Model.GetModelDirectory();
    var merged = new Dictionary<string, List<ContextInfo>>();

    var candidates = Directory.GetFiles(directory).Where(file => !file.EndsWith("_cscc"));
    foreach (var candidate in candidates)
    {
        var loaded = ContextModel<ContextInfo>.Load(candidate);
        foreach (var type in namespaces)
        {
            List<ContextInfo> existing;
            var bucket = merged.TryGetValue(type, out existing)
                ? existing
                : new List<ContextInfo>();

            bucket.AddRange(loaded.GetContextsForType(type));
            merged[type] = bucket;
        }
    }

    // Round-trip each list through a HashSet so duplicates (per the element type's equality)
    // are dropped.
    // NOTE(review): when no model file was found above, the dictionary is still empty and the
    // indexer below throws KeyNotFoundException for the first type.
    foreach (var type in namespaces)
    {
        var unique = new HashSet<ContextInfo>(merged[type]);
        merged[type] = unique.ToList();
    }

    var target = Path.Combine(directory, "base");
    var baseModel = new ContextModel<ContextInfo>(merged);
    baseModel.Save(target);
}
/// <summary>
/// Loads every model file in the model directory except "*_cscc" files and the "training"
/// file, unions their contexts per type into sets, prints the total number of contexts across
/// all types and then waits for a key press.
/// </summary>
private static void CombineAllModels()
{
    var modelDirectory = Model.GetModelDirectory();
    var combinedModel = new Dictionary<string, HashSet<ContextInfo>>();

    // Directory.GetFiles returns paths that include the directory, so the "training" exclusion
    // must be made on the file name; comparing the whole path to "training" never matches.
    var modelFiles = Directory.GetFiles(modelDirectory)
        .Where(file => !file.EndsWith("_cscc", StringComparison.Ordinal)
                       && !Path.GetFileName(file).Equals("training"));

    foreach (var modelFile in modelFiles)
    {
        var model = ContextModel<ContextInfo>.Load(modelFile).Contexts;
        foreach (var type in model.Keys)
        {
            HashSet<ContextInfo> contexts;
            if (!combinedModel.TryGetValue(type, out contexts))
            {
                contexts = new HashSet<ContextInfo>();
                combinedModel[type] = contexts;
            }

            // The set drops contexts that compare equal to one that is already present.
            contexts.UnionWith(model[type]);
        }
    }

    Console.WriteLine(combinedModel.Sum(x => x.Value.Count));
    Console.ReadKey();
}
/// <summary>
/// Concatenates the contexts of every "*_cscc" model file in the model directory, except
/// "base_cscc", and returns the 100 types that have the most contexts.
/// </summary>
/// <returns>At most 100 entries, keyed by type, taken from the front of the list sorted by
/// descending context count.</returns>
private static Dictionary<string, List<CsccContextInfo>> CombineCscc()
{
    var modelDirectory = Model.GetModelDirectory();
    var combinedModel = new Dictionary<string, List<CsccContextInfo>>();

    // Directory.GetFiles returns paths that include the directory, so "base_cscc" must be
    // matched against the file name; comparing the whole path to it never matches.
    var modelFiles = Directory.GetFiles(modelDirectory, "*_cscc")
        .Where(file => !Path.GetFileName(file).Equals("base_cscc"));

    foreach (var modelFile in modelFiles)
    {
        var model = ContextModel<CsccContextInfo>.Load(modelFile).Contexts;
        foreach (var type in model.Keys)
        {
            // TryGet is a project extension; presumably it yields the list already stored for
            // the type or a fresh one when the type is unknown - TODO confirm.
            List<CsccContextInfo> contexts = combinedModel.TryGet(type);
            contexts.AddRange(model[type]);
            combinedModel[type] = contexts;
        }
    }

    // NOTE: per-type de-duplication through a HashSet was commented out in the original, so
    // duplicate contexts are kept and count towards the ranking below.

    var flatModel = combinedModel.ToList();
    flatModel.Sort((a, b) => b.Value.Count.CompareTo(a.Value.Count));

    return flatModel.Take(100).ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
}
/// <summary>
/// Splits the types of the "training_cscc" model into 10 folds, evaluates a
/// <c>CsccEvaluationModel</c> on each fold, prints the running sum of the average F-measures
/// after every fold, then prints their mean and waits for a key press.
/// </summary>
private static void Evaluation()
{
    const int folds = 10;

    var trainingModel = ContextModel<CsccContextInfo>.Load(Path.Combine(Model.GetModelDirectory(), "training_cscc"));

    // Largest types first, so each step below pairs a large type with a small one.
    var namespaces = trainingModel.Contexts.ToList();
    namespaces.Sort((a, b) => b.Value.Count.CompareTo(a.Value.Count));

    var trainingModels = new Dictionary<string, List<CsccContextInfo>>[folds];
    for (int i = 0; i < folds; i++)
    {
        trainingModels[i] = new Dictionary<string, List<CsccContextInfo>>();
    }

    // Round-robin over the folds, giving each fold the i-th largest and the i-th smallest type.
    for (int i = 0; i < namespaces.Count / 2; i++)
    {
        var fold = trainingModels[i % folds];
        var large = namespaces[i];
        var small = namespaces[namespaces.Count - i - 1];
        fold.Add(large.Key, large.Value);
        fold.Add(small.Key, small.Value);
    }

    // With an odd number of types the pairing loop never reaches the middle one; assign it to
    // the next fold in the rotation so that no type is silently left out.
    if (namespaces.Count % 2 == 1)
    {
        var middle = namespaces.Count / 2;
        trainingModels[middle % folds].Add(namespaces[middle].Key, namespaces[middle].Value);
    }

    var result = 0.0;
    foreach (var training in trainingModels)
    {
        result += new CsccEvaluationModel(training).Evaluate().AverageFmeasure;
        Console.WriteLine(result);
    }

    Console.WriteLine(result / folds);
    Console.ReadKey();
}
/// <summary>
/// Measures the prediction time of <c>GeneCSCC</c>: loads the "training" model, shuffles all
/// of its (type, context) pairs, runs a prediction for up to 3000 of them and prints the total
/// number of contexts in the model together with the mean time per query in milliseconds.
/// </summary>
public static void EvaluateGeneCscc()
{
    var model = ContextModel<ContextInfo>.Load(Path.Combine(Model.GetModelDirectory(), "training"));
    var cscc = new GeneCSCC.GeneCSCC(model);

    var unorderedQueryData = model.Contexts
        .SelectMany(kvp => kvp.Value.Select(c => new Tuple<string, ContextInfo>(kvp.Key, c)))
        .ToList();
    unorderedQueryData.Shuffle(RandomProvider.GetThreadRandom());

    // Materialise the sample before timing so that its size is known and only the prediction
    // calls are measured.
    var queryData = unorderedQueryData.Take(3000).ToList();

    var sw = Stopwatch.StartNew();
    foreach (var query in queryData)
    {
        cscc.GetPredictions(query.Item2, query.Item1);
    }
    sw.Stop();

    // TotalMilliseconds is the whole elapsed time; Milliseconds is only the 0-999 component of
    // it. Divide by the number of queries actually run, which can be fewer than 3000.
    var millisecondsPerQuery = queryData.Count > 0
        ? sw.Elapsed.TotalMilliseconds / queryData.Count
        : 0.0;

    Console.WriteLine(
        $"Queries: {model.Contexts.Sum(kvp => kvp.Value.Count)} Inference speed: {millisecondsPerQuery}");
}
/// <summary>
/// For every model file in the model directory except "*_cscc" files and the "training" file,
/// prints one line with the file path, the summed <c>Length</c> of the extended contexts, local
/// contexts and invocations of its distinct contexts, and the number of those contexts. Waits
/// for a key press when done.
/// </summary>
public static void GenCsccSize()
{
    var modelDirectory = Model.GetModelDirectory();

    // Directory.GetFiles returns paths that include the directory, so the "training" exclusion
    // must be made on the file name; comparing the whole path to "training" never matches.
    var modelFiles = Directory.GetFiles(modelDirectory)
        .Where(file => !file.EndsWith("_cscc", StringComparison.Ordinal)
                       && !Path.GetFileName(file).Equals("training"));

    foreach (var modelFile in modelFiles)
    {
        var model = ContextModel<ContextInfo>.Load(modelFile).Contexts;

        long extendedLength = 0;
        long localLength = 0;
        long invocationLength = 0;
        long length = 0;

        foreach (var contexts in model.Values)
        {
            // Duplicates are removed per type before measuring.
            foreach (var context in new HashSet<ContextInfo>(contexts))
            {
                length++;
                extendedLength += context.ExtendedContext.Length;
                localLength += context.LocalContext.Length;
                invocationLength += context.Invocation.Length;
            }
        }

        // The double spaces reproduce the original output format exactly.
        Console.WriteLine(modelFile + " " + extendedLength + "  " + localLength + "  " + invocationLength + " " + length);
    }

    Console.ReadKey();
}
/// <summary>
/// Prints each type of the "training" model followed by its number of contexts, one per line
/// in the model's own enumeration order, then waits for a key press.
/// </summary>
private static void TopNamespaces()
{
    var trainingPath = Path.Combine(Model.GetModelDirectory(), "training");
    var contextsByType = ContextModel<ContextInfo>.Load(trainingPath).Contexts;

    foreach (var entry in contextsByType)
    {
        var typeName = entry.Key;
        var contextCount = entry.Value.Count;
        Console.WriteLine(typeName + " " + contextCount);
    }

    Console.ReadKey();
}
/// <summary>
/// Returns the contexts of the "base" model stored in the model directory, or the result of
/// <c>CreateModel</c> when that file does not exist.
/// </summary>
/// <returns>The model's contexts keyed by type.</returns>
private static Dictionary<string, List<ContextInfo>> LoadModel()
{
    var basePath = Path.Combine(Model.GetModelDirectory(), "base");

    // No saved base model: fall back to CreateModel.
    if (!File.Exists(basePath))
    {
        return CreateModel();
    }

    return ContextModel<ContextInfo>.Load(basePath).Contexts;
}
/// <summary>
/// Loads the "training" model and distributes its types over <c>_folds</c> dictionaries stored
/// in <c>trainingModels</c>, pairing the type with the most contexts with the one with the
/// fewest, the second most with the second fewest, and so on.
/// </summary>
public void InitializeTrainingModels()
{
    var trainingModel = ContextModel<ContextInfo>.Load(Path.Combine(Model.GetModelDirectory(), "training"));

    // Largest types first, so each step below pairs a large type with a small one.
    var namespaces = trainingModel.Contexts.ToList();
    namespaces.Sort((a, b) => b.Value.Count.CompareTo(a.Value.Count));

    trainingModels = new Dictionary<string, List<ContextInfo>>[_folds];
    for (int i = 0; i < _folds; i++)
    {
        trainingModels[i] = new Dictionary<string, List<ContextInfo>>();
    }

    // Round-robin over the folds, giving each fold the i-th largest and the i-th smallest type.
    for (int i = 0; i < namespaces.Count / 2; i++)
    {
        var fold = trainingModels[i % _folds];
        var large = namespaces[i];
        var small = namespaces[namespaces.Count - i - 1];
        fold.Add(large.Key, large.Value);
        fold.Add(small.Key, small.Value);
    }

    // With an odd number of types the pairing loop never reaches the middle one; assign it to
    // the next fold in the rotation so that no type is silently left out.
    if (namespaces.Count % 2 == 1)
    {
        var middle = namespaces.Count / 2;
        trainingModels[middle % _folds].Add(namespaces[middle].Key, namespaces[middle].Value);
    }
}
/// <summary>
/// Trains <c>GeneCSCC</c> on the "Source" model and validates it against every context of the
/// "NewtonsoftJson-master" model whose type is known to the training model. Prints the
/// fraction of validated contexts whose first prediction equals the actual invocation, then
/// waits for a key press.
/// </summary>
private static void TestGene()
{
    var training = ContextModel<ContextInfo>.Load(Path.Combine(Model.GetModelDirectory(), "Source"));
    var model = new GeneCSCC.GeneCSCC(training);
    var validation = ContextModel<ContextInfo>.Load(Path.Combine(Model.GetModelDirectory(), "NewtonsoftJson-master")).Contexts;

    // Flatten to (type, context) pairs.
    var list = validation
        .SelectMany(kvp => kvp.Value.Select(ci => new Tuple<string, ContextInfo>(kvp.Key, ci)))
        .ToList();

    var hits = 0.0;
    var validations = 0;

    // Iterate over the flattened samples. The original loop was bounded by the number of types
    // in the dictionary while indexing this list, so it looked at only the first few samples
    // and could index past the end of the list.
    foreach (var sample in list)
    {
        if (!training.Contexts.ContainsKey(sample.Item1))
        {
            continue;
        }

        var predictions = model.GetPredictions(sample.Item2, sample.Item1);
        validations++;

        // A sample without any prediction still counts as validated, but not as a hit.
        if (predictions.Count == 0)
        {
            continue;
        }

        if (sample.Item2.Invocation.Equals(predictions[0]))
        {
            hits++;
        }
    }

    // Prints NaN when nothing could be validated (0.0 / 0).
    Console.WriteLine(hits / validations);
    Console.ReadKey();
}
/// <summary>
/// Cross-validates <c>GeneCSCC</c> over the model files in the model directory, leaving one
/// file out per round. "*_cscc" files and the "training" file are not used as folds; the
/// "training" model only supplies the set of types every fold is reduced to. Prints the hit
/// ratio of each round and finally the mean precision and recall, then waits for a key press.
/// </summary>
private static void PerformanceEvaluation()
{
    var modelDirectory = Model.GetModelDirectory();

    // Directory.GetFiles returns paths that include the directory, so the "training" exclusion
    // must be made on the file name; comparing the whole path to "training" never matches.
    var modelFiles = Directory.GetFiles(modelDirectory)
        .Where(file => !file.EndsWith("_cscc", StringComparison.Ordinal)
                       && !Path.GetFileName(file).Equals("training"));

    var keys = ContextModel<ContextInfo>.Load(Path.Combine(modelDirectory, "training")).GetAllTypes();

    var models = new List<ContextModel<ContextInfo>>();
    foreach (var modelFile in modelFiles)
    {
        var model = ContextModel<ContextInfo>.Load(modelFile);
        model.KeepTypes(keys);
        model.RemoveDuplicates();
        models.Add(model);
    }
    Console.WriteLine("Models loaded...");

    var precision = 0.0;
    var recall = 0.0;
    for (int i = 0; i < models.Count; i++)
    {
        // Train on every fold except the i-th one.
        var trainingFolds = models.Where((fold, foldIndex) => foldIndex != i).ToArray();
        var trainingModel = ContextModel<ContextInfo>.Combine(trainingFolds);
        trainingModel.RemoveDuplicates();
        Console.WriteLine("Training model created...");

        // Validate on at most the first 1000 (type, context) pairs of the held-out fold.
        var validationFold = models[i].Contexts
            .SelectMany(kvp => kvp.Value.Select(ci => new Tuple<string, ContextInfo>(kvp.Key, ci)))
            .Take(1000);
        var cscc = new GeneCSCC.GeneCSCC(trainingModel);

        var validationHits = 0.0;
        var recallHits = 0.0;
        var validations = 0;
        foreach (var validation in validationFold)
        {
            if (!trainingModel.Contexts.ContainsKey(validation.Item1))
            {
                continue;
            }

            var predictions = cscc.GetPredictions(validation.Item2, validation.Item1);
            validations++;
            if (predictions.Count == 0)
            {
                continue;
            }

            // Any prediction at all counts towards recall; only a correct first prediction
            // counts towards precision.
            recallHits++;
            if (validation.Item2.Invocation.Equals(predictions[0]))
            {
                validationHits++;
            }
        }

        // NOTE(review): a round with zero validations yields NaN here and propagates into the
        // printed averages.
        precision += validationHits / validations;
        recall += recallHits / validations;
        Console.WriteLine(validationHits / validations);
    }

    Console.WriteLine("Precision: {0} Recall: {1}", precision / models.Count, recall / models.Count);
    Console.ReadKey();
}
/// <summary>
/// Loads the "training_cscc" model from the model directory.
/// </summary>
/// <returns>The loaded model's contexts keyed by type.</returns>
public static Dictionary<string, List<CsccContextInfo>> LoadCsccTrainingModel()
{
    var trainingPath = Path.Combine(Model.GetModelDirectory(), "training_cscc");
    var trainingModel = ContextModel<CsccContextInfo>.Load(trainingPath);
    return trainingModel.Contexts;
}