// Example 1
        /// <summary>
        /// Builds the dataset for the Eclipse project: reads the bug reports from the
        /// query XML file, indexes the files found under the data source folder, and
        /// writes the corpus files, the file list, and a query file plus relevance
        /// list for each bug under the report folder.
        /// </summary>
        /// <remarks>
        /// NOTE(review): no braces are visible in this copy of the method, so the block
        /// nesting described in the comments below is inferred from indentation only.
        /// </remarks>
        /// <exception cref="FileNotFoundException">
        /// Propagated from ResolvePath when a fixed file name has no entry among the indexed files.
        /// </exception>
        /// <exception cref="InvalidDataException">
        /// Propagated from ResolvePath when none of the candidate files scores a non-zero match.
        /// </exception>
        public override void Execute()
            var eclipseDataSourceFolderPath = Utility.DatasetFolderPath + DataSetFolderName + @"Source\"; // data source folder

            // Index every file returned by GetFiles for the data source folder under its
            // lower-cased file name, so files can be looked up by name later on.
            // NOTE(review): MyListTDictionary presumably keeps a list of values per key
            // (its values are used as lists below) — confirm against its definition.
            MyListTDictionary <FileInfo> javaFiles = new MyListTDictionary <FileInfo>();

            GetFiles(new DirectoryInfo(eclipseDataSourceFolderPath)).ForEach(x => javaFiles.Add(x.Name.ToLowerInvariant(), x));

            // Fixed files named in the bug reports, keyed by lower-cased name; they are
            // resolved to files on disk further down.
            Dictionary <string, EclipseRelevanceFile> allRelevanceFileToResolvePath = new Dictionary <string, EclipseRelevanceFile>();
            // NOTE(review): nothing in the visible code adds to allBugs, yet the final
            // loop iterates over it — confirm against the full source.
            List <EclipseBug> allBugs = new List <EclipseBug>();

            // Load the bug reports (queries and their fixed files) from the XML source.
            XDocument xmlDocument      = XDocument.Load(Utility.DatasetFolderPath + DataSetFolderName + EclipseQuerySourceFileName);
            var       bugs             = xmlDocument.Descendants("bug").ToList();
            int       bugsCount        = bugs.Count;
            int       currentBugsCount = 0;

            foreach (var bug in bugs)
                // NOTE(review): currentBugsCount is never incremented in the visible
                // code, so this status line would always report 0 — confirm.
                Utility.Status("Reading bug (" + currentBugsCount + " of " + bugsCount + ")");

                EclipseBug eclipseBug = new EclipseBug();

                // Query text: the bug's summary and description.
                var bugInformation = bug.Element("buginformation");
                eclipseBug.Summary     = bugInformation.Element("summary").Value;
                eclipseBug.Description = bugInformation.Element("description").Value;

                // Bug id, taken from the id attribute of the bug element.
                eclipseBug.BugId = bug.Attribute("id").Value;

                // Register each fixed file once, keyed by its lower-cased name.
                // NOTE(review): eclipseBug.FixedFiles is read when the relevance list is
                // built below, but no visible line here fills it — confirm.
                var files = bug.Element("fixedFiles").Elements("file");
                foreach (var fileNode in files)
                    if (!allRelevanceFileToResolvePath.ContainsKey(fileNode.Value.ToLowerInvariant()))
                        allRelevanceFileToResolvePath.Add(fileNode.Value.ToLowerInvariant(), new EclipseRelevanceFile(fileNode.Value));


            // Resolve every fixed file name to the matching file(s) on disk.
            // ResolvePath throws when a name cannot be matched.
            Dictionary <string, List <FileInfo> > relevanceMappingDictionary = new Dictionary <string, List <FileInfo> >();

            foreach (var relevanceFileWithEclipseRelevanceObject in allRelevanceFileToResolvePath)
                var matchingFile = ResolvePath(relevanceFileWithEclipseRelevanceObject.Value, javaFiles);
                relevanceMappingDictionary.Add(relevanceFileWithEclipseRelevanceObject.Key, matchingFile);

            // Give every indexed file a sequential number, starting at 1.
            Dictionary <FileInfo, int> allIndexedFiles = new Dictionary <FileInfo, int>();
            int counter = 1;

            foreach (var fileList in javaFiles.Values)
                fileList.ForEach(x => allIndexedFiles.Add(x, counter++));

            // Declared const, so both operands must be compile-time constants.
            const string eclipseReportFolderPath = Utility.ReportFolderPath + DataSetFolderName;

            // Remove any previous report output, including its contents.
            // NOTE(review): no call that recreates the report folder, the corpus folder
            // or the per-bug folders is visible before the writes below — confirm.
            if (Directory.Exists(eclipseReportFolderPath))
                Directory.Delete(eclipseReportFolderPath, true);

            const string eclipseCorpusFolderPath = eclipseReportFolderPath + OutputCorpusFoldername;


            int corpusCounter = 1;
            int totalCorpus   = allIndexedFiles.Count;

            // Write one filtered text file per indexed file, named after its number.
            // NOTE(review): corpusCounter is never incremented in the visible code.
            foreach (var fileWithIndex in allIndexedFiles)
                File.WriteAllLines(eclipseCorpusFolderPath + fileWithIndex.Value + ".txt", TextWithFilter(File.ReadAllText(fileWithIndex.Key.FullName)));
                Console.WriteLine("Writing corpus " + corpusCounter + " of " + totalCorpus);

            // Write the file list: one line per file holding its number and its path with
            // the data source folder prefix cut off by length (this assumes FullName
            // starts with that folder path).
            File.WriteAllLines(eclipseReportFolderPath + OutputFileListFileName, allIndexedFiles.Select(x => x.Value + " " + x.Key.FullName.Substring(eclipseDataSourceFolderPath.Length)));

            // Write the query file and the relevance list for each bug into a folder
            // named after the bug id.
            // NOTE(review): bugCounter is never incremented in the visible code.
            int bugCounter = 1;
            int totalBug   = allBugs.Count;

            foreach (var bug in allBugs)
                string bugFolderPath = eclipseReportFolderPath + bug.BugId + @"\";

                File.WriteAllLines(bugFolderPath + OutputBugQueryFileName, TextWithFilter(bug.Summary + " " + bug.Description));

                // Numbers of all files resolved for this bug's fixed files, without duplicates.
                List <string> relevanceList = bug.FixedFiles.SelectMany(x => relevanceMappingDictionary[x].Select(y => allIndexedFiles[y].ToString())).Distinct().ToList();

                File.WriteAllLines(bugFolderPath + OutputRelListFileName, relevanceList);

                Console.WriteLine("Done writing bug " + bugCounter + " of " + totalBug);
// Example 2
        /// <summary>
        /// Finds the file(s) in <paramref name="allJavaFiles"/> that correspond to the
        /// given relevance file: candidates are looked up by file name and then ranked
        /// with <c>GetMatchWithFile</c>, keeping those that share the highest non-zero score.
        /// </summary>
        /// <param name="eclipseRelevanceFile">The fixed file from a bug report that needs a path.</param>
        /// <param name="allJavaFiles">Known files, looked up by the name returned from <c>GetFileName</c>.</param>
        /// <returns>
        /// NOTE(review): no return statement is visible in this copy of the method;
        /// presumably the best-scoring candidates are returned — confirm against the full source.
        /// </returns>
        /// <exception cref="FileNotFoundException">No entry exists for the file name.</exception>
        /// <exception cref="InvalidDataException">No candidate has a non-zero match score.</exception>
        private static List <FileInfo> ResolvePath(EclipseRelevanceFile eclipseRelevanceFile, MyListTDictionary <FileInfo> allJavaFiles)
            string fileName = eclipseRelevanceFile.GetFileName();

            if (!allJavaFiles.ContainsKey(fileName))
                throw new FileNotFoundException(fileName + " in eclipseRelevanceFile was not found in allJavaFiles");

            // NOTE(review): the body of this check is not visible here; presumably a
            // single candidate is returned directly without scoring — confirm.
            if (allJavaFiles[fileName].Count == 1)

            // Score every candidate that shares the file name.
            Dictionary <FileInfo, int> returnFileInfos = new Dictionary <FileInfo, int>();

            foreach (var fileInfo in allJavaFiles[fileName].ToList())
                int currentMatchScore = eclipseRelevanceFile.GetMatchWithFile(fileInfo);
                returnFileInfos.Add(fileInfo, currentMatchScore);

            // Keep the candidates tied for the highest score; a highest score of 0 counts as no match.
            int             maxScore = returnFileInfos.Max(x => x.Value);
            List <FileInfo> results  = returnFileInfos.Where(x => x.Value == maxScore && x.Value != 0).Select(x => x.Key).ToList();

            if (results.Count == 0)
                throw new InvalidDataException("Could not find file for " + eclipseRelevanceFile.GetFileName());
