/// <summary>
/// Rebuilds the Solr index entries for one project: scans the project directory for
/// matching files, deletes every Solr document tagged with the project's name, re-adds
/// the files in batches of DOCS_PER_POST, and finally optimizes the index.
/// Does nothing (apart from logging) when the project directory does not exist.
/// </summary>
/// <param name="settings">Supplies the default include/exclude path patterns.</param>
/// <param name="solr">Solr operations object used for the delete, add, commit and optimize calls.</param>
/// <param name="proj">Project whose directory is scanned and whose documents are replaced.</param>
private void UpdateSolrIndexForProject(IndexSettings settings, ISolrOperations<CodeDocument> solr, Project proj)
{
    // Find out if the directory exists before doing anything to the index.
    if (!Directory.Exists(proj.Path))
    {
        Console.WriteLine(DateTime.Now.ToString() + ": Directory for project " + proj.ProjectName + " did not exist, skipping");
        return;
    }

    // Find all of the files.
    List<string> alldocs;
    using (var csw = new ConsoleStopWatch(""))
    {
        alldocs = GetDocsForProject(proj, settings.DefaultIncludedPath, settings.DefaultExcludedPath);
        // The name is assigned after the scan so that it can include the file count.
        csw.Name = "Finding " + alldocs.Count.ToString() + " files for project " + proj.ProjectName;
    }

    using (var csw = new ConsoleStopWatch("Deleting all solr docs for project " + proj.ProjectName))
    {
        // The name is embedded in a quoted phrase, so backslashes and double quotes must be
        // escaped; otherwise such a name would produce a malformed (or broader) delete query.
        // Names without those characters yield exactly the same query as before.
        string escapedName = (proj.ProjectName ?? string.Empty).Replace("\\", "\\\\").Replace("\"", "\\\"");
        solr.Delete(new SolrQuery("project:\"" + escapedName + "\""));
        solr.Commit();
    }

    // Break the file list into chunks of DOCS_PER_POST for speed. One at a time is too slow,
    // too many can cause solr memory and thread issues.
    var fileChunks = Chunkify(alldocs.ToArray(), DOCS_PER_POST);

    using (var csw = new ConsoleStopWatch("Adding the documents to solr for project " + proj.ProjectName))
    {
        foreach (var chunk in fileChunks)
        {
            // Convert each file of the chunk to a solr document.
            var codedocs = MakeDocument(chunk, proj);

            // MakeDocument drops empty files, so a chunk can end up with nothing to send;
            // do not post an empty add request.
            if (codedocs.Length == 0)
            {
                continue;
            }

            // Use the CommitWithin option of SolrNet so that we do not need to pay the cost
            // of an explicit commit for each group.
            solr.AddRange(codedocs, new AddParameters { CommitWithin = 10000 });
        }

        solr.Optimize();
    }
}
/// <summary>
/// Lists the files under the project's root directory (recursively) whose full path matches
/// at least one include pattern and no exclude pattern. Patterns are regular expressions
/// matched case-insensitively against the full path.
/// </summary>
/// <param name="proj">Project supplying the root path and the project-specific include/exclude patterns.</param>
/// <param name="defaultInclude">Include patterns applied to every project.</param>
/// <param name="defaultExclude">Exclude patterns applied to every project.</param>
/// <returns>
/// The selected file paths, each listed once, ordered by the first include pattern that
/// matched them (default patterns before project patterns).
/// </returns>
private List<string> GetDocsForProject(Project proj, string[] defaultInclude, string[] defaultExclude)
{
    string rootPath = proj.Path;

    // First get a list of all the documents under the root.
    string[] allDocs = Directory.GetFiles(rootPath, "*", SearchOption.AllDirectories);

    List<string> filteredDocs = new List<string>();

    // Tracks paths already selected so that a file matching several include patterns is
    // only returned once (previously it was added once per matching pattern).
    HashSet<string> selected = new HashSet<string>();

    // Include ones that match the default pattern.
    foreach (var pattern in defaultInclude)
    {
        foreach (string path in allDocs)
        {
            if (Regex.IsMatch(path, pattern, System.Text.RegularExpressions.RegexOptions.IgnoreCase) && selected.Add(path))
            {
                filteredDocs.Add(path);
            }
        }
    }

    // Include ones that match the project pattern.
    foreach (var pattern in proj.IncludedPath)
    {
        foreach (string path in allDocs)
        {
            if (Regex.IsMatch(path, pattern, System.Text.RegularExpressions.RegexOptions.IgnoreCase) && selected.Add(path))
            {
                filteredDocs.Add(path);
            }
        }
    }

    // Exclude default patterns.
    foreach (var pattern in defaultExclude)
    {
        filteredDocs.RemoveAll(X => Regex.IsMatch(X, pattern, System.Text.RegularExpressions.RegexOptions.IgnoreCase));
    }

    // Exclude project patterns.
    foreach (var pattern in proj.ExcludedPath)
    {
        filteredDocs.RemoveAll(X => Regex.IsMatch(X, pattern, System.Text.RegularExpressions.RegexOptions.IgnoreCase));
    }

    return filteredDocs;
}
/// <summary>
/// Reads each file in <paramref name="filepath"/> and builds a CodeDocument for it.
/// Files whose trimmed content is empty are skipped. Files that cannot be read
/// (I/O error or access denied) are logged to the console and skipped, so that one bad
/// file does not abort the whole batch.
/// </summary>
/// <param name="filepath">Full paths of the files to convert.</param>
/// <param name="project">Project supplying the name and category stamped on each document.</param>
/// <returns>One document per readable, non-empty file, in input order; may be empty.</returns>
private CodeDocument[] MakeDocument(string[] filepath, Project project)
{
    var allDocs = new List<CodeDocument>();

    for (int i = 0; i < filepath.Length; i++)
    {
        string filedata;
        try
        {
            // Original author's note on the trimmed read: "shouldn't have to do this".
            filedata = File.ReadAllText(filepath[i]).Trim();
        }
        catch (IOException ex)
        {
            // Covers files removed or locked since the directory scan.
            Console.WriteLine(DateTime.Now.ToString() + ": Could not read file " + filepath[i] + ", skipping: " + ex.Message);
            continue;
        }
        catch (UnauthorizedAccessException ex)
        {
            Console.WriteLine(DateTime.Now.ToString() + ": Could not read file " + filepath[i] + ", skipping: " + ex.Message);
            continue;
        }

        // We don't care about empty files.
        if (filedata.Length == 0)
        {
            continue;
        }

        var thisDocument = new CodeDocument();
        // The document ID is derived from the file's full path.
        thisDocument.ID = CalculateMD5Hash(filepath[i]);
        thisDocument.Name = Path.GetFileName(filepath[i]);
        thisDocument.FileData = filedata;
        thisDocument.FullPath = filepath[i];
        thisDocument.Project = project.ProjectName;
        thisDocument.Category = project.Category;
        allDocs.Add(thisDocument);
    }

    return allDocs.ToArray();
}