/// <summary>
/// Entry point: reads the scan options from the command line, creates the
/// dictionary-backed searchers required by the selected info types, and scans
/// the target directory.
/// </summary>
/// <remarks>
/// Recognized options. An omitted option falls back to its default, including
/// when no arguments are supplied at all:
///   --path VALUE            directory to scan (default: application base directory)
///   --recursive             sets the recursive flag (default: not set)
///   --whitelist VALUE       whitelist value (default: null)
///   --document-types A,B    comma-separated list (default: null)
///   --info-types A,B        comma-separated list (default: keyword,email,name,phone,card)
///   --sensitivity N         integer (default: 5)
/// </remarks>
/// <param name="args">Command-line arguments.</param>
/// <exception cref="ArgumentException">A value-taking option is the last argument.</exception>
/// <exception cref="FormatException">The --sensitivity value is not an integer.</exception>
public static void Main(string[] args)
{
    // Parse unconditionally: with an empty argument list every lookup misses and
    // the defaults below apply, so the fields are always initialized before use.
    _path = ReadOptionValue(args, "--path") ?? AppDomain.CurrentDomain.BaseDirectory;
    _isRecursive = Array.IndexOf(args, "--recursive") >= 0;
    _whitelist = ReadOptionValue(args, "--whitelist");

    string documentTypes = ReadOptionValue(args, "--document-types");
    _documentTypes = documentTypes != null ? documentTypes.Split(',') : null;

    string infoTypes = ReadOptionValue(args, "--info-types");
    _infoTypes = infoTypes != null
        ? infoTypes.Split(',')
        : new[] { "keyword", "email", "name", "phone", "card" };

    string sensitivity = ReadOptionValue(args, "--sensitivity");
    if (sensitivity == null)
    {
        _sensitivity = 5;
    }
    else
    {
        int parsedSensitivity;
        if (!int.TryParse(
                sensitivity,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out parsedSensitivity))
        {
            throw new FormatException(
                "--sensitivity expects an integer but got '" + sensitivity + "'.");
        }

        _sensitivity = parsedSensitivity;
    }

    // Only build the dictionary-backed searchers that were actually requested.
    if (_infoTypes.Contains("keyword"))
    {
        _keywordSearch = new KeywordSearch("keyword_dict.txt");
    }

    if (_infoTypes.Contains("name"))
    {
        // NOTE(review): "nam_dict.txt" may be a typo for "name_dict.txt" - confirm
        // against the dictionary file that actually ships with the tool.
        _nameSearch = new NameSearch("nam_dict.txt");
    }

    // TODO: decide what to do with the reports; they are currently discarded.
    List<FileReport> reports = ScanFilesInDirectory(_path);
}

/// <summary>
/// Returns the argument that follows the first occurrence of <paramref name="flag"/>,
/// or null when the flag is not present.
/// </summary>
/// <exception cref="ArgumentException">The flag is the last argument, so it has no value.</exception>
private static string ReadOptionValue(string[] args, string flag)
{
    int flagIndex = Array.IndexOf(args, flag);
    if (flagIndex < 0)
    {
        return null;
    }

    if (flagIndex + 1 >= args.Length)
    {
        throw new ArgumentException("Missing value for option " + flag + ".");
    }

    return args[flagIndex + 1];
}
// One or more capitalized words made of ASCII Latin letters, separated by a
// single hyphen or a single whitespace character (e.g. "John", "Mary-Jane",
// "John Smith"). Built once because IsName may be called for many candidates.
private static readonly Regex NamePattern =
    new Regex(@"^[A-Z][a-z]+([-\s][A-Z][a-z]+)*$");

/// <summary>
/// Checks whether <paramref name="candidate"/> looks like a person's name: first by
/// shape (capitalized words), then by looking its first word up in
/// <paramref name="nameSearch"/>.
/// </summary>
/// <remarks>
/// Only names written with unaccented ASCII Latin letters are recognized.
/// </remarks>
/// <param name="candidate">Text to classify; null or empty yields <c>Match.Negative</c>.</param>
/// <param name="nameSearch">Collection of known first names used to confirm a match.</param>
/// <returns>
/// <c>Match.Negative</c> when the text does not have the shape of a name,
/// <c>Match.Ambiguous</c> when it has the shape but its first word is not found in
/// <paramref name="nameSearch"/>, and <c>Match.Positive</c> when it is found.
/// </returns>
public static Match IsName(string candidate, NameSearch nameSearch)
{
    if (string.IsNullOrEmpty(candidate) || !NamePattern.IsMatch(candidate))
    {
        return Match.Negative;
    }

    // Split on any whitespace (null separator) so the first word is isolated for
    // every separator the pattern accepts, not just a plain space.
    string firstName = candidate.Split((char[])null)[0];

    return nameSearch.Contains(firstName) ? Match.Positive : Match.Ambiguous;
}