/// <summary>
/// Guesses the author of a text by comparing its n-gram profile with every stored profile.
/// </summary>
/// <param name="text">Text whose author should be determined.</param>
/// <returns>
/// The <c>Author</c> of the stored profile with the smallest dissimilarity to the text, or
/// "UNKNOWN" when the "Profiles" folder under the current directory is missing or contains
/// no *.xml files.
/// </returns>
static string FindTheAuthor(string text)
{
    AuthorProfile textProfile = new AuthorProfile() { Ngrams = GenerateProfile(text), Author = "UNKNOWN" };
    string bestMatchAuthor = "UNKNOWN";
    int minDis = int.MaxValue;

    // Stored profiles live in <current directory>/Profiles.
    DirectoryInfo profilesDir = new DirectoryInfo(Path.Combine(Environment.CurrentDirectory, "Profiles"));
    if (!profilesDir.Exists)
    {
        // Nothing has been learned yet, so there is nothing to compare against.
        return bestMatchAuthor;
    }

    XmlSerializer serializer = new XmlSerializer(typeof(AuthorProfile));
    foreach (FileInfo file in profilesDir.GetFiles("*.xml"))
    {
        AuthorProfile candidate;

        // Open through the FileInfo itself: formatting a FileInfo into a path string is not
        // guaranteed to yield the bare file name. Read-only access is enough here, and the
        // using block releases the handle even if deserialization throws.
        using (FileStream fileStream = file.OpenRead())
        {
            candidate = (AuthorProfile)serializer.Deserialize(fileStream);
        }

        int dis = CompareTwoProfiles(candidate, textProfile);

        // "<=" means that on a tie the profile enumerated later wins.
        if (dis <= minDis)
        {
            minDis = dis;
            bestMatchAuthor = candidate.Author;
        }
    }

    return bestMatchAuthor;
}
/// <summary>
/// Compares two profiles and returns their dissimilarity.
/// </summary>
/// <param name="profile1">First profile; its <c>Ngrams</c> entries are read via <c>NGram</c> and <c>Freq</c>.</param>
/// <param name="profile2">Second profile, read the same way.</param>
/// <returns>
/// The sum, over every n-gram that occurs in either profile, of
/// |2 * (f1 - f2) / (f1 + f2)| converted to <c>int</c> per term, where f1 and f2 are the
/// n-gram's frequencies in the two profiles (0 when the n-gram is absent from one of them).
/// </returns>
static int CompareTwoProfiles(AuthorProfile profile1, AuthorProfile profile2)
{
    // Index profile2 by n-gram text (first occurrence wins). Looking entries up by n-gram,
    // rather than by equality of the whole entry, lets an n-gram that appears in both
    // profiles with different frequencies be matched instead of being counted as missing.
    Dictionary<string, int> indexInProfile2 = new Dictionary<string, int>();
    for (int i = 0; i < profile2.Ngrams.Count; i++)
    {
        string key = profile2.Ngrams[i].NGram;
        if (key != null && !indexInProfile2.ContainsKey(key))
        {
            indexInProfile2.Add(key, i);
        }
    }

    List<NgramFreq> allNgrams = new List<NgramFreq>();
    HashSet<string> processed = new HashSet<string>();

    // Every n-gram of profile1, paired with its frequency in profile2 (0 when absent).
    foreach (var item in profile1.Ngrams)
    {
        int idx = -1;
        bool inProfile2 = item.NGram != null && indexInProfile2.TryGetValue(item.NGram, out idx);
        allNgrams.Add(new NgramFreq()
        {
            ngram = item.NGram,
            f1 = item.Freq,
            f2 = (inProfile2 ? profile2.Ngrams[idx].Freq : 0)
        });
        processed.Add(item.NGram);
    }

    // N-grams that only profile2 contains; by construction they have no frequency in profile1.
    foreach (var item in profile2.Ngrams)
    {
        if (processed.Add(item.NGram))
        {
            allNgrams.Add(new NgramFreq()
            {
                ngram = item.NGram,
                f1 = 0,
                f2 = item.Freq
            });
        }
    }

    // Calculate total dissimilarity
    int sum = 0;
    foreach (var item in allNgrams)
    {
        if (item.f1 + item.f2 == 0)
        {
            // Both frequencies are zero: the term carries no difference and would divide by zero.
            continue;
        }

        sum += (int)Math.Abs(2 * (item.f1 - item.f2) / (item.f1 + item.f2));
    }

    return sum;
}
/// <summary>
/// Creates a new author profile from the learning text and saves it as an XML file in the
/// "Profiles" folder under the current directory.
/// </summary>
/// <param name="learnData">Text from which the author's n-gram profile is generated.</param>
/// <param name="author">
/// Author's name. When null, the name is read from the console; an empty or missing name
/// is replaced with "UNDEFINED".
/// </param>
static void DefineNewAuthorProfile(string learnData, string author = null)
{
    var ngrams = GenerateProfile(learnData, profileLimit);

    // Author's name
    string catname = author;
    if (catname == null)
    {
        // Manual author's name input
        Console.Write($"Enter authors name:");
        catname = Console.ReadLine();
    }

    // Console.ReadLine() returns null at end of input, so check for null as well as empty.
    if (string.IsNullOrEmpty(catname))
    {
        catname = "UNDEFINED";
    }

    // The stored author name is lower-cased; the file name keeps the name as entered.
    AuthorProfile authorProfile = new AuthorProfile();
    authorProfile.Author = catname.ToLower();
    authorProfile.Ngrams = ngrams;

    string profilesDir = Path.Combine(Environment.CurrentDirectory, "Profiles");
    Directory.CreateDirectory(profilesDir);

    XmlSerializer writer = new XmlSerializer(typeof(AuthorProfile));

    // The using block closes the file even if serialization throws.
    using (FileStream xmlFile = File.Create(Path.Combine(profilesDir, catname + ".xml")))
    {
        writer.Serialize(xmlFile, authorProfile);
    }
}