Beispiel #1
0
        /// <summary>
        /// Guesses the author of a text by comparing its n-gram profile against every
        /// stored profile and returning the author of the least dissimilar one.
        /// </summary>
        /// <param name="text">Text whose author should be determined.</param>
        /// <returns>
        /// The <c>Author</c> value of the best matching profile found among the
        /// <c>*.xml</c> files in the "Profiles" folder under the current directory,
        /// or "UNKNOWN" when that folder does not exist or contains no profiles.
        /// </returns>
        static string FindTheAuthor(string text)
        {
            AuthorProfile textProfile = new AuthorProfile()
            {
                Ngrams = GenerateProfile(text), Author = "UNKNOWN"
            };
            string bestMatchAuthor = "UNKNOWN";
            int    minDis          = int.MaxValue;

            // Path.Combine keeps the path valid regardless of the platform's separator.
            DirectoryInfo profilesDir = new DirectoryInfo(Path.Combine(Environment.CurrentDirectory, "Profiles"));
            if (!profilesDir.Exists)
            {
                // No profiles were ever saved: nothing to compare against.
                return bestMatchAuthor;
            }

            XmlSerializer serializer = new XmlSerializer(typeof(AuthorProfile));

            // Check all xml profiles in the Profiles folder
            foreach (FileInfo file in profilesDir.GetFiles("*.xml"))
            {
                AuthorProfile candidate;

                // FullName is used instead of interpolating the FileInfo itself, because
                // FileInfo.ToString() is not guaranteed to yield the bare file name.
                // OpenRead requests read-only access, and the using block releases the
                // handle even when deserialization throws.
                using (FileStream fileStream = File.OpenRead(file.FullName))
                {
                    candidate = (AuthorProfile)serializer.Deserialize(fileStream);
                }

                int dis = CompareTwoProfiles(candidate, textProfile);

                // "<=" means that on a tie the profile enumerated later wins.
                if (dis <= minDis)
                {
                    minDis          = dis;
                    bestMatchAuthor = candidate.Author;
                }
            }

            return bestMatchAuthor;
        }
Beispiel #2
0
        /// <summary>
        /// Compares two profiles and returns their dissimilarity.
        /// </summary>
        /// <remarks>
        /// Every n-gram that occurs in either profile is paired with its frequency in
        /// both profiles (0 when the n-gram is absent from one of them). The result is the
        /// sum over all n-grams of <c>|2 * (f1 - f2) / (f1 + f2)|</c>, each term cast to int.
        /// </remarks>
        /// <param name="profile1">First profile; its <c>Ngrams</c> list is read.</param>
        /// <param name="profile2">Second profile; its <c>Ngrams</c> list is read.</param>
        /// <returns>The accumulated dissimilarity; 0 when both profiles are empty.</returns>
        static int CompareTwoProfiles(AuthorProfile profile1, AuthorProfile profile2)
        {
            int sum = 0;
            List <NgramFreq> allNgrams = new List <NgramFreq>();

            // "var" lets the loop variable follow the actual element type of Ngrams,
            // which is the type that exposes the NGram and Freq members used below.
            foreach (var item in profile1.Ngrams)
            {
                // Look the n-gram up by its text. Searching for the whole item would
                // also involve its frequency (or identity) and miss shared n-grams
                // whose counts differ between the two profiles.
                int indexInOther = profile2.Ngrams.FindIndex(x => x.NGram == item.NGram);
                allNgrams.Add(new NgramFreq()
                {
                    ngram = item.NGram,
                    f1    = item.Freq,
                    f2    = (indexInOther == -1 ? 0 : profile2.Ngrams[indexInOther].Freq)
                });
            }

            foreach (var item in profile2.Ngrams)
            {
                if (!allNgrams.Exists(x => x.ngram == item.NGram))
                {
                    // Ngram was NOT processed yet. Every n-gram of profile1 is already
                    // in allNgrams, so this one cannot occur in profile1: f1 is 0.
                    allNgrams.Add(new NgramFreq()
                    {
                        ngram = item.NGram,
                        f1    = 0,
                        f2    = item.Freq
                    });
                }
            }

            // Calculate total dissimilarity
            foreach (var item in allNgrams)
            {
                var denominator = item.f1 + item.f2;
                if (denominator == 0)
                {
                    // Both frequencies are zero: no information, and dividing would fail.
                    continue;
                }

                // NOTE(review): if f1/f2 are integer typed this division truncates, so each
                // term can only be 0, 1 or 2 - confirm that this coarse measure is intended.
                sum += (int)Math.Abs(2 * (item.f1 - item.f2) / denominator);
            }

            return sum;
        }
Beispiel #3
0
        /// <summary>
        /// Create a new author profile and save it to a file
        /// </summary>
        /// <remarks>
        /// The profile is written as XML to <c>Profiles/&lt;author&gt;.xml</c> under the
        /// current directory; the folder is created when missing and an existing file
        /// with the same name is overwritten.
        /// </remarks>
        /// <param name="learnData">Training text the profile is generated from.</param>
        /// <param name="author">
        /// Author's name. When null the name is read from the console; an empty (or
        /// unavailable) name is replaced by "UNDEFINED".
        /// </param>
        static void DefineNewAuthorProfile(string learnData, string author = null)
        {
            // Build the n-gram profile of the training text, capped by profileLimit.
            var ngrams = GenerateProfile(learnData, profileLimit);

            // Author's name
            string catname = author;

            if (catname == null)
            {
                // Manual author's name input
                Console.Write($"Enter authors name:");
                catname = Console.ReadLine();
            }

            // Console.ReadLine returns null when the input stream has ended, so null
            // has to be handled together with the empty string.
            if (string.IsNullOrEmpty(catname))
            {
                catname = "UNDEFINED";
            }

            AuthorProfile authorProfile = new AuthorProfile();
            authorProfile.Author = catname.ToLower();
            authorProfile.Ngrams = ngrams;

            // The name may come straight from the console: neutralise characters that
            // are illegal in file names (including directory separators) so the file
            // always ends up inside the Profiles folder.
            string fileName = catname;
            foreach (char invalid in Path.GetInvalidFileNameChars())
            {
                fileName = fileName.Replace(invalid, '_');
            }

            string profilesDir = Path.Combine(Environment.CurrentDirectory, "Profiles");
            Directory.CreateDirectory(profilesDir);

            XmlSerializer writer = new XmlSerializer(typeof(AuthorProfile));

            // The using block closes the file even when serialization throws.
            using (FileStream xmlFile = File.Create(Path.Combine(profilesDir, fileName + ".xml")))
            {
                writer.Serialize(xmlFile, authorProfile);
            }
        }