/// <summary>
/// Removes <paramref name="word"/> from <paramref name="sentence"/> when
/// <c>ShouldBeRemoved</c> returns true for the word's text.
/// </summary>
/// <param name="word">Word whose <c>Value</c> is tested.</param>
/// <param name="sentence">Sentence whose <c>Words</c> collection is modified.</param>
void RemoveUnnassesaryWords(TajikWord word, TajikSentence sentence)
        {
            // Nothing to do for words the filter wants to keep.
            if (!ShouldBeRemoved(word.Value))
            {
                return;
            }

            sentence.Words.Remove(word);
        }
 /// <summary>
 /// Strips one trailing suffix listed in <c>pasoyandJam</c> from
 /// <paramref name="word"/>, provided the remaining stem is longer than one
 /// character and is present in <c>Context.Words</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): judging by the names, <c>pasoyandJam</c> presumably holds
 /// plural suffixes - confirm against its declaration.
 /// </remarks>
 /// <param name="word">Word whose <c>Value</c> is rewritten in place on a match.</param>
 void ShakliJam(TajikWord word)
 {
     foreach (var suffix in pasoyandJam)
     {
         if (!word.Value.EndsWith(suffix))
         {
             continue;
         }

         // Cut exactly the suffix that matched. The previous code always cut
         // two characters, which yields a wrong stem for any suffix whose
         // length is not 2.
         var stem = word.Value.Substring(0, word.Value.Length - suffix.Length);
         if (stem.Length > 1 && Context.Words.Any(s => s.Content == stem))
         {
             word.Value = stem;

             // At most one suffix is removed per call, as before.
             return;
         }
     }
 }
Esempio n. 3
0
        /// <summary>
        /// Computes the IDF of the word in <paramref name="wordsData"/> over
        /// <paramref name="documents"/> and appends the result to the word's
        /// <c>IDFCategoryLinks</c>, tagged with <paramref name="category"/>.
        /// </summary>
        /// <param name="category">Category stored on the new link.</param>
        /// <param name="wordsData">Word entry that receives the new link; its link list is created on first use.</param>
        /// <param name="documents">Documents the IDF is calculated over.</param>
        static void CalculateCategory(IDFCategory category, IWordDataSet wordsData, List<TajikDocument> documents)
        {
            var categoryIdf = KEAGlobal.TFIDFManager.CalCulateIDF(documents, new TajikWord(wordsData.Content));

            // Create the link list lazily so that Add below never hits null.
            if (wordsData.IDFCategoryLinks == null)
            {
                wordsData.IDFCategoryLinks = new List<IDFCategoryLink>();
            }

            var link = new IDFCategoryLink()
            {
                Category = category,
                IDF      = categoryIdf
            };
            wordsData.IDFCategoryLinks.Add(link);
        }
 /// <summary>
 /// For every entry of <c>ishorakuni</c> that <paramref name="word"/> currently
 /// ends with, removes that ending when the remaining stem is longer than two
 /// characters and is present in <c>Context.Words</c>.
 /// </summary>
 /// <remarks>
 /// The loop does not stop after a replacement, so later entries are tested
 /// against the already shortened value.
 /// </remarks>
 /// <param name="word">Word whose <c>Value</c> is rewritten in place on a match.</param>
 void Ishorakuni(TajikWord word)
 {
     foreach (var ending in ishorakuni)
     {
         if (!word.Value.EndsWith(ending))
         {
             continue;
         }

         var stem = word.Value.Substring(0, word.Value.Length - ending.Length);
         var isKnownStem = stem.Length > 2 && Context.Words.Any(entry => entry.Content == stem);
         if (isKnownStem)
         {
             word.Value = stem;
         }
     }
 }
 /// <summary>
 /// For every entry of <c>bandakiU</c> that <paramref name="word"/> currently
 /// ends with, removes that ending when the remaining stem is longer than two
 /// characters and is present in <c>Context.Words</c>.
 /// </summary>
 /// <remarks>
 /// The loop does not stop after a replacement, so later entries are tested
 /// against the already shortened value.
 /// </remarks>
 /// <param name="word">Word whose <c>Value</c> is rewritten in place on a match.</param>
 void BandakiU(TajikWord word)
 {
     foreach (var ending in bandakiU)
     {
         if (!word.Value.EndsWith(ending))
         {
             continue;
         }

         var stem = word.Value.Substring(0, word.Value.Length - ending.Length);
         var isKnownStem = stem.Length > 2 && Context.Words.Any(entry => entry.Content == stem);
         if (isKnownStem)
         {
             word.Value = stem;
         }
     }
 }
 /// <summary>
 /// For every entry of <c>bandakiI</c> that <paramref name="word"/> currently
 /// ends with, cuts that ending off and, when the stem is longer than two
 /// characters and <c>DataSetContains</c> accepts it, replaces the word's value.
 /// </summary>
 /// <remarks>
 /// When <c>DataSetContains</c> hands back a non-null string through its
 /// <c>out</c> argument, that string becomes the new value; otherwise the bare
 /// stem is used. The loop keeps going after a replacement.
 /// </remarks>
 /// <param name="word">Word whose <c>Value</c> is rewritten in place on a match.</param>
 void BandakiI(TajikWord word)
 {
     foreach (var ending in bandakiI)
     {
         if (!word.Value.EndsWith(ending))
         {
             continue;
         }

         var stem = word.Value.Substring(0, word.Value.Length - ending.Length);
         if (stem.Length > 2 && DataSetContains(stem, out string bandakToEnd))
         {
             // Prefer the form supplied by the data set lookup, if any.
             word.Value = bandakToEnd ?? stem;
         }
     }
 }
Esempio n. 7
0
        /// <summary>
        /// Loads the documents of nine fixed categories, computes for every word in
        /// the context one IDF per category plus a common IDF over all documents,
        /// and writes the words as indented JSON to <c>WordAllIDF.json</c>.
        /// </summary>
        /// <param name="args">
        /// Optional: <c>args[0]</c> overrides the corpus root directory that contains
        /// one sub-folder per category. Without arguments the original hard-coded
        /// location is used.
        /// </param>
        static void Main(string[] args)
        {
            // Original location of the corpus; kept as the default so that running
            // without arguments behaves exactly as before.
            const string defaultCorpusRoot = @"C:\Users\dilshodk\Desktop\for me";

            // Stand-in for an IDF of exactly zero.
            // NOTE(review): presumably avoids zero factors downstream - confirm.
            const double minimum = 0.00000000000000000000000112;

            string corpusRoot = (args != null && args.Length > 0) ? args[0] : defaultCorpusRoot;

            // { category display name, folder name under the corpus root }.
            // The two spellings intentionally differ for some categories; the
            // order determines the order of the links stored on every word.
            string[][] categorySpecs =
            {
                new[] { "Бадеӣ", "Бадеи" },
                new[] { "Гуманитарӣ", "Гуманитари" },
                new[] { "Иқтисодӣ", "Иқтисодӣ" },
                new[] { "Илми дақиқ", "Илми дакик" },
                new[] { "Сиёсӣ", "Сиёси" },
                new[] { "Техникӣ", "Техники" },
                new[] { "Тиб", "Тиб" },
                new[] { "Физика", "Физика" },
                new[] { "Химия", "Химия" },
            };

            TajikKEAContext jsonContext = new TajikKEAContext();

            KEAGlobal.InitiateKEAGlobal(jsonContext);
            PDFHelper pDFHelper = new PDFHelper();

            // categories[i] and categoryDocuments[i] belong together.
            var categories        = new List<IDFCategory>();
            var categoryDocuments = new List<List<TajikDocument>>();

            foreach (var spec in categorySpecs)
            {
                categories.Add(new IDFCategory()
                {
                    Guid = Guid.NewGuid(), Name = spec[0]
                });
                categoryDocuments.Add(GetDocuments(pDFHelper, Path.Combine(corpusRoot, spec[1])));
            }

            // Union of all category documents, used for the common IDF.
            var allDocuments = new List<TajikDocument>();
            foreach (var documents in categoryDocuments)
            {
                allDocuments.AddRange(documents);
            }

            foreach (var item in jsonContext.Words)
            {
                for (int i = 0; i < categories.Count; i++)
                {
                    CalculateCategory(categories[i], item, categoryDocuments[i]);
                }

                var word = new TajikWord(item.Content);
                var idf  = KEAGlobal.TFIDFManager.CalCulateIDF(allDocuments, word);
                if (idf == 0)
                {
                    idf = minimum;
                }
                item.CommonIDF = idf;
            }

            // Second pass: replace every zero per-category IDF as well.
            foreach (var item in jsonContext.Words)
            {
                foreach (var link in item.IDFCategoryLinks)
                {
                    if (link.IDF == 0)
                    {
                        link.IDF = minimum;
                    }
                }
            }

            var text = JsonConvert.SerializeObject(jsonContext.Words, Formatting.Indented);

            File.WriteAllText("WordAllIDF.json", text);

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
Esempio n. 8
0
 /// <summary>
 /// Creates a TF calculation for one term within one document.
 /// </summary>
 /// <param name="termin">Stored in <c>Termin</c>.</param>
 /// <param name="document">Stored in <c>Document</c>.</param>
 public TF(TajikWord termin, Document.TajikDocument document)
 {
     this.Termin   = termin;
     this.Document = document;
 }
        /// <summary>
        /// Builds an <c>IDF</c> for the given documents and word and returns the
        /// result of its <c>CalculateIDF</c> call.
        /// </summary>
        /// <param name="documentsDataSet">Documents passed to the <c>IDF</c> constructor.</param>
        /// <param name="wordToCalculate">Term passed to the <c>IDF</c> constructor.</param>
        /// <returns>The value returned by <c>IDF.CalculateIDF</c>.</returns>
        public double CalCulateIDF(List<Document.TajikDocument> documentsDataSet, TajikWord wordToCalculate)
            => new IDF(documentsDataSet, wordToCalculate).CalculateIDF();
        /// <summary>
        /// Builds a <c>TF</c> for the given word and document and returns the
        /// result of its <c>CalculateTF</c> call.
        /// </summary>
        /// <param name="wordToCalculate">Term passed to the <c>TF</c> constructor.</param>
        /// <param name="documentToCalculate">Document passed to the <c>TF</c> constructor.</param>
        /// <returns>The value returned by <c>TF.CalculateTF</c>.</returns>
        public double CalCulateTF(TajikWord wordToCalculate, Document.TajikDocument documentToCalculate)
            => new TF(wordToCalculate, documentToCalculate).CalculateTF();
Esempio n. 11
0
 /// <summary>
 /// Creates an IDF calculation for one term over a set of documents.
 /// </summary>
 /// <param name="documents">Stored in <c>Documents</c>.</param>
 /// <param name="termin">Stored in <c>Termin</c>.</param>
 public IDF(IEnumerable<Document.TajikDocument> documents, TajikWord termin)
 {
     this.Documents = documents;
     this.Termin    = termin;
 }