/// <summary>
/// Recomputes the per-domain weight list (<c>Bobot</c>) of every term in the
/// inverted index and saves the index back through <c>Lingkungan</c>.
/// </summary>
/// <remarks>
/// For each term and each domain (in the order returned by <c>db.tbDomains</c>) the weight is
/// <c>log10(lambda0 * count / totalOccurrences + lambda1 * count / domainOccurrences)</c>,
/// where <c>count</c> is the number of occurrences of the term in that domain.
/// Both ratios are computed in floating point; a ratio whose denominator is zero
/// contributes 0, so a term that never occurs in a domain gets <c>log10(0)</c>
/// (negative infinity) rather than NaN or a <see cref="DivideByZeroException"/>.
/// </remarks>
public void HitungPembobotanKata()
{
    List<Term> InvertedIndex = Lingkungan.LoadInvertedIndex();
    if (InvertedIndex == null)
    {
        InvertedIndex = new List<Term>();
    }

    if (db == null)
    {
        db = new dbDataContext();
    }

    // All known domains, plus one occurrence counter per domain (same order).
    List<tbDomain> Domain = db.tbDomains.ToList();
    List<int> DomainCount = new List<int>();
    for (int i = 0; i < Domain.Count; i++)
    {
        DomainCount.Add(0);
    }

    // Count occurrences across the whole index and per domain.
    int TermCount = 0;
    foreach (var item in InvertedIndex)
    {
        TermCount += item.Index.Count;
        for (int i = 0; i < Domain.Count; i++)
        {
            var domainId = Domain[i].Id;
            DomainCount[i] += item.Index.Count(x => x.DomainID == domainId);
        }
    }

    // NOTE(review): behaviour kept exactly as in the original. The first domain at
    // index >= 1 whose counter is zero gets (DomainCount.Count - 1) added to it, and
    // the scan then stops. This looks like an attempt at add-one smoothing (the inner
    // loop increments DomainCount[i], not DomainCount[j], and index 0 is skipped) --
    // confirm the intended behaviour before changing it.
    for (int i = 1; i < DomainCount.Count; i++)
    {
        if (DomainCount[i] == 0)
        {
            for (int j = 1; j < DomainCount.Count; j++)
            {
                DomainCount[i]++;
            }
            break;
        }
    }

    // Weighting. The lambda coefficients are loop-invariant, so read them once;
    // skipped entirely when there is nothing to weight.
    if (InvertedIndex.Count > 0 && Domain.Count > 0)
    {
        double lambdaGlobal = Lingkungan.getLambda(0);
        double lambdaDomain = Lingkungan.getLambda(1);

        foreach (var item in InvertedIndex)
        {
            item.Bobot.Clear();
            for (int i = 0; i < Domain.Count; i++)
            {
                var domainId = Domain[i].Id;
                int count = item.Index.Count(x => x.DomainID == domainId);

                // Cast before dividing: int / int would truncate the ratio to 0.
                double globalPart = TermCount == 0
                    ? 0.0
                    : lambdaGlobal * ((double)count / TermCount);
                double domainPart = DomainCount[i] == 0
                    ? 0.0
                    : lambdaDomain * count / DomainCount[i];

                item.Bobot.Add(Math.Log10(globalPart + domainPart));
            }
        }
    }
    else
    {
        // No domains: the original still cleared every term's weight list.
        foreach (var item in InvertedIndex)
        {
            item.Bobot.Clear();
        }
    }

    Lingkungan.SaveInvertedIndex(InvertedIndex);
}
/// <summary>
/// Tokenises <paramref name="data"/> and records every token in the inverted index,
/// then saves the index through <c>Lingkungan</c>.
/// </summary>
/// <param name="data">Text to index; it is split on every character outside A-Z, a-z and 0-9.</param>
/// <param name="domain">Domain identifier stored in each recorded location.</param>
/// <param name="infID">Second value passed to each <c>Location</c>.</param>
/// <param name="InfDetID">Third value passed to each <c>Location</c>.</param>
public void createInvertedIndex(string data, int domain, int infID, int InfDetID)
{
    List<string> tokens = Regex.Split(data, @"[^A-Za-z0-9]")
                               .Where(token => token.Length > 0)
                               .ToList();
    List<string> stopwords = Lingkungan.getStopWordList();
    List<Term> index = Lingkungan.LoadInvertedIndex() ?? new List<Term>();

    for (int position = 0; position < tokens.Count; position++)
    {
        string lowered = tokens[position].ToLower();

        // Reuse the existing entry for this word (case-insensitive match) or create one.
        Term kata = index.FirstOrDefault(t => t.Word.ToLower().Equals(lowered));
        if (kata == null)
        {
            kata = new Term { Word = lowered, Jenis = JenisKata.Unknown };
            index.Add(kata);
        }

        // Word class still unknown: optionally fetch the dictionary page, then
        // classify from the cached page if one exists on disk.
        if (kata.Jenis == JenisKata.Unknown)
        {
            if (InetCon)
            {
                ScraptDataFromWebKBBI(kata.Word);
            }

            string cachedPage = Lingkungan.getDataCacheKata() + kata.Word + ".html";
            if (File.Exists(cachedPage))
            {
                kata.Jenis = GetJenisKataFromScraptFile(kata.Word);
            }
        }

        // The flag is refreshed on every occurrence from the current stop-word list.
        kata.StopWord = stopwords != null
                        && stopwords.Count(s => s.ToLower().Equals(lowered)) > 0;

        // The position is the token's index after empty splits were removed.
        kata.Index.Add(new Location(domain, infID, InfDetID, position));
    }

    Lingkungan.SaveInvertedIndex(index);
    // compute the weighting
}