/// <summary>
/// Recomputes the per-domain weight list (<c>Bobot</c>) of every term in the inverted index
/// and saves the index back through <c>Lingkungan.SaveInvertedIndex</c>.
/// </summary>
/// <remarks>
/// For each term and each domain the weight is
/// <c>log10(lambda0 * count / TermCount + lambda1 * count / DomainCount[domain])</c>,
/// where <c>count</c> is the number of the term's postings that belong to that domain.
/// A term that never occurs in a domain therefore gets <c>log10(0)</c>, i.e. negative infinity.
/// </remarks>
public void HitungPembobotanKata()
{
    List<Term> InvertedIndex = Lingkungan.LoadInvertedIndex();
    if (InvertedIndex == null)
    {
        InvertedIndex = new List<Term>();
    }
    if (db == null)
    {
        db = new dbDataContext();
    }

    List<tbDomain> Domain = db.tbDomains.ToList(); // every known domain
    List<int> DomainCount = new List<int>();       // postings per domain, parallel to Domain
    for (int i = 0; i < Domain.Count; i++)
    {
        DomainCount.Add(0);
    }

    // Count the postings over the whole collection and per domain.
    int TermCount = 0;
    foreach (var item in InvertedIndex)
    {
        TermCount += item.Index.Count;
        for (int i = 0; i < Domain.Count; i++)
        {
            int domainId = Domain[i].Id;
            DomainCount[i] += item.Index.Count(x => x.DomainID == domainId);
        }
    }

    // Kept exactly as in the original: the first empty domain found from position 1 onwards
    // gets a total of (number of domains - 1); position 0 and any later empty domains stay 0.
    // NOTE(review): this looks like a partial guard against dividing by zero - confirm intent.
    for (int i = 1; i < DomainCount.Count; i++)
    {
        if (DomainCount[i] == 0)
        {
            DomainCount[i] = DomainCount.Count - 1;
            break;
        }
    }

    double lambdaGlobal = Lingkungan.getLambda(0);
    double lambdaDomain = Lingkungan.getLambda(1);

    // Weighting
    foreach (var item in InvertedIndex)
    {
        item.Bobot.Clear();
        for (int i = 0; i < Domain.Count; i++)
        {
            int domainId = Domain[i].Id;
            int count = item.Index.Count(x => x.DomainID == domainId);

            // Floating-point division: the former int / int expression truncated the
            // global share to 0 for every term that is not the only term in the index.
            // A zero denominator contributes 0 instead of throwing or producing NaN.
            double globalShare = TermCount == 0 ? 0.0 : (double)count / TermCount;
            double domainShare = DomainCount[i] == 0 ? 0.0 : (double)count / DomainCount[i];

            double bobot = Math.Log10((lambdaGlobal * globalShare) + (lambdaDomain * domainShare));
            item.Bobot.Add(bobot);
        }
    }

    Lingkungan.SaveInvertedIndex(InvertedIndex);
}
/// <summary>
/// Click handler of the "add" button: queues every file named in <c>textBoxLocation</c>
/// (paths separated by '+') for import.
/// </summary>
/// <remarks>
/// A file is queued only when no file with the same name exists under
/// <c>Lingkungan.getDataBaru()</c> and no <c>tbInformasi</c> row already carries that title.
/// Nothing is copied or submitted to the database here; the handler only fills
/// <c>LokasilamaData</c>, <c>LokasibaruData</c>, <c>DataBaru</c> and <c>current.tbInformasis</c>.
/// The text box is always cleared and <c>IndexChange</c> is always invoked afterwards.
/// </remarks>
private void buttonAdd_Click(object sender, EventArgs e)
{
    try
    {
        // With nothing typed in, fall back to the double-click handler of the text box.
        if (string.IsNullOrWhiteSpace(textBoxLocation.Text))
        {
            TextboxLocationDoubleClick(null, null);
        }

        foreach (string sourcePath in textBoxLocation.Text.Split('+'))
        {
            // The file name is whatever follows the last backslash.
            string[] pathParts = sourcePath.Split('\\');
            string fileName = pathParts[pathParts.Length - 1];

            if (File.Exists(Lingkungan.getDataBaru() + fileName))
            {
                continue; // already present in the new-data folder
            }

            tbInformasi existing = db.tbInformasis.Where(x => x.Judul.ToString() == fileName).FirstOrDefault();
            if (existing != null)
            {
                continue; // already registered in the database
            }

            tbInformasi baru = new tbInformasi();
            baru.Judul = fileName;
            baru.Indexed = 0;
            baru.Lokasi = Lingkungan.getDataBaru() + fileName;
            current.tbInformasis.Add(baru);

            string targetPath = Lingkungan.getDataBaru() + fileName;
            LokasilamaData.Add(sourcePath);
            LokasibaruData.Add(targetPath);
            DataBaru.Add(baru);
        }
    }
    catch (FileNotFoundException)
    {
        MessageBox.Show("Masukkan data");
    }
    catch (Exception ex)
    {
        MessageBox.Show("Error! " + ex.Message);
    }
    finally
    {
        textBoxLocation.Text = "";
        IndexChange(null, null);
    }
}
/// <summary>
/// Scores every information detail of one domain against the query terms and returns
/// the (at most) ten best-scoring details, best first.
/// </summary>
/// <param name="inpt">Dialogue whose text is converted to query terms via <c>StringToTerm()</c>.</param>
/// <param name="domain">Domain id; only details whose parent <c>tbInformasi.DomainID</c> matches are scored.</param>
/// <param name="extra">Optional additional terms; a term is appended only when no query term has the same <c>Word</c>. May be null.</param>
/// <returns>Up to ten details ordered by descending score; an empty array when the domain has no details.</returns>
public tbInfDetail[] PencarianInformasi(Dialogue inpt, int domain, List<Term> extra)
{
    List<tbInfDetail> dataDomain = db.tbInfDetails.Where(x => x.tbInformasi.DomainID == domain).ToList();
    List<decimal> bobot = new List<decimal>();

    List<Term> qry = inpt.StringToTerm();
    if (extra != null)
    {
        foreach (var item in extra)
        {
            if (!qry.Any(x => x.Word == item.Word))
            {
                qry.Add(item);
            }
        }
    }

    // Total number of postings in the inverted index. A missing index counts as empty,
    // matching how the indexing code treats a null result of LoadInvertedIndex.
    int TermCount = 0;
    List<Term> invertedIndex = Lingkungan.LoadInvertedIndex();
    if (invertedIndex != null)
    {
        foreach (var item in invertedIndex)
        {
            TermCount += item.Index.Count;
        }
    }
    decimal termC = TermCount;

    decimal lambdaGlobal = Convert.ToDecimal(Lingkungan.getLambda(0));
    decimal lambdaLocal = Convert.ToDecimal(Lingkungan.getLambda(1));

    for (int i = 0; i < dataDomain.Count; i++)
    {
        string[] fragment = (dataDomain[i].info ?? string.Empty)
            .Split(' ')
            .Where(x => !string.IsNullOrWhiteSpace(x))
            .ToArray();
        decimal fragmentCounter = fragment.Length;

        // The score is the product, over all query terms, of a smoothed term share.
        decimal b = 1;
        foreach (var item in qry)
        {
            string needle = item.Word.ToLower();
            decimal fragmentTermCounter = fragment.Count(x => x.ToLower().Contains(needle));

            // decimal division by zero throws, so an empty detail or an empty index
            // contributes a share of 0 instead of aborting the whole search.
            decimal localShare = fragmentCounter == 0 ? 0 : fragmentTermCounter / fragmentCounter;

            // NOTE(review): the numerator is the number of entries in item.Bobot, not the
            // number of postings in item.Index - confirm this is the intended statistic.
            decimal globalShare = termC == 0 ? 0 : item.Bobot.Count / termC;

            b = b * ((localShare * lambdaLocal) + (globalShare * lambdaGlobal));
        }
        bobot.Add(b);
    }

    // Repeatedly pull out the current maximum; on ties the entry with the highest index wins.
    List<tbInfDetail> top10 = new List<tbInfDetail>();
    while (dataDomain.Count > 0 && top10.Count < 10)
    {
        int idx = bobot.LastIndexOf(bobot.Max());
        top10.Add(dataDomain[idx]);
        bobot.RemoveAt(idx);
        dataDomain.RemoveAt(idx);
    }
    return top10.ToArray();
}
/// <summary>
/// Indexes information files: splits each file on ';' into fragments, stores new fragments as
/// details, feeds every fragment to <c>createInvertedIndex</c>, moves the file into the cache
/// folder of its domain and finally recomputes the term weights and submits the database changes.
/// </summary>
/// <param name="args">
/// "all" (case-insensitive) re-indexes every <c>tbInformasi</c> row; any other value, including
/// null, indexes only the rows whose <c>Indexed</c> flag is 0.
/// </param>
public void dataIndexing(string args)
{
    if (CheckInternetConnectionByPing() && CheckInternetConnectionbyWebPage())
    {
        InetCon = true;
    }

    try
    {
        List<tbInformasi> InfoList;
        if (string.Equals(args, "all", StringComparison.OrdinalIgnoreCase))
        {
            InfoList = db.tbInformasis.ToList();
        }
        else
        {
            InfoList = db.tbInformasis.Where(x => x.Indexed == 0).ToList();
        }

        for (int i = 0; i < InfoList.Count; i++)
        {
            tbInformasi info = InfoList[i];
            if (!File.Exists(info.Lokasi))
            {
                continue; // nothing on disk to index for this row
            }

            info.Indexed = 1;
            string[] Fragment = File.ReadAllText(info.Lokasi).Split(';');
            for (int j = 0; j < Fragment.Length; j++)
            {
                if (String.IsNullOrWhiteSpace(Fragment[j]))
                {
                    continue;
                }

                // Store the fragment as a detail unless an identical text is already in the table.
                tbInfDetail datadetil = CreateDataDetil(Fragment[j]);
                if (datadetil != null && db.tbInfDetails.Where(x => x.info.ToString() == datadetil.info.ToString()).FirstOrDefault() == null)
                {
                    info.tbInfDetails.Add(datadetil);
                }
                createInvertedIndex(Fragment[j], info.DomainID, info.Id, j);
            }

            string cachePath = Lingkungan.getDataCache() + info.tbDomain.Name + "\\" + info.Judul;
            MoveFileIntoCache(info.Lokasi, cachePath);
            info.Lokasi = cachePath;
        }

        if (File.Exists(Lingkungan.getInvertedIndexLocation()))
        {
            HitungPembobotanKata();
        }
    }
    finally
    {
        // Always reset the flag so a failed run does not leave web scraping switched on.
        InetCon = false;
    }

    db.SubmitChanges();
}

/// <summary>
/// Moves <paramref name="sourcePath"/> to <paramref name="targetPath"/>, creating the target
/// folder when needed and replacing a file that is already there. Does nothing when both paths
/// name the same file (e.g. when a cached file is re-indexed).
/// </summary>
private static void MoveFileIntoCache(string sourcePath, string targetPath)
{
    if (string.Equals(Path.GetFullPath(sourcePath), Path.GetFullPath(targetPath), StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    string targetDirectory = Path.GetDirectoryName(targetPath);
    if (!string.IsNullOrEmpty(targetDirectory))
    {
        Directory.CreateDirectory(targetDirectory); // no-op when the folder already exists
    }

    // File.Move refuses to overwrite, so an older cached copy has to be removed first.
    if (File.Exists(targetPath))
    {
        File.Delete(targetPath);
    }
    File.Move(sourcePath, targetPath);
}
/// <summary>
/// Creates the chatbot window: initialises the designer components, the storage locations,
/// the data context, the engine and the dialogue manager, then runs the login step.
/// </summary>
/// <remarks>
/// When the login is cancelled (or the login click throws) the application is shut down and the
/// constructor returns immediately instead of continuing to work on the disposed form.
/// </remarks>
public ChatbotForm()
{
    // Program initialisation
    InitializeComponent();
    Lingkungan.CreateLocation();
    db = new dbDataContext();
    Engine = new EngineActuator("MLM", db);
    bot = db.tbUsers.Where(x => x.Id == 0).FirstOrDefault(); // the bot is the user row with Id 0
    DM = new DialogueManager(db, bot);

    this.Text = bot.Name + " Chatbot";

    bool testing = true;
    if (!testing) // temporary code: skip the login dialog and use the user row with Id 1
    {
        currentUser = db.tbUsers.Where(x => x.Id == 1).FirstOrDefault();
    }
    else
    {
        bool loggedIn;
        try
        {
            LoginlogoutToolStripMenuItem.PerformClick();
            loggedIn = Login != DialogResult.Cancel;
        }
        catch (Exception)
        {
            // A failing login step is treated exactly like a cancelled one.
            loggedIn = false;
        }

        if (!loggedIn)
        {
            if (System.Windows.Forms.Application.MessageLoop)
            {
                System.Windows.Forms.Application.Exit(); // WinForms app
            }
            else
            {
                System.Environment.Exit(1); // Console app
            }
            this.Close();
            this.Dispose();
            return; // the form is disposed; do not touch its controls any further
        }
    }

    refreshListBoxChat();
    this.AcceptButton = this.buttonSend;
}
/// <summary>
/// Adds every alphanumeric word of <paramref name="data"/> to the inverted index and saves the index.
/// </summary>
/// <param name="data">Text to tokenise; it is split on every character outside A-Z, a-z and 0-9.</param>
/// <param name="domain">Domain id recorded in each new posting.</param>
/// <param name="infID">Information id recorded in each new posting.</param>
/// <param name="InfDetID">Detail id recorded in each new posting.</param>
public void createInvertedIndex(string data, int domain, int infID, int InfDetID)
{
    List<string> tokens = Regex.Split(data, @"[^A-Za-z0-9]").Where(t => t != string.Empty).ToList();
    List<string> stopwords = Lingkungan.getStopWordList();
    List<Term> index = Lingkungan.LoadInvertedIndex() ?? new List<Term>();

    for (int position = 0; position < tokens.Count; position++)
    {
        string lowered = tokens[position].ToLower();

        // Look the word up case-insensitively; register it when it is new.
        Term entry = index.FirstOrDefault(t => t.Word.ToLower().Equals(lowered));
        if (entry == null)
        {
            entry = new Term { Word = lowered, Jenis = JenisKata.Unknown };
            index.Add(entry);
        }

        // Try to resolve the word class: scrape when online, then read the cached page if present.
        if (entry.Jenis == JenisKata.Unknown)
        {
            if (InetCon)
            {
                ScraptDataFromWebKBBI(entry.Word);
            }

            string cachedPage = Lingkungan.getDataCacheKata() + entry.Word + ".html";
            if (File.Exists(cachedPage))
            {
                entry.Jenis = GetJenisKataFromScraptFile(entry.Word);
            }
        }

        // The stop-word flag is refreshed on every occurrence of the word.
        entry.StopWord = stopwords != null && stopwords.Any(s => s.ToLower().Equals(lowered));

        entry.Index.Add(new Location(domain, infID, InfDetID, position));
    }

    Lingkungan.SaveInvertedIndex(index);
}
/// <summary>
/// Converts the words of <c>Str</c> to index terms without filtering anything out.
/// </summary>
/// <param name="inpt">Mode switch; only "all" (case-insensitive) is recognised.</param>
/// <returns>
/// One entry per word of <c>Str</c>, in order; the entry is null when the word is not in the
/// inverted index (or no index is available). Returns null when <paramref name="inpt"/> is
/// null or anything other than "all".
/// </returns>
public List<Term> StringToTerm(string inpt)
{
    if (inpt == null || inpt.ToLower() != "all")
    {
        return null;
    }

    // A missing index is treated as empty, like the indexing code does.
    List<Term> idx = Lingkungan.LoadInvertedIndex() ?? new List<Term>();
    List<Term> data = new List<Term>();

    foreach (string item in Regex.Split(Str, @"[^A-Za-z0-9]").Where(i => i != string.Empty))
    {
        string lowered = item.ToLower();
        // Unknown words are kept as null entries so positions still line up with the words.
        data.Add(idx.FirstOrDefault(i => i.Word.ToLower() == lowered));
    }
    return data;
}
/// <summary>
/// Converts the words of <c>Str</c> to index terms, keeping only words that are present in the
/// inverted index and are not flagged as stop words.
/// </summary>
/// <returns>
/// The matching terms in word order; an empty list when nothing matches or no index is available.
/// </returns>
public List<Term> StringToTerm()
{
    List<Term> data = new List<Term>();

    List<Term> idx = Lingkungan.LoadInvertedIndex();
    if (idx == null)
    {
        return data; // no index yet, so no word can be resolved to a term
    }

    foreach (string item in Regex.Split(Str, @"[^A-Za-z0-9]").Where(i => i != string.Empty))
    {
        string lowered = item.ToLower();
        Term x = idx.FirstOrDefault(i => i.Word.ToLower() == lowered);
        if (x != null && x.StopWord == false)
        {
            data.Add(x);
        }
    }
    return data;
}
/// <summary>
/// Walks a data folder breadth-first and stores the paths of all files found, including those
/// in sub-folders, in <c>locations</c>.
/// </summary>
/// <param name="args">
/// "cache" crawls <c>Lingkungan.getDataCache()</c>; any other value crawls
/// <c>Lingkungan.getDataBaru()</c>.
/// </param>
/// <remarks>
/// When the start location does not exist a message box is shown and <c>locations</c> is set to
/// an empty list. A start location that is a single file yields no entries.
/// </remarks>
public void FileCrawler(string args) // uncalled
{
    Queue<string> ListLocation = new Queue<string>(); // folders still to be crawled
    List<string> FileLocation = new List<string>();   // files found so far

    string location = args == "cache"
        ? Lingkungan.getDataCache()  // crawl the cache folder
        : Lingkungan.getDataBaru();  // crawl the new-data folder

    if (!string.IsNullOrWhiteSpace(location) && (File.Exists(location) || Directory.Exists(location)))
    {
        ListLocation.Enqueue(location);
        while (ListLocation.Count > 0)
        {
            string process = ListLocation.Dequeue();

            // FileAttributes is a flags enum: test the Directory bit instead of comparing for
            // equality, otherwise folders that carry any extra attribute are silently skipped.
            bool isDirectory = (File.GetAttributes(process) & FileAttributes.Directory) == FileAttributes.Directory;
            if (!isDirectory)
            {
                continue;
            }

            foreach (string item in Directory.GetDirectories(process))
            {
                ListLocation.Enqueue(item);
            }
            foreach (string item2 in Directory.GetFiles(process))
            {
                FileLocation.Add(item2);
            }
        }
    }
    else
    {
        System.Windows.Forms.MessageBox.Show("Gagal Melakukan Crawling");
    }

    locations = FileLocation;
}
/// <summary>
/// Downloads the page for <paramref name="kata"/> from kbbi.web.id and stores it as
/// "&lt;lower-cased word&gt;.html" under <c>Lingkungan.getDataCacheKata()</c>.
/// </summary>
/// <param name="kata">Word to look up.</param>
/// <returns>true when the page was downloaded and written; false when anything failed.</returns>
public bool ScraptDataFromWebKBBI(string kata)
{
    try
    {
        // Escape the word so it cannot change the structure of the request URL.
        string url = "http://kbbi.web.id/" + Uri.EscapeDataString(kata);
        string cachePath = Lingkungan.getDataCacheKata();

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        string data;
        // Dispose the response and its stream even when reading fails, so the connection is released.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream receiveStream = response.GetResponseStream())
        {
            // Use the declared character set when there is one; otherwise the reader's default.
            string charset = response.CharacterSet;
            using (StreamReader reader = string.IsNullOrEmpty(charset)
                ? new StreamReader(receiveStream)
                : new StreamReader(receiveStream, Encoding.GetEncoding(charset)))
            {
                data = reader.ReadToEnd();
            }
        }

        System.IO.FileInfo file = new System.IO.FileInfo(cachePath + kata.ToLower() + ".html");
        file.Directory.Create(); // does nothing when the directory already exists
        System.IO.File.WriteAllText(file.FullName, data);
        return true;
    }
    catch (Exception)
    {
        // Best effort: any failure (network, encoding, file system) is reported as false.
        return false;
    }
}
/// <summary>
/// Reads the cached page of <paramref name="kata"/> and derives the word class from the first
/// recognised class abbreviation that appears two tokens after an "em" token.
/// </summary>
/// <param name="kata">Word whose cached file "&lt;lower-cased word&gt;.html" is parsed.</param>
/// <returns>
/// The detected word class, or <c>JenisKata.Unknown</c> when the page has no
/// <c>textarea#jsdata</c> node, the word or a class abbreviation is not found, or parsing fails.
/// </returns>
public JenisKata GetJenisKataFromScraptFile(string kata)
{
    JenisKata retur = JenisKata.Unknown;
    HtmlAgilityPack.HtmlDocument htmldoc = new HtmlAgilityPack.HtmlDocument();
    string location = Lingkungan.getDataCacheKata() + kata.ToLower() + ".html";
    htmldoc.Load(location);

    try
    {
        var nodes = htmldoc.DocumentNode.SelectNodes("//textarea[@id='jsdata']");
        if (nodes == null)
        {
            return retur; // SelectNodes yields null when nothing matches
        }

        // Flatten the node contents into alphanumeric tokens.
        List<string> toParse2 = new List<string>();
        foreach (HtmlNode node in nodes)
        {
            toParse2.AddRange(Regex.Split(node.ChildNodes[0].InnerHtml, @"[^A-Za-z0-9]").Where(i => i != string.Empty));
        }

        // Position of the first occurrence of the word (the last token is not considered).
        string target = kata.ToLower();
        int x = 0;
        for (int i = 0; i < toParse2.Count - 1; i++)
        {
            if (toParse2[i].ToLower().Equals(target))
            {
                x = i;
                break;
            }
        }

        if (x > 0)
        {
            // Start no earlier than index 2 so that [j - 2] is always valid; previously a word
            // found at index 1 raised an out-of-range error that silently ended the search.
            for (int j = Math.Max(x, 2); j < toParse2.Count - 2; j++)
            {
                // NOTE(review): the original tested [j - 2] == "em" twice; the loop bound
                // (Count - 2) suggests the second test may have been meant for [j + 2] - confirm.
                if (!toParse2[j - 2].ToLower().Equals("em"))
                {
                    continue;
                }

                JenisKata jenis = JenisKataFromAbbreviation(toParse2[j].ToLower());
                if (jenis != JenisKata.Unknown)
                {
                    retur = jenis;
                    break;
                }
            }
        }
        return retur;
    }
    catch (Exception)
    {
        // Best effort: an unexpected page layout leaves the word class unresolved.
        return retur;
    }
}

/// <summary>
/// Maps a lower-cased class abbreviation to its <c>JenisKata</c> value;
/// returns <c>JenisKata.Unknown</c> for any other token.
/// </summary>
private static JenisKata JenisKataFromAbbreviation(string abbreviation)
{
    switch (abbreviation)
    {
        case "n": return JenisKata.Benda;
        case "v": return JenisKata.Kerja;
        case "a": return JenisKata.Sifat;
        case "pron": return JenisKata.Ganti;
        case "adv": return JenisKata.Keterangan;
        case "p": return JenisKata.Tugas;
        case "num": return JenisKata.Bilangan;
        default: return JenisKata.Unknown;
    }
}