/// <summary>
/// Builds a short text preview from the OCR file at <paramref name="ocr_url"/>.
/// </summary>
/// <param name="ocr_url">Address of the OCR text file to download.</param>
/// <returns>
/// Up to the first 300 space-separated tokens of the downloaded text, each
/// followed by a single space; or a fixed "not available" message when
/// <c>RemoteFileExists</c> reports that the URL cannot be used.
/// </returns>
public static string readOCR (Uri ocr_url)
{
    // Maximum number of space-separated tokens included in the preview.
    const int PreviewWordCount = 300;

    AzureSearchServiceController checkUrl = new AzureSearchServiceController();
    if (!checkUrl.RemoteFileExists(ocr_url.ToString()))
    {
        return "Unfortunately OCR is not available for this document";
    }

    byte[] raw;
    // WebClient is IDisposable; release it as soon as the download completes.
    using (System.Net.WebClient wc = new System.Net.WebClient())
    {
        raw = wc.DownloadData(ocr_url);
    }

    string webData = System.Text.Encoding.UTF8.GetString(raw);
    string[] ocrSplit = webData.Split(' ');

    // Clamp to the number of tokens actually present: short documents used to
    // throw IndexOutOfRangeException because the loop always ran to 300.
    int tokensToTake = Math.Min(PreviewWordCount, ocrSplit.Length);

    System.Text.StringBuilder preview = new System.Text.StringBuilder();
    for (int i = 0; i < tokensToTake; i++)
    {
        // A trailing space after every token (including the last) is kept so
        // the output for long documents is identical to the previous version.
        preview.Append(ocrSplit[i]);
        preview.Append(" ");
    }

    return preview.ToString();
}
//
// GET: /StatisticalAnalysis/
/// <summary>
/// Renders the statistical-analysis view for the OCR text at
/// <paramref name="website"/>. Publishes the most frequent words to the view
/// and, when the request carries search keywords, the frequency of a single
/// word ("txtSearch"), a two-word phrase ("txtSearchBiagram") and a
/// three-word phrase ("txtSearchTriagram").
/// </summary>
/// <param name="website">Address of the OCR text to analyse.</param>
/// <param name="titleOfBook">Title passed through to the view unchanged.</param>
/// <returns>
/// The default view, or a redirect to the "ErrorPage" action when
/// <paramref name="website"/> is null or <c>RemoteFileExists</c> rejects it.
/// </returns>
public ActionResult Index(Uri website, string titleOfBook)
{
    AzureSearchServiceController checkUrl = new AzureSearchServiceController();

    // A missing URL is handled like an unreachable one instead of crashing
    // with a NullReferenceException on website.ToString().
    if (website == null || !checkUrl.RemoteFileExists(website.ToString()))
    {
        return RedirectToAction("ErrorPage", "ErrorPage");
    }

    // Both lists are produced by frequentWords through its out parameters,
    // so no pre-allocation is needed.
    List<string> MostFrequentWords;
    List<int> FrequencyOfMostFrequentWords;
    frequentWords(website, out MostFrequentWords, out FrequencyOfMostFrequentWords);

    ViewBag.FrequencyOfMostFrequentWords = FrequencyOfMostFrequentWords;
    ViewBag.MostFrequentWords = MostFrequentWords;
    ViewBag.titleOfBook = titleOfBook;
    ViewBag.website = website;

    // Keyword whose single-word frequency the user asked for.
    string keyword = Request["txtSearch"];
    ViewBag.keyword = keyword;

    // Two-word phrase (bigram) the user asked for.
    string keywordBiagram = Request["txtSearchBiagram"];
    ViewBag.keywordBiagram = keywordBiagram;

    // Three-word phrase (trigram) the user asked for. This used to be written
    // to ViewBag.keywordBiagram, which clobbered the bigram value above.
    string keywordTriagram = Request["txtSearchTriagram"];
    ViewBag.keywordTriagram = keywordTriagram;

    char[] delimiters = new char[] { ' ' };

    ////////////////////////////// Single word //////////////////////////////
    if (String.IsNullOrEmpty(keyword))
    {
        ViewBag.initialState = 0;
    }
    else
    {
        string[] userKeyword = keyword.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);

        // Tell the user when the input is not exactly one word.
        if (userKeyword.Length != 1)
        {
            ViewBag.MessageOneWord = "Please enter one word";
        }
        else
        {
            int frequencyWordInOCR = frequentWordInOCR(website, keyword.ToLower().Trim());
            if (frequencyWordInOCR == 0)
            {
                ViewBag.WordNotFound = "Word: " + keyword + " " + "was not found";
            }
            else
            {
                ViewBag.wordFound = "The frequency of the word " + keyword + " " + "is " + frequencyWordInOCR;
            }
        }
    }

    ////////////////////////////// Bigram //////////////////////////////
    if (String.IsNullOrEmpty(keywordBiagram))
    {
        ViewBag.initialStateBiagram = 0;
    }
    else
    {
        string[] wordsBiagram = keywordBiagram.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);

        // A bigram must consist of exactly two words.
        if (wordsBiagram.Length != 2)
        {
            ViewBag.MessageBiagram = "Please enter a valid biagram(biagram consists of two words)";
        }
        else
        {
            int frequencyOfBiagram = frequentOfBiagrams(website, keywordBiagram.ToLower().Trim());
            if (frequencyOfBiagram == 0)
            {
                ViewBag.WordNotFoundBiagram = "Word: " + keywordBiagram + " " + "was not found";
            }
            else
            {
                ViewBag.wordFoundBiagram = "The frequency of a biagram " + keywordBiagram + " " + "is " + frequencyOfBiagram;
            }
        }
    }

    ////////////////////////////// Trigram //////////////////////////////
    if (String.IsNullOrEmpty(keywordTriagram))
    {
        ViewBag.initialStateTriagram = 0;
    }
    else
    {
        string[] wordsTriagram = keywordTriagram.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);

        // A trigram must consist of exactly three words; the message used to
        // say "two words", contradicting this check.
        if (wordsTriagram.Length != 3)
        {
            ViewBag.MessageTriagram = "Please enter a valid triagram(triagram consists of three words)";
        }
        else
        {
            // NOTE(review): the trigram count is obtained from frequentOfBiagrams;
            // confirm that helper handles three-word phrases correctly.
            int frequencyOfTriagram = frequentOfBiagrams(website, keywordTriagram.ToLower().Trim());
            if (frequencyOfTriagram == 0)
            {
                ViewBag.WordNotFoundTriagram = "Word: " + keywordTriagram + " " + "was not found";
            }
            else
            {
                ViewBag.wordFoundTriagram = "The frequency of a triagram " + keywordTriagram + " " + "is " + frequencyOfTriagram;
            }
        }
    }

    return View();
}