/// <summary>
/// Downloads the OCR text at <paramref name="ocr_url"/> and returns a preview made of
/// its first space-separated tokens (at most 300), each followed by a single space.
/// </summary>
/// <param name="ocr_url">Location of the plain-text OCR file.</param>
/// <returns>
/// The preview text, or a fixed "not available" message when
/// <c>RemoteFileExists</c> reports that the file cannot be reached.
/// </returns>
public static string readOCR (Uri ocr_url)
{
    const int previewWordCount = 300;

    AzureSearchServiceController checkUrl = new AzureSearchServiceController();
    if (!checkUrl.RemoteFileExists(ocr_url.ToString()))
    {
        return "Unfortunately OCR is not available for this document";
    }

    string webData;
    // WebClient is IDisposable; release it as soon as the download has finished.
    using (System.Net.WebClient wc = new System.Net.WebClient())
    {
        byte[] raw = wc.DownloadData(ocr_url);
        webData = System.Text.Encoding.UTF8.GetString(raw);
    }

    string[] ocrSplit = webData.Split(' ');

    // Short documents have fewer than 300 tokens; never index past the end of the array.
    int wordsToTake = System.Math.Min(previewWordCount, ocrSplit.Length);

    System.Text.StringBuilder preview = new System.Text.StringBuilder();
    for (int i = 0; i < wordsToTake; i++)
    {
        // Trailing space after every token (including the last) matches the original output.
        preview.Append(ocrSplit[i]).Append(' ');
    }

    return preview.ToString();
}
/// <summary>
/// Rebuilds the search index, runs the query identified by <paramref name="queryID"/>
/// and exposes one page (15 items) of de-duplicated results to the view through ViewBag.
/// </summary>
/// <param name="queryID">Query identifier; echoed to the view and passed to the search helpers.</param>
/// <param name="page">1-based page number; null or any value below 1 is treated as page 1.</param>
/// <returns>The default view for this action.</returns>
public ActionResult Index(string queryID, int? page)
{
    const int pageSize = 15;

    ViewBag.queryID = queryID;

    // Normalise the service URL so that it ends in ".../indexes/".
    if (!serviceUrl.EndsWith("/"))
    {
        serviceUrl += "/";
    }
    if (!serviceUrl.EndsWith("indexes/"))
    {
        serviceUrl += "indexes/";
    }

    try
    {
        // Delete the index if it exists, then recreate it.
        DeleteIndex(serviceUrl + indexName);
        if (CreateIndex() == false)
        {
            ViewBag.Error = "Error From Create Index";
        }

        // Add documents to the index.
        if (PostDocuments("data2.json") == false)
        {
            ViewBag.ErrorPostDoc = "Error from post documents0";
        }
        if (PostDocuments("data3.json") == false)
        {
            ViewBag.ErrorPostDoc = "Error from post documents0";
        }

        // Wait 5 seconds for the posted data to be indexed.
        Thread.Sleep(TimeSpan.FromSeconds(5));

        setUserInput(queryID);

        AzureSearchServiceController azureSearchService = new AzureSearchServiceController();
        List<int> booleanSelection;
        string[] keywordOfFields;
        String toPrintQuerySelection = "";
        List<List<BookModel>> listOfQueries;
        azureSearchService.initialiseListsFromAzureSearch(queryID, out keywordOfFields, out booleanSelection, out listOfQueries, ref toPrintQuerySelection);
        ViewBag.searchQuery = toPrintQuerySelection;

        // Start from the entries of list 7 whose title also appears in list 0.
        // NOTE(review): the meaning of indexes 0 and 7 is defined by
        // initialiseListsFromAzureSearch, which is not visible here - confirm.
        var title = listOfQueries[0].Select(item => item.title);
        var finalResults = listOfQueries[7].Where(item => title.Contains(item.title));

        int position;
        List<BookModel> finalList = new List<BookModel>();
        azureSearchService.booleanFunctionality(queryID, booleanSelection, keywordOfFields, out position, listOfQueries, ref finalResults, ref finalList);
        insertElements(finalResults, ref finalList);

        // Keep only the first book for each distinct title.
        finalList = finalList.GroupBy(x => x.title).Select(x => x.First()).ToList();

        List<BookModel> bookListIntersection = finalList.ToList();
        ViewBag.ListResponse = bookListIntersection;
        resultingList = bookListIntersection.ToList();

        int bookListCount = bookListIntersection.Count;
        bookListTotal = bookListCount;
        ViewBag.resultCount = bookListCount;
    }
    catch (Exception e)
    {
        // Best effort: the page is still rendered from whatever resultingList holds,
        // but the failure is recorded instead of being silently discarded.
        // Exception.ToString() already includes the chain of inner exceptions.
        System.Diagnostics.Trace.TraceError("Search index request failed: {0}", e);
    }

    var pagedResultList = resultingList.ToList();
    int pageResultTotal = pagedResultList.Count;
    ViewBag.resultTotal = pageResultTotal;

    // Page numbers are 1-based; clamp so a missing, zero or negative value shows page 1
    // rather than being passed on to ToPagedList.
    int pageNumber = System.Math.Max(1, page ?? 1);
    var onePageOfProducts = pagedResultList.ToPagedList(pageNumber, pageSize);
    ViewBag.OnePageOfProducts = onePageOfProducts;
    ViewBag.PageNumber = pageNumber;

    // "Displaying XX - YY results": 1-based index of the first and last result on this page.
    int fromResult = 0;
    int toResult = 0;
    if (bookListTotal > 0)
    {
        fromResult = (pageSize * (pageNumber - 1)) + 1;
        // The last page may be only partially filled.
        toResult = System.Math.Min(pageSize * pageNumber, bookListTotal);
    }

    ViewBag.fromResult = fromResult;
    ViewBag.toResult = toResult;

    return View();
}
//
// GET: /StatisticalAnalysis/
/// <summary>
/// Shows word statistics for the OCR text at <paramref name="website"/>: the most frequent
/// words, plus the frequency of an optional single word, bigram and trigram read from the
/// request fields "txtSearch", "txtSearchBiagram" and "txtSearchTriagram".
/// </summary>
/// <param name="website">Location of the OCR text to analyse.</param>
/// <param name="titleOfBook">Book title, passed through to the view.</param>
/// <returns>
/// The statistics view, or a redirect to ErrorPage/ErrorPage when the URL is missing
/// or <c>RemoteFileExists</c> reports it as unreachable.
/// </returns>
public ActionResult Index(Uri website, string titleOfBook)
{
    AzureSearchServiceController checkUrl = new AzureSearchServiceController();

    // A missing URL is handled like an unreachable one instead of throwing on ToString().
    if (website == null || !checkUrl.RemoteFileExists(website.ToString()))
    {
        return RedirectToAction("ErrorPage", "ErrorPage");
    }

    List<string> MostFrequentWords;
    List<int> FrequencyOfMostFrequentWords;
    frequentWords(website, out MostFrequentWords, out FrequencyOfMostFrequentWords);
    ViewBag.FrequencyOfMostFrequentWords = FrequencyOfMostFrequentWords;
    ViewBag.MostFrequentWords = MostFrequentWords;
    ViewBag.titleOfBook = titleOfBook;
    ViewBag.website = website;

    // User-supplied search terms, echoed back to the view.
    string keyword = Request["txtSearch"];
    ViewBag.keyword = keyword;

    string keywordBiagram = Request["txtSearchBiagram"];
    ViewBag.keywordBiagram = keywordBiagram;

    string keywordTriagram = Request["txtSearchTriagram"];
    // Stored under its own key; writing it to keywordBiagram would clobber the bigram value.
    ViewBag.keywordTriagram = keywordTriagram;

    char[] delimiters = new char[] { ' ' };

    // ---- Single word ----
    if (String.IsNullOrEmpty(keyword))
    {
        ViewBag.initialState = 0;
    }
    else
    {
        string[] userKeyword = keyword.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);
        if (userKeyword.Length != 1)
        {
            // The user entered zero or more than one word.
            ViewBag.MessageOneWord = "Please enter one word";
        }
        else
        {
            int frequencyWordInOCR = frequentWordInOCR(website, keyword.ToLower().Trim());
            if (frequencyWordInOCR == 0)
            {
                ViewBag.WordNotFound = "Word: " + keyword + " " + "was not found";
            }
            else
            {
                ViewBag.wordFound = "The frequency of the word " + keyword + " " + "is " + frequencyWordInOCR;
            }
        }
    }

    // ---- Bigram (exactly two words) ----
    if (String.IsNullOrEmpty(keywordBiagram))
    {
        ViewBag.initialStateBiagram = 0;
    }
    else
    {
        string[] wordsBiagram = keywordBiagram.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);
        if (wordsBiagram.Length != 2)
        {
            ViewBag.MessageBiagram = "Please enter a valid biagram(biagram consists of two words)";
        }
        else
        {
            int frequencyOfBiagram = frequentOfBiagrams(website, keywordBiagram.ToLower().Trim());
            if (frequencyOfBiagram == 0)
            {
                ViewBag.WordNotFoundBiagram = "Word: " + keywordBiagram + " " + "was not found";
            }
            else
            {
                ViewBag.wordFoundBiagram = "The frequency of a biagram " + keywordBiagram + " " + "is " + frequencyOfBiagram;
            }
        }
    }

    // ---- Trigram (exactly three words) ----
    if (String.IsNullOrEmpty(keywordTriagram))
    {
        ViewBag.initialStateTriagram = 0;
    }
    else
    {
        string[] wordsTriagram = keywordTriagram.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);
        if (wordsTriagram.Length != 3)
        {
            // The message now matches the three-word check above.
            ViewBag.MessageTriagram = "Please enter a valid triagram(triagram consists of three words)";
        }
        else
        {
            // NOTE(review): the trigram lookup reuses frequentOfBiagrams; presumably that
            // helper counts phrases of any length - confirm against its implementation.
            int frequencyOfTriagram = frequentOfBiagrams(website, keywordTriagram.ToLower().Trim());
            if (frequencyOfTriagram == 0)
            {
                ViewBag.WordNotFoundTriagram = "Word: " + keywordTriagram + " " + "was not found";
            }
            else
            {
                ViewBag.wordFoundTriagram = "The frequency of a triagram " + keywordTriagram + " " + "is " + frequencyOfTriagram;
            }
        }
    }

    return View();
}
/// <summary>
/// Checks that <c>insertANDElemensToFinalList()</c> produces a route pointing at the
/// "Index" action of the "AzureSearchService" controller.
/// </summary>
public void AzureSearch()
{
    AzureSearchServiceController controller = new AzureSearchServiceController();

    // NOTE(review): assumes the call takes no arguments and returns a result exposing
    // RouteValues, as the original statement implied - confirm against the controller.
    var results = controller.insertANDElemensToFinalList();

    Assert.AreEqual("Index", results.RouteValues["action"]);
    Assert.AreEqual("AzureSearchService", results.RouteValues["controller"]);
}
/// <summary>
/// Checks that <c>SearchByPublisher</c> redirects to the "Index" action of the
/// "AzureSearchService" controller when given a publisher name.
/// </summary>
public void Search_By_Publisher()
{
    // Arrange: sample book record; only its publisher is passed to the action under test.
    List<string> tagsList = new List<string> { "lewis", "Shetlander", "shetlander", "Lewis" };
    List<string> creators_and_contributorsList = new List<string> { "GUNN, John - M.A., D.Sc" };

    BookModel model = new BookModel
    {
        azure_url = "http://blmc.blob.core.windows.net/1894/001544024_0_000191_1_1894_plates.jpg",
        creators_and_contributors = creators_and_contributorsList,
        date = 1894,
        electronicsysnum = 014812388,
        flickr_original_jpeg = "http://farm4.staticflickr.com/3811/11230026636_0021861570_o.jpg",
        flickr_url = "https://flickr.com/photos/britishlibrary/11230026636",
        fromshelfmark = "British Library HMNTS 012630.e.11.",
        height = 1896,
        idx = 1,
        ocrtext = "http://blmc.blob.core.windows.net/ocrplaintext/001544024_0.txt",
        place = "London",
        printsysnum = 001218010,
        publisher = "T. Nelson & Sons",
        scannumber = 000191,
        sizebracket = "plates",
        tags = tagsList,
        title = "Sons of the Vikings. An Orkney story",
        vol = 0,
        width = 1256
    };

    AzureSearchServiceController controller = new AzureSearchServiceController();

    // Act
    var results = (RedirectToRouteResult)controller.SearchByPublisher(model.publisher);

    // Assert: the redirect must target AzureSearchService/Index.
    Assert.AreEqual("Index", results.RouteValues["action"]);
    Assert.AreEqual("AzureSearchService", results.RouteValues["controller"]);
}
/// <summary>
/// Calls <c>Download</c> with the fields of a sample book record and checks that the
/// returned result is exactly a <c>FileStreamResult</c>.
/// </summary>
public void Download_Test_VALID_URL()
{
    // Arrange: sample book record whose fields are forwarded to Download.
    List<string> sampleTags = new List<string> { "lewis", "Shetlander", "shetlander", "Lewis" };
    List<string> sampleCreators = new List<string> { "GUNN, John - M.A., D.Sc" };

    BookModel book = new BookModel
    {
        azure_url = "http://blmc.blob.core.windows.net/1894/001544024_0_000191_1_1894_plates.jpg",
        creators_and_contributors = sampleCreators,
        date = 1894,
        electronicsysnum = 014812388,
        flickr_original_jpeg = "http://farm4.staticflickr.com/3811/11230026636_0021861570_o.jpg",
        flickr_url = "https://flickr.com/photos/britishlibrary/11230026636",
        fromshelfmark = "British Library HMNTS 012630.e.11.",
        height = 1896,
        idx = 1,
        // NOTE(review): the original comment labelled this OCR URL "INVALID" although the
        // test name says VALID_URL - confirm which is intended.
        ocrtext = "http://blmc.blob.core.windows.net/ocrplaintext/001544024_0.txt",
        place = "London",
        printsysnum = 001218010,
        publisher = "T. Nelson & Sons",
        scannumber = 000191,
        sizebracket = "plates",
        tags = sampleTags,
        title = "Sons of the Vikings. An Orkney story",
        vol = 0,
        width = 1256
    };

    AzureSearchServiceController controller = new AzureSearchServiceController();
    Uri ocrUri = new Uri(book.ocrtext);

    // Act
    // NOTE(review): book.tags.ToString() yields the List<string> type name rather than
    // the tag values - confirm what Download expects for this argument.
    var downloadResult = controller.Download(
        ocrUri,
        book.azure_url,
        book.creators_and_contributors,
        book.date.ToString(),
        book.electronicsysnum.ToString(),
        book.flickr_original_jpeg,
        book.flickr_url,
        book.fromshelfmark,
        book.height.ToString(),
        book.idx.ToString(),
        book.place,
        book.printsysnum.ToString(),
        book.publisher,
        book.scannumber.ToString(),
        book.sizebracket,
        book.tags.ToString(),
        book.title,
        book.vol.ToString(),
        book.width.ToString());

    // Assert
    Assert.AreEqual(typeof(FileStreamResult), downloadResult.GetType());
}