/// <summary>
/// Retrieves the JSON text of an article from the bucket archive it is stored in.
/// </summary>
/// <remarks>
/// The bucket file is opened from the input directory as a zip archive. The first entry of
/// that archive is deserialized with <see cref="BinaryFormatter"/> into a string dictionary,
/// which is then looked up by the article's unzipped JSON file name.
/// </remarks>
/// <param name="article">Article whose zipped JSON file name and bucket file name identify what to read.</param>
/// <returns>
/// The value stored under the article's JSON file name, or <see cref="String.Empty"/> when the
/// archive has no entries, the payload is not a string dictionary, or the name is not present.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="article"/> is null.</exception>
public string GetJsonArticle(Article article)
{
    if (article == null)
    {
        throw new ArgumentNullException(nameof(article));
    }

    var jsonFileName = Zipper.Unzip(article.ZippedJsonFileName.ToByteArray());
    var bucketFileName = Zipper.Unzip(article.ZippedBucketFileName.ToByteArray());
    var bucketPath = Path.Combine(_inputDirectory, bucketFileName);

    Dictionary<String, String> unpackedDictionary = null;

    // The bucket is only read, so request read access explicitly; the default for this
    // constructor is read/write, which fails on read-only files.
    using (var fs = new FileStream(bucketPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (var zip = new ZipArchive(fs))
    {
        // Only the first entry is consulted; an empty archive simply yields no dictionary.
        var entry = zip.Entries.FirstOrDefault();
        if (entry != null)
        {
            using (var entryStream = entry.Open())
            {
                // NOTE(review): BinaryFormatter is unsafe on untrusted data and is removed in
                // .NET 9. It is kept here because the existing bucket files are written in
                // this format; migrate the storage format before dropping it.
                var serializer = new BinaryFormatter();

                // 'as' yields null when the payload is some other type; handled below.
                unpackedDictionary = serializer.Deserialize(entryStream) as Dictionary<String, String>;
            }
        }
    }

    if (unpackedDictionary != null && unpackedDictionary.TryGetValue(jsonFileName, out var json))
    {
        return json;
    }

    return String.Empty;
}
/// <summary>
/// Searches the keyword index for the articles shared by the words of a phrase.
/// </summary>
/// <remarks>
/// The phrase is lower-cased and split on spaces. Words shorter than three characters, words
/// whose index file does not exist, and words absent from their index are skipped rather than
/// treated as a failed match. The first word that is found seeds the result with those of its
/// articles that have a non-blank title; every later word that is found narrows the result to
/// that word's articles whose JSON file name is already in the result.
/// </remarks>
/// <param name="phraseToSearch">Space-separated words to look up. Null or blank yields an empty result.</param>
/// <returns>The articles common to all words that were found; empty when no word was found.</returns>
public Articles SearchArticles(string phraseToSearch)
{
    var commonArticles = new Articles();

    if (String.IsNullOrWhiteSpace(phraseToSearch))
    {
        return commonArticles;
    }

    var smallKeyWords = phraseToSearch
        .ToLower()
        .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
        .Distinct()
        .ToList();

    bool haveCommonArticlesBeenSet = false;

    foreach (var keyWord in smallKeyWords)
    {
        if (keyWord.Length < 3)
        {
            // No interest in short words.
            continue;
        }

        // Index files are keyed by the first three characters of the word (already
        // lower-case here); prefixes without a dedicated index fall back to REST.
        var keyWordIndex = keyWord.Substring(0, 3);
        if (!ArticlesHubDictionary.ContainsKey(keyWordIndex))
        {
            keyWordIndex = REST;
        }

        var indexPath = Path.Combine(_outputDirectory, "index", $"index_{keyWordIndex}.bin.zip");
        if (!File.Exists(indexPath))
        {
            continue;
        }

        // Load the index file and look the word up in it.
        var articlesHub = new ArticlesHub();
        articlesHub.Deserialize(indexPath);

        if (!articlesHub.KeyWordsDictionary.Dictionary.TryGetValue(keyWord, out var articlesFound))
        {
            continue;
        }

        if (!haveCommonArticlesBeenSet)
        {
            // First word found: seed the result, dropping articles with a blank title.
            var nonEmptyTitlesList = articlesFound.List
                .Where(x => !String.IsNullOrWhiteSpace(Zipper.Unzip(x.ZippedTitle.ToByteArray())));
            commonArticles.List.AddRange(nonEmptyTitlesList);
            haveCommonArticlesBeenSet = true;
        }
        else
        {
            // Later words: keep only this word's articles already present in the result,
            // matching on the unzipped JSON file name.
            var commonFileNames = commonArticles.List
                .Select(x => Zipper.Unzip(x.ZippedJsonFileName.ToByteArray()))
                .ToHashSet();

            var narrowedArticles = new Articles();
            foreach (var articleFound in articlesFound.List)
            {
                var articleFoundFileName = Zipper.Unzip(articleFound.ZippedJsonFileName.ToByteArray());
                if (commonFileNames.Contains(articleFoundFileName))
                {
                    narrowedArticles.List.Add(articleFound);
                }
            }

            commonArticles = narrowedArticles;
        }

        if (!commonArticles.List.Any())
        {
            // Once the result is empty no later word can add to it, so stop loading indexes.
            break;
        }
    }

    return commonArticles;
}