/// <summary>
/// Indexes the files located in the specified folder.
/// </summary>
/// <remarks>
/// Every file matching <c>p_Args.include</c> (optionally searched recursively) is turned into a
/// <see cref="PushDocument"/> and sent in batches of <c>p_Args.batchSize</c>. Once all files were sent,
/// documents of the source older than the ordering ID created at the start of the run are deleted.
/// </remarks>
/// <param name="p_Args">Parsed command-line arguments.</param>
/// <exception cref="Exception">
/// Thrown when an enumerated file has a full path that does not start with the resolved source folder.
/// </exception>
private static void IndexFiles(ProgramArguments p_Args)
{
    // Normalize the folder so it always ends with a separator: it is used below both as a
    // containment prefix and to compute the relative path that gets printed.
    string folder = Path.GetFullPath(p_Args.folder);
    if (!folder.EndsWith(Path.DirectorySeparatorChar)) {
        folder += Path.DirectorySeparatorChar;
    }
    Console.WriteLine($"Pushing files \"{p_Args.include}\" from folder \"{folder}\"...");

    // The ordering ID is created before anything is pushed; it is handed to every batch and,
    // at the end, to DeleteDocumentsOlderThan.
    ulong orderingId = RequestOrderingUtilities.CreateOrderingId();
    ICoveoPlatformConfig platformConfig = new CoveoPlatformConfig(GetPushApiUrl(p_Args), GetPlatformApiUrl(p_Args), p_Args.apikey, p_Args.organizationid);
    using (ICoveoPlatformClient platformClient = new CoveoPlatformClient(platformConfig)) {
        IList<PushDocument> documentBatch = new List<PushDocument>();
        SearchOption searchOption = p_Args.recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;

        foreach (FileInfo fileInfo in new DirectoryInfo(folder).EnumerateFiles(p_Args.include, searchOption)) {
            // Ordinal comparison: this is a safety check on raw path characters, so it must not be
            // subject to the culture-specific folding/ignoring rules of the default overload.
            if (!fileInfo.FullName.StartsWith(folder, StringComparison.Ordinal)) {
                throw new Exception("Unexpected file gathered from outside the source folder.");
            }
            Console.WriteLine(fileInfo.FullName.Substring(folder.Length));

            PushDocument document = new PushDocument(new Uri(fileInfo.FullName).AbsoluteUri) {
                ModifiedDate = fileInfo.LastWriteTimeUtc
            };
            document.AddMetadata("title", fileInfo.Name);
            document.AddMetadata("fileextension", fileInfo.Extension);

            // Empty files are pushed with their metadata only.
            if (fileInfo.Length > 0) {
                PushDocumentHelper.SetBinaryContentFromFileAndCompress(document, fileInfo.FullName);
            }

            documentBatch.Add(document);
            if (documentBatch.Count >= p_Args.batchSize) {
                // Push this batch of documents.
                // NOTE(review): the batch is not emptied here; presumably SendBatch clears it - confirm.
                SendBatch(platformClient, documentBatch, p_Args.sourceid, orderingId);
            }
        }

        // Send the (partial) final batch of documents.
        SendBatch(platformClient, documentBatch, p_Args.sourceid, orderingId);

        // Delete the already indexed files that no longer exist.
        platformClient.DocumentManager.DeleteDocumentsOlderThan(p_Args.sourceid, orderingId, null);
    }
}
/// <summary>
/// Crawls the national Pokedex listing of pokemondb.net and pushes one document per listed entry,
/// one generation at a time.
/// </summary>
/// <remarks>
/// For each generation block of the listing page, the detail page of every entry is fetched, a
/// <c>PokeDexItem</c> is filled from the listing card and the detail tables, and the resulting
/// <see cref="PushDocument"/> list is handed to <c>PushToSource</c>. When the run is over the method waits
/// for a key press and exits the process if that key is Enter.
/// </remarks>
private static void StartPokemonCrawler()
{
    // Scraped numbers are parsed with the invariant culture so the result does not depend on the
    // regional settings of the machine running the crawler.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    // The method is synchronous, so the crawler tasks are waited on with .Result.
    var htmlDocument = CreateCrawler("https://pokemondb.net/pokedex/national").Result;
    var pokemonByGen = htmlDocument.DocumentNode.Descendants("div")
        .Where(node => node.GetAttributeValue("class", "").Equals("infocard-list infocard-list-pkmn-lg"))
        .ToList();
    Console.WriteLine($"{pokemonByGen.Count} generations of pokemons to add");

    for (int i = 0; i < pokemonByGen.Count; i++) {
        var pokemons = new List<PushDocument>();
        var allPokemonsInSpecificGen = pokemonByGen[i].Descendants("div")
            .Where(node => node.GetAttributeValue("class", "").Equals("infocard "))
            .ToList();

        foreach (var pokemonInGen in allPokemonsInSpecificGen) {
            // The name anchor of the card gives both the display name and the link to the detail page.
            var nameLink = pokemonInGen.Descendants("a")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("ent-name"));
            var pokemonUrl = $"https://pokemondb.net/{nameLink.ChildAttributes("href").FirstOrDefault().Value}";

            var pokemonDetailsHtmlDocument = CreateCrawler(pokemonUrl).Result;
            var description = pokemonDetailsHtmlDocument.DocumentNode.Descendants("div")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("grid-col span-md-6 span-lg-8"))
                .InnerHtml;

            // The detail page holds several "vitals-table" tables: the first one is read as the
            // Pokedex data and the third one as the breeding data.
            var details = pokemonDetailsHtmlDocument.DocumentNode.Descendants("table")
                .Where(node => node.GetAttributeValue("class", "").Equals("vitals-table"))
                .ToList();
            var pokedexDataTable = details.FirstOrDefault().Descendants("td").ToList();
            var breedingTable = details[2].Descendants("td").ToList();

            // A gender cell without a comma yields a single section; both ratios are then set to 0.
            var genderArray = breedingTable[1].InnerText.Split(',');
            bool hasGenderRatio = genderArray.Length != 1;

            var pokemon = new PokeDexItem() {
                CharacterName = nameLink.InnerText,
                ImageUrl = pokemonInGen.Descendants("span")
                    .FirstOrDefault(node => node.GetAttributeValue("class", "").Contains("img-fixed img-sprite"))
                    .ChildAttributes("data-src").FirstOrDefault().Value,
                Generation = $"Generation {i + 1}",
                UrlToStats = pokemonUrl,
                Types = pokemonInGen.Descendants("a")
                    .Where(node => node.GetAttributeValue("class", "").Contains("itype"))
                    .Select(s => s.InnerText)
                    .ToList(),
                Weight = decimal.Parse(GetFirstSectionInString(pokedexDataTable[4].InnerText), invariantCulture),
                description = RemoveHtmlTags(description),
                Number = Int32.Parse(pokedexDataTable[0].InnerText, invariantCulture),
                Male = hasGenderRatio ? decimal.Parse(GetFirstSectionInString(genderArray.First()), invariantCulture) : 0,
                Female = hasGenderRatio ? decimal.Parse(GetFirstSectionInString(genderArray.Last().Trim()), invariantCulture) : 0
            };

            // Check if the evolution section has any data; if yes, load the parent.
            // A section without a name anchor leaves Parent null instead of throwing.
            var evolutionDiv = pokemonDetailsHtmlDocument.DocumentNode.Descendants("div")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("infocard-list-evo"));
            if (evolutionDiv != null) {
                pokemon.Parent = evolutionDiv.Descendants("a")
                    .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("ent-name"))
                    ?.InnerText;
            }

            var documentToAdd = new PushDocument(pokemon.UrlToStats) {
                ClickableUri = pokemon.UrlToStats,
                ModifiedDate = DateTime.UtcNow,
                Metadata = {
                    new KeyValuePair<string, JToken>("charactername", pokemon.CharacterName),
                    new KeyValuePair<string, JToken>("ImageUrl", pokemon.ImageUrl),
                    new KeyValuePair<string, JToken>("generationstring", pokemon.Generation),
                    new KeyValuePair<string, JToken>("UrlToStats", pokemon.UrlToStats),
                    new KeyValuePair<string, JToken>("Types", string.Join(";", pokemon.Types)),
                    new KeyValuePair<string, JToken>("description", pokemon.description),
                    new KeyValuePair<string, JToken>("pokemonnumber", pokemon.Number),
                    new KeyValuePair<string, JToken>("pokemonweight", pokemon.Weight),
                    new KeyValuePair<string, JToken>("male", pokemon.Male),
                    new KeyValuePair<string, JToken>("female", pokemon.Female),
                }
            };

            // Add the folding fields if an evolution parent is present.
            if (!string.IsNullOrWhiteSpace(pokemon.Parent)) {
                documentToAdd.Metadata.Add(new KeyValuePair<string, JToken>("foldingchild", pokemon.CharacterName));
                documentToAdd.Metadata.Add(new KeyValuePair<string, JToken>("foldingparent", pokemon.Parent));
                documentToAdd.Metadata.Add(new KeyValuePair<string, JToken>("foldingcollection", pokemon.Parent));
            }

            pokemons.Add(documentToAdd);
        }

        PushToSource(pokemons);
        // Report the same 1-based generation number that is stored in the documents' metadata.
        Console.WriteLine($"generation {i + 1} added...");
    }

    Console.WriteLine("Successful....");
    Console.WriteLine("Press Enter to exit the program...");
    ConsoleKeyInfo keyinfor = Console.ReadKey(true);
    if (keyinfor.Key == ConsoleKey.Enter) {
        System.Environment.Exit(0);
    }
}
/// <summary>
/// Builds the <see cref="PushDocument"/> for a GSA feed record: metadata, permission levels and content.
/// </summary>
/// <remarks>
/// The record passed in is modified: a null DisplayUrl is replaced by the Url, every ampersand of the Url
/// is replaced by a pipe character, and the ParentAcl of an ancestor ACL whose inheritance type is LeafNode
/// is set to null.
/// </remarks>
/// <param name="p_Record">Feed record to convert.</param>
/// <param name="p_DownloadContent">
/// When true the content is downloaded through <c>s_HttpDownloader</c>; otherwise the content carried by
/// the record is used.
/// </param>
/// <param name="p_ParentId">Value assigned to the ParentId of the document.</param>
/// <param name="p_fileExt">Value assigned to the FileExtension of the document.</param>
/// <returns>The populated document.</returns>
private static PushDocument CreateDocumentFromRecord(GsaFeedRecord p_Record, bool p_DownloadContent, string p_ParentId, string p_fileExt)
{
    IDictionary<string, JToken> recordMetadata = p_Record.ConvertMetadata();

    // The display URL falls back on the URL as it is before the rewrite below.
    if (p_Record.DisplayUrl == null) {
        p_Record.DisplayUrl = p_Record.Url;
    }
    // From here on, the record's URL (also used as the document identifier) has '|' in place of '&'.
    p_Record.Url = p_Record.Url.Replace("&", "|");

    recordMetadata.Add("clickableuri", p_Record.DisplayUrl);
    recordMetadata.Add(nameof(p_Record.DisplayUrl), p_Record.DisplayUrl);
    recordMetadata.Add(nameof(p_Record.Lock), p_Record.Lock);
    recordMetadata.Add(nameof(p_Record.MimeType), p_Record.MimeType);
    recordMetadata.Add(nameof(p_Record.PageRank), p_Record.PageRank);
    recordMetadata.Add(nameof(p_Record.Scoring), p_Record.Scoring);
    recordMetadata.Add(nameof(p_Record.Url), p_Record.Url);
    recordMetadata.Add(nameof(p_Record.AuthMethod), p_Record.AuthMethod.ToString());
    recordMetadata.Add(nameof(p_Record.CrawlImmediately), p_Record.CrawlImmediately);
    recordMetadata.Add(nameof(p_Record.CrawlOnce), p_Record.CrawlOnce);

    PushDocument document = new PushDocument(p_Record.Url) {
        ModifiedDate = p_Record.LastModified ?? DateTime.MinValue,
        Metadata = recordMetadata,
        ParentId = p_ParentId,
        FileExtension = p_fileExt
    };

    if (p_Record.Acl != null) {
        // Permission level of the document itself: one set holding its own deny/allow virtual groups.
        DocumentPermissionSet ownSet = new DocumentPermissionSet();
        PermissionIdentity ownDenyGroup = new PermissionIdentity(p_Record.Url + DISALLOW_GROUP, PermissionIdentityType.VirtualGroup);
        PermissionIdentity ownAllowGroup = new PermissionIdentity(p_Record.Url + ALLOW_GROUP, PermissionIdentityType.VirtualGroup);
        ownSet.DeniedPermissions.Add(ownDenyGroup);
        ownSet.AllowedPermissions.Add(ownAllowGroup);
        DocumentPermissionLevel ownLevel = new DocumentPermissionLevel();
        ownLevel.PermissionSets.Add(ownSet);

        // The list starts with the document's level; without any ancestor it is pushed as is.
        List<DocumentPermissionLevel> levels = new List<DocumentPermissionLevel> { ownLevel };
        int levelIndex = 0;

        // Walk up the ACL chain, placing each ancestor relative to the level at levelIndex.
        for (GsaFeedAcl ancestorAcl = p_Record.Acl.ParentAcl; ancestorAcl != null; ancestorAcl = ancestorAcl.ParentAcl) {
            // The ancestor always gets a permission set of its own.
            DocumentPermissionSet ancestorSet = new DocumentPermissionSet();
            PermissionIdentity ancestorDenyGroup = new PermissionIdentity(ancestorAcl.DocumentUrl + DISALLOW_GROUP, PermissionIdentityType.VirtualGroup);
            PermissionIdentity ancestorAllowGroup = new PermissionIdentity(ancestorAcl.DocumentUrl + ALLOW_GROUP, PermissionIdentityType.VirtualGroup);
            ancestorSet.DeniedPermissions.Add(ancestorDenyGroup);
            ancestorSet.AllowedPermissions.Add(ancestorAllowGroup);

            switch (ancestorAcl.InheritanceType) {
                case GsaFeedAclInheritance.BothPermit:
                    // Same level as the current one, but in a separate set.
                    levels[levelIndex].PermissionSets.Add(ancestorSet);
                    break;

                case GsaFeedAclInheritance.ChildOverrides: {
                    // The ancestor goes in a new level inserted right after the current one,
                    // which then becomes the current level.
                    DocumentPermissionLevel lowerLevel = new DocumentPermissionLevel();
                    lowerLevel.PermissionSets.Add(ancestorSet);
                    levelIndex++;
                    levels.Insert(levelIndex, lowerLevel);
                    break;
                }

                case GsaFeedAclInheritance.ParentOverrides: {
                    // The ancestor goes in a new level inserted before the current one
                    // (it must be added ahead of the child).
                    DocumentPermissionLevel higherLevel = new DocumentPermissionLevel();
                    higherLevel.PermissionSets.Add(ancestorSet);
                    levels.Insert(levelIndex, higherLevel);
                    break;
                }

                case GsaFeedAclInheritance.LeafNode:
                    // A leaf node is not supposed to be inherited from: warn, discard its set and
                    // cut the chain so the walk stops after this ancestor.
                    ConsoleUtilities.WriteLine("> Warning: You are trying to have inheritance on a LeafNode. Document in error: {0}", ConsoleColor.Yellow, p_Record.Url);
                    ancestorAcl.ParentAcl = null;
                    break;
            }
        }

        // Now push the permission levels, in list order.
        foreach (DocumentPermissionLevel level in levels) {
            document.Permissions.Add(level);
        }
    }

    if (p_DownloadContent) {
        // NOTE(review): the download uses the URL after the '&' -> '|' rewrite - confirm this is intended.
        string downloaded = s_HttpDownloader.Download(p_Record.Url);
        PushDocumentHelper.SetCompressedEncodedContent(document, Compression.GetCompressedBinaryData(downloaded));
    } else if (p_Record.Content.Encoding == GsaFeedContentEncoding.Base64Compressed) {
        // Already compressed and encoded: only the surrounding line feeds are stripped.
        PushDocumentHelper.SetCompressedEncodedContent(document, p_Record.Content.Value.Trim('\n'));
    } else {
        PushDocumentHelper.SetContent(document, p_Record.Content.GetDecodedValue());
    }

    return document;
}