Ejemplo n.º 1
0
        /// <summary>
        /// Indexes the files located in the specified folder.
        /// </summary>
        /// <remarks>
        /// Every file matching <c>p_Args.include</c> is converted to a <see cref="PushDocument"/> and handed to
        /// <c>SendBatch</c> once <c>p_Args.batchSize</c> documents have accumulated. After the final batch,
        /// documents of the source that are older than this run's ordering id are deleted.
        /// </remarks>
        /// <param name="p_Args">Parsed command-line arguments.</param>
        /// <exception cref="InvalidOperationException">
        /// A gathered file's full path does not start with the source folder path.
        /// </exception>
        private static void IndexFiles(ProgramArguments p_Args)
        {
            // Normalize to an absolute path ending with a separator so that it can be used
            // both as a prefix check and to compute the relative path printed below.
            string folder = Path.GetFullPath(p_Args.folder);
            if (!folder.EndsWith(Path.DirectorySeparatorChar))
            {
                folder += Path.DirectorySeparatorChar;
            }
            Console.WriteLine($"Pushing files \"{p_Args.include}\" from folder \"{folder}\"...");

            // A single ordering id is shared by every push of this run and by the final cleanup.
            ulong orderingId = RequestOrderingUtilities.CreateOrderingId();

            ICoveoPlatformConfig platformConfig = new CoveoPlatformConfig(GetPushApiUrl(p_Args), GetPlatformApiUrl(p_Args), p_Args.apikey, p_Args.organizationid);

            using (ICoveoPlatformClient platformClient = new CoveoPlatformClient(platformConfig))
            {
                SearchOption searchOption = p_Args.recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
                IList<PushDocument> documentBatch = new List<PushDocument>();

                foreach (FileInfo fileInfo in new DirectoryInfo(folder).EnumerateFiles(p_Args.include, searchOption))
                {
                    // Paths are identifiers, not linguistic text: compare ordinally so the check
                    // does not depend on the current culture.
                    if (!fileInfo.FullName.StartsWith(folder, StringComparison.Ordinal))
                    {
                        throw new InvalidOperationException("Unexpected file gathered from outside the source folder.");
                    }
                    Console.WriteLine(fileInfo.FullName.Substring(folder.Length));

                    PushDocument document = new PushDocument(new Uri(fileInfo.FullName).AbsoluteUri)
                    {
                        ModifiedDate = fileInfo.LastWriteTimeUtc
                    };
                    document.AddMetadata("title", fileInfo.Name);
                    document.AddMetadata("fileextension", fileInfo.Extension);

                    // Empty files are pushed with metadata only.
                    if (fileInfo.Length > 0)
                    {
                        PushDocumentHelper.SetBinaryContentFromFileAndCompress(document, fileInfo.FullName);
                    }
                    documentBatch.Add(document);

                    if (documentBatch.Count >= p_Args.batchSize)
                    {
                        // Push this batch of documents.
                        // NOTE(review): this relies on SendBatch emptying documentBatch after pushing;
                        // confirm, otherwise the same documents would be sent again on the next call.
                        SendBatch(platformClient, documentBatch, p_Args.sourceid, orderingId);
                    }
                }

                // Send the (partial) final batch of documents.
                SendBatch(platformClient, documentBatch, p_Args.sourceid, orderingId);

                // Delete the already indexed files that no longer exist.
                platformClient.DocumentManager.DeleteDocumentsOlderThan(p_Args.sourceid, orderingId, null);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Crawls the national Pokédex listing on pokemondb.net and pushes one document per Pokémon,
        /// one batch per generation, then waits for a key press.
        /// </summary>
        /// <remarks>
        /// For each entry of the listing, the details page is fetched to read the description, the
        /// Pokédex data table and the breeding table. When an evolution section is present, folding
        /// metadata is added so the entry can be grouped under the first name found in that section.
        /// The scraping assumes the page layout encoded in the CSS class names below; a missing element
        /// surfaces as an exception rather than being skipped.
        /// </remarks>
        private static void StartPokemonCrawler()
        {
            // Scraped numbers must not be interpreted using the regional settings of the machine
            // running the crawler, so every numeric parse uses the invariant culture.
            var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

            var htmlDocument = CreateCrawler("https://pokemondb.net/pokedex/national").Result;

            // One "infocard-list" container per generation.
            var pokemonByGen =
                htmlDocument.DocumentNode.Descendants("div")
                .Where(node => node.GetAttributeValue("class", "").Equals("infocard-list infocard-list-pkmn-lg")).ToList();

            Console.WriteLine($"{pokemonByGen.Count} generations of pokemons to add");

            for (int i = 0; i < pokemonByGen.Count; i++)
            {
                var pokemons = new List<PushDocument>();

                var allPokemonsInSpecificGen = pokemonByGen[i].Descendants("div").Where(node => node.GetAttributeValue("class", "").Equals("infocard ")).ToList();

                foreach (var pokemonInGen in allPokemonsInSpecificGen)
                {
                    // The "ent-name" anchor carries both the display name and the link to the details page.
                    var nameLink = pokemonInGen.Descendants("a").FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("ent-name"));
                    var pokemonUrl = $"https://pokemondb.net/{nameLink.ChildAttributes("href").FirstOrDefault().Value}";

                    var pokemonDetailsHtmlDocument = CreateCrawler(pokemonUrl).Result;
                    var description = pokemonDetailsHtmlDocument.DocumentNode.Descendants("div")
                        .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("grid-col span-md-6 span-lg-8")).InnerHtml;
                    var details = pokemonDetailsHtmlDocument.DocumentNode.Descendants("table")
                        .Where(node => node.GetAttributeValue("class", "").Equals("vitals-table")).ToList();

                    // First vitals table: Pokédex data (cell 0 is read as the number, cell 4 as the weight).
                    var pokedexDataTable = details.FirstOrDefault().Descendants("td").ToList();

                    // Third vitals table: breeding data; cell 1 holds the gender split as "male, female".
                    var breedingTable = details[2].Descendants("td").ToList();
                    var genderArray   = breedingTable[1].InnerText.Split(',');

                    // A single segment means there is no male/female split to parse.
                    bool hasGenderSplit = genderArray.Length != 1;

                    var pokemon = new PokeDexItem()
                    {
                        CharacterName = nameLink.InnerText,
                        ImageUrl      = pokemonInGen.Descendants("span").FirstOrDefault(node => node.GetAttributeValue("class", "").Contains("img-fixed img-sprite")).ChildAttributes("data-src").FirstOrDefault().Value,
                        Generation    = $"Generation {i + 1}",
                        UrlToStats    = pokemonUrl,
                        Types         = pokemonInGen.Descendants("a").Where(node => node.GetAttributeValue("class", "").Contains("itype")).Select(s => s.InnerText).ToList(),
                        Weight        = decimal.Parse(GetFirstSectionInString(pokedexDataTable[4].InnerText), invariantCulture),
                        description   = RemoveHtmlTags(description),
                        Number        = Int32.Parse(pokedexDataTable[0].InnerText, invariantCulture),
                        Male          = hasGenderSplit ? decimal.Parse(GetFirstSectionInString(genderArray.First()), invariantCulture) : 0,
                        Female        = hasGenderSplit ? decimal.Parse(GetFirstSectionInString(genderArray.Last().Trim()), invariantCulture) : 0
                    };

                    // Check if the evolution section has any data; if yes, load the parent.
                    var evolutionDiv = pokemonDetailsHtmlDocument.DocumentNode.Descendants("div")
                        .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("infocard-list-evo"));
                    if (evolutionDiv != null)
                    {
                        // An evolution section without a name anchor leaves Parent unset instead of
                        // throwing; the folding fields are then simply not added below.
                        pokemon.Parent = evolutionDiv.Descendants("a")
                            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("ent-name"))?.InnerText;
                    }

                    var documentToAdd = new PushDocument(pokemon.UrlToStats)
                    {
                        ClickableUri = pokemon.UrlToStats,
                        ModifiedDate = DateTime.UtcNow,
                        Metadata     =
                        {
                            new KeyValuePair<string, JToken>("charactername",    pokemon.CharacterName),
                            new KeyValuePair<string, JToken>("ImageUrl",         pokemon.ImageUrl),
                            new KeyValuePair<string, JToken>("generationstring", pokemon.Generation),
                            new KeyValuePair<string, JToken>("UrlToStats",       pokemon.UrlToStats),
                            new KeyValuePair<string, JToken>("Types",            string.Join(";", pokemon.Types)),
                            new KeyValuePair<string, JToken>("description",      pokemon.description),
                            new KeyValuePair<string, JToken>("pokemonnumber",    pokemon.Number),
                            new KeyValuePair<string, JToken>("pokemonweight",    pokemon.Weight),
                            new KeyValuePair<string, JToken>("male",             pokemon.Male),
                            new KeyValuePair<string, JToken>("female",           pokemon.Female),
                        }
                    };

                    // Add the folding fields if an evolution parent is present.
                    if (!string.IsNullOrWhiteSpace(pokemon.Parent))
                    {
                        documentToAdd.Metadata.Add(new KeyValuePair<string, JToken>("foldingchild", pokemon.CharacterName));
                        documentToAdd.Metadata.Add(new KeyValuePair<string, JToken>("foldingparent", pokemon.Parent));
                        documentToAdd.Metadata.Add(new KeyValuePair<string, JToken>("foldingcollection", pokemon.Parent));
                    }

                    pokemons.Add(documentToAdd);
                }

                PushToSource(pokemons);

                // Report the same 1-based generation number that is stored on the documents.
                Console.WriteLine($"generation {i + 1} added...");
            }

            Console.WriteLine("Successful....");
            Console.WriteLine("Press Enter to exit the program...");
            ConsoleKeyInfo keyInfo = Console.ReadKey(true);

            if (keyInfo.Key == ConsoleKey.Enter)
            {
                System.Environment.Exit(0);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds the <see cref="PushDocument"/> corresponding to a GSA feed record.
        /// </summary>
        /// <remarks>
        /// The record itself is modified: a missing <c>DisplayUrl</c> is set to the URL, every
        /// <c>&amp;</c> in <c>Url</c> is replaced by <c>|</c>, and a parent ACL whose inheritance type is
        /// <c>LeafNode</c> has its own <c>ParentAcl</c> cleared.
        /// </remarks>
        /// <param name="p_Record">Feed record to convert.</param>
        /// <param name="p_DownloadContent">
        /// When true, the content is downloaded from the record URL; otherwise the content carried by
        /// the record is used.
        /// </param>
        /// <param name="p_ParentId">Value assigned to the document's <c>ParentId</c>.</param>
        /// <param name="p_fileExt">Value assigned to the document's <c>FileExtension</c>.</param>
        /// <returns>The document populated with metadata, permissions and content.</returns>
        private static PushDocument CreateDocumentFromRecord(GsaFeedRecord p_Record,
                                                             bool p_DownloadContent, string p_ParentId, string p_fileExt)
        {
            IDictionary<string, JToken> recordMetadata = p_Record.ConvertMetadata();

            // The display URL defaults to the original URL, captured before the '&' substitution.
            if (p_Record.DisplayUrl == null)
            {
                p_Record.DisplayUrl = p_Record.Url;
            }
            p_Record.Url = p_Record.Url.Replace("&", "|");

            recordMetadata.Add("clickableuri", p_Record.DisplayUrl);
            recordMetadata.Add(nameof(p_Record.DisplayUrl), p_Record.DisplayUrl);
            recordMetadata.Add(nameof(p_Record.Lock), p_Record.Lock);
            recordMetadata.Add(nameof(p_Record.MimeType), p_Record.MimeType);
            recordMetadata.Add(nameof(p_Record.PageRank), p_Record.PageRank);
            recordMetadata.Add(nameof(p_Record.Scoring), p_Record.Scoring);
            recordMetadata.Add(nameof(p_Record.Url), p_Record.Url);
            recordMetadata.Add(nameof(p_Record.AuthMethod), p_Record.AuthMethod.ToString());
            recordMetadata.Add(nameof(p_Record.CrawlImmediately), p_Record.CrawlImmediately);
            recordMetadata.Add(nameof(p_Record.CrawlOnce), p_Record.CrawlOnce);

            PushDocument document = new PushDocument(p_Record.Url)
            {
                ModifiedDate  = p_Record.LastModified ?? DateTime.MinValue,
                Metadata      = recordMetadata,
                ParentId      = p_ParentId,
                FileExtension = p_fileExt
            };

            if (p_Record.Acl != null)
            {
                // The document's own level: one set made of its allow/deny virtual groups.
                DocumentPermissionSet ownSet = CreateVirtualGroupPermissionSet(p_Record.Url + DISALLOW_GROUP, p_Record.Url + ALLOW_GROUP);
                DocumentPermissionLevel ownLevel = new DocumentPermissionLevel();
                ownLevel.PermissionSets.Add(ownSet);

                if (p_Record.Acl.ParentAcl == null)
                {
                    // No inheritance chain: the document level is the only level.
                    document.Permissions.Add(ownLevel);
                }
                else
                {
                    GsaFeedAcl acl = p_Record.Acl;
                    List<DocumentPermissionLevel> levels = new List<DocumentPermissionLevel> { ownLevel };

                    // Index in 'levels' that the next parent is positioned relative to.
                    int anchorIndex = 0;

                    while (acl.ParentAcl != null)
                    {
                        GsaFeedAcl parentAcl = acl.ParentAcl;

                        // The parent always gets a set of its own.
                        DocumentPermissionSet parentSet = CreateVirtualGroupPermissionSet(parentAcl.DocumentUrl + DISALLOW_GROUP, parentAcl.DocumentUrl + ALLOW_GROUP);

                        switch (parentAcl.InheritanceType)
                        {
                            case GsaFeedAclInheritance.BothPermit:
                                // Parent and document are two different sets of the same level.
                                levels[anchorIndex].PermissionSets.Add(parentSet);
                                break;

                            case GsaFeedAclInheritance.ChildOverrides:
                            {
                                // The parent goes in a new level placed after the current one.
                                DocumentPermissionLevel levelAfter = new DocumentPermissionLevel();
                                levelAfter.PermissionSets.Add(parentSet);
                                anchorIndex++;
                                levels.Insert(anchorIndex, levelAfter);
                                break;
                            }

                            case GsaFeedAclInheritance.ParentOverrides:
                            {
                                // The parent goes in a new level that must be inserted before the child.
                                DocumentPermissionLevel levelBefore = new DocumentPermissionLevel();
                                levelBefore.PermissionSets.Add(parentSet);
                                levels.Insert(anchorIndex, levelBefore);
                                break;
                            }

                            case GsaFeedAclInheritance.LeafNode:
                                // A leaf node is not supposed to be inherited from: warn and cut the
                                // chain here, which also ends the loop.
                                ConsoleUtilities.WriteLine("> Warning: You are trying to have inheritance on a LeafNode. Document in error: {0}", ConsoleColor.Yellow, p_Record.Url);
                                parentAcl.ParentAcl = null;
                                break;
                        }

                        acl = parentAcl;
                    }

                    // Now push the permission levels in their final order.
                    foreach (DocumentPermissionLevel level in levels)
                    {
                        document.Permissions.Add(level);
                    }
                }
            }

            if (p_DownloadContent)
            {
                string downloaded = s_HttpDownloader.Download(p_Record.Url);
                PushDocumentHelper.SetCompressedEncodedContent(document, Compression.GetCompressedBinaryData(downloaded));
            }
            else if (p_Record.Content.Encoding == GsaFeedContentEncoding.Base64Compressed)
            {
                // Content flagged Base64Compressed is forwarded as is, minus surrounding newlines.
                PushDocumentHelper.SetCompressedEncodedContent(document, p_Record.Content.Value.Trim('\n'));
            }
            else
            {
                PushDocumentHelper.SetContent(document, p_Record.Content.GetDecodedValue());
            }

            return document;
        }

        /// <summary>
        /// Creates a permission set that denies one virtual group and allows another.
        /// </summary>
        /// <param name="p_DeniedGroupName">Name of the virtual group added to the denied permissions.</param>
        /// <param name="p_AllowedGroupName">Name of the virtual group added to the allowed permissions.</param>
        /// <returns>The new permission set.</returns>
        private static DocumentPermissionSet CreateVirtualGroupPermissionSet(string p_DeniedGroupName, string p_AllowedGroupName)
        {
            DocumentPermissionSet permissionSet = new DocumentPermissionSet();
            PermissionIdentity deniedGroup  = new PermissionIdentity(p_DeniedGroupName, PermissionIdentityType.VirtualGroup);
            PermissionIdentity allowedGroup = new PermissionIdentity(p_AllowedGroupName, PermissionIdentityType.VirtualGroup);
            permissionSet.DeniedPermissions.Add(deniedGroup);
            permissionSet.AllowedPermissions.Add(allowedGroup);
            return permissionSet;
        }